//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//
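//
// As a rough illustration of both transformations, GVN turns IR like
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y        ; same opcode and operands as %a
//   store i32 %a, i32* %p
//   %v = load i32* %p          ; value is known from the preceding store
//
// into
//
//   %a = add i32 %x, %y
//   store i32 %a, i32* %p
//
// with every use of %b and %v rewritten to use %a.
//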
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
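// Both flags default to on.  A typical way to isolate their effect when
// debugging (assuming the standalone 'opt' tool is being used) is:
//   opt -gvn -enable-pre=false -enable-load-pre=false input.ll -S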
//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    enum ExpressionOpcode {
      ADD = Instruction::Add,
      FADD = Instruction::FAdd,
      SUB = Instruction::Sub,
      FSUB = Instruction::FSub,
      MUL = Instruction::Mul,
      FMUL = Instruction::FMul,
      UDIV = Instruction::UDiv,
      SDIV = Instruction::SDiv,
      FDIV = Instruction::FDiv,
      UREM = Instruction::URem,
      SREM = Instruction::SRem,
      FREM = Instruction::FRem,
      SHL = Instruction::Shl,
      LSHR = Instruction::LShr,
      ASHR = Instruction::AShr,
      AND = Instruction::And,
      OR = Instruction::Or,
      XOR = Instruction::Xor,
      TRUNC = Instruction::Trunc,
      ZEXT = Instruction::ZExt,
      SEXT = Instruction::SExt,
      FPTOUI = Instruction::FPToUI,
      FPTOSI = Instruction::FPToSI,
      UITOFP = Instruction::UIToFP,
      SITOFP = Instruction::SIToFP,
      FPTRUNC = Instruction::FPTrunc,
      FPEXT = Instruction::FPExt,
      PTRTOINT = Instruction::PtrToInt,
      INTTOPTR = Instruction::IntToPtr,
      BITCAST = Instruction::BitCast,
      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    /*bool operator!=(const Expression &other) const {
      return !(*this == other);
    }*/
  };
  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}
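// For example, given:
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b
// lookup_or_add(%x) and lookup_or_add(%y) build the same Expression
// (opcode ADD, type i32, varargs = {VN(%a), VN(%b)}), so both values map to
// one number and GVN can treat %y as fully redundant with %x.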
namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}
//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}
265 Expression
ValueTable::create_expression(CallInst
* C
) {
268 e
.type
= C
->getType();
269 e
.function
= C
->getCalledFunction();
270 e
.opcode
= Expression::CALL
;
273 for (CallInst::op_iterator I
= CS
.arg_begin(), E
= CS
.arg_end();
275 e
.varargs
.push_back(lookup_or_add(*I
));
280 Expression
ValueTable::create_expression(BinaryOperator
* BO
) {
282 e
.varargs
.push_back(lookup_or_add(BO
->getOperand(0)));
283 e
.varargs
.push_back(lookup_or_add(BO
->getOperand(1)));
285 e
.type
= BO
->getType();
286 e
.opcode
= static_cast<Expression::ExpressionOpcode
>(BO
->getOpcode());
291 Expression
ValueTable::create_expression(CmpInst
* C
) {
294 e
.varargs
.push_back(lookup_or_add(C
->getOperand(0)));
295 e
.varargs
.push_back(lookup_or_add(C
->getOperand(1)));
297 e
.type
= C
->getType();
298 e
.opcode
= getOpcode(C
);
303 Expression
ValueTable::create_expression(CastInst
* C
) {
306 e
.varargs
.push_back(lookup_or_add(C
->getOperand(0)));
308 e
.type
= C
->getType();
309 e
.opcode
= static_cast<Expression::ExpressionOpcode
>(C
->getOpcode());
314 Expression
ValueTable::create_expression(ShuffleVectorInst
* S
) {
317 e
.varargs
.push_back(lookup_or_add(S
->getOperand(0)));
318 e
.varargs
.push_back(lookup_or_add(S
->getOperand(1)));
319 e
.varargs
.push_back(lookup_or_add(S
->getOperand(2)));
321 e
.type
= S
->getType();
322 e
.opcode
= Expression::SHUFFLE
;
327 Expression
ValueTable::create_expression(ExtractElementInst
* E
) {
330 e
.varargs
.push_back(lookup_or_add(E
->getOperand(0)));
331 e
.varargs
.push_back(lookup_or_add(E
->getOperand(1)));
333 e
.type
= E
->getType();
334 e
.opcode
= Expression::EXTRACT
;
339 Expression
ValueTable::create_expression(InsertElementInst
* I
) {
342 e
.varargs
.push_back(lookup_or_add(I
->getOperand(0)));
343 e
.varargs
.push_back(lookup_or_add(I
->getOperand(1)));
344 e
.varargs
.push_back(lookup_or_add(I
->getOperand(2)));
346 e
.type
= I
->getType();
347 e
.opcode
= Expression::INSERT
;
352 Expression
ValueTable::create_expression(SelectInst
* I
) {
355 e
.varargs
.push_back(lookup_or_add(I
->getCondition()));
356 e
.varargs
.push_back(lookup_or_add(I
->getTrueValue()));
357 e
.varargs
.push_back(lookup_or_add(I
->getFalseValue()));
359 e
.type
= I
->getType();
360 e
.opcode
= Expression::SELECT
;
365 Expression
ValueTable::create_expression(GetElementPtrInst
* G
) {
368 e
.varargs
.push_back(lookup_or_add(G
->getPointerOperand()));
370 e
.type
= G
->getType();
371 e
.opcode
= Expression::GEP
;
373 for (GetElementPtrInst::op_iterator I
= G
->idx_begin(), E
= G
->idx_end();
375 e
.varargs
.push_back(lookup_or_add(*I
));
380 Expression
ValueTable::create_expression(ExtractValueInst
* E
) {
383 e
.varargs
.push_back(lookup_or_add(E
->getAggregateOperand()));
384 for (ExtractValueInst::idx_iterator II
= E
->idx_begin(), IE
= E
->idx_end();
386 e
.varargs
.push_back(*II
);
388 e
.type
= E
->getType();
389 e
.opcode
= Expression::EXTRACTVALUE
;
394 Expression
ValueTable::create_expression(InsertValueInst
* E
) {
397 e
.varargs
.push_back(lookup_or_add(E
->getAggregateOperand()));
398 e
.varargs
.push_back(lookup_or_add(E
->getInsertedValueOperand()));
399 for (InsertValueInst::idx_iterator II
= E
->idx_begin(), IE
= E
->idx_end();
401 e
.varargs
.push_back(*II
);
403 e
.type
= E
->getType();
404 e
.opcode
= Expression::INSERTVALUE
;
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;
  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}
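// Note that arguments, constants and other non-instructions fall into the
// !isa<Instruction> case above: each distinct Value simply gets its own
// number on first lookup (and the same number on later lookups), so e.g.
// VN(i32 4) and VN(%arg) are unrelated numbers.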
/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}
//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator and postdominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value *CollapsePhi(PHINode* p);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };
}

char GVN::ID = 0;

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

static bool isSafeReplacement(PHINode* p, Instruction *inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

Value *GVN::CollapsePhi(PHINode *PN) {
  Value *ConstVal = PN->hasConstantValue(DT);
  if (!ConstVal) return 0;

  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
  if (!Inst)
    return ConstVal;

  if (DT->dominates(Inst, PN))
    if (isSafeReplacement(PN, Inst))
      return Inst;

  return 0;
}
761 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
762 /// we're analyzing is fully available in the specified block. As we go, keep
763 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
764 /// map is actually a tri-state map with the following values:
765 /// 0) we know the block *is not* fully available.
766 /// 1) we know the block *is* fully available.
767 /// 2) we do not know whether the block is fully available or not, but we are
768 /// currently speculating that it will be.
769 /// 3) we are speculating for this block and have used that to speculate for
771 static bool IsValueFullyAvailableInBlock(BasicBlock
*BB
,
772 DenseMap
<BasicBlock
*, char> &FullyAvailableBlocks
) {
773 // Optimistically assume that the block is fully available and check to see
774 // if we already know about this block in one lookup.
775 std::pair
<DenseMap
<BasicBlock
*, char>::iterator
, char> IV
=
776 FullyAvailableBlocks
.insert(std::make_pair(BB
, 2));
778 // If the entry already existed for this block, return the precomputed value.
780 // If this is a speculative "available" value, mark it as being used for
781 // speculation of other blocks.
782 if (IV
.first
->second
== 2)
783 IV
.first
->second
= 3;
784 return IV
.first
->second
!= 0;
787 // Otherwise, see if it is fully available in all predecessors.
788 pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
);
790 // If this block has no predecessors, it isn't live-in here.
792 goto SpeculationFailure
;
794 for (; PI
!= PE
; ++PI
)
795 // If the value isn't fully available in one of our predecessors, then it
796 // isn't fully available in this block either. Undo our previous
797 // optimistic assumption and bail out.
798 if (!IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
799 goto SpeculationFailure
;
803 // SpeculationFailure - If we get here, we found out that this is not, after
804 // all, a fully-available block. We have a problem if we speculated on this and
805 // used the speculation to mark other blocks as available.
807 char &BBVal
= FullyAvailableBlocks
[BB
];
809 // If we didn't speculate on this, just return with it set to false.
815 // If we did speculate on this value, we could have blocks set to 1 that are
816 // incorrect. Walk the (transitive) successors of this block and mark them as
818 SmallVector
<BasicBlock
*, 32> BBWorklist
;
819 BBWorklist
.push_back(BB
);
822 BasicBlock
*Entry
= BBWorklist
.pop_back_val();
823 // Note that this sets blocks to 0 (unavailable) if they happen to not
824 // already be in FullyAvailableBlocks. This is safe.
825 char &EntryVal
= FullyAvailableBlocks
[Entry
];
826 if (EntryVal
== 0) continue; // Already unavailable.
828 // Mark as unavailable.
831 for (succ_iterator I
= succ_begin(Entry
), E
= succ_end(Entry
); I
!= E
; ++I
)
832 BBWorklist
.push_back(*I
);
833 } while (!BBWorklist
.empty());
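// In other words: a block found in state 2 during a later query is promoted
// to state 3 ("this speculation was used for other blocks"), and if that
// block's own query ultimately fails, SpeculationFailure walks its transitive
// successors to clear any blocks that were optimistically marked available
// on the strength of that speculation.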
839 /// CanCoerceMustAliasedValueToLoad - Return true if
840 /// CoerceAvailableValueToLoadType will succeed.
841 static bool CanCoerceMustAliasedValueToLoad(Value
*StoredVal
,
843 const TargetData
&TD
) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them.  We need to be able to bitcast to integer.
846 if (LoadTy
->isStructTy() || LoadTy
->isArrayTy() ||
847 StoredVal
->getType()->isStructTy() ||
848 StoredVal
->getType()->isArrayTy())
851 // The store has to be at least as big as the load.
852 if (TD
.getTypeSizeInBits(StoredVal
->getType()) <
853 TD
.getTypeSizeInBits(LoadTy
))
860 /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
861 /// then a load from a must-aliased pointer of a different type, try to coerce
862 /// the stored value. LoadedTy is the type of the load we want to replace and
863 /// InsertPt is the place to insert new instructions.
865 /// If we can't do it, return null.
866 static Value
*CoerceAvailableValueToLoadType(Value
*StoredVal
,
867 const Type
*LoadedTy
,
868 Instruction
*InsertPt
,
869 const TargetData
&TD
) {
870 if (!CanCoerceMustAliasedValueToLoad(StoredVal
, LoadedTy
, TD
))
873 const Type
*StoredValTy
= StoredVal
->getType();
875 uint64_t StoreSize
= TD
.getTypeStoreSizeInBits(StoredValTy
);
876 uint64_t LoadSize
= TD
.getTypeSizeInBits(LoadedTy
);
878 // If the store and reload are the same size, we can always reuse it.
879 if (StoreSize
== LoadSize
) {
880 if (StoredValTy
->isPointerTy() && LoadedTy
->isPointerTy()) {
881 // Pointer to Pointer -> use bitcast.
882 return new BitCastInst(StoredVal
, LoadedTy
, "", InsertPt
);
885 // Convert source pointers to integers, which can be bitcast.
886 if (StoredValTy
->isPointerTy()) {
887 StoredValTy
= TD
.getIntPtrType(StoredValTy
->getContext());
888 StoredVal
= new PtrToIntInst(StoredVal
, StoredValTy
, "", InsertPt
);
891 const Type
*TypeToCastTo
= LoadedTy
;
892 if (TypeToCastTo
->isPointerTy())
893 TypeToCastTo
= TD
.getIntPtrType(StoredValTy
->getContext());
895 if (StoredValTy
!= TypeToCastTo
)
896 StoredVal
= new BitCastInst(StoredVal
, TypeToCastTo
, "", InsertPt
);
898 // Cast to pointer if the load needs a pointer type.
899 if (LoadedTy
->isPointerTy())
900 StoredVal
= new IntToPtrInst(StoredVal
, LoadedTy
, "", InsertPt
);
905 // If the loaded value is smaller than the available value, then we can
906 // extract out a piece from it. If the available value is too small, then we
907 // can't do anything.
908 assert(StoreSize
>= LoadSize
&& "CanCoerceMustAliasedValueToLoad fail");
910 // Convert source pointers to integers, which can be manipulated.
911 if (StoredValTy
->isPointerTy()) {
912 StoredValTy
= TD
.getIntPtrType(StoredValTy
->getContext());
913 StoredVal
= new PtrToIntInst(StoredVal
, StoredValTy
, "", InsertPt
);
916 // Convert vectors and fp to integer, which can be manipulated.
917 if (!StoredValTy
->isIntegerTy()) {
918 StoredValTy
= IntegerType::get(StoredValTy
->getContext(), StoreSize
);
919 StoredVal
= new BitCastInst(StoredVal
, StoredValTy
, "", InsertPt
);
922 // If this is a big-endian system, we need to shift the value down to the low
923 // bits so that a truncate will work.
924 if (TD
.isBigEndian()) {
925 Constant
*Val
= ConstantInt::get(StoredVal
->getType(), StoreSize
-LoadSize
);
926 StoredVal
= BinaryOperator::CreateLShr(StoredVal
, Val
, "tmp", InsertPt
);
929 // Truncate the integer to the right size now.
930 const Type
*NewIntTy
= IntegerType::get(StoredValTy
->getContext(), LoadSize
);
931 StoredVal
= new TruncInst(StoredVal
, NewIntTy
, "trunc", InsertPt
);
933 if (LoadedTy
== NewIntTy
)
936 // If the result is a pointer, inttoptr.
937 if (LoadedTy
->isPointerTy())
938 return new IntToPtrInst(StoredVal
, LoadedTy
, "inttoptr", InsertPt
);
940 // Otherwise, bitcast.
941 return new BitCastInst(StoredVal
, LoadedTy
, "bitcast", InsertPt
);
944 /// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
945 /// be expressed as a base pointer plus a constant offset. Return the base and
946 /// offset to the caller.
947 static Value
*GetBaseWithConstantOffset(Value
*Ptr
, int64_t &Offset
,
948 const TargetData
&TD
) {
949 Operator
*PtrOp
= dyn_cast
<Operator
>(Ptr
);
950 if (PtrOp
== 0) return Ptr
;
952 // Just look through bitcasts.
953 if (PtrOp
->getOpcode() == Instruction::BitCast
)
954 return GetBaseWithConstantOffset(PtrOp
->getOperand(0), Offset
, TD
);
956 // If this is a GEP with constant indices, we can look through it.
957 GEPOperator
*GEP
= dyn_cast
<GEPOperator
>(PtrOp
);
958 if (GEP
== 0 || !GEP
->hasAllConstantIndices()) return Ptr
;
960 gep_type_iterator GTI
= gep_type_begin(GEP
);
961 for (User::op_iterator I
= GEP
->idx_begin(), E
= GEP
->idx_end(); I
!= E
;
963 ConstantInt
*OpC
= cast
<ConstantInt
>(*I
);
964 if (OpC
->isZero()) continue;
966 // Handle a struct and array indices which add their offset to the pointer.
967 if (const StructType
*STy
= dyn_cast
<StructType
>(*GTI
)) {
968 Offset
+= TD
.getStructLayout(STy
)->getElementOffset(OpC
->getZExtValue());
970 uint64_t Size
= TD
.getTypeAllocSize(GTI
.getIndexedType());
971 Offset
+= OpC
->getSExtValue()*Size
;
975 // Re-sign extend from the pointer size if needed to get overflow edge cases
977 unsigned PtrSize
= TD
.getPointerSizeInBits();
979 Offset
= (Offset
<< (64-PtrSize
)) >> (64-PtrSize
);
981 return GetBaseWithConstantOffset(GEP
->getPointerOperand(), Offset
, TD
);
985 /// AnalyzeLoadFromClobberingWrite - This function is called when we have a
986 /// memdep query of a load that ends up being a clobbering memory write (store,
987 /// memset, memcpy, memmove). This means that the write *may* provide bits used
988 /// by the load but we can't be sure because the pointers don't mustalias.
990 /// Check this case to see if there is anything more we can do before we give
991 /// up. This returns -1 if we have to give up, or a byte number in the stored
992 /// value of the piece that feeds the load.
993 static int AnalyzeLoadFromClobberingWrite(const Type
*LoadTy
, Value
*LoadPtr
,
995 uint64_t WriteSizeInBits
,
996 const TargetData
&TD
) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them.  We need to be able to bitcast to integer.
999 if (LoadTy
->isStructTy() || LoadTy
->isArrayTy())
1002 int64_t StoreOffset
= 0, LoadOffset
= 0;
1003 Value
*StoreBase
= GetBaseWithConstantOffset(WritePtr
, StoreOffset
, TD
);
1005 GetBaseWithConstantOffset(LoadPtr
, LoadOffset
, TD
);
1006 if (StoreBase
!= LoadBase
)
1009 // If the load and store are to the exact same address, they should have been
1010 // a must alias. AA must have gotten confused.
1011 // FIXME: Study to see if/when this happens. One case is forwarding a memset
1012 // to a load from the base of the memset.
1014 if (LoadOffset
== StoreOffset
) {
1015 dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
1016 << "Base = " << *StoreBase
<< "\n"
1017 << "Store Ptr = " << *WritePtr
<< "\n"
1018 << "Store Offs = " << StoreOffset
<< "\n"
1019 << "Load Ptr = " << *LoadPtr
<< "\n";
1024 // If the load and store don't overlap at all, the store doesn't provide
1025 // anything to the load. In this case, they really don't alias at all, AA
1026 // must have gotten confused.
1027 // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
1028 // remove this check, as it is duplicated with what we have below.
1029 uint64_t LoadSize
= TD
.getTypeSizeInBits(LoadTy
);
1031 if ((WriteSizeInBits
& 7) | (LoadSize
& 7))
1033 uint64_t StoreSize
= WriteSizeInBits
>> 3; // Convert to bytes.
1037 bool isAAFailure
= false;
1038 if (StoreOffset
< LoadOffset
)
1039 isAAFailure
= StoreOffset
+int64_t(StoreSize
) <= LoadOffset
;
1041 isAAFailure
= LoadOffset
+int64_t(LoadSize
) <= StoreOffset
;
1045 dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
1046 << "Base = " << *StoreBase
<< "\n"
1047 << "Store Ptr = " << *WritePtr
<< "\n"
1048 << "Store Offs = " << StoreOffset
<< "\n"
1049 << "Load Ptr = " << *LoadPtr
<< "\n";
1055 // If the Load isn't completely contained within the stored bits, we don't
1056 // have all the bits to feed it. We could do something crazy in the future
1057 // (issue a smaller load then merge the bits in) but this seems unlikely to be
1059 if (StoreOffset
> LoadOffset
||
1060 StoreOffset
+StoreSize
< LoadOffset
+LoadSize
)
1063 // Okay, we can do this transformation. Return the number of bytes into the
1064 // store that the load is.
1065 return LoadOffset
-StoreOffset
;
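// Example: with a 16-byte write starting at base+0 and a 4-byte load at
// base+8 off the same base pointer, the bytes the load needs lie entirely
// inside the written range, so this returns 8 (the load's byte offset into
// the stored value); non-overlapping or only partially covered loads return
// -1 as described above.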
1068 /// AnalyzeLoadFromClobberingStore - This function is called when we have a
1069 /// memdep query of a load that ends up being a clobbering store.
1070 static int AnalyzeLoadFromClobberingStore(const Type
*LoadTy
, Value
*LoadPtr
,
1072 const TargetData
&TD
) {
1073 // Cannot handle reading from store of first-class aggregate yet.
1074 if (DepSI
->getOperand(0)->getType()->isStructTy() ||
1075 DepSI
->getOperand(0)->getType()->isArrayTy())
1078 Value
*StorePtr
= DepSI
->getPointerOperand();
1079 uint64_t StoreSize
= TD
.getTypeSizeInBits(DepSI
->getOperand(0)->getType());
1080 return AnalyzeLoadFromClobberingWrite(LoadTy
, LoadPtr
,
1081 StorePtr
, StoreSize
, TD
);
1084 static int AnalyzeLoadFromClobberingMemInst(const Type
*LoadTy
, Value
*LoadPtr
,
1086 const TargetData
&TD
) {
1087 // If the mem operation is a non-constant size, we can't handle it.
1088 ConstantInt
*SizeCst
= dyn_cast
<ConstantInt
>(MI
->getLength());
1089 if (SizeCst
== 0) return -1;
1090 uint64_t MemSizeInBits
= SizeCst
->getZExtValue()*8;
1092 // If this is memset, we just need to see if the offset is valid in the size
1094 if (MI
->getIntrinsicID() == Intrinsic::memset
)
1095 return AnalyzeLoadFromClobberingWrite(LoadTy
, LoadPtr
, MI
->getDest(),
1098 // If we have a memcpy/memmove, the only case we can handle is if this is a
1099 // copy from constant memory. In that case, we can read directly from the
1101 MemTransferInst
*MTI
= cast
<MemTransferInst
>(MI
);
1103 Constant
*Src
= dyn_cast
<Constant
>(MTI
->getSource());
1104 if (Src
== 0) return -1;
1106 GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(Src
->getUnderlyingObject());
1107 if (GV
== 0 || !GV
->isConstant()) return -1;
1109 // See if the access is within the bounds of the transfer.
1110 int Offset
= AnalyzeLoadFromClobberingWrite(LoadTy
, LoadPtr
,
1111 MI
->getDest(), MemSizeInBits
, TD
);
1115 // Otherwise, see if we can constant fold a load from the constant with the
1116 // offset applied as appropriate.
1117 Src
= ConstantExpr::getBitCast(Src
,
1118 llvm::Type::getInt8PtrTy(Src
->getContext()));
1119 Constant
*OffsetCst
=
1120 ConstantInt::get(Type::getInt64Ty(Src
->getContext()), (unsigned)Offset
);
1121 Src
= ConstantExpr::getGetElementPtr(Src
, &OffsetCst
, 1);
1122 Src
= ConstantExpr::getBitCast(Src
, PointerType::getUnqual(LoadTy
));
1123 if (ConstantFoldLoadFromConstPtr(Src
, &TD
))
1129 /// GetStoreValueForLoad - This function is called when we have a
1130 /// memdep query of a load that ends up being a clobbering store. This means
1131 /// that the store *may* provide bits used by the load but we can't be sure
1132 /// because the pointers don't mustalias. Check this case to see if there is
1133 /// anything more we can do before we give up.
1134 static Value
*GetStoreValueForLoad(Value
*SrcVal
, unsigned Offset
,
1136 Instruction
*InsertPt
, const TargetData
&TD
){
1137 LLVMContext
&Ctx
= SrcVal
->getType()->getContext();
1139 uint64_t StoreSize
= (TD
.getTypeSizeInBits(SrcVal
->getType()) + 7) / 8;
1140 uint64_t LoadSize
= (TD
.getTypeSizeInBits(LoadTy
) + 7) / 8;
1142 IRBuilder
<> Builder(InsertPt
->getParent(), InsertPt
);
1144 // Compute which bits of the stored value are being used by the load. Convert
1145 // to an integer type to start with.
1146 if (SrcVal
->getType()->isPointerTy())
1147 SrcVal
= Builder
.CreatePtrToInt(SrcVal
, TD
.getIntPtrType(Ctx
), "tmp");
1148 if (!SrcVal
->getType()->isIntegerTy())
1149 SrcVal
= Builder
.CreateBitCast(SrcVal
, IntegerType::get(Ctx
, StoreSize
*8),
1152 // Shift the bits to the least significant depending on endianness.
1154 if (TD
.isLittleEndian())
1155 ShiftAmt
= Offset
*8;
1157 ShiftAmt
= (StoreSize
-LoadSize
-Offset
)*8;
1160 SrcVal
= Builder
.CreateLShr(SrcVal
, ShiftAmt
, "tmp");
1162 if (LoadSize
!= StoreSize
)
1163 SrcVal
= Builder
.CreateTrunc(SrcVal
, IntegerType::get(Ctx
, LoadSize
*8),
1166 return CoerceAvailableValueToLoadType(SrcVal
, LoadTy
, InsertPt
, TD
);
1169 /// GetMemInstValueForLoad - This function is called when we have a
1170 /// memdep query of a load that ends up being a clobbering mem intrinsic.
1171 static Value
*GetMemInstValueForLoad(MemIntrinsic
*SrcInst
, unsigned Offset
,
1172 const Type
*LoadTy
, Instruction
*InsertPt
,
1173 const TargetData
&TD
){
1174 LLVMContext
&Ctx
= LoadTy
->getContext();
1175 uint64_t LoadSize
= TD
.getTypeSizeInBits(LoadTy
)/8;
1177 IRBuilder
<> Builder(InsertPt
->getParent(), InsertPt
);
1179 // We know that this method is only called when the mem transfer fully
1180 // provides the bits for the load.
1181 if (MemSetInst
*MSI
= dyn_cast
<MemSetInst
>(SrcInst
)) {
1182 // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
1183 // independently of what the offset is.
1184 Value
*Val
= MSI
->getValue();
1186 Val
= Builder
.CreateZExt(Val
, IntegerType::get(Ctx
, LoadSize
*8));
1188 Value
*OneElt
= Val
;
1190 // Splat the value out to the right number of bits.
1191 for (unsigned NumBytesSet
= 1; NumBytesSet
!= LoadSize
; ) {
1192 // If we can double the number of bytes set, do it.
1193 if (NumBytesSet
*2 <= LoadSize
) {
1194 Value
*ShVal
= Builder
.CreateShl(Val
, NumBytesSet
*8);
1195 Val
= Builder
.CreateOr(Val
, ShVal
);
1200 // Otherwise insert one byte at a time.
1201 Value
*ShVal
= Builder
.CreateShl(Val
, 1*8);
1202 Val
= Builder
.CreateOr(OneElt
, ShVal
);
1206 return CoerceAvailableValueToLoadType(Val
, LoadTy
, InsertPt
, TD
);
1209 // Otherwise, this is a memcpy/memmove from a constant global.
1210 MemTransferInst
*MTI
= cast
<MemTransferInst
>(SrcInst
);
1211 Constant
*Src
= cast
<Constant
>(MTI
->getSource());
1213 // Otherwise, see if we can constant fold a load from the constant with the
1214 // offset applied as appropriate.
1215 Src
= ConstantExpr::getBitCast(Src
,
1216 llvm::Type::getInt8PtrTy(Src
->getContext()));
1217 Constant
*OffsetCst
=
1218 ConstantInt::get(Type::getInt64Ty(Src
->getContext()), (unsigned)Offset
);
1219 Src
= ConstantExpr::getGetElementPtr(Src
, &OffsetCst
, 1);
1220 Src
= ConstantExpr::getBitCast(Src
, PointerType::getUnqual(LoadTy
));
1221 return ConstantFoldLoadFromConstPtr(Src
, &TD
);
1226 struct AvailableValueInBlock
{
1227 /// BB - The basic block in question.
1230 SimpleVal
, // A simple offsetted value that is accessed.
1231 MemIntrin
// A memory intrinsic which is loaded from.
1234 /// V - The value that is live out of the block.
1235 PointerIntPair
<Value
*, 1, ValType
> Val
;
1237 /// Offset - The byte offset in Val that is interesting for the load query.
1240 static AvailableValueInBlock
get(BasicBlock
*BB
, Value
*V
,
1241 unsigned Offset
= 0) {
1242 AvailableValueInBlock Res
;
1244 Res
.Val
.setPointer(V
);
1245 Res
.Val
.setInt(SimpleVal
);
1246 Res
.Offset
= Offset
;
1250 static AvailableValueInBlock
getMI(BasicBlock
*BB
, MemIntrinsic
*MI
,
1251 unsigned Offset
= 0) {
1252 AvailableValueInBlock Res
;
1254 Res
.Val
.setPointer(MI
);
1255 Res
.Val
.setInt(MemIntrin
);
1256 Res
.Offset
= Offset
;
1260 bool isSimpleValue() const { return Val
.getInt() == SimpleVal
; }
1261 Value
*getSimpleValue() const {
1262 assert(isSimpleValue() && "Wrong accessor");
1263 return Val
.getPointer();
1266 MemIntrinsic
*getMemIntrinValue() const {
1267 assert(!isSimpleValue() && "Wrong accessor");
1268 return cast
<MemIntrinsic
>(Val
.getPointer());
1271 /// MaterializeAdjustedValue - Emit code into this block to adjust the value
1272 /// defined here to the specified type. This handles various coercion cases.
1273 Value
*MaterializeAdjustedValue(const Type
*LoadTy
,
1274 const TargetData
*TD
) const {
1276 if (isSimpleValue()) {
1277 Res
= getSimpleValue();
1278 if (Res
->getType() != LoadTy
) {
1279 assert(TD
&& "Need target data to handle type mismatch case");
1280 Res
= GetStoreValueForLoad(Res
, Offset
, LoadTy
, BB
->getTerminator(),
1283 DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
<< " "
1284 << *getSimpleValue() << '\n'
1285 << *Res
<< '\n' << "\n\n\n");
1288 Res
= GetMemInstValueForLoad(getMemIntrinValue(), Offset
,
1289 LoadTy
, BB
->getTerminator(), *TD
);
1290 DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1291 << " " << *getMemIntrinValue() << '\n'
1292 << *Res
<< '\n' << "\n\n\n");
1300 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
1301 /// construct SSA form, allowing us to eliminate LI. This returns the value
1302 /// that should be used at LI's definition site.
1303 static Value
*ConstructSSAForLoadSet(LoadInst
*LI
,
1304 SmallVectorImpl
<AvailableValueInBlock
> &ValuesPerBlock
,
1305 const TargetData
*TD
,
1306 const DominatorTree
&DT
,
1307 AliasAnalysis
*AA
) {
1308 // Check for the fully redundant, dominating load case. In this case, we can
1309 // just use the dominating value directly.
1310 if (ValuesPerBlock
.size() == 1 &&
1311 DT
.properlyDominates(ValuesPerBlock
[0].BB
, LI
->getParent()))
1312 return ValuesPerBlock
[0].MaterializeAdjustedValue(LI
->getType(), TD
);
1314 // Otherwise, we have to construct SSA form.
1315 SmallVector
<PHINode
*, 8> NewPHIs
;
1316 SSAUpdater
SSAUpdate(&NewPHIs
);
1317 SSAUpdate
.Initialize(LI
->getType(), LI
->getName());
1319 const Type
*LoadTy
= LI
->getType();
1321 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
) {
1322 const AvailableValueInBlock
&AV
= ValuesPerBlock
[i
];
1323 BasicBlock
*BB
= AV
.BB
;
1325 if (SSAUpdate
.HasValueForBlock(BB
))
1328 SSAUpdate
.AddAvailableValue(BB
, AV
.MaterializeAdjustedValue(LoadTy
, TD
));
1331 // Perform PHI construction.
1332 Value
*V
= SSAUpdate
.GetValueInMiddleOfBlock(LI
->getParent());
1334 // If new PHI nodes were created, notify alias analysis.
1335 if (V
->getType()->isPointerTy())
1336 for (unsigned i
= 0, e
= NewPHIs
.size(); i
!= e
; ++i
)
1337 AA
->copyValue(LI
, NewPHIs
[i
]);
1342 static bool isLifetimeStart(const Instruction
*Inst
) {
1343 if (const IntrinsicInst
* II
= dyn_cast
<IntrinsicInst
>(Inst
))
1344 return II
->getIntrinsicID() == Intrinsic::lifetime_start
;
1348 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
1349 /// non-local by performing PHI construction.
1350 bool GVN::processNonLocalLoad(LoadInst
*LI
,
1351 SmallVectorImpl
<Instruction
*> &toErase
) {
1352 // Find the non-local dependencies of the load.
1353 SmallVector
<NonLocalDepResult
, 64> Deps
;
1354 MD
->getNonLocalPointerDependency(LI
->getOperand(0), true, LI
->getParent(),
1356 //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
1357 // << Deps.size() << *LI << '\n');
1359 // If we had to process more than one hundred blocks to find the
1360 // dependencies, this load isn't worth worrying about. Optimizing
1361 // it will be too expensive.
1362 if (Deps
.size() > 100)
1365 // If we had a phi translation failure, we'll have a single entry which is a
1366 // clobber in the current block. Reject this early.
1367 if (Deps
.size() == 1 && Deps
[0].getResult().isClobber()) {
1369 dbgs() << "GVN: non-local load ";
1370 WriteAsOperand(dbgs(), LI
);
1371 dbgs() << " is clobbered by " << *Deps
[0].getResult().getInst() << '\n';
1376 // Filter out useless results (non-locals, etc). Keep track of the blocks
1377 // where we have a value available in repl, also keep track of whether we see
1378 // dependencies that produce an unknown value for the load (such as a call
1379 // that could potentially clobber the load).
1380 SmallVector
<AvailableValueInBlock
, 16> ValuesPerBlock
;
1381 SmallVector
<BasicBlock
*, 16> UnavailableBlocks
;
1383 const TargetData
*TD
= 0;
1385 for (unsigned i
= 0, e
= Deps
.size(); i
!= e
; ++i
) {
1386 BasicBlock
*DepBB
= Deps
[i
].getBB();
1387 MemDepResult DepInfo
= Deps
[i
].getResult();
1389 if (DepInfo
.isClobber()) {
1390 // The address being loaded in this non-local block may not be the same as
1391 // the pointer operand of the load if PHI translation occurs. Make sure
1392 // to consider the right address.
1393 Value
*Address
= Deps
[i
].getAddress();
1395 // If the dependence is to a store that writes to a superset of the bits
1396 // read by the load, we can extract the bits we need for the load from the
1398 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInfo
.getInst())) {
1400 TD
= getAnalysisIfAvailable
<TargetData
>();
1401 if (TD
&& Address
) {
1402 int Offset
= AnalyzeLoadFromClobberingStore(LI
->getType(), Address
,
1405 ValuesPerBlock
.push_back(AvailableValueInBlock::get(DepBB
,
1406 DepSI
->getOperand(0),
1413 // If the clobbering value is a memset/memcpy/memmove, see if we can
1414 // forward a value on from it.
1415 if (MemIntrinsic
*DepMI
= dyn_cast
<MemIntrinsic
>(DepInfo
.getInst())) {
1417 TD
= getAnalysisIfAvailable
<TargetData
>();
1418 if (TD
&& Address
) {
1419 int Offset
= AnalyzeLoadFromClobberingMemInst(LI
->getType(), Address
,
1422 ValuesPerBlock
.push_back(AvailableValueInBlock::getMI(DepBB
, DepMI
,
1429 UnavailableBlocks
.push_back(DepBB
);
1433 Instruction
*DepInst
= DepInfo
.getInst();
1435 // Loading the allocation -> undef.
1436 if (isa
<AllocaInst
>(DepInst
) || isMalloc(DepInst
) ||
1437 // Loading immediately after lifetime begin -> undef.
1438 isLifetimeStart(DepInst
)) {
1439 ValuesPerBlock
.push_back(AvailableValueInBlock::get(DepBB
,
1440 UndefValue::get(LI
->getType())));
1444 if (StoreInst
*S
= dyn_cast
<StoreInst
>(DepInst
)) {
1445 // Reject loads and stores that are to the same address but are of
1446 // different types if we have to.
1447 if (S
->getOperand(0)->getType() != LI
->getType()) {
1449 TD
= getAnalysisIfAvailable
<TargetData
>();
1451 // If the stored value is larger or equal to the loaded value, we can
1453 if (TD
== 0 || !CanCoerceMustAliasedValueToLoad(S
->getOperand(0),
1454 LI
->getType(), *TD
)) {
1455 UnavailableBlocks
.push_back(DepBB
);
1460 ValuesPerBlock
.push_back(AvailableValueInBlock::get(DepBB
,
1465 if (LoadInst
*LD
= dyn_cast
<LoadInst
>(DepInst
)) {
1466 // If the types mismatch and we can't handle it, reject reuse of the load.
1467 if (LD
->getType() != LI
->getType()) {
1469 TD
= getAnalysisIfAvailable
<TargetData
>();
1471 // If the stored value is larger or equal to the loaded value, we can
1473 if (TD
== 0 || !CanCoerceMustAliasedValueToLoad(LD
, LI
->getType(),*TD
)){
1474 UnavailableBlocks
.push_back(DepBB
);
1478 ValuesPerBlock
.push_back(AvailableValueInBlock::get(DepBB
, LD
));
1482 UnavailableBlocks
.push_back(DepBB
);
1486 // If we have no predecessors that produce a known value for this load, exit
1488 if (ValuesPerBlock
.empty()) return false;
1490 // If all of the instructions we depend on produce a known value for this
1491 // load, then it is fully redundant and we can use PHI insertion to compute
1492 // its value. Insert PHIs and remove the fully redundant value now.
1493 if (UnavailableBlocks
.empty()) {
1494 DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI
<< '\n');
1496 // Perform PHI construction.
1497 Value
*V
= ConstructSSAForLoadSet(LI
, ValuesPerBlock
, TD
, *DT
,
1498 VN
.getAliasAnalysis());
1499 LI
->replaceAllUsesWith(V
);
1501 if (isa
<PHINode
>(V
))
1503 if (V
->getType()->isPointerTy())
1504 MD
->invalidateCachedPointerInfo(V
);
1506 toErase
.push_back(LI
);
1511 if (!EnablePRE
|| !EnableLoadPRE
)
1514 // Okay, we have *some* definitions of the value. This means that the value
1515 // is available in some of our (transitive) predecessors. Lets think about
1516 // doing PRE of this load. This will involve inserting a new load into the
1517 // predecessor when it's not available. We could do this in general, but
1518 // prefer to not increase code size. As such, we only do this when we know
1519 // that we only have to insert *one* load (which means we're basically moving
1520 // the load, not inserting a new one).
1522 SmallPtrSet
<BasicBlock
*, 4> Blockers
;
1523 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1524 Blockers
.insert(UnavailableBlocks
[i
]);
1526 // Lets find first basic block with more than one predecessor. Walk backwards
1527 // through predecessors if needed.
1528 BasicBlock
*LoadBB
= LI
->getParent();
1529 BasicBlock
*TmpBB
= LoadBB
;
1531 bool isSinglePred
= false;
1532 bool allSingleSucc
= true;
1533 while (TmpBB
->getSinglePredecessor()) {
1534 isSinglePred
= true;
1535 TmpBB
= TmpBB
->getSinglePredecessor();
1536 if (TmpBB
== LoadBB
) // Infinite (unreachable) loop.
1538 if (Blockers
.count(TmpBB
))
1541 // If any of these blocks has more than one successor (i.e. if the edge we
1542 // just traversed was critical), then there are other paths through this
1543 // block along which the load may not be anticipated. Hoisting the load
1544 // above this block would be adding the load to execution paths along
1545 // which it was not previously executed.
1546 if (TmpBB
->getTerminator()->getNumSuccessors() != 1)
1553 // FIXME: It is extremely unclear what this loop is doing, other than
1554 // artificially restricting loadpre.
1557 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
) {
1558 const AvailableValueInBlock
&AV
= ValuesPerBlock
[i
];
1559 if (AV
.isSimpleValue())
1560 // "Hot" Instruction is in some loop (because it dominates its dep.
1562 if (Instruction
*I
= dyn_cast
<Instruction
>(AV
.getSimpleValue()))
1563 if (DT
->dominates(LI
, I
)) {
1569 // We are interested only in "hot" instructions. We don't want to do any
1570 // mis-optimizations here.
1575 // Check to see how many predecessors have the loaded value fully
1577 DenseMap
<BasicBlock
*, Value
*> PredLoads
;
1578 DenseMap
<BasicBlock
*, char> FullyAvailableBlocks
;
1579 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1580 FullyAvailableBlocks
[ValuesPerBlock
[i
].BB
] = true;
1581 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1582 FullyAvailableBlocks
[UnavailableBlocks
[i
]] = false;
1584 SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4> NeedToSplit
;
1585 for (pred_iterator PI
= pred_begin(LoadBB
), E
= pred_end(LoadBB
);
1587 BasicBlock
*Pred
= *PI
;
1588 if (IsValueFullyAvailableInBlock(Pred
, FullyAvailableBlocks
)) {
1591 PredLoads
[Pred
] = 0;
1593 if (Pred
->getTerminator()->getNumSuccessors() != 1) {
1594 if (isa
<IndirectBrInst
>(Pred
->getTerminator())) {
1595 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1596 << Pred
->getName() << "': " << *LI
<< '\n');
1599 unsigned SuccNum
= GetSuccessorNumber(Pred
, LoadBB
);
1600 NeedToSplit
.push_back(std::make_pair(Pred
->getTerminator(), SuccNum
));
1603 if (!NeedToSplit
.empty()) {
1604 toSplit
.append(NeedToSplit
.begin(), NeedToSplit
.end());
1608 // Decide whether PRE is profitable for this load.
1609 unsigned NumUnavailablePreds
= PredLoads
.size();
1610 assert(NumUnavailablePreds
!= 0 &&
1611 "Fully available value should be eliminated above!");
1613 // If this load is unavailable in multiple predecessors, reject it.
1614 // FIXME: If we could restructure the CFG, we could make a common pred with
1615 // all the preds that don't have an available LI and insert a new load into
1617 if (NumUnavailablePreds
!= 1)
1620 // Check if the load can safely be moved to all the unavailable predecessors.
1621 bool CanDoPRE
= true;
1622 SmallVector
<Instruction
*, 8> NewInsts
;
1623 for (DenseMap
<BasicBlock
*, Value
*>::iterator I
= PredLoads
.begin(),
1624 E
= PredLoads
.end(); I
!= E
; ++I
) {
1625 BasicBlock
*UnavailablePred
= I
->first
;
1627 // Do PHI translation to get its value in the predecessor if necessary. The
1628 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1630 // If all preds have a single successor, then we know it is safe to insert
1631 // the load on the pred (?!?), so we can insert code to materialize the
1632 // pointer if it is not available.
1633 PHITransAddr
Address(LI
->getOperand(0), TD
);
1635 if (allSingleSucc
) {
1636 LoadPtr
= Address
.PHITranslateWithInsertion(LoadBB
, UnavailablePred
,
1639 Address
.PHITranslateValue(LoadBB
, UnavailablePred
, DT
);
1640 LoadPtr
= Address
.getAddr();
1643 // If we couldn't find or insert a computation of this phi translated value,
1646 DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1647 << *LI
->getOperand(0) << "\n");
1652 // Make sure it is valid to move this load here. We have to watch out for:
1653 // @1 = getelementptr (i8* p, ...
1654 // test p and branch if == 0
1656 // It is valid to have the getelementptr before the test, even if p can be 0,
1657 // as getelementptr only does address arithmetic.
1658 // If we are not pushing the value through any multiple-successor blocks
1659 // we do not have this case. Otherwise, check that the load is safe to
1660 // put anywhere; this can be improved, but should be conservatively safe.
1661 if (!allSingleSucc
&&
1662 // FIXME: REEVALUTE THIS.
1663 !isSafeToLoadUnconditionally(LoadPtr
,
1664 UnavailablePred
->getTerminator(),
1665 LI
->getAlignment(), TD
)) {
1670 I
->second
= LoadPtr
;
1674 while (!NewInsts
.empty())
1675 NewInsts
.pop_back_val()->eraseFromParent();
1679 // Okay, we can eliminate this load by inserting a reload in the predecessor
1680 // and using PHI construction to get the value in the other predecessors, do
1682 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI
<< '\n');
1683 DEBUG(if (!NewInsts
.empty())
1684 dbgs() << "INSERTED " << NewInsts
.size() << " INSTS: "
1685 << *NewInsts
.back() << '\n');
1687 // Assign value numbers to the new instructions.
1688 for (unsigned i
= 0, e
= NewInsts
.size(); i
!= e
; ++i
) {
1689 // FIXME: We really _ought_ to insert these value numbers into their
1690 // parent's availability map. However, in doing so, we risk getting into
1691 // ordering issues. If a block hasn't been processed yet, we would be
1692 // marking a value as AVAIL-IN, which isn't what we intend.
1693 VN
.lookup_or_add(NewInsts
[i
]);
1696 for (DenseMap
<BasicBlock
*, Value
*>::iterator I
= PredLoads
.begin(),
1697 E
= PredLoads
.end(); I
!= E
; ++I
) {
1698 BasicBlock
*UnavailablePred
= I
->first
;
1699 Value
*LoadPtr
= I
->second
;
1701 Value
*NewLoad
= new LoadInst(LoadPtr
, LI
->getName()+".pre", false,
1703 UnavailablePred
->getTerminator());
1705 // Add the newly created load.
1706 ValuesPerBlock
.push_back(AvailableValueInBlock::get(UnavailablePred
,
1708 MD
->invalidateCachedPointerInfo(LoadPtr
);
1709 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad
<< '\n');
1712 // Perform PHI construction.
1713 Value
*V
= ConstructSSAForLoadSet(LI
, ValuesPerBlock
, TD
, *DT
,
1714 VN
.getAliasAnalysis());
1715 LI
->replaceAllUsesWith(V
);
1716 if (isa
<PHINode
>(V
))
1718 if (V
->getType()->isPointerTy())
1719 MD
->invalidateCachedPointerInfo(V
);
1721 toErase
.push_back(LI
);
1726 /// processLoad - Attempt to eliminate a load, first by eliminating it
1727 /// locally, and then attempting non-local elimination if that fails.
1728 bool GVN::processLoad(LoadInst
*L
, SmallVectorImpl
<Instruction
*> &toErase
) {
1732 if (L
->isVolatile())
1735 // ... to a pointer that has been loaded from before...
1736 MemDepResult Dep
= MD
->getDependency(L
);
  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could forward the stored value in cases like this by recognizing that
    // the clobbering store is at a common base + constant offset from the load
    // and completely covers it.  This sort of thing can happen in bitfield
    // access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                          L->getType(), L, *TD);
      }
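
    // For illustration: in the bitfield-style example above the clobbering
    // store covers the load at constant offset 1, so (assuming the usual
    // little-endian TargetData) AnalyzeLoadFromClobberingStore reports
    // Offset == 1 and GetStoreValueForLoad materializes the reused value as
    // roughly an lshr-then-trunc of the stored i32 123, i.e. the constant 0,
    // and no memory access remains.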
    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
    // a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
      }
    }
    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);
  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getOperand(0);

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    const TargetData *TD = 0;
    if (StoredVal->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }
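
  // For illustration (hypothetical IR, not from a test case): the available
  // value only has to be reusable, not type-identical.  Given
  //
  //   store i32 %x, i32* %P
  //   %f = load float* %Q        ; %Q must-aliases %P
  //
  // CoerceAvailableValueToLoadType can express the reuse as a bitcast of %x to
  // float (or a shift/truncate for narrower loads), so the load is still
  // eliminated even though the types differ; the load-after-load case below
  // uses the same helper.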
  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    const TargetData *TD = 0;
    if (DepLI->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }
  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }
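
  // For illustration (hypothetical IR): a load whose only dependency is the
  // allocation itself reads uninitialized memory,
  //
  //   %p = alloca i32             ; or a call to malloc
  //   %v = load i32* %p           ; no intervening store
  //
  // so replacing %v with undef is safe.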
  // If this load occurs right after a lifetime.start intrinsic, then the
  // loaded value is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  ValueNumberScope *Locals = I->second;

  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
    if (I != Locals->table.end())
      return I->second;
    Locals = Locals->parent;
  }

  return 0;
}
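
// Note on the scopes walked above: iterateOnFunction (below) gives every basic
// block a ValueNumberScope whose 'parent' is the scope of the block's
// immediate dominator.  lookupNumber therefore finds a leader for a value
// number if one was recorded in BB or in any block dominating BB, without ever
// merging the per-block tables.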
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }
  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);
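
    // For illustration (hypothetical IR): given
    //
    //   %c = icmp eq i32 %a, 0
    //   br i1 %c, label %then, label %else
    //
    // %c is known to be true on entry to %then and false on entry to %else,
    // provided each successor can only be entered along this edge.  Recording
    // a constant leader for the condition's value number in those blocks lets
    // later uses with the same value number fold to a constant.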
    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());

    return false;
  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }
  // Collapse PHI nodes
  if (PHINode *p = dyn_cast<PHINode>(I)) {
    Value *constVal = CollapsePhi(p);

    if (constVal) {
      p->replaceAllUsesWith(constVal);
      if (MD && constVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(constVal);
      VN.erase(p);

      toErase.push_back(p);
    } else {
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    }
  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    I->replaceAllUsesWith(repl);
    if (MD && repl->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;

    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}
bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE; ) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
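    // Note: the instructions about to be erased may include the one BI points
    // at.  Remember whether we were at the start of the block and step back
    // one otherwise, so BI is not left dangling once the deletions below run.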
    if (!AtStart)
      --BI;

    for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}
/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;
    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);
      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
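      //
      // For illustration (hypothetical IR): in the diamond
      //
      //   entry:  br i1 %c, label %a, label %b
      //   a:      %x = add i32 %p, 1
      //           br label %merge
      //   b:      br label %merge
      //   merge:  %y = add i32 %p, 1      ; same value number as %x
      //
      // %y is available in predecessor %a but not in %b (NumWith == 1,
      // NumWithout == 1), so the add is cloned into %b and a PHI of the two
      // copies replaces %y in %merge.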
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(P)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
          localAvail[P]->table.find(ValNo);
        if (predV == localAvail[P]->table.end()) {
          PREPred = P;
          ++NumWithout;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV->second;
          ++NumWith;
        }
      }
      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }
      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;
      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (MD && Phi->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }
  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;

  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}
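
// Note: an edge is critical when its source block has multiple successors and
// its destination has multiple predecessors, e.g. the edge A->C in
//
//   A: br i1 %c, label %B, label %C
//   B: br label %C
//
// New code cannot be placed on such an edge without first creating a block for
// it, which is why performPRE queues these edges in toSplit and retries the
// PRE opportunity on a later iteration.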
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
        new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }
  // Top-down walk of the dominator tree
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}
void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");