1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs global value numbering to eliminate fully redundant
11 // instructions. It also performs simple dead load elimination.
13 // Note that this pass does the value numbering itself; it does not use the
14 // ValueNumbering analysis passes.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "gvn"
19 #include "llvm/Transforms/Scalar.h"
20 #include "llvm/BasicBlock.h"
21 #include "llvm/Constants.h"
22 #include "llvm/DerivedTypes.h"
23 #include "llvm/Function.h"
24 #include "llvm/IntrinsicInst.h"
25 #include "llvm/LLVMContext.h"
26 #include "llvm/Value.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/DepthFirstIterator.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/Dominators.h"
34 #include "llvm/Analysis/AliasAnalysis.h"
35 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
36 #include "llvm/Support/CFG.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Compiler.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include "llvm/Transforms/Utils/Local.h"
47 STATISTIC(NumGVNInstr
, "Number of instructions deleted");
48 STATISTIC(NumGVNLoad
, "Number of loads deleted");
49 STATISTIC(NumGVNPRE
, "Number of instructions PRE'd");
50 STATISTIC(NumGVNBlocks
, "Number of blocks merged");
51 STATISTIC(NumPRELoad
, "Number of loads PRE'd");
53 static cl::opt
<bool> EnablePRE("enable-pre",
54 cl::init(true), cl::Hidden
);
55 static cl::opt
<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
57 //===----------------------------------------------------------------------===//
59 //===----------------------------------------------------------------------===//
61 /// This class holds the mapping between values and value numbers. It is used
62 /// as an efficient mechanism to determine the expression-wise equivalence of
65 struct VISIBILITY_HIDDEN Expression
{
66 enum ExpressionOpcode
{ ADD
, FADD
, SUB
, FSUB
, MUL
, FMUL
,
67 UDIV
, SDIV
, FDIV
, UREM
, SREM
,
68 FREM
, SHL
, LSHR
, ASHR
, AND
, OR
, XOR
, ICMPEQ
,
69 ICMPNE
, ICMPUGT
, ICMPUGE
, ICMPULT
, ICMPULE
,
70 ICMPSGT
, ICMPSGE
, ICMPSLT
, ICMPSLE
, FCMPOEQ
,
71 FCMPOGT
, FCMPOGE
, FCMPOLT
, FCMPOLE
, FCMPONE
,
72 FCMPORD
, FCMPUNO
, FCMPUEQ
, FCMPUGT
, FCMPUGE
,
73 FCMPULT
, FCMPULE
, FCMPUNE
, EXTRACT
, INSERT
,
74 SHUFFLE
, SELECT
, TRUNC
, ZEXT
, SEXT
, FPTOUI
,
75 FPTOSI
, UITOFP
, SITOFP
, FPTRUNC
, FPEXT
,
76 PTRTOINT
, INTTOPTR
, BITCAST
, GEP
, CALL
, CONSTANT
,
79 ExpressionOpcode opcode
;
84 SmallVector
<uint32_t, 4> varargs
;
88 Expression(ExpressionOpcode o
) : opcode(o
) { }
90 bool operator==(const Expression
&other
) const {
91 if (opcode
!= other
.opcode
)
93 else if (opcode
== EMPTY
|| opcode
== TOMBSTONE
)
95 else if (type
!= other
.type
)
97 else if (function
!= other
.function
)
99 else if (firstVN
!= other
.firstVN
)
101 else if (secondVN
!= other
.secondVN
)
103 else if (thirdVN
!= other
.thirdVN
)
106 if (varargs
.size() != other
.varargs
.size())
109 for (size_t i
= 0; i
< varargs
.size(); ++i
)
110 if (varargs
[i
] != other
.varargs
[i
])
117 bool operator!=(const Expression
&other
) const {
118 return !(*this == other
);
122 class VISIBILITY_HIDDEN ValueTable
{
124 DenseMap
<Value
*, uint32_t> valueNumbering
;
125 DenseMap
<Expression
, uint32_t> expressionNumbering
;
127 MemoryDependenceAnalysis
* MD
;
130 uint32_t nextValueNumber
;
132 Expression::ExpressionOpcode
getOpcode(BinaryOperator
* BO
);
133 Expression::ExpressionOpcode
getOpcode(CmpInst
* C
);
134 Expression::ExpressionOpcode
getOpcode(CastInst
* C
);
135 Expression
create_expression(BinaryOperator
* BO
);
136 Expression
create_expression(CmpInst
* C
);
137 Expression
create_expression(ShuffleVectorInst
* V
);
138 Expression
create_expression(ExtractElementInst
* C
);
139 Expression
create_expression(InsertElementInst
* V
);
140 Expression
create_expression(SelectInst
* V
);
141 Expression
create_expression(CastInst
* C
);
142 Expression
create_expression(GetElementPtrInst
* G
);
143 Expression
create_expression(CallInst
* C
);
144 Expression
create_expression(Constant
* C
);
146 ValueTable() : nextValueNumber(1) { }
147 uint32_t lookup_or_add(Value
* V
);
148 uint32_t lookup(Value
* V
) const;
149 void add(Value
* V
, uint32_t num
);
151 void erase(Value
* v
);
153 void setAliasAnalysis(AliasAnalysis
* A
) { AA
= A
; }
154 AliasAnalysis
*getAliasAnalysis() const { return AA
; }
155 void setMemDep(MemoryDependenceAnalysis
* M
) { MD
= M
; }
156 void setDomTree(DominatorTree
* D
) { DT
= D
; }
157 uint32_t getNextUnusedValueNumber() { return nextValueNumber
; }
158 void verifyRemoved(const Value
*) const;
163 template <> struct DenseMapInfo
<Expression
> {
164 static inline Expression
getEmptyKey() {
165 return Expression(Expression::EMPTY
);
168 static inline Expression
getTombstoneKey() {
169 return Expression(Expression::TOMBSTONE
);
172 static unsigned getHashValue(const Expression e
) {
173 unsigned hash
= e
.opcode
;
175 hash
= e
.firstVN
+ hash
* 37;
176 hash
= e
.secondVN
+ hash
* 37;
177 hash
= e
.thirdVN
+ hash
* 37;
179 hash
= ((unsigned)((uintptr_t)e
.type
>> 4) ^
180 (unsigned)((uintptr_t)e
.type
>> 9)) +
183 for (SmallVector
<uint32_t, 4>::const_iterator I
= e
.varargs
.begin(),
184 E
= e
.varargs
.end(); I
!= E
; ++I
)
185 hash
= *I
+ hash
* 37;
187 hash
= ((unsigned)((uintptr_t)e
.function
>> 4) ^
188 (unsigned)((uintptr_t)e
.function
>> 9)) +
193 static bool isEqual(const Expression
&LHS
, const Expression
&RHS
) {
196 static bool isPod() { return true; }
200 //===----------------------------------------------------------------------===//
201 // ValueTable Internal Functions
202 //===----------------------------------------------------------------------===//
203 Expression::ExpressionOpcode
ValueTable::getOpcode(BinaryOperator
* BO
) {
204 switch(BO
->getOpcode()) {
205 default: // THIS SHOULD NEVER HAPPEN
206 llvm_unreachable("Binary operator with unknown opcode?");
207 case Instruction::Add
: return Expression::ADD
;
208 case Instruction::FAdd
: return Expression::FADD
;
209 case Instruction::Sub
: return Expression::SUB
;
210 case Instruction::FSub
: return Expression::FSUB
;
211 case Instruction::Mul
: return Expression::MUL
;
212 case Instruction::FMul
: return Expression::FMUL
;
213 case Instruction::UDiv
: return Expression::UDIV
;
214 case Instruction::SDiv
: return Expression::SDIV
;
215 case Instruction::FDiv
: return Expression::FDIV
;
216 case Instruction::URem
: return Expression::UREM
;
217 case Instruction::SRem
: return Expression::SREM
;
218 case Instruction::FRem
: return Expression::FREM
;
219 case Instruction::Shl
: return Expression::SHL
;
220 case Instruction::LShr
: return Expression::LSHR
;
221 case Instruction::AShr
: return Expression::ASHR
;
222 case Instruction::And
: return Expression::AND
;
223 case Instruction::Or
: return Expression::OR
;
224 case Instruction::Xor
: return Expression::XOR
;
228 Expression::ExpressionOpcode
ValueTable::getOpcode(CmpInst
* C
) {
229 if (isa
<ICmpInst
>(C
)) {
230 switch (C
->getPredicate()) {
231 default: // THIS SHOULD NEVER HAPPEN
232 llvm_unreachable("Comparison with unknown predicate?");
233 case ICmpInst::ICMP_EQ
: return Expression::ICMPEQ
;
234 case ICmpInst::ICMP_NE
: return Expression::ICMPNE
;
235 case ICmpInst::ICMP_UGT
: return Expression::ICMPUGT
;
236 case ICmpInst::ICMP_UGE
: return Expression::ICMPUGE
;
237 case ICmpInst::ICMP_ULT
: return Expression::ICMPULT
;
238 case ICmpInst::ICMP_ULE
: return Expression::ICMPULE
;
239 case ICmpInst::ICMP_SGT
: return Expression::ICMPSGT
;
240 case ICmpInst::ICMP_SGE
: return Expression::ICMPSGE
;
241 case ICmpInst::ICMP_SLT
: return Expression::ICMPSLT
;
242 case ICmpInst::ICMP_SLE
: return Expression::ICMPSLE
;
245 switch (C
->getPredicate()) {
246 default: // THIS SHOULD NEVER HAPPEN
247 llvm_unreachable("Comparison with unknown predicate?");
248 case FCmpInst::FCMP_OEQ
: return Expression::FCMPOEQ
;
249 case FCmpInst::FCMP_OGT
: return Expression::FCMPOGT
;
250 case FCmpInst::FCMP_OGE
: return Expression::FCMPOGE
;
251 case FCmpInst::FCMP_OLT
: return Expression::FCMPOLT
;
252 case FCmpInst::FCMP_OLE
: return Expression::FCMPOLE
;
253 case FCmpInst::FCMP_ONE
: return Expression::FCMPONE
;
254 case FCmpInst::FCMP_ORD
: return Expression::FCMPORD
;
255 case FCmpInst::FCMP_UNO
: return Expression::FCMPUNO
;
256 case FCmpInst::FCMP_UEQ
: return Expression::FCMPUEQ
;
257 case FCmpInst::FCMP_UGT
: return Expression::FCMPUGT
;
258 case FCmpInst::FCMP_UGE
: return Expression::FCMPUGE
;
259 case FCmpInst::FCMP_ULT
: return Expression::FCMPULT
;
260 case FCmpInst::FCMP_ULE
: return Expression::FCMPULE
;
261 case FCmpInst::FCMP_UNE
: return Expression::FCMPUNE
;
266 Expression::ExpressionOpcode
ValueTable::getOpcode(CastInst
* C
) {
267 switch(C
->getOpcode()) {
268 default: // THIS SHOULD NEVER HAPPEN
269 llvm_unreachable("Cast operator with unknown opcode?");
270 case Instruction::Trunc
: return Expression::TRUNC
;
271 case Instruction::ZExt
: return Expression::ZEXT
;
272 case Instruction::SExt
: return Expression::SEXT
;
273 case Instruction::FPToUI
: return Expression::FPTOUI
;
274 case Instruction::FPToSI
: return Expression::FPTOSI
;
275 case Instruction::UIToFP
: return Expression::UITOFP
;
276 case Instruction::SIToFP
: return Expression::SITOFP
;
277 case Instruction::FPTrunc
: return Expression::FPTRUNC
;
278 case Instruction::FPExt
: return Expression::FPEXT
;
279 case Instruction::PtrToInt
: return Expression::PTRTOINT
;
280 case Instruction::IntToPtr
: return Expression::INTTOPTR
;
281 case Instruction::BitCast
: return Expression::BITCAST
;
285 Expression
ValueTable::create_expression(CallInst
* C
) {
288 e
.type
= C
->getType();
292 e
.function
= C
->getCalledFunction();
293 e
.opcode
= Expression::CALL
;
295 for (CallInst::op_iterator I
= C
->op_begin()+1, E
= C
->op_end();
297 e
.varargs
.push_back(lookup_or_add(*I
));
302 Expression
ValueTable::create_expression(BinaryOperator
* BO
) {
305 e
.firstVN
= lookup_or_add(BO
->getOperand(0));
306 e
.secondVN
= lookup_or_add(BO
->getOperand(1));
309 e
.type
= BO
->getType();
310 e
.opcode
= getOpcode(BO
);
315 Expression
ValueTable::create_expression(CmpInst
* C
) {
318 e
.firstVN
= lookup_or_add(C
->getOperand(0));
319 e
.secondVN
= lookup_or_add(C
->getOperand(1));
322 e
.type
= C
->getType();
323 e
.opcode
= getOpcode(C
);
328 Expression
ValueTable::create_expression(CastInst
* C
) {
331 e
.firstVN
= lookup_or_add(C
->getOperand(0));
335 e
.type
= C
->getType();
336 e
.opcode
= getOpcode(C
);
341 Expression
ValueTable::create_expression(ShuffleVectorInst
* S
) {
344 e
.firstVN
= lookup_or_add(S
->getOperand(0));
345 e
.secondVN
= lookup_or_add(S
->getOperand(1));
346 e
.thirdVN
= lookup_or_add(S
->getOperand(2));
348 e
.type
= S
->getType();
349 e
.opcode
= Expression::SHUFFLE
;
354 Expression
ValueTable::create_expression(ExtractElementInst
* E
) {
357 e
.firstVN
= lookup_or_add(E
->getOperand(0));
358 e
.secondVN
= lookup_or_add(E
->getOperand(1));
361 e
.type
= E
->getType();
362 e
.opcode
= Expression::EXTRACT
;
367 Expression
ValueTable::create_expression(InsertElementInst
* I
) {
370 e
.firstVN
= lookup_or_add(I
->getOperand(0));
371 e
.secondVN
= lookup_or_add(I
->getOperand(1));
372 e
.thirdVN
= lookup_or_add(I
->getOperand(2));
374 e
.type
= I
->getType();
375 e
.opcode
= Expression::INSERT
;
380 Expression
ValueTable::create_expression(SelectInst
* I
) {
383 e
.firstVN
= lookup_or_add(I
->getCondition());
384 e
.secondVN
= lookup_or_add(I
->getTrueValue());
385 e
.thirdVN
= lookup_or_add(I
->getFalseValue());
387 e
.type
= I
->getType();
388 e
.opcode
= Expression::SELECT
;
393 Expression
ValueTable::create_expression(GetElementPtrInst
* G
) {
396 e
.firstVN
= lookup_or_add(G
->getPointerOperand());
400 e
.type
= G
->getType();
401 e
.opcode
= Expression::GEP
;
403 for (GetElementPtrInst::op_iterator I
= G
->idx_begin(), E
= G
->idx_end();
405 e
.varargs
.push_back(lookup_or_add(*I
));
410 //===----------------------------------------------------------------------===//
411 // ValueTable External Functions
412 //===----------------------------------------------------------------------===//
414 /// add - Insert a value into the table with a specified value number.
415 void ValueTable::add(Value
* V
, uint32_t num
) {
416 valueNumbering
.insert(std::make_pair(V
, num
));
419 /// lookup_or_add - Returns the value number for the specified value, assigning
420 /// it a new number if it did not have one before.
421 uint32_t ValueTable::lookup_or_add(Value
* V
) {
422 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
423 if (VI
!= valueNumbering
.end())
426 if (CallInst
* C
= dyn_cast
<CallInst
>(V
)) {
427 if (AA
->doesNotAccessMemory(C
)) {
428 Expression e
= create_expression(C
);
430 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
431 if (EI
!= expressionNumbering
.end()) {
432 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
435 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
436 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
438 return nextValueNumber
++;
440 } else if (AA
->onlyReadsMemory(C
)) {
441 Expression e
= create_expression(C
);
443 if (expressionNumbering
.find(e
) == expressionNumbering
.end()) {
444 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
445 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
446 return nextValueNumber
++;
449 MemDepResult local_dep
= MD
->getDependency(C
);
451 if (!local_dep
.isDef() && !local_dep
.isNonLocal()) {
452 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
453 return nextValueNumber
++;
456 if (local_dep
.isDef()) {
457 CallInst
* local_cdep
= cast
<CallInst
>(local_dep
.getInst());
459 if (local_cdep
->getNumOperands() != C
->getNumOperands()) {
460 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
461 return nextValueNumber
++;
464 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
465 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
466 uint32_t cd_vn
= lookup_or_add(local_cdep
->getOperand(i
));
468 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
469 return nextValueNumber
++;
473 uint32_t v
= lookup_or_add(local_cdep
);
474 valueNumbering
.insert(std::make_pair(V
, v
));
479 const MemoryDependenceAnalysis::NonLocalDepInfo
&deps
=
480 MD
->getNonLocalCallDependency(CallSite(C
));
481 // FIXME: call/call dependencies for readonly calls should return def, not
482 // clobber! Move the checking logic to MemDep!
485 // Check to see if we have a single dominating call instruction that is
487 for (unsigned i
= 0, e
= deps
.size(); i
!= e
; ++i
) {
488 const MemoryDependenceAnalysis::NonLocalDepEntry
*I
= &deps
[i
];
489 // Ignore non-local dependencies.
490 if (I
->second
.isNonLocal())
493 // We don't handle non-depedencies. If we already have a call, reject
494 // instruction dependencies.
495 if (I
->second
.isClobber() || cdep
!= 0) {
500 CallInst
*NonLocalDepCall
= dyn_cast
<CallInst
>(I
->second
.getInst());
501 // FIXME: All duplicated with non-local case.
502 if (NonLocalDepCall
&& DT
->properlyDominates(I
->first
, C
->getParent())){
503 cdep
= NonLocalDepCall
;
512 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
513 return nextValueNumber
++;
516 if (cdep
->getNumOperands() != C
->getNumOperands()) {
517 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
518 return nextValueNumber
++;
520 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
521 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
522 uint32_t cd_vn
= lookup_or_add(cdep
->getOperand(i
));
524 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
525 return nextValueNumber
++;
529 uint32_t v
= lookup_or_add(cdep
);
530 valueNumbering
.insert(std::make_pair(V
, v
));
534 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
535 return nextValueNumber
++;
537 } else if (BinaryOperator
* BO
= dyn_cast
<BinaryOperator
>(V
)) {
538 Expression e
= create_expression(BO
);
540 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
541 if (EI
!= expressionNumbering
.end()) {
542 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
545 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
546 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
548 return nextValueNumber
++;
550 } else if (CmpInst
* C
= dyn_cast
<CmpInst
>(V
)) {
551 Expression e
= create_expression(C
);
553 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
554 if (EI
!= expressionNumbering
.end()) {
555 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
558 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
559 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
561 return nextValueNumber
++;
563 } else if (ShuffleVectorInst
* U
= dyn_cast
<ShuffleVectorInst
>(V
)) {
564 Expression e
= create_expression(U
);
566 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
567 if (EI
!= expressionNumbering
.end()) {
568 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
571 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
572 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
574 return nextValueNumber
++;
576 } else if (ExtractElementInst
* U
= dyn_cast
<ExtractElementInst
>(V
)) {
577 Expression e
= create_expression(U
);
579 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
580 if (EI
!= expressionNumbering
.end()) {
581 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
584 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
585 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
587 return nextValueNumber
++;
589 } else if (InsertElementInst
* U
= dyn_cast
<InsertElementInst
>(V
)) {
590 Expression e
= create_expression(U
);
592 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
593 if (EI
!= expressionNumbering
.end()) {
594 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
597 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
598 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
600 return nextValueNumber
++;
602 } else if (SelectInst
* U
= dyn_cast
<SelectInst
>(V
)) {
603 Expression e
= create_expression(U
);
605 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
606 if (EI
!= expressionNumbering
.end()) {
607 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
610 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
611 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
613 return nextValueNumber
++;
615 } else if (CastInst
* U
= dyn_cast
<CastInst
>(V
)) {
616 Expression e
= create_expression(U
);
618 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
619 if (EI
!= expressionNumbering
.end()) {
620 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
623 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
624 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
626 return nextValueNumber
++;
628 } else if (GetElementPtrInst
* U
= dyn_cast
<GetElementPtrInst
>(V
)) {
629 Expression e
= create_expression(U
);
631 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
632 if (EI
!= expressionNumbering
.end()) {
633 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
636 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
637 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
639 return nextValueNumber
++;
642 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
643 return nextValueNumber
++;
647 /// lookup - Returns the value number of the specified value. Fails if
648 /// the value has not yet been numbered.
649 uint32_t ValueTable::lookup(Value
* V
) const {
650 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
651 assert(VI
!= valueNumbering
.end() && "Value not numbered?");
655 /// clear - Remove all entries from the ValueTable
656 void ValueTable::clear() {
657 valueNumbering
.clear();
658 expressionNumbering
.clear();
662 /// erase - Remove a value from the value numbering
663 void ValueTable::erase(Value
* V
) {
664 valueNumbering
.erase(V
);
667 /// verifyRemoved - Verify that the value is removed from all internal data
669 void ValueTable::verifyRemoved(const Value
*V
) const {
670 for (DenseMap
<Value
*, uint32_t>::iterator
671 I
= valueNumbering
.begin(), E
= valueNumbering
.end(); I
!= E
; ++I
) {
672 assert(I
->first
!= V
&& "Inst still occurs in value numbering map!");
676 //===----------------------------------------------------------------------===//
678 //===----------------------------------------------------------------------===//
681 struct VISIBILITY_HIDDEN ValueNumberScope
{
682 ValueNumberScope
* parent
;
683 DenseMap
<uint32_t, Value
*> table
;
685 ValueNumberScope(ValueNumberScope
* p
) : parent(p
) { }
691 class VISIBILITY_HIDDEN GVN
: public FunctionPass
{
692 bool runOnFunction(Function
&F
);
694 static char ID
; // Pass identification, replacement for typeid
695 GVN() : FunctionPass(&ID
) { }
698 MemoryDependenceAnalysis
*MD
;
702 DenseMap
<BasicBlock
*, ValueNumberScope
*> localAvail
;
704 typedef DenseMap
<Value
*, SmallPtrSet
<Instruction
*, 4> > PhiMapType
;
708 // This transformation requires dominator postdominator info
709 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
710 AU
.addRequired
<DominatorTree
>();
711 AU
.addRequired
<MemoryDependenceAnalysis
>();
712 AU
.addRequired
<AliasAnalysis
>();
714 AU
.addPreserved
<DominatorTree
>();
715 AU
.addPreserved
<AliasAnalysis
>();
719 // FIXME: eliminate or document these better
720 bool processLoad(LoadInst
* L
,
721 SmallVectorImpl
<Instruction
*> &toErase
);
722 bool processInstruction(Instruction
* I
,
723 SmallVectorImpl
<Instruction
*> &toErase
);
724 bool processNonLocalLoad(LoadInst
* L
,
725 SmallVectorImpl
<Instruction
*> &toErase
);
726 bool processBlock(BasicBlock
* BB
);
727 Value
*GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
728 DenseMap
<BasicBlock
*, Value
*> &Phis
,
729 bool top_level
= false);
730 void dump(DenseMap
<uint32_t, Value
*>& d
);
731 bool iterateOnFunction(Function
&F
);
732 Value
* CollapsePhi(PHINode
* p
);
733 bool isSafeReplacement(PHINode
* p
, Instruction
* inst
);
734 bool performPRE(Function
& F
);
735 Value
* lookupNumber(BasicBlock
* BB
, uint32_t num
);
736 bool mergeBlockIntoPredecessor(BasicBlock
* BB
);
737 Value
* AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
);
738 void cleanupGlobalSets();
739 void verifyRemoved(const Instruction
*I
) const;
745 // createGVNPass - The public interface to this file...
746 FunctionPass
*llvm::createGVNPass() { return new GVN(); }
748 static RegisterPass
<GVN
> X("gvn",
749 "Global Value Numbering");
751 void GVN::dump(DenseMap
<uint32_t, Value
*>& d
) {
753 for (DenseMap
<uint32_t, Value
*>::iterator I
= d
.begin(),
754 E
= d
.end(); I
!= E
; ++I
) {
755 printf("%d\n", I
->first
);
761 Value
* GVN::CollapsePhi(PHINode
* p
) {
762 Value
* constVal
= p
->hasConstantValue();
763 if (!constVal
) return 0;
765 Instruction
* inst
= dyn_cast
<Instruction
>(constVal
);
769 if (DT
->dominates(inst
, p
))
770 if (isSafeReplacement(p
, inst
))
775 bool GVN::isSafeReplacement(PHINode
* p
, Instruction
* inst
) {
776 if (!isa
<PHINode
>(inst
))
779 for (Instruction::use_iterator UI
= p
->use_begin(), E
= p
->use_end();
781 if (PHINode
* use_phi
= dyn_cast
<PHINode
>(UI
))
782 if (use_phi
->getParent() == inst
->getParent())
788 /// GetValueForBlock - Get the value to use within the specified basic block.
789 /// available values are in Phis.
790 Value
*GVN::GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
791 DenseMap
<BasicBlock
*, Value
*> &Phis
,
794 // If we have already computed this value, return the previously computed val.
795 DenseMap
<BasicBlock
*, Value
*>::iterator V
= Phis
.find(BB
);
796 if (V
!= Phis
.end() && !top_level
) return V
->second
;
798 // If the block is unreachable, just return undef, since this path
799 // can't actually occur at runtime.
800 if (!DT
->isReachableFromEntry(BB
))
801 return Phis
[BB
] = UndefValue::get(orig
->getType());
803 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
804 Value
*ret
= GetValueForBlock(Pred
, orig
, Phis
);
809 // Get the number of predecessors of this block so we can reserve space later.
810 // If there is already a PHI in it, use the #preds from it, otherwise count.
811 // Getting it from the PHI is constant time.
813 if (PHINode
*ExistingPN
= dyn_cast
<PHINode
>(BB
->begin()))
814 NumPreds
= ExistingPN
->getNumIncomingValues();
816 NumPreds
= std::distance(pred_begin(BB
), pred_end(BB
));
818 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
819 // now, then get values to fill in the incoming values for the PHI.
820 PHINode
*PN
= PHINode::Create(orig
->getType(), orig
->getName()+".rle",
822 PN
->reserveOperandSpace(NumPreds
);
824 Phis
.insert(std::make_pair(BB
, PN
));
826 // Fill in the incoming values for the block.
827 for (pred_iterator PI
= pred_begin(BB
), E
= pred_end(BB
); PI
!= E
; ++PI
) {
828 Value
* val
= GetValueForBlock(*PI
, orig
, Phis
);
829 PN
->addIncoming(val
, *PI
);
832 VN
.getAliasAnalysis()->copyValue(orig
, PN
);
834 // Attempt to collapse PHI nodes that are trivially redundant
835 Value
* v
= CollapsePhi(PN
);
837 // Cache our phi construction results
838 if (LoadInst
* L
= dyn_cast
<LoadInst
>(orig
))
839 phiMap
[L
->getPointerOperand()].insert(PN
);
841 phiMap
[orig
].insert(PN
);
846 PN
->replaceAllUsesWith(v
);
847 if (isa
<PointerType
>(v
->getType()))
848 MD
->invalidateCachedPointerInfo(v
);
850 for (DenseMap
<BasicBlock
*, Value
*>::iterator I
= Phis
.begin(),
851 E
= Phis
.end(); I
!= E
; ++I
)
855 DEBUG(errs() << "GVN removed: " << *PN
<< '\n');
856 MD
->removeInstruction(PN
);
857 PN
->eraseFromParent();
858 DEBUG(verifyRemoved(PN
));
864 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
865 /// we're analyzing is fully available in the specified block. As we go, keep
866 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
867 /// map is actually a tri-state map with the following values:
868 /// 0) we know the block *is not* fully available.
869 /// 1) we know the block *is* fully available.
870 /// 2) we do not know whether the block is fully available or not, but we are
871 /// currently speculating that it will be.
872 /// 3) we are speculating for this block and have used that to speculate for
874 static bool IsValueFullyAvailableInBlock(BasicBlock
*BB
,
875 DenseMap
<BasicBlock
*, char> &FullyAvailableBlocks
) {
876 // Optimistically assume that the block is fully available and check to see
877 // if we already know about this block in one lookup.
878 std::pair
<DenseMap
<BasicBlock
*, char>::iterator
, char> IV
=
879 FullyAvailableBlocks
.insert(std::make_pair(BB
, 2));
881 // If the entry already existed for this block, return the precomputed value.
883 // If this is a speculative "available" value, mark it as being used for
884 // speculation of other blocks.
885 if (IV
.first
->second
== 2)
886 IV
.first
->second
= 3;
887 return IV
.first
->second
!= 0;
890 // Otherwise, see if it is fully available in all predecessors.
891 pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
);
893 // If this block has no predecessors, it isn't live-in here.
895 goto SpeculationFailure
;
897 for (; PI
!= PE
; ++PI
)
898 // If the value isn't fully available in one of our predecessors, then it
899 // isn't fully available in this block either. Undo our previous
900 // optimistic assumption and bail out.
901 if (!IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
902 goto SpeculationFailure
;
906 // SpeculationFailure - If we get here, we found out that this is not, after
907 // all, a fully-available block. We have a problem if we speculated on this and
908 // used the speculation to mark other blocks as available.
910 char &BBVal
= FullyAvailableBlocks
[BB
];
912 // If we didn't speculate on this, just return with it set to false.
918 // If we did speculate on this value, we could have blocks set to 1 that are
919 // incorrect. Walk the (transitive) successors of this block and mark them as
921 SmallVector
<BasicBlock
*, 32> BBWorklist
;
922 BBWorklist
.push_back(BB
);
924 while (!BBWorklist
.empty()) {
925 BasicBlock
*Entry
= BBWorklist
.pop_back_val();
926 // Note that this sets blocks to 0 (unavailable) if they happen to not
927 // already be in FullyAvailableBlocks. This is safe.
928 char &EntryVal
= FullyAvailableBlocks
[Entry
];
929 if (EntryVal
== 0) continue; // Already unavailable.
931 // Mark as unavailable.
934 for (succ_iterator I
= succ_begin(Entry
), E
= succ_end(Entry
); I
!= E
; ++I
)
935 BBWorklist
.push_back(*I
);
941 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
942 /// non-local by performing PHI construction.
943 bool GVN::processNonLocalLoad(LoadInst
*LI
,
944 SmallVectorImpl
<Instruction
*> &toErase
) {
945 // Find the non-local dependencies of the load.
946 SmallVector
<MemoryDependenceAnalysis::NonLocalDepEntry
, 64> Deps
;
947 MD
->getNonLocalPointerDependency(LI
->getOperand(0), true, LI
->getParent(),
949 //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
950 // << Deps.size() << *LI << '\n');
952 // If we had to process more than one hundred blocks to find the
953 // dependencies, this load isn't worth worrying about. Optimizing
954 // it will be too expensive.
955 if (Deps
.size() > 100)
958 // If we had a phi translation failure, we'll have a single entry which is a
959 // clobber in the current block. Reject this early.
960 if (Deps
.size() == 1 && Deps
[0].second
.isClobber()) {
962 errs() << "GVN: non-local load ";
963 WriteAsOperand(errs(), LI
);
964 errs() << " is clobbered by " << *Deps
[0].second
.getInst() << '\n';
969 // Filter out useless results (non-locals, etc). Keep track of the blocks
970 // where we have a value available in repl, also keep track of whether we see
971 // dependencies that produce an unknown value for the load (such as a call
972 // that could potentially clobber the load).
973 SmallVector
<std::pair
<BasicBlock
*, Value
*>, 16> ValuesPerBlock
;
974 SmallVector
<BasicBlock
*, 16> UnavailableBlocks
;
976 for (unsigned i
= 0, e
= Deps
.size(); i
!= e
; ++i
) {
977 BasicBlock
*DepBB
= Deps
[i
].first
;
978 MemDepResult DepInfo
= Deps
[i
].second
;
980 if (DepInfo
.isClobber()) {
981 UnavailableBlocks
.push_back(DepBB
);
985 Instruction
*DepInst
= DepInfo
.getInst();
987 // Loading the allocation -> undef.
988 if (isa
<AllocationInst
>(DepInst
)) {
989 ValuesPerBlock
.push_back(std::make_pair(DepBB
,
990 UndefValue::get(LI
->getType())));
994 if (StoreInst
* S
= dyn_cast
<StoreInst
>(DepInst
)) {
995 // Reject loads and stores that are to the same address but are of
997 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
998 // of bitfield access, it would be interesting to optimize for it at some
1000 if (S
->getOperand(0)->getType() != LI
->getType()) {
1001 UnavailableBlocks
.push_back(DepBB
);
1005 ValuesPerBlock
.push_back(std::make_pair(DepBB
, S
->getOperand(0)));
1007 } else if (LoadInst
* LD
= dyn_cast
<LoadInst
>(DepInst
)) {
1008 if (LD
->getType() != LI
->getType()) {
1009 UnavailableBlocks
.push_back(DepBB
);
1012 ValuesPerBlock
.push_back(std::make_pair(DepBB
, LD
));
1014 UnavailableBlocks
.push_back(DepBB
);
1019 // If we have no predecessors that produce a known value for this load, exit
1021 if (ValuesPerBlock
.empty()) return false;
1023 // If all of the instructions we depend on produce a known value for this
1024 // load, then it is fully redundant and we can use PHI insertion to compute
1025 // its value. Insert PHIs and remove the fully redundant value now.
1026 if (UnavailableBlocks
.empty()) {
1027 // Use cached PHI construction information from previous runs
1028 SmallPtrSet
<Instruction
*, 4> &p
= phiMap
[LI
->getPointerOperand()];
1029 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
1030 for (SmallPtrSet
<Instruction
*, 4>::iterator I
= p
.begin(), E
= p
.end();
1032 if ((*I
)->getParent() == LI
->getParent()) {
1033 DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD #1: " << *LI
<< '\n');
1034 LI
->replaceAllUsesWith(*I
);
1035 if (isa
<PointerType
>((*I
)->getType()))
1036 MD
->invalidateCachedPointerInfo(*I
);
1037 toErase
.push_back(LI
);
1042 ValuesPerBlock
.push_back(std::make_pair((*I
)->getParent(), *I
));
1045 DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI
<< '\n');
1047 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1048 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1049 // Perform PHI construction.
1050 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1051 LI
->replaceAllUsesWith(v
);
1053 if (isa
<PHINode
>(v
))
1055 if (isa
<PointerType
>(v
->getType()))
1056 MD
->invalidateCachedPointerInfo(v
);
1057 toErase
.push_back(LI
);
1062 if (!EnablePRE
|| !EnableLoadPRE
)
1065 // Okay, we have *some* definitions of the value. This means that the value
1066 // is available in some of our (transitive) predecessors. Lets think about
1067 // doing PRE of this load. This will involve inserting a new load into the
1068 // predecessor when it's not available. We could do this in general, but
1069 // prefer to not increase code size. As such, we only do this when we know
1070 // that we only have to insert *one* load (which means we're basically moving
1071 // the load, not inserting a new one).
1073 SmallPtrSet
<BasicBlock
*, 4> Blockers
;
1074 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1075 Blockers
.insert(UnavailableBlocks
[i
]);
1077 // Lets find first basic block with more than one predecessor. Walk backwards
1078 // through predecessors if needed.
1079 BasicBlock
*LoadBB
= LI
->getParent();
1080 BasicBlock
*TmpBB
= LoadBB
;
1082 bool isSinglePred
= false;
1083 bool allSingleSucc
= true;
1084 while (TmpBB
->getSinglePredecessor()) {
1085 isSinglePred
= true;
1086 TmpBB
= TmpBB
->getSinglePredecessor();
1087 if (!TmpBB
) // If haven't found any, bail now.
1089 if (TmpBB
== LoadBB
) // Infinite (unreachable) loop.
1091 if (Blockers
.count(TmpBB
))
1093 if (TmpBB
->getTerminator()->getNumSuccessors() != 1)
1094 allSingleSucc
= false;
1100 // If we have a repl set with LI itself in it, this means we have a loop where
1101 // at least one of the values is LI. Since this means that we won't be able
1102 // to eliminate LI even if we insert uses in the other predecessors, we will
1103 // end up increasing code size. Reject this by scanning for LI.
1104 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1105 if (ValuesPerBlock
[i
].second
== LI
)
1110 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1111 if (Instruction
*I
= dyn_cast
<Instruction
>(ValuesPerBlock
[i
].second
))
1112 // "Hot" Instruction is in some loop (because it dominates its dep.
1114 if (DT
->dominates(LI
, I
)) {
1119 // We are interested only in "hot" instructions. We don't want to do any
1120 // mis-optimizations here.
1125 // Okay, we have some hope :). Check to see if the loaded value is fully
1126 // available in all but one predecessor.
1127 // FIXME: If we could restructure the CFG, we could make a common pred with
1128 // all the preds that don't have an available LI and insert a new load into
1130 BasicBlock
*UnavailablePred
= 0;
1132 DenseMap
<BasicBlock
*, char> FullyAvailableBlocks
;
1133 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1134 FullyAvailableBlocks
[ValuesPerBlock
[i
].first
] = true;
1135 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1136 FullyAvailableBlocks
[UnavailableBlocks
[i
]] = false;
1138 for (pred_iterator PI
= pred_begin(LoadBB
), E
= pred_end(LoadBB
);
1140 if (IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
1143 // If this load is not available in multiple predecessors, reject it.
1144 if (UnavailablePred
&& UnavailablePred
!= *PI
)
1146 UnavailablePred
= *PI
;
1149 assert(UnavailablePred
!= 0 &&
1150 "Fully available value should be eliminated above!");
1152 // If the loaded pointer is PHI node defined in this block, do PHI translation
1153 // to get its value in the predecessor.
1154 Value
*LoadPtr
= LI
->getOperand(0)->DoPHITranslation(LoadBB
, UnavailablePred
);
1156 // Make sure the value is live in the predecessor. If it was defined by a
1157 // non-PHI instruction in this block, we don't know how to recompute it above.
1158 if (Instruction
*LPInst
= dyn_cast
<Instruction
>(LoadPtr
))
1159 if (!DT
->dominates(LPInst
->getParent(), UnavailablePred
)) {
1160 DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
1161 << *LPInst
<< '\n' << *LI
<< "\n");
1165 // We don't currently handle critical edges :(
1166 if (UnavailablePred
->getTerminator()->getNumSuccessors() != 1) {
1167 DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
1168 << UnavailablePred
->getName() << "': " << *LI
<< '\n');
1172 // Make sure it is valid to move this load here. We have to watch out for:
1173 // @1 = getelementptr (i8* p, ...
1174 // test p and branch if == 0
1176 // It is valid to have the getelementptr before the test, even if p can be 0,
1177 // as getelementptr only does address arithmetic.
1178 // If we are not pushing the value through any multiple-successor blocks
1179 // we do not have this case. Otherwise, check that the load is safe to
1180 // put anywhere; this can be improved, but should be conservatively safe.
1181 if (!allSingleSucc
&&
1182 !isSafeToLoadUnconditionally(LoadPtr
, UnavailablePred
->getTerminator()))
1185 // Okay, we can eliminate this load by inserting a reload in the predecessor
1186 // and using PHI construction to get the value in the other predecessors, do
1188 DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI
<< '\n');
1190 Value
*NewLoad
= new LoadInst(LoadPtr
, LI
->getName()+".pre", false,
1192 UnavailablePred
->getTerminator());
1194 SmallPtrSet
<Instruction
*, 4> &p
= phiMap
[LI
->getPointerOperand()];
1195 for (SmallPtrSet
<Instruction
*, 4>::iterator I
= p
.begin(), E
= p
.end();
1197 ValuesPerBlock
.push_back(std::make_pair((*I
)->getParent(), *I
));
1199 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1200 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1201 BlockReplValues
[UnavailablePred
] = NewLoad
;
1203 // Perform PHI construction.
1204 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1205 LI
->replaceAllUsesWith(v
);
1206 if (isa
<PHINode
>(v
))
1208 if (isa
<PointerType
>(v
->getType()))
1209 MD
->invalidateCachedPointerInfo(v
);
1210 toErase
.push_back(LI
);
1215 /// processLoad - Attempt to eliminate a load, first by eliminating it
1216 /// locally, and then attempting non-local elimination if that fails.
1217 bool GVN::processLoad(LoadInst
*L
, SmallVectorImpl
<Instruction
*> &toErase
) {
1218 if (L
->isVolatile())
1221 Value
* pointer
= L
->getPointerOperand();
1223 // ... to a pointer that has been loaded from before...
1224 MemDepResult dep
= MD
->getDependency(L
);
1226 // If the value isn't available, don't do anything!
1227 if (dep
.isClobber()) {
1229 // fast print dep, using operator<< on instruction would be too slow
1230 errs() << "GVN: load ";
1231 WriteAsOperand(errs(), L
);
1232 Instruction
*I
= dep
.getInst();
1233 errs() << " is clobbered by " << *I
<< '\n';
1238 // If it is defined in another block, try harder.
1239 if (dep
.isNonLocal())
1240 return processNonLocalLoad(L
, toErase
);
1242 Instruction
*DepInst
= dep
.getInst();
1243 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
1244 // Only forward substitute stores to loads of the same type.
1245 // FIXME: Could do better!
1246 if (DepSI
->getPointerOperand()->getType() != pointer
->getType())
1250 L
->replaceAllUsesWith(DepSI
->getOperand(0));
1251 if (isa
<PointerType
>(DepSI
->getOperand(0)->getType()))
1252 MD
->invalidateCachedPointerInfo(DepSI
->getOperand(0));
1253 toErase
.push_back(L
);
1258 if (LoadInst
*DepLI
= dyn_cast
<LoadInst
>(DepInst
)) {
1259 // Only forward substitute stores to loads of the same type.
1260 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
1261 if (DepLI
->getType() != L
->getType())
1265 L
->replaceAllUsesWith(DepLI
);
1266 if (isa
<PointerType
>(DepLI
->getType()))
1267 MD
->invalidateCachedPointerInfo(DepLI
);
1268 toErase
.push_back(L
);
1273 // If this load really doesn't depend on anything, then we must be loading an
1274 // undef value. This can happen when loading for a fresh allocation with no
1275 // intervening stores, for example.
1276 if (isa
<AllocationInst
>(DepInst
)) {
1277 L
->replaceAllUsesWith(UndefValue::get(L
->getType()));
1278 toErase
.push_back(L
);
1286 Value
* GVN::lookupNumber(BasicBlock
* BB
, uint32_t num
) {
1287 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator I
= localAvail
.find(BB
);
1288 if (I
== localAvail
.end())
1291 ValueNumberScope
* locals
= I
->second
;
1294 DenseMap
<uint32_t, Value
*>::iterator I
= locals
->table
.find(num
);
1295 if (I
!= locals
->table
.end())
1298 locals
= locals
->parent
;
1304 /// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
1305 /// by inheritance from the dominator fails, see if we can perform phi
1306 /// construction to eliminate the redundancy.
1307 Value
* GVN::AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
) {
1308 BasicBlock
* BaseBlock
= orig
->getParent();
1310 SmallPtrSet
<BasicBlock
*, 4> Visited
;
1311 SmallVector
<BasicBlock
*, 8> Stack
;
1312 Stack
.push_back(BaseBlock
);
1314 DenseMap
<BasicBlock
*, Value
*> Results
;
1316 // Walk backwards through our predecessors, looking for instances of the
1317 // value number we're looking for. Instances are recorded in the Results
1318 // map, which is then used to perform phi construction.
1319 while (!Stack
.empty()) {
1320 BasicBlock
* Current
= Stack
.back();
1323 // If we've walked all the way to a proper dominator, then give up. Cases
1324 // where the instance is in the dominator will have been caught by the fast
1325 // path, and any cases that require phi construction further than this are
1326 // probably not worth it anyways. Note that this is a SIGNIFICANT compile
1327 // time improvement.
1328 if (DT
->properlyDominates(Current
, orig
->getParent())) return 0;
1330 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator LA
=
1331 localAvail
.find(Current
);
1332 if (LA
== localAvail
.end()) return 0;
1333 DenseMap
<uint32_t, Value
*>::iterator V
= LA
->second
->table
.find(valno
);
1335 if (V
!= LA
->second
->table
.end()) {
1336 // Found an instance, record it.
1337 Results
.insert(std::make_pair(Current
, V
->second
));
1341 // If we reach the beginning of the function, then give up.
1342 if (pred_begin(Current
) == pred_end(Current
))
1345 for (pred_iterator PI
= pred_begin(Current
), PE
= pred_end(Current
);
1347 if (Visited
.insert(*PI
))
1348 Stack
.push_back(*PI
);
1351 // If we didn't find instances, give up. Otherwise, perform phi construction.
1352 if (Results
.size() == 0)
1355 return GetValueForBlock(BaseBlock
, orig
, Results
, true);
1358 /// processInstruction - When calculating availability, handle an instruction
1359 /// by inserting it into the appropriate sets
1360 bool GVN::processInstruction(Instruction
*I
,
1361 SmallVectorImpl
<Instruction
*> &toErase
) {
1362 if (LoadInst
* L
= dyn_cast
<LoadInst
>(I
)) {
1363 bool changed
= processLoad(L
, toErase
);
1366 unsigned num
= VN
.lookup_or_add(L
);
1367 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, L
));
1373 uint32_t nextNum
= VN
.getNextUnusedValueNumber();
1374 unsigned num
= VN
.lookup_or_add(I
);
1376 if (BranchInst
* BI
= dyn_cast
<BranchInst
>(I
)) {
1377 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1379 if (!BI
->isConditional() || isa
<Constant
>(BI
->getCondition()))
1382 Value
* branchCond
= BI
->getCondition();
1383 uint32_t condVN
= VN
.lookup_or_add(branchCond
);
1385 BasicBlock
* trueSucc
= BI
->getSuccessor(0);
1386 BasicBlock
* falseSucc
= BI
->getSuccessor(1);
1388 if (trueSucc
->getSinglePredecessor())
1389 localAvail
[trueSucc
]->table
[condVN
] =
1390 ConstantInt::getTrue(trueSucc
->getContext());
1391 if (falseSucc
->getSinglePredecessor())
1392 localAvail
[falseSucc
]->table
[condVN
] =
1393 ConstantInt::getFalse(trueSucc
->getContext());
1397 // Allocations are always uniquely numbered, so we can save time and memory
1398 // by fast failing them.
1399 } else if (isa
<AllocationInst
>(I
) || isa
<TerminatorInst
>(I
)) {
1400 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1404 // Collapse PHI nodes
1405 if (PHINode
* p
= dyn_cast
<PHINode
>(I
)) {
1406 Value
* constVal
= CollapsePhi(p
);
1409 for (PhiMapType::iterator PI
= phiMap
.begin(), PE
= phiMap
.end();
1411 PI
->second
.erase(p
);
1413 p
->replaceAllUsesWith(constVal
);
1414 if (isa
<PointerType
>(constVal
->getType()))
1415 MD
->invalidateCachedPointerInfo(constVal
);
1418 toErase
.push_back(p
);
1420 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1423 // If the number we were assigned was a brand new VN, then we don't
1424 // need to do a lookup to see if the number already exists
1425 // somewhere in the domtree: it can't!
1426 } else if (num
== nextNum
) {
1427 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1429 // Perform fast-path value-number based elimination of values inherited from
1431 } else if (Value
* repl
= lookupNumber(I
->getParent(), num
)) {
1434 I
->replaceAllUsesWith(repl
);
1435 if (isa
<PointerType
>(repl
->getType()))
1436 MD
->invalidateCachedPointerInfo(repl
);
1437 toErase
.push_back(I
);
1441 // Perform slow-pathvalue-number based elimination with phi construction.
1442 } else if (Value
* repl
= AttemptRedundancyElimination(I
, num
)) {
1445 I
->replaceAllUsesWith(repl
);
1446 if (isa
<PointerType
>(repl
->getType()))
1447 MD
->invalidateCachedPointerInfo(repl
);
1448 toErase
.push_back(I
);
1452 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1458 /// runOnFunction - This is the main transformation entry point for a function.
1459 bool GVN::runOnFunction(Function
& F
) {
1460 MD
= &getAnalysis
<MemoryDependenceAnalysis
>();
1461 DT
= &getAnalysis
<DominatorTree
>();
1462 VN
.setAliasAnalysis(&getAnalysis
<AliasAnalysis
>());
1466 bool changed
= false;
1467 bool shouldContinue
= true;
1469 // Merge unconditional branches, allowing PRE to catch more
1470 // optimization opportunities.
1471 for (Function::iterator FI
= F
.begin(), FE
= F
.end(); FI
!= FE
; ) {
1472 BasicBlock
* BB
= FI
;
1474 bool removedBlock
= MergeBlockIntoPredecessor(BB
, this);
1475 if (removedBlock
) NumGVNBlocks
++;
1477 changed
|= removedBlock
;
1480 unsigned Iteration
= 0;
1482 while (shouldContinue
) {
1483 DEBUG(errs() << "GVN iteration: " << Iteration
<< "\n");
1484 shouldContinue
= iterateOnFunction(F
);
1485 changed
|= shouldContinue
;
1490 bool PREChanged
= true;
1491 while (PREChanged
) {
1492 PREChanged
= performPRE(F
);
1493 changed
|= PREChanged
;
1496 // FIXME: Should perform GVN again after PRE does something. PRE can move
1497 // computations into blocks where they become fully redundant. Note that
1498 // we can't do this until PRE's critical edge splitting updates memdep.
1499 // Actually, when this happens, we should just fully integrate PRE into GVN.
1501 cleanupGlobalSets();
1507 bool GVN::processBlock(BasicBlock
* BB
) {
1508 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
1509 // incrementing BI before processing an instruction).
1510 SmallVector
<Instruction
*, 8> toErase
;
1511 bool changed_function
= false;
1513 for (BasicBlock::iterator BI
= BB
->begin(), BE
= BB
->end();
1515 changed_function
|= processInstruction(BI
, toErase
);
1516 if (toErase
.empty()) {
1521 // If we need some instructions deleted, do it now.
1522 NumGVNInstr
+= toErase
.size();
1524 // Avoid iterator invalidation.
1525 bool AtStart
= BI
== BB
->begin();
1529 for (SmallVector
<Instruction
*, 4>::iterator I
= toErase
.begin(),
1530 E
= toErase
.end(); I
!= E
; ++I
) {
1531 DEBUG(errs() << "GVN removed: " << **I
<< '\n');
1532 MD
->removeInstruction(*I
);
1533 (*I
)->eraseFromParent();
1534 DEBUG(verifyRemoved(*I
));
1544 return changed_function
;
1547 /// performPRE - Perform a purely local form of PRE that looks for diamond
1548 /// control flow patterns and attempts to perform simple PRE at the join point.
1549 bool GVN::performPRE(Function
& F
) {
1550 bool Changed
= false;
1551 SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4> toSplit
;
1552 DenseMap
<BasicBlock
*, Value
*> predMap
;
1553 for (df_iterator
<BasicBlock
*> DI
= df_begin(&F
.getEntryBlock()),
1554 DE
= df_end(&F
.getEntryBlock()); DI
!= DE
; ++DI
) {
1555 BasicBlock
* CurrentBlock
= *DI
;
1557 // Nothing to PRE in the entry block.
1558 if (CurrentBlock
== &F
.getEntryBlock()) continue;
1560 for (BasicBlock::iterator BI
= CurrentBlock
->begin(),
1561 BE
= CurrentBlock
->end(); BI
!= BE
; ) {
1562 Instruction
*CurInst
= BI
++;
1564 if (isa
<AllocationInst
>(CurInst
) || isa
<TerminatorInst
>(CurInst
) ||
1565 isa
<PHINode
>(CurInst
) || (CurInst
->getType() == Type::VoidTy
) ||
1566 CurInst
->mayReadFromMemory() || CurInst
->mayHaveSideEffects() ||
1567 isa
<DbgInfoIntrinsic
>(CurInst
))
1570 uint32_t valno
= VN
.lookup(CurInst
);
1572 // Look for the predecessors for PRE opportunities. We're
1573 // only trying to solve the basic diamond case, where
1574 // a value is computed in the successor and one predecessor,
1575 // but not the other. We also explicitly disallow cases
1576 // where the successor is its own predecessor, because they're
1577 // more complicated to get right.
1578 unsigned numWith
= 0;
1579 unsigned numWithout
= 0;
1580 BasicBlock
* PREPred
= 0;
1583 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1584 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
) {
1585 // We're not interested in PRE where the block is its
1586 // own predecessor, on in blocks with predecessors
1587 // that are not reachable.
1588 if (*PI
== CurrentBlock
) {
1591 } else if (!localAvail
.count(*PI
)) {
1596 DenseMap
<uint32_t, Value
*>::iterator predV
=
1597 localAvail
[*PI
]->table
.find(valno
);
1598 if (predV
== localAvail
[*PI
]->table
.end()) {
1601 } else if (predV
->second
== CurInst
) {
1604 predMap
[*PI
] = predV
->second
;
1609 // Don't do PRE when it might increase code size, i.e. when
1610 // we would need to insert instructions in more than one pred.
1611 if (numWithout
!= 1 || numWith
== 0)
1614 // We can't do PRE safely on a critical edge, so instead we schedule
1615 // the edge to be split and perform the PRE the next time we iterate
1617 unsigned succNum
= 0;
1618 for (unsigned i
= 0, e
= PREPred
->getTerminator()->getNumSuccessors();
1620 if (PREPred
->getTerminator()->getSuccessor(i
) == CurrentBlock
) {
1625 if (isCriticalEdge(PREPred
->getTerminator(), succNum
)) {
1626 toSplit
.push_back(std::make_pair(PREPred
->getTerminator(), succNum
));
1630 // Instantiate the expression the in predecessor that lacked it.
1631 // Because we are going top-down through the block, all value numbers
1632 // will be available in the predecessor by the time we need them. Any
1633 // that weren't original present will have been instantiated earlier
1635 Instruction
* PREInstr
= CurInst
->clone(CurInst
->getContext());
1636 bool success
= true;
1637 for (unsigned i
= 0, e
= CurInst
->getNumOperands(); i
!= e
; ++i
) {
1638 Value
*Op
= PREInstr
->getOperand(i
);
1639 if (isa
<Argument
>(Op
) || isa
<Constant
>(Op
) || isa
<GlobalValue
>(Op
))
1642 if (Value
*V
= lookupNumber(PREPred
, VN
.lookup(Op
))) {
1643 PREInstr
->setOperand(i
, V
);
1650 // Fail out if we encounter an operand that is not available in
1651 // the PRE predecessor. This is typically because of loads which
1652 // are not value numbered precisely.
1655 DEBUG(verifyRemoved(PREInstr
));
1659 PREInstr
->insertBefore(PREPred
->getTerminator());
1660 PREInstr
->setName(CurInst
->getName() + ".pre");
1661 predMap
[PREPred
] = PREInstr
;
1662 VN
.add(PREInstr
, valno
);
1665 // Update the availability map to include the new instruction.
1666 localAvail
[PREPred
]->table
.insert(std::make_pair(valno
, PREInstr
));
1668 // Create a PHI to make the value available in this block.
1669 PHINode
* Phi
= PHINode::Create(CurInst
->getType(),
1670 CurInst
->getName() + ".pre-phi",
1671 CurrentBlock
->begin());
1672 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1673 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
)
1674 Phi
->addIncoming(predMap
[*PI
], *PI
);
1677 localAvail
[CurrentBlock
]->table
[valno
] = Phi
;
1679 CurInst
->replaceAllUsesWith(Phi
);
1680 if (isa
<PointerType
>(Phi
->getType()))
1681 MD
->invalidateCachedPointerInfo(Phi
);
1684 DEBUG(errs() << "GVN PRE removed: " << *CurInst
<< '\n');
1685 MD
->removeInstruction(CurInst
);
1686 CurInst
->eraseFromParent();
1687 DEBUG(verifyRemoved(CurInst
));
1692 for (SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4>::iterator
1693 I
= toSplit
.begin(), E
= toSplit
.end(); I
!= E
; ++I
)
1694 SplitCriticalEdge(I
->first
, I
->second
, this);
1696 return Changed
|| toSplit
.size();
1699 /// iterateOnFunction - Executes one iteration of GVN
1700 bool GVN::iterateOnFunction(Function
&F
) {
1701 cleanupGlobalSets();
1703 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1704 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
) {
1706 localAvail
[DI
->getBlock()] =
1707 new ValueNumberScope(localAvail
[DI
->getIDom()->getBlock()]);
1709 localAvail
[DI
->getBlock()] = new ValueNumberScope(0);
1712 // Top-down walk of the dominator tree
1713 bool changed
= false;
1715 // Needed for value numbering with phi construction to work.
1716 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
1717 for (ReversePostOrderTraversal
<Function
*>::rpo_iterator RI
= RPOT
.begin(),
1718 RE
= RPOT
.end(); RI
!= RE
; ++RI
)
1719 changed
|= processBlock(*RI
);
1721 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1722 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
)
1723 changed
|= processBlock(DI
->getBlock());
1729 void GVN::cleanupGlobalSets() {
1733 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1734 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
)
1739 /// verifyRemoved - Verify that the specified instruction does not occur in our
1740 /// internal data structures.
1741 void GVN::verifyRemoved(const Instruction
*Inst
) const {
1742 VN
.verifyRemoved(Inst
);
1744 // Walk through the PHI map to make sure the instruction isn't hiding in there
1746 for (PhiMapType::iterator
1747 I
= phiMap
.begin(), E
= phiMap
.end(); I
!= E
; ++I
) {
1748 assert(I
->first
!= Inst
&& "Inst is still a key in PHI map!");
1750 for (SmallPtrSet
<Instruction
*, 4>::iterator
1751 II
= I
->second
.begin(), IE
= I
->second
.end(); II
!= IE
; ++II
) {
1752 assert(*II
!= Inst
&& "Inst is still a value in PHI map!");
1756 // Walk through the value number scope to make sure the instruction isn't
1757 // ferreted away in it.
1758 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1759 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
) {
1760 const ValueNumberScope
*VNS
= I
->second
;
1763 for (DenseMap
<uint32_t, Value
*>::iterator
1764 II
= VNS
->table
.begin(), IE
= VNS
->table
.end(); II
!= IE
; ++II
) {
1765 assert(II
->second
!= Inst
&& "Inst still in value numbering scope!");