1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs global value numbering to eliminate fully redundant
11 // instructions. It also performs simple dead load elimination.
13 // Note that this pass does the value numbering itself; it does not use the
14 // ValueNumbering analysis passes.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "gvn"
19 #include "llvm/Transforms/Scalar.h"
20 #include "llvm/BasicBlock.h"
21 #include "llvm/Constants.h"
22 #include "llvm/DerivedTypes.h"
23 #include "llvm/Function.h"
24 #include "llvm/IntrinsicInst.h"
25 #include "llvm/LLVMContext.h"
26 #include "llvm/Value.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/DepthFirstIterator.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/Dominators.h"
34 #include "llvm/Analysis/AliasAnalysis.h"
35 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
36 #include "llvm/Support/CFG.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Transforms/Utils/Local.h"
// Pass-level statistics, reported with -stats.
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
// Command-line switches to disable PRE / load-PRE for debugging; both
// default to enabled.
static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//
60 /// This class holds the mapping between values and value numbers. It is used
61 /// as an efficient mechanism to determine the expression-wise equivalence of
65 enum ExpressionOpcode
{ ADD
, FADD
, SUB
, FSUB
, MUL
, FMUL
,
66 UDIV
, SDIV
, FDIV
, UREM
, SREM
,
67 FREM
, SHL
, LSHR
, ASHR
, AND
, OR
, XOR
, ICMPEQ
,
68 ICMPNE
, ICMPUGT
, ICMPUGE
, ICMPULT
, ICMPULE
,
69 ICMPSGT
, ICMPSGE
, ICMPSLT
, ICMPSLE
, FCMPOEQ
,
70 FCMPOGT
, FCMPOGE
, FCMPOLT
, FCMPOLE
, FCMPONE
,
71 FCMPORD
, FCMPUNO
, FCMPUEQ
, FCMPUGT
, FCMPUGE
,
72 FCMPULT
, FCMPULE
, FCMPUNE
, EXTRACT
, INSERT
,
73 SHUFFLE
, SELECT
, TRUNC
, ZEXT
, SEXT
, FPTOUI
,
74 FPTOSI
, UITOFP
, SITOFP
, FPTRUNC
, FPEXT
,
75 PTRTOINT
, INTTOPTR
, BITCAST
, GEP
, CALL
, CONSTANT
,
78 ExpressionOpcode opcode
;
83 SmallVector
<uint32_t, 4> varargs
;
87 Expression(ExpressionOpcode o
) : opcode(o
) { }
89 bool operator==(const Expression
&other
) const {
90 if (opcode
!= other
.opcode
)
92 else if (opcode
== EMPTY
|| opcode
== TOMBSTONE
)
94 else if (type
!= other
.type
)
96 else if (function
!= other
.function
)
98 else if (firstVN
!= other
.firstVN
)
100 else if (secondVN
!= other
.secondVN
)
102 else if (thirdVN
!= other
.thirdVN
)
105 if (varargs
.size() != other
.varargs
.size())
108 for (size_t i
= 0; i
< varargs
.size(); ++i
)
109 if (varargs
[i
] != other
.varargs
[i
])
116 bool operator!=(const Expression
&other
) const {
117 return !(*this == other
);
123 DenseMap
<Value
*, uint32_t> valueNumbering
;
124 DenseMap
<Expression
, uint32_t> expressionNumbering
;
126 MemoryDependenceAnalysis
* MD
;
129 uint32_t nextValueNumber
;
131 Expression::ExpressionOpcode
getOpcode(BinaryOperator
* BO
);
132 Expression::ExpressionOpcode
getOpcode(CmpInst
* C
);
133 Expression::ExpressionOpcode
getOpcode(CastInst
* C
);
134 Expression
create_expression(BinaryOperator
* BO
);
135 Expression
create_expression(CmpInst
* C
);
136 Expression
create_expression(ShuffleVectorInst
* V
);
137 Expression
create_expression(ExtractElementInst
* C
);
138 Expression
create_expression(InsertElementInst
* V
);
139 Expression
create_expression(SelectInst
* V
);
140 Expression
create_expression(CastInst
* C
);
141 Expression
create_expression(GetElementPtrInst
* G
);
142 Expression
create_expression(CallInst
* C
);
143 Expression
create_expression(Constant
* C
);
145 ValueTable() : nextValueNumber(1) { }
146 uint32_t lookup_or_add(Value
* V
);
147 uint32_t lookup(Value
* V
) const;
148 void add(Value
* V
, uint32_t num
);
150 void erase(Value
* v
);
152 void setAliasAnalysis(AliasAnalysis
* A
) { AA
= A
; }
153 AliasAnalysis
*getAliasAnalysis() const { return AA
; }
154 void setMemDep(MemoryDependenceAnalysis
* M
) { MD
= M
; }
155 void setDomTree(DominatorTree
* D
) { DT
= D
; }
156 uint32_t getNextUnusedValueNumber() { return nextValueNumber
; }
157 void verifyRemoved(const Value
*) const;
162 template <> struct DenseMapInfo
<Expression
> {
163 static inline Expression
getEmptyKey() {
164 return Expression(Expression::EMPTY
);
167 static inline Expression
getTombstoneKey() {
168 return Expression(Expression::TOMBSTONE
);
171 static unsigned getHashValue(const Expression e
) {
172 unsigned hash
= e
.opcode
;
174 hash
= e
.firstVN
+ hash
* 37;
175 hash
= e
.secondVN
+ hash
* 37;
176 hash
= e
.thirdVN
+ hash
* 37;
178 hash
= ((unsigned)((uintptr_t)e
.type
>> 4) ^
179 (unsigned)((uintptr_t)e
.type
>> 9)) +
182 for (SmallVector
<uint32_t, 4>::const_iterator I
= e
.varargs
.begin(),
183 E
= e
.varargs
.end(); I
!= E
; ++I
)
184 hash
= *I
+ hash
* 37;
186 hash
= ((unsigned)((uintptr_t)e
.function
>> 4) ^
187 (unsigned)((uintptr_t)e
.function
>> 9)) +
192 static bool isEqual(const Expression
&LHS
, const Expression
&RHS
) {
195 static bool isPod() { return true; }
199 //===----------------------------------------------------------------------===//
200 // ValueTable Internal Functions
201 //===----------------------------------------------------------------------===//
202 Expression::ExpressionOpcode
ValueTable::getOpcode(BinaryOperator
* BO
) {
203 switch(BO
->getOpcode()) {
204 default: // THIS SHOULD NEVER HAPPEN
205 llvm_unreachable("Binary operator with unknown opcode?");
206 case Instruction::Add
: return Expression::ADD
;
207 case Instruction::FAdd
: return Expression::FADD
;
208 case Instruction::Sub
: return Expression::SUB
;
209 case Instruction::FSub
: return Expression::FSUB
;
210 case Instruction::Mul
: return Expression::MUL
;
211 case Instruction::FMul
: return Expression::FMUL
;
212 case Instruction::UDiv
: return Expression::UDIV
;
213 case Instruction::SDiv
: return Expression::SDIV
;
214 case Instruction::FDiv
: return Expression::FDIV
;
215 case Instruction::URem
: return Expression::UREM
;
216 case Instruction::SRem
: return Expression::SREM
;
217 case Instruction::FRem
: return Expression::FREM
;
218 case Instruction::Shl
: return Expression::SHL
;
219 case Instruction::LShr
: return Expression::LSHR
;
220 case Instruction::AShr
: return Expression::ASHR
;
221 case Instruction::And
: return Expression::AND
;
222 case Instruction::Or
: return Expression::OR
;
223 case Instruction::Xor
: return Expression::XOR
;
227 Expression::ExpressionOpcode
ValueTable::getOpcode(CmpInst
* C
) {
228 if (isa
<ICmpInst
>(C
)) {
229 switch (C
->getPredicate()) {
230 default: // THIS SHOULD NEVER HAPPEN
231 llvm_unreachable("Comparison with unknown predicate?");
232 case ICmpInst::ICMP_EQ
: return Expression::ICMPEQ
;
233 case ICmpInst::ICMP_NE
: return Expression::ICMPNE
;
234 case ICmpInst::ICMP_UGT
: return Expression::ICMPUGT
;
235 case ICmpInst::ICMP_UGE
: return Expression::ICMPUGE
;
236 case ICmpInst::ICMP_ULT
: return Expression::ICMPULT
;
237 case ICmpInst::ICMP_ULE
: return Expression::ICMPULE
;
238 case ICmpInst::ICMP_SGT
: return Expression::ICMPSGT
;
239 case ICmpInst::ICMP_SGE
: return Expression::ICMPSGE
;
240 case ICmpInst::ICMP_SLT
: return Expression::ICMPSLT
;
241 case ICmpInst::ICMP_SLE
: return Expression::ICMPSLE
;
244 switch (C
->getPredicate()) {
245 default: // THIS SHOULD NEVER HAPPEN
246 llvm_unreachable("Comparison with unknown predicate?");
247 case FCmpInst::FCMP_OEQ
: return Expression::FCMPOEQ
;
248 case FCmpInst::FCMP_OGT
: return Expression::FCMPOGT
;
249 case FCmpInst::FCMP_OGE
: return Expression::FCMPOGE
;
250 case FCmpInst::FCMP_OLT
: return Expression::FCMPOLT
;
251 case FCmpInst::FCMP_OLE
: return Expression::FCMPOLE
;
252 case FCmpInst::FCMP_ONE
: return Expression::FCMPONE
;
253 case FCmpInst::FCMP_ORD
: return Expression::FCMPORD
;
254 case FCmpInst::FCMP_UNO
: return Expression::FCMPUNO
;
255 case FCmpInst::FCMP_UEQ
: return Expression::FCMPUEQ
;
256 case FCmpInst::FCMP_UGT
: return Expression::FCMPUGT
;
257 case FCmpInst::FCMP_UGE
: return Expression::FCMPUGE
;
258 case FCmpInst::FCMP_ULT
: return Expression::FCMPULT
;
259 case FCmpInst::FCMP_ULE
: return Expression::FCMPULE
;
260 case FCmpInst::FCMP_UNE
: return Expression::FCMPUNE
;
265 Expression::ExpressionOpcode
ValueTable::getOpcode(CastInst
* C
) {
266 switch(C
->getOpcode()) {
267 default: // THIS SHOULD NEVER HAPPEN
268 llvm_unreachable("Cast operator with unknown opcode?");
269 case Instruction::Trunc
: return Expression::TRUNC
;
270 case Instruction::ZExt
: return Expression::ZEXT
;
271 case Instruction::SExt
: return Expression::SEXT
;
272 case Instruction::FPToUI
: return Expression::FPTOUI
;
273 case Instruction::FPToSI
: return Expression::FPTOSI
;
274 case Instruction::UIToFP
: return Expression::UITOFP
;
275 case Instruction::SIToFP
: return Expression::SITOFP
;
276 case Instruction::FPTrunc
: return Expression::FPTRUNC
;
277 case Instruction::FPExt
: return Expression::FPEXT
;
278 case Instruction::PtrToInt
: return Expression::PTRTOINT
;
279 case Instruction::IntToPtr
: return Expression::INTTOPTR
;
280 case Instruction::BitCast
: return Expression::BITCAST
;
284 Expression
ValueTable::create_expression(CallInst
* C
) {
287 e
.type
= C
->getType();
291 e
.function
= C
->getCalledFunction();
292 e
.opcode
= Expression::CALL
;
294 for (CallInst::op_iterator I
= C
->op_begin()+1, E
= C
->op_end();
296 e
.varargs
.push_back(lookup_or_add(*I
));
301 Expression
ValueTable::create_expression(BinaryOperator
* BO
) {
304 e
.firstVN
= lookup_or_add(BO
->getOperand(0));
305 e
.secondVN
= lookup_or_add(BO
->getOperand(1));
308 e
.type
= BO
->getType();
309 e
.opcode
= getOpcode(BO
);
314 Expression
ValueTable::create_expression(CmpInst
* C
) {
317 e
.firstVN
= lookup_or_add(C
->getOperand(0));
318 e
.secondVN
= lookup_or_add(C
->getOperand(1));
321 e
.type
= C
->getType();
322 e
.opcode
= getOpcode(C
);
327 Expression
ValueTable::create_expression(CastInst
* C
) {
330 e
.firstVN
= lookup_or_add(C
->getOperand(0));
334 e
.type
= C
->getType();
335 e
.opcode
= getOpcode(C
);
340 Expression
ValueTable::create_expression(ShuffleVectorInst
* S
) {
343 e
.firstVN
= lookup_or_add(S
->getOperand(0));
344 e
.secondVN
= lookup_or_add(S
->getOperand(1));
345 e
.thirdVN
= lookup_or_add(S
->getOperand(2));
347 e
.type
= S
->getType();
348 e
.opcode
= Expression::SHUFFLE
;
353 Expression
ValueTable::create_expression(ExtractElementInst
* E
) {
356 e
.firstVN
= lookup_or_add(E
->getOperand(0));
357 e
.secondVN
= lookup_or_add(E
->getOperand(1));
360 e
.type
= E
->getType();
361 e
.opcode
= Expression::EXTRACT
;
366 Expression
ValueTable::create_expression(InsertElementInst
* I
) {
369 e
.firstVN
= lookup_or_add(I
->getOperand(0));
370 e
.secondVN
= lookup_or_add(I
->getOperand(1));
371 e
.thirdVN
= lookup_or_add(I
->getOperand(2));
373 e
.type
= I
->getType();
374 e
.opcode
= Expression::INSERT
;
379 Expression
ValueTable::create_expression(SelectInst
* I
) {
382 e
.firstVN
= lookup_or_add(I
->getCondition());
383 e
.secondVN
= lookup_or_add(I
->getTrueValue());
384 e
.thirdVN
= lookup_or_add(I
->getFalseValue());
386 e
.type
= I
->getType();
387 e
.opcode
= Expression::SELECT
;
392 Expression
ValueTable::create_expression(GetElementPtrInst
* G
) {
395 e
.firstVN
= lookup_or_add(G
->getPointerOperand());
399 e
.type
= G
->getType();
400 e
.opcode
= Expression::GEP
;
402 for (GetElementPtrInst::op_iterator I
= G
->idx_begin(), E
= G
->idx_end();
404 e
.varargs
.push_back(lookup_or_add(*I
));
409 //===----------------------------------------------------------------------===//
410 // ValueTable External Functions
411 //===----------------------------------------------------------------------===//
413 /// add - Insert a value into the table with a specified value number.
414 void ValueTable::add(Value
* V
, uint32_t num
) {
415 valueNumbering
.insert(std::make_pair(V
, num
));
418 /// lookup_or_add - Returns the value number for the specified value, assigning
419 /// it a new number if it did not have one before.
420 uint32_t ValueTable::lookup_or_add(Value
* V
) {
421 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
422 if (VI
!= valueNumbering
.end())
425 if (CallInst
* C
= dyn_cast
<CallInst
>(V
)) {
426 if (AA
->doesNotAccessMemory(C
)) {
427 Expression e
= create_expression(C
);
429 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
430 if (EI
!= expressionNumbering
.end()) {
431 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
434 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
435 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
437 return nextValueNumber
++;
439 } else if (AA
->onlyReadsMemory(C
)) {
440 Expression e
= create_expression(C
);
442 if (expressionNumbering
.find(e
) == expressionNumbering
.end()) {
443 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
444 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
445 return nextValueNumber
++;
448 MemDepResult local_dep
= MD
->getDependency(C
);
450 if (!local_dep
.isDef() && !local_dep
.isNonLocal()) {
451 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
452 return nextValueNumber
++;
455 if (local_dep
.isDef()) {
456 CallInst
* local_cdep
= cast
<CallInst
>(local_dep
.getInst());
458 if (local_cdep
->getNumOperands() != C
->getNumOperands()) {
459 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
460 return nextValueNumber
++;
463 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
464 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
465 uint32_t cd_vn
= lookup_or_add(local_cdep
->getOperand(i
));
467 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
468 return nextValueNumber
++;
472 uint32_t v
= lookup_or_add(local_cdep
);
473 valueNumbering
.insert(std::make_pair(V
, v
));
478 const MemoryDependenceAnalysis::NonLocalDepInfo
&deps
=
479 MD
->getNonLocalCallDependency(CallSite(C
));
480 // FIXME: call/call dependencies for readonly calls should return def, not
481 // clobber! Move the checking logic to MemDep!
484 // Check to see if we have a single dominating call instruction that is
486 for (unsigned i
= 0, e
= deps
.size(); i
!= e
; ++i
) {
487 const MemoryDependenceAnalysis::NonLocalDepEntry
*I
= &deps
[i
];
488 // Ignore non-local dependencies.
489 if (I
->second
.isNonLocal())
492 // We don't handle non-depedencies. If we already have a call, reject
493 // instruction dependencies.
494 if (I
->second
.isClobber() || cdep
!= 0) {
499 CallInst
*NonLocalDepCall
= dyn_cast
<CallInst
>(I
->second
.getInst());
500 // FIXME: All duplicated with non-local case.
501 if (NonLocalDepCall
&& DT
->properlyDominates(I
->first
, C
->getParent())){
502 cdep
= NonLocalDepCall
;
511 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
512 return nextValueNumber
++;
515 if (cdep
->getNumOperands() != C
->getNumOperands()) {
516 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
517 return nextValueNumber
++;
519 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
520 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
521 uint32_t cd_vn
= lookup_or_add(cdep
->getOperand(i
));
523 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
524 return nextValueNumber
++;
528 uint32_t v
= lookup_or_add(cdep
);
529 valueNumbering
.insert(std::make_pair(V
, v
));
533 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
534 return nextValueNumber
++;
536 } else if (BinaryOperator
* BO
= dyn_cast
<BinaryOperator
>(V
)) {
537 Expression e
= create_expression(BO
);
539 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
540 if (EI
!= expressionNumbering
.end()) {
541 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
544 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
545 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
547 return nextValueNumber
++;
549 } else if (CmpInst
* C
= dyn_cast
<CmpInst
>(V
)) {
550 Expression e
= create_expression(C
);
552 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
553 if (EI
!= expressionNumbering
.end()) {
554 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
557 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
558 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
560 return nextValueNumber
++;
562 } else if (ShuffleVectorInst
* U
= dyn_cast
<ShuffleVectorInst
>(V
)) {
563 Expression e
= create_expression(U
);
565 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
566 if (EI
!= expressionNumbering
.end()) {
567 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
570 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
571 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
573 return nextValueNumber
++;
575 } else if (ExtractElementInst
* U
= dyn_cast
<ExtractElementInst
>(V
)) {
576 Expression e
= create_expression(U
);
578 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
579 if (EI
!= expressionNumbering
.end()) {
580 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
583 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
584 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
586 return nextValueNumber
++;
588 } else if (InsertElementInst
* U
= dyn_cast
<InsertElementInst
>(V
)) {
589 Expression e
= create_expression(U
);
591 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
592 if (EI
!= expressionNumbering
.end()) {
593 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
596 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
597 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
599 return nextValueNumber
++;
601 } else if (SelectInst
* U
= dyn_cast
<SelectInst
>(V
)) {
602 Expression e
= create_expression(U
);
604 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
605 if (EI
!= expressionNumbering
.end()) {
606 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
609 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
610 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
612 return nextValueNumber
++;
614 } else if (CastInst
* U
= dyn_cast
<CastInst
>(V
)) {
615 Expression e
= create_expression(U
);
617 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
618 if (EI
!= expressionNumbering
.end()) {
619 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
622 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
623 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
625 return nextValueNumber
++;
627 } else if (GetElementPtrInst
* U
= dyn_cast
<GetElementPtrInst
>(V
)) {
628 Expression e
= create_expression(U
);
630 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
631 if (EI
!= expressionNumbering
.end()) {
632 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
635 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
636 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
638 return nextValueNumber
++;
641 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
642 return nextValueNumber
++;
646 /// lookup - Returns the value number of the specified value. Fails if
647 /// the value has not yet been numbered.
648 uint32_t ValueTable::lookup(Value
* V
) const {
649 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
650 assert(VI
!= valueNumbering
.end() && "Value not numbered?");
654 /// clear - Remove all entries from the ValueTable
655 void ValueTable::clear() {
656 valueNumbering
.clear();
657 expressionNumbering
.clear();
661 /// erase - Remove a value from the value numbering
662 void ValueTable::erase(Value
* V
) {
663 valueNumbering
.erase(V
);
666 /// verifyRemoved - Verify that the value is removed from all internal data
668 void ValueTable::verifyRemoved(const Value
*V
) const {
669 for (DenseMap
<Value
*, uint32_t>::iterator
670 I
= valueNumbering
.begin(), E
= valueNumbering
.end(); I
!= E
; ++I
) {
671 assert(I
->first
!= V
&& "Inst still occurs in value numbering map!");
675 //===----------------------------------------------------------------------===//
677 //===----------------------------------------------------------------------===//
680 struct ValueNumberScope
{
681 ValueNumberScope
* parent
;
682 DenseMap
<uint32_t, Value
*> table
;
684 ValueNumberScope(ValueNumberScope
* p
) : parent(p
) { }
690 class GVN
: public FunctionPass
{
691 bool runOnFunction(Function
&F
);
693 static char ID
; // Pass identification, replacement for typeid
694 GVN() : FunctionPass(&ID
) { }
697 MemoryDependenceAnalysis
*MD
;
701 DenseMap
<BasicBlock
*, ValueNumberScope
*> localAvail
;
703 typedef DenseMap
<Value
*, SmallPtrSet
<Instruction
*, 4> > PhiMapType
;
707 // This transformation requires dominator postdominator info
708 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
709 AU
.addRequired
<DominatorTree
>();
710 AU
.addRequired
<MemoryDependenceAnalysis
>();
711 AU
.addRequired
<AliasAnalysis
>();
713 AU
.addPreserved
<DominatorTree
>();
714 AU
.addPreserved
<AliasAnalysis
>();
718 // FIXME: eliminate or document these better
719 bool processLoad(LoadInst
* L
,
720 SmallVectorImpl
<Instruction
*> &toErase
);
721 bool processInstruction(Instruction
* I
,
722 SmallVectorImpl
<Instruction
*> &toErase
);
723 bool processNonLocalLoad(LoadInst
* L
,
724 SmallVectorImpl
<Instruction
*> &toErase
);
725 bool processBlock(BasicBlock
* BB
);
726 Value
*GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
727 DenseMap
<BasicBlock
*, Value
*> &Phis
,
728 bool top_level
= false);
729 void dump(DenseMap
<uint32_t, Value
*>& d
);
730 bool iterateOnFunction(Function
&F
);
731 Value
* CollapsePhi(PHINode
* p
);
732 bool performPRE(Function
& F
);
733 Value
* lookupNumber(BasicBlock
* BB
, uint32_t num
);
734 Value
* AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
);
735 void cleanupGlobalSets();
736 void verifyRemoved(const Instruction
*I
) const;
742 // createGVNPass - The public interface to this file...
743 FunctionPass
*llvm::createGVNPass() { return new GVN(); }
745 static RegisterPass
<GVN
> X("gvn",
746 "Global Value Numbering");
748 void GVN::dump(DenseMap
<uint32_t, Value
*>& d
) {
750 for (DenseMap
<uint32_t, Value
*>::iterator I
= d
.begin(),
751 E
= d
.end(); I
!= E
; ++I
) {
752 printf("%d\n", I
->first
);
758 static bool isSafeReplacement(PHINode
* p
, Instruction
* inst
) {
759 if (!isa
<PHINode
>(inst
))
762 for (Instruction::use_iterator UI
= p
->use_begin(), E
= p
->use_end();
764 if (PHINode
* use_phi
= dyn_cast
<PHINode
>(UI
))
765 if (use_phi
->getParent() == inst
->getParent())
771 Value
* GVN::CollapsePhi(PHINode
* p
) {
772 Value
* constVal
= p
->hasConstantValue(DT
);
773 if (!constVal
) return 0;
775 Instruction
* inst
= dyn_cast
<Instruction
>(constVal
);
779 if (DT
->dominates(inst
, p
))
780 if (isSafeReplacement(p
, inst
))
785 /// GetValueForBlock - Get the value to use within the specified basic block.
786 /// available values are in Phis.
787 Value
*GVN::GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
788 DenseMap
<BasicBlock
*, Value
*> &Phis
,
791 // If we have already computed this value, return the previously computed val.
792 DenseMap
<BasicBlock
*, Value
*>::iterator V
= Phis
.find(BB
);
793 if (V
!= Phis
.end() && !top_level
) return V
->second
;
795 // If the block is unreachable, just return undef, since this path
796 // can't actually occur at runtime.
797 if (!DT
->isReachableFromEntry(BB
))
798 return Phis
[BB
] = UndefValue::get(orig
->getType());
800 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
801 Value
*ret
= GetValueForBlock(Pred
, orig
, Phis
);
806 // Get the number of predecessors of this block so we can reserve space later.
807 // If there is already a PHI in it, use the #preds from it, otherwise count.
808 // Getting it from the PHI is constant time.
810 if (PHINode
*ExistingPN
= dyn_cast
<PHINode
>(BB
->begin()))
811 NumPreds
= ExistingPN
->getNumIncomingValues();
813 NumPreds
= std::distance(pred_begin(BB
), pred_end(BB
));
815 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
816 // now, then get values to fill in the incoming values for the PHI.
817 PHINode
*PN
= PHINode::Create(orig
->getType(), orig
->getName()+".rle",
819 PN
->reserveOperandSpace(NumPreds
);
821 Phis
.insert(std::make_pair(BB
, PN
));
823 // Fill in the incoming values for the block.
824 for (pred_iterator PI
= pred_begin(BB
), E
= pred_end(BB
); PI
!= E
; ++PI
) {
825 Value
* val
= GetValueForBlock(*PI
, orig
, Phis
);
826 PN
->addIncoming(val
, *PI
);
829 VN
.getAliasAnalysis()->copyValue(orig
, PN
);
831 // Attempt to collapse PHI nodes that are trivially redundant
832 Value
* v
= CollapsePhi(PN
);
834 // Cache our phi construction results
835 if (LoadInst
* L
= dyn_cast
<LoadInst
>(orig
))
836 phiMap
[L
->getPointerOperand()].insert(PN
);
838 phiMap
[orig
].insert(PN
);
843 PN
->replaceAllUsesWith(v
);
844 if (isa
<PointerType
>(v
->getType()))
845 MD
->invalidateCachedPointerInfo(v
);
847 for (DenseMap
<BasicBlock
*, Value
*>::iterator I
= Phis
.begin(),
848 E
= Phis
.end(); I
!= E
; ++I
)
852 DEBUG(errs() << "GVN removed: " << *PN
<< '\n');
853 MD
->removeInstruction(PN
);
854 PN
->eraseFromParent();
855 DEBUG(verifyRemoved(PN
));
861 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
862 /// we're analyzing is fully available in the specified block. As we go, keep
863 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
864 /// map is actually a tri-state map with the following values:
865 /// 0) we know the block *is not* fully available.
866 /// 1) we know the block *is* fully available.
867 /// 2) we do not know whether the block is fully available or not, but we are
868 /// currently speculating that it will be.
869 /// 3) we are speculating for this block and have used that to speculate for
871 static bool IsValueFullyAvailableInBlock(BasicBlock
*BB
,
872 DenseMap
<BasicBlock
*, char> &FullyAvailableBlocks
) {
873 // Optimistically assume that the block is fully available and check to see
874 // if we already know about this block in one lookup.
875 std::pair
<DenseMap
<BasicBlock
*, char>::iterator
, char> IV
=
876 FullyAvailableBlocks
.insert(std::make_pair(BB
, 2));
878 // If the entry already existed for this block, return the precomputed value.
880 // If this is a speculative "available" value, mark it as being used for
881 // speculation of other blocks.
882 if (IV
.first
->second
== 2)
883 IV
.first
->second
= 3;
884 return IV
.first
->second
!= 0;
887 // Otherwise, see if it is fully available in all predecessors.
888 pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
);
890 // If this block has no predecessors, it isn't live-in here.
892 goto SpeculationFailure
;
894 for (; PI
!= PE
; ++PI
)
895 // If the value isn't fully available in one of our predecessors, then it
896 // isn't fully available in this block either. Undo our previous
897 // optimistic assumption and bail out.
898 if (!IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
899 goto SpeculationFailure
;
903 // SpeculationFailure - If we get here, we found out that this is not, after
904 // all, a fully-available block. We have a problem if we speculated on this and
905 // used the speculation to mark other blocks as available.
907 char &BBVal
= FullyAvailableBlocks
[BB
];
909 // If we didn't speculate on this, just return with it set to false.
915 // If we did speculate on this value, we could have blocks set to 1 that are
916 // incorrect. Walk the (transitive) successors of this block and mark them as
918 SmallVector
<BasicBlock
*, 32> BBWorklist
;
919 BBWorklist
.push_back(BB
);
921 while (!BBWorklist
.empty()) {
922 BasicBlock
*Entry
= BBWorklist
.pop_back_val();
923 // Note that this sets blocks to 0 (unavailable) if they happen to not
924 // already be in FullyAvailableBlocks. This is safe.
925 char &EntryVal
= FullyAvailableBlocks
[Entry
];
926 if (EntryVal
== 0) continue; // Already unavailable.
928 // Mark as unavailable.
931 for (succ_iterator I
= succ_begin(Entry
), E
= succ_end(Entry
); I
!= E
; ++I
)
932 BBWorklist
.push_back(*I
);
938 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
939 /// non-local by performing PHI construction.
940 bool GVN::processNonLocalLoad(LoadInst
*LI
,
941 SmallVectorImpl
<Instruction
*> &toErase
) {
942 // Find the non-local dependencies of the load.
943 SmallVector
<MemoryDependenceAnalysis::NonLocalDepEntry
, 64> Deps
;
944 MD
->getNonLocalPointerDependency(LI
->getOperand(0), true, LI
->getParent(),
946 //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
947 // << Deps.size() << *LI << '\n');
949 // If we had to process more than one hundred blocks to find the
950 // dependencies, this load isn't worth worrying about. Optimizing
951 // it will be too expensive.
952 if (Deps
.size() > 100)
955 // If we had a phi translation failure, we'll have a single entry which is a
956 // clobber in the current block. Reject this early.
957 if (Deps
.size() == 1 && Deps
[0].second
.isClobber()) {
959 errs() << "GVN: non-local load ";
960 WriteAsOperand(errs(), LI
);
961 errs() << " is clobbered by " << *Deps
[0].second
.getInst() << '\n';
966 // Filter out useless results (non-locals, etc). Keep track of the blocks
967 // where we have a value available in repl, also keep track of whether we see
968 // dependencies that produce an unknown value for the load (such as a call
969 // that could potentially clobber the load).
970 SmallVector
<std::pair
<BasicBlock
*, Value
*>, 16> ValuesPerBlock
;
971 SmallVector
<BasicBlock
*, 16> UnavailableBlocks
;
973 for (unsigned i
= 0, e
= Deps
.size(); i
!= e
; ++i
) {
974 BasicBlock
*DepBB
= Deps
[i
].first
;
975 MemDepResult DepInfo
= Deps
[i
].second
;
977 if (DepInfo
.isClobber()) {
978 UnavailableBlocks
.push_back(DepBB
);
982 Instruction
*DepInst
= DepInfo
.getInst();
984 // Loading the allocation -> undef.
985 if (isa
<AllocationInst
>(DepInst
)) {
986 ValuesPerBlock
.push_back(std::make_pair(DepBB
,
987 UndefValue::get(LI
->getType())));
991 if (StoreInst
* S
= dyn_cast
<StoreInst
>(DepInst
)) {
992 // Reject loads and stores that are to the same address but are of
994 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
995 // of bitfield access, it would be interesting to optimize for it at some
997 if (S
->getOperand(0)->getType() != LI
->getType()) {
998 UnavailableBlocks
.push_back(DepBB
);
1002 ValuesPerBlock
.push_back(std::make_pair(DepBB
, S
->getOperand(0)));
1004 } else if (LoadInst
* LD
= dyn_cast
<LoadInst
>(DepInst
)) {
1005 if (LD
->getType() != LI
->getType()) {
1006 UnavailableBlocks
.push_back(DepBB
);
1009 ValuesPerBlock
.push_back(std::make_pair(DepBB
, LD
));
1011 UnavailableBlocks
.push_back(DepBB
);
1016 // If we have no predecessors that produce a known value for this load, exit
1018 if (ValuesPerBlock
.empty()) return false;
1020 // If all of the instructions we depend on produce a known value for this
1021 // load, then it is fully redundant and we can use PHI insertion to compute
1022 // its value. Insert PHIs and remove the fully redundant value now.
1023 if (UnavailableBlocks
.empty()) {
1024 // Use cached PHI construction information from previous runs
1025 SmallPtrSet
<Instruction
*, 4> &p
= phiMap
[LI
->getPointerOperand()];
1026 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
1027 for (SmallPtrSet
<Instruction
*, 4>::iterator I
= p
.begin(), E
= p
.end();
1029 if ((*I
)->getParent() == LI
->getParent()) {
1030 DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD #1: " << *LI
<< '\n');
1031 LI
->replaceAllUsesWith(*I
);
1032 if (isa
<PointerType
>((*I
)->getType()))
1033 MD
->invalidateCachedPointerInfo(*I
);
1034 toErase
.push_back(LI
);
1039 ValuesPerBlock
.push_back(std::make_pair((*I
)->getParent(), *I
));
1042 DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI
<< '\n');
1044 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1045 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1046 // Perform PHI construction.
1047 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1048 LI
->replaceAllUsesWith(v
);
1050 if (isa
<PHINode
>(v
))
1052 if (isa
<PointerType
>(v
->getType()))
1053 MD
->invalidateCachedPointerInfo(v
);
1054 toErase
.push_back(LI
);
1059 if (!EnablePRE
|| !EnableLoadPRE
)
1062 // Okay, we have *some* definitions of the value. This means that the value
1063 // is available in some of our (transitive) predecessors. Lets think about
1064 // doing PRE of this load. This will involve inserting a new load into the
1065 // predecessor when it's not available. We could do this in general, but
1066 // prefer to not increase code size. As such, we only do this when we know
1067 // that we only have to insert *one* load (which means we're basically moving
1068 // the load, not inserting a new one).
1070 SmallPtrSet
<BasicBlock
*, 4> Blockers
;
1071 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1072 Blockers
.insert(UnavailableBlocks
[i
]);
1074 // Lets find first basic block with more than one predecessor. Walk backwards
1075 // through predecessors if needed.
1076 BasicBlock
*LoadBB
= LI
->getParent();
1077 BasicBlock
*TmpBB
= LoadBB
;
1079 bool isSinglePred
= false;
1080 bool allSingleSucc
= true;
1081 while (TmpBB
->getSinglePredecessor()) {
1082 isSinglePred
= true;
1083 TmpBB
= TmpBB
->getSinglePredecessor();
1084 if (!TmpBB
) // If haven't found any, bail now.
1086 if (TmpBB
== LoadBB
) // Infinite (unreachable) loop.
1088 if (Blockers
.count(TmpBB
))
1090 if (TmpBB
->getTerminator()->getNumSuccessors() != 1)
1091 allSingleSucc
= false;
1097 // If we have a repl set with LI itself in it, this means we have a loop where
1098 // at least one of the values is LI. Since this means that we won't be able
1099 // to eliminate LI even if we insert uses in the other predecessors, we will
1100 // end up increasing code size. Reject this by scanning for LI.
1101 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1102 if (ValuesPerBlock
[i
].second
== LI
)
1107 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1108 if (Instruction
*I
= dyn_cast
<Instruction
>(ValuesPerBlock
[i
].second
))
1109 // "Hot" Instruction is in some loop (because it dominates its dep.
1111 if (DT
->dominates(LI
, I
)) {
1116 // We are interested only in "hot" instructions. We don't want to do any
1117 // mis-optimizations here.
1122 // Okay, we have some hope :). Check to see if the loaded value is fully
1123 // available in all but one predecessor.
1124 // FIXME: If we could restructure the CFG, we could make a common pred with
1125 // all the preds that don't have an available LI and insert a new load into
1127 BasicBlock
*UnavailablePred
= 0;
1129 DenseMap
<BasicBlock
*, char> FullyAvailableBlocks
;
1130 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1131 FullyAvailableBlocks
[ValuesPerBlock
[i
].first
] = true;
1132 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1133 FullyAvailableBlocks
[UnavailableBlocks
[i
]] = false;
1135 for (pred_iterator PI
= pred_begin(LoadBB
), E
= pred_end(LoadBB
);
1137 if (IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
1140 // If this load is not available in multiple predecessors, reject it.
1141 if (UnavailablePred
&& UnavailablePred
!= *PI
)
1143 UnavailablePred
= *PI
;
1146 assert(UnavailablePred
!= 0 &&
1147 "Fully available value should be eliminated above!");
1149 // If the loaded pointer is PHI node defined in this block, do PHI translation
1150 // to get its value in the predecessor.
1151 Value
*LoadPtr
= LI
->getOperand(0)->DoPHITranslation(LoadBB
, UnavailablePred
);
1153 // Make sure the value is live in the predecessor. If it was defined by a
1154 // non-PHI instruction in this block, we don't know how to recompute it above.
1155 if (Instruction
*LPInst
= dyn_cast
<Instruction
>(LoadPtr
))
1156 if (!DT
->dominates(LPInst
->getParent(), UnavailablePred
)) {
1157 DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
1158 << *LPInst
<< '\n' << *LI
<< "\n");
1162 // We don't currently handle critical edges :(
1163 if (UnavailablePred
->getTerminator()->getNumSuccessors() != 1) {
1164 DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
1165 << UnavailablePred
->getName() << "': " << *LI
<< '\n');
1169 // Make sure it is valid to move this load here. We have to watch out for:
1170 // @1 = getelementptr (i8* p, ...
1171 // test p and branch if == 0
1173 // It is valid to have the getelementptr before the test, even if p can be 0,
1174 // as getelementptr only does address arithmetic.
1175 // If we are not pushing the value through any multiple-successor blocks
1176 // we do not have this case. Otherwise, check that the load is safe to
1177 // put anywhere; this can be improved, but should be conservatively safe.
1178 if (!allSingleSucc
&&
1179 !isSafeToLoadUnconditionally(LoadPtr
, UnavailablePred
->getTerminator()))
1182 // Okay, we can eliminate this load by inserting a reload in the predecessor
1183 // and using PHI construction to get the value in the other predecessors, do
1185 DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI
<< '\n');
1187 Value
*NewLoad
= new LoadInst(LoadPtr
, LI
->getName()+".pre", false,
1189 UnavailablePred
->getTerminator());
1191 SmallPtrSet
<Instruction
*, 4> &p
= phiMap
[LI
->getPointerOperand()];
1192 for (SmallPtrSet
<Instruction
*, 4>::iterator I
= p
.begin(), E
= p
.end();
1194 ValuesPerBlock
.push_back(std::make_pair((*I
)->getParent(), *I
));
1196 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1197 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1198 BlockReplValues
[UnavailablePred
] = NewLoad
;
1200 // Perform PHI construction.
1201 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1202 LI
->replaceAllUsesWith(v
);
1203 if (isa
<PHINode
>(v
))
1205 if (isa
<PointerType
>(v
->getType()))
1206 MD
->invalidateCachedPointerInfo(v
);
1207 toErase
.push_back(LI
);
1212 /// processLoad - Attempt to eliminate a load, first by eliminating it
1213 /// locally, and then attempting non-local elimination if that fails.
1214 bool GVN::processLoad(LoadInst
*L
, SmallVectorImpl
<Instruction
*> &toErase
) {
1215 if (L
->isVolatile())
1218 Value
* pointer
= L
->getPointerOperand();
1220 // ... to a pointer that has been loaded from before...
1221 MemDepResult dep
= MD
->getDependency(L
);
1223 // If the value isn't available, don't do anything!
1224 if (dep
.isClobber()) {
1226 // fast print dep, using operator<< on instruction would be too slow
1227 errs() << "GVN: load ";
1228 WriteAsOperand(errs(), L
);
1229 Instruction
*I
= dep
.getInst();
1230 errs() << " is clobbered by " << *I
<< '\n';
1235 // If it is defined in another block, try harder.
1236 if (dep
.isNonLocal())
1237 return processNonLocalLoad(L
, toErase
);
1239 Instruction
*DepInst
= dep
.getInst();
1240 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
1241 // Only forward substitute stores to loads of the same type.
1242 // FIXME: Could do better!
1243 if (DepSI
->getPointerOperand()->getType() != pointer
->getType())
1247 L
->replaceAllUsesWith(DepSI
->getOperand(0));
1248 if (isa
<PointerType
>(DepSI
->getOperand(0)->getType()))
1249 MD
->invalidateCachedPointerInfo(DepSI
->getOperand(0));
1250 toErase
.push_back(L
);
1255 if (LoadInst
*DepLI
= dyn_cast
<LoadInst
>(DepInst
)) {
1256 // Only forward substitute stores to loads of the same type.
1257 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
1258 if (DepLI
->getType() != L
->getType())
1262 L
->replaceAllUsesWith(DepLI
);
1263 if (isa
<PointerType
>(DepLI
->getType()))
1264 MD
->invalidateCachedPointerInfo(DepLI
);
1265 toErase
.push_back(L
);
1270 // If this load really doesn't depend on anything, then we must be loading an
1271 // undef value. This can happen when loading for a fresh allocation with no
1272 // intervening stores, for example.
1273 if (isa
<AllocationInst
>(DepInst
)) {
1274 L
->replaceAllUsesWith(UndefValue::get(L
->getType()));
1275 toErase
.push_back(L
);
1283 Value
* GVN::lookupNumber(BasicBlock
* BB
, uint32_t num
) {
1284 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator I
= localAvail
.find(BB
);
1285 if (I
== localAvail
.end())
1288 ValueNumberScope
* locals
= I
->second
;
1291 DenseMap
<uint32_t, Value
*>::iterator I
= locals
->table
.find(num
);
1292 if (I
!= locals
->table
.end())
1295 locals
= locals
->parent
;
1301 /// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
1302 /// by inheritance from the dominator fails, see if we can perform phi
1303 /// construction to eliminate the redundancy.
1304 Value
* GVN::AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
) {
1305 BasicBlock
* BaseBlock
= orig
->getParent();
1307 SmallPtrSet
<BasicBlock
*, 4> Visited
;
1308 SmallVector
<BasicBlock
*, 8> Stack
;
1309 Stack
.push_back(BaseBlock
);
1311 DenseMap
<BasicBlock
*, Value
*> Results
;
1313 // Walk backwards through our predecessors, looking for instances of the
1314 // value number we're looking for. Instances are recorded in the Results
1315 // map, which is then used to perform phi construction.
1316 while (!Stack
.empty()) {
1317 BasicBlock
* Current
= Stack
.back();
1320 // If we've walked all the way to a proper dominator, then give up. Cases
1321 // where the instance is in the dominator will have been caught by the fast
1322 // path, and any cases that require phi construction further than this are
1323 // probably not worth it anyways. Note that this is a SIGNIFICANT compile
1324 // time improvement.
1325 if (DT
->properlyDominates(Current
, orig
->getParent())) return 0;
1327 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator LA
=
1328 localAvail
.find(Current
);
1329 if (LA
== localAvail
.end()) return 0;
1330 DenseMap
<uint32_t, Value
*>::iterator V
= LA
->second
->table
.find(valno
);
1332 if (V
!= LA
->second
->table
.end()) {
1333 // Found an instance, record it.
1334 Results
.insert(std::make_pair(Current
, V
->second
));
1338 // If we reach the beginning of the function, then give up.
1339 if (pred_begin(Current
) == pred_end(Current
))
1342 for (pred_iterator PI
= pred_begin(Current
), PE
= pred_end(Current
);
1344 if (Visited
.insert(*PI
))
1345 Stack
.push_back(*PI
);
1348 // If we didn't find instances, give up. Otherwise, perform phi construction.
1349 if (Results
.size() == 0)
1352 return GetValueForBlock(BaseBlock
, orig
, Results
, true);
1355 /// processInstruction - When calculating availability, handle an instruction
1356 /// by inserting it into the appropriate sets
1357 bool GVN::processInstruction(Instruction
*I
,
1358 SmallVectorImpl
<Instruction
*> &toErase
) {
1359 if (LoadInst
* L
= dyn_cast
<LoadInst
>(I
)) {
1360 bool changed
= processLoad(L
, toErase
);
1363 unsigned num
= VN
.lookup_or_add(L
);
1364 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, L
));
1370 uint32_t nextNum
= VN
.getNextUnusedValueNumber();
1371 unsigned num
= VN
.lookup_or_add(I
);
1373 if (BranchInst
* BI
= dyn_cast
<BranchInst
>(I
)) {
1374 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1376 if (!BI
->isConditional() || isa
<Constant
>(BI
->getCondition()))
1379 Value
* branchCond
= BI
->getCondition();
1380 uint32_t condVN
= VN
.lookup_or_add(branchCond
);
1382 BasicBlock
* trueSucc
= BI
->getSuccessor(0);
1383 BasicBlock
* falseSucc
= BI
->getSuccessor(1);
1385 if (trueSucc
->getSinglePredecessor())
1386 localAvail
[trueSucc
]->table
[condVN
] =
1387 ConstantInt::getTrue(trueSucc
->getContext());
1388 if (falseSucc
->getSinglePredecessor())
1389 localAvail
[falseSucc
]->table
[condVN
] =
1390 ConstantInt::getFalse(trueSucc
->getContext());
1394 // Allocations are always uniquely numbered, so we can save time and memory
1395 // by fast failing them.
1396 } else if (isa
<AllocationInst
>(I
) || isa
<TerminatorInst
>(I
)) {
1397 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1401 // Collapse PHI nodes
1402 if (PHINode
* p
= dyn_cast
<PHINode
>(I
)) {
1403 Value
* constVal
= CollapsePhi(p
);
1406 for (PhiMapType::iterator PI
= phiMap
.begin(), PE
= phiMap
.end();
1408 PI
->second
.erase(p
);
1410 p
->replaceAllUsesWith(constVal
);
1411 if (isa
<PointerType
>(constVal
->getType()))
1412 MD
->invalidateCachedPointerInfo(constVal
);
1415 toErase
.push_back(p
);
1417 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1420 // If the number we were assigned was a brand new VN, then we don't
1421 // need to do a lookup to see if the number already exists
1422 // somewhere in the domtree: it can't!
1423 } else if (num
== nextNum
) {
1424 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1426 // Perform fast-path value-number based elimination of values inherited from
1428 } else if (Value
* repl
= lookupNumber(I
->getParent(), num
)) {
1431 I
->replaceAllUsesWith(repl
);
1432 if (isa
<PointerType
>(repl
->getType()))
1433 MD
->invalidateCachedPointerInfo(repl
);
1434 toErase
.push_back(I
);
1438 // Perform slow-pathvalue-number based elimination with phi construction.
1439 } else if (Value
* repl
= AttemptRedundancyElimination(I
, num
)) {
1442 I
->replaceAllUsesWith(repl
);
1443 if (isa
<PointerType
>(repl
->getType()))
1444 MD
->invalidateCachedPointerInfo(repl
);
1445 toErase
.push_back(I
);
1449 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1455 /// runOnFunction - This is the main transformation entry point for a function.
1456 bool GVN::runOnFunction(Function
& F
) {
1457 MD
= &getAnalysis
<MemoryDependenceAnalysis
>();
1458 DT
= &getAnalysis
<DominatorTree
>();
1459 VN
.setAliasAnalysis(&getAnalysis
<AliasAnalysis
>());
1463 bool changed
= false;
1464 bool shouldContinue
= true;
1466 // Merge unconditional branches, allowing PRE to catch more
1467 // optimization opportunities.
1468 for (Function::iterator FI
= F
.begin(), FE
= F
.end(); FI
!= FE
; ) {
1469 BasicBlock
* BB
= FI
;
1471 bool removedBlock
= MergeBlockIntoPredecessor(BB
, this);
1472 if (removedBlock
) NumGVNBlocks
++;
1474 changed
|= removedBlock
;
1477 unsigned Iteration
= 0;
1479 while (shouldContinue
) {
1480 DEBUG(errs() << "GVN iteration: " << Iteration
<< "\n");
1481 shouldContinue
= iterateOnFunction(F
);
1482 changed
|= shouldContinue
;
1487 bool PREChanged
= true;
1488 while (PREChanged
) {
1489 PREChanged
= performPRE(F
);
1490 changed
|= PREChanged
;
1493 // FIXME: Should perform GVN again after PRE does something. PRE can move
1494 // computations into blocks where they become fully redundant. Note that
1495 // we can't do this until PRE's critical edge splitting updates memdep.
1496 // Actually, when this happens, we should just fully integrate PRE into GVN.
1498 cleanupGlobalSets();
1504 bool GVN::processBlock(BasicBlock
* BB
) {
1505 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
1506 // incrementing BI before processing an instruction).
1507 SmallVector
<Instruction
*, 8> toErase
;
1508 bool changed_function
= false;
1510 for (BasicBlock::iterator BI
= BB
->begin(), BE
= BB
->end();
1512 changed_function
|= processInstruction(BI
, toErase
);
1513 if (toErase
.empty()) {
1518 // If we need some instructions deleted, do it now.
1519 NumGVNInstr
+= toErase
.size();
1521 // Avoid iterator invalidation.
1522 bool AtStart
= BI
== BB
->begin();
1526 for (SmallVector
<Instruction
*, 4>::iterator I
= toErase
.begin(),
1527 E
= toErase
.end(); I
!= E
; ++I
) {
1528 DEBUG(errs() << "GVN removed: " << **I
<< '\n');
1529 MD
->removeInstruction(*I
);
1530 (*I
)->eraseFromParent();
1531 DEBUG(verifyRemoved(*I
));
1541 return changed_function
;
1544 /// performPRE - Perform a purely local form of PRE that looks for diamond
1545 /// control flow patterns and attempts to perform simple PRE at the join point.
1546 bool GVN::performPRE(Function
& F
) {
1547 bool Changed
= false;
1548 SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4> toSplit
;
1549 DenseMap
<BasicBlock
*, Value
*> predMap
;
1550 for (df_iterator
<BasicBlock
*> DI
= df_begin(&F
.getEntryBlock()),
1551 DE
= df_end(&F
.getEntryBlock()); DI
!= DE
; ++DI
) {
1552 BasicBlock
* CurrentBlock
= *DI
;
1554 // Nothing to PRE in the entry block.
1555 if (CurrentBlock
== &F
.getEntryBlock()) continue;
1557 for (BasicBlock::iterator BI
= CurrentBlock
->begin(),
1558 BE
= CurrentBlock
->end(); BI
!= BE
; ) {
1559 Instruction
*CurInst
= BI
++;
1561 if (isa
<AllocationInst
>(CurInst
) || isa
<TerminatorInst
>(CurInst
) ||
1562 isa
<PHINode
>(CurInst
) ||
1563 (CurInst
->getType() == Type::getVoidTy(F
.getContext())) ||
1564 CurInst
->mayReadFromMemory() || CurInst
->mayHaveSideEffects() ||
1565 isa
<DbgInfoIntrinsic
>(CurInst
))
1568 uint32_t valno
= VN
.lookup(CurInst
);
1570 // Look for the predecessors for PRE opportunities. We're
1571 // only trying to solve the basic diamond case, where
1572 // a value is computed in the successor and one predecessor,
1573 // but not the other. We also explicitly disallow cases
1574 // where the successor is its own predecessor, because they're
1575 // more complicated to get right.
1576 unsigned numWith
= 0;
1577 unsigned numWithout
= 0;
1578 BasicBlock
* PREPred
= 0;
1581 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1582 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
) {
1583 // We're not interested in PRE where the block is its
1584 // own predecessor, on in blocks with predecessors
1585 // that are not reachable.
1586 if (*PI
== CurrentBlock
) {
1589 } else if (!localAvail
.count(*PI
)) {
1594 DenseMap
<uint32_t, Value
*>::iterator predV
=
1595 localAvail
[*PI
]->table
.find(valno
);
1596 if (predV
== localAvail
[*PI
]->table
.end()) {
1599 } else if (predV
->second
== CurInst
) {
1602 predMap
[*PI
] = predV
->second
;
1607 // Don't do PRE when it might increase code size, i.e. when
1608 // we would need to insert instructions in more than one pred.
1609 if (numWithout
!= 1 || numWith
== 0)
1612 // We can't do PRE safely on a critical edge, so instead we schedule
1613 // the edge to be split and perform the PRE the next time we iterate
1615 unsigned succNum
= 0;
1616 for (unsigned i
= 0, e
= PREPred
->getTerminator()->getNumSuccessors();
1618 if (PREPred
->getTerminator()->getSuccessor(i
) == CurrentBlock
) {
1623 if (isCriticalEdge(PREPred
->getTerminator(), succNum
)) {
1624 toSplit
.push_back(std::make_pair(PREPred
->getTerminator(), succNum
));
1628 // Instantiate the expression the in predecessor that lacked it.
1629 // Because we are going top-down through the block, all value numbers
1630 // will be available in the predecessor by the time we need them. Any
1631 // that weren't original present will have been instantiated earlier
1633 Instruction
* PREInstr
= CurInst
->clone(CurInst
->getContext());
1634 bool success
= true;
1635 for (unsigned i
= 0, e
= CurInst
->getNumOperands(); i
!= e
; ++i
) {
1636 Value
*Op
= PREInstr
->getOperand(i
);
1637 if (isa
<Argument
>(Op
) || isa
<Constant
>(Op
) || isa
<GlobalValue
>(Op
))
1640 if (Value
*V
= lookupNumber(PREPred
, VN
.lookup(Op
))) {
1641 PREInstr
->setOperand(i
, V
);
1648 // Fail out if we encounter an operand that is not available in
1649 // the PRE predecessor. This is typically because of loads which
1650 // are not value numbered precisely.
1653 DEBUG(verifyRemoved(PREInstr
));
1657 PREInstr
->insertBefore(PREPred
->getTerminator());
1658 PREInstr
->setName(CurInst
->getName() + ".pre");
1659 predMap
[PREPred
] = PREInstr
;
1660 VN
.add(PREInstr
, valno
);
1663 // Update the availability map to include the new instruction.
1664 localAvail
[PREPred
]->table
.insert(std::make_pair(valno
, PREInstr
));
1666 // Create a PHI to make the value available in this block.
1667 PHINode
* Phi
= PHINode::Create(CurInst
->getType(),
1668 CurInst
->getName() + ".pre-phi",
1669 CurrentBlock
->begin());
1670 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1671 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
)
1672 Phi
->addIncoming(predMap
[*PI
], *PI
);
1675 localAvail
[CurrentBlock
]->table
[valno
] = Phi
;
1677 CurInst
->replaceAllUsesWith(Phi
);
1678 if (isa
<PointerType
>(Phi
->getType()))
1679 MD
->invalidateCachedPointerInfo(Phi
);
1682 DEBUG(errs() << "GVN PRE removed: " << *CurInst
<< '\n');
1683 MD
->removeInstruction(CurInst
);
1684 CurInst
->eraseFromParent();
1685 DEBUG(verifyRemoved(CurInst
));
1690 for (SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4>::iterator
1691 I
= toSplit
.begin(), E
= toSplit
.end(); I
!= E
; ++I
)
1692 SplitCriticalEdge(I
->first
, I
->second
, this);
1694 return Changed
|| toSplit
.size();
1697 /// iterateOnFunction - Executes one iteration of GVN
1698 bool GVN::iterateOnFunction(Function
&F
) {
1699 cleanupGlobalSets();
1701 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1702 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
) {
1704 localAvail
[DI
->getBlock()] =
1705 new ValueNumberScope(localAvail
[DI
->getIDom()->getBlock()]);
1707 localAvail
[DI
->getBlock()] = new ValueNumberScope(0);
1710 // Top-down walk of the dominator tree
1711 bool changed
= false;
1713 // Needed for value numbering with phi construction to work.
1714 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
1715 for (ReversePostOrderTraversal
<Function
*>::rpo_iterator RI
= RPOT
.begin(),
1716 RE
= RPOT
.end(); RI
!= RE
; ++RI
)
1717 changed
|= processBlock(*RI
);
1719 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1720 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
)
1721 changed
|= processBlock(DI
->getBlock());
1727 void GVN::cleanupGlobalSets() {
1731 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1732 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
)
1737 /// verifyRemoved - Verify that the specified instruction does not occur in our
1738 /// internal data structures.
1739 void GVN::verifyRemoved(const Instruction
*Inst
) const {
1740 VN
.verifyRemoved(Inst
);
1742 // Walk through the PHI map to make sure the instruction isn't hiding in there
1744 for (PhiMapType::iterator
1745 I
= phiMap
.begin(), E
= phiMap
.end(); I
!= E
; ++I
) {
1746 assert(I
->first
!= Inst
&& "Inst is still a key in PHI map!");
1748 for (SmallPtrSet
<Instruction
*, 4>::iterator
1749 II
= I
->second
.begin(), IE
= I
->second
.end(); II
!= IE
; ++II
) {
1750 assert(*II
!= Inst
&& "Inst is still a value in PHI map!");
1754 // Walk through the value number scope to make sure the instruction isn't
1755 // ferreted away in it.
1756 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1757 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
) {
1758 const ValueNumberScope
*VNS
= I
->second
;
1761 for (DenseMap
<uint32_t, Value
*>::iterator
1762 II
= VNS
->table
.begin(), IE
= VNS
->table
.end(); II
!= IE
; ++II
) {
1763 assert(II
->second
!= Inst
&& "Inst still in value numbering scope!");