1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs global value numbering to eliminate fully redundant
11 // instructions. It also performs simple dead load elimination.
13 // Note that this pass does the value numbering itself; it does not use the
14 // ValueNumbering analysis passes.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "gvn"
19 #include "llvm/Transforms/Scalar.h"
20 #include "llvm/BasicBlock.h"
21 #include "llvm/Constants.h"
22 #include "llvm/DerivedTypes.h"
23 #include "llvm/Function.h"
24 #include "llvm/IntrinsicInst.h"
25 #include "llvm/Value.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DepthFirstIterator.h"
28 #include "llvm/ADT/PostOrderIterator.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/Dominators.h"
33 #include "llvm/Analysis/AliasAnalysis.h"
34 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
35 #include "llvm/Support/CFG.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 STATISTIC(NumGVNInstr
, "Number of instructions deleted");
44 STATISTIC(NumGVNLoad
, "Number of loads deleted");
45 STATISTIC(NumGVNPRE
, "Number of instructions PRE'd");
46 STATISTIC(NumGVNBlocks
, "Number of blocks merged");
47 STATISTIC(NumPRELoad
, "Number of loads PRE'd");
49 static cl::opt
<bool> EnablePRE("enable-pre",
50 cl::init(true), cl::Hidden
);
51 cl::opt
<bool> EnableLoadPRE("enable-load-pre"/*, cl::init(true)*/);
53 //===----------------------------------------------------------------------===//
55 //===----------------------------------------------------------------------===//
57 /// This class holds the mapping between values and value numbers. It is used
58 /// as an efficient mechanism to determine the expression-wise equivalence of
61 struct VISIBILITY_HIDDEN Expression
{
62 enum ExpressionOpcode
{ ADD
, SUB
, MUL
, UDIV
, SDIV
, FDIV
, UREM
, SREM
,
63 FREM
, SHL
, LSHR
, ASHR
, AND
, OR
, XOR
, ICMPEQ
,
64 ICMPNE
, ICMPUGT
, ICMPUGE
, ICMPULT
, ICMPULE
,
65 ICMPSGT
, ICMPSGE
, ICMPSLT
, ICMPSLE
, FCMPOEQ
,
66 FCMPOGT
, FCMPOGE
, FCMPOLT
, FCMPOLE
, FCMPONE
,
67 FCMPORD
, FCMPUNO
, FCMPUEQ
, FCMPUGT
, FCMPUGE
,
68 FCMPULT
, FCMPULE
, FCMPUNE
, EXTRACT
, INSERT
,
69 SHUFFLE
, SELECT
, TRUNC
, ZEXT
, SEXT
, FPTOUI
,
70 FPTOSI
, UITOFP
, SITOFP
, FPTRUNC
, FPEXT
,
71 PTRTOINT
, INTTOPTR
, BITCAST
, GEP
, CALL
, CONSTANT
,
74 ExpressionOpcode opcode
;
79 SmallVector
<uint32_t, 4> varargs
;
83 Expression(ExpressionOpcode o
) : opcode(o
) { }
85 bool operator==(const Expression
&other
) const {
86 if (opcode
!= other
.opcode
)
88 else if (opcode
== EMPTY
|| opcode
== TOMBSTONE
)
90 else if (type
!= other
.type
)
92 else if (function
!= other
.function
)
94 else if (firstVN
!= other
.firstVN
)
96 else if (secondVN
!= other
.secondVN
)
98 else if (thirdVN
!= other
.thirdVN
)
101 if (varargs
.size() != other
.varargs
.size())
104 for (size_t i
= 0; i
< varargs
.size(); ++i
)
105 if (varargs
[i
] != other
.varargs
[i
])
112 bool operator!=(const Expression
&other
) const {
113 return !(*this == other
);
117 class VISIBILITY_HIDDEN ValueTable
{
119 DenseMap
<Value
*, uint32_t> valueNumbering
;
120 DenseMap
<Expression
, uint32_t> expressionNumbering
;
122 MemoryDependenceAnalysis
* MD
;
125 uint32_t nextValueNumber
;
127 Expression::ExpressionOpcode
getOpcode(BinaryOperator
* BO
);
128 Expression::ExpressionOpcode
getOpcode(CmpInst
* C
);
129 Expression::ExpressionOpcode
getOpcode(CastInst
* C
);
130 Expression
create_expression(BinaryOperator
* BO
);
131 Expression
create_expression(CmpInst
* C
);
132 Expression
create_expression(ShuffleVectorInst
* V
);
133 Expression
create_expression(ExtractElementInst
* C
);
134 Expression
create_expression(InsertElementInst
* V
);
135 Expression
create_expression(SelectInst
* V
);
136 Expression
create_expression(CastInst
* C
);
137 Expression
create_expression(GetElementPtrInst
* G
);
138 Expression
create_expression(CallInst
* C
);
139 Expression
create_expression(Constant
* C
);
141 ValueTable() : nextValueNumber(1) { }
142 uint32_t lookup_or_add(Value
* V
);
143 uint32_t lookup(Value
* V
) const;
144 void add(Value
* V
, uint32_t num
);
146 void erase(Value
* v
);
148 void setAliasAnalysis(AliasAnalysis
* A
) { AA
= A
; }
149 AliasAnalysis
*getAliasAnalysis() const { return AA
; }
150 void setMemDep(MemoryDependenceAnalysis
* M
) { MD
= M
; }
151 void setDomTree(DominatorTree
* D
) { DT
= D
; }
152 uint32_t getNextUnusedValueNumber() { return nextValueNumber
; }
153 void verifyRemoved(const Value
*) const;
158 template <> struct DenseMapInfo
<Expression
> {
159 static inline Expression
getEmptyKey() {
160 return Expression(Expression::EMPTY
);
163 static inline Expression
getTombstoneKey() {
164 return Expression(Expression::TOMBSTONE
);
167 static unsigned getHashValue(const Expression e
) {
168 unsigned hash
= e
.opcode
;
170 hash
= e
.firstVN
+ hash
* 37;
171 hash
= e
.secondVN
+ hash
* 37;
172 hash
= e
.thirdVN
+ hash
* 37;
174 hash
= ((unsigned)((uintptr_t)e
.type
>> 4) ^
175 (unsigned)((uintptr_t)e
.type
>> 9)) +
178 for (SmallVector
<uint32_t, 4>::const_iterator I
= e
.varargs
.begin(),
179 E
= e
.varargs
.end(); I
!= E
; ++I
)
180 hash
= *I
+ hash
* 37;
182 hash
= ((unsigned)((uintptr_t)e
.function
>> 4) ^
183 (unsigned)((uintptr_t)e
.function
>> 9)) +
188 static bool isEqual(const Expression
&LHS
, const Expression
&RHS
) {
191 static bool isPod() { return true; }
195 //===----------------------------------------------------------------------===//
196 // ValueTable Internal Functions
197 //===----------------------------------------------------------------------===//
198 Expression::ExpressionOpcode
ValueTable::getOpcode(BinaryOperator
* BO
) {
199 switch(BO
->getOpcode()) {
200 default: // THIS SHOULD NEVER HAPPEN
201 assert(0 && "Binary operator with unknown opcode?");
202 case Instruction::Add
: return Expression::ADD
;
203 case Instruction::Sub
: return Expression::SUB
;
204 case Instruction::Mul
: return Expression::MUL
;
205 case Instruction::UDiv
: return Expression::UDIV
;
206 case Instruction::SDiv
: return Expression::SDIV
;
207 case Instruction::FDiv
: return Expression::FDIV
;
208 case Instruction::URem
: return Expression::UREM
;
209 case Instruction::SRem
: return Expression::SREM
;
210 case Instruction::FRem
: return Expression::FREM
;
211 case Instruction::Shl
: return Expression::SHL
;
212 case Instruction::LShr
: return Expression::LSHR
;
213 case Instruction::AShr
: return Expression::ASHR
;
214 case Instruction::And
: return Expression::AND
;
215 case Instruction::Or
: return Expression::OR
;
216 case Instruction::Xor
: return Expression::XOR
;
220 Expression::ExpressionOpcode
ValueTable::getOpcode(CmpInst
* C
) {
221 if (isa
<ICmpInst
>(C
) || isa
<VICmpInst
>(C
)) {
222 switch (C
->getPredicate()) {
223 default: // THIS SHOULD NEVER HAPPEN
224 assert(0 && "Comparison with unknown predicate?");
225 case ICmpInst::ICMP_EQ
: return Expression::ICMPEQ
;
226 case ICmpInst::ICMP_NE
: return Expression::ICMPNE
;
227 case ICmpInst::ICMP_UGT
: return Expression::ICMPUGT
;
228 case ICmpInst::ICMP_UGE
: return Expression::ICMPUGE
;
229 case ICmpInst::ICMP_ULT
: return Expression::ICMPULT
;
230 case ICmpInst::ICMP_ULE
: return Expression::ICMPULE
;
231 case ICmpInst::ICMP_SGT
: return Expression::ICMPSGT
;
232 case ICmpInst::ICMP_SGE
: return Expression::ICMPSGE
;
233 case ICmpInst::ICMP_SLT
: return Expression::ICMPSLT
;
234 case ICmpInst::ICMP_SLE
: return Expression::ICMPSLE
;
237 assert((isa
<FCmpInst
>(C
) || isa
<VFCmpInst
>(C
)) && "Unknown compare");
238 switch (C
->getPredicate()) {
239 default: // THIS SHOULD NEVER HAPPEN
240 assert(0 && "Comparison with unknown predicate?");
241 case FCmpInst::FCMP_OEQ
: return Expression::FCMPOEQ
;
242 case FCmpInst::FCMP_OGT
: return Expression::FCMPOGT
;
243 case FCmpInst::FCMP_OGE
: return Expression::FCMPOGE
;
244 case FCmpInst::FCMP_OLT
: return Expression::FCMPOLT
;
245 case FCmpInst::FCMP_OLE
: return Expression::FCMPOLE
;
246 case FCmpInst::FCMP_ONE
: return Expression::FCMPONE
;
247 case FCmpInst::FCMP_ORD
: return Expression::FCMPORD
;
248 case FCmpInst::FCMP_UNO
: return Expression::FCMPUNO
;
249 case FCmpInst::FCMP_UEQ
: return Expression::FCMPUEQ
;
250 case FCmpInst::FCMP_UGT
: return Expression::FCMPUGT
;
251 case FCmpInst::FCMP_UGE
: return Expression::FCMPUGE
;
252 case FCmpInst::FCMP_ULT
: return Expression::FCMPULT
;
253 case FCmpInst::FCMP_ULE
: return Expression::FCMPULE
;
254 case FCmpInst::FCMP_UNE
: return Expression::FCMPUNE
;
258 Expression::ExpressionOpcode
ValueTable::getOpcode(CastInst
* C
) {
259 switch(C
->getOpcode()) {
260 default: // THIS SHOULD NEVER HAPPEN
261 assert(0 && "Cast operator with unknown opcode?");
262 case Instruction::Trunc
: return Expression::TRUNC
;
263 case Instruction::ZExt
: return Expression::ZEXT
;
264 case Instruction::SExt
: return Expression::SEXT
;
265 case Instruction::FPToUI
: return Expression::FPTOUI
;
266 case Instruction::FPToSI
: return Expression::FPTOSI
;
267 case Instruction::UIToFP
: return Expression::UITOFP
;
268 case Instruction::SIToFP
: return Expression::SITOFP
;
269 case Instruction::FPTrunc
: return Expression::FPTRUNC
;
270 case Instruction::FPExt
: return Expression::FPEXT
;
271 case Instruction::PtrToInt
: return Expression::PTRTOINT
;
272 case Instruction::IntToPtr
: return Expression::INTTOPTR
;
273 case Instruction::BitCast
: return Expression::BITCAST
;
277 Expression
ValueTable::create_expression(CallInst
* C
) {
280 e
.type
= C
->getType();
284 e
.function
= C
->getCalledFunction();
285 e
.opcode
= Expression::CALL
;
287 for (CallInst::op_iterator I
= C
->op_begin()+1, E
= C
->op_end();
289 e
.varargs
.push_back(lookup_or_add(*I
));
294 Expression
ValueTable::create_expression(BinaryOperator
* BO
) {
297 e
.firstVN
= lookup_or_add(BO
->getOperand(0));
298 e
.secondVN
= lookup_or_add(BO
->getOperand(1));
301 e
.type
= BO
->getType();
302 e
.opcode
= getOpcode(BO
);
307 Expression
ValueTable::create_expression(CmpInst
* C
) {
310 e
.firstVN
= lookup_or_add(C
->getOperand(0));
311 e
.secondVN
= lookup_or_add(C
->getOperand(1));
314 e
.type
= C
->getType();
315 e
.opcode
= getOpcode(C
);
320 Expression
ValueTable::create_expression(CastInst
* C
) {
323 e
.firstVN
= lookup_or_add(C
->getOperand(0));
327 e
.type
= C
->getType();
328 e
.opcode
= getOpcode(C
);
333 Expression
ValueTable::create_expression(ShuffleVectorInst
* S
) {
336 e
.firstVN
= lookup_or_add(S
->getOperand(0));
337 e
.secondVN
= lookup_or_add(S
->getOperand(1));
338 e
.thirdVN
= lookup_or_add(S
->getOperand(2));
340 e
.type
= S
->getType();
341 e
.opcode
= Expression::SHUFFLE
;
346 Expression
ValueTable::create_expression(ExtractElementInst
* E
) {
349 e
.firstVN
= lookup_or_add(E
->getOperand(0));
350 e
.secondVN
= lookup_or_add(E
->getOperand(1));
353 e
.type
= E
->getType();
354 e
.opcode
= Expression::EXTRACT
;
359 Expression
ValueTable::create_expression(InsertElementInst
* I
) {
362 e
.firstVN
= lookup_or_add(I
->getOperand(0));
363 e
.secondVN
= lookup_or_add(I
->getOperand(1));
364 e
.thirdVN
= lookup_or_add(I
->getOperand(2));
366 e
.type
= I
->getType();
367 e
.opcode
= Expression::INSERT
;
372 Expression
ValueTable::create_expression(SelectInst
* I
) {
375 e
.firstVN
= lookup_or_add(I
->getCondition());
376 e
.secondVN
= lookup_or_add(I
->getTrueValue());
377 e
.thirdVN
= lookup_or_add(I
->getFalseValue());
379 e
.type
= I
->getType();
380 e
.opcode
= Expression::SELECT
;
385 Expression
ValueTable::create_expression(GetElementPtrInst
* G
) {
388 e
.firstVN
= lookup_or_add(G
->getPointerOperand());
392 e
.type
= G
->getType();
393 e
.opcode
= Expression::GEP
;
395 for (GetElementPtrInst::op_iterator I
= G
->idx_begin(), E
= G
->idx_end();
397 e
.varargs
.push_back(lookup_or_add(*I
));
402 //===----------------------------------------------------------------------===//
403 // ValueTable External Functions
404 //===----------------------------------------------------------------------===//
406 /// add - Insert a value into the table with a specified value number.
407 void ValueTable::add(Value
* V
, uint32_t num
) {
408 valueNumbering
.insert(std::make_pair(V
, num
));
411 /// lookup_or_add - Returns the value number for the specified value, assigning
412 /// it a new number if it did not have one before.
413 uint32_t ValueTable::lookup_or_add(Value
* V
) {
414 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
415 if (VI
!= valueNumbering
.end())
418 if (CallInst
* C
= dyn_cast
<CallInst
>(V
)) {
419 if (AA
->doesNotAccessMemory(C
)) {
420 Expression e
= create_expression(C
);
422 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
423 if (EI
!= expressionNumbering
.end()) {
424 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
427 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
428 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
430 return nextValueNumber
++;
432 } else if (AA
->onlyReadsMemory(C
)) {
433 Expression e
= create_expression(C
);
435 if (expressionNumbering
.find(e
) == expressionNumbering
.end()) {
436 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
437 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
438 return nextValueNumber
++;
441 MemDepResult local_dep
= MD
->getDependency(C
);
443 if (!local_dep
.isDef() && !local_dep
.isNonLocal()) {
444 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
445 return nextValueNumber
++;
448 if (local_dep
.isDef()) {
449 CallInst
* local_cdep
= cast
<CallInst
>(local_dep
.getInst());
451 if (local_cdep
->getNumOperands() != C
->getNumOperands()) {
452 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
453 return nextValueNumber
++;
456 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
457 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
458 uint32_t cd_vn
= lookup_or_add(local_cdep
->getOperand(i
));
460 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
461 return nextValueNumber
++;
465 uint32_t v
= lookup_or_add(local_cdep
);
466 valueNumbering
.insert(std::make_pair(V
, v
));
471 const MemoryDependenceAnalysis::NonLocalDepInfo
&deps
=
472 MD
->getNonLocalCallDependency(CallSite(C
));
473 // FIXME: call/call dependencies for readonly calls should return def, not
474 // clobber! Move the checking logic to MemDep!
477 // Check to see if we have a single dominating call instruction that is
479 for (unsigned i
= 0, e
= deps
.size(); i
!= e
; ++i
) {
480 const MemoryDependenceAnalysis::NonLocalDepEntry
*I
= &deps
[i
];
481 // Ignore non-local dependencies.
482 if (I
->second
.isNonLocal())
485 // We don't handle non-depedencies. If we already have a call, reject
486 // instruction dependencies.
487 if (I
->second
.isClobber() || cdep
!= 0) {
492 CallInst
*NonLocalDepCall
= dyn_cast
<CallInst
>(I
->second
.getInst());
493 // FIXME: All duplicated with non-local case.
494 if (NonLocalDepCall
&& DT
->properlyDominates(I
->first
, C
->getParent())){
495 cdep
= NonLocalDepCall
;
504 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
505 return nextValueNumber
++;
508 if (cdep
->getNumOperands() != C
->getNumOperands()) {
509 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
510 return nextValueNumber
++;
512 for (unsigned i
= 1; i
< C
->getNumOperands(); ++i
) {
513 uint32_t c_vn
= lookup_or_add(C
->getOperand(i
));
514 uint32_t cd_vn
= lookup_or_add(cdep
->getOperand(i
));
516 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
517 return nextValueNumber
++;
521 uint32_t v
= lookup_or_add(cdep
);
522 valueNumbering
.insert(std::make_pair(V
, v
));
526 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
527 return nextValueNumber
++;
529 } else if (BinaryOperator
* BO
= dyn_cast
<BinaryOperator
>(V
)) {
530 Expression e
= create_expression(BO
);
532 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
533 if (EI
!= expressionNumbering
.end()) {
534 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
537 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
538 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
540 return nextValueNumber
++;
542 } else if (CmpInst
* C
= dyn_cast
<CmpInst
>(V
)) {
543 Expression e
= create_expression(C
);
545 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
546 if (EI
!= expressionNumbering
.end()) {
547 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
550 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
551 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
553 return nextValueNumber
++;
555 } else if (ShuffleVectorInst
* U
= dyn_cast
<ShuffleVectorInst
>(V
)) {
556 Expression e
= create_expression(U
);
558 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
559 if (EI
!= expressionNumbering
.end()) {
560 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
563 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
564 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
566 return nextValueNumber
++;
568 } else if (ExtractElementInst
* U
= dyn_cast
<ExtractElementInst
>(V
)) {
569 Expression e
= create_expression(U
);
571 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
572 if (EI
!= expressionNumbering
.end()) {
573 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
576 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
577 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
579 return nextValueNumber
++;
581 } else if (InsertElementInst
* U
= dyn_cast
<InsertElementInst
>(V
)) {
582 Expression e
= create_expression(U
);
584 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
585 if (EI
!= expressionNumbering
.end()) {
586 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
589 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
590 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
592 return nextValueNumber
++;
594 } else if (SelectInst
* U
= dyn_cast
<SelectInst
>(V
)) {
595 Expression e
= create_expression(U
);
597 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
598 if (EI
!= expressionNumbering
.end()) {
599 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
602 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
603 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
605 return nextValueNumber
++;
607 } else if (CastInst
* U
= dyn_cast
<CastInst
>(V
)) {
608 Expression e
= create_expression(U
);
610 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
611 if (EI
!= expressionNumbering
.end()) {
612 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
615 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
616 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
618 return nextValueNumber
++;
620 } else if (GetElementPtrInst
* U
= dyn_cast
<GetElementPtrInst
>(V
)) {
621 Expression e
= create_expression(U
);
623 DenseMap
<Expression
, uint32_t>::iterator EI
= expressionNumbering
.find(e
);
624 if (EI
!= expressionNumbering
.end()) {
625 valueNumbering
.insert(std::make_pair(V
, EI
->second
));
628 expressionNumbering
.insert(std::make_pair(e
, nextValueNumber
));
629 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
631 return nextValueNumber
++;
634 valueNumbering
.insert(std::make_pair(V
, nextValueNumber
));
635 return nextValueNumber
++;
639 /// lookup - Returns the value number of the specified value. Fails if
640 /// the value has not yet been numbered.
641 uint32_t ValueTable::lookup(Value
* V
) const {
642 DenseMap
<Value
*, uint32_t>::iterator VI
= valueNumbering
.find(V
);
643 assert(VI
!= valueNumbering
.end() && "Value not numbered?");
647 /// clear - Remove all entries from the ValueTable
648 void ValueTable::clear() {
649 valueNumbering
.clear();
650 expressionNumbering
.clear();
654 /// erase - Remove a value from the value numbering
655 void ValueTable::erase(Value
* V
) {
656 valueNumbering
.erase(V
);
659 /// verifyRemoved - Verify that the value is removed from all internal data
661 void ValueTable::verifyRemoved(const Value
*V
) const {
662 for (DenseMap
<Value
*, uint32_t>::iterator
663 I
= valueNumbering
.begin(), E
= valueNumbering
.end(); I
!= E
; ++I
) {
664 assert(I
->first
!= V
&& "Inst still occurs in value numbering map!");
668 //===----------------------------------------------------------------------===//
670 //===----------------------------------------------------------------------===//
673 struct VISIBILITY_HIDDEN ValueNumberScope
{
674 ValueNumberScope
* parent
;
675 DenseMap
<uint32_t, Value
*> table
;
677 ValueNumberScope(ValueNumberScope
* p
) : parent(p
) { }
683 class VISIBILITY_HIDDEN GVN
: public FunctionPass
{
684 bool runOnFunction(Function
&F
);
686 static char ID
; // Pass identification, replacement for typeid
687 GVN() : FunctionPass(&ID
) { }
690 MemoryDependenceAnalysis
*MD
;
694 DenseMap
<BasicBlock
*, ValueNumberScope
*> localAvail
;
696 typedef DenseMap
<Value
*, SmallPtrSet
<Instruction
*, 4> > PhiMapType
;
700 // This transformation requires dominator postdominator info
701 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
702 AU
.addRequired
<DominatorTree
>();
703 AU
.addRequired
<MemoryDependenceAnalysis
>();
704 AU
.addRequired
<AliasAnalysis
>();
706 AU
.addPreserved
<DominatorTree
>();
707 AU
.addPreserved
<AliasAnalysis
>();
711 // FIXME: eliminate or document these better
712 bool processLoad(LoadInst
* L
,
713 SmallVectorImpl
<Instruction
*> &toErase
);
714 bool processInstruction(Instruction
* I
,
715 SmallVectorImpl
<Instruction
*> &toErase
);
716 bool processNonLocalLoad(LoadInst
* L
,
717 SmallVectorImpl
<Instruction
*> &toErase
);
718 bool processBlock(BasicBlock
* BB
);
719 Value
*GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
720 DenseMap
<BasicBlock
*, Value
*> &Phis
,
721 bool top_level
= false);
722 void dump(DenseMap
<uint32_t, Value
*>& d
);
723 bool iterateOnFunction(Function
&F
);
724 Value
* CollapsePhi(PHINode
* p
);
725 bool isSafeReplacement(PHINode
* p
, Instruction
* inst
);
726 bool performPRE(Function
& F
);
727 Value
* lookupNumber(BasicBlock
* BB
, uint32_t num
);
728 bool mergeBlockIntoPredecessor(BasicBlock
* BB
);
729 Value
* AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
);
730 void cleanupGlobalSets();
731 void verifyRemoved(const Instruction
*I
) const;
737 // createGVNPass - The public interface to this file...
738 FunctionPass
*llvm::createGVNPass() { return new GVN(); }
740 static RegisterPass
<GVN
> X("gvn",
741 "Global Value Numbering");
743 void GVN::dump(DenseMap
<uint32_t, Value
*>& d
) {
745 for (DenseMap
<uint32_t, Value
*>::iterator I
= d
.begin(),
746 E
= d
.end(); I
!= E
; ++I
) {
747 printf("%d\n", I
->first
);
753 Value
* GVN::CollapsePhi(PHINode
* p
) {
754 Value
* constVal
= p
->hasConstantValue();
755 if (!constVal
) return 0;
757 Instruction
* inst
= dyn_cast
<Instruction
>(constVal
);
761 if (DT
->dominates(inst
, p
))
762 if (isSafeReplacement(p
, inst
))
767 bool GVN::isSafeReplacement(PHINode
* p
, Instruction
* inst
) {
768 if (!isa
<PHINode
>(inst
))
771 for (Instruction::use_iterator UI
= p
->use_begin(), E
= p
->use_end();
773 if (PHINode
* use_phi
= dyn_cast
<PHINode
>(UI
))
774 if (use_phi
->getParent() == inst
->getParent())
780 /// GetValueForBlock - Get the value to use within the specified basic block.
781 /// available values are in Phis.
782 Value
*GVN::GetValueForBlock(BasicBlock
*BB
, Instruction
* orig
,
783 DenseMap
<BasicBlock
*, Value
*> &Phis
,
786 // If we have already computed this value, return the previously computed val.
787 DenseMap
<BasicBlock
*, Value
*>::iterator V
= Phis
.find(BB
);
788 if (V
!= Phis
.end() && !top_level
) return V
->second
;
790 // If the block is unreachable, just return undef, since this path
791 // can't actually occur at runtime.
792 if (!DT
->isReachableFromEntry(BB
))
793 return Phis
[BB
] = UndefValue::get(orig
->getType());
795 if (BasicBlock
*Pred
= BB
->getSinglePredecessor()) {
796 Value
*ret
= GetValueForBlock(Pred
, orig
, Phis
);
801 // Get the number of predecessors of this block so we can reserve space later.
802 // If there is already a PHI in it, use the #preds from it, otherwise count.
803 // Getting it from the PHI is constant time.
805 if (PHINode
*ExistingPN
= dyn_cast
<PHINode
>(BB
->begin()))
806 NumPreds
= ExistingPN
->getNumIncomingValues();
808 NumPreds
= std::distance(pred_begin(BB
), pred_end(BB
));
810 // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so
811 // now, then get values to fill in the incoming values for the PHI.
812 PHINode
*PN
= PHINode::Create(orig
->getType(), orig
->getName()+".rle",
814 PN
->reserveOperandSpace(NumPreds
);
816 Phis
.insert(std::make_pair(BB
, PN
));
818 // Fill in the incoming values for the block.
819 for (pred_iterator PI
= pred_begin(BB
), E
= pred_end(BB
); PI
!= E
; ++PI
) {
820 Value
* val
= GetValueForBlock(*PI
, orig
, Phis
);
821 PN
->addIncoming(val
, *PI
);
824 VN
.getAliasAnalysis()->copyValue(orig
, PN
);
826 // Attempt to collapse PHI nodes that are trivially redundant
827 Value
* v
= CollapsePhi(PN
);
829 // Cache our phi construction results
830 if (LoadInst
* L
= dyn_cast
<LoadInst
>(orig
))
831 phiMap
[L
->getPointerOperand()].insert(PN
);
833 phiMap
[orig
].insert(PN
);
838 PN
->replaceAllUsesWith(v
);
839 if (isa
<PointerType
>(v
->getType()))
840 MD
->invalidateCachedPointerInfo(v
);
842 for (DenseMap
<BasicBlock
*, Value
*>::iterator I
= Phis
.begin(),
843 E
= Phis
.end(); I
!= E
; ++I
)
847 DEBUG(cerr
<< "GVN removed: " << *PN
);
848 MD
->removeInstruction(PN
);
849 PN
->eraseFromParent();
850 DEBUG(verifyRemoved(PN
));
856 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
857 /// we're analyzing is fully available in the specified block. As we go, keep
858 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This
859 /// map is actually a tri-state map with the following values:
860 /// 0) we know the block *is not* fully available.
861 /// 1) we know the block *is* fully available.
862 /// 2) we do not know whether the block is fully available or not, but we are
863 /// currently speculating that it will be.
864 /// 3) we are speculating for this block and have used that to speculate for
866 static bool IsValueFullyAvailableInBlock(BasicBlock
*BB
,
867 DenseMap
<BasicBlock
*, char> &FullyAvailableBlocks
) {
868 // Optimistically assume that the block is fully available and check to see
869 // if we already know about this block in one lookup.
870 std::pair
<DenseMap
<BasicBlock
*, char>::iterator
, char> IV
=
871 FullyAvailableBlocks
.insert(std::make_pair(BB
, 2));
873 // If the entry already existed for this block, return the precomputed value.
875 // If this is a speculative "available" value, mark it as being used for
876 // speculation of other blocks.
877 if (IV
.first
->second
== 2)
878 IV
.first
->second
= 3;
879 return IV
.first
->second
!= 0;
882 // Otherwise, see if it is fully available in all predecessors.
883 pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
);
885 // If this block has no predecessors, it isn't live-in here.
887 goto SpeculationFailure
;
889 for (; PI
!= PE
; ++PI
)
890 // If the value isn't fully available in one of our predecessors, then it
891 // isn't fully available in this block either. Undo our previous
892 // optimistic assumption and bail out.
893 if (!IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
894 goto SpeculationFailure
;
898 // SpeculationFailure - If we get here, we found out that this is not, after
899 // all, a fully-available block. We have a problem if we speculated on this and
900 // used the speculation to mark other blocks as available.
902 char &BBVal
= FullyAvailableBlocks
[BB
];
904 // If we didn't speculate on this, just return with it set to false.
910 // If we did speculate on this value, we could have blocks set to 1 that are
911 // incorrect. Walk the (transitive) successors of this block and mark them as
913 SmallVector
<BasicBlock
*, 32> BBWorklist
;
914 BBWorklist
.push_back(BB
);
916 while (!BBWorklist
.empty()) {
917 BasicBlock
*Entry
= BBWorklist
.pop_back_val();
918 // Note that this sets blocks to 0 (unavailable) if they happen to not
919 // already be in FullyAvailableBlocks. This is safe.
920 char &EntryVal
= FullyAvailableBlocks
[Entry
];
921 if (EntryVal
== 0) continue; // Already unavailable.
923 // Mark as unavailable.
926 for (succ_iterator I
= succ_begin(Entry
), E
= succ_end(Entry
); I
!= E
; ++I
)
927 BBWorklist
.push_back(*I
);
933 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
934 /// non-local by performing PHI construction.
935 bool GVN::processNonLocalLoad(LoadInst
*LI
,
936 SmallVectorImpl
<Instruction
*> &toErase
) {
937 // Find the non-local dependencies of the load.
938 SmallVector
<MemoryDependenceAnalysis::NonLocalDepEntry
, 64> Deps
;
939 MD
->getNonLocalPointerDependency(LI
->getOperand(0), true, LI
->getParent(),
941 //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI);
943 // If we had to process more than one hundred blocks to find the
944 // dependencies, this load isn't worth worrying about. Optimizing
945 // it will be too expensive.
946 if (Deps
.size() > 100)
949 // If we had a phi translation failure, we'll have a single entry which is a
950 // clobber in the current block. Reject this early.
951 if (Deps
.size() == 1 && Deps
[0].second
.isClobber())
954 // Filter out useless results (non-locals, etc). Keep track of the blocks
955 // where we have a value available in repl, also keep track of whether we see
956 // dependencies that produce an unknown value for the load (such as a call
957 // that could potentially clobber the load).
958 SmallVector
<std::pair
<BasicBlock
*, Value
*>, 16> ValuesPerBlock
;
959 SmallVector
<BasicBlock
*, 16> UnavailableBlocks
;
961 for (unsigned i
= 0, e
= Deps
.size(); i
!= e
; ++i
) {
962 BasicBlock
*DepBB
= Deps
[i
].first
;
963 MemDepResult DepInfo
= Deps
[i
].second
;
965 if (DepInfo
.isClobber()) {
966 UnavailableBlocks
.push_back(DepBB
);
970 Instruction
*DepInst
= DepInfo
.getInst();
972 // Loading the allocation -> undef.
973 if (isa
<AllocationInst
>(DepInst
)) {
974 ValuesPerBlock
.push_back(std::make_pair(DepBB
,
975 UndefValue::get(LI
->getType())));
979 if (StoreInst
* S
= dyn_cast
<StoreInst
>(DepInst
)) {
980 // Reject loads and stores that are to the same address but are of
982 // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
983 // of bitfield access, it would be interesting to optimize for it at some
985 if (S
->getOperand(0)->getType() != LI
->getType()) {
986 UnavailableBlocks
.push_back(DepBB
);
990 ValuesPerBlock
.push_back(std::make_pair(DepBB
, S
->getOperand(0)));
992 } else if (LoadInst
* LD
= dyn_cast
<LoadInst
>(DepInst
)) {
993 if (LD
->getType() != LI
->getType()) {
994 UnavailableBlocks
.push_back(DepBB
);
997 ValuesPerBlock
.push_back(std::make_pair(DepBB
, LD
));
999 UnavailableBlocks
.push_back(DepBB
);
1004 // If we have no predecessors that produce a known value for this load, exit
1006 if (ValuesPerBlock
.empty()) return false;
1008 // If all of the instructions we depend on produce a known value for this
1009 // load, then it is fully redundant and we can use PHI insertion to compute
1010 // its value. Insert PHIs and remove the fully redundant value now.
1011 if (UnavailableBlocks
.empty()) {
1012 // Use cached PHI construction information from previous runs
1013 SmallPtrSet
<Instruction
*, 4> &p
= phiMap
[LI
->getPointerOperand()];
1014 // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
1015 for (SmallPtrSet
<Instruction
*, 4>::iterator I
= p
.begin(), E
= p
.end();
1017 if ((*I
)->getParent() == LI
->getParent()) {
1018 DEBUG(cerr
<< "GVN REMOVING NONLOCAL LOAD #1: " << *LI
);
1019 LI
->replaceAllUsesWith(*I
);
1020 if (isa
<PointerType
>((*I
)->getType()))
1021 MD
->invalidateCachedPointerInfo(*I
);
1022 toErase
.push_back(LI
);
1027 ValuesPerBlock
.push_back(std::make_pair((*I
)->getParent(), *I
));
1030 DEBUG(cerr
<< "GVN REMOVING NONLOCAL LOAD: " << *LI
);
1032 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1033 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1034 // Perform PHI construction.
1035 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1036 LI
->replaceAllUsesWith(v
);
1038 if (isa
<PHINode
>(v
))
1040 if (isa
<PointerType
>(v
->getType()))
1041 MD
->invalidateCachedPointerInfo(v
);
1042 toErase
.push_back(LI
);
1047 if (!EnablePRE
|| !EnableLoadPRE
)
1050 // Okay, we have *some* definitions of the value. This means that the value
1051 // is available in some of our (transitive) predecessors. Lets think about
1052 // doing PRE of this load. This will involve inserting a new load into the
1053 // predecessor when it's not available. We could do this in general, but
1054 // prefer to not increase code size. As such, we only do this when we know
1055 // that we only have to insert *one* load (which means we're basically moving
1056 // the load, not inserting a new one).
1058 // Everything we do here is based on local predecessors of LI's block. If it
1059 // only has one predecessor, bail now.
1060 BasicBlock
*LoadBB
= LI
->getParent();
1061 if (LoadBB
->getSinglePredecessor())
1064 // If we have a repl set with LI itself in it, this means we have a loop where
1065 // at least one of the values is LI. Since this means that we won't be able
1066 // to eliminate LI even if we insert uses in the other predecessors, we will
1067 // end up increasing code size. Reject this by scanning for LI.
1068 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1069 if (ValuesPerBlock
[i
].second
== LI
)
1072 // Okay, we have some hope :). Check to see if the loaded value is fully
1073 // available in all but one predecessor.
1074 // FIXME: If we could restructure the CFG, we could make a common pred with
1075 // all the preds that don't have an available LI and insert a new load into
1077 BasicBlock
*UnavailablePred
= 0;
1079 DenseMap
<BasicBlock
*, char> FullyAvailableBlocks
;
1080 for (unsigned i
= 0, e
= ValuesPerBlock
.size(); i
!= e
; ++i
)
1081 FullyAvailableBlocks
[ValuesPerBlock
[i
].first
] = true;
1082 for (unsigned i
= 0, e
= UnavailableBlocks
.size(); i
!= e
; ++i
)
1083 FullyAvailableBlocks
[UnavailableBlocks
[i
]] = false;
1085 for (pred_iterator PI
= pred_begin(LoadBB
), E
= pred_end(LoadBB
);
1087 if (IsValueFullyAvailableInBlock(*PI
, FullyAvailableBlocks
))
1090 // If this load is not available in multiple predecessors, reject it.
1091 if (UnavailablePred
&& UnavailablePred
!= *PI
)
1093 UnavailablePred
= *PI
;
1096 assert(UnavailablePred
!= 0 &&
1097 "Fully available value should be eliminated above!");
1099 // If the loaded pointer is PHI node defined in this block, do PHI translation
1100 // to get its value in the predecessor.
1101 Value
*LoadPtr
= LI
->getOperand(0)->DoPHITranslation(LoadBB
, UnavailablePred
);
1103 // Make sure the value is live in the predecessor. If it was defined by a
1104 // non-PHI instruction in this block, we don't know how to recompute it above.
1105 if (Instruction
*LPInst
= dyn_cast
<Instruction
>(LoadPtr
))
1106 if (!DT
->dominates(LPInst
->getParent(), UnavailablePred
)) {
1107 DEBUG(cerr
<< "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
1108 << *LPInst
<< *LI
<< "\n");
1112 // We don't currently handle critical edges :(
1113 if (UnavailablePred
->getTerminator()->getNumSuccessors() != 1) {
1114 DEBUG(cerr
<< "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
1115 << UnavailablePred
->getName() << "': " << *LI
);
1119 // Okay, we can eliminate this load by inserting a reload in the predecessor
1120 // and using PHI construction to get the value in the other predecessors, do
1122 DEBUG(cerr
<< "GVN REMOVING PRE LOAD: " << *LI
);
1124 Value
*NewLoad
= new LoadInst(LoadPtr
, LI
->getName()+".pre", false,
1126 UnavailablePred
->getTerminator());
1128 DenseMap
<BasicBlock
*, Value
*> BlockReplValues
;
1129 BlockReplValues
.insert(ValuesPerBlock
.begin(), ValuesPerBlock
.end());
1130 BlockReplValues
[UnavailablePred
] = NewLoad
;
1132 // Perform PHI construction.
1133 Value
* v
= GetValueForBlock(LI
->getParent(), LI
, BlockReplValues
, true);
1134 LI
->replaceAllUsesWith(v
);
1135 if (isa
<PHINode
>(v
))
1137 if (isa
<PointerType
>(v
->getType()))
1138 MD
->invalidateCachedPointerInfo(v
);
1139 toErase
.push_back(LI
);
1144 /// processLoad - Attempt to eliminate a load, first by eliminating it
1145 /// locally, and then attempting non-local elimination if that fails.
1146 bool GVN::processLoad(LoadInst
*L
, SmallVectorImpl
<Instruction
*> &toErase
) {
1147 if (L
->isVolatile())
1150 Value
* pointer
= L
->getPointerOperand();
1152 // ... to a pointer that has been loaded from before...
1153 MemDepResult dep
= MD
->getDependency(L
);
1155 // If the value isn't available, don't do anything!
1156 if (dep
.isClobber())
1159 // If it is defined in another block, try harder.
1160 if (dep
.isNonLocal())
1161 return processNonLocalLoad(L
, toErase
);
1163 Instruction
*DepInst
= dep
.getInst();
1164 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(DepInst
)) {
1165 // Only forward substitute stores to loads of the same type.
1166 // FIXME: Could do better!
1167 if (DepSI
->getPointerOperand()->getType() != pointer
->getType())
1171 L
->replaceAllUsesWith(DepSI
->getOperand(0));
1172 if (isa
<PointerType
>(DepSI
->getOperand(0)->getType()))
1173 MD
->invalidateCachedPointerInfo(DepSI
->getOperand(0));
1174 toErase
.push_back(L
);
1179 if (LoadInst
*DepLI
= dyn_cast
<LoadInst
>(DepInst
)) {
1180 // Only forward substitute stores to loads of the same type.
1181 // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
1182 if (DepLI
->getType() != L
->getType())
1186 L
->replaceAllUsesWith(DepLI
);
1187 if (isa
<PointerType
>(DepLI
->getType()))
1188 MD
->invalidateCachedPointerInfo(DepLI
);
1189 toErase
.push_back(L
);
1194 // If this load really doesn't depend on anything, then we must be loading an
1195 // undef value. This can happen when loading for a fresh allocation with no
1196 // intervening stores, for example.
1197 if (isa
<AllocationInst
>(DepInst
)) {
1198 L
->replaceAllUsesWith(UndefValue::get(L
->getType()));
1199 toErase
.push_back(L
);
1207 Value
* GVN::lookupNumber(BasicBlock
* BB
, uint32_t num
) {
1208 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator I
= localAvail
.find(BB
);
1209 if (I
== localAvail
.end())
1212 ValueNumberScope
* locals
= I
->second
;
1215 DenseMap
<uint32_t, Value
*>::iterator I
= locals
->table
.find(num
);
1216 if (I
!= locals
->table
.end())
1219 locals
= locals
->parent
;
1225 /// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
1226 /// by inheritance from the dominator fails, see if we can perform phi
1227 /// construction to eliminate the redundancy.
1228 Value
* GVN::AttemptRedundancyElimination(Instruction
* orig
, unsigned valno
) {
1229 BasicBlock
* BaseBlock
= orig
->getParent();
1231 SmallPtrSet
<BasicBlock
*, 4> Visited
;
1232 SmallVector
<BasicBlock
*, 8> Stack
;
1233 Stack
.push_back(BaseBlock
);
1235 DenseMap
<BasicBlock
*, Value
*> Results
;
1237 // Walk backwards through our predecessors, looking for instances of the
1238 // value number we're looking for. Instances are recorded in the Results
1239 // map, which is then used to perform phi construction.
1240 while (!Stack
.empty()) {
1241 BasicBlock
* Current
= Stack
.back();
1244 // If we've walked all the way to a proper dominator, then give up. Cases
1245 // where the instance is in the dominator will have been caught by the fast
1246 // path, and any cases that require phi construction further than this are
1247 // probably not worth it anyways. Note that this is a SIGNIFICANT compile
1248 // time improvement.
1249 if (DT
->properlyDominates(Current
, orig
->getParent())) return 0;
1251 DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator LA
=
1252 localAvail
.find(Current
);
1253 if (LA
== localAvail
.end()) return 0;
1254 DenseMap
<uint32_t, Value
*>::iterator V
= LA
->second
->table
.find(valno
);
1256 if (V
!= LA
->second
->table
.end()) {
1257 // Found an instance, record it.
1258 Results
.insert(std::make_pair(Current
, V
->second
));
1262 // If we reach the beginning of the function, then give up.
1263 if (pred_begin(Current
) == pred_end(Current
))
1266 for (pred_iterator PI
= pred_begin(Current
), PE
= pred_end(Current
);
1268 if (Visited
.insert(*PI
))
1269 Stack
.push_back(*PI
);
1272 // If we didn't find instances, give up. Otherwise, perform phi construction.
1273 if (Results
.size() == 0)
1276 return GetValueForBlock(BaseBlock
, orig
, Results
, true);
1279 /// processInstruction - When calculating availability, handle an instruction
1280 /// by inserting it into the appropriate sets
1281 bool GVN::processInstruction(Instruction
*I
,
1282 SmallVectorImpl
<Instruction
*> &toErase
) {
1283 if (LoadInst
* L
= dyn_cast
<LoadInst
>(I
)) {
1284 bool changed
= processLoad(L
, toErase
);
1287 unsigned num
= VN
.lookup_or_add(L
);
1288 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, L
));
1294 uint32_t nextNum
= VN
.getNextUnusedValueNumber();
1295 unsigned num
= VN
.lookup_or_add(I
);
1297 if (BranchInst
* BI
= dyn_cast
<BranchInst
>(I
)) {
1298 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1300 if (!BI
->isConditional() || isa
<Constant
>(BI
->getCondition()))
1303 Value
* branchCond
= BI
->getCondition();
1304 uint32_t condVN
= VN
.lookup_or_add(branchCond
);
1306 BasicBlock
* trueSucc
= BI
->getSuccessor(0);
1307 BasicBlock
* falseSucc
= BI
->getSuccessor(1);
1309 if (trueSucc
->getSinglePredecessor())
1310 localAvail
[trueSucc
]->table
[condVN
] = ConstantInt::getTrue();
1311 if (falseSucc
->getSinglePredecessor())
1312 localAvail
[falseSucc
]->table
[condVN
] = ConstantInt::getFalse();
1316 // Allocations are always uniquely numbered, so we can save time and memory
1317 // by fast failing them.
1318 } else if (isa
<AllocationInst
>(I
) || isa
<TerminatorInst
>(I
)) {
1319 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1323 // Collapse PHI nodes
1324 if (PHINode
* p
= dyn_cast
<PHINode
>(I
)) {
1325 Value
* constVal
= CollapsePhi(p
);
1328 for (PhiMapType::iterator PI
= phiMap
.begin(), PE
= phiMap
.end();
1330 PI
->second
.erase(p
);
1332 p
->replaceAllUsesWith(constVal
);
1333 if (isa
<PointerType
>(constVal
->getType()))
1334 MD
->invalidateCachedPointerInfo(constVal
);
1337 toErase
.push_back(p
);
1339 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1342 // If the number we were assigned was a brand new VN, then we don't
1343 // need to do a lookup to see if the number already exists
1344 // somewhere in the domtree: it can't!
1345 } else if (num
== nextNum
) {
1346 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1348 // Perform fast-path value-number based elimination of values inherited from
1350 } else if (Value
* repl
= lookupNumber(I
->getParent(), num
)) {
1353 I
->replaceAllUsesWith(repl
);
1354 if (isa
<PointerType
>(repl
->getType()))
1355 MD
->invalidateCachedPointerInfo(repl
);
1356 toErase
.push_back(I
);
1360 // Perform slow-pathvalue-number based elimination with phi construction.
1361 } else if (Value
* repl
= AttemptRedundancyElimination(I
, num
)) {
1364 I
->replaceAllUsesWith(repl
);
1365 if (isa
<PointerType
>(repl
->getType()))
1366 MD
->invalidateCachedPointerInfo(repl
);
1367 toErase
.push_back(I
);
1371 localAvail
[I
->getParent()]->table
.insert(std::make_pair(num
, I
));
1377 /// runOnFunction - This is the main transformation entry point for a function.
1378 bool GVN::runOnFunction(Function
& F
) {
1379 MD
= &getAnalysis
<MemoryDependenceAnalysis
>();
1380 DT
= &getAnalysis
<DominatorTree
>();
1381 VN
.setAliasAnalysis(&getAnalysis
<AliasAnalysis
>());
1385 bool changed
= false;
1386 bool shouldContinue
= true;
1388 // Merge unconditional branches, allowing PRE to catch more
1389 // optimization opportunities.
1390 for (Function::iterator FI
= F
.begin(), FE
= F
.end(); FI
!= FE
; ) {
1391 BasicBlock
* BB
= FI
;
1393 bool removedBlock
= MergeBlockIntoPredecessor(BB
, this);
1394 if (removedBlock
) NumGVNBlocks
++;
1396 changed
|= removedBlock
;
1399 unsigned Iteration
= 0;
1401 while (shouldContinue
) {
1402 DEBUG(cerr
<< "GVN iteration: " << Iteration
<< "\n");
1403 shouldContinue
= iterateOnFunction(F
);
1404 changed
|= shouldContinue
;
1409 bool PREChanged
= true;
1410 while (PREChanged
) {
1411 PREChanged
= performPRE(F
);
1412 changed
|= PREChanged
;
1415 // FIXME: Should perform GVN again after PRE does something. PRE can move
1416 // computations into blocks where they become fully redundant. Note that
1417 // we can't do this until PRE's critical edge splitting updates memdep.
1418 // Actually, when this happens, we should just fully integrate PRE into GVN.
1420 cleanupGlobalSets();
1426 bool GVN::processBlock(BasicBlock
* BB
) {
1427 // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
1428 // incrementing BI before processing an instruction).
1429 SmallVector
<Instruction
*, 8> toErase
;
1430 bool changed_function
= false;
1432 for (BasicBlock::iterator BI
= BB
->begin(), BE
= BB
->end();
1434 changed_function
|= processInstruction(BI
, toErase
);
1435 if (toErase
.empty()) {
1440 // If we need some instructions deleted, do it now.
1441 NumGVNInstr
+= toErase
.size();
1443 // Avoid iterator invalidation.
1444 bool AtStart
= BI
== BB
->begin();
1448 for (SmallVector
<Instruction
*, 4>::iterator I
= toErase
.begin(),
1449 E
= toErase
.end(); I
!= E
; ++I
) {
1450 DEBUG(cerr
<< "GVN removed: " << **I
);
1451 MD
->removeInstruction(*I
);
1452 (*I
)->eraseFromParent();
1453 DEBUG(verifyRemoved(*I
));
1463 return changed_function
;
1466 /// performPRE - Perform a purely local form of PRE that looks for diamond
1467 /// control flow patterns and attempts to perform simple PRE at the join point.
1468 bool GVN::performPRE(Function
& F
) {
1469 bool Changed
= false;
1470 SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4> toSplit
;
1471 DenseMap
<BasicBlock
*, Value
*> predMap
;
1472 for (df_iterator
<BasicBlock
*> DI
= df_begin(&F
.getEntryBlock()),
1473 DE
= df_end(&F
.getEntryBlock()); DI
!= DE
; ++DI
) {
1474 BasicBlock
* CurrentBlock
= *DI
;
1476 // Nothing to PRE in the entry block.
1477 if (CurrentBlock
== &F
.getEntryBlock()) continue;
1479 for (BasicBlock::iterator BI
= CurrentBlock
->begin(),
1480 BE
= CurrentBlock
->end(); BI
!= BE
; ) {
1481 Instruction
*CurInst
= BI
++;
1483 if (isa
<AllocationInst
>(CurInst
) || isa
<TerminatorInst
>(CurInst
) ||
1484 isa
<PHINode
>(CurInst
) || (CurInst
->getType() == Type::VoidTy
) ||
1485 CurInst
->mayReadFromMemory() || CurInst
->mayHaveSideEffects() ||
1486 isa
<DbgInfoIntrinsic
>(CurInst
))
1489 uint32_t valno
= VN
.lookup(CurInst
);
1491 // Look for the predecessors for PRE opportunities. We're
1492 // only trying to solve the basic diamond case, where
1493 // a value is computed in the successor and one predecessor,
1494 // but not the other. We also explicitly disallow cases
1495 // where the successor is its own predecessor, because they're
1496 // more complicated to get right.
1497 unsigned numWith
= 0;
1498 unsigned numWithout
= 0;
1499 BasicBlock
* PREPred
= 0;
1502 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1503 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
) {
1504 // We're not interested in PRE where the block is its
1505 // own predecessor, on in blocks with predecessors
1506 // that are not reachable.
1507 if (*PI
== CurrentBlock
) {
1510 } else if (!localAvail
.count(*PI
)) {
1515 DenseMap
<uint32_t, Value
*>::iterator predV
=
1516 localAvail
[*PI
]->table
.find(valno
);
1517 if (predV
== localAvail
[*PI
]->table
.end()) {
1520 } else if (predV
->second
== CurInst
) {
1523 predMap
[*PI
] = predV
->second
;
1528 // Don't do PRE when it might increase code size, i.e. when
1529 // we would need to insert instructions in more than one pred.
1530 if (numWithout
!= 1 || numWith
== 0)
1533 // We can't do PRE safely on a critical edge, so instead we schedule
1534 // the edge to be split and perform the PRE the next time we iterate
1536 unsigned succNum
= 0;
1537 for (unsigned i
= 0, e
= PREPred
->getTerminator()->getNumSuccessors();
1539 if (PREPred
->getTerminator()->getSuccessor(i
) == CurrentBlock
) {
1544 if (isCriticalEdge(PREPred
->getTerminator(), succNum
)) {
1545 toSplit
.push_back(std::make_pair(PREPred
->getTerminator(), succNum
));
1549 // Instantiate the expression the in predecessor that lacked it.
1550 // Because we are going top-down through the block, all value numbers
1551 // will be available in the predecessor by the time we need them. Any
1552 // that weren't original present will have been instantiated earlier
1554 Instruction
* PREInstr
= CurInst
->clone();
1555 bool success
= true;
1556 for (unsigned i
= 0, e
= CurInst
->getNumOperands(); i
!= e
; ++i
) {
1557 Value
*Op
= PREInstr
->getOperand(i
);
1558 if (isa
<Argument
>(Op
) || isa
<Constant
>(Op
) || isa
<GlobalValue
>(Op
))
1561 if (Value
*V
= lookupNumber(PREPred
, VN
.lookup(Op
))) {
1562 PREInstr
->setOperand(i
, V
);
1569 // Fail out if we encounter an operand that is not available in
1570 // the PRE predecessor. This is typically because of loads which
1571 // are not value numbered precisely.
1574 DEBUG(verifyRemoved(PREInstr
));
1578 PREInstr
->insertBefore(PREPred
->getTerminator());
1579 PREInstr
->setName(CurInst
->getName() + ".pre");
1580 predMap
[PREPred
] = PREInstr
;
1581 VN
.add(PREInstr
, valno
);
1584 // Update the availability map to include the new instruction.
1585 localAvail
[PREPred
]->table
.insert(std::make_pair(valno
, PREInstr
));
1587 // Create a PHI to make the value available in this block.
1588 PHINode
* Phi
= PHINode::Create(CurInst
->getType(),
1589 CurInst
->getName() + ".pre-phi",
1590 CurrentBlock
->begin());
1591 for (pred_iterator PI
= pred_begin(CurrentBlock
),
1592 PE
= pred_end(CurrentBlock
); PI
!= PE
; ++PI
)
1593 Phi
->addIncoming(predMap
[*PI
], *PI
);
1596 localAvail
[CurrentBlock
]->table
[valno
] = Phi
;
1598 CurInst
->replaceAllUsesWith(Phi
);
1599 if (isa
<PointerType
>(Phi
->getType()))
1600 MD
->invalidateCachedPointerInfo(Phi
);
1603 DEBUG(cerr
<< "GVN PRE removed: " << *CurInst
);
1604 MD
->removeInstruction(CurInst
);
1605 CurInst
->eraseFromParent();
1606 DEBUG(verifyRemoved(CurInst
));
1611 for (SmallVector
<std::pair
<TerminatorInst
*, unsigned>, 4>::iterator
1612 I
= toSplit
.begin(), E
= toSplit
.end(); I
!= E
; ++I
)
1613 SplitCriticalEdge(I
->first
, I
->second
, this);
1615 return Changed
|| toSplit
.size();
1618 /// iterateOnFunction - Executes one iteration of GVN
1619 bool GVN::iterateOnFunction(Function
&F
) {
1620 cleanupGlobalSets();
1622 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1623 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
) {
1625 localAvail
[DI
->getBlock()] =
1626 new ValueNumberScope(localAvail
[DI
->getIDom()->getBlock()]);
1628 localAvail
[DI
->getBlock()] = new ValueNumberScope(0);
1631 // Top-down walk of the dominator tree
1632 bool changed
= false;
1634 // Needed for value numbering with phi construction to work.
1635 ReversePostOrderTraversal
<Function
*> RPOT(&F
);
1636 for (ReversePostOrderTraversal
<Function
*>::rpo_iterator RI
= RPOT
.begin(),
1637 RE
= RPOT
.end(); RI
!= RE
; ++RI
)
1638 changed
|= processBlock(*RI
);
1640 for (df_iterator
<DomTreeNode
*> DI
= df_begin(DT
->getRootNode()),
1641 DE
= df_end(DT
->getRootNode()); DI
!= DE
; ++DI
)
1642 changed
|= processBlock(DI
->getBlock());
1648 void GVN::cleanupGlobalSets() {
1652 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1653 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
)
1658 /// verifyRemoved - Verify that the specified instruction does not occur in our
1659 /// internal data structures.
1660 void GVN::verifyRemoved(const Instruction
*Inst
) const {
1661 VN
.verifyRemoved(Inst
);
1663 // Walk through the PHI map to make sure the instruction isn't hiding in there
1665 for (PhiMapType::iterator
1666 I
= phiMap
.begin(), E
= phiMap
.end(); I
!= E
; ++I
) {
1667 assert(I
->first
!= Inst
&& "Inst is still a key in PHI map!");
1669 for (SmallPtrSet
<Instruction
*, 4>::iterator
1670 II
= I
->second
.begin(), IE
= I
->second
.end(); II
!= IE
; ++II
) {
1671 assert(*II
!= Inst
&& "Inst is still a value in PHI map!");
1675 // Walk through the value number scope to make sure the instruction isn't
1676 // ferreted away in it.
1677 for (DenseMap
<BasicBlock
*, ValueNumberScope
*>::iterator
1678 I
= localAvail
.begin(), E
= localAvail
.end(); I
!= E
; ++I
) {
1679 const ValueNumberScope
*VNS
= I
->second
;
1682 for (DenseMap
<uint32_t, Value
*>::iterator
1683 II
= VNS
->table
.begin(), IE
= VNS
->table
.end(); II
!= IE
; ++II
) {
1684 assert(II
->second
!= Inst
&& "Inst still in value numbering scope!");