//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
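// Both flags default to on; they exist mainly so the two PRE phases can be
// toggled independently when debugging or bisecting a miscompile.  With the
// standard 'opt' driver this looks roughly like (illustrative invocation):
//   opt -gvn -enable-pre=false -enable-load-pre=false input.bc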
//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    uint32_t opcode;
    const Type *type;
    SmallVector<uint32_t, 4> varargs;

    Expression(uint32_t o = ~2U) : opcode(o) { }
    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      if (opcode == ~0U || opcode == ~1U)
        return true;
      if (type != other.type)
        return false;
      if (varargs != other.varargs)
        return false;
      return true;
    }
  };
  class ValueTable {
    DenseMap<Value*, uint32_t> valueNumbering;
    DenseMap<Expression, uint32_t> expressionNumbering;
    AliasAnalysis *AA;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    uint32_t nextValueNumber;

    Expression create_expression(Instruction* I);
    Expression create_extractvalue_expression(ExtractValueInst* EI);
    uint32_t lookup_or_add_call(CallInst* C);
  public:
    ValueTable() : nextValueNumber(1) { }
    uint32_t lookup_or_add(Value *V);
    uint32_t lookup(Value *V) const;
    void add(Value *V, uint32_t num);
    void clear();
    void erase(Value *v);
    void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
    AliasAnalysis *getAliasAnalysis() const { return AA; }
    void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
    void setDomTree(DominatorTree* D) { DT = D; }
    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
    void verifyRemoved(const Value *) const;
  };
}
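// Illustrative example of the numbering performed by ValueTable: given the IR
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y
//
// both instructions reduce to the same Expression (opcode Add, type i32,
// varargs = [VN(%x), VN(%y)]), so lookup_or_add returns the same uint32_t for
// %a and %b.  Equal value numbers mean "provably computes the same value",
// not "is the same Value*".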
namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return ~0U;
  }

  static inline Expression getTombstoneKey() {
    return ~1U;
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

}
//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression ValueTable::create_expression(Instruction *I) {
  Expression e;
  e.type = I->getType();
  e.opcode = I->getOpcode();
  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
         II != IE; ++II)
      e.varargs.push_back(*II);
  }

  return e;
}
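// Note that for compares the predicate is folded into the opcode above, so,
// e.g., "icmp eq i32 %x, %y" and "icmp ne i32 %x, %y" (illustrative IR) get
// distinct expressions even though Instruction::getOpcode() is ICmp for both.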
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
  assert(EI != 0 && "Not an ExtractValueInst?");
  Expression e;
  e.type = EI->getType();
  e.opcode = 0;

  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
  if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
    // EI might be an extract from one of our recognised intrinsics. If it
    // is we'll synthesize a semantically equivalent expression instead of
    // an extract value expression.
    switch (I->getIntrinsicID()) {
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::uadd_with_overflow:
        e.opcode = Instruction::Add;
        break;
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::usub_with_overflow:
        e.opcode = Instruction::Sub;
        break;
      case Intrinsic::smul_with_overflow:
      case Intrinsic::umul_with_overflow:
        e.opcode = Instruction::Mul;
        break;
      default:
        break;
    }

    if (e.opcode != 0) {
      // Intrinsic recognized. Grab its args to finish building the expression.
      assert(I->getNumArgOperands() == 2 &&
             "Expect two args for recognised intrinsics.");
      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
      return e;
    }
  }

  // Not a recognised intrinsic. Fall back to producing an extract value
  // expression.
  e.opcode = EI->getOpcode();
  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
       OI != OE; ++OI)
    e.varargs.push_back(lookup_or_add(*OI));

  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  return e;
}
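// Illustrative consequence of the special case above: in
//
//   %s = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %v = extractvalue {i32, i1} %s, 0
//   %a = add i32 %x, %y
//
// %v is numbered as if it were an ordinary add of %x and %y, so %v and %a
// receive the same value number and one of them can be eliminated.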
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//
/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}
uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t &e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-definitions.  If we already have a call, reject
      // instruction dependencies.
      if (!I->getResult().isDef() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;
  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}
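// Summary of the call-numbering policy above: readnone calls are numbered
// purely by their expression (callee plus argument value numbers); readonly
// calls additionally require, via MemoryDependenceAnalysis, an identical
// dominating call with matching arguments and no intervening write to the
// memory they read; any other call always receives a fresh value number.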
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = create_expression(I);
      break;
    case Instruction::ExtractValue:
      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}
//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//
namespace {

  class GVN : public FunctionPass {
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData *TD;

    ValueTable VN;

    /// LeaderTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number.  Use findLeader to query it.
    struct LeaderTableEntry {
      Value *Val;
      BasicBlock *BB;
      LeaderTableEntry *Next;
    };
    DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
    BumpPtrAllocator TableAllocator;

    SmallVector<Instruction*, 8> InstrsToErase;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F);

    /// markInstructionForDeletion - This removes the specified instruction from
    /// our various maps and marks it for deletion.
    void markInstructionForDeletion(Instruction *I) {
      VN.erase(I);
      InstrsToErase.push_back(I);
    }

    const TargetData *getTargetData() const { return TD; }
    DominatorTree &getDominatorTree() const { return *DT; }
    AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
    MemoryDependenceAnalysis &getMemDep() const { return *MD; }
  private:
    /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
    /// its value number.
    void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
      LeaderTableEntry &Curr = LeaderTable[N];
      if (!Curr.Val) {
        Curr.Val = V;
        Curr.BB = BB;
        return;
      }

      LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
      Node->Val = V;
      Node->BB = BB;
      Node->Next = Curr.Next;
      Curr.Next = Node;
    }

    /// removeFromLeaderTable - Scan the list of values corresponding to a given
    /// value number, and remove the given value if encountered.
    void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
      LeaderTableEntry* Prev = 0;
      LeaderTableEntry* Curr = &LeaderTable[N];

      while (Curr->Val != V || Curr->BB != BB) {
        Prev = Curr;
        Curr = Curr->Next;
      }

      if (Prev) {
        Prev->Next = Curr->Next;
      } else {
        if (!Curr->Next) {
          Curr->Val = 0;
          Curr->BB = 0;
        } else {
          LeaderTableEntry* Next = Curr->Next;
          Curr->Val = Next->Val;
          Curr->BB = Next->BB;
          Curr->Next = Next->Next;
        }
      }
    }
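    // A note on the leader table used above: LeaderTable maps a value number
    // to a singly linked list of (Value, BasicBlock) pairs known to carry that
    // number.  addToLeaderTable stores the first entry inline in the DenseMap
    // and chains later ones through the BumpPtrAllocator; removeFromLeaderTable
    // assumes the (Value, BasicBlock) pair is actually present.  findLeader
    // (declared below) walks this list to find an entry usable at a given
    // block.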
    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator postdominator info
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst *L);
    bool processInstruction(Instruction *I);
    bool processNonLocalLoad(LoadInst *L);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*> &d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function &F);
    Value *findLeader(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}
// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
      errs() << I->first << "\n";
      I->second->dump();
  }
  errs() << "}\n";
}
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}
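// Illustrative walk-through of the tri-state logic above: for a diamond
//
//        A   (value available here)
//       / \
//      B   C
//       \ /
//        D   (query starts here)
//
// the query for D optimistically inserts D as "2", recurses into B and C, and
// each of those succeeds once it reaches A, so D is reported fully available.
// If instead some predecessor chain fails, the SpeculationFailure path clears
// the optimistic marks; when the failed block's speculative state was already
// used to justify other blocks (state 3), as can happen with loops, its
// transitive successors are walked and reset to unavailable as well.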
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  // If this is already the right type, just return it.
  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
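// Illustrative coercion performed by the helper above (little-endian target,
// assuming TargetData reports i64 as 8 bytes): forwarding a stored i64 to an
// i32 load of the same address becomes a single "trunc i64 %val to i32";
// forwarding it to a pointer-typed load of the same size goes through the
// ptrtoint/bitcast/inttoptr path instead.  On a big-endian target the value is
// first shifted right by StoreSize-LoadSize bits so the truncate keeps the
// bytes the load would actually read.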
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;

  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
#endif
    return -1;
  }

  // If the Load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}
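// Illustrative example of the offset computation above: with a store that
// covers bytes [0,8) of some base pointer and a 4-byte load of bytes [4,8)
// from the same base, both GetPointerBaseWithConstantOffset calls agree on the
// base, the load is fully contained in the store, and the function returns 4,
// i.e. the load reads the stored value starting at byte 4.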
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load.  See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
                                         LoadInst *DepLI, const TargetData &TD){
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
  if (R != -1) return R;

  // If we have a load/load clobber and DepLI can be widened to cover this load,
  // then we should widen it!
  int64_t LoadOffs = 0;
  const Value *LoadBase =
    GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
  if (Size == 0) return -1;

  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}
/// AnalyzeLoadFromClobberingMemInst - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store provides bits used by the load but the pointers don't
/// mustalias.  Check this case to see if there is anything more we can do
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
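// Worked example for the shift above (little-endian): a 2-byte load at
// Offset 2 out of an 8-byte stored value is materialized roughly as
//   %t = lshr i64 %stored, 16
//   %r = trunc i64 %t to i16
// followed by the final type coercion.  On big-endian targets the shift
// amount is (StoreSize-LoadSize-Offset)*8 instead, so the same bytes of
// memory are selected.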
/// GetLoadValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering load.  This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
                                  const Type *LoadTy, Instruction *InsertPt,
                                  GVN &gvn) {
  const TargetData &TD = *gvn.getTargetData();
  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
  // widen SrcVal out to a larger load.
  unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
  unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
  if (Offset+LoadSize > SrcValSize) {
    assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
    assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
    // If we have a load/load clobber and DepLI can be widened to cover this
    // load, then we should widen it to the next power of 2 size big enough!
    unsigned NewLoadSize = Offset+LoadSize;
    if (!isPowerOf2_32(NewLoadSize))
      NewLoadSize = NextPowerOf2(NewLoadSize);

    Value *PtrVal = SrcVal->getPointerOperand();

    // Insert the new load after the old load.  This ensures that subsequent
    // memdep queries will find the new load.  We can't easily remove the old
    // load completely because it is already in the value numbering table.
    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
    const Type *DestPTy =
      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
    DestPTy = PointerType::get(DestPTy,
                       cast<PointerType>(PtrVal->getType())->getAddressSpace());
    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
    NewLoad->takeName(SrcVal);
    NewLoad->setAlignment(SrcVal->getAlignment());

    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");

    // Replace uses of the original load with the wider load.  On a big endian
    // system, we need to shift down to get the relevant bits.
    Value *RV = NewLoad;
    if (TD.isBigEndian())
      RV = Builder.CreateLShr(RV,
                    NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
    RV = Builder.CreateTrunc(RV, SrcVal->getType());
    SrcVal->replaceAllUsesWith(RV);

    // We would like to use gvn.markInstructionForDeletion here, but we can't
    // because the load is already memoized into the leader map table that GVN
    // tracks.  It is potentially possible to remove the load from the table,
    // but then all of the operations based on it would need to be rehashed.
    // Just leave the dead load around.
    gvn.getMemDep().removeInstruction(SrcVal);
    SrcVal = NewLoad;
  }

  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
}
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
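// Illustrative example for the memset path above: forwarding from
// "memset(p, i8 %b, 64)" to a 4-byte load inside the set region zero-extends
// %b to i32 and then splats it by shifting and or-ing, producing the
// equivalent of %b * 0x01010101; the memcpy/memmove path instead
// constant-folds a load from the (constant) source global at the computed
// offset.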
namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;

  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    LoadVal,    // A value produced by a load.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 2, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
                                       unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(LI);
    Res.Val.setInt(LoadVal);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }

  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  LoadInst *getCoercedLoadValue() const {
    assert(isCoercedLoadValue() && "Wrong accessor");
    return cast<LoadInst>(Val.getPointer());
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(isMemIntrinValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        const TargetData *TD = gvn.getTargetData();
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else if (isCoercedLoadValue()) {
      LoadInst *Load = getCoercedLoadValue();
      if (Load->getType() == LoadTy && Offset == 0) {
        Res = Load;
      } else {
        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
                                  gvn);

        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
                     << *getCoercedLoadValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      const TargetData *TD = gvn.getTargetData();
      assert(TD && "Need target data to handle type mismatch case");
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << "  " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     GVN &gvn) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
                                               LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy()) {
    AliasAnalysis *AA = gvn.getAliasAnalysis();

    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

    // Now that we've copied information to the new PHIs, scan through
    // them again and inform alias analysis that we've added potentially
    // escaping uses to any values that are operands to these PHIs.
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
      PHINode *P = NewPHIs[i];
      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
        AA->addEscapingUse(P->getOperandUse(jj));
      }
    }
  }

  return V;
}
static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " has unknown dependencies\n";
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isUnknown()) {
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same as
      // the pointer operand of the load if PHI translation occurs.  Make sure
      // to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from the
      // stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                       DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // Check to see if we have something like this:
      //    load i32* P
      //    load i8* (P+1)
      // if we have this, replace the later with an extraction from the former.
      if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
        // If this is a clobber and L is the first instruction in its block, then
        // we have the first instruction in the entry block.
        if (DepLI != LI && Address && TD) {
          int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
                                                     LI->getPointerOperand(),
                                                     DepLI, *TD);

          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
                                                                    Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    assert(DepInfo.isDef() && "Expecting def here");

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                         S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor.  Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      allSingleSucc = false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
    }
  }
  if (!NeedToSplit.empty()) {
    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
    return false;
  }

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can
    // be 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty()) {
      Instruction *I = NewInsts.pop_back_val();
      if (MD) MD->removeInstruction(I);
      I->eraseFromParent();
    }
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Transfer DebugLoc.
    NewLoad->setDebugLoc(LI->getDebugLoc());

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  markInstructionForDeletion(LI);
  ++NumPRELoad;
  return true;
}
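// A brief summary of the load PRE policy implemented above: the load is only
// moved when exactly one predecessor block is missing the value, the
// corresponding edge is not a critical edge from an indirectbr, and the load
// address can be phi-translated (or recomputed) into that predecessor.
// Critical edges from ordinary terminators are queued in toSplit and retried
// on a later iteration rather than being split here.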
1657 /// processLoad - Attempt to eliminate a load, first by eliminating it
1658 /// locally, and then attempting non-local elimination if that fails.
1659 bool GVN::processLoad(LoadInst
*L
) {
1663 if (L
->isVolatile())
1666 if (L
->use_empty()) {
1667 markInstructionForDeletion(L
);
1671 // ... to a pointer that has been loaded from before...
1672 MemDepResult Dep
= MD
->getDependency(L
);
1674 // If we have a clobber and target data is around, see if this is a clobber
1675 // that we can fix up through code synthesis.
1676 if (Dep
.isClobber() && TD
) {
1677 // Check to see if we have something like this:
1678 // store i32 123, i32* %P
1679 // %A = bitcast i32* %P to i8*
1680 // %B = gep i8* %A, i32 1
1683 // We could do that by recognizing if the clobber instructions are obviously
1684 // a common base + constant offset, and if the previous store (or memset)
1685 // completely covers this load. This sort of thing can happen in bitfield
1687 Value
*AvailVal
= 0;
1688 if (StoreInst
*DepSI
= dyn_cast
<StoreInst
>(Dep
.getInst())) {
1689 int Offset
= AnalyzeLoadFromClobberingStore(L
->getType(),
1690 L
->getPointerOperand(),
1693 AvailVal
= GetStoreValueForLoad(DepSI
->getValueOperand(), Offset
,
1694 L
->getType(), L
, *TD
);
    // Check to see if we have something like this:
    //    load i32* P
    //    load i8* (P+1)
    // if we have this, replace the latter with an extraction from the former.
    if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
      // If this is a clobber and L is the first instruction in its block, then
      // we have the first instruction in the entry block.
      if (DepLI == L)
        return false;

      int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
                                                 L->getPointerOperand(),
                                                 DepLI, *TD);
      if (Offset != -1)
        AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
    }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepMI, *TD);
      if (Offset != -1)
        AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      markInstructionForDeletion(L);
      ++NumGVNLoad;
      return true;
    }
  }

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    DEBUG(
      // fast print dep, using operator<< on instruction is too slow.
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  if (Dep.isUnknown()) {
    DEBUG(
      // fast print dep, using operator<< on instruction is too slow.
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      dbgs() << " has unknown dependence\n";
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L);

  assert(Dep.isDef() && "Expecting def here");

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getValueOperand();

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
    if (StoredVal->getType() != L->getType()) {
      if (TD) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else
        return false;
    }

    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      } else
        return false;
    }

    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading for a fresh allocation with no
  // intervening stores, for example.
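  // Illustrative sketch (names are hypothetical):
  //   %p = alloca i32
  //   %v = load i32* %p    ; nothing was stored to %p, so %v can become undef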
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    markInstructionForDeletion(L);
    ++NumGVNLoad;
    return true;
  }

  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      markInstructionForDeletion(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}

// findLeader - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
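// Illustrative sketch (names are hypothetical): with leaders %x in block A and
// %y in block B for value number 7, findLeader(C, 7) returns whichever of %x
// or %y lives in a block that dominates C (preferring a constant leader), and
// null if neither A nor B dominates C.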
Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
  LeaderTableEntry Vals = LeaderTable[num];
  if (!Vals.Val) return 0;

  Value *Val = 0;
  if (DT->dominates(Vals.BB, BB)) {
    Val = Vals.Val;
    if (isa<Constant>(Val)) return Val;
  }

  LeaderTableEntry* Next = Vals.Next;
  while (Next) {
    if (DT->dominates(Next->BB, BB)) {
      if (isa<Constant>(Next->Val)) return Next->Val;
      if (!Val) Val = Next->Val;
    }

    Next = Next->Next;
  }

  return Val;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    markInstructionForDeletion(I);
    return true;
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (processLoad(LI))
      return true;

    unsigned Num = VN.lookup_or_add(LI);
    addToLeaderTable(Num, LI, LI->getParent());
    return false;
  }

  // For conditional branches, we can perform simple conditional propagation on
  // the condition value itself.
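  // Illustrative sketch (names are hypothetical): given
  //   br i1 %cmp, label %then, label %else
  // where this block is the sole predecessor of %then and %else, %cmp may be
  // treated as true inside %then and false inside %else, so later uses of
  // %cmp in those blocks can fold to constants.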
  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    if (TrueSucc->getSinglePredecessor())
      addToLeaderTable(CondVN,
                       ConstantInt::getTrue(TrueSucc->getContext()),
                       TrueSucc);
    if (FalseSucc->getSinglePredecessor())
      addToLeaderTable(CondVN,
                       ConstantInt::getFalse(TrueSucc->getContext()),
                       FalseSucc);

    return false;
  }

  // Instructions with void type don't return a value, so there's
  // no point in trying to find redundancies in them.
  if (I->getType()->isVoidTy()) return false;

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  if (Num == NextNum) {
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *repl = findLeader(I->getParent(), Num);
  if (repl == 0) {
    // Failure, just remember this instance for future use.
    addToLeaderTable(Num, I, I->getParent());
    return false;
  }

  // Remove it!
  I->replaceAllUsesWith(repl);
  if (MD && repl->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(repl);
  markInstructionForDeletion(I);
  return true;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI++;

    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;
  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}

bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
  // (and incrementing BI before processing an instruction).
  assert(InstrsToErase.empty() &&
         "We expect InstrsToErase to be empty across iterations");
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI);
    if (InstrsToErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += InstrsToErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
         E = InstrsToErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    InstrsToErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
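/// Illustrative sketch (names are hypothetical): if the join block recomputes
/// "%t = add i32 %a, %b" and an equivalent add is already available in
/// predecessor %p1 but not in predecessor %p2, PRE clones the add into %p2 as
/// "%t.pre" and replaces %t in the join block with a phi of the two
/// predecessor values.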
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value* predV = findLeader(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        delete PREInstr;
        DEBUG(verifyRemoved(PREInstr));
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      PREInstr->setDebugLoc(CurInst->getDebugLoc());
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      addToLeaderTable(ValNo, PREInstr, PREPred);

      // Create a PHI to make the value available in this block.
      pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
      PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = PB; PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }

      VN.add(Phi, ValNo);
      addToLeaderTable(ValNo, Phi, CurrentBlock);
      Phi->setDebugLoc(CurInst->getDebugLoc());
      CurInst->replaceAllUsesWith(Phi);
      if (Phi->getType()->isPointerTy()) {
        // Because we have added a PHI-use of the pointer value, it has now
        // "escaped" from alias analysis' perspective.  We need to inform
        // AA of this.
        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
             ++ii) {
          unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
        }

        if (MD)
          MD->invalidateCachedPointerInfo(Phi);
      }
      VN.erase(CurInst);
      removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  TableAllocator.Reset();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
       I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
    const LeaderTableEntry *Node = &I->second;
    assert(Node->Val != Inst && "Inst still in value numbering scope!");

    while (Node->Next) {
      Node = Node->Next;
      assert(Node->Val != Inst && "Inst still in value numbering scope!");
    }
  }
}