1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This header defines the implementation of the LLVM difference
11 // engine, which structurally compares global values within a module.
13 //===----------------------------------------------------------------------===//
15 #include "DifferenceEngine.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/CallSite.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/Instructions.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Support/type_traits.h"
35 /// A priority queue, implemented as a heap.
36 template <class T
, class Sorter
, unsigned InlineCapacity
>
39 llvm::SmallVector
<T
, InlineCapacity
> Storage
;
42 PriorityQueue(const Sorter
&Precedes
) : Precedes(Precedes
) {}
44 /// Checks whether the heap is empty.
45 bool empty() const { return Storage
.empty(); }
47 /// Insert a new value on the heap.
48 void insert(const T
&V
) {
49 unsigned Index
= Storage
.size();
51 if (Index
== 0) return;
53 T
*data
= Storage
.data();
55 unsigned Target
= (Index
+ 1) / 2 - 1;
56 if (!Precedes(data
[Index
], data
[Target
])) return;
57 std::swap(data
[Index
], data
[Target
]);
58 if (Target
== 0) return;
63 /// Remove the minimum value in the heap. Only valid on a non-empty heap.
68 unsigned NewSize
= Storage
.size() - 1;
70 // Move the slot at the end to the beginning.
71 if (isPodLike
<T
>::value
)
72 Storage
[0] = Storage
[NewSize
];
74 std::swap(Storage
[0], Storage
[NewSize
]);
76 // Bubble the root up as necessary.
79 // With a 1-based index, the children would be Index*2 and Index*2+1.
80 unsigned R
= (Index
+ 1) * 2;
83 // If R is out of bounds, we're done after this in any case.
85 // If L is also out of bounds, we're done immediately.
86 if (L
>= NewSize
) break;
88 // Otherwise, test whether we should swap L and Index.
89 if (Precedes(Storage
[L
], Storage
[Index
]))
90 std::swap(Storage
[L
], Storage
[Index
]);
94 // Otherwise, we need to compare with the smaller of L and R.
95 // Prefer R because it's closer to the end of the array.
96 unsigned IndexToTest
= (Precedes(Storage
[L
], Storage
[R
]) ? L
: R
);
98 // If Index is >= the min of L and R, then heap ordering is restored.
99 if (!Precedes(Storage
[IndexToTest
], Storage
[Index
]))
102 // Otherwise, keep bubbling up.
103 std::swap(Storage
[IndexToTest
], Storage
[Index
]);
113 /// A function-scope difference engine.
114 class FunctionDifferenceEngine
{
115 DifferenceEngine
&Engine
;
117 /// The current mapping from old local values to new local values.
118 DenseMap
<Value
*, Value
*> Values
;
120 /// The current mapping from old blocks to new blocks.
121 DenseMap
<BasicBlock
*, BasicBlock
*> Blocks
;
123 DenseSet
<std::pair
<Value
*, Value
*> > TentativeValues
;
125 unsigned getUnprocPredCount(BasicBlock
*Block
) const {
127 for (pred_iterator I
= pred_begin(Block
), E
= pred_end(Block
); I
!= E
; ++I
)
128 if (!Blocks
.count(*I
)) Count
++;
132 typedef std::pair
<BasicBlock
*, BasicBlock
*> BlockPair
;
134 /// A type which sorts a priority queue by the number of unprocessed
135 /// predecessor blocks it has remaining.
137 /// This is actually really expensive to calculate.
139 const FunctionDifferenceEngine
&fde
;
140 explicit QueueSorter(const FunctionDifferenceEngine
&fde
) : fde(fde
) {}
142 bool operator()(const BlockPair
&Old
, const BlockPair
&New
) {
143 return fde
.getUnprocPredCount(Old
.first
)
144 < fde
.getUnprocPredCount(New
.first
);
148 /// A queue of unified blocks to process.
149 PriorityQueue
<BlockPair
, QueueSorter
, 20> Queue
;
151 /// Try to unify the given two blocks. Enqueues them for processing
152 /// if they haven't already been processed.
154 /// Returns true if there was a problem unifying them.
155 bool tryUnify(BasicBlock
*L
, BasicBlock
*R
) {
156 BasicBlock
*&Ref
= Blocks
[L
];
159 if (Ref
== R
) return false;
161 Engine
.logf("successor %l cannot be equivalent to %r; "
162 "it's already equivalent to %r")
168 Queue
.insert(BlockPair(L
, R
));
172 /// Unifies two instructions, given that they're known not to have
173 /// structural differences.
174 void unify(Instruction
*L
, Instruction
*R
) {
175 DifferenceEngine::Context
C(Engine
, L
, R
);
177 bool Result
= diff(L
, R
, true, true);
178 assert(!Result
&& "structural differences second time around?");
184 void processQueue() {
185 while (!Queue
.empty()) {
186 BlockPair Pair
= Queue
.remove_min();
187 diff(Pair
.first
, Pair
.second
);
191 void diff(BasicBlock
*L
, BasicBlock
*R
) {
192 DifferenceEngine::Context
C(Engine
, L
, R
);
194 BasicBlock::iterator LI
= L
->begin(), LE
= L
->end();
195 BasicBlock::iterator RI
= R
->begin();
198 assert(LI
!= LE
&& RI
!= R
->end());
199 Instruction
*LeftI
= &*LI
, *RightI
= &*RI
;
201 // If the instructions differ, start the more sophisticated diff
202 // algorithm at the start of the block.
203 if (diff(LeftI
, RightI
, false, false)) {
204 TentativeValues
.clear();
205 return runBlockDiff(L
->begin(), R
->begin());
208 // Otherwise, tentatively unify them.
209 if (!LeftI
->use_empty())
210 TentativeValues
.insert(std::make_pair(LeftI
, RightI
));
214 } while (LI
!= LE
); // This is sufficient: we can't get equality of
215 // terminators if there are residual instructions.
217 // Unify everything in the block, non-tentatively this time.
218 TentativeValues
.clear();
219 for (LI
= L
->begin(), RI
= R
->begin(); LI
!= LE
; ++LI
, ++RI
)
223 bool matchForBlockDiff(Instruction
*L
, Instruction
*R
);
224 void runBlockDiff(BasicBlock::iterator LI
, BasicBlock::iterator RI
);
226 bool diffCallSites(CallSite L
, CallSite R
, bool Complain
) {
227 // FIXME: call attributes
228 if (!equivalentAsOperands(L
.getCalledValue(), R
.getCalledValue())) {
229 if (Complain
) Engine
.log("called functions differ");
232 if (L
.arg_size() != R
.arg_size()) {
233 if (Complain
) Engine
.log("argument counts differ");
236 for (unsigned I
= 0, E
= L
.arg_size(); I
!= E
; ++I
)
237 if (!equivalentAsOperands(L
.getArgument(I
), R
.getArgument(I
))) {
239 Engine
.logf("arguments %l and %r differ")
240 << L
.getArgument(I
) << R
.getArgument(I
);
246 bool diff(Instruction
*L
, Instruction
*R
, bool Complain
, bool TryUnify
) {
247 // FIXME: metadata (if Complain is set)
249 // Different opcodes always imply different operations.
250 if (L
->getOpcode() != R
->getOpcode()) {
251 if (Complain
) Engine
.log("different instruction types");
255 if (isa
<CmpInst
>(L
)) {
256 if (cast
<CmpInst
>(L
)->getPredicate()
257 != cast
<CmpInst
>(R
)->getPredicate()) {
258 if (Complain
) Engine
.log("different predicates");
261 } else if (isa
<CallInst
>(L
)) {
262 return diffCallSites(CallSite(L
), CallSite(R
), Complain
);
263 } else if (isa
<PHINode
>(L
)) {
266 // This is really weird; type uniquing is broken?
267 if (L
->getType() != R
->getType()) {
268 if (!L
->getType()->isPointerTy() || !R
->getType()->isPointerTy()) {
269 if (Complain
) Engine
.log("different phi types");
276 } else if (isa
<InvokeInst
>(L
)) {
277 InvokeInst
*LI
= cast
<InvokeInst
>(L
);
278 InvokeInst
*RI
= cast
<InvokeInst
>(R
);
279 if (diffCallSites(CallSite(LI
), CallSite(RI
), Complain
))
283 tryUnify(LI
->getNormalDest(), RI
->getNormalDest());
284 tryUnify(LI
->getUnwindDest(), RI
->getUnwindDest());
288 } else if (isa
<BranchInst
>(L
)) {
289 BranchInst
*LI
= cast
<BranchInst
>(L
);
290 BranchInst
*RI
= cast
<BranchInst
>(R
);
291 if (LI
->isConditional() != RI
->isConditional()) {
292 if (Complain
) Engine
.log("branch conditionality differs");
296 if (LI
->isConditional()) {
297 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
298 if (Complain
) Engine
.log("branch conditions differ");
301 if (TryUnify
) tryUnify(LI
->getSuccessor(1), RI
->getSuccessor(1));
303 if (TryUnify
) tryUnify(LI
->getSuccessor(0), RI
->getSuccessor(0));
306 } else if (isa
<IndirectBrInst
>(L
)) {
307 IndirectBrInst
*LI
= cast
<IndirectBrInst
>(L
);
308 IndirectBrInst
*RI
= cast
<IndirectBrInst
>(R
);
309 if (LI
->getNumDestinations() != RI
->getNumDestinations()) {
310 if (Complain
) Engine
.log("indirectbr # of destinations differ");
314 if (!equivalentAsOperands(LI
->getAddress(), RI
->getAddress())) {
315 if (Complain
) Engine
.log("indirectbr addresses differ");
320 for (unsigned i
= 0; i
< LI
->getNumDestinations(); i
++) {
321 tryUnify(LI
->getDestination(i
), RI
->getDestination(i
));
326 } else if (isa
<SwitchInst
>(L
)) {
327 SwitchInst
*LI
= cast
<SwitchInst
>(L
);
328 SwitchInst
*RI
= cast
<SwitchInst
>(R
);
329 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
330 if (Complain
) Engine
.log("switch conditions differ");
333 if (TryUnify
) tryUnify(LI
->getDefaultDest(), RI
->getDefaultDest());
335 bool Difference
= false;
337 DenseMap
<ConstantInt
*,BasicBlock
*> LCases
;
338 for (auto Case
: LI
->cases())
339 LCases
[Case
.getCaseValue()] = Case
.getCaseSuccessor();
341 for (auto Case
: RI
->cases()) {
342 ConstantInt
*CaseValue
= Case
.getCaseValue();
343 BasicBlock
*LCase
= LCases
[CaseValue
];
346 tryUnify(LCase
, Case
.getCaseSuccessor());
347 LCases
.erase(CaseValue
);
348 } else if (Complain
|| !Difference
) {
350 Engine
.logf("right switch has extra case %r") << CaseValue
;
355 for (DenseMap
<ConstantInt
*,BasicBlock
*>::iterator
356 I
= LCases
.begin(), E
= LCases
.end(); I
!= E
; ++I
) {
358 Engine
.logf("left switch has extra case %l") << I
->first
;
362 } else if (isa
<UnreachableInst
>(L
)) {
366 if (L
->getNumOperands() != R
->getNumOperands()) {
367 if (Complain
) Engine
.log("instructions have different operand counts");
371 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
) {
372 Value
*LO
= L
->getOperand(I
), *RO
= R
->getOperand(I
);
373 if (!equivalentAsOperands(LO
, RO
)) {
374 if (Complain
) Engine
.logf("operands %l and %r differ") << LO
<< RO
;
382 bool equivalentAsOperands(Constant
*L
, Constant
*R
) {
383 // Use equality as a preliminary filter.
387 if (L
->getValueID() != R
->getValueID())
390 // Ask the engine about global values.
391 if (isa
<GlobalValue
>(L
))
392 return Engine
.equivalentAsOperands(cast
<GlobalValue
>(L
),
393 cast
<GlobalValue
>(R
));
395 // Compare constant expressions structurally.
396 if (isa
<ConstantExpr
>(L
))
397 return equivalentAsOperands(cast
<ConstantExpr
>(L
),
398 cast
<ConstantExpr
>(R
));
400 // Constants of the "same type" don't always actually have the same
401 // type; I don't know why. Just white-list them.
402 if (isa
<ConstantPointerNull
>(L
) || isa
<UndefValue
>(L
) || isa
<ConstantAggregateZero
>(L
))
405 // Block addresses only match if we've already encountered the
406 // block. FIXME: tentative matches?
407 if (isa
<BlockAddress
>(L
))
408 return Blocks
[cast
<BlockAddress
>(L
)->getBasicBlock()]
409 == cast
<BlockAddress
>(R
)->getBasicBlock();
411 // If L and R are ConstantVectors, compare each element
412 if (isa
<ConstantVector
>(L
)) {
413 ConstantVector
*CVL
= cast
<ConstantVector
>(L
);
414 ConstantVector
*CVR
= cast
<ConstantVector
>(R
);
415 if (CVL
->getType()->getNumElements() != CVR
->getType()->getNumElements())
417 for (unsigned i
= 0; i
< CVL
->getType()->getNumElements(); i
++) {
418 if (!equivalentAsOperands(CVL
->getOperand(i
), CVR
->getOperand(i
)))
427 bool equivalentAsOperands(ConstantExpr
*L
, ConstantExpr
*R
) {
430 if (L
->getOpcode() != R
->getOpcode())
433 switch (L
->getOpcode()) {
434 case Instruction::ICmp
:
435 case Instruction::FCmp
:
436 if (L
->getPredicate() != R
->getPredicate())
440 case Instruction::GetElementPtr
:
448 if (L
->getNumOperands() != R
->getNumOperands())
451 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
)
452 if (!equivalentAsOperands(L
->getOperand(I
), R
->getOperand(I
)))
458 bool equivalentAsOperands(Value
*L
, Value
*R
) {
459 // Fall out if the values have different kind.
460 // This possibly shouldn't take priority over oracles.
461 if (L
->getValueID() != R
->getValueID())
464 // Value subtypes: Argument, Constant, Instruction, BasicBlock,
465 // InlineAsm, MDNode, MDString, PseudoSourceValue
467 if (isa
<Constant
>(L
))
468 return equivalentAsOperands(cast
<Constant
>(L
), cast
<Constant
>(R
));
470 if (isa
<Instruction
>(L
))
471 return Values
[L
] == R
|| TentativeValues
.count(std::make_pair(L
, R
));
473 if (isa
<Argument
>(L
))
474 return Values
[L
] == R
;
476 if (isa
<BasicBlock
>(L
))
477 return Blocks
[cast
<BasicBlock
>(L
)] != R
;
479 // Pretend everything else is identical.
483 // Avoid a gcc warning about accessing 'this' in an initializer.
484 FunctionDifferenceEngine
*this_() { return this; }
487 FunctionDifferenceEngine(DifferenceEngine
&Engine
) :
488 Engine(Engine
), Queue(QueueSorter(*this_())) {}
490 void diff(Function
*L
, Function
*R
) {
491 if (L
->arg_size() != R
->arg_size())
492 Engine
.log("different argument counts");
494 // Map the arguments.
495 for (Function::arg_iterator
496 LI
= L
->arg_begin(), LE
= L
->arg_end(),
497 RI
= R
->arg_begin(), RE
= R
->arg_end();
498 LI
!= LE
&& RI
!= RE
; ++LI
, ++RI
)
501 tryUnify(&*L
->begin(), &*R
->begin());
507 DiffEntry() : Cost(0) {}
510 llvm::SmallVector
<char, 8> Path
; // actually of DifferenceEngine::DiffChange
513 bool FunctionDifferenceEngine::matchForBlockDiff(Instruction
*L
,
515 return !diff(L
, R
, false, false);
518 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart
,
519 BasicBlock::iterator RStart
) {
520 BasicBlock::iterator LE
= LStart
->getParent()->end();
521 BasicBlock::iterator RE
= RStart
->getParent()->end();
523 unsigned NL
= std::distance(LStart
, LE
);
525 SmallVector
<DiffEntry
, 20> Paths1(NL
+1);
526 SmallVector
<DiffEntry
, 20> Paths2(NL
+1);
528 DiffEntry
*Cur
= Paths1
.data();
529 DiffEntry
*Next
= Paths2
.data();
531 const unsigned LeftCost
= 2;
532 const unsigned RightCost
= 2;
533 const unsigned MatchCost
= 0;
535 assert(TentativeValues
.empty());
537 // Initialize the first column.
538 for (unsigned I
= 0; I
!= NL
+1; ++I
) {
539 Cur
[I
].Cost
= I
* LeftCost
;
540 for (unsigned J
= 0; J
!= I
; ++J
)
541 Cur
[I
].Path
.push_back(DC_left
);
544 for (BasicBlock::iterator RI
= RStart
; RI
!= RE
; ++RI
) {
545 // Initialize the first row.
547 Next
[0].Cost
+= RightCost
;
548 Next
[0].Path
.push_back(DC_right
);
551 for (BasicBlock::iterator LI
= LStart
; LI
!= LE
; ++LI
, ++Index
) {
552 if (matchForBlockDiff(&*LI
, &*RI
)) {
553 Next
[Index
] = Cur
[Index
-1];
554 Next
[Index
].Cost
+= MatchCost
;
555 Next
[Index
].Path
.push_back(DC_match
);
556 TentativeValues
.insert(std::make_pair(&*LI
, &*RI
));
557 } else if (Next
[Index
-1].Cost
<= Cur
[Index
].Cost
) {
558 Next
[Index
] = Next
[Index
-1];
559 Next
[Index
].Cost
+= LeftCost
;
560 Next
[Index
].Path
.push_back(DC_left
);
562 Next
[Index
] = Cur
[Index
];
563 Next
[Index
].Cost
+= RightCost
;
564 Next
[Index
].Path
.push_back(DC_right
);
568 std::swap(Cur
, Next
);
571 // We don't need the tentative values anymore; everything from here
572 // on out should be non-tentative.
573 TentativeValues
.clear();
575 SmallVectorImpl
<char> &Path
= Cur
[NL
].Path
;
576 BasicBlock::iterator LI
= LStart
, RI
= RStart
;
578 DiffLogBuilder
Diff(Engine
.getConsumer());
580 // Drop trailing matches.
581 while (Path
.back() == DC_match
)
584 // Skip leading matches.
585 SmallVectorImpl
<char>::iterator
586 PI
= Path
.begin(), PE
= Path
.end();
587 while (PI
!= PE
&& *PI
== DC_match
) {
594 for (; PI
!= PE
; ++PI
) {
595 switch (static_cast<DiffChange
>(*PI
)) {
597 assert(LI
!= LE
&& RI
!= RE
);
599 Instruction
*L
= &*LI
, *R
= &*RI
;
620 // Finishing unifying and complaining about the tails of the block,
621 // which should be matches all the way through.
629 // If the terminators have different kinds, but one is an invoke and the
630 // other is an unconditional branch immediately following a call, unify
631 // the results and the destinations.
632 TerminatorInst
*LTerm
= LStart
->getParent()->getTerminator();
633 TerminatorInst
*RTerm
= RStart
->getParent()->getTerminator();
634 if (isa
<BranchInst
>(LTerm
) && isa
<InvokeInst
>(RTerm
)) {
635 if (cast
<BranchInst
>(LTerm
)->isConditional()) return;
636 BasicBlock::iterator I
= LTerm
->getIterator();
637 if (I
== LStart
->getParent()->begin()) return;
639 if (!isa
<CallInst
>(*I
)) return;
640 CallInst
*LCall
= cast
<CallInst
>(&*I
);
641 InvokeInst
*RInvoke
= cast
<InvokeInst
>(RTerm
);
642 if (!equivalentAsOperands(LCall
->getCalledValue(), RInvoke
->getCalledValue()))
644 if (!LCall
->use_empty())
645 Values
[LCall
] = RInvoke
;
646 tryUnify(LTerm
->getSuccessor(0), RInvoke
->getNormalDest());
647 } else if (isa
<InvokeInst
>(LTerm
) && isa
<BranchInst
>(RTerm
)) {
648 if (cast
<BranchInst
>(RTerm
)->isConditional()) return;
649 BasicBlock::iterator I
= RTerm
->getIterator();
650 if (I
== RStart
->getParent()->begin()) return;
652 if (!isa
<CallInst
>(*I
)) return;
653 CallInst
*RCall
= cast
<CallInst
>(I
);
654 InvokeInst
*LInvoke
= cast
<InvokeInst
>(LTerm
);
655 if (!equivalentAsOperands(LInvoke
->getCalledValue(), RCall
->getCalledValue()))
657 if (!LInvoke
->use_empty())
658 Values
[LInvoke
] = RCall
;
659 tryUnify(LInvoke
->getNormalDest(), RTerm
->getSuccessor(0));
665 void DifferenceEngine::Oracle::anchor() { }
667 void DifferenceEngine::diff(Function
*L
, Function
*R
) {
668 Context
C(*this, L
, R
);
671 // FIXME: attributes and CC
672 // FIXME: parameter attributes
674 // If both are declarations, we're done.
675 if (L
->empty() && R
->empty())
678 log("left function is declaration, right function is definition");
680 log("right function is declaration, left function is definition");
682 FunctionDifferenceEngine(*this).diff(L
, R
);
685 void DifferenceEngine::diff(Module
*L
, Module
*R
) {
687 SmallVector
<std::pair
<Function
*,Function
*>, 20> Queue
;
689 for (Module::iterator I
= L
->begin(), E
= L
->end(); I
!= E
; ++I
) {
691 LNames
.insert(LFn
->getName());
693 if (Function
*RFn
= R
->getFunction(LFn
->getName()))
694 Queue
.push_back(std::make_pair(LFn
, RFn
));
696 logf("function %l exists only in left module") << LFn
;
699 for (Module::iterator I
= R
->begin(), E
= R
->end(); I
!= E
; ++I
) {
701 if (!LNames
.count(RFn
->getName()))
702 logf("function %r exists only in right module") << RFn
;
705 for (SmallVectorImpl
<std::pair
<Function
*,Function
*> >::iterator
706 I
= Queue
.begin(), E
= Queue
.end(); I
!= E
; ++I
)
707 diff(I
->first
, I
->second
);
710 bool DifferenceEngine::equivalentAsOperands(GlobalValue
*L
, GlobalValue
*R
) {
711 if (globalValueOracle
) return (*globalValueOracle
)(L
, R
);
712 return L
->getName() == R
->getName();