1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This header defines the implementation of the LLVM difference
11 // engine, which structurally compares global values within a module.
13 //===----------------------------------------------------------------------===//
15 #include "DifferenceEngine.h"
17 #include "llvm/Constants.h"
18 #include "llvm/Function.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/Module.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSet.h"
26 #include "llvm/Support/CallSite.h"
27 #include "llvm/Support/CFG.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Support/type_traits.h"
38 /// A priority queue, implemented as a heap.
39 template <class T
, class Sorter
, unsigned InlineCapacity
>
42 llvm::SmallVector
<T
, InlineCapacity
> Storage
;
45 PriorityQueue(const Sorter
&Precedes
) : Precedes(Precedes
) {}
47 /// Checks whether the heap is empty.
48 bool empty() const { return Storage
.empty(); }
50 /// Insert a new value on the heap.
51 void insert(const T
&V
) {
52 unsigned Index
= Storage
.size();
54 if (Index
== 0) return;
56 T
*data
= Storage
.data();
58 unsigned Target
= (Index
+ 1) / 2 - 1;
59 if (!Precedes(data
[Index
], data
[Target
])) return;
60 std::swap(data
[Index
], data
[Target
]);
61 if (Target
== 0) return;
66 /// Remove the minimum value in the heap. Only valid on a non-empty heap.
71 unsigned NewSize
= Storage
.size() - 1;
73 // Move the slot at the end to the beginning.
74 if (isPodLike
<T
>::value
)
75 Storage
[0] = Storage
[NewSize
];
77 std::swap(Storage
[0], Storage
[NewSize
]);
79 // Bubble the root up as necessary.
82 // With a 1-based index, the children would be Index*2 and Index*2+1.
83 unsigned R
= (Index
+ 1) * 2;
86 // If R is out of bounds, we're done after this in any case.
88 // If L is also out of bounds, we're done immediately.
89 if (L
>= NewSize
) break;
91 // Otherwise, test whether we should swap L and Index.
92 if (Precedes(Storage
[L
], Storage
[Index
]))
93 std::swap(Storage
[L
], Storage
[Index
]);
97 // Otherwise, we need to compare with the smaller of L and R.
98 // Prefer R because it's closer to the end of the array.
99 unsigned IndexToTest
= (Precedes(Storage
[L
], Storage
[R
]) ? L
: R
);
101 // If Index is >= the min of L and R, then heap ordering is restored.
102 if (!Precedes(Storage
[IndexToTest
], Storage
[Index
]))
105 // Otherwise, keep bubbling up.
106 std::swap(Storage
[IndexToTest
], Storage
[Index
]);
116 /// A function-scope difference engine.
117 class FunctionDifferenceEngine
{
118 DifferenceEngine
&Engine
;
120 /// The current mapping from old local values to new local values.
121 DenseMap
<Value
*, Value
*> Values
;
123 /// The current mapping from old blocks to new blocks.
124 DenseMap
<BasicBlock
*, BasicBlock
*> Blocks
;
126 DenseSet
<std::pair
<Value
*, Value
*> > TentativeValues
;
128 unsigned getUnprocPredCount(BasicBlock
*Block
) const {
130 for (pred_iterator I
= pred_begin(Block
), E
= pred_end(Block
); I
!= E
; ++I
)
131 if (!Blocks
.count(*I
)) Count
++;
135 typedef std::pair
<BasicBlock
*, BasicBlock
*> BlockPair
;
137 /// A type which sorts a priority queue by the number of unprocessed
138 /// predecessor blocks it has remaining.
140 /// This is actually really expensive to calculate.
142 const FunctionDifferenceEngine
&fde
;
143 explicit QueueSorter(const FunctionDifferenceEngine
&fde
) : fde(fde
) {}
145 bool operator()(const BlockPair
&Old
, const BlockPair
&New
) {
146 return fde
.getUnprocPredCount(Old
.first
)
147 < fde
.getUnprocPredCount(New
.first
);
151 /// A queue of unified blocks to process.
152 PriorityQueue
<BlockPair
, QueueSorter
, 20> Queue
;
154 /// Try to unify the given two blocks. Enqueues them for processing
155 /// if they haven't already been processed.
157 /// Returns true if there was a problem unifying them.
158 bool tryUnify(BasicBlock
*L
, BasicBlock
*R
) {
159 BasicBlock
*&Ref
= Blocks
[L
];
162 if (Ref
== R
) return false;
164 Engine
.logf("successor %l cannot be equivalent to %r; "
165 "it's already equivalent to %r")
171 Queue
.insert(BlockPair(L
, R
));
175 /// Unifies two instructions, given that they're known not to have
176 /// structural differences.
177 void unify(Instruction
*L
, Instruction
*R
) {
178 DifferenceEngine::Context
C(Engine
, L
, R
);
180 bool Result
= diff(L
, R
, true, true);
181 assert(!Result
&& "structural differences second time around?");
187 void processQueue() {
188 while (!Queue
.empty()) {
189 BlockPair Pair
= Queue
.remove_min();
190 diff(Pair
.first
, Pair
.second
);
194 void diff(BasicBlock
*L
, BasicBlock
*R
) {
195 DifferenceEngine::Context
C(Engine
, L
, R
);
197 BasicBlock::iterator LI
= L
->begin(), LE
= L
->end();
198 BasicBlock::iterator RI
= R
->begin(), RE
= R
->end();
200 llvm::SmallVector
<std::pair
<Instruction
*,Instruction
*>, 20> TentativePairs
;
203 assert(LI
!= LE
&& RI
!= RE
);
204 Instruction
*LeftI
= &*LI
, *RightI
= &*RI
;
206 // If the instructions differ, start the more sophisticated diff
207 // algorithm at the start of the block.
208 if (diff(LeftI
, RightI
, false, false)) {
209 TentativeValues
.clear();
210 return runBlockDiff(L
->begin(), R
->begin());
213 // Otherwise, tentatively unify them.
214 if (!LeftI
->use_empty())
215 TentativeValues
.insert(std::make_pair(LeftI
, RightI
));
218 } while (LI
!= LE
); // This is sufficient: we can't get equality of
219 // terminators if there are residual instructions.
221 // Unify everything in the block, non-tentatively this time.
222 TentativeValues
.clear();
223 for (LI
= L
->begin(), RI
= R
->begin(); LI
!= LE
; ++LI
, ++RI
)
227 bool matchForBlockDiff(Instruction
*L
, Instruction
*R
);
228 void runBlockDiff(BasicBlock::iterator LI
, BasicBlock::iterator RI
);
230 bool diffCallSites(CallSite L
, CallSite R
, bool Complain
) {
231 // FIXME: call attributes
232 if (!equivalentAsOperands(L
.getCalledValue(), R
.getCalledValue())) {
233 if (Complain
) Engine
.log("called functions differ");
236 if (L
.arg_size() != R
.arg_size()) {
237 if (Complain
) Engine
.log("argument counts differ");
240 for (unsigned I
= 0, E
= L
.arg_size(); I
!= E
; ++I
)
241 if (!equivalentAsOperands(L
.getArgument(I
), R
.getArgument(I
))) {
243 Engine
.logf("arguments %l and %r differ")
244 << L
.getArgument(I
) << R
.getArgument(I
);
250 bool diff(Instruction
*L
, Instruction
*R
, bool Complain
, bool TryUnify
) {
251 // FIXME: metadata (if Complain is set)
253 // Different opcodes always imply different operations.
254 if (L
->getOpcode() != R
->getOpcode()) {
255 if (Complain
) Engine
.log("different instruction types");
259 if (isa
<CmpInst
>(L
)) {
260 if (cast
<CmpInst
>(L
)->getPredicate()
261 != cast
<CmpInst
>(R
)->getPredicate()) {
262 if (Complain
) Engine
.log("different predicates");
265 } else if (isa
<CallInst
>(L
)) {
266 return diffCallSites(CallSite(L
), CallSite(R
), Complain
);
267 } else if (isa
<PHINode
>(L
)) {
270 // This is really weird; type uniquing is broken?
271 if (L
->getType() != R
->getType()) {
272 if (!L
->getType()->isPointerTy() || !R
->getType()->isPointerTy()) {
273 if (Complain
) Engine
.log("different phi types");
280 } else if (isa
<InvokeInst
>(L
)) {
281 InvokeInst
*LI
= cast
<InvokeInst
>(L
);
282 InvokeInst
*RI
= cast
<InvokeInst
>(R
);
283 if (diffCallSites(CallSite(LI
), CallSite(RI
), Complain
))
287 tryUnify(LI
->getNormalDest(), RI
->getNormalDest());
288 tryUnify(LI
->getUnwindDest(), RI
->getUnwindDest());
292 } else if (isa
<BranchInst
>(L
)) {
293 BranchInst
*LI
= cast
<BranchInst
>(L
);
294 BranchInst
*RI
= cast
<BranchInst
>(R
);
295 if (LI
->isConditional() != RI
->isConditional()) {
296 if (Complain
) Engine
.log("branch conditionality differs");
300 if (LI
->isConditional()) {
301 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
302 if (Complain
) Engine
.log("branch conditions differ");
305 if (TryUnify
) tryUnify(LI
->getSuccessor(1), RI
->getSuccessor(1));
307 if (TryUnify
) tryUnify(LI
->getSuccessor(0), RI
->getSuccessor(0));
310 } else if (isa
<SwitchInst
>(L
)) {
311 SwitchInst
*LI
= cast
<SwitchInst
>(L
);
312 SwitchInst
*RI
= cast
<SwitchInst
>(R
);
313 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
314 if (Complain
) Engine
.log("switch conditions differ");
317 if (TryUnify
) tryUnify(LI
->getDefaultDest(), RI
->getDefaultDest());
319 bool Difference
= false;
321 DenseMap
<ConstantInt
*,BasicBlock
*> LCases
;
322 for (unsigned I
= 1, E
= LI
->getNumCases(); I
!= E
; ++I
)
323 LCases
[LI
->getCaseValue(I
)] = LI
->getSuccessor(I
);
324 for (unsigned I
= 1, E
= RI
->getNumCases(); I
!= E
; ++I
) {
325 ConstantInt
*CaseValue
= RI
->getCaseValue(I
);
326 BasicBlock
*LCase
= LCases
[CaseValue
];
328 if (TryUnify
) tryUnify(LCase
, RI
->getSuccessor(I
));
329 LCases
.erase(CaseValue
);
330 } else if (!Difference
) {
332 Engine
.logf("right switch has extra case %r") << CaseValue
;
337 for (DenseMap
<ConstantInt
*,BasicBlock
*>::iterator
338 I
= LCases
.begin(), E
= LCases
.end(); I
!= E
; ++I
) {
340 Engine
.logf("left switch has extra case %l") << I
->first
;
344 } else if (isa
<UnreachableInst
>(L
)) {
348 if (L
->getNumOperands() != R
->getNumOperands()) {
349 if (Complain
) Engine
.log("instructions have different operand counts");
353 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
) {
354 Value
*LO
= L
->getOperand(I
), *RO
= R
->getOperand(I
);
355 if (!equivalentAsOperands(LO
, RO
)) {
356 if (Complain
) Engine
.logf("operands %l and %r differ") << LO
<< RO
;
364 bool equivalentAsOperands(Constant
*L
, Constant
*R
) {
365 // Use equality as a preliminary filter.
369 if (L
->getValueID() != R
->getValueID())
372 // Ask the engine about global values.
373 if (isa
<GlobalValue
>(L
))
374 return Engine
.equivalentAsOperands(cast
<GlobalValue
>(L
),
375 cast
<GlobalValue
>(R
));
377 // Compare constant expressions structurally.
378 if (isa
<ConstantExpr
>(L
))
379 return equivalentAsOperands(cast
<ConstantExpr
>(L
),
380 cast
<ConstantExpr
>(R
));
382 // Nulls of the "same type" don't always actually have the same
383 // type; I don't know why. Just white-list them.
384 if (isa
<ConstantPointerNull
>(L
))
387 // Block addresses only match if we've already encountered the
388 // block. FIXME: tentative matches?
389 if (isa
<BlockAddress
>(L
))
390 return Blocks
[cast
<BlockAddress
>(L
)->getBasicBlock()]
391 == cast
<BlockAddress
>(R
)->getBasicBlock();
396 bool equivalentAsOperands(ConstantExpr
*L
, ConstantExpr
*R
) {
399 if (L
->getOpcode() != R
->getOpcode())
402 switch (L
->getOpcode()) {
403 case Instruction::ICmp
:
404 case Instruction::FCmp
:
405 if (L
->getPredicate() != R
->getPredicate())
409 case Instruction::GetElementPtr
:
417 if (L
->getNumOperands() != R
->getNumOperands())
420 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
)
421 if (!equivalentAsOperands(L
->getOperand(I
), R
->getOperand(I
)))
427 bool equivalentAsOperands(Value
*L
, Value
*R
) {
428 // Fall out if the values have different kind.
429 // This possibly shouldn't take priority over oracles.
430 if (L
->getValueID() != R
->getValueID())
433 // Value subtypes: Argument, Constant, Instruction, BasicBlock,
434 // InlineAsm, MDNode, MDString, PseudoSourceValue
436 if (isa
<Constant
>(L
))
437 return equivalentAsOperands(cast
<Constant
>(L
), cast
<Constant
>(R
));
439 if (isa
<Instruction
>(L
))
440 return Values
[L
] == R
|| TentativeValues
.count(std::make_pair(L
, R
));
442 if (isa
<Argument
>(L
))
443 return Values
[L
] == R
;
445 if (isa
<BasicBlock
>(L
))
446 return Blocks
[cast
<BasicBlock
>(L
)] != R
;
448 // Pretend everything else is identical.
452 // Avoid a gcc warning about accessing 'this' in an initializer.
453 FunctionDifferenceEngine
*this_() { return this; }
456 FunctionDifferenceEngine(DifferenceEngine
&Engine
) :
457 Engine(Engine
), Queue(QueueSorter(*this_())) {}
459 void diff(Function
*L
, Function
*R
) {
460 if (L
->arg_size() != R
->arg_size())
461 Engine
.log("different argument counts");
463 // Map the arguments.
464 for (Function::arg_iterator
465 LI
= L
->arg_begin(), LE
= L
->arg_end(),
466 RI
= R
->arg_begin(), RE
= R
->arg_end();
467 LI
!= LE
&& RI
!= RE
; ++LI
, ++RI
)
470 tryUnify(&*L
->begin(), &*R
->begin());
476 DiffEntry() : Cost(0) {}
479 llvm::SmallVector
<char, 8> Path
; // actually of DifferenceEngine::DiffChange
482 bool FunctionDifferenceEngine::matchForBlockDiff(Instruction
*L
,
484 return !diff(L
, R
, false, false);
487 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart
,
488 BasicBlock::iterator RStart
) {
489 BasicBlock::iterator LE
= LStart
->getParent()->end();
490 BasicBlock::iterator RE
= RStart
->getParent()->end();
492 unsigned NL
= std::distance(LStart
, LE
);
494 SmallVector
<DiffEntry
, 20> Paths1(NL
+1);
495 SmallVector
<DiffEntry
, 20> Paths2(NL
+1);
497 DiffEntry
*Cur
= Paths1
.data();
498 DiffEntry
*Next
= Paths2
.data();
500 const unsigned LeftCost
= 2;
501 const unsigned RightCost
= 2;
502 const unsigned MatchCost
= 0;
504 assert(TentativeValues
.empty());
506 // Initialize the first column.
507 for (unsigned I
= 0; I
!= NL
+1; ++I
) {
508 Cur
[I
].Cost
= I
* LeftCost
;
509 for (unsigned J
= 0; J
!= I
; ++J
)
510 Cur
[I
].Path
.push_back(DC_left
);
513 for (BasicBlock::iterator RI
= RStart
; RI
!= RE
; ++RI
) {
514 // Initialize the first row.
516 Next
[0].Cost
+= RightCost
;
517 Next
[0].Path
.push_back(DC_right
);
520 for (BasicBlock::iterator LI
= LStart
; LI
!= LE
; ++LI
, ++Index
) {
521 if (matchForBlockDiff(&*LI
, &*RI
)) {
522 Next
[Index
] = Cur
[Index
-1];
523 Next
[Index
].Cost
+= MatchCost
;
524 Next
[Index
].Path
.push_back(DC_match
);
525 TentativeValues
.insert(std::make_pair(&*LI
, &*RI
));
526 } else if (Next
[Index
-1].Cost
<= Cur
[Index
].Cost
) {
527 Next
[Index
] = Next
[Index
-1];
528 Next
[Index
].Cost
+= LeftCost
;
529 Next
[Index
].Path
.push_back(DC_left
);
531 Next
[Index
] = Cur
[Index
];
532 Next
[Index
].Cost
+= RightCost
;
533 Next
[Index
].Path
.push_back(DC_right
);
537 std::swap(Cur
, Next
);
540 // We don't need the tentative values anymore; everything from here
541 // on out should be non-tentative.
542 TentativeValues
.clear();
544 SmallVectorImpl
<char> &Path
= Cur
[NL
].Path
;
545 BasicBlock::iterator LI
= LStart
, RI
= RStart
;
547 DiffLogBuilder
Diff(Engine
.getConsumer());
549 // Drop trailing matches.
550 while (Path
.back() == DC_match
)
553 // Skip leading matches.
554 SmallVectorImpl
<char>::iterator
555 PI
= Path
.begin(), PE
= Path
.end();
556 while (PI
!= PE
&& *PI
== DC_match
) {
561 for (; PI
!= PE
; ++PI
) {
562 switch (static_cast<DiffChange
>(*PI
)) {
564 assert(LI
!= LE
&& RI
!= RE
);
566 Instruction
*L
= &*LI
, *R
= &*RI
;
587 // Finishing unifying and complaining about the tails of the block,
588 // which should be matches all the way through.
595 // If the terminators have different kinds, but one is an invoke and the
596 // other is an unconditional branch immediately following a call, unify
597 // the results and the destinations.
598 TerminatorInst
*LTerm
= LStart
->getParent()->getTerminator();
599 TerminatorInst
*RTerm
= RStart
->getParent()->getTerminator();
600 if (isa
<BranchInst
>(LTerm
) && isa
<InvokeInst
>(RTerm
)) {
601 if (cast
<BranchInst
>(LTerm
)->isConditional()) return;
602 BasicBlock::iterator I
= LTerm
;
603 if (I
== LStart
->getParent()->begin()) return;
605 if (!isa
<CallInst
>(*I
)) return;
606 CallInst
*LCall
= cast
<CallInst
>(&*I
);
607 InvokeInst
*RInvoke
= cast
<InvokeInst
>(RTerm
);
608 if (!equivalentAsOperands(LCall
->getCalledValue(), RInvoke
->getCalledValue()))
610 if (!LCall
->use_empty())
611 Values
[LCall
] = RInvoke
;
612 tryUnify(LTerm
->getSuccessor(0), RInvoke
->getNormalDest());
613 } else if (isa
<InvokeInst
>(LTerm
) && isa
<BranchInst
>(RTerm
)) {
614 if (cast
<BranchInst
>(RTerm
)->isConditional()) return;
615 BasicBlock::iterator I
= RTerm
;
616 if (I
== RStart
->getParent()->begin()) return;
618 if (!isa
<CallInst
>(*I
)) return;
619 CallInst
*RCall
= cast
<CallInst
>(I
);
620 InvokeInst
*LInvoke
= cast
<InvokeInst
>(LTerm
);
621 if (!equivalentAsOperands(LInvoke
->getCalledValue(), RCall
->getCalledValue()))
623 if (!LInvoke
->use_empty())
624 Values
[LInvoke
] = RCall
;
625 tryUnify(LInvoke
->getNormalDest(), RTerm
->getSuccessor(0));
631 void DifferenceEngine::diff(Function
*L
, Function
*R
) {
632 Context
C(*this, L
, R
);
635 // FIXME: attributes and CC
636 // FIXME: parameter attributes
638 // If both are declarations, we're done.
639 if (L
->empty() && R
->empty())
642 log("left function is declaration, right function is definition");
644 log("right function is declaration, left function is definition");
646 FunctionDifferenceEngine(*this).diff(L
, R
);
649 void DifferenceEngine::diff(Module
*L
, Module
*R
) {
651 SmallVector
<std::pair
<Function
*,Function
*>, 20> Queue
;
653 for (Module::iterator I
= L
->begin(), E
= L
->end(); I
!= E
; ++I
) {
655 LNames
.insert(LFn
->getName());
657 if (Function
*RFn
= R
->getFunction(LFn
->getName()))
658 Queue
.push_back(std::make_pair(LFn
, RFn
));
660 logf("function %l exists only in left module") << LFn
;
663 for (Module::iterator I
= R
->begin(), E
= R
->end(); I
!= E
; ++I
) {
665 if (!LNames
.count(RFn
->getName()))
666 logf("function %r exists only in right module") << RFn
;
669 for (SmallVectorImpl
<std::pair
<Function
*,Function
*> >::iterator
670 I
= Queue
.begin(), E
= Queue
.end(); I
!= E
; ++I
)
671 diff(I
->first
, I
->second
);
674 bool DifferenceEngine::equivalentAsOperands(GlobalValue
*L
, GlobalValue
*R
) {
675 if (globalValueOracle
) return (*globalValueOracle
)(L
, R
);
676 return L
->getName() == R
->getName();