1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This header defines the implementation of the LLVM difference
10 // engine, which structurally compares global values within a module.
12 //===----------------------------------------------------------------------===//
14 #include "DifferenceEngine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/type_traits.h"
34 /// A priority queue, implemented as a heap.
35 template <class T
, class Sorter
, unsigned InlineCapacity
>
38 llvm::SmallVector
<T
, InlineCapacity
> Storage
;
41 PriorityQueue(const Sorter
&Precedes
) : Precedes(Precedes
) {}
43 /// Checks whether the heap is empty.
44 bool empty() const { return Storage
.empty(); }
46 /// Insert a new value on the heap.
47 void insert(const T
&V
) {
48 unsigned Index
= Storage
.size();
50 if (Index
== 0) return;
52 T
*data
= Storage
.data();
54 unsigned Target
= (Index
+ 1) / 2 - 1;
55 if (!Precedes(data
[Index
], data
[Target
])) return;
56 std::swap(data
[Index
], data
[Target
]);
57 if (Target
== 0) return;
62 /// Remove the minimum value in the heap. Only valid on a non-empty heap.
67 unsigned NewSize
= Storage
.size() - 1;
69 // Move the slot at the end to the beginning.
70 if (is_trivially_copyable
<T
>::value
)
71 Storage
[0] = Storage
[NewSize
];
73 std::swap(Storage
[0], Storage
[NewSize
]);
75 // Bubble the root up as necessary.
78 // With a 1-based index, the children would be Index*2 and Index*2+1.
79 unsigned R
= (Index
+ 1) * 2;
82 // If R is out of bounds, we're done after this in any case.
84 // If L is also out of bounds, we're done immediately.
85 if (L
>= NewSize
) break;
87 // Otherwise, test whether we should swap L and Index.
88 if (Precedes(Storage
[L
], Storage
[Index
]))
89 std::swap(Storage
[L
], Storage
[Index
]);
93 // Otherwise, we need to compare with the smaller of L and R.
94 // Prefer R because it's closer to the end of the array.
95 unsigned IndexToTest
= (Precedes(Storage
[L
], Storage
[R
]) ? L
: R
);
97 // If Index is >= the min of L and R, then heap ordering is restored.
98 if (!Precedes(Storage
[IndexToTest
], Storage
[Index
]))
101 // Otherwise, keep bubbling up.
102 std::swap(Storage
[IndexToTest
], Storage
[Index
]);
112 /// A function-scope difference engine.
113 class FunctionDifferenceEngine
{
114 DifferenceEngine
&Engine
;
116 /// The current mapping from old local values to new local values.
117 DenseMap
<Value
*, Value
*> Values
;
119 /// The current mapping from old blocks to new blocks.
120 DenseMap
<BasicBlock
*, BasicBlock
*> Blocks
;
122 DenseSet
<std::pair
<Value
*, Value
*> > TentativeValues
;
124 unsigned getUnprocPredCount(BasicBlock
*Block
) const {
126 for (pred_iterator I
= pred_begin(Block
), E
= pred_end(Block
); I
!= E
; ++I
)
127 if (!Blocks
.count(*I
)) Count
++;
131 typedef std::pair
<BasicBlock
*, BasicBlock
*> BlockPair
;
133 /// A type which sorts a priority queue by the number of unprocessed
134 /// predecessor blocks it has remaining.
136 /// This is actually really expensive to calculate.
138 const FunctionDifferenceEngine
&fde
;
139 explicit QueueSorter(const FunctionDifferenceEngine
&fde
) : fde(fde
) {}
141 bool operator()(const BlockPair
&Old
, const BlockPair
&New
) {
142 return fde
.getUnprocPredCount(Old
.first
)
143 < fde
.getUnprocPredCount(New
.first
);
147 /// A queue of unified blocks to process.
148 PriorityQueue
<BlockPair
, QueueSorter
, 20> Queue
;
150 /// Try to unify the given two blocks. Enqueues them for processing
151 /// if they haven't already been processed.
153 /// Returns true if there was a problem unifying them.
154 bool tryUnify(BasicBlock
*L
, BasicBlock
*R
) {
155 BasicBlock
*&Ref
= Blocks
[L
];
158 if (Ref
== R
) return false;
160 Engine
.logf("successor %l cannot be equivalent to %r; "
161 "it's already equivalent to %r")
167 Queue
.insert(BlockPair(L
, R
));
171 /// Unifies two instructions, given that they're known not to have
172 /// structural differences.
173 void unify(Instruction
*L
, Instruction
*R
) {
174 DifferenceEngine::Context
C(Engine
, L
, R
);
176 bool Result
= diff(L
, R
, true, true);
177 assert(!Result
&& "structural differences second time around?");
183 void processQueue() {
184 while (!Queue
.empty()) {
185 BlockPair Pair
= Queue
.remove_min();
186 diff(Pair
.first
, Pair
.second
);
190 void diff(BasicBlock
*L
, BasicBlock
*R
) {
191 DifferenceEngine::Context
C(Engine
, L
, R
);
193 BasicBlock::iterator LI
= L
->begin(), LE
= L
->end();
194 BasicBlock::iterator RI
= R
->begin();
197 assert(LI
!= LE
&& RI
!= R
->end());
198 Instruction
*LeftI
= &*LI
, *RightI
= &*RI
;
200 // If the instructions differ, start the more sophisticated diff
201 // algorithm at the start of the block.
202 if (diff(LeftI
, RightI
, false, false)) {
203 TentativeValues
.clear();
204 return runBlockDiff(L
->begin(), R
->begin());
207 // Otherwise, tentatively unify them.
208 if (!LeftI
->use_empty())
209 TentativeValues
.insert(std::make_pair(LeftI
, RightI
));
213 } while (LI
!= LE
); // This is sufficient: we can't get equality of
214 // terminators if there are residual instructions.
216 // Unify everything in the block, non-tentatively this time.
217 TentativeValues
.clear();
218 for (LI
= L
->begin(), RI
= R
->begin(); LI
!= LE
; ++LI
, ++RI
)
222 bool matchForBlockDiff(Instruction
*L
, Instruction
*R
);
223 void runBlockDiff(BasicBlock::iterator LI
, BasicBlock::iterator RI
);
225 bool diffCallSites(CallSite L
, CallSite R
, bool Complain
) {
226 // FIXME: call attributes
227 if (!equivalentAsOperands(L
.getCalledValue(), R
.getCalledValue())) {
228 if (Complain
) Engine
.log("called functions differ");
231 if (L
.arg_size() != R
.arg_size()) {
232 if (Complain
) Engine
.log("argument counts differ");
235 for (unsigned I
= 0, E
= L
.arg_size(); I
!= E
; ++I
)
236 if (!equivalentAsOperands(L
.getArgument(I
), R
.getArgument(I
))) {
238 Engine
.logf("arguments %l and %r differ")
239 << L
.getArgument(I
) << R
.getArgument(I
);
245 bool diff(Instruction
*L
, Instruction
*R
, bool Complain
, bool TryUnify
) {
246 // FIXME: metadata (if Complain is set)
248 // Different opcodes always imply different operations.
249 if (L
->getOpcode() != R
->getOpcode()) {
250 if (Complain
) Engine
.log("different instruction types");
254 if (isa
<CmpInst
>(L
)) {
255 if (cast
<CmpInst
>(L
)->getPredicate()
256 != cast
<CmpInst
>(R
)->getPredicate()) {
257 if (Complain
) Engine
.log("different predicates");
260 } else if (isa
<CallInst
>(L
)) {
261 return diffCallSites(CallSite(L
), CallSite(R
), Complain
);
262 } else if (isa
<PHINode
>(L
)) {
265 // This is really weird; type uniquing is broken?
266 if (L
->getType() != R
->getType()) {
267 if (!L
->getType()->isPointerTy() || !R
->getType()->isPointerTy()) {
268 if (Complain
) Engine
.log("different phi types");
275 } else if (isa
<InvokeInst
>(L
)) {
276 InvokeInst
*LI
= cast
<InvokeInst
>(L
);
277 InvokeInst
*RI
= cast
<InvokeInst
>(R
);
278 if (diffCallSites(CallSite(LI
), CallSite(RI
), Complain
))
282 tryUnify(LI
->getNormalDest(), RI
->getNormalDest());
283 tryUnify(LI
->getUnwindDest(), RI
->getUnwindDest());
287 } else if (isa
<BranchInst
>(L
)) {
288 BranchInst
*LI
= cast
<BranchInst
>(L
);
289 BranchInst
*RI
= cast
<BranchInst
>(R
);
290 if (LI
->isConditional() != RI
->isConditional()) {
291 if (Complain
) Engine
.log("branch conditionality differs");
295 if (LI
->isConditional()) {
296 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
297 if (Complain
) Engine
.log("branch conditions differ");
300 if (TryUnify
) tryUnify(LI
->getSuccessor(1), RI
->getSuccessor(1));
302 if (TryUnify
) tryUnify(LI
->getSuccessor(0), RI
->getSuccessor(0));
305 } else if (isa
<IndirectBrInst
>(L
)) {
306 IndirectBrInst
*LI
= cast
<IndirectBrInst
>(L
);
307 IndirectBrInst
*RI
= cast
<IndirectBrInst
>(R
);
308 if (LI
->getNumDestinations() != RI
->getNumDestinations()) {
309 if (Complain
) Engine
.log("indirectbr # of destinations differ");
313 if (!equivalentAsOperands(LI
->getAddress(), RI
->getAddress())) {
314 if (Complain
) Engine
.log("indirectbr addresses differ");
319 for (unsigned i
= 0; i
< LI
->getNumDestinations(); i
++) {
320 tryUnify(LI
->getDestination(i
), RI
->getDestination(i
));
325 } else if (isa
<SwitchInst
>(L
)) {
326 SwitchInst
*LI
= cast
<SwitchInst
>(L
);
327 SwitchInst
*RI
= cast
<SwitchInst
>(R
);
328 if (!equivalentAsOperands(LI
->getCondition(), RI
->getCondition())) {
329 if (Complain
) Engine
.log("switch conditions differ");
332 if (TryUnify
) tryUnify(LI
->getDefaultDest(), RI
->getDefaultDest());
334 bool Difference
= false;
336 DenseMap
<ConstantInt
*,BasicBlock
*> LCases
;
337 for (auto Case
: LI
->cases())
338 LCases
[Case
.getCaseValue()] = Case
.getCaseSuccessor();
340 for (auto Case
: RI
->cases()) {
341 ConstantInt
*CaseValue
= Case
.getCaseValue();
342 BasicBlock
*LCase
= LCases
[CaseValue
];
345 tryUnify(LCase
, Case
.getCaseSuccessor());
346 LCases
.erase(CaseValue
);
347 } else if (Complain
|| !Difference
) {
349 Engine
.logf("right switch has extra case %r") << CaseValue
;
354 for (DenseMap
<ConstantInt
*,BasicBlock
*>::iterator
355 I
= LCases
.begin(), E
= LCases
.end(); I
!= E
; ++I
) {
357 Engine
.logf("left switch has extra case %l") << I
->first
;
361 } else if (isa
<UnreachableInst
>(L
)) {
365 if (L
->getNumOperands() != R
->getNumOperands()) {
366 if (Complain
) Engine
.log("instructions have different operand counts");
370 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
) {
371 Value
*LO
= L
->getOperand(I
), *RO
= R
->getOperand(I
);
372 if (!equivalentAsOperands(LO
, RO
)) {
373 if (Complain
) Engine
.logf("operands %l and %r differ") << LO
<< RO
;
381 bool equivalentAsOperands(Constant
*L
, Constant
*R
) {
382 // Use equality as a preliminary filter.
386 if (L
->getValueID() != R
->getValueID())
389 // Ask the engine about global values.
390 if (isa
<GlobalValue
>(L
))
391 return Engine
.equivalentAsOperands(cast
<GlobalValue
>(L
),
392 cast
<GlobalValue
>(R
));
394 // Compare constant expressions structurally.
395 if (isa
<ConstantExpr
>(L
))
396 return equivalentAsOperands(cast
<ConstantExpr
>(L
),
397 cast
<ConstantExpr
>(R
));
399 // Constants of the "same type" don't always actually have the same
400 // type; I don't know why. Just white-list them.
401 if (isa
<ConstantPointerNull
>(L
) || isa
<UndefValue
>(L
) || isa
<ConstantAggregateZero
>(L
))
404 // Block addresses only match if we've already encountered the
405 // block. FIXME: tentative matches?
406 if (isa
<BlockAddress
>(L
))
407 return Blocks
[cast
<BlockAddress
>(L
)->getBasicBlock()]
408 == cast
<BlockAddress
>(R
)->getBasicBlock();
410 // If L and R are ConstantVectors, compare each element
411 if (isa
<ConstantVector
>(L
)) {
412 ConstantVector
*CVL
= cast
<ConstantVector
>(L
);
413 ConstantVector
*CVR
= cast
<ConstantVector
>(R
);
414 if (CVL
->getType()->getNumElements() != CVR
->getType()->getNumElements())
416 for (unsigned i
= 0; i
< CVL
->getType()->getNumElements(); i
++) {
417 if (!equivalentAsOperands(CVL
->getOperand(i
), CVR
->getOperand(i
)))
426 bool equivalentAsOperands(ConstantExpr
*L
, ConstantExpr
*R
) {
429 if (L
->getOpcode() != R
->getOpcode())
432 switch (L
->getOpcode()) {
433 case Instruction::ICmp
:
434 case Instruction::FCmp
:
435 if (L
->getPredicate() != R
->getPredicate())
439 case Instruction::GetElementPtr
:
447 if (L
->getNumOperands() != R
->getNumOperands())
450 for (unsigned I
= 0, E
= L
->getNumOperands(); I
!= E
; ++I
)
451 if (!equivalentAsOperands(L
->getOperand(I
), R
->getOperand(I
)))
457 bool equivalentAsOperands(Value
*L
, Value
*R
) {
458 // Fall out if the values have different kind.
459 // This possibly shouldn't take priority over oracles.
460 if (L
->getValueID() != R
->getValueID())
463 // Value subtypes: Argument, Constant, Instruction, BasicBlock,
464 // InlineAsm, MDNode, MDString, PseudoSourceValue
466 if (isa
<Constant
>(L
))
467 return equivalentAsOperands(cast
<Constant
>(L
), cast
<Constant
>(R
));
469 if (isa
<Instruction
>(L
))
470 return Values
[L
] == R
|| TentativeValues
.count(std::make_pair(L
, R
));
472 if (isa
<Argument
>(L
))
473 return Values
[L
] == R
;
475 if (isa
<BasicBlock
>(L
))
476 return Blocks
[cast
<BasicBlock
>(L
)] != R
;
478 // Pretend everything else is identical.
482 // Avoid a gcc warning about accessing 'this' in an initializer.
483 FunctionDifferenceEngine
*this_() { return this; }
486 FunctionDifferenceEngine(DifferenceEngine
&Engine
) :
487 Engine(Engine
), Queue(QueueSorter(*this_())) {}
489 void diff(Function
*L
, Function
*R
) {
490 if (L
->arg_size() != R
->arg_size())
491 Engine
.log("different argument counts");
493 // Map the arguments.
494 for (Function::arg_iterator
495 LI
= L
->arg_begin(), LE
= L
->arg_end(),
496 RI
= R
->arg_begin(), RE
= R
->arg_end();
497 LI
!= LE
&& RI
!= RE
; ++LI
, ++RI
)
500 tryUnify(&*L
->begin(), &*R
->begin());
506 DiffEntry() : Cost(0) {}
509 llvm::SmallVector
<char, 8> Path
; // actually of DifferenceEngine::DiffChange
512 bool FunctionDifferenceEngine::matchForBlockDiff(Instruction
*L
,
514 return !diff(L
, R
, false, false);
517 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart
,
518 BasicBlock::iterator RStart
) {
519 BasicBlock::iterator LE
= LStart
->getParent()->end();
520 BasicBlock::iterator RE
= RStart
->getParent()->end();
522 unsigned NL
= std::distance(LStart
, LE
);
524 SmallVector
<DiffEntry
, 20> Paths1(NL
+1);
525 SmallVector
<DiffEntry
, 20> Paths2(NL
+1);
527 DiffEntry
*Cur
= Paths1
.data();
528 DiffEntry
*Next
= Paths2
.data();
530 const unsigned LeftCost
= 2;
531 const unsigned RightCost
= 2;
532 const unsigned MatchCost
= 0;
534 assert(TentativeValues
.empty());
536 // Initialize the first column.
537 for (unsigned I
= 0; I
!= NL
+1; ++I
) {
538 Cur
[I
].Cost
= I
* LeftCost
;
539 for (unsigned J
= 0; J
!= I
; ++J
)
540 Cur
[I
].Path
.push_back(DC_left
);
543 for (BasicBlock::iterator RI
= RStart
; RI
!= RE
; ++RI
) {
544 // Initialize the first row.
546 Next
[0].Cost
+= RightCost
;
547 Next
[0].Path
.push_back(DC_right
);
550 for (BasicBlock::iterator LI
= LStart
; LI
!= LE
; ++LI
, ++Index
) {
551 if (matchForBlockDiff(&*LI
, &*RI
)) {
552 Next
[Index
] = Cur
[Index
-1];
553 Next
[Index
].Cost
+= MatchCost
;
554 Next
[Index
].Path
.push_back(DC_match
);
555 TentativeValues
.insert(std::make_pair(&*LI
, &*RI
));
556 } else if (Next
[Index
-1].Cost
<= Cur
[Index
].Cost
) {
557 Next
[Index
] = Next
[Index
-1];
558 Next
[Index
].Cost
+= LeftCost
;
559 Next
[Index
].Path
.push_back(DC_left
);
561 Next
[Index
] = Cur
[Index
];
562 Next
[Index
].Cost
+= RightCost
;
563 Next
[Index
].Path
.push_back(DC_right
);
567 std::swap(Cur
, Next
);
570 // We don't need the tentative values anymore; everything from here
571 // on out should be non-tentative.
572 TentativeValues
.clear();
574 SmallVectorImpl
<char> &Path
= Cur
[NL
].Path
;
575 BasicBlock::iterator LI
= LStart
, RI
= RStart
;
577 DiffLogBuilder
Diff(Engine
.getConsumer());
579 // Drop trailing matches.
580 while (Path
.back() == DC_match
)
583 // Skip leading matches.
584 SmallVectorImpl
<char>::iterator
585 PI
= Path
.begin(), PE
= Path
.end();
586 while (PI
!= PE
&& *PI
== DC_match
) {
593 for (; PI
!= PE
; ++PI
) {
594 switch (static_cast<DiffChange
>(*PI
)) {
596 assert(LI
!= LE
&& RI
!= RE
);
598 Instruction
*L
= &*LI
, *R
= &*RI
;
619 // Finishing unifying and complaining about the tails of the block,
620 // which should be matches all the way through.
628 // If the terminators have different kinds, but one is an invoke and the
629 // other is an unconditional branch immediately following a call, unify
630 // the results and the destinations.
631 Instruction
*LTerm
= LStart
->getParent()->getTerminator();
632 Instruction
*RTerm
= RStart
->getParent()->getTerminator();
633 if (isa
<BranchInst
>(LTerm
) && isa
<InvokeInst
>(RTerm
)) {
634 if (cast
<BranchInst
>(LTerm
)->isConditional()) return;
635 BasicBlock::iterator I
= LTerm
->getIterator();
636 if (I
== LStart
->getParent()->begin()) return;
638 if (!isa
<CallInst
>(*I
)) return;
639 CallInst
*LCall
= cast
<CallInst
>(&*I
);
640 InvokeInst
*RInvoke
= cast
<InvokeInst
>(RTerm
);
641 if (!equivalentAsOperands(LCall
->getCalledValue(), RInvoke
->getCalledValue()))
643 if (!LCall
->use_empty())
644 Values
[LCall
] = RInvoke
;
645 tryUnify(LTerm
->getSuccessor(0), RInvoke
->getNormalDest());
646 } else if (isa
<InvokeInst
>(LTerm
) && isa
<BranchInst
>(RTerm
)) {
647 if (cast
<BranchInst
>(RTerm
)->isConditional()) return;
648 BasicBlock::iterator I
= RTerm
->getIterator();
649 if (I
== RStart
->getParent()->begin()) return;
651 if (!isa
<CallInst
>(*I
)) return;
652 CallInst
*RCall
= cast
<CallInst
>(I
);
653 InvokeInst
*LInvoke
= cast
<InvokeInst
>(LTerm
);
654 if (!equivalentAsOperands(LInvoke
->getCalledValue(), RCall
->getCalledValue()))
656 if (!LInvoke
->use_empty())
657 Values
[LInvoke
] = RCall
;
658 tryUnify(LInvoke
->getNormalDest(), RTerm
->getSuccessor(0));
664 void DifferenceEngine::Oracle::anchor() { }
666 void DifferenceEngine::diff(Function
*L
, Function
*R
) {
667 Context
C(*this, L
, R
);
670 // FIXME: attributes and CC
671 // FIXME: parameter attributes
673 // If both are declarations, we're done.
674 if (L
->empty() && R
->empty())
677 log("left function is declaration, right function is definition");
679 log("right function is declaration, left function is definition");
681 FunctionDifferenceEngine(*this).diff(L
, R
);
684 void DifferenceEngine::diff(Module
*L
, Module
*R
) {
686 SmallVector
<std::pair
<Function
*,Function
*>, 20> Queue
;
688 unsigned LeftAnonCount
= 0;
689 unsigned RightAnonCount
= 0;
691 for (Module::iterator I
= L
->begin(), E
= L
->end(); I
!= E
; ++I
) {
693 StringRef Name
= LFn
->getName();
701 if (Function
*RFn
= R
->getFunction(LFn
->getName()))
702 Queue
.push_back(std::make_pair(LFn
, RFn
));
704 logf("function %l exists only in left module") << LFn
;
707 for (Module::iterator I
= R
->begin(), E
= R
->end(); I
!= E
; ++I
) {
709 StringRef Name
= RFn
->getName();
715 if (!LNames
.count(Name
))
716 logf("function %r exists only in right module") << RFn
;
720 if (LeftAnonCount
!= 0 || RightAnonCount
!= 0) {
722 logf(("not comparing " + Twine(LeftAnonCount
) +
723 " anonymous functions in the left module and " +
724 Twine(RightAnonCount
) + " in the right module")
728 for (SmallVectorImpl
<std::pair
<Function
*,Function
*> >::iterator
729 I
= Queue
.begin(), E
= Queue
.end(); I
!= E
; ++I
)
730 diff(I
->first
, I
->second
);
733 bool DifferenceEngine::equivalentAsOperands(GlobalValue
*L
, GlobalValue
*R
) {
734 if (globalValueOracle
) return (*globalValueOracle
)(L
, R
);
735 return L
->getName() == R
->getName();