//===- FunctionComparator.cpp - Function Comparator ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionComparator and GlobalNumberState classes
// which are used by the MergeFunctions pass for comparing functions.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionComparator.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "functioncomparator"
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}
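
// All cmp* helpers below follow the usual three-way comparator convention:
// a negative result means L orders before R, zero means the two are
// equivalent, and a positive result means L orders after R. For example,
// cmpNumbers(2, 5) returns -1 and cmpNumbers(5, 5) returns 0.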

int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
  if ((int)L < (int)R) return -1;
  if ((int)L > (int)R) return 1;
  return 0;
}

int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}

int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
  // then by value interpreted as a bitstring (aka APInt).
  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                           APFloat::semanticsPrecision(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
                           APFloat::semanticsMaxExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
                           APFloat::semanticsMinExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
                           APFloat::semanticsSizeInBits(SR)))
    return Res;
  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
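
// For example, an IEEE half constant and a float constant never compare equal
// here, even when they denote the same numeric value: their fltSemantics
// (precision, exponent range, size) differ, so the semantics checks above
// decide the order before the bit patterns are ever consulted.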

int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
  // Prevent heavy comparison, compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare strings lexicographically only when it is necessary: only when
  // strings are equal in size.
  return L.compare(R);
}

int FunctionComparator::cmpAttrs(const AttributeList L,
                                 const AttributeList R) const {
  if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
    return Res;

  for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
    AttributeSet LAS = L.getAttributes(i);
    AttributeSet RAS = R.getAttributes(i);
    AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
    AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
        if (LA.getKindAsEnum() != RA.getKindAsEnum())
          return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());

        Type *TyL = LA.getValueAsType();
        Type *TyR = RA.getValueAsType();
        if (TyL && TyR)
          return cmpTypes(TyL, TyR);

        // Two pointers, at least one null, so the comparison result is
        // independent of the value of a real pointer.
        return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
      }
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}

int FunctionComparator::cmpRangeMetadata(const MDNode *L,
                                         const MDNode *R) const {
  if (L == R)
    return 0;
  if (!L)
    return -1;
  if (!R)
    return 1;
  // Range metadata is a sequence of numbers. Make sure they are the same
  // sequence.
  // TODO: Note that as this is metadata, it is possible to drop and/or merge
  // this data when considering functions to merge. Thus this comparison would
  // return 0 (i.e. equivalent), but merging would become more complicated
  // because the ranges would need to be unioned. It is not likely that
  // functions differ ONLY in this metadata if they are actually the same
  // function semantically.
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;
  for (size_t I = 0; I < L->getNumOperands(); ++I) {
    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
      return Res;
  }
  return 0;
}

int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                const Instruction *R) const {
  ImmutableCallSite LCS(L);
  ImmutableCallSite RCS(R);

  assert(LCS && RCS && "Must be calls or invokes!");
  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");

  if (int Res =
          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
    return Res;

  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
    auto OBL = LCS.getOperandBundleAt(i);
    auto OBR = RCS.getOperandBundleAt(i);

    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
      return Res;

    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
      return Res;
  }

  return 0;
}

/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
/// type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;

      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the
    // host endianness. This isn't a problem though, because the endianness of
    // a module will affect the order of the constants, but this order is the
    // same for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }

  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function *F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for (BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB) {
          assert(&BB != RBB);
          return -1;
        }
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block in "
                       "its function.");
      return -1;
    } else {
      // cmpValues said the functions are the same. So because they aren't
      // literally the same pointer, they must respectively be the left and
      // right function.
      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
      // cmpValues will tell us if these are equivalent BasicBlocks, in the
      // context of their respective functions.
      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
    }
  }
  default: // Unknown constant, abort.
    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}
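
// Illustration of the ordering above: two ConstantInt i32 values dispatch to
// cmpAPInts, so the smaller unsigned value orders first. Two null constants
// of different but bitcastable types (e.g. <4 x i8> and <2 x i16>, both 32
// bits wide) hit the isNullValue() case and are ordered by the cmpTypes
// result alone.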

int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
  uint64_t LNumber = GlobalNumbers->getNumber(L);
  uint64_t RNumber = GlobalNumbers->getNumber(R);
  return cmpNumbers(LNumber, RNumber);
}

/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned true earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;

  case Type::PointerTyID:
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}
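
// A consequence of the getIntPtrType() folding above: two pointer types in
// address space 0, say i8* and i32*, both map to the DataLayout's integer
// pointer type and therefore compare equal here. This matches the
// "pointer-to-A and pointer-to-B are equivalent" convention that
// cmpOperations relies on below.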

// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperations(const Instruction *L,
                                      const Instruction *R,
                                      bool &needToCmpOperands) const {
  needToCmpOperands = true;
  if (int Res = cmpValues(L, R))
    return Res;

  // Differences from Instruction::isSameOperationAs:
  //  * replace type comparison with calls to cmpTypes.
  //  * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
  //  * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
    needToCmpOperands = false;
    const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
    if (int Res =
            cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
      return Res;
    return cmpGEPs(GEPL, GEPR);
  }

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpTypes(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
    if (int Res = cmpTypes(AI->getAllocatedType(),
                           cast<AllocaInst>(R)->getAllocatedType()))
      return Res;
    return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
  }
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    if (int Res = cmpNumbers(LI->getSyncScopeID(),
                             cast<LoadInst>(R)->getSyncScopeID()))
      return Res;
    return cmpRangeMetadata(
        LI->getMetadata(LLVMContext::MD_range),
        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSyncScopeID(),
                      cast<StoreInst>(R)->getSyncScopeID());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
    auto CSR = CallSite(const_cast<Instruction *>(R));
    if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
      return Res;
    if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(L, R))
      return Res;
    if (const CallInst *CI = dyn_cast<CallInst>(L))
      if (int Res = cmpNumbers(CI->getTailCallKind(),
                               cast<CallInst>(R)->getTailCallKind()))
        return Res;
    return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
                            R->getMetadata(LLVMContext::MD_range));
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
    return 0;
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSyncScopeID(),
                      cast<FenceInst>(R)->getSyncScopeID());
  }
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getSuccessOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getFailureOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSyncScopeID(),
                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpOrderings(RMWI->getOrdering(),
                               cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSyncScopeID(),
                      cast<AtomicRMWInst>(R)->getSyncScopeID());
  }
  if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
    const PHINode *PNR = cast<PHINode>(R);
    // Ensure that in addition to the incoming values being identical
    // (checked by the caller of this function), the incoming blocks
    // are also identical.
    for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
      if (int Res =
              cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
        return Res;
    }
  }
  return 0;
}

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
                                const GEPOperator *GEPR) const {
  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  const DataLayout &DL = FnL->getParent()->getDataLayout();
  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
      GEPR->accumulateConstantOffset(DL, OffsetR))
    return cmpAPInts(OffsetL, OffsetR);
  if (int Res = cmpTypes(GEPL->getSourceElementType(),
                         GEPR->getSourceElementType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}

int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
                                     const InlineAsm *R) const {
  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
  // the same, otherwise compare the fields.
  if (L == R)
    return 0;
  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
    return Res;
  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
    return Res;
  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
    return Res;
  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
    return Res;
  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
    return Res;
  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
    return Res;
  assert(L->getFunctionType() != R->getFunctionType());
  return 0;
}

/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
  // Catch self-reference case.
  if (L == FnL) {
    if (R == FnR)
      return 0;
    return -1;
  }
  if (R == FnR) {
    if (L == FnL)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpInlineAsm(InlineAsmL, InlineAsmR);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
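
// The serial-number scheme above means two non-constant values compare equal
// exactly when they were first encountered at the same point of the pair-wise
// walk: if %x is the third new value seen in FnL and %y is the third new
// value seen in FnR, cmpValues(%x, %y) returns 0, and any later pairing of %x
// with a differently numbered value reports the mismatch.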

// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                       const BasicBlock *BBR) const {
  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();

  do {
    bool needToCmpOperands = true;
    if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
      return Res;
    if (needToCmpOperands) {
      assert(InstL->getNumOperands() == InstR->getNumOperands());

      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
        Value *OpL = InstL->getOperand(i);
        Value *OpR = InstR->getOperand(i);
        if (int Res = cmpValues(OpL, OpR))
          return Res;
        // cmpValues should ensure this is true.
        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
      }
    }

    ++InstL;
    ++InstR;
  } while (InstL != InstLE && InstR != InstRE);

  if (InstL != InstLE && InstR == InstRE)
    return 1;
  if (InstL == InstLE && InstR != InstRE)
    return -1;
  return 0;
}

int FunctionComparator::compareSignature() const {
  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
    return Res;

  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
    return Res;

  if (FnL->hasGC()) {
    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
    return Res;

  if (FnL->hasSection()) {
    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
    return Res;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
    return Res;

  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
    return Res;

  assert(FnL->arg_size() == FnR->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
                                    ArgRI = FnR->arg_begin(),
                                    ArgLE = FnL->arg_end();
       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
      llvm_unreachable("Arguments repeat!");
  }
  return 0;
}

// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
  beginCompare();

  if (int Res = compareSignature())
    return Res;

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.

  FnLBBs.push_back(&FnL->getEntryBlock());
  FnRBBs.push_back(&FnR->getEntryBlock());

  VisitedBBs.insert(FnLBBs[0]);
  while (!FnLBBs.empty()) {
    const BasicBlock *BBL = FnLBBs.pop_back_val();
    const BasicBlock *BBR = FnRBBs.pop_back_val();

    if (int Res = cmpValues(BBL, BBR))
      return Res;

    if (int Res = cmpBasicBlocks(BBL, BBR))
      return Res;

    const Instruction *TermL = BBL->getTerminator();
    const Instruction *TermR = BBR->getTerminator();

    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
        continue;

      FnLBBs.push_back(TermL->getSuccessor(i));
      FnRBBs.push_back(TermR->getSuccessor(i));
    }
  }
  return 0;
}

namespace {

// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
class HashAccumulator64 {
  uint64_t Hash;

public:
  // Initialize to random constant, so the state isn't zero.
  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }

  void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

  // No finishing is required, because the entire hash value is used.
  uint64_t getHash() { return Hash; }
};

} // end anonymous namespace
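
// Usage sketch (illustrative only, not part of this file; the variable names
// are hypothetical): values are fed to the accumulator one at a time, and the
// running state itself is the final result.
#if 0
HashAccumulator64 Acc;           // seeded with the non-zero constant above
Acc.add(NumArgs);                // feed scalar facts incrementally...
Acc.add(Opcode);                 // ...in a fixed, documented order.
uint64_t Result = Acc.getHash(); // no finalization step is required
#endif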

// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it
// useful when possibly merging functions which are the same modulo constants
// and call targets.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
  HashAccumulator64 H;
  H.add(F.isVarArg());
  H.add(F.arg_size());

  SmallVector<const BasicBlock *, 8> BBs;
  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
  // accumulating the hash of the function "structure." (BB and opcode sequence)
  BBs.push_back(&F.getEntryBlock());
  VisitedBBs.insert(BBs[0]);
  while (!BBs.empty()) {
    const BasicBlock *BB = BBs.pop_back_val();
    // This random value acts as a block header, as otherwise the partition of
    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
    H.add(45798);
    for (auto &Inst : *BB) {
      H.add(Inst.getOpcode());
    }
    const Instruction *Term = BB->getTerminator();
    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
        continue;
      BBs.push_back(Term->getSuccessor(i));
    }
  }
  return H.getHash();
}
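
// Sketch of the intended use (illustrative only; the map shape and loop are
// assumptions, not code from this file): a client such as MergeFunctions can
// bucket functions by this hash and run the expensive compare() only within a
// bucket, since functions whose hashes differ cannot compare equal.
#if 0
DenseMap<FunctionComparator::FunctionHash, SmallVector<Function *, 2>> Buckets;
for (Function &F : M)
  if (!F.isDeclaration())
    Buckets[FunctionComparator::functionHash(F)].push_back(&F);
#endif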