//===- FunctionComparator.cpp - Function Comparator ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionComparator and GlobalNumberState classes
// which are used by the MergeFunctions pass for comparing functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/FunctionComparator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "functioncomparator"
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}
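// Every cmp* helper in this file follows the same three-way convention, e.g.
//   cmpNumbers(3, 7) == -1, cmpNumbers(7, 7) == 0, cmpNumbers(9, 7) == 1,
// so individual results compose into a deterministic total order over
// functions.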
int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
  if ((int)L < (int)R) return -1;
  if ((int)L > (int)R) return 1;
  return 0;
}
int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}
int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
  // then by value interpreted as a bitstring (aka APInt).
  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                           APFloat::semanticsPrecision(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
                           APFloat::semanticsMaxExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
                           APFloat::semanticsMinExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
                           APFloat::semanticsSizeInBits(SR)))
    return Res;

  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
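// For example, an IEEE half (precision 11) orders before a float (precision
// 24), which orders before a double (precision 53); only when both constants
// share semantics does the comparison fall through to the raw bit pattern.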
int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
  // Prevent heavy comparison, compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare strings lexicographically only when it is necessary: only when
  // they are equal in size.
  return L.compare(R);
}
int FunctionComparator::cmpAttrs(const AttributeList L,
                                 const AttributeList R) const {
  if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
    return Res;

  for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
    AttributeSet LAS = L.getAttributes(i);
    AttributeSet RAS = R.getAttributes(i);
    AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
    AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}
int FunctionComparator::cmpRangeMetadata(const MDNode *L,
                                         const MDNode *R) const {
  if (L == R)
    return 0;
  if (!L)
    return -1;
  if (!R)
    return 1;
  // Range metadata is a sequence of numbers. Make sure they are the same
  // sequence.
  // TODO: Note that as this is metadata, it is possible to drop and/or merge
  // this data when considering functions to merge. Thus this comparison would
  // return 0 (i.e. equivalent), but merging would become more complicated
  // because the ranges would need to be unioned. It is not likely that
  // functions differ ONLY in this metadata if they are actually the same
  // function semantically.
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;
  for (size_t I = 0; I < L->getNumOperands(); ++I) {
    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
      return Res;
  }
  return 0;
}
int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                const Instruction *R) const {
  ImmutableCallSite LCS(L);
  ImmutableCallSite RCS(R);

  assert(LCS && RCS && "Must be calls or invokes!");
  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");

  if (int Res =
          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
    return Res;

  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
    auto OBL = LCS.getOperandBundleAt(i);
    auto OBR = RCS.getOperandBundleAt(i);

    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
      return Res;

    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
      return Res;
  }

  return 0;
}
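// Only the bundle schema (tag name and input count) is compared above; the
// bundle inputs themselves are instruction operands and are compared by the
// caller. For example, two calls carrying "deopt" bundles match at this level
// as long as both bundles have the same number of inputs.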
/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
///    type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);
    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.
  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;
  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the
    // host endianness. This isn't a problem though, because the endianness of
    // a module will affect the order of the constants, but this order is the
    // same for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }
  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function *F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for (BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB)
          return -1;
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block in "
                       "its function.");
      return -1;
    }
    // cmpValues said the functions are the same. So because they aren't
    // literally the same pointer, they must respectively be the left and
    // right functions.
    assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
    // cmpValues will tell us if these are equivalent BasicBlocks, in the
    // context of their respective functions.
    return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
  }
  default: // Unknown constant, abort.
    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}
int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
  uint64_t LNumber = GlobalNumbers->getNumber(L);
  uint64_t RNumber = GlobalNumbers->getNumber(R);
  return cmpNumbers(LNumber, RNumber);
}
/// cmpTypes - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;
  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
    LLVM_FALLTHROUGH;
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned 0 earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;
  case Type::PointerTyID: {
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
  }
  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }
  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }
  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}
// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperations(const Instruction *L,
                                      const Instruction *R,
                                      bool &needToCmpOperands) const {
  needToCmpOperands = true;
  if (int Res = cmpValues(L, R))
    return Res;

  // Differences from Instruction::isSameOperationAs:
  //  * replace type comparison with calls to cmpTypes.
  //  * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
  //  * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;
  if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
    needToCmpOperands = false;
    const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
    if (int Res =
            cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
      return Res;
    return cmpGEPs(GEPL, GEPR);
  }
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpTypes(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }
  // Check special state that is a part of some instructions.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
    if (int Res = cmpTypes(AI->getAllocatedType(),
                           cast<AllocaInst>(R)->getAllocatedType()))
      return Res;
    return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
  }
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    if (int Res = cmpNumbers(LI->getSyncScopeID(),
                             cast<LoadInst>(R)->getSyncScopeID()))
      return Res;
    return cmpRangeMetadata(
        LI->getMetadata(LLVMContext::MD_range),
        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSyncScopeID(),
                      cast<StoreInst>(R)->getSyncScopeID());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<CallInst>(R)->getCallingConv()))
      return Res;
    if (int Res =
            cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(CI, R))
      return Res;
    return cmpRangeMetadata(
        CI->getMetadata(LLVMContext::MD_range),
        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
    if (int Res = cmpNumbers(II->getCallingConv(),
                             cast<InvokeInst>(R)->getCallingConv()))
      return Res;
    if (int Res =
            cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(II, R))
      return Res;
    return cmpRangeMetadata(
        II->getMetadata(LLVMContext::MD_range),
        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSyncScopeID(),
                      cast<FenceInst>(R)->getSyncScopeID());
  }
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getSuccessOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getFailureOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSyncScopeID(),
                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpOrderings(RMWI->getOrdering(),
                               cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSyncScopeID(),
                      cast<AtomicRMWInst>(R)->getSyncScopeID());
  }
  if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
    const PHINode *PNR = cast<PHINode>(R);
    // Ensure that in addition to the incoming values being identical
    // (checked by the caller of this function), the incoming blocks
    // are also identical.
    for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
      if (int Res =
              cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
        return Res;
    }
  }
  return 0;
}
// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
                                const GEPOperator *GEPR) const {
  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  const DataLayout &DL = FnL->getParent()->getDataLayout();
  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
      GEPR->accumulateConstantOffset(DL, OffsetR))
    return cmpAPInts(OffsetL, OffsetR);
  if (int Res = cmpTypes(GEPL->getSourceElementType(),
                         GEPR->getSourceElementType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}
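// Note the effect of the constant-offset fold above: for example, a GEP
// indexing field 1 of {i32, i32} and a GEP indexing element 1 of [2 x i32]
// both reduce to a byte offset of 4 on a typical data layout, so they compare
// as equal here even though their source element types differ.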
int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
                                     const InlineAsm *R) const {
  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
  // the same, otherwise compare the fields.
  if (L == R)
    return 0;
  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
    return Res;
  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
    return Res;
  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
    return Res;
  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
    return Res;
  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
    return Res;
  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
    return Res;
  llvm_unreachable("InlineAsm blocks were not uniqued.");
  return 0;
}
/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
  // Catch self-reference case.
  if (L == FnL) {
    if (R == FnR)
      return 0;
    return -1;
  }
  if (R == FnR) {
    if (L == FnL)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpInlineAsm(InlineAsmL, InlineAsmR);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
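// The serial-number maps assign each value an ordinal at its first use, so
// two functions match as long as corresponding values appear in the same
// positions. For example, comparing "%a = add i32 %x, %y" against
// "%b = add i32 %p, %q", %x and %p receive the same ordinal on first sight
// (assuming they are seen at the same point), and every later use must then
// pair %x with %p.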
// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                       const BasicBlock *BBR) const {
  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();

  do {
    bool needToCmpOperands = true;
    if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
      return Res;
    if (needToCmpOperands) {
      assert(InstL->getNumOperands() == InstR->getNumOperands());

      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
        Value *OpL = InstL->getOperand(i);
        Value *OpR = InstR->getOperand(i);
        if (int Res = cmpValues(OpL, OpR))
          return Res;
        // cmpValues should ensure this is true.
        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
      }
    }

    ++InstL;
    ++InstR;
  } while (InstL != InstLE && InstR != InstRE);

  if (InstL != InstLE && InstR == InstRE)
    return 1;
  if (InstL == InstLE && InstR != InstRE)
    return -1;
  return 0;
}
int FunctionComparator::compareSignature() const {
  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
    return Res;

  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
    return Res;

  if (FnL->hasGC()) {
    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
    return Res;

  if (FnL->hasSection()) {
    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
    return Res;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
    return Res;

  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
    return Res;

  assert(FnL->arg_size() == FnR->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
                                    ArgRI = FnR->arg_begin(),
                                    ArgLE = FnL->arg_end();
       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
      llvm_unreachable("Arguments repeat!");
  }
  return 0;
}
// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
  beginCompare();

  if (int Res = compareSignature())
    return Res;

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.

  FnLBBs.push_back(&FnL->getEntryBlock());
  FnRBBs.push_back(&FnR->getEntryBlock());

  VisitedBBs.insert(FnLBBs[0]);
  while (!FnLBBs.empty()) {
    const BasicBlock *BBL = FnLBBs.pop_back_val();
    const BasicBlock *BBR = FnRBBs.pop_back_val();

    if (int Res = cmpValues(BBL, BBR))
      return Res;

    if (int Res = cmpBasicBlocks(BBL, BBR))
      return Res;

    const TerminatorInst *TermL = BBL->getTerminator();
    const TerminatorInst *TermR = BBR->getTerminator();

    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
        continue;

      FnLBBs.push_back(TermL->getSuccessor(i));
      FnRBBs.push_back(TermR->getSuccessor(i));
    }
  }
  return 0;
}
namespace {

// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64-bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
class HashAccumulator64 {
  uint64_t Hash;

public:
  // Initialize to random constant, so the state isn't zero.
  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }

  void add(uint64_t V) {
    Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
  }

  // No finishing is required, because the entire hash value is used.
  uint64_t getHash() { return Hash; }
};

} // end anonymous namespace
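// Usage sketch: values are folded in one at a time, so a hash can be built
// during a traversal without buffering, e.g.
//   HashAccumulator64 H;
//   H.add(F.arg_size());
//   for (const BasicBlock &BB : F)
//     H.add(BB.size());
//   uint64_t Result = H.getHash();
// The accumulation is order-sensitive, which is exactly what functionHash
// below relies on.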
// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it
// useful when possibly merging functions which are the same modulo constants
// and call targets.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
  HashAccumulator64 H;
  H.add(F.isVarArg());
  H.add(F.arg_size());

  SmallVector<const BasicBlock *, 8> BBs;
  SmallSet<const BasicBlock *, 16> VisitedBBs;

  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
  // accumulating the hash of the function "structure." (BB and opcode sequence)
  BBs.push_back(&F.getEntryBlock());
  VisitedBBs.insert(BBs[0]);
  while (!BBs.empty()) {
    const BasicBlock *BB = BBs.pop_back_val();
    // This random value acts as a block header, as otherwise the partition of
    // opcodes into BBs wouldn't affect the hash, only the order of the
    // opcodes.
    H.add(45798);
    for (auto &Inst : *BB) {
      H.add(Inst.getOpcode());
    }
    const TerminatorInst *Term = BB->getTerminator();
    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
        continue;
      BBs.push_back(Term->getSuccessor(i));
    }
  }
  return H.getHash();
}
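// Consequently, two functions that differ only in an immediate operand (e.g.
// "ret i32 1" vs "ret i32 2") hash identically here and are only told apart
// by the full compare(), while functions with different opcode sequences land
// in different hash buckets with high probability.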