1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/ADT/Hashing.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/GlobalVariable.h"
13 #include "llvm/IR/InstrTypes.h"
14 #include "llvm/IR/Instructions.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/Module.h"
22 // Basic hashing mechanism to detect structural change to the IR, used to verify
23 // pass return status consistency with actual change. In addition to being used
24 // by the MergeFunctions pass.
26 class StructuralHashImpl
{
29 void hash(uint64_t V
) { Hash
= hashing::detail::hash_16_bytes(Hash
, V
); }
31 // This will produce different values on 32-bit and 64-bit systens as
32 // hash_combine returns a size_t. However, this is only used for
33 // detailed hashing which, in-tree, only needs to distinguish between
34 // differences in functions.
35 template <typename T
> void hashArbitaryType(const T
&V
) {
36 hash(hash_combine(V
));
39 void hashType(Type
*ValueType
) {
40 hash(ValueType
->getTypeID());
41 if (ValueType
->isIntegerTy())
42 hash(ValueType
->getIntegerBitWidth());
46 StructuralHashImpl() : Hash(4) {}
48 void updateOperand(Value
*Operand
) {
49 hashType(Operand
->getType());
51 // The cases enumerated below are not exhaustive and are only aimed to
52 // get decent coverage over the function.
53 if (ConstantInt
*ConstInt
= dyn_cast
<ConstantInt
>(Operand
)) {
54 hashArbitaryType(ConstInt
->getValue());
55 } else if (ConstantFP
*ConstFP
= dyn_cast
<ConstantFP
>(Operand
)) {
56 hashArbitaryType(ConstFP
->getValue());
57 } else if (Argument
*Arg
= dyn_cast
<Argument
>(Operand
)) {
58 hash(Arg
->getArgNo());
59 } else if (Function
*Func
= dyn_cast
<Function
>(Operand
)) {
60 // Hashing the name will be deterministic as LLVM's hashing infrastructure
61 // has explicit support for hashing strings and will not simply hash
63 hashArbitaryType(Func
->getName());
67 void updateInstruction(const Instruction
&Inst
, bool DetailedHash
) {
68 hash(Inst
.getOpcode());
73 hashType(Inst
.getType());
75 // Handle additional properties of specific instructions that cause
76 // semantic differences in the IR.
77 if (const auto *ComparisonInstruction
= dyn_cast
<CmpInst
>(&Inst
))
78 hash(ComparisonInstruction
->getPredicate());
80 for (const auto &Op
: Inst
.operands())
84 // A function hash is calculated by considering only the number of arguments
85 // and whether a function is varargs, the order of basic blocks (given by the
86 // successors of each basic block in depth first order), and the order of
87 // opcodes of each instruction within each of these basic blocks. This mirrors
88 // the strategy FunctionComparator::compare() uses to compare functions by
89 // walking the BBs in depth first order and comparing each instruction in
90 // sequence. Because this hash currently does not look at the operands, it is
91 // insensitive to things such as the target of calls and the constants used in
92 // the function, which makes it useful when possibly merging functions which
93 // are the same modulo constants and call targets.
95 // Note that different users of StructuralHash will want different behavior
96 // out of it (i.e., MergeFunctions will want something different from PM
97 // expensive checks for pass modification status). When modifying this
98 // function, most changes should be gated behind an option and enabled
100 void update(const Function
&F
, bool DetailedHash
) {
101 // Declarations don't affect analyses.
102 if (F
.isDeclaration())
105 hash(0x62642d6b6b2d6b72); // Function header
110 SmallVector
<const BasicBlock
*, 8> BBs
;
111 SmallPtrSet
<const BasicBlock
*, 16> VisitedBBs
;
113 // Walk the blocks in the same order as
114 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115 // function "structure." (BB and opcode sequence)
116 BBs
.push_back(&F
.getEntryBlock());
117 VisitedBBs
.insert(BBs
[0]);
118 while (!BBs
.empty()) {
119 const BasicBlock
*BB
= BBs
.pop_back_val();
121 // This random value acts as a block header, as otherwise the partition of
122 // opcodes into BBs wouldn't affect the hash, only the order of the
125 for (auto &Inst
: *BB
)
126 updateInstruction(Inst
, DetailedHash
);
128 const Instruction
*Term
= BB
->getTerminator();
129 for (unsigned i
= 0, e
= Term
->getNumSuccessors(); i
!= e
; ++i
) {
130 if (!VisitedBBs
.insert(Term
->getSuccessor(i
)).second
)
132 BBs
.push_back(Term
->getSuccessor(i
));
137 void update(const GlobalVariable
&GV
) {
138 // Declarations and used/compiler.used don't affect analyses.
139 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
140 // we ignore anything with the `.llvm` prefix
141 if (GV
.isDeclaration() || GV
.getName().starts_with("llvm."))
143 hash(23456); // Global header
144 hash(GV
.getValueType()->getTypeID());
147 void update(const Module
&M
, bool DetailedHash
) {
148 for (const GlobalVariable
&GV
: M
.globals())
150 for (const Function
&F
: M
)
151 update(F
, DetailedHash
);
154 uint64_t getHash() const { return Hash
; }
159 IRHash
llvm::StructuralHash(const Function
&F
, bool DetailedHash
) {
160 StructuralHashImpl H
;
161 H
.update(F
, DetailedHash
);
165 IRHash
llvm::StructuralHash(const Module
&M
, bool DetailedHash
) {
166 StructuralHashImpl H
;
167 H
.update(M
, DetailedHash
);