1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/IR/Function.h"
11 #include "llvm/IR/GlobalVariable.h"
12 #include "llvm/IR/InstrTypes.h"
13 #include "llvm/IR/Instructions.h"
14 #include "llvm/IR/IntrinsicInst.h"
15 #include "llvm/IR/Module.h"
21 // Basic hashing mechanism to detect structural change to the IR, used to verify
22 // pass return status consistency with actual change. In addition to being used
23 // by the MergeFunctions pass.
25 class StructuralHashImpl
{
30 // This random value acts as a block header, as otherwise the partition of
31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32 static constexpr stable_hash BlockHeaderHash
= 45798;
33 static constexpr stable_hash FunctionHeaderHash
= 0x62642d6b6b2d6b72;
34 static constexpr stable_hash GlobalHeaderHash
= 23456;
36 /// IgnoreOp is a function that returns true if the operand should be ignored.
37 IgnoreOperandFunc IgnoreOp
= nullptr;
38 /// A mapping from instruction indices to instruction pointers.
39 /// The index represents the position of an instruction based on the order in
40 /// which it is first encountered.
41 std::unique_ptr
<IndexInstrMap
> IndexInstruction
= nullptr;
42 /// A mapping from pairs of instruction indices and operand indices
43 /// to the hashes of the operands.
44 std::unique_ptr
<IndexOperandHashMapType
> IndexOperandHashMap
= nullptr;
46 /// Assign a unique ID to each Value in the order they are first seen.
47 DenseMap
<const Value
*, int> ValueToId
;
49 static stable_hash
hashType(Type
*ValueType
) {
50 SmallVector
<stable_hash
> Hashes
;
51 Hashes
.emplace_back(ValueType
->getTypeID());
52 if (ValueType
->isIntegerTy())
53 Hashes
.emplace_back(ValueType
->getIntegerBitWidth());
54 return stable_hash_combine(Hashes
);
58 StructuralHashImpl() = delete;
59 explicit StructuralHashImpl(bool DetailedHash
,
60 IgnoreOperandFunc IgnoreOp
= nullptr)
61 : DetailedHash(DetailedHash
), IgnoreOp(IgnoreOp
) {
63 IndexInstruction
= std::make_unique
<IndexInstrMap
>();
64 IndexOperandHashMap
= std::make_unique
<IndexOperandHashMapType
>();
68 static stable_hash
hashAPInt(const APInt
&I
) {
69 SmallVector
<stable_hash
> Hashes
;
70 Hashes
.emplace_back(I
.getBitWidth());
71 auto RawVals
= ArrayRef
<uint64_t>(I
.getRawData(), I
.getNumWords());
72 Hashes
.append(RawVals
.begin(), RawVals
.end());
73 return stable_hash_combine(Hashes
);
76 static stable_hash
hashAPFloat(const APFloat
&F
) {
77 return hashAPInt(F
.bitcastToAPInt());
80 static stable_hash
hashGlobalVariable(const GlobalVariable
&GVar
) {
81 if (!GVar
.hasInitializer())
82 return hashGlobalValue(&GVar
);
84 // Hash the contents of a string.
85 if (GVar
.getName().starts_with(".str")) {
86 auto *C
= GVar
.getInitializer();
87 if (const auto *Seq
= dyn_cast
<ConstantDataSequential
>(C
))
89 return stable_hash_name(Seq
->getAsString());
92 // Hash structural contents of Objective-C metadata in specific sections.
93 // This can be extended to other metadata if needed.
94 static constexpr const char *SectionNames
[] = {
95 "__cfstring", "__cstring", "__objc_classrefs",
96 "__objc_methname", "__objc_selrefs",
98 if (GVar
.hasSection()) {
99 StringRef SectionName
= GVar
.getSection();
100 for (const char *Name
: SectionNames
)
101 if (SectionName
.contains(Name
))
102 return hashConstant(GVar
.getInitializer());
105 return hashGlobalValue(&GVar
);
108 static stable_hash
hashGlobalValue(const GlobalValue
*GV
) {
111 return stable_hash_name(GV
->getName());
114 // Compute a hash for a Constant. This function is logically similar to
115 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
116 // we're interested in computing a hash rather than comparing two Constants.
117 // Some of the logic is simplified, e.g, we don't expand GEPOperator.
118 static stable_hash
hashConstant(const Constant
*C
) {
119 SmallVector
<stable_hash
> Hashes
;
121 Type
*Ty
= C
->getType();
122 Hashes
.emplace_back(hashType(Ty
));
124 if (C
->isNullValue()) {
125 Hashes
.emplace_back(static_cast<stable_hash
>('N'));
126 return stable_hash_combine(Hashes
);
129 if (auto *GVar
= dyn_cast
<GlobalVariable
>(C
)) {
130 Hashes
.emplace_back(hashGlobalVariable(*GVar
));
131 return stable_hash_combine(Hashes
);
134 if (auto *G
= dyn_cast
<GlobalValue
>(C
)) {
135 Hashes
.emplace_back(hashGlobalValue(G
));
136 return stable_hash_combine(Hashes
);
139 if (const auto *Seq
= dyn_cast
<ConstantDataSequential
>(C
)) {
140 if (Seq
->isString()) {
141 Hashes
.emplace_back(stable_hash_name(Seq
->getAsString()));
142 return stable_hash_combine(Hashes
);
146 switch (C
->getValueID()) {
147 case Value::ConstantIntVal
: {
148 const APInt
&Int
= cast
<ConstantInt
>(C
)->getValue();
149 Hashes
.emplace_back(hashAPInt(Int
));
150 return stable_hash_combine(Hashes
);
152 case Value::ConstantFPVal
: {
153 const APFloat
&APF
= cast
<ConstantFP
>(C
)->getValueAPF();
154 Hashes
.emplace_back(hashAPFloat(APF
));
155 return stable_hash_combine(Hashes
);
157 case Value::ConstantArrayVal
:
158 case Value::ConstantStructVal
:
159 case Value::ConstantVectorVal
:
160 case Value::ConstantExprVal
: {
161 for (const auto &Op
: C
->operands()) {
162 auto H
= hashConstant(cast
<Constant
>(Op
));
163 Hashes
.emplace_back(H
);
165 return stable_hash_combine(Hashes
);
167 case Value::BlockAddressVal
: {
168 const BlockAddress
*BA
= cast
<BlockAddress
>(C
);
169 auto H
= hashGlobalValue(BA
->getFunction());
170 Hashes
.emplace_back(H
);
171 return stable_hash_combine(Hashes
);
173 case Value::DSOLocalEquivalentVal
: {
174 const auto *Equiv
= cast
<DSOLocalEquivalent
>(C
);
175 auto H
= hashGlobalValue(Equiv
->getGlobalValue());
176 Hashes
.emplace_back(H
);
177 return stable_hash_combine(Hashes
);
180 // Skip other types of constants for simplicity.
181 return stable_hash_combine(Hashes
);
185 stable_hash
hashValue(Value
*V
) {
186 // Check constant and return its hash.
187 Constant
*C
= dyn_cast
<Constant
>(V
);
189 return hashConstant(C
);
191 // Hash argument number.
192 SmallVector
<stable_hash
> Hashes
;
193 if (Argument
*Arg
= dyn_cast
<Argument
>(V
))
194 Hashes
.emplace_back(Arg
->getArgNo());
196 // Get an index (an insertion order) for the non-constant value.
197 auto [It
, WasInserted
] = ValueToId
.try_emplace(V
, ValueToId
.size());
198 Hashes
.emplace_back(It
->second
);
200 return stable_hash_combine(Hashes
);
203 stable_hash
hashOperand(Value
*Operand
) {
204 SmallVector
<stable_hash
> Hashes
;
205 Hashes
.emplace_back(hashType(Operand
->getType()));
206 Hashes
.emplace_back(hashValue(Operand
));
207 return stable_hash_combine(Hashes
);
210 stable_hash
hashInstruction(const Instruction
&Inst
) {
211 SmallVector
<stable_hash
> Hashes
;
212 Hashes
.emplace_back(Inst
.getOpcode());
215 return stable_hash_combine(Hashes
);
217 Hashes
.emplace_back(hashType(Inst
.getType()));
219 // Handle additional properties of specific instructions that cause
220 // semantic differences in the IR.
221 if (const auto *ComparisonInstruction
= dyn_cast
<CmpInst
>(&Inst
))
222 Hashes
.emplace_back(ComparisonInstruction
->getPredicate());
224 unsigned InstIdx
= 0;
225 if (IndexInstruction
) {
226 InstIdx
= IndexInstruction
->size();
227 IndexInstruction
->try_emplace(InstIdx
, const_cast<Instruction
*>(&Inst
));
230 for (const auto [OpndIdx
, Op
] : enumerate(Inst
.operands())) {
231 auto OpndHash
= hashOperand(Op
);
232 if (IgnoreOp
&& IgnoreOp(&Inst
, OpndIdx
)) {
233 assert(IndexOperandHashMap
);
234 IndexOperandHashMap
->try_emplace({InstIdx
, OpndIdx
}, OpndHash
);
236 Hashes
.emplace_back(OpndHash
);
239 return stable_hash_combine(Hashes
);
242 // A function hash is calculated by considering only the number of arguments
243 // and whether a function is varargs, the order of basic blocks (given by the
244 // successors of each basic block in depth first order), and the order of
245 // opcodes of each instruction within each of these basic blocks. This mirrors
246 // the strategy FunctionComparator::compare() uses to compare functions by
247 // walking the BBs in depth first order and comparing each instruction in
248 // sequence. Because this hash currently does not look at the operands, it is
249 // insensitive to things such as the target of calls and the constants used in
250 // the function, which makes it useful when possibly merging functions which
251 // are the same modulo constants and call targets.
253 // Note that different users of StructuralHash will want different behavior
254 // out of it (i.e., MergeFunctions will want something different from PM
255 // expensive checks for pass modification status). When modifying this
256 // function, most changes should be gated behind an option and enabled
258 void update(const Function
&F
) {
259 // Declarations don't affect analyses.
260 if (F
.isDeclaration())
263 SmallVector
<stable_hash
> Hashes
;
264 Hashes
.emplace_back(Hash
);
265 Hashes
.emplace_back(FunctionHeaderHash
);
267 Hashes
.emplace_back(F
.isVarArg());
268 Hashes
.emplace_back(F
.arg_size());
270 SmallVector
<const BasicBlock
*, 8> BBs
;
271 SmallPtrSet
<const BasicBlock
*, 16> VisitedBBs
;
273 // Walk the blocks in the same order as
274 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
275 // function "structure." (BB and opcode sequence)
276 BBs
.push_back(&F
.getEntryBlock());
277 VisitedBBs
.insert(BBs
[0]);
278 while (!BBs
.empty()) {
279 const BasicBlock
*BB
= BBs
.pop_back_val();
281 Hashes
.emplace_back(BlockHeaderHash
);
282 for (auto &Inst
: *BB
)
283 Hashes
.emplace_back(hashInstruction(Inst
));
285 for (const BasicBlock
*Succ
: successors(BB
))
286 if (VisitedBBs
.insert(Succ
).second
)
290 // Update the combined hash in place.
291 Hash
= stable_hash_combine(Hashes
);
294 void update(const GlobalVariable
&GV
) {
295 // Declarations and used/compiler.used don't affect analyses.
296 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
297 // we ignore anything with the `.llvm` prefix
298 if (GV
.isDeclaration() || GV
.getName().starts_with("llvm."))
300 SmallVector
<stable_hash
> Hashes
;
301 Hashes
.emplace_back(Hash
);
302 Hashes
.emplace_back(GlobalHeaderHash
);
303 Hashes
.emplace_back(GV
.getValueType()->getTypeID());
305 // Update the combined hash in place.
306 Hash
= stable_hash_combine(Hashes
);
309 void update(const Module
&M
) {
310 for (const GlobalVariable
&GV
: M
.globals())
312 for (const Function
&F
: M
)
316 uint64_t getHash() const { return Hash
; }
318 std::unique_ptr
<IndexInstrMap
> getIndexInstrMap() {
319 return std::move(IndexInstruction
);
322 std::unique_ptr
<IndexOperandHashMapType
> getIndexPairOpndHashMap() {
323 return std::move(IndexOperandHashMap
);
329 stable_hash
llvm::StructuralHash(const Function
&F
, bool DetailedHash
) {
330 StructuralHashImpl
H(DetailedHash
);
335 stable_hash
llvm::StructuralHash(const GlobalVariable
&GVar
) {
336 return StructuralHashImpl::hashGlobalVariable(GVar
);
339 stable_hash
llvm::StructuralHash(const Module
&M
, bool DetailedHash
) {
340 StructuralHashImpl
H(DetailedHash
);
346 llvm::StructuralHashWithDifferences(const Function
&F
,
347 IgnoreOperandFunc IgnoreOp
) {
348 StructuralHashImpl
H(/*DetailedHash=*/true, IgnoreOp
);
350 return FunctionHashInfo(H
.getHash(), H
.getIndexInstrMap(),
351 H
.getIndexPairOpndHashMap());