[analyzer][Z3] Restore the original timeout of 15s (#118291)
[llvm-project.git] / llvm / lib / IR / StructuralHash.cpp
blob1c617c100c7dc05e0c38f406b02ae53f1a05f31d
1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/IR/Function.h"
11 #include "llvm/IR/GlobalVariable.h"
12 #include "llvm/IR/InstrTypes.h"
13 #include "llvm/IR/Instructions.h"
14 #include "llvm/IR/IntrinsicInst.h"
15 #include "llvm/IR/Module.h"
17 using namespace llvm;
19 namespace {
21 // Basic hashing mechanism to detect structural change to the IR, used to verify
22 // pass return status consistency with actual change. In addition to being used
23 // by the MergeFunctions pass.
25 class StructuralHashImpl {
26 stable_hash Hash = 4;
28 bool DetailedHash;
30 // This random value acts as a block header, as otherwise the partition of
31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32 static constexpr stable_hash BlockHeaderHash = 45798;
33 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
34 static constexpr stable_hash GlobalHeaderHash = 23456;
36 /// IgnoreOp is a function that returns true if the operand should be ignored.
37 IgnoreOperandFunc IgnoreOp = nullptr;
38 /// A mapping from instruction indices to instruction pointers.
39 /// The index represents the position of an instruction based on the order in
40 /// which it is first encountered.
41 std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
42 /// A mapping from pairs of instruction indices and operand indices
43 /// to the hashes of the operands.
44 std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
46 /// Assign a unique ID to each Value in the order they are first seen.
47 DenseMap<const Value *, int> ValueToId;
49 static stable_hash hashType(Type *ValueType) {
50 SmallVector<stable_hash> Hashes;
51 Hashes.emplace_back(ValueType->getTypeID());
52 if (ValueType->isIntegerTy())
53 Hashes.emplace_back(ValueType->getIntegerBitWidth());
54 return stable_hash_combine(Hashes);
57 public:
58 StructuralHashImpl() = delete;
59 explicit StructuralHashImpl(bool DetailedHash,
60 IgnoreOperandFunc IgnoreOp = nullptr)
61 : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
62 if (IgnoreOp) {
63 IndexInstruction = std::make_unique<IndexInstrMap>();
64 IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
68 static stable_hash hashAPInt(const APInt &I) {
69 SmallVector<stable_hash> Hashes;
70 Hashes.emplace_back(I.getBitWidth());
71 auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
72 Hashes.append(RawVals.begin(), RawVals.end());
73 return stable_hash_combine(Hashes);
76 static stable_hash hashAPFloat(const APFloat &F) {
77 return hashAPInt(F.bitcastToAPInt());
80 static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81 if (!GVar.hasInitializer())
82 return hashGlobalValue(&GVar);
84 // Hash the contents of a string.
85 if (GVar.getName().starts_with(".str")) {
86 auto *C = GVar.getInitializer();
87 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
88 if (Seq->isString())
89 return stable_hash_name(Seq->getAsString());
92 // Hash structural contents of Objective-C metadata in specific sections.
93 // This can be extended to other metadata if needed.
94 static constexpr const char *SectionNames[] = {
95 "__cfstring", "__cstring", "__objc_classrefs",
96 "__objc_methname", "__objc_selrefs",
98 if (GVar.hasSection()) {
99 StringRef SectionName = GVar.getSection();
100 for (const char *Name : SectionNames)
101 if (SectionName.contains(Name))
102 return hashConstant(GVar.getInitializer());
105 return hashGlobalValue(&GVar);
108 static stable_hash hashGlobalValue(const GlobalValue *GV) {
109 if (!GV->hasName())
110 return 0;
111 return stable_hash_name(GV->getName());
114 // Compute a hash for a Constant. This function is logically similar to
115 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
116 // we're interested in computing a hash rather than comparing two Constants.
117 // Some of the logic is simplified, e.g, we don't expand GEPOperator.
118 static stable_hash hashConstant(const Constant *C) {
119 SmallVector<stable_hash> Hashes;
121 Type *Ty = C->getType();
122 Hashes.emplace_back(hashType(Ty));
124 if (C->isNullValue()) {
125 Hashes.emplace_back(static_cast<stable_hash>('N'));
126 return stable_hash_combine(Hashes);
129 if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
130 Hashes.emplace_back(hashGlobalVariable(*GVar));
131 return stable_hash_combine(Hashes);
134 if (auto *G = dyn_cast<GlobalValue>(C)) {
135 Hashes.emplace_back(hashGlobalValue(G));
136 return stable_hash_combine(Hashes);
139 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
140 if (Seq->isString()) {
141 Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
142 return stable_hash_combine(Hashes);
146 switch (C->getValueID()) {
147 case Value::ConstantIntVal: {
148 const APInt &Int = cast<ConstantInt>(C)->getValue();
149 Hashes.emplace_back(hashAPInt(Int));
150 return stable_hash_combine(Hashes);
152 case Value::ConstantFPVal: {
153 const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
154 Hashes.emplace_back(hashAPFloat(APF));
155 return stable_hash_combine(Hashes);
157 case Value::ConstantArrayVal:
158 case Value::ConstantStructVal:
159 case Value::ConstantVectorVal:
160 case Value::ConstantExprVal: {
161 for (const auto &Op : C->operands()) {
162 auto H = hashConstant(cast<Constant>(Op));
163 Hashes.emplace_back(H);
165 return stable_hash_combine(Hashes);
167 case Value::BlockAddressVal: {
168 const BlockAddress *BA = cast<BlockAddress>(C);
169 auto H = hashGlobalValue(BA->getFunction());
170 Hashes.emplace_back(H);
171 return stable_hash_combine(Hashes);
173 case Value::DSOLocalEquivalentVal: {
174 const auto *Equiv = cast<DSOLocalEquivalent>(C);
175 auto H = hashGlobalValue(Equiv->getGlobalValue());
176 Hashes.emplace_back(H);
177 return stable_hash_combine(Hashes);
179 default:
180 // Skip other types of constants for simplicity.
181 return stable_hash_combine(Hashes);
185 stable_hash hashValue(Value *V) {
186 // Check constant and return its hash.
187 Constant *C = dyn_cast<Constant>(V);
188 if (C)
189 return hashConstant(C);
191 // Hash argument number.
192 SmallVector<stable_hash> Hashes;
193 if (Argument *Arg = dyn_cast<Argument>(V))
194 Hashes.emplace_back(Arg->getArgNo());
196 // Get an index (an insertion order) for the non-constant value.
197 auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
198 Hashes.emplace_back(It->second);
200 return stable_hash_combine(Hashes);
203 stable_hash hashOperand(Value *Operand) {
204 SmallVector<stable_hash> Hashes;
205 Hashes.emplace_back(hashType(Operand->getType()));
206 Hashes.emplace_back(hashValue(Operand));
207 return stable_hash_combine(Hashes);
210 stable_hash hashInstruction(const Instruction &Inst) {
211 SmallVector<stable_hash> Hashes;
212 Hashes.emplace_back(Inst.getOpcode());
214 if (!DetailedHash)
215 return stable_hash_combine(Hashes);
217 Hashes.emplace_back(hashType(Inst.getType()));
219 // Handle additional properties of specific instructions that cause
220 // semantic differences in the IR.
221 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
222 Hashes.emplace_back(ComparisonInstruction->getPredicate());
224 unsigned InstIdx = 0;
225 if (IndexInstruction) {
226 InstIdx = IndexInstruction->size();
227 IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
230 for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
231 auto OpndHash = hashOperand(Op);
232 if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
233 assert(IndexOperandHashMap);
234 IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
235 } else
236 Hashes.emplace_back(OpndHash);
239 return stable_hash_combine(Hashes);
242 // A function hash is calculated by considering only the number of arguments
243 // and whether a function is varargs, the order of basic blocks (given by the
244 // successors of each basic block in depth first order), and the order of
245 // opcodes of each instruction within each of these basic blocks. This mirrors
246 // the strategy FunctionComparator::compare() uses to compare functions by
247 // walking the BBs in depth first order and comparing each instruction in
248 // sequence. Because this hash currently does not look at the operands, it is
249 // insensitive to things such as the target of calls and the constants used in
250 // the function, which makes it useful when possibly merging functions which
251 // are the same modulo constants and call targets.
253 // Note that different users of StructuralHash will want different behavior
254 // out of it (i.e., MergeFunctions will want something different from PM
255 // expensive checks for pass modification status). When modifying this
256 // function, most changes should be gated behind an option and enabled
257 // selectively.
258 void update(const Function &F) {
259 // Declarations don't affect analyses.
260 if (F.isDeclaration())
261 return;
263 SmallVector<stable_hash> Hashes;
264 Hashes.emplace_back(Hash);
265 Hashes.emplace_back(FunctionHeaderHash);
267 Hashes.emplace_back(F.isVarArg());
268 Hashes.emplace_back(F.arg_size());
270 SmallVector<const BasicBlock *, 8> BBs;
271 SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
273 // Walk the blocks in the same order as
274 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
275 // function "structure." (BB and opcode sequence)
276 BBs.push_back(&F.getEntryBlock());
277 VisitedBBs.insert(BBs[0]);
278 while (!BBs.empty()) {
279 const BasicBlock *BB = BBs.pop_back_val();
281 Hashes.emplace_back(BlockHeaderHash);
282 for (auto &Inst : *BB)
283 Hashes.emplace_back(hashInstruction(Inst));
285 for (const BasicBlock *Succ : successors(BB))
286 if (VisitedBBs.insert(Succ).second)
287 BBs.push_back(Succ);
290 // Update the combined hash in place.
291 Hash = stable_hash_combine(Hashes);
294 void update(const GlobalVariable &GV) {
295 // Declarations and used/compiler.used don't affect analyses.
296 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
297 // we ignore anything with the `.llvm` prefix
298 if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
299 return;
300 SmallVector<stable_hash> Hashes;
301 Hashes.emplace_back(Hash);
302 Hashes.emplace_back(GlobalHeaderHash);
303 Hashes.emplace_back(GV.getValueType()->getTypeID());
305 // Update the combined hash in place.
306 Hash = stable_hash_combine(Hashes);
309 void update(const Module &M) {
310 for (const GlobalVariable &GV : M.globals())
311 update(GV);
312 for (const Function &F : M)
313 update(F);
316 uint64_t getHash() const { return Hash; }
318 std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
319 return std::move(IndexInstruction);
322 std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
323 return std::move(IndexOperandHashMap);
327 } // namespace
329 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
330 StructuralHashImpl H(DetailedHash);
331 H.update(F);
332 return H.getHash();
335 stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
336 return StructuralHashImpl::hashGlobalVariable(GVar);
339 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
340 StructuralHashImpl H(DetailedHash);
341 H.update(M);
342 return H.getHash();
345 FunctionHashInfo
346 llvm::StructuralHashWithDifferences(const Function &F,
347 IgnoreOperandFunc IgnoreOp) {
348 StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
349 H.update(F);
350 return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
351 H.getIndexPairOpndHashMap());