1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Computation of hash values over BinaryFunction and BinaryBasicBlock.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/HashUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/NameResolver.h"
16 #include "llvm/MC/MCInstPrinter.h"
21 std::string
hashInteger(uint64_t Value
) {
22 std::string HashString
;
24 HashString
.push_back(0);
27 uint8_t LSB
= Value
& 0xff;
28 HashString
.push_back(LSB
);
35 std::string
hashSymbol(BinaryContext
&BC
, const MCSymbol
&Symbol
) {
36 std::string HashString
;
38 // Ignore function references.
39 if (BC
.getFunctionForSymbol(&Symbol
))
42 llvm::ErrorOr
<uint64_t> ErrorOrValue
= BC
.getSymbolValue(Symbol
);
46 // Ignore jump table references.
47 if (BC
.getJumpTableContainingAddress(*ErrorOrValue
))
50 return HashString
.append(hashInteger(*ErrorOrValue
));
53 std::string
hashExpr(BinaryContext
&BC
, const MCExpr
&Expr
) {
54 switch (Expr
.getKind()) {
55 case MCExpr::Constant
:
56 return hashInteger(cast
<MCConstantExpr
>(Expr
).getValue());
57 case MCExpr::SymbolRef
:
58 return hashSymbol(BC
, cast
<MCSymbolRefExpr
>(Expr
).getSymbol());
60 const auto &UnaryExpr
= cast
<MCUnaryExpr
>(Expr
);
61 return hashInteger(UnaryExpr
.getOpcode())
62 .append(hashExpr(BC
, *UnaryExpr
.getSubExpr()));
64 case MCExpr::Binary
: {
65 const auto &BinaryExpr
= cast
<MCBinaryExpr
>(Expr
);
66 return hashExpr(BC
, *BinaryExpr
.getLHS())
67 .append(hashInteger(BinaryExpr
.getOpcode()))
68 .append(hashExpr(BC
, *BinaryExpr
.getRHS()));
74 llvm_unreachable("invalid expression kind");
77 std::string
hashInstOperand(BinaryContext
&BC
, const MCOperand
&Operand
) {
79 return hashInteger(Operand
.getImm());
81 return hashInteger(Operand
.getReg());
83 return hashExpr(BC
, *Operand
.getExpr());
88 std::string
hashBlock(BinaryContext
&BC
, const BinaryBasicBlock
&BB
,
89 OperandHashFuncTy OperandHashFunc
) {
90 const bool IsX86
= BC
.isX86();
92 // The hash is computed by creating a string of all instruction opcodes and
93 // possibly their operands and then hashing that string with std::hash.
94 std::string HashString
;
96 for (const MCInst
&Inst
: BB
) {
97 if (BC
.MIB
->isPseudo(Inst
))
100 unsigned Opcode
= Inst
.getOpcode();
102 // Ignore unconditional jumps since we check CFG consistency by processing
103 // basic blocks in order and do not rely on branches to be in-sync with
104 // CFG. Note that we still use condition code of conditional jumps.
105 if (BC
.MIB
->isUnconditionalBranch(Inst
))
108 if (IsX86
&& BC
.MIB
->isConditionalBranch(Inst
))
109 Opcode
= BC
.MIB
->getShortBranchOpcode(Opcode
);
112 HashString
.push_back(0);
114 StringRef OpcodeName
= BC
.InstPrinter
->getOpcodeName(Opcode
);
115 HashString
.append(OpcodeName
.str());
118 for (const MCOperand
&Op
: MCPlus::primeOperands(Inst
))
119 HashString
.append(OperandHashFunc(Op
));
124 /// A "loose" hash of a basic block to use with the stale profile matching. The
125 /// computed value will be the same for blocks with minor changes (such as
126 /// reordering of instructions or using different operands) but may result in
127 /// collisions that need to be resolved by a stronger hashing.
128 std::string
hashBlockLoose(BinaryContext
&BC
, const BinaryBasicBlock
&BB
) {
129 // The hash is computed by creating a string of all lexicographically ordered
130 // instruction opcodes, which is then hashed with std::hash.
131 std::set
<std::string
> Opcodes
;
132 for (const MCInst
&Inst
: BB
) {
133 // Skip pseudo instructions and nops.
134 if (BC
.MIB
->isPseudo(Inst
) || BC
.MIB
->isNoop(Inst
))
137 // Ignore unconditional jumps, as they can be added / removed as a result
138 // of basic block reordering.
139 if (BC
.MIB
->isUnconditionalBranch(Inst
))
142 // Do not distinguish different types of conditional jumps.
143 if (BC
.MIB
->isConditionalBranch(Inst
)) {
144 Opcodes
.insert("JMP");
148 std::string Mnemonic
= BC
.InstPrinter
->getMnemonic(Inst
).first
;
149 llvm::erase_if(Mnemonic
, [](unsigned char ch
) { return std::isspace(ch
); });
150 Opcodes
.insert(Mnemonic
);
153 std::string HashString
;
154 for (const std::string
&Opcode
: Opcodes
)
155 HashString
.append(Opcode
);
159 /// An even looser hash level relative to $ hashBlockLoose to use with stale
160 /// profile matching, composed of the names of a block's called functions in
161 /// lexicographic order.
162 std::string
hashBlockCalls(BinaryContext
&BC
, const BinaryBasicBlock
&BB
) {
163 // The hash is computed by creating a string of all lexicographically ordered
164 // called function names.
165 std::vector
<std::string
> FunctionNames
;
166 for (const MCInst
&Instr
: BB
) {
167 // Skip non-call instructions.
168 if (!BC
.MIB
->isCall(Instr
))
170 const MCSymbol
*CallSymbol
= BC
.MIB
->getTargetSymbol(Instr
);
173 FunctionNames
.push_back(std::string(CallSymbol
->getName()));
175 std::sort(FunctionNames
.begin(), FunctionNames
.end());
176 std::string HashString
;
177 for (const std::string
&FunctionName
: FunctionNames
)
178 HashString
.append(FunctionName
);
183 /// The same as the $hashBlockCalls function, but for profiled functions.
185 hashBlockCalls(const DenseMap
<uint32_t, yaml::bolt::BinaryFunctionProfile
*>
187 const yaml::bolt::BinaryBasicBlockProfile
&YamlBB
) {
188 std::vector
<std::string
> FunctionNames
;
189 for (const yaml::bolt::CallSiteInfo
&CallSiteInfo
: YamlBB
.CallSites
) {
190 auto It
= IdToYamlFunction
.find(CallSiteInfo
.DestId
);
191 if (It
== IdToYamlFunction
.end())
193 StringRef Name
= NameResolver::dropNumNames(It
->second
->Name
);
194 FunctionNames
.push_back(std::string(Name
));
196 std::sort(FunctionNames
.begin(), FunctionNames
.end());
197 std::string HashString
;
198 for (const std::string
&FunctionName
: FunctionNames
)
199 HashString
.append(FunctionName
);