[BOLT] Detect Linux kernel version if the binary is a Linux kernel (#119088)
[llvm-project.git] / bolt / lib / Core / HashUtilities.cpp
blob6bdf30460f340bbb4678f01d5451313d42d7cd5a
1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Computation of hash values over BinaryFunction and BinaryBasicBlock.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/HashUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/NameResolver.h"
16 #include "llvm/MC/MCInstPrinter.h"
18 namespace llvm {
19 namespace bolt {
21 std::string hashInteger(uint64_t Value) {
22 std::string HashString;
23 if (Value == 0)
24 HashString.push_back(0);
26 while (Value) {
27 uint8_t LSB = Value & 0xff;
28 HashString.push_back(LSB);
29 Value >>= 8;
32 return HashString;
35 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
36 std::string HashString;
38 // Ignore function references.
39 if (BC.getFunctionForSymbol(&Symbol))
40 return HashString;
42 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
43 if (!ErrorOrValue)
44 return HashString;
46 // Ignore jump table references.
47 if (BC.getJumpTableContainingAddress(*ErrorOrValue))
48 return HashString;
50 return HashString.append(hashInteger(*ErrorOrValue));
53 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
54 switch (Expr.getKind()) {
55 case MCExpr::Constant:
56 return hashInteger(cast<MCConstantExpr>(Expr).getValue());
57 case MCExpr::SymbolRef:
58 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
59 case MCExpr::Unary: {
60 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
61 return hashInteger(UnaryExpr.getOpcode())
62 .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
64 case MCExpr::Binary: {
65 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
66 return hashExpr(BC, *BinaryExpr.getLHS())
67 .append(hashInteger(BinaryExpr.getOpcode()))
68 .append(hashExpr(BC, *BinaryExpr.getRHS()));
70 case MCExpr::Target:
71 return std::string();
74 llvm_unreachable("invalid expression kind");
77 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
78 if (Operand.isImm())
79 return hashInteger(Operand.getImm());
80 if (Operand.isReg())
81 return hashInteger(Operand.getReg());
82 if (Operand.isExpr())
83 return hashExpr(BC, *Operand.getExpr());
85 return std::string();
88 std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
89 OperandHashFuncTy OperandHashFunc) {
90 const bool IsX86 = BC.isX86();
92 // The hash is computed by creating a string of all instruction opcodes and
93 // possibly their operands and then hashing that string with std::hash.
94 std::string HashString;
96 for (const MCInst &Inst : BB) {
97 if (BC.MIB->isPseudo(Inst))
98 continue;
100 unsigned Opcode = Inst.getOpcode();
102 // Ignore unconditional jumps since we check CFG consistency by processing
103 // basic blocks in order and do not rely on branches to be in-sync with
104 // CFG. Note that we still use condition code of conditional jumps.
105 if (BC.MIB->isUnconditionalBranch(Inst))
106 continue;
108 if (IsX86 && BC.MIB->isConditionalBranch(Inst))
109 Opcode = BC.MIB->getShortBranchOpcode(Opcode);
111 if (Opcode == 0) {
112 HashString.push_back(0);
113 } else {
114 StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
115 HashString.append(OpcodeName.str());
118 for (const MCOperand &Op : MCPlus::primeOperands(Inst))
119 HashString.append(OperandHashFunc(Op));
121 return HashString;
124 /// A "loose" hash of a basic block to use with the stale profile matching. The
125 /// computed value will be the same for blocks with minor changes (such as
126 /// reordering of instructions or using different operands) but may result in
127 /// collisions that need to be resolved by a stronger hashing.
128 std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
129 // The hash is computed by creating a string of all lexicographically ordered
130 // instruction opcodes, which is then hashed with std::hash.
131 std::set<std::string> Opcodes;
132 for (const MCInst &Inst : BB) {
133 // Skip pseudo instructions and nops.
134 if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
135 continue;
137 // Ignore unconditional jumps, as they can be added / removed as a result
138 // of basic block reordering.
139 if (BC.MIB->isUnconditionalBranch(Inst))
140 continue;
142 // Do not distinguish different types of conditional jumps.
143 if (BC.MIB->isConditionalBranch(Inst)) {
144 Opcodes.insert("JMP");
145 continue;
148 std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
149 llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
150 Opcodes.insert(Mnemonic);
153 std::string HashString;
154 for (const std::string &Opcode : Opcodes)
155 HashString.append(Opcode);
156 return HashString;
159 /// An even looser hash level relative to $ hashBlockLoose to use with stale
160 /// profile matching, composed of the names of a block's called functions in
161 /// lexicographic order.
162 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
163 // The hash is computed by creating a string of all lexicographically ordered
164 // called function names.
165 std::vector<std::string> FunctionNames;
166 for (const MCInst &Instr : BB) {
167 // Skip non-call instructions.
168 if (!BC.MIB->isCall(Instr))
169 continue;
170 const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
171 if (!CallSymbol)
172 continue;
173 FunctionNames.push_back(std::string(CallSymbol->getName()));
175 std::sort(FunctionNames.begin(), FunctionNames.end());
176 std::string HashString;
177 for (const std::string &FunctionName : FunctionNames)
178 HashString.append(FunctionName);
180 return HashString;
183 /// The same as the $hashBlockCalls function, but for profiled functions.
184 std::string
185 hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
186 &IdToYamlFunction,
187 const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
188 std::vector<std::string> FunctionNames;
189 for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
190 auto It = IdToYamlFunction.find(CallSiteInfo.DestId);
191 if (It == IdToYamlFunction.end())
192 continue;
193 StringRef Name = NameResolver::dropNumNames(It->second->Name);
194 FunctionNames.push_back(std::string(Name));
196 std::sort(FunctionNames.begin(), FunctionNames.end());
197 std::string HashString;
198 for (const std::string &FunctionName : FunctionNames)
199 HashString.append(FunctionName);
201 return HashString;
204 } // namespace bolt
205 } // namespace llvm