Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Core / HashUtilities.cpp
blob6c7570dcc44e87b02a75b4ca36761fbddd4b8ecb
1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Computation of hash values over BinaryFunction and BinaryBasicBlock.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/HashUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/MC/MCInstPrinter.h"
18 namespace llvm {
19 namespace bolt {
21 /// Hashing a 64-bit integer to a 16-bit one.
22 uint16_t hash_64_to_16(const uint64_t Hash) {
23 uint16_t Res = (uint16_t)(Hash & 0xFFFF);
24 Res ^= (uint16_t)((Hash >> 16) & 0xFFFF);
25 Res ^= (uint16_t)((Hash >> 32) & 0xFFFF);
26 Res ^= (uint16_t)((Hash >> 48) & 0xFFFF);
27 return Res;
30 std::string hashInteger(uint64_t Value) {
31 std::string HashString;
32 if (Value == 0)
33 HashString.push_back(0);
35 while (Value) {
36 uint8_t LSB = Value & 0xff;
37 HashString.push_back(LSB);
38 Value >>= 8;
41 return HashString;
44 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
45 std::string HashString;
47 // Ignore function references.
48 if (BC.getFunctionForSymbol(&Symbol))
49 return HashString;
51 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
52 if (!ErrorOrValue)
53 return HashString;
55 // Ignore jump table references.
56 if (BC.getJumpTableContainingAddress(*ErrorOrValue))
57 return HashString;
59 return HashString.append(hashInteger(*ErrorOrValue));
62 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
63 switch (Expr.getKind()) {
64 case MCExpr::Constant:
65 return hashInteger(cast<MCConstantExpr>(Expr).getValue());
66 case MCExpr::SymbolRef:
67 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
68 case MCExpr::Unary: {
69 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
70 return hashInteger(UnaryExpr.getOpcode())
71 .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
73 case MCExpr::Binary: {
74 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
75 return hashExpr(BC, *BinaryExpr.getLHS())
76 .append(hashInteger(BinaryExpr.getOpcode()))
77 .append(hashExpr(BC, *BinaryExpr.getRHS()));
79 case MCExpr::Target:
80 return std::string();
83 llvm_unreachable("invalid expression kind");
86 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
87 if (Operand.isImm())
88 return hashInteger(Operand.getImm());
89 if (Operand.isReg())
90 return hashInteger(Operand.getReg());
91 if (Operand.isExpr())
92 return hashExpr(BC, *Operand.getExpr());
94 return std::string();
97 std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
98 OperandHashFuncTy OperandHashFunc) {
99 const bool IsX86 = BC.isX86();
101 // The hash is computed by creating a string of all instruction opcodes and
102 // possibly their operands and then hashing that string with std::hash.
103 std::string HashString;
105 for (const MCInst &Inst : BB) {
106 if (BC.MIB->isPseudo(Inst))
107 continue;
109 unsigned Opcode = Inst.getOpcode();
111 // Ignore unconditional jumps since we check CFG consistency by processing
112 // basic blocks in order and do not rely on branches to be in-sync with
113 // CFG. Note that we still use condition code of conditional jumps.
114 if (BC.MIB->isUnconditionalBranch(Inst))
115 continue;
117 if (IsX86 && BC.MIB->isConditionalBranch(Inst))
118 Opcode = BC.MIB->getShortBranchOpcode(Opcode);
120 if (Opcode == 0) {
121 HashString.push_back(0);
122 } else {
123 StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
124 HashString.append(OpcodeName.str());
127 for (const MCOperand &Op : MCPlus::primeOperands(Inst))
128 HashString.append(OperandHashFunc(Op));
130 return HashString;
133 /// A "loose" hash of a basic block to use with the stale profile matching. The
134 /// computed value will be the same for blocks with minor changes (such as
135 /// reordering of instructions or using different operands) but may result in
136 /// collisions that need to be resolved by a stronger hashing.
137 std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
138 // The hash is computed by creating a string of all lexicographically ordered
139 // instruction opcodes, which is then hashed with std::hash.
140 std::set<std::string> Opcodes;
141 for (const MCInst &Inst : BB) {
142 // Skip pseudo instructions and nops.
143 if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
144 continue;
146 // Ignore unconditional jumps, as they can be added / removed as a result
147 // of basic block reordering.
148 if (BC.MIB->isUnconditionalBranch(Inst))
149 continue;
151 // Do not distinguish different types of conditional jumps.
152 if (BC.MIB->isConditionalBranch(Inst)) {
153 Opcodes.insert("JMP");
154 continue;
157 std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
158 llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
159 Opcodes.insert(Mnemonic);
162 std::string HashString;
163 for (const std::string &Opcode : Opcodes)
164 HashString.append(Opcode);
165 return HashString;
168 } // namespace bolt
169 } // namespace llvm