1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Computation of hash values over BinaryFunction and BinaryBasicBlock.
11 //===----------------------------------------------------------------------===//
13 #include "bolt/Core/HashUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/MC/MCInstPrinter.h"
21 /// Hashing a 64-bit integer to a 16-bit one.
22 uint16_t hash_64_to_16(const uint64_t Hash
) {
23 uint16_t Res
= (uint16_t)(Hash
& 0xFFFF);
24 Res
^= (uint16_t)((Hash
>> 16) & 0xFFFF);
25 Res
^= (uint16_t)((Hash
>> 32) & 0xFFFF);
26 Res
^= (uint16_t)((Hash
>> 48) & 0xFFFF);
30 std::string
hashInteger(uint64_t Value
) {
31 std::string HashString
;
33 HashString
.push_back(0);
36 uint8_t LSB
= Value
& 0xff;
37 HashString
.push_back(LSB
);
44 std::string
hashSymbol(BinaryContext
&BC
, const MCSymbol
&Symbol
) {
45 std::string HashString
;
47 // Ignore function references.
48 if (BC
.getFunctionForSymbol(&Symbol
))
51 llvm::ErrorOr
<uint64_t> ErrorOrValue
= BC
.getSymbolValue(Symbol
);
55 // Ignore jump table references.
56 if (BC
.getJumpTableContainingAddress(*ErrorOrValue
))
59 return HashString
.append(hashInteger(*ErrorOrValue
));
62 std::string
hashExpr(BinaryContext
&BC
, const MCExpr
&Expr
) {
63 switch (Expr
.getKind()) {
64 case MCExpr::Constant
:
65 return hashInteger(cast
<MCConstantExpr
>(Expr
).getValue());
66 case MCExpr::SymbolRef
:
67 return hashSymbol(BC
, cast
<MCSymbolRefExpr
>(Expr
).getSymbol());
69 const auto &UnaryExpr
= cast
<MCUnaryExpr
>(Expr
);
70 return hashInteger(UnaryExpr
.getOpcode())
71 .append(hashExpr(BC
, *UnaryExpr
.getSubExpr()));
73 case MCExpr::Binary
: {
74 const auto &BinaryExpr
= cast
<MCBinaryExpr
>(Expr
);
75 return hashExpr(BC
, *BinaryExpr
.getLHS())
76 .append(hashInteger(BinaryExpr
.getOpcode()))
77 .append(hashExpr(BC
, *BinaryExpr
.getRHS()));
83 llvm_unreachable("invalid expression kind");
86 std::string
hashInstOperand(BinaryContext
&BC
, const MCOperand
&Operand
) {
88 return hashInteger(Operand
.getImm());
90 return hashInteger(Operand
.getReg());
92 return hashExpr(BC
, *Operand
.getExpr());
97 std::string
hashBlock(BinaryContext
&BC
, const BinaryBasicBlock
&BB
,
98 OperandHashFuncTy OperandHashFunc
) {
99 const bool IsX86
= BC
.isX86();
101 // The hash is computed by creating a string of all instruction opcodes and
102 // possibly their operands and then hashing that string with std::hash.
103 std::string HashString
;
105 for (const MCInst
&Inst
: BB
) {
106 if (BC
.MIB
->isPseudo(Inst
))
109 unsigned Opcode
= Inst
.getOpcode();
111 // Ignore unconditional jumps since we check CFG consistency by processing
112 // basic blocks in order and do not rely on branches to be in-sync with
113 // CFG. Note that we still use condition code of conditional jumps.
114 if (BC
.MIB
->isUnconditionalBranch(Inst
))
117 if (IsX86
&& BC
.MIB
->isConditionalBranch(Inst
))
118 Opcode
= BC
.MIB
->getShortBranchOpcode(Opcode
);
121 HashString
.push_back(0);
123 StringRef OpcodeName
= BC
.InstPrinter
->getOpcodeName(Opcode
);
124 HashString
.append(OpcodeName
.str());
127 for (const MCOperand
&Op
: MCPlus::primeOperands(Inst
))
128 HashString
.append(OperandHashFunc(Op
));
133 /// A "loose" hash of a basic block to use with the stale profile matching. The
134 /// computed value will be the same for blocks with minor changes (such as
135 /// reordering of instructions or using different operands) but may result in
136 /// collisions that need to be resolved by a stronger hashing.
137 std::string
hashBlockLoose(BinaryContext
&BC
, const BinaryBasicBlock
&BB
) {
138 // The hash is computed by creating a string of all lexicographically ordered
139 // instruction opcodes, which is then hashed with std::hash.
140 std::set
<std::string
> Opcodes
;
141 for (const MCInst
&Inst
: BB
) {
142 // Skip pseudo instructions and nops.
143 if (BC
.MIB
->isPseudo(Inst
) || BC
.MIB
->isNoop(Inst
))
146 // Ignore unconditional jumps, as they can be added / removed as a result
147 // of basic block reordering.
148 if (BC
.MIB
->isUnconditionalBranch(Inst
))
151 // Do not distinguish different types of conditional jumps.
152 if (BC
.MIB
->isConditionalBranch(Inst
)) {
153 Opcodes
.insert("JMP");
157 std::string Mnemonic
= BC
.InstPrinter
->getMnemonic(&Inst
).first
;
158 llvm::erase_if(Mnemonic
, [](unsigned char ch
) { return std::isspace(ch
); });
159 Opcodes
.insert(Mnemonic
);
162 std::string HashString
;
163 for (const std::string
&Opcode
: Opcodes
)
164 HashString
.append(Opcode
);