Revert "[libc] Use best-fit binary trie to make malloc logarithmic" (#117065)
[llvm-project.git] / libc / src / __support / hash.h
blob49138b1f43b9edbc642b9a15b89cd375067a6c83
1 //===-- Portable string hash function ---------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
10 #define LLVM_LIBC_SRC___SUPPORT_HASH_H
12 #include "src/__support/CPP/bit.h" // rotl
13 #include "src/__support/CPP/limits.h" // numeric_limits
14 #include "src/__support/macros/attributes.h" // LIBC_INLINE
15 #include "src/__support/macros/config.h"
16 #include "src/__support/uint128.h" // UInt128
17 #include <stdint.h> // For uint64_t
19 namespace LIBC_NAMESPACE_DECL {
20 namespace internal {
22 // Folded multiplication.
23 // This function multiplies two 64-bit integers and xor the high and
24 // low 64-bit parts of the result.
25 LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
26 UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
27 uint64_t low = static_cast<uint64_t>(p);
28 uint64_t high = static_cast<uint64_t>(p >> 64);
29 return low ^ high;
32 // Read as little endian.
33 // Shift-and-or implementation does not give a satisfactory code on aarch64.
34 // Therefore, we use a union to read the value.
35 template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
36 const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
37 uint8_t buffer[sizeof(T)];
38 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
39 // Compiler should able to optimize this as a load followed by a byte
40 // swap. On aarch64 (-mbig-endian), this compiles to the following for
41 // int:
42 // ldr w0, [x0]
43 // rev w0, w0
44 // ret
45 for (size_t i = 0; i < sizeof(T); ++i) {
46 buffer[i] = bytes[sizeof(T) - i - 1];
48 #else
49 for (size_t i = 0; i < sizeof(T); ++i) {
50 buffer[i] = bytes[i];
52 #endif
53 return cpp::bit_cast<T>(buffer);
56 // Specialized read functions for small values. size must be <= 8.
57 LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,
58 uint64_t &high) {
59 const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
60 if (size >= 2) {
61 if (size >= 4) {
62 low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
63 high =
64 static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
65 } else {
66 low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
67 high = static_cast<uint64_t>(bytes[size - 1]);
69 } else {
70 if (size > 0) {
71 low = static_cast<uint64_t>(bytes[0]);
72 high = static_cast<uint64_t>(bytes[0]);
73 } else {
74 low = 0;
75 high = 0;
80 // This constant comes from Kunth's prng (it empirically works well).
81 LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
82 // Rotation amount for mixing.
83 LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
85 // Randomly generated values. For now, we use the same values as in aHash as
86 // they are widely tested.
87 // https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
88 LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
89 {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
90 0x082efa98ec4e6c89},
91 {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
92 0x3f84d5b5b5470917},
95 // This is a portable string hasher. It is not cryptographically secure.
96 // The quality of the hash is good enough to pass all tests in SMHasher.
97 // The implementation is derived from the generic routine of aHash.
98 class HashState {
99 uint64_t buffer;
100 uint64_t pad;
101 uint64_t extra_keys[2];
102 LIBC_INLINE void update(uint64_t low, uint64_t high) {
103 uint64_t combined =
104 folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
105 buffer = (buffer + pad) ^ combined;
106 buffer = cpp::rotl(buffer, ROTATE);
108 LIBC_INLINE static uint64_t mix(uint64_t seed) {
109 HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
110 RANDOMNESS[0][3]};
111 mixer.update(seed, 0);
112 return mixer.finish();
115 public:
116 LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,
117 uint64_t d)
118 : buffer(a), pad(b), extra_keys{c, d} {}
119 LIBC_INLINE HashState(uint64_t seed) {
120 // Mix one more round of the seed to make it stronger.
121 uint64_t mixed = mix(seed);
122 buffer = RANDOMNESS[1][0] ^ mixed;
123 pad = RANDOMNESS[1][1] ^ mixed;
124 extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
125 extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
127 LIBC_INLINE void update(const void *ptr, size_t size) {
128 uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
129 buffer = (buffer + size) * MULTIPLE;
130 uint64_t low, high;
131 if (size > 8) {
132 if (size > 16) {
133 // update tail
134 low = read_little_endian<uint64_t>(&bytes[size - 16]);
135 high = read_little_endian<uint64_t>(&bytes[size - 8]);
136 update(low, high);
137 while (size > 16) {
138 low = read_little_endian<uint64_t>(&bytes[0]);
139 high = read_little_endian<uint64_t>(&bytes[8]);
140 update(low, high);
141 bytes += 16;
142 size -= 16;
144 } else {
145 low = read_little_endian<uint64_t>(&bytes[0]);
146 high = read_little_endian<uint64_t>(&bytes[size - 8]);
147 update(low, high);
149 } else {
150 read_small_values(ptr, size, low, high);
151 update(low, high);
154 LIBC_INLINE uint64_t finish() {
155 int rot = buffer & 63;
156 uint64_t folded = folded_multiply(buffer, pad);
157 return cpp::rotl(folded, rot);
161 } // namespace internal
162 } // namespace LIBC_NAMESPACE_DECL
164 #endif // LLVM_LIBC_SRC___SUPPORT_HASH_H