1 //===-- Portable string hash function ---------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
10 #define LLVM_LIBC_SRC___SUPPORT_HASH_H
12 #include "src/__support/CPP/bit.h" // rotl
13 #include "src/__support/CPP/limits.h" // numeric_limits
14 #include "src/__support/macros/attributes.h" // LIBC_INLINE
15 #include "src/__support/macros/config.h"
16 #include "src/__support/uint128.h" // UInt128
17 #include <stdint.h> // For uint64_t
19 namespace LIBC_NAMESPACE_DECL
{
22 // Folded multiplication.
23 // This function multiplies two 64-bit integers and xor the high and
24 // low 64-bit parts of the result.
25 LIBC_INLINE
uint64_t folded_multiply(uint64_t x
, uint64_t y
) {
26 UInt128 p
= static_cast<UInt128
>(x
) * static_cast<UInt128
>(y
);
27 uint64_t low
= static_cast<uint64_t>(p
);
28 uint64_t high
= static_cast<uint64_t>(p
>> 64);
32 // Read as little endian.
33 // Shift-and-or implementation does not give a satisfactory code on aarch64.
34 // Therefore, we use a union to read the value.
35 template <typename T
> LIBC_INLINE T
read_little_endian(const void *ptr
) {
36 const uint8_t *bytes
= static_cast<const uint8_t *>(ptr
);
37 uint8_t buffer
[sizeof(T
)];
38 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
39 // Compiler should able to optimize this as a load followed by a byte
40 // swap. On aarch64 (-mbig-endian), this compiles to the following for
45 for (size_t i
= 0; i
< sizeof(T
); ++i
) {
46 buffer
[i
] = bytes
[sizeof(T
) - i
- 1];
49 for (size_t i
= 0; i
< sizeof(T
); ++i
) {
53 return cpp::bit_cast
<T
>(buffer
);
56 // Specialized read functions for small values. size must be <= 8.
57 LIBC_INLINE
void read_small_values(const void *ptr
, size_t size
, uint64_t &low
,
59 const uint8_t *bytes
= static_cast<const uint8_t *>(ptr
);
62 low
= static_cast<uint64_t>(read_little_endian
<uint32_t>(&bytes
[0]));
64 static_cast<uint64_t>(read_little_endian
<uint32_t>(&bytes
[size
- 4]));
66 low
= static_cast<uint64_t>(read_little_endian
<uint16_t>(&bytes
[0]));
67 high
= static_cast<uint64_t>(bytes
[size
- 1]);
71 low
= static_cast<uint64_t>(bytes
[0]);
72 high
= static_cast<uint64_t>(bytes
[0]);
80 // This constant comes from Kunth's prng (it empirically works well).
81 LIBC_INLINE_VAR
constexpr uint64_t MULTIPLE
= 6364136223846793005;
82 // Rotation amount for mixing.
83 LIBC_INLINE_VAR
constexpr uint64_t ROTATE
= 23;
85 // Randomly generated values. For now, we use the same values as in aHash as
86 // they are widely tested.
87 // https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
88 LIBC_INLINE_VAR
constexpr uint64_t RANDOMNESS
[2][4] = {
89 {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
91 {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
95 // This is a portable string hasher. It is not cryptographically secure.
96 // The quality of the hash is good enough to pass all tests in SMHasher.
97 // The implementation is derived from the generic routine of aHash.
101 uint64_t extra_keys
[2];
102 LIBC_INLINE
void update(uint64_t low
, uint64_t high
) {
104 folded_multiply(low
^ extra_keys
[0], high
^ extra_keys
[1]);
105 buffer
= (buffer
+ pad
) ^ combined
;
106 buffer
= cpp::rotl(buffer
, ROTATE
);
108 LIBC_INLINE
static uint64_t mix(uint64_t seed
) {
109 HashState mixer
{RANDOMNESS
[0][0], RANDOMNESS
[0][1], RANDOMNESS
[0][2],
111 mixer
.update(seed
, 0);
112 return mixer
.finish();
116 LIBC_INLINE
constexpr HashState(uint64_t a
, uint64_t b
, uint64_t c
,
118 : buffer(a
), pad(b
), extra_keys
{c
, d
} {}
119 LIBC_INLINE
HashState(uint64_t seed
) {
120 // Mix one more round of the seed to make it stronger.
121 uint64_t mixed
= mix(seed
);
122 buffer
= RANDOMNESS
[1][0] ^ mixed
;
123 pad
= RANDOMNESS
[1][1] ^ mixed
;
124 extra_keys
[0] = RANDOMNESS
[1][2] ^ mixed
;
125 extra_keys
[1] = RANDOMNESS
[1][3] ^ mixed
;
127 LIBC_INLINE
void update(const void *ptr
, size_t size
) {
128 uint8_t const *bytes
= static_cast<const uint8_t *>(ptr
);
129 buffer
= (buffer
+ size
) * MULTIPLE
;
134 low
= read_little_endian
<uint64_t>(&bytes
[size
- 16]);
135 high
= read_little_endian
<uint64_t>(&bytes
[size
- 8]);
138 low
= read_little_endian
<uint64_t>(&bytes
[0]);
139 high
= read_little_endian
<uint64_t>(&bytes
[8]);
145 low
= read_little_endian
<uint64_t>(&bytes
[0]);
146 high
= read_little_endian
<uint64_t>(&bytes
[size
- 8]);
150 read_small_values(ptr
, size
, low
, high
);
154 LIBC_INLINE
uint64_t finish() {
155 int rot
= buffer
& 63;
156 uint64_t folded
= folded_multiply(buffer
, pad
);
157 return cpp::rotl(folded
, rot
);
161 } // namespace internal
162 } // namespace LIBC_NAMESPACE_DECL
164 #endif // LLVM_LIBC_SRC___SUPPORT_HASH_H