[AMDGPU] Test codegen'ing True16 additions.
[llvm-project.git] / llvm / lib / Support / xxhash.cpp
blob577f14189caff7d74377f7b28d8332deef4c62c4
1 /*
2 * xxHash - Fast Hash algorithm
3 * Copyright (C) 2012-2021, Yann Collet
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * You can contact the author at :
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
35 // xxhash64 is based on commit d2df04efcbef7d7f6886d345861e5dfda4edacc1. Removed
36 // everything but a simple interface for computing xxh64.
38 // xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July
39 // 2023).
41 #include "llvm/Support/xxhash.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/Endian.h"
45 #include <stdlib.h>
47 using namespace llvm;
48 using namespace support;
50 static uint64_t rotl64(uint64_t X, size_t R) {
51 return (X << R) | (X >> (64 - R));
54 constexpr uint32_t PRIME32_1 = 0x9E3779B1;
55 constexpr uint32_t PRIME32_2 = 0x85EBCA77;
56 constexpr uint32_t PRIME32_3 = 0xC2B2AE3D;
58 static const uint64_t PRIME64_1 = 11400714785074694791ULL;
59 static const uint64_t PRIME64_2 = 14029467366897019727ULL;
60 static const uint64_t PRIME64_3 = 1609587929392839161ULL;
61 static const uint64_t PRIME64_4 = 9650029242287828579ULL;
62 static const uint64_t PRIME64_5 = 2870177450012600261ULL;
64 static uint64_t round(uint64_t Acc, uint64_t Input) {
65 Acc += Input * PRIME64_2;
66 Acc = rotl64(Acc, 31);
67 Acc *= PRIME64_1;
68 return Acc;
71 static uint64_t mergeRound(uint64_t Acc, uint64_t Val) {
72 Val = round(0, Val);
73 Acc ^= Val;
74 Acc = Acc * PRIME64_1 + PRIME64_4;
75 return Acc;
78 static uint64_t XXH64_avalanche(uint64_t hash) {
79 hash ^= hash >> 33;
80 hash *= PRIME64_2;
81 hash ^= hash >> 29;
82 hash *= PRIME64_3;
83 hash ^= hash >> 32;
84 return hash;
87 uint64_t llvm::xxHash64(StringRef Data) {
88 size_t Len = Data.size();
89 uint64_t Seed = 0;
90 const unsigned char *P = Data.bytes_begin();
91 const unsigned char *const BEnd = Data.bytes_end();
92 uint64_t H64;
94 if (Len >= 32) {
95 const unsigned char *const Limit = BEnd - 32;
96 uint64_t V1 = Seed + PRIME64_1 + PRIME64_2;
97 uint64_t V2 = Seed + PRIME64_2;
98 uint64_t V3 = Seed + 0;
99 uint64_t V4 = Seed - PRIME64_1;
101 do {
102 V1 = round(V1, endian::read64le(P));
103 P += 8;
104 V2 = round(V2, endian::read64le(P));
105 P += 8;
106 V3 = round(V3, endian::read64le(P));
107 P += 8;
108 V4 = round(V4, endian::read64le(P));
109 P += 8;
110 } while (P <= Limit);
112 H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18);
113 H64 = mergeRound(H64, V1);
114 H64 = mergeRound(H64, V2);
115 H64 = mergeRound(H64, V3);
116 H64 = mergeRound(H64, V4);
118 } else {
119 H64 = Seed + PRIME64_5;
122 H64 += (uint64_t)Len;
124 while (reinterpret_cast<uintptr_t>(P) + 8 <=
125 reinterpret_cast<uintptr_t>(BEnd)) {
126 uint64_t const K1 = round(0, endian::read64le(P));
127 H64 ^= K1;
128 H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4;
129 P += 8;
132 if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) {
133 H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1;
134 H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3;
135 P += 4;
138 while (P < BEnd) {
139 H64 ^= (*P) * PRIME64_5;
140 H64 = rotl64(H64, 11) * PRIME64_1;
141 P++;
144 return XXH64_avalanche(H64);
147 uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) {
148 return xxHash64({(const char *)Data.data(), Data.size()});
151 constexpr size_t XXH3_SECRETSIZE_MIN = 136;
152 constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192;
154 /* Pseudorandom data taken directly from FARSH */
155 // clang-format off
156 constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE] = {
157 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
158 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
159 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
160 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
161 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
162 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
163 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
164 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
165 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
166 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
167 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
168 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
170 // clang-format on
172 constexpr uint64_t PRIME_MX1 = 0x165667919E3779F9;
173 constexpr uint64_t PRIME_MX2 = 0x9FB21C651E98DF25;
175 // Calculates a 64-bit to 128-bit multiply, then XOR folds it.
176 static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) {
177 #if defined(__SIZEOF_INT128__) || \
178 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
179 __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
180 return uint64_t(product) ^ uint64_t(product >> 64);
182 #else
183 /* First calculate all of the cross products. */
184 const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
185 const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF);
186 const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
187 const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);
189 /* Now add the products together. These will never overflow. */
190 const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
191 const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
192 const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
194 return upper ^ lower;
195 #endif
198 constexpr size_t XXH_STRIPE_LEN = 64;
199 constexpr size_t XXH_SECRET_CONSUME_RATE = 8;
200 constexpr size_t XXH_ACC_NB = XXH_STRIPE_LEN / sizeof(uint64_t);
202 static uint64_t XXH3_avalanche(uint64_t hash) {
203 hash ^= hash >> 37;
204 hash *= PRIME_MX1;
205 hash ^= hash >> 32;
206 return hash;
209 static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len,
210 const uint8_t *secret, uint64_t seed) {
211 const uint8_t c1 = input[0];
212 const uint8_t c2 = input[len >> 1];
213 const uint8_t c3 = input[len - 1];
214 uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) |
215 ((uint32_t)c3 << 0) | ((uint32_t)len << 8);
216 uint64_t bitflip =
217 (uint64_t)(endian::read32le(secret) ^ endian::read32le(secret + 4)) +
218 seed;
219 return XXH64_avalanche(uint64_t(combined) ^ bitflip);
222 static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len,
223 const uint8_t *secret, uint64_t seed) {
224 seed ^= (uint64_t)byteswap(uint32_t(seed)) << 32;
225 const uint32_t input1 = endian::read32le(input);
226 const uint32_t input2 = endian::read32le(input + len - 4);
227 uint64_t acc =
228 (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed;
229 const uint64_t input64 = (uint64_t)input2 | ((uint64_t)input1 << 32);
230 acc ^= input64;
231 // XXH3_rrmxmx(acc, len)
232 acc ^= rotl64(acc, 49) ^ rotl64(acc, 24);
233 acc *= PRIME_MX2;
234 acc ^= (acc >> 35) + (uint64_t)len;
235 acc *= PRIME_MX2;
236 return acc ^ (acc >> 28);
239 static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len,
240 const uint8_t *secret, uint64_t const seed) {
241 uint64_t input_lo =
242 (endian::read64le(secret + 24) ^ endian::read64le(secret + 32)) + seed;
243 uint64_t input_hi =
244 (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed;
245 input_lo ^= endian::read64le(input);
246 input_hi ^= endian::read64le(input + len - 8);
247 uint64_t acc = uint64_t(len) + byteswap(input_lo) + input_hi +
248 XXH3_mul128_fold64(input_lo, input_hi);
249 return XXH3_avalanche(acc);
252 LLVM_ATTRIBUTE_ALWAYS_INLINE
253 static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len,
254 const uint8_t *secret, uint64_t const seed) {
255 if (LLVM_LIKELY(len > 8))
256 return XXH3_len_9to16_64b(input, len, secret, seed);
257 if (LLVM_LIKELY(len >= 4))
258 return XXH3_len_4to8_64b(input, len, secret, seed);
259 if (len != 0)
260 return XXH3_len_1to3_64b(input, len, secret, seed);
261 return XXH64_avalanche(seed ^ endian::read64le(secret + 56) ^
262 endian::read64le(secret + 64));
265 static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret,
266 uint64_t seed) {
267 uint64_t lhs = seed;
268 uint64_t rhs = 0U - seed;
269 lhs += endian::read64le(secret);
270 rhs += endian::read64le(secret + 8);
271 lhs ^= endian::read64le(input);
272 rhs ^= endian::read64le(input + 8);
273 return XXH3_mul128_fold64(lhs, rhs);
276 /* For mid range keys, XXH3 uses a Mum-hash variant. */
277 LLVM_ATTRIBUTE_ALWAYS_INLINE
278 static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len,
279 const uint8_t *secret,
280 uint64_t const seed) {
281 uint64_t acc = len * PRIME64_1, acc_end;
282 acc += XXH3_mix16B(input + 0, secret + 0, seed);
283 acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed);
284 if (len > 32) {
285 acc += XXH3_mix16B(input + 16, secret + 32, seed);
286 acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed);
287 if (len > 64) {
288 acc += XXH3_mix16B(input + 32, secret + 64, seed);
289 acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed);
290 if (len > 96) {
291 acc += XXH3_mix16B(input + 48, secret + 96, seed);
292 acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed);
296 return XXH3_avalanche(acc + acc_end);
299 constexpr size_t XXH3_MIDSIZE_MAX = 240;
301 LLVM_ATTRIBUTE_NOINLINE
302 static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len,
303 const uint8_t *secret, uint64_t seed) {
304 constexpr size_t XXH3_MIDSIZE_STARTOFFSET = 3;
305 constexpr size_t XXH3_MIDSIZE_LASTOFFSET = 17;
306 uint64_t acc = (uint64_t)len * PRIME64_1;
307 const unsigned nbRounds = len / 16;
308 for (unsigned i = 0; i < 8; ++i)
309 acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed);
310 acc = XXH3_avalanche(acc);
312 for (unsigned i = 8; i < nbRounds; ++i) {
313 acc += XXH3_mix16B(input + 16 * i,
314 secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed);
316 /* last bytes */
317 acc +=
318 XXH3_mix16B(input + len - 16,
319 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
320 return XXH3_avalanche(acc);
323 LLVM_ATTRIBUTE_ALWAYS_INLINE
324 static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input,
325 const uint8_t *secret) {
326 for (size_t i = 0; i < XXH_ACC_NB; ++i) {
327 uint64_t data_val = endian::read64le(input + 8 * i);
328 uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i);
329 acc[i ^ 1] += data_val;
330 acc[i] += uint32_t(data_key) * (data_key >> 32);
334 LLVM_ATTRIBUTE_ALWAYS_INLINE
335 static void XXH3_accumulate_scalar(uint64_t *acc, const uint8_t *input,
336 const uint8_t *secret, size_t nbStripes) {
337 for (size_t n = 0; n < nbStripes; ++n)
338 XXH3_accumulate_512_scalar(acc, input + n * XXH_STRIPE_LEN,
339 secret + n * XXH_SECRET_CONSUME_RATE);
342 static void XXH3_scrambleAcc(uint64_t *acc, const uint8_t *secret) {
343 for (size_t i = 0; i < XXH_ACC_NB; ++i) {
344 acc[i] ^= acc[i] >> 47;
345 acc[i] ^= endian::read64le(secret + 8 * i);
346 acc[i] *= PRIME32_1;
350 static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret) {
351 return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret),
352 acc[1] ^ endian::read64le(secret + 8));
355 static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key,
356 uint64_t start) {
357 uint64_t result64 = start;
358 for (size_t i = 0; i < 4; ++i)
359 result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i);
360 return XXH3_avalanche(result64);
363 LLVM_ATTRIBUTE_NOINLINE
364 static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len,
365 const uint8_t *secret, size_t secretSize) {
366 const size_t nbStripesPerBlock =
367 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
368 const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
369 const size_t nb_blocks = (len - 1) / block_len;
370 alignas(16) uint64_t acc[XXH_ACC_NB] = {
371 PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3,
372 PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1,
374 for (size_t n = 0; n < nb_blocks; ++n) {
375 XXH3_accumulate_scalar(acc, input + n * block_len, secret,
376 nbStripesPerBlock);
377 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
380 /* last partial block */
381 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
382 assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE);
383 XXH3_accumulate_scalar(acc, input + nb_blocks * block_len, secret, nbStripes);
385 /* last stripe */
386 constexpr size_t XXH_SECRET_LASTACC_START = 7;
387 XXH3_accumulate_512_scalar(acc, input + len - XXH_STRIPE_LEN,
388 secret + secretSize - XXH_STRIPE_LEN -
389 XXH_SECRET_LASTACC_START);
391 /* converge into final hash */
392 constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
393 return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
394 (uint64_t)len * PRIME64_1);
397 uint64_t llvm::xxh3_64bits(ArrayRef<uint8_t> data) {
398 auto *in = data.data();
399 size_t len = data.size();
400 if (len <= 16)
401 return XXH3_len_0to16_64b(in, len, kSecret, 0);
402 if (len <= 128)
403 return XXH3_len_17to128_64b(in, len, kSecret, 0);
404 if (len <= XXH3_MIDSIZE_MAX)
405 return XXH3_len_129to240_64b(in, len, kSecret, 0);
406 return XXH3_hashLong_64b(in, len, kSecret, sizeof(kSecret));