2 * xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2023, Yann Collet
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * You can contact the author at :
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
35 // xxhash64 is based on commit d2df04efcbef7d7f6886d345861e5dfda4edacc1. Removed
36 // everything but a simple interface for computing xxh64.
38 // xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July
41 // xxh3_128bits is based on commit b0adcc54188c3130b1793e7b19c62eb1e669f7df
44 #include "llvm/Support/xxhash.h"
45 #include "llvm/Support/Compiler.h"
46 #include "llvm/Support/Endian.h"
50 #if !defined(LLVM_XXH_USE_NEON)
51 #if (defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) && \
52 !defined(__ARM_BIG_ENDIAN)
53 #define LLVM_XXH_USE_NEON 1
55 #define LLVM_XXH_USE_NEON 0
64 using namespace support
;
66 static uint64_t rotl64(uint64_t X
, size_t R
) {
67 return (X
<< R
) | (X
>> (64 - R
));
70 constexpr uint32_t PRIME32_1
= 0x9E3779B1;
71 constexpr uint32_t PRIME32_2
= 0x85EBCA77;
72 constexpr uint32_t PRIME32_3
= 0xC2B2AE3D;
74 static const uint64_t PRIME64_1
= 11400714785074694791ULL;
75 static const uint64_t PRIME64_2
= 14029467366897019727ULL;
76 static const uint64_t PRIME64_3
= 1609587929392839161ULL;
77 static const uint64_t PRIME64_4
= 9650029242287828579ULL;
78 static const uint64_t PRIME64_5
= 2870177450012600261ULL;
80 static uint64_t round(uint64_t Acc
, uint64_t Input
) {
81 Acc
+= Input
* PRIME64_2
;
82 Acc
= rotl64(Acc
, 31);
87 static uint64_t mergeRound(uint64_t Acc
, uint64_t Val
) {
90 Acc
= Acc
* PRIME64_1
+ PRIME64_4
;
94 static uint64_t XXH64_avalanche(uint64_t hash
) {
103 uint64_t llvm::xxHash64(StringRef Data
) {
104 size_t Len
= Data
.size();
106 const unsigned char *P
= Data
.bytes_begin();
107 const unsigned char *const BEnd
= Data
.bytes_end();
111 const unsigned char *const Limit
= BEnd
- 32;
112 uint64_t V1
= Seed
+ PRIME64_1
+ PRIME64_2
;
113 uint64_t V2
= Seed
+ PRIME64_2
;
114 uint64_t V3
= Seed
+ 0;
115 uint64_t V4
= Seed
- PRIME64_1
;
118 V1
= round(V1
, endian::read64le(P
));
120 V2
= round(V2
, endian::read64le(P
));
122 V3
= round(V3
, endian::read64le(P
));
124 V4
= round(V4
, endian::read64le(P
));
126 } while (P
<= Limit
);
128 H64
= rotl64(V1
, 1) + rotl64(V2
, 7) + rotl64(V3
, 12) + rotl64(V4
, 18);
129 H64
= mergeRound(H64
, V1
);
130 H64
= mergeRound(H64
, V2
);
131 H64
= mergeRound(H64
, V3
);
132 H64
= mergeRound(H64
, V4
);
135 H64
= Seed
+ PRIME64_5
;
138 H64
+= (uint64_t)Len
;
140 while (reinterpret_cast<uintptr_t>(P
) + 8 <=
141 reinterpret_cast<uintptr_t>(BEnd
)) {
142 uint64_t const K1
= round(0, endian::read64le(P
));
144 H64
= rotl64(H64
, 27) * PRIME64_1
+ PRIME64_4
;
148 if (reinterpret_cast<uintptr_t>(P
) + 4 <= reinterpret_cast<uintptr_t>(BEnd
)) {
149 H64
^= (uint64_t)(endian::read32le(P
)) * PRIME64_1
;
150 H64
= rotl64(H64
, 23) * PRIME64_2
+ PRIME64_3
;
155 H64
^= (*P
) * PRIME64_5
;
156 H64
= rotl64(H64
, 11) * PRIME64_1
;
160 return XXH64_avalanche(H64
);
163 uint64_t llvm::xxHash64(ArrayRef
<uint8_t> Data
) {
164 return xxHash64({(const char *)Data
.data(), Data
.size()});
167 constexpr size_t XXH3_SECRETSIZE_MIN
= 136;
168 constexpr size_t XXH_SECRET_DEFAULT_SIZE
= 192;
170 /* Pseudorandom data taken directly from FARSH */
172 constexpr uint8_t kSecret
[XXH_SECRET_DEFAULT_SIZE
] = {
173 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
174 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
175 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
176 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
177 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
178 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
179 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
180 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
181 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
182 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
183 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
184 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
188 constexpr uint64_t PRIME_MX1
= 0x165667919E3779F9;
189 constexpr uint64_t PRIME_MX2
= 0x9FB21C651E98DF25;
191 // Calculates a 64-bit to 128-bit multiply, then XOR folds it.
192 static uint64_t XXH3_mul128_fold64(uint64_t lhs
, uint64_t rhs
) {
193 #if defined(__SIZEOF_INT128__) || \
194 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
195 __uint128_t product
= (__uint128_t
)lhs
* (__uint128_t
)rhs
;
196 return uint64_t(product
) ^ uint64_t(product
>> 64);
199 /* First calculate all of the cross products. */
200 const uint64_t lo_lo
= (lhs
& 0xFFFFFFFF) * (rhs
& 0xFFFFFFFF);
201 const uint64_t hi_lo
= (lhs
>> 32) * (rhs
& 0xFFFFFFFF);
202 const uint64_t lo_hi
= (lhs
& 0xFFFFFFFF) * (rhs
>> 32);
203 const uint64_t hi_hi
= (lhs
>> 32) * (rhs
>> 32);
205 /* Now add the products together. These will never overflow. */
206 const uint64_t cross
= (lo_lo
>> 32) + (hi_lo
& 0xFFFFFFFF) + lo_hi
;
207 const uint64_t upper
= (hi_lo
>> 32) + (cross
>> 32) + hi_hi
;
208 const uint64_t lower
= (cross
<< 32) | (lo_lo
& 0xFFFFFFFF);
210 return upper
^ lower
;
214 constexpr size_t XXH_STRIPE_LEN
= 64;
215 constexpr size_t XXH_SECRET_CONSUME_RATE
= 8;
216 constexpr size_t XXH_ACC_NB
= XXH_STRIPE_LEN
/ sizeof(uint64_t);
218 static uint64_t XXH3_avalanche(uint64_t hash
) {
225 static uint64_t XXH3_len_1to3_64b(const uint8_t *input
, size_t len
,
226 const uint8_t *secret
, uint64_t seed
) {
227 const uint8_t c1
= input
[0];
228 const uint8_t c2
= input
[len
>> 1];
229 const uint8_t c3
= input
[len
- 1];
230 uint32_t combined
= ((uint32_t)c1
<< 16) | ((uint32_t)c2
<< 24) |
231 ((uint32_t)c3
<< 0) | ((uint32_t)len
<< 8);
233 (uint64_t)(endian::read32le(secret
) ^ endian::read32le(secret
+ 4)) +
235 return XXH64_avalanche(uint64_t(combined
) ^ bitflip
);
238 static uint64_t XXH3_len_4to8_64b(const uint8_t *input
, size_t len
,
239 const uint8_t *secret
, uint64_t seed
) {
240 seed
^= (uint64_t)byteswap(uint32_t(seed
)) << 32;
241 const uint32_t input1
= endian::read32le(input
);
242 const uint32_t input2
= endian::read32le(input
+ len
- 4);
244 (endian::read64le(secret
+ 8) ^ endian::read64le(secret
+ 16)) - seed
;
245 const uint64_t input64
= (uint64_t)input2
| ((uint64_t)input1
<< 32);
247 // XXH3_rrmxmx(acc, len)
248 acc
^= rotl64(acc
, 49) ^ rotl64(acc
, 24);
250 acc
^= (acc
>> 35) + (uint64_t)len
;
252 return acc
^ (acc
>> 28);
255 static uint64_t XXH3_len_9to16_64b(const uint8_t *input
, size_t len
,
256 const uint8_t *secret
, uint64_t const seed
) {
258 (endian::read64le(secret
+ 24) ^ endian::read64le(secret
+ 32)) + seed
;
260 (endian::read64le(secret
+ 40) ^ endian::read64le(secret
+ 48)) - seed
;
261 input_lo
^= endian::read64le(input
);
262 input_hi
^= endian::read64le(input
+ len
- 8);
263 uint64_t acc
= uint64_t(len
) + byteswap(input_lo
) + input_hi
+
264 XXH3_mul128_fold64(input_lo
, input_hi
);
265 return XXH3_avalanche(acc
);
268 LLVM_ATTRIBUTE_ALWAYS_INLINE
269 static uint64_t XXH3_len_0to16_64b(const uint8_t *input
, size_t len
,
270 const uint8_t *secret
, uint64_t const seed
) {
271 if (LLVM_LIKELY(len
> 8))
272 return XXH3_len_9to16_64b(input
, len
, secret
, seed
);
273 if (LLVM_LIKELY(len
>= 4))
274 return XXH3_len_4to8_64b(input
, len
, secret
, seed
);
276 return XXH3_len_1to3_64b(input
, len
, secret
, seed
);
277 return XXH64_avalanche(seed
^ endian::read64le(secret
+ 56) ^
278 endian::read64le(secret
+ 64));
281 static uint64_t XXH3_mix16B(const uint8_t *input
, uint8_t const *secret
,
284 uint64_t rhs
= 0U - seed
;
285 lhs
+= endian::read64le(secret
);
286 rhs
+= endian::read64le(secret
+ 8);
287 lhs
^= endian::read64le(input
);
288 rhs
^= endian::read64le(input
+ 8);
289 return XXH3_mul128_fold64(lhs
, rhs
);
292 /* For mid range keys, XXH3 uses a Mum-hash variant. */
293 LLVM_ATTRIBUTE_ALWAYS_INLINE
294 static uint64_t XXH3_len_17to128_64b(const uint8_t *input
, size_t len
,
295 const uint8_t *secret
,
296 uint64_t const seed
) {
297 uint64_t acc
= len
* PRIME64_1
, acc_end
;
298 acc
+= XXH3_mix16B(input
+ 0, secret
+ 0, seed
);
299 acc_end
= XXH3_mix16B(input
+ len
- 16, secret
+ 16, seed
);
301 acc
+= XXH3_mix16B(input
+ 16, secret
+ 32, seed
);
302 acc_end
+= XXH3_mix16B(input
+ len
- 32, secret
+ 48, seed
);
304 acc
+= XXH3_mix16B(input
+ 32, secret
+ 64, seed
);
305 acc_end
+= XXH3_mix16B(input
+ len
- 48, secret
+ 80, seed
);
307 acc
+= XXH3_mix16B(input
+ 48, secret
+ 96, seed
);
308 acc_end
+= XXH3_mix16B(input
+ len
- 64, secret
+ 112, seed
);
312 return XXH3_avalanche(acc
+ acc_end
);
315 constexpr size_t XXH3_MIDSIZE_MAX
= 240;
316 constexpr size_t XXH3_MIDSIZE_STARTOFFSET
= 3;
317 constexpr size_t XXH3_MIDSIZE_LASTOFFSET
= 17;
319 LLVM_ATTRIBUTE_NOINLINE
320 static uint64_t XXH3_len_129to240_64b(const uint8_t *input
, size_t len
,
321 const uint8_t *secret
, uint64_t seed
) {
322 uint64_t acc
= (uint64_t)len
* PRIME64_1
;
323 const unsigned nbRounds
= len
/ 16;
324 for (unsigned i
= 0; i
< 8; ++i
)
325 acc
+= XXH3_mix16B(input
+ 16 * i
, secret
+ 16 * i
, seed
);
326 acc
= XXH3_avalanche(acc
);
328 for (unsigned i
= 8; i
< nbRounds
; ++i
) {
329 acc
+= XXH3_mix16B(input
+ 16 * i
,
330 secret
+ 16 * (i
- 8) + XXH3_MIDSIZE_STARTOFFSET
, seed
);
334 XXH3_mix16B(input
+ len
- 16,
335 secret
+ XXH3_SECRETSIZE_MIN
- XXH3_MIDSIZE_LASTOFFSET
, seed
);
336 return XXH3_avalanche(acc
);
339 #if LLVM_XXH_USE_NEON
341 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
342 #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
344 // NEON implementation based on commit a57f6cce2698049863af8c25787084ae0489d849
345 // (July 2024), with the following removed:
346 // - workaround for suboptimal codegen on older GCC
347 // - compiler barriers against instruction reordering
348 // - WebAssembly SIMD support
349 // - configurable split between NEON and scalar lanes (benchmarking shows no
350 // penalty when fully doing SIMD on the Apple M1)
352 #if defined(__GNUC__) || defined(__clang__)
353 #define XXH_ALIASING __attribute__((__may_alias__))
355 #define XXH_ALIASING /* nothing */
358 typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING
;
360 LLVM_ATTRIBUTE_ALWAYS_INLINE
static uint64x2_t
XXH_vld1q_u64(void const *ptr
) {
361 return vreinterpretq_u64_u8(vld1q_u8((uint8_t const *)ptr
));
364 LLVM_ATTRIBUTE_ALWAYS_INLINE
365 static void XXH3_accumulate_512_neon(uint64_t *acc
, const uint8_t *input
,
366 const uint8_t *secret
) {
367 xxh_aliasing_uint64x2_t
*const xacc
= (xxh_aliasing_uint64x2_t
*)acc
;
370 #pragma clang loop unroll(full)
372 for (size_t i
= 0; i
< XXH_ACC_NB
/ 2; i
+= 2) {
373 /* data_vec = input[i]; */
374 uint64x2_t data_vec_1
= XXH_vld1q_u64(input
+ (i
* 16));
375 uint64x2_t data_vec_2
= XXH_vld1q_u64(input
+ ((i
+ 1) * 16));
377 /* key_vec = secret[i]; */
378 uint64x2_t key_vec_1
= XXH_vld1q_u64(secret
+ (i
* 16));
379 uint64x2_t key_vec_2
= XXH_vld1q_u64(secret
+ ((i
+ 1) * 16));
381 /* data_swap = swap(data_vec) */
382 uint64x2_t data_swap_1
= vextq_u64(data_vec_1
, data_vec_1
, 1);
383 uint64x2_t data_swap_2
= vextq_u64(data_vec_2
, data_vec_2
, 1);
385 /* data_key = data_vec ^ key_vec; */
386 uint64x2_t data_key_1
= veorq_u64(data_vec_1
, key_vec_1
);
387 uint64x2_t data_key_2
= veorq_u64(data_vec_2
, key_vec_2
);
390 * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
391 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
392 * get one vector with the low 32 bits of each lane, and one vector
393 * with the high 32 bits of each lane.
395 * The intrinsic returns a double vector because the original ARMv7-a
396 * instruction modified both arguments in place. AArch64 and SIMD128 emit
397 * two instructions from this intrinsic.
399 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
400 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
402 uint32x4x2_t unzipped
= vuzpq_u32(vreinterpretq_u32_u64(data_key_1
),
403 vreinterpretq_u32_u64(data_key_2
));
405 /* data_key_lo = data_key & 0xFFFFFFFF */
406 uint32x4_t data_key_lo
= unzipped
.val
[0];
407 /* data_key_hi = data_key >> 32 */
408 uint32x4_t data_key_hi
= unzipped
.val
[1];
411 * Then, we can split the vectors horizontally and multiply which, as for
412 * most widening intrinsics, have a variant that works on both high half
413 * vectors for free on AArch64. A similar instruction is available on
416 * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
418 uint64x2_t sum_1
= vmlal_u32(data_swap_1
, vget_low_u32(data_key_lo
),
419 vget_low_u32(data_key_hi
));
420 uint64x2_t sum_2
= vmlal_u32(data_swap_2
, vget_high_u32(data_key_lo
),
421 vget_high_u32(data_key_hi
));
423 /* xacc[i] = acc_vec + sum; */
424 xacc
[i
] = vaddq_u64(xacc
[i
], sum_1
);
425 xacc
[i
+ 1] = vaddq_u64(xacc
[i
+ 1], sum_2
);
429 LLVM_ATTRIBUTE_ALWAYS_INLINE
430 static void XXH3_scrambleAcc_neon(uint64_t *acc
, const uint8_t *secret
) {
431 xxh_aliasing_uint64x2_t
*const xacc
= (xxh_aliasing_uint64x2_t
*)acc
;
433 /* { prime32_1, prime32_1 } */
434 uint32x2_t
const kPrimeLo
= vdup_n_u32(PRIME32_1
);
435 /* { 0, prime32_1, 0, prime32_1 } */
436 uint32x4_t
const kPrimeHi
=
437 vreinterpretq_u32_u64(vdupq_n_u64((uint64_t)PRIME32_1
<< 32));
439 for (size_t i
= 0; i
< XXH_ACC_NB
/ 2; ++i
) {
440 /* xacc[i] ^= (xacc[i] >> 47); */
441 uint64x2_t acc_vec
= XXH_vld1q_u64(acc
+ (2 * i
));
442 uint64x2_t shifted
= vshrq_n_u64(acc_vec
, 47);
443 uint64x2_t data_vec
= veorq_u64(acc_vec
, shifted
);
445 /* xacc[i] ^= secret[i]; */
446 uint64x2_t key_vec
= XXH_vld1q_u64(secret
+ (i
* 16));
447 uint64x2_t data_key
= veorq_u64(data_vec
, key_vec
);
450 * xacc[i] *= XXH_PRIME32_1
452 * Expanded version with portable NEON intrinsics
454 * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
456 * prod_hi = hi(data_key) * lo(prime) << 32
458 * Since we only need 32 bits of this multiply a trick can be used,
459 * reinterpreting the vector as a uint32x4_t and multiplying by
460 * { 0, prime, 0, prime } to cancel out the unwanted bits and avoid the
463 uint32x4_t prod_hi
= vmulq_u32(vreinterpretq_u32_u64(data_key
), kPrimeHi
);
465 /* Extract low bits for vmlal_u32 */
466 uint32x2_t data_key_lo
= vmovn_u64(data_key
);
468 /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
469 xacc
[i
] = vmlal_u32(vreinterpretq_u64_u32(prod_hi
), data_key_lo
, kPrimeLo
);
474 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
475 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
477 LLVM_ATTRIBUTE_ALWAYS_INLINE
478 static void XXH3_accumulate_512_scalar(uint64_t *acc
, const uint8_t *input
,
479 const uint8_t *secret
) {
480 for (size_t i
= 0; i
< XXH_ACC_NB
; ++i
) {
481 uint64_t data_val
= endian::read64le(input
+ 8 * i
);
482 uint64_t data_key
= data_val
^ endian::read64le(secret
+ 8 * i
);
483 acc
[i
^ 1] += data_val
;
484 acc
[i
] += uint32_t(data_key
) * (data_key
>> 32);
488 LLVM_ATTRIBUTE_ALWAYS_INLINE
489 static void XXH3_scrambleAcc_scalar(uint64_t *acc
, const uint8_t *secret
) {
490 for (size_t i
= 0; i
< XXH_ACC_NB
; ++i
) {
491 acc
[i
] ^= acc
[i
] >> 47;
492 acc
[i
] ^= endian::read64le(secret
+ 8 * i
);
498 LLVM_ATTRIBUTE_ALWAYS_INLINE
499 static void XXH3_accumulate(uint64_t *acc
, const uint8_t *input
,
500 const uint8_t *secret
, size_t nbStripes
) {
501 for (size_t n
= 0; n
< nbStripes
; ++n
) {
502 XXH3_accumulate_512(acc
, input
+ n
* XXH_STRIPE_LEN
,
503 secret
+ n
* XXH_SECRET_CONSUME_RATE
);
507 static uint64_t XXH3_mix2Accs(const uint64_t *acc
, const uint8_t *secret
) {
508 return XXH3_mul128_fold64(acc
[0] ^ endian::read64le(secret
),
509 acc
[1] ^ endian::read64le(secret
+ 8));
512 static uint64_t XXH3_mergeAccs(const uint64_t *acc
, const uint8_t *key
,
514 uint64_t result64
= start
;
515 for (size_t i
= 0; i
< 4; ++i
)
516 result64
+= XXH3_mix2Accs(acc
+ 2 * i
, key
+ 16 * i
);
517 return XXH3_avalanche(result64
);
520 LLVM_ATTRIBUTE_NOINLINE
521 static uint64_t XXH3_hashLong_64b(const uint8_t *input
, size_t len
,
522 const uint8_t *secret
, size_t secretSize
) {
523 const size_t nbStripesPerBlock
=
524 (secretSize
- XXH_STRIPE_LEN
) / XXH_SECRET_CONSUME_RATE
;
525 const size_t block_len
= XXH_STRIPE_LEN
* nbStripesPerBlock
;
526 const size_t nb_blocks
= (len
- 1) / block_len
;
527 alignas(16) uint64_t acc
[XXH_ACC_NB
] = {
528 PRIME32_3
, PRIME64_1
, PRIME64_2
, PRIME64_3
,
529 PRIME64_4
, PRIME32_2
, PRIME64_5
, PRIME32_1
,
531 for (size_t n
= 0; n
< nb_blocks
; ++n
) {
532 XXH3_accumulate(acc
, input
+ n
* block_len
, secret
, nbStripesPerBlock
);
533 XXH3_scrambleAcc(acc
, secret
+ secretSize
- XXH_STRIPE_LEN
);
536 /* last partial block */
537 const size_t nbStripes
= (len
- 1 - (block_len
* nb_blocks
)) / XXH_STRIPE_LEN
;
538 assert(nbStripes
<= secretSize
/ XXH_SECRET_CONSUME_RATE
);
539 XXH3_accumulate(acc
, input
+ nb_blocks
* block_len
, secret
, nbStripes
);
542 constexpr size_t XXH_SECRET_LASTACC_START
= 7;
543 XXH3_accumulate_512(acc
, input
+ len
- XXH_STRIPE_LEN
,
544 secret
+ secretSize
- XXH_STRIPE_LEN
-
545 XXH_SECRET_LASTACC_START
);
547 /* converge into final hash */
548 constexpr size_t XXH_SECRET_MERGEACCS_START
= 11;
549 return XXH3_mergeAccs(acc
, secret
+ XXH_SECRET_MERGEACCS_START
,
550 (uint64_t)len
* PRIME64_1
);
553 uint64_t llvm::xxh3_64bits(ArrayRef
<uint8_t> data
) {
554 auto *in
= data
.data();
555 size_t len
= data
.size();
557 return XXH3_len_0to16_64b(in
, len
, kSecret
, 0);
559 return XXH3_len_17to128_64b(in
, len
, kSecret
, 0);
560 if (len
<= XXH3_MIDSIZE_MAX
)
561 return XXH3_len_129to240_64b(in
, len
, kSecret
, 0);
562 return XXH3_hashLong_64b(in
, len
, kSecret
, sizeof(kSecret
));
565 /* ==========================================
566 * XXH3 128 bits (a.k.a XXH128)
567 * ==========================================
568 * XXH3's 128-bit variant has better mixing and strength than the 64-bit
569 * variant, even without counting the significantly larger output size.
571 * For example, extra steps are taken to avoid the seed-dependent collisions
572 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
574 * This strength naturally comes at the cost of some speed, especially on short
575 * lengths. Note that longer hashes are about as fast as the 64-bit version
576 * due to it using only a slight modification of the 64-bit loop.
578 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
579 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
584 * @def XXH_rotl32(x,r)
585 * @brief 32-bit rotate left.
587 * @param x The 32-bit integer to be rotated.
588 * @param r The number of bits to rotate.
590 * @p r > 0 && @p r < 32
592 * @p x and @p r may be evaluated multiple times.
593 * @return The rotated result.
595 #if __has_builtin(__builtin_rotateleft32) && \
596 __has_builtin(__builtin_rotateleft64)
597 #define XXH_rotl32 __builtin_rotateleft32
598 #define XXH_rotl64 __builtin_rotateleft64
599 /* Note: although _rotl exists for minGW (GCC under windows), performance seems
601 #elif defined(_MSC_VER)
602 #define XXH_rotl32(x, r) _rotl(x, r)
603 #define XXH_rotl64(x, r) _rotl64(x, r)
605 #define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
606 #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
609 #define XXH_mult32to64(x, y) ((uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y))
612 * @brief Calculates a 64->128-bit long multiply.
614 * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
617 * @param lhs , rhs The 64-bit integers to be multiplied
618 * @return The 128-bit result represented in an @ref XXH128_hash_t.
620 static XXH128_hash_t
XXH_mult64to128(uint64_t lhs
, uint64_t rhs
) {
622 * GCC/Clang __uint128_t method.
624 * On most 64-bit targets, GCC and Clang define a __uint128_t type.
625 * This is usually the best way as it usually uses a native long 64-bit
626 * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
630 * Despite being a 32-bit platform, Clang (and emscripten) define this type
631 * despite not having the arithmetic for it. This results in a laggy
632 * compiler builtin call which calculates a full 128-bit multiply.
633 * In that case it is best to use the portable one.
634 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
636 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) && \
637 defined(__SIZEOF_INT128__) || \
638 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
640 __uint128_t
const product
= (__uint128_t
)lhs
* (__uint128_t
)rhs
;
642 r128
.low64
= (uint64_t)(product
);
643 r128
.high64
= (uint64_t)(product
>> 64);
647 * MSVC for x64's _umul128 method.
649 * uint64_t _umul128(uint64_t Multiplier, uint64_t Multiplicand, uint64_t
652 * This compiles to single operand MUL on x64.
654 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
657 #pragma intrinsic(_umul128)
659 uint64_t product_high
;
660 uint64_t const product_low
= _umul128(lhs
, rhs
, &product_high
);
662 r128
.low64
= product_low
;
663 r128
.high64
= product_high
;
667 * MSVC for ARM64's __umulh method.
669 * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
671 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
674 #pragma intrinsic(__umulh)
677 r128
.low64
= lhs
* rhs
;
678 r128
.high64
= __umulh(lhs
, rhs
);
683 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
685 * This is a fast and simple grade school multiply, which is shown below
686 * with base 10 arithmetic instead of base 0x100000000.
689 * x 7 5 // D2 rhs = 75
691 * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
692 * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
693 * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
694 * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
696 * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
697 * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
699 * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
701 * The reasons for adding the products like this are:
702 * 1. It avoids manual carry tracking. Just like how
703 * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
704 * This avoids a lot of complexity.
706 * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
707 * instruction available in ARM's Digital Signal Processing extension
708 * in 32-bit ARMv6 and later, which is shown below:
710 * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
712 * uint64_t product = (uint64_t)*RdLo * (uint64_t)*RdHi + Rn + Rm;
713 * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
714 * *RdHi = (xxh_u32)(product >> 32);
717 * This instruction was designed for efficient long multiplication, and
718 * allows this to be calculated in only 4 instructions at speeds
719 * comparable to some 64-bit ALUs.
721 * 3. It isn't terrible on other platforms. Usually this will be a couple
722 * of 32-bit ADD/ADCs.
725 /* First calculate all of the cross products. */
726 uint64_t const lo_lo
= XXH_mult32to64(lhs
& 0xFFFFFFFF, rhs
& 0xFFFFFFFF);
727 uint64_t const hi_lo
= XXH_mult32to64(lhs
>> 32, rhs
& 0xFFFFFFFF);
728 uint64_t const lo_hi
= XXH_mult32to64(lhs
& 0xFFFFFFFF, rhs
>> 32);
729 uint64_t const hi_hi
= XXH_mult32to64(lhs
>> 32, rhs
>> 32);
731 /* Now add the products together. These will never overflow. */
732 uint64_t const cross
= (lo_lo
>> 32) + (hi_lo
& 0xFFFFFFFF) + lo_hi
;
733 uint64_t const upper
= (hi_lo
>> 32) + (cross
>> 32) + hi_hi
;
734 uint64_t const lower
= (cross
<< 32) | (lo_lo
& 0xFFFFFFFF);
743 /*! Seems to produce slightly better code on GCC for some reason. */
744 LLVM_ATTRIBUTE_ALWAYS_INLINE
constexpr uint64_t XXH_xorshift64(uint64_t v64
,
746 return v64
^ (v64
>> shift
);
749 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
750 XXH3_len_1to3_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
752 /* A doubled version of 1to3_64b with different constants. */
754 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
755 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
756 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
758 uint8_t const c1
= input
[0];
759 uint8_t const c2
= input
[len
>> 1];
760 uint8_t const c3
= input
[len
- 1];
761 uint32_t const combinedl
= ((uint32_t)c1
<< 16) | ((uint32_t)c2
<< 24) |
762 ((uint32_t)c3
<< 0) | ((uint32_t)len
<< 8);
763 uint32_t const combinedh
= XXH_rotl32(byteswap(combinedl
), 13);
764 uint64_t const bitflipl
=
765 (endian::read32le(secret
) ^ endian::read32le(secret
+ 4)) + seed
;
766 uint64_t const bitfliph
=
767 (endian::read32le(secret
+ 8) ^ endian::read32le(secret
+ 12)) - seed
;
768 uint64_t const keyed_lo
= (uint64_t)combinedl
^ bitflipl
;
769 uint64_t const keyed_hi
= (uint64_t)combinedh
^ bitfliph
;
771 h128
.low64
= XXH64_avalanche(keyed_lo
);
772 h128
.high64
= XXH64_avalanche(keyed_hi
);
776 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
777 XXH3_len_4to8_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
779 seed
^= (uint64_t)byteswap((uint32_t)seed
) << 32;
780 uint32_t const input_lo
= endian::read32le(input
);
781 uint32_t const input_hi
= endian::read32le(input
+ len
- 4);
782 uint64_t const input_64
= input_lo
+ ((uint64_t)input_hi
<< 32);
783 uint64_t const bitflip
=
784 (endian::read64le(secret
+ 16) ^ endian::read64le(secret
+ 24)) + seed
;
785 uint64_t const keyed
= input_64
^ bitflip
;
787 /* Shift len to the left to ensure it is even, this avoids even multiplies.
789 XXH128_hash_t m128
= XXH_mult64to128(keyed
, PRIME64_1
+ (len
<< 2));
791 m128
.high64
+= (m128
.low64
<< 1);
792 m128
.low64
^= (m128
.high64
>> 3);
794 m128
.low64
= XXH_xorshift64(m128
.low64
, 35);
795 m128
.low64
*= PRIME_MX2
;
796 m128
.low64
= XXH_xorshift64(m128
.low64
, 28);
797 m128
.high64
= XXH3_avalanche(m128
.high64
);
801 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
802 XXH3_len_9to16_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
804 uint64_t const bitflipl
=
805 (endian::read64le(secret
+ 32) ^ endian::read64le(secret
+ 40)) - seed
;
806 uint64_t const bitfliph
=
807 (endian::read64le(secret
+ 48) ^ endian::read64le(secret
+ 56)) + seed
;
808 uint64_t const input_lo
= endian::read64le(input
);
809 uint64_t input_hi
= endian::read64le(input
+ len
- 8);
811 XXH_mult64to128(input_lo
^ input_hi
^ bitflipl
, PRIME64_1
);
813 * Put len in the middle of m128 to ensure that the length gets mixed to
814 * both the low and high bits in the 128x64 multiply below.
816 m128
.low64
+= (uint64_t)(len
- 1) << 54;
817 input_hi
^= bitfliph
;
819 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
820 * add the long product of the low 32 bits of input_hi and PRIME32_2 to
821 * the high 64 bits of m128.
823 * The best approach to this operation is different on 32-bit and 64-bit.
825 if (sizeof(void *) < sizeof(uint64_t)) { /* 32-bit */
827 * 32-bit optimized version, which is more readable.
829 * On 32-bit, it removes an ADC and delays a dependency between the two
830 * halves of m128.high64, but it generates an extra mask on 64-bit.
832 m128
.high64
+= (input_hi
& 0xFFFFFFFF00000000ULL
) +
833 XXH_mult32to64((uint32_t)input_hi
, PRIME32_2
);
836 * 64-bit optimized (albeit more confusing) version.
838 * Uses some properties of addition and multiplication to remove the mask:
841 * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
842 * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
846 * Inverse Property: x + y - x == y
847 * a + (b * (1 + c - 1))
848 * Distributive Property: x * (y + z) == (x * y) + (x * z)
849 * a + (b * 1) + (b * (c - 1))
850 * Identity Property: x * 1 == x
851 * a + b + (b * (c - 1))
853 * Substitute a, b, and c:
854 * input_hi.hi + input_hi.lo + ((uint64_t)input_hi.lo * (PRIME32_2
857 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
858 * input_hi + ((uint64_t)input_hi.lo * (PRIME32_2 - 1))
860 m128
.high64
+= input_hi
+ XXH_mult32to64((uint32_t)input_hi
, PRIME32_2
- 1);
862 /* m128 ^= XXH_swap64(m128 >> 64); */
863 m128
.low64
^= byteswap(m128
.high64
);
865 /* 128x64 multiply: h128 = m128 * PRIME64_2; */
866 XXH128_hash_t h128
= XXH_mult64to128(m128
.low64
, PRIME64_2
);
867 h128
.high64
+= m128
.high64
* PRIME64_2
;
869 h128
.low64
= XXH3_avalanche(h128
.low64
);
870 h128
.high64
= XXH3_avalanche(h128
.high64
);
875 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
877 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
878 XXH3_len_0to16_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
881 return XXH3_len_9to16_128b(input
, len
, secret
, seed
);
883 return XXH3_len_4to8_128b(input
, len
, secret
, seed
);
885 return XXH3_len_1to3_128b(input
, len
, secret
, seed
);
887 uint64_t const bitflipl
=
888 endian::read64le(secret
+ 64) ^ endian::read64le(secret
+ 72);
889 uint64_t const bitfliph
=
890 endian::read64le(secret
+ 80) ^ endian::read64le(secret
+ 88);
891 h128
.low64
= XXH64_avalanche(seed
^ bitflipl
);
892 h128
.high64
= XXH64_avalanche(seed
^ bitfliph
);
897 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
899 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
900 XXH128_mix32B(XXH128_hash_t acc
, const uint8_t *input_1
, const uint8_t *input_2
,
901 const uint8_t *secret
, uint64_t seed
) {
902 acc
.low64
+= XXH3_mix16B(input_1
, secret
+ 0, seed
);
903 acc
.low64
^= endian::read64le(input_2
) + endian::read64le(input_2
+ 8);
904 acc
.high64
+= XXH3_mix16B(input_2
, secret
+ 16, seed
);
905 acc
.high64
^= endian::read64le(input_1
) + endian::read64le(input_1
+ 8);
909 LLVM_ATTRIBUTE_ALWAYS_INLINE
static XXH128_hash_t
910 XXH3_len_17to128_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
911 size_t secretSize
, uint64_t seed
) {
915 acc
.low64
= len
* PRIME64_1
;
922 XXH128_mix32B(acc
, input
+ 48, input
+ len
- 64, secret
+ 96, seed
);
924 acc
= XXH128_mix32B(acc
, input
+ 32, input
+ len
- 48, secret
+ 64, seed
);
926 acc
= XXH128_mix32B(acc
, input
+ 16, input
+ len
- 32, secret
+ 32, seed
);
928 acc
= XXH128_mix32B(acc
, input
, input
+ len
- 16, secret
, seed
);
930 h128
.low64
= acc
.low64
+ acc
.high64
;
931 h128
.high64
= (acc
.low64
* PRIME64_1
) + (acc
.high64
* PRIME64_4
) +
932 ((len
- seed
) * PRIME64_2
);
933 h128
.low64
= XXH3_avalanche(h128
.low64
);
934 h128
.high64
= (uint64_t)0 - XXH3_avalanche(h128
.high64
);
938 LLVM_ATTRIBUTE_NOINLINE
static XXH128_hash_t
939 XXH3_len_129to240_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
940 size_t secretSize
, uint64_t seed
) {
945 acc
.low64
= len
* PRIME64_1
;
948 * We set as `i` as offset + 32. We do this so that unchanged
949 * `len` can be used as upper bound. This reaches a sweet spot
950 * where both x86 and aarch64 get simple agen and good codegen
953 for (i
= 32; i
< 160; i
+= 32) {
954 acc
= XXH128_mix32B(acc
, input
+ i
- 32, input
+ i
- 16, secret
+ i
- 32,
957 acc
.low64
= XXH3_avalanche(acc
.low64
);
958 acc
.high64
= XXH3_avalanche(acc
.high64
);
960 * NB: `i <= len` will duplicate the last 32-bytes if
961 * len % 32 was zero. This is an unfortunate necessity to keep
962 * the hash result stable.
964 for (i
= 160; i
<= len
; i
+= 32) {
965 acc
= XXH128_mix32B(acc
, input
+ i
- 32, input
+ i
- 16,
966 secret
+ XXH3_MIDSIZE_STARTOFFSET
+ i
- 160, seed
);
970 XXH128_mix32B(acc
, input
+ len
- 16, input
+ len
- 32,
971 secret
+ XXH3_SECRETSIZE_MIN
- XXH3_MIDSIZE_LASTOFFSET
- 16,
975 h128
.low64
= acc
.low64
+ acc
.high64
;
976 h128
.high64
= (acc
.low64
* PRIME64_1
) + (acc
.high64
* PRIME64_4
) +
977 ((len
- seed
) * PRIME64_2
);
978 h128
.low64
= XXH3_avalanche(h128
.low64
);
979 h128
.high64
= (uint64_t)0 - XXH3_avalanche(h128
.high64
);
983 LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t
984 XXH3_hashLong_128b(const uint8_t *input
, size_t len
, const uint8_t *secret
,
986 const size_t nbStripesPerBlock
=
987 (secretSize
- XXH_STRIPE_LEN
) / XXH_SECRET_CONSUME_RATE
;
988 const size_t block_len
= XXH_STRIPE_LEN
* nbStripesPerBlock
;
989 const size_t nb_blocks
= (len
- 1) / block_len
;
990 alignas(16) uint64_t acc
[XXH_ACC_NB
] = {
991 PRIME32_3
, PRIME64_1
, PRIME64_2
, PRIME64_3
,
992 PRIME64_4
, PRIME32_2
, PRIME64_5
, PRIME32_1
,
995 for (size_t n
= 0; n
< nb_blocks
; ++n
) {
996 XXH3_accumulate(acc
, input
+ n
* block_len
, secret
, nbStripesPerBlock
);
997 XXH3_scrambleAcc(acc
, secret
+ secretSize
- XXH_STRIPE_LEN
);
1000 /* last partial block */
1001 const size_t nbStripes
= (len
- 1 - (block_len
* nb_blocks
)) / XXH_STRIPE_LEN
;
1002 assert(nbStripes
<= secretSize
/ XXH_SECRET_CONSUME_RATE
);
1003 XXH3_accumulate(acc
, input
+ nb_blocks
* block_len
, secret
, nbStripes
);
1006 constexpr size_t XXH_SECRET_LASTACC_START
= 7;
1007 XXH3_accumulate_512(acc
, input
+ len
- XXH_STRIPE_LEN
,
1008 secret
+ secretSize
- XXH_STRIPE_LEN
-
1009 XXH_SECRET_LASTACC_START
);
1011 /* converge into final hash */
1012 static_assert(sizeof(acc
) == 64);
1014 constexpr size_t XXH_SECRET_MERGEACCS_START
= 11;
1015 h128
.low64
= XXH3_mergeAccs(acc
, secret
+ XXH_SECRET_MERGEACCS_START
,
1016 (uint64_t)len
* PRIME64_1
);
1017 h128
.high64
= XXH3_mergeAccs(
1018 acc
, secret
+ secretSize
- sizeof(acc
) - XXH_SECRET_MERGEACCS_START
,
1019 ~((uint64_t)len
* PRIME64_2
));
1023 llvm::XXH128_hash_t
llvm::xxh3_128bits(ArrayRef
<uint8_t> data
) {
1024 size_t len
= data
.size();
1025 const uint8_t *input
= data
.data();
1028 * If an action is to be taken if `secret` conditions are not respected,
1029 * it should be done here.
1030 * For now, it's a contract pre-condition.
1031 * Adding a check and a branch here would cost performance at every hash.
1034 return XXH3_len_0to16_128b(input
, len
, kSecret
, /*seed64=*/0);
1036 return XXH3_len_17to128_128b(input
, len
, kSecret
, sizeof(kSecret
),
1038 if (len
<= XXH3_MIDSIZE_MAX
)
1039 return XXH3_len_129to240_128b(input
, len
, kSecret
, sizeof(kSecret
),
1041 return XXH3_hashLong_128b(input
, len
, kSecret
, sizeof(kSecret
));