drm/panthor: Don't add write fences to the shared BOs
[drm/drm-misc.git] / lib / xxhash.c
blobd5bb9ff106078d8e258c466dba1ff072b90543ac
1 /*
2 * xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2016, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * This program is free software; you can redistribute it and/or modify it under
31 * the terms of the GNU General Public License version 2 as published by the
32 * Free Software Foundation. This program is dual-licensed; you may select
33 * either version 2 of the GNU General Public License ("GPL") or BSD license
34 * ("BSD").
36 * You can contact the author at:
37 * - xxHash homepage: https://cyan4973.github.io/xxHash/
38 * - xxHash source repository: https://github.com/Cyan4973/xxHash
41 #include <asm/unaligned.h>
42 #include <linux/errno.h>
43 #include <linux/compiler.h>
44 #include <linux/kernel.h>
45 #include <linux/module.h>
46 #include <linux/string.h>
47 #include <linux/xxhash.h>
49 /*-*************************************
50 * Macros
51 **************************************/
/*
 * Rotate x left by r bits inside a 32-bit (xxh_rotl32) or 64-bit
 * (xxh_rotl64) word.
 *
 * Every use of an argument is parenthesized so that an expression argument
 * (for example "a | b" or "n + 1") is rotated as a whole instead of being
 * re-associated by operator precedence.
 *
 * x and r are each expanded twice, so arguments must be free of side
 * effects.  r must lie strictly between 0 and the word width; a shift by
 * the full width is undefined.  x is expected to be an unsigned value of
 * the matching width.
 */
#define xxh_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define xxh_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
/* XXH_CPU_LITTLE_ENDIAN is 1 when __LITTLE_ENDIAN is defined, 0 otherwise. */
#ifndef __LITTLE_ENDIAN
# define XXH_CPU_LITTLE_ENDIAN 0
#else
# define XXH_CPU_LITTLE_ENDIAN 1
#endif
61 /*-*************************************
62 * Constants
63 **************************************/
/* 32-bit multiplicative constants used by the xxh32 routines. */
static const uint32_t PRIME32_1 = 0x9E3779B1U;	/* 2654435761 */
static const uint32_t PRIME32_2 = 0x85EBCA77U;	/* 2246822519 */
static const uint32_t PRIME32_3 = 0xC2B2AE3DU;	/* 3266489917 */
static const uint32_t PRIME32_4 = 0x27D4EB2FU;	/*  668265263 */
static const uint32_t PRIME32_5 = 0x165667B1U;	/*  374761393 */

/* 64-bit multiplicative constants used by the xxh64 routines. */
static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 11400714785074694791 */
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 14029467366897019727 */
static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL; /*  1609587929392839161 */
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /*  9650029242287828579 */
static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL; /*  2870177450012600261 */
76 /*-**************************
77 * Utils
78 ***************************/
79 void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
81 memcpy(dst, src, sizeof(*dst));
83 EXPORT_SYMBOL(xxh32_copy_state);
85 void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
87 memcpy(dst, src, sizeof(*dst));
89 EXPORT_SYMBOL(xxh64_copy_state);
91 /*-***************************
92 * Simple Hash Functions
93 ****************************/
94 static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
96 seed += input * PRIME32_2;
97 seed = xxh_rotl32(seed, 13);
98 seed *= PRIME32_1;
99 return seed;
102 uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
104 const uint8_t *p = (const uint8_t *)input;
105 const uint8_t *b_end = p + len;
106 uint32_t h32;
108 if (len >= 16) {
109 const uint8_t *const limit = b_end - 16;
110 uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
111 uint32_t v2 = seed + PRIME32_2;
112 uint32_t v3 = seed + 0;
113 uint32_t v4 = seed - PRIME32_1;
115 do {
116 v1 = xxh32_round(v1, get_unaligned_le32(p));
117 p += 4;
118 v2 = xxh32_round(v2, get_unaligned_le32(p));
119 p += 4;
120 v3 = xxh32_round(v3, get_unaligned_le32(p));
121 p += 4;
122 v4 = xxh32_round(v4, get_unaligned_le32(p));
123 p += 4;
124 } while (p <= limit);
126 h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
127 xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
128 } else {
129 h32 = seed + PRIME32_5;
132 h32 += (uint32_t)len;
134 while (p + 4 <= b_end) {
135 h32 += get_unaligned_le32(p) * PRIME32_3;
136 h32 = xxh_rotl32(h32, 17) * PRIME32_4;
137 p += 4;
140 while (p < b_end) {
141 h32 += (*p) * PRIME32_5;
142 h32 = xxh_rotl32(h32, 11) * PRIME32_1;
143 p++;
146 h32 ^= h32 >> 15;
147 h32 *= PRIME32_2;
148 h32 ^= h32 >> 13;
149 h32 *= PRIME32_3;
150 h32 ^= h32 >> 16;
152 return h32;
154 EXPORT_SYMBOL(xxh32);
156 static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
158 acc += input * PRIME64_2;
159 acc = xxh_rotl64(acc, 31);
160 acc *= PRIME64_1;
161 return acc;
164 static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
166 val = xxh64_round(0, val);
167 acc ^= val;
168 acc = acc * PRIME64_1 + PRIME64_4;
169 return acc;
172 uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
174 const uint8_t *p = (const uint8_t *)input;
175 const uint8_t *const b_end = p + len;
176 uint64_t h64;
178 if (len >= 32) {
179 const uint8_t *const limit = b_end - 32;
180 uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
181 uint64_t v2 = seed + PRIME64_2;
182 uint64_t v3 = seed + 0;
183 uint64_t v4 = seed - PRIME64_1;
185 do {
186 v1 = xxh64_round(v1, get_unaligned_le64(p));
187 p += 8;
188 v2 = xxh64_round(v2, get_unaligned_le64(p));
189 p += 8;
190 v3 = xxh64_round(v3, get_unaligned_le64(p));
191 p += 8;
192 v4 = xxh64_round(v4, get_unaligned_le64(p));
193 p += 8;
194 } while (p <= limit);
196 h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
197 xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
198 h64 = xxh64_merge_round(h64, v1);
199 h64 = xxh64_merge_round(h64, v2);
200 h64 = xxh64_merge_round(h64, v3);
201 h64 = xxh64_merge_round(h64, v4);
203 } else {
204 h64 = seed + PRIME64_5;
207 h64 += (uint64_t)len;
209 while (p + 8 <= b_end) {
210 const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
212 h64 ^= k1;
213 h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
214 p += 8;
217 if (p + 4 <= b_end) {
218 h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
219 h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
220 p += 4;
223 while (p < b_end) {
224 h64 ^= (*p) * PRIME64_5;
225 h64 = xxh_rotl64(h64, 11) * PRIME64_1;
226 p++;
229 h64 ^= h64 >> 33;
230 h64 *= PRIME64_2;
231 h64 ^= h64 >> 29;
232 h64 *= PRIME64_3;
233 h64 ^= h64 >> 32;
235 return h64;
237 EXPORT_SYMBOL(xxh64);
239 /*-**************************************************
240 * Advanced Hash Functions
241 ***************************************************/
242 void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
244 /* use a local state for memcpy() to avoid strict-aliasing warnings */
245 struct xxh32_state state;
247 memset(&state, 0, sizeof(state));
248 state.v1 = seed + PRIME32_1 + PRIME32_2;
249 state.v2 = seed + PRIME32_2;
250 state.v3 = seed + 0;
251 state.v4 = seed - PRIME32_1;
252 memcpy(statePtr, &state, sizeof(state));
254 EXPORT_SYMBOL(xxh32_reset);
256 void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
258 /* use a local state for memcpy() to avoid strict-aliasing warnings */
259 struct xxh64_state state;
261 memset(&state, 0, sizeof(state));
262 state.v1 = seed + PRIME64_1 + PRIME64_2;
263 state.v2 = seed + PRIME64_2;
264 state.v3 = seed + 0;
265 state.v4 = seed - PRIME64_1;
266 memcpy(statePtr, &state, sizeof(state));
268 EXPORT_SYMBOL(xxh64_reset);
270 int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
272 const uint8_t *p = (const uint8_t *)input;
273 const uint8_t *const b_end = p + len;
275 if (input == NULL)
276 return -EINVAL;
278 state->total_len_32 += (uint32_t)len;
279 state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
281 if (state->memsize + len < 16) { /* fill in tmp buffer */
282 memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
283 state->memsize += (uint32_t)len;
284 return 0;
287 if (state->memsize) { /* some data left from previous update */
288 const uint32_t *p32 = state->mem32;
290 memcpy((uint8_t *)(state->mem32) + state->memsize, input,
291 16 - state->memsize);
293 state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
294 p32++;
295 state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
296 p32++;
297 state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
298 p32++;
299 state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
300 p32++;
302 p += 16-state->memsize;
303 state->memsize = 0;
306 if (p <= b_end - 16) {
307 const uint8_t *const limit = b_end - 16;
308 uint32_t v1 = state->v1;
309 uint32_t v2 = state->v2;
310 uint32_t v3 = state->v3;
311 uint32_t v4 = state->v4;
313 do {
314 v1 = xxh32_round(v1, get_unaligned_le32(p));
315 p += 4;
316 v2 = xxh32_round(v2, get_unaligned_le32(p));
317 p += 4;
318 v3 = xxh32_round(v3, get_unaligned_le32(p));
319 p += 4;
320 v4 = xxh32_round(v4, get_unaligned_le32(p));
321 p += 4;
322 } while (p <= limit);
324 state->v1 = v1;
325 state->v2 = v2;
326 state->v3 = v3;
327 state->v4 = v4;
330 if (p < b_end) {
331 memcpy(state->mem32, p, (size_t)(b_end-p));
332 state->memsize = (uint32_t)(b_end-p);
335 return 0;
337 EXPORT_SYMBOL(xxh32_update);
339 uint32_t xxh32_digest(const struct xxh32_state *state)
341 const uint8_t *p = (const uint8_t *)state->mem32;
342 const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
343 state->memsize;
344 uint32_t h32;
346 if (state->large_len) {
347 h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
348 xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
349 } else {
350 h32 = state->v3 /* == seed */ + PRIME32_5;
353 h32 += state->total_len_32;
355 while (p + 4 <= b_end) {
356 h32 += get_unaligned_le32(p) * PRIME32_3;
357 h32 = xxh_rotl32(h32, 17) * PRIME32_4;
358 p += 4;
361 while (p < b_end) {
362 h32 += (*p) * PRIME32_5;
363 h32 = xxh_rotl32(h32, 11) * PRIME32_1;
364 p++;
367 h32 ^= h32 >> 15;
368 h32 *= PRIME32_2;
369 h32 ^= h32 >> 13;
370 h32 *= PRIME32_3;
371 h32 ^= h32 >> 16;
373 return h32;
375 EXPORT_SYMBOL(xxh32_digest);
377 int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
379 const uint8_t *p = (const uint8_t *)input;
380 const uint8_t *const b_end = p + len;
382 if (input == NULL)
383 return -EINVAL;
385 state->total_len += len;
387 if (state->memsize + len < 32) { /* fill in tmp buffer */
388 memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
389 state->memsize += (uint32_t)len;
390 return 0;
393 if (state->memsize) { /* tmp buffer is full */
394 uint64_t *p64 = state->mem64;
396 memcpy(((uint8_t *)p64) + state->memsize, input,
397 32 - state->memsize);
399 state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
400 p64++;
401 state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
402 p64++;
403 state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
404 p64++;
405 state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
407 p += 32 - state->memsize;
408 state->memsize = 0;
411 if (p + 32 <= b_end) {
412 const uint8_t *const limit = b_end - 32;
413 uint64_t v1 = state->v1;
414 uint64_t v2 = state->v2;
415 uint64_t v3 = state->v3;
416 uint64_t v4 = state->v4;
418 do {
419 v1 = xxh64_round(v1, get_unaligned_le64(p));
420 p += 8;
421 v2 = xxh64_round(v2, get_unaligned_le64(p));
422 p += 8;
423 v3 = xxh64_round(v3, get_unaligned_le64(p));
424 p += 8;
425 v4 = xxh64_round(v4, get_unaligned_le64(p));
426 p += 8;
427 } while (p <= limit);
429 state->v1 = v1;
430 state->v2 = v2;
431 state->v3 = v3;
432 state->v4 = v4;
435 if (p < b_end) {
436 memcpy(state->mem64, p, (size_t)(b_end-p));
437 state->memsize = (uint32_t)(b_end - p);
440 return 0;
442 EXPORT_SYMBOL(xxh64_update);
444 uint64_t xxh64_digest(const struct xxh64_state *state)
446 const uint8_t *p = (const uint8_t *)state->mem64;
447 const uint8_t *const b_end = (const uint8_t *)state->mem64 +
448 state->memsize;
449 uint64_t h64;
451 if (state->total_len >= 32) {
452 const uint64_t v1 = state->v1;
453 const uint64_t v2 = state->v2;
454 const uint64_t v3 = state->v3;
455 const uint64_t v4 = state->v4;
457 h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
458 xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
459 h64 = xxh64_merge_round(h64, v1);
460 h64 = xxh64_merge_round(h64, v2);
461 h64 = xxh64_merge_round(h64, v3);
462 h64 = xxh64_merge_round(h64, v4);
463 } else {
464 h64 = state->v3 + PRIME64_5;
467 h64 += (uint64_t)state->total_len;
469 while (p + 8 <= b_end) {
470 const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
472 h64 ^= k1;
473 h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
474 p += 8;
477 if (p + 4 <= b_end) {
478 h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
479 h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
480 p += 4;
483 while (p < b_end) {
484 h64 ^= (*p) * PRIME64_5;
485 h64 = xxh_rotl64(h64, 11) * PRIME64_1;
486 p++;
489 h64 ^= h64 >> 33;
490 h64 *= PRIME64_2;
491 h64 ^= h64 >> 29;
492 h64 *= PRIME64_3;
493 h64 ^= h64 >> 32;
495 return h64;
497 EXPORT_SYMBOL(xxh64_digest);
/* Module metadata: licence string and one-line description. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("xxHash");