1 /* SPDX-License-Identifier: GPL-2.0 */
3 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
5 * Copyright 2018 Google LLC
7 * Author: Eric Biggers <ebiggers@google.com>
10 #include <linux/linkage.h>
11 #include <linux/cfi_types.h>
// Register aliases used by nh_sse2 and the _nh_stride macro.
//
// PASS0_SUMS..PASS3_SUMS: one xmm register per pass, used as running
// accumulators (zeroed at the start of nh_sse2 and combined at its end).
13 #define PASS0_SUMS %xmm0
14 #define PASS1_SUMS %xmm1
15 #define PASS2_SUMS %xmm2
16 #define PASS3_SUMS %xmm3
// NOTE(review): the aliases K0-K3, T0, T1, KEY, MESSAGE and HASH are used
// further down but their definitions are not visible in this excerpt -
// confirm against the full file.
//
// MESSAGE_LEN: the message_len argument of nh_sse2; it is counted down in
// steps of 0x40 and 0x10 as message strides are processed.
31 #define MESSAGE_LEN %rdx
// _nh_stride k0, k1, k2, k3, offset
//
// Handles one 16-byte stride of the message.
//   k0..k3 - xmm registers holding key strides. k3 is loaded here from
//            offset(KEY); k0 is overwritten (it is reused as scratch).
//   offset - byte displacement applied to both MESSAGE and KEY.
//
// NOTE(review): only part of the macro body is visible in this excerpt (the
// uses of k1 and k2, the multiply/accumulate instructions and the macro
// terminator are not shown) - confirm the rest against the full file.
34 .macro _nh_stride k0, k1, k2, k3, offset
36 // Load next message stride
// Unaligned 16-byte load, so MESSAGE needs no particular alignment.
37 movdqu \offset(MESSAGE), T1
39 // Load next key stride
// Fills the k3 register; the caller rotates register roles between strides
// so this value is seen as k2, k1, k0 by the following invocations.
40 movdqu \offset(KEY), \k3
42 // Add message words to key words
// Packed 32-bit add of the message dwords into k0 (destination is k0).
45 paddd T1, \k0 // reuse k0 to avoid a move
50 // Multiply 32x32 => 64 and accumulate
// Shuffle 0x32 places dwords 2 and 3 of k0 into dword lanes 0 and 2,
// presumably to feed the multiply step that is not visible here.
52 pshufd $0x32, \k0, \k0
70 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
71 * __le64 hash[NH_NUM_PASSES])
73 * It's guaranteed that message_len % 16 == 0.
// nh_sse2 - SSE2 implementation of the NH hash over 'message'.
//
// Four xmm accumulators (PASS0_SUMS..PASS3_SUMS) are updated by _nh_stride
// for every 16-byte message stride, then combined and written to HASH with
// two 16-byte stores.
//
// NOTE(review): this excerpt shows no labels, branches, initial key loads,
// KEY/MESSAGE pointer advances or return instruction; they are presumably on
// lines not visible here - confirm against the full file.
75 SYM_TYPED_FUNC_START(nh_sse2)
// Zero the four per-pass accumulators.
81 pxor PASS0_SUMS, PASS0_SUMS
82 pxor PASS1_SUMS, PASS1_SUMS
83 pxor PASS2_SUMS, PASS2_SUMS
84 pxor PASS3_SUMS, PASS3_SUMS
// Subtract one 64-byte group (four strides) from the length up front.
86 sub $0x40, MESSAGE_LEN
// Four strides at offsets 0x00..0x30. The key register arguments rotate by
// one position per stride, so the register loaded as k3 in one stride is
// passed as k2 in the next.
89 _nh_stride K0, K1, K2, K3, 0x00
90 _nh_stride K1, K2, K3, K0, 0x10
91 _nh_stride K2, K3, K0, K1, 0x20
92 _nh_stride K3, K0, K1, K2, 0x30
// Account for the next 64-byte group.
95 sub $0x40, MESSAGE_LEN
// Keep only the low six bits: the leftover length below 64 bytes.
99 and $0x3f, MESSAGE_LEN
// Tail: up to three more strides, with 16 subtracted from the length between
// them. NOTE(review): the conditional exits that would skip the remaining
// strides are not visible in this excerpt.
101 _nh_stride K0, K1, K2, K3, 0x00
103 sub $0x10, MESSAGE_LEN
105 _nh_stride K1, K2, K3, K0, 0x10
107 sub $0x10, MESSAGE_LEN
109 _nh_stride K2, K3, K0, K1, 0x20
112 // Sum the accumulators for each pass, then store the sums to 'hash'
// Copy PASS0/PASS2 first: punpcklqdq overwrites its destination, and the
// originals are still needed as destinations of punpckhqdq below.
113 movdqa PASS0_SUMS, T0
114 movdqa PASS2_SUMS, T1
// Pair up the low qwords (_A) and the high qwords (_B) of adjacent passes.
115 punpcklqdq PASS1_SUMS, T0 // => (PASS0_SUM_A PASS1_SUM_A)
116 punpcklqdq PASS3_SUMS, T1 // => (PASS2_SUM_A PASS3_SUM_A)
117 punpckhqdq PASS1_SUMS, PASS0_SUMS // => (PASS0_SUM_B PASS1_SUM_B)
118 punpckhqdq PASS3_SUMS, PASS2_SUMS // => (PASS2_SUM_B PASS3_SUM_B)
// NOTE(review): the additions that fold the _B halves into T0/T1 are not
// visible in this excerpt - confirm against the full file.
// Unaligned stores, so HASH needs no particular alignment.
121 movdqu T0, 0x00(HASH)
122 movdqu T1, 0x10(HASH)
124 SYM_FUNC_END(nh_sse2)