/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */
/*
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
 */
35 #include <sys/blake3.h>
37 #include <sys/asm_linkage.h>
40 * Methods used to define BLAKE3 assembler implementations
42 typedef void (*blake3_compress_in_place_f
)(uint32_t cv
[8],
43 const uint8_t block
[BLAKE3_BLOCK_LEN
],
44 uint8_t block_len
, uint64_t counter
,
47 typedef void (*blake3_compress_xof_f
)(const uint32_t cv
[8],
48 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
49 uint64_t counter
, uint8_t flags
, uint8_t out
[64]);
51 typedef void (*blake3_hash_many_f
)(const uint8_t * const *inputs
,
52 size_t num_inputs
, size_t blocks
, const uint32_t key
[8],
53 uint64_t counter
, boolean_t increment_counter
, uint8_t flags
,
54 uint8_t flags_start
, uint8_t flags_end
, uint8_t *out
);
56 typedef boolean_t (*blake3_is_supported_f
)(void);
/*
 * One function-pointer member per hook type declared earlier in this file.
 * NOTE(review): the opening and closing lines of the enclosing aggregate are
 * not visible in this excerpt - presumably this is the body of the structure
 * typedef'd as blake3_ops_t, and it may carry further members; confirm.
 */
59 blake3_compress_in_place_f compress_in_place
;
60 blake3_compress_xof_f compress_xof
;
61 blake3_hash_many_f hash_many
;
62 blake3_is_supported_f is_supported
;
67 /* return selected BLAKE3 implementation ops */
68 extern const blake3_ops_t
*blake3_get_ops(void);
/*
 * Two alternative values (16 and 4) for MAX_SIMD_DEGREE.
 * NOTE(review): the preprocessor conditional that selects between them is
 * not visible in this excerpt - confirm which targets get which value.
 */
71 #define MAX_SIMD_DEGREE 16
73 #define MAX_SIMD_DEGREE 4
/* MAX_SIMD_DEGREE clamped from below so the result is never less than 2. */
76 #define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
/* The eight 32-bit initialization-vector words. */
static const uint32_t BLAKE3_IV[8] = {
	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
/*
 * Message word order for each of the 7 rounds.  Row 0 is the identity
 * order; every row is a permutation of 0..15, and each row after the first
 * is the previous row re-indexed through row 1.
 */
static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = {
	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
	{2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
	{3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
	{10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
	{12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
	{9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
	{11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
/*
 * Find index of the highest set bit
 *
 * Returns the zero-based position (0..63) of the most significant 1 bit
 * in x.  x must be non-zero: the compiler-builtin path is undefined for 0,
 * even though the portable fallback happens to return 0 in that case.
 */
static inline unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
	/* clz counts leading zeros; XOR with 63 turns that into a bit index. */
	return (63 ^ __builtin_clzll(x));
#elif defined(_MSC_VER) && defined(IS_X86_64)
	unsigned long index;
	_BitScanReverse64(&index, x);
	return ((unsigned int)index);
#elif defined(_MSC_VER) && defined(IS_X86_32)
	/* Only a 32-bit scan is available: try the upper half first. */
	if (x >> 32) {
		unsigned long index;
		_BitScanReverse(&index, (unsigned long)(x >> 32));
		return (32 + (unsigned int)index);
	} else {
		unsigned long index;
		_BitScanReverse(&index, (unsigned long)x);
		return ((unsigned int)index);
	}
#else
	/* Portable binary search: halve the candidate range at each step. */
	unsigned int c = 0;
	if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
	if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
	if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
	if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
	if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
	if (x & 0x0000000000000002ULL) { c += 1; }
	return (c);
#endif
}
/*
 * Count the number of 1 bits.
 *
 * Returns a value in 0..64.  Each iteration clears the lowest set bit, so
 * the loop runs once per 1 bit in x.
 */
static inline unsigned int popcnt(uint64_t x) {
	unsigned int count = 0;

	while (x != 0) {
		count += 1;
		x &= x - 1;	/* drop the lowest set bit */
	}

	return (count);
}
/*
 * Largest power of two less than or equal to x.
 * As a special case, returns 1 when x is 0.
 */
static inline uint64_t round_down_to_power_of_2(uint64_t x) {
	/* OR-ing in bit 0 keeps the argument non-zero and yields 1 for x == 0. */
	return (1ULL << highest_one(x | 1));
}
/* Low 32 bits of a 64-bit counter. */
static inline uint32_t counter_low(uint64_t counter) {
	return ((uint32_t)counter);
}
/* High 32 bits of a 64-bit counter. */
static inline uint32_t counter_high(uint64_t counter) {
	return ((uint32_t)(counter >> 32));
}
/*
 * Read a 32-bit little-endian value from the 4 bytes at src.
 * Bytes are read one at a time, so src needs no particular alignment and
 * the result does not depend on host byte order.
 */
static inline uint32_t load32(const void *src) {
	const uint8_t *p = (const uint8_t *)src;
	return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
	    ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
}
156 static inline void load_key_words(const uint8_t key
[BLAKE3_KEY_LEN
],
157 uint32_t key_words
[8]) {
158 key_words
[0] = load32(&key
[0 * 4]);
159 key_words
[1] = load32(&key
[1 * 4]);
160 key_words
[2] = load32(&key
[2 * 4]);
161 key_words
[3] = load32(&key
[3 * 4]);
162 key_words
[4] = load32(&key
[4 * 4]);
163 key_words
[5] = load32(&key
[5 * 4]);
164 key_words
[6] = load32(&key
[6 * 4]);
165 key_words
[7] = load32(&key
[7 * 4]);
/*
 * Write w to the 4 bytes at dst in little-endian order.
 * Bytes are written one at a time, so dst needs no particular alignment
 * and the layout does not depend on host byte order.
 */
static inline void store32(void *dst, uint32_t w) {
	uint8_t *p = (uint8_t *)dst;
	p[0] = (uint8_t)(w >> 0);
	p[1] = (uint8_t)(w >> 8);
	p[2] = (uint8_t)(w >> 16);
	p[3] = (uint8_t)(w >> 24);
}
/*
 * Serialize eight 32-bit words into 32 bytes, using store32() for each
 * word: cv_words[i] goes to bytes_out[4*i..4*i+3].  cv_words is only read.
 */
static inline void store_cv_words(uint8_t bytes_out[32],
    const uint32_t cv_words[8]) {
	for (size_t i = 0; i < 8; i++) {
		store32(&bytes_out[i * 4], cv_words[i]);
	}
}
191 #endif /* BLAKE3_IMPL_H */