4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
27 #include <sys/zfs_context.h>
28 #include <sys/zfs_impl.h>
29 #include <sys/blake3.h>
31 #include "blake3_impl.h"
33 #if defined(__aarch64__) || \
34 (defined(__x86_64) && defined(HAVE_SSE2)) || \
35 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
/*
 * SSE2 BLAKE3 primitives.  They are declared extern with the ASMABI
 * calling convention; their definitions are not part of this file.
 * The static blake3_*_sse2() wrappers in this file call them.
 */

/* Single-block compression; cv is the only writable argument. */
extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags);

/* Single-block compression with cv read-only; writes to out[64]. */
extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags, uint8_t out[64]);

/*
 * Multi-input variant: takes num_inputs input pointers and a block count,
 * and writes its result through out (the only writable argument).
 */
extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
50 static void blake3_compress_in_place_sse2(uint32_t cv
[8],
51 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
52 uint64_t counter
, uint8_t flags
) {
54 zfs_blake3_compress_in_place_sse2(cv
, block
, block_len
, counter
,
59 static void blake3_compress_xof_sse2(const uint32_t cv
[8],
60 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
61 uint64_t counter
, uint8_t flags
, uint8_t out
[64]) {
63 zfs_blake3_compress_xof_sse2(cv
, block
, block_len
, counter
, flags
,
68 static void blake3_hash_many_sse2(const uint8_t * const *inputs
,
69 size_t num_inputs
, size_t blocks
, const uint32_t key
[8],
70 uint64_t counter
, boolean_t increment_counter
, uint8_t flags
,
71 uint8_t flags_start
, uint8_t flags_end
, uint8_t *out
) {
73 zfs_blake3_hash_many_sse2(inputs
, num_inputs
, blocks
, key
, counter
,
74 increment_counter
, flags
, flags_start
, flags_end
, out
);
78 static boolean_t
blake3_is_sse2_supported(void)
81 return (kfpu_allowed() && zfs_sse2_available());
82 #elif defined(__PPC64__)
83 return (kfpu_allowed() && zfs_vsx_available());
85 return (kfpu_allowed());
89 const blake3_ops_t blake3_sse2_impl
= {
90 .compress_in_place
= blake3_compress_in_place_sse2
,
91 .compress_xof
= blake3_compress_xof_sse2
,
92 .hash_many
= blake3_hash_many_sse2
,
93 .is_supported
= blake3_is_sse2_supported
,
99 #if defined(__aarch64__) || \
100 (defined(__x86_64) && defined(HAVE_SSE2)) || \
101 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
/*
 * SSE4.1 BLAKE3 primitives.  They are declared extern with the ASMABI
 * calling convention; their definitions are not part of this file.
 * The static blake3_*_sse41() wrappers in this file call them.
 */

/* Single-block compression; cv is the only writable argument. */
extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags);

/* Single-block compression with cv read-only; writes to out[64]. */
extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags, uint8_t out[64]);

/*
 * Multi-input variant: takes num_inputs input pointers and a block count,
 * and writes its result through out (the only writable argument).
 */
extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
116 static void blake3_compress_in_place_sse41(uint32_t cv
[8],
117 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
118 uint64_t counter
, uint8_t flags
) {
120 zfs_blake3_compress_in_place_sse41(cv
, block
, block_len
, counter
,
125 static void blake3_compress_xof_sse41(const uint32_t cv
[8],
126 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
127 uint64_t counter
, uint8_t flags
, uint8_t out
[64]) {
129 zfs_blake3_compress_xof_sse41(cv
, block
, block_len
, counter
, flags
,
134 static void blake3_hash_many_sse41(const uint8_t * const *inputs
,
135 size_t num_inputs
, size_t blocks
, const uint32_t key
[8],
136 uint64_t counter
, boolean_t increment_counter
, uint8_t flags
,
137 uint8_t flags_start
, uint8_t flags_end
, uint8_t *out
) {
139 zfs_blake3_hash_many_sse41(inputs
, num_inputs
, blocks
, key
, counter
,
140 increment_counter
, flags
, flags_start
, flags_end
, out
);
144 static boolean_t
blake3_is_sse41_supported(void)
146 #if defined(__x86_64)
147 return (kfpu_allowed() && zfs_sse4_1_available());
148 #elif defined(__PPC64__)
149 return (kfpu_allowed() && zfs_vsx_available());
151 return (kfpu_allowed());
155 const blake3_ops_t blake3_sse41_impl
= {
156 .compress_in_place
= blake3_compress_in_place_sse41
,
157 .compress_xof
= blake3_compress_xof_sse41
,
158 .hash_many
= blake3_hash_many_sse41
,
159 .is_supported
= blake3_is_sse41_supported
,
165 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
/*
 * AVX2 multi-input BLAKE3 primitive, declared extern with the ASMABI
 * calling convention; its definition is not part of this file.  Only a
 * hash_many routine is declared for AVX2 here.  It takes num_inputs input
 * pointers and a block count, and writes its result through out.
 */
extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
171 static void blake3_hash_many_avx2(const uint8_t * const *inputs
,
172 size_t num_inputs
, size_t blocks
, const uint32_t key
[8],
173 uint64_t counter
, boolean_t increment_counter
, uint8_t flags
,
174 uint8_t flags_start
, uint8_t flags_end
, uint8_t *out
) {
176 zfs_blake3_hash_many_avx2(inputs
, num_inputs
, blocks
, key
, counter
,
177 increment_counter
, flags
, flags_start
, flags_end
, out
);
181 static boolean_t
blake3_is_avx2_supported(void)
183 return (kfpu_allowed() && zfs_sse4_1_available() &&
184 zfs_avx2_available());
189 .compress_in_place
= blake3_compress_in_place_sse41
,
190 .compress_xof
= blake3_compress_xof_sse41
,
191 .hash_many
= blake3_hash_many_avx2
,
192 .is_supported
= blake3_is_avx2_supported
,
198 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
/*
 * AVX-512 BLAKE3 primitives.  They are declared extern with the ASMABI
 * calling convention; their definitions are not part of this file.
 * The static blake3_*_avx512() wrappers in this file call them.
 */

/* Single-block compression; cv is the only writable argument. */
extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags);

/* Single-block compression with cv read-only; writes to out[64]. */
extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
    uint64_t counter, uint8_t flags, uint8_t out[64]);

/*
 * Multi-input variant: takes num_inputs input pointers and a block count,
 * and writes its result through out (the only writable argument).
 */
extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
212 static void blake3_compress_in_place_avx512(uint32_t cv
[8],
213 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
214 uint64_t counter
, uint8_t flags
) {
216 zfs_blake3_compress_in_place_avx512(cv
, block
, block_len
, counter
,
221 static void blake3_compress_xof_avx512(const uint32_t cv
[8],
222 const uint8_t block
[BLAKE3_BLOCK_LEN
], uint8_t block_len
,
223 uint64_t counter
, uint8_t flags
, uint8_t out
[64]) {
225 zfs_blake3_compress_xof_avx512(cv
, block
, block_len
, counter
, flags
,
230 static void blake3_hash_many_avx512(const uint8_t * const *inputs
,
231 size_t num_inputs
, size_t blocks
, const uint32_t key
[8],
232 uint64_t counter
, boolean_t increment_counter
, uint8_t flags
,
233 uint8_t flags_start
, uint8_t flags_end
, uint8_t *out
) {
235 zfs_blake3_hash_many_avx512(inputs
, num_inputs
, blocks
, key
, counter
,
236 increment_counter
, flags
, flags_start
, flags_end
, out
);
240 static boolean_t
blake3_is_avx512_supported(void)
242 return (kfpu_allowed() && zfs_avx512f_available() &&
243 zfs_avx512vl_available());
246 const blake3_ops_t blake3_avx512_impl
= {
247 .compress_in_place
= blake3_compress_in_place_avx512
,
248 .compress_xof
= blake3_compress_xof_avx512
,
249 .hash_many
= blake3_hash_many_avx512
,
250 .is_supported
= blake3_is_avx512_supported
,
256 extern const blake3_ops_t blake3_generic_impl
;
258 static const blake3_ops_t
*const blake3_impls
[] = {
259 &blake3_generic_impl
,
260 #if defined(__aarch64__) || \
261 (defined(__x86_64) && defined(HAVE_SSE2)) || \
262 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
265 #if defined(__aarch64__) || \
266 (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
267 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
270 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
273 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
278 /* use the generic implementation functions */
279 #define IMPL_NAME "blake3"
280 #define IMPL_OPS_T blake3_ops_t
281 #define IMPL_ARRAY blake3_impls
282 #define IMPL_GET_OPS blake3_get_ops
283 #define ZFS_IMPL_OPS zfs_blake3_ops
284 #include <generic_impl.c>
287 void **blake3_per_cpu_ctx
;
290 blake3_per_cpu_ctx_init(void)
 * Allocate the top-level pointer array that holds one BLAKE3 context per CPU.
295 blake3_per_cpu_ctx
= kmem_alloc(max_ncpus
* sizeof (void *), KM_SLEEP
);
296 for (int i
= 0; i
< max_ncpus
; i
++) {
297 blake3_per_cpu_ctx
[i
] = kmem_alloc(sizeof (BLAKE3_CTX
),
303 blake3_per_cpu_ctx_fini(void)
305 for (int i
= 0; i
< max_ncpus
; i
++) {
306 memset(blake3_per_cpu_ctx
[i
], 0, sizeof (BLAKE3_CTX
));
307 kmem_free(blake3_per_cpu_ctx
[i
], sizeof (BLAKE3_CTX
));
309 memset(blake3_per_cpu_ctx
, 0, max_ncpus
* sizeof (void *));
310 kmem_free(blake3_per_cpu_ctx
, max_ncpus
* sizeof (void *));
/*
 * Pick the printf-style format for one implementation name: the entry
 * whose index `i` equals the currently selected `impl` is wrapped in
 * square brackets, every other entry is printed plain.  Both formats end
 * with a single space so entries can be concatenated.
 */
#define IMPL_FMT(impl, i) (((impl) != (i)) ? "%s " : "[%s] ")
315 #if defined(__linux__)
318 blake3_param_get(char *buffer
, zfs_kernel_param_t
*unused
)
320 const uint32_t impl
= IMPL_READ(generic_impl_chosen
);
325 fmt
= IMPL_FMT(impl
, IMPL_CYCLE
);
326 cnt
+= kmem_scnprintf(buffer
+ cnt
, PAGE_SIZE
- cnt
, fmt
, "cycle");
329 fmt
= IMPL_FMT(impl
, IMPL_FASTEST
);
330 cnt
+= kmem_scnprintf(buffer
+ cnt
, PAGE_SIZE
- cnt
, fmt
, "fastest");
332 /* list all supported implementations */
334 for (uint32_t i
= 0; i
< generic_supp_impls_cnt
; ++i
) {
335 fmt
= IMPL_FMT(impl
, i
);
336 cnt
+= kmem_scnprintf(buffer
+ cnt
, PAGE_SIZE
- cnt
, fmt
,
337 blake3_impls
[i
]->name
);
344 blake3_param_set(const char *val
, zfs_kernel_param_t
*unused
)
347 return (generic_impl_setname(val
));
350 #elif defined(__FreeBSD__)
352 #include <sys/sbuf.h>
355 blake3_param(ZFS_MODULE_PARAM_ARGS
)
360 if (req
->newptr
== NULL
) {
361 const uint32_t impl
= IMPL_READ(generic_impl_chosen
);
362 const int init_buflen
= 64;
366 s
= sbuf_new_for_sysctl(NULL
, NULL
, init_buflen
, req
);
369 fmt
= IMPL_FMT(impl
, IMPL_CYCLE
);
370 (void) sbuf_printf(s
, fmt
, "cycle");
373 fmt
= IMPL_FMT(impl
, IMPL_FASTEST
);
374 (void) sbuf_printf(s
, fmt
, "fastest");
376 /* list all supported implementations */
377 for (uint32_t i
= 0; i
< generic_supp_impls_cnt
; ++i
) {
378 fmt
= IMPL_FMT(impl
, i
);
379 (void) sbuf_printf(s
, fmt
, generic_supp_impls
[i
]->name
);
382 err
= sbuf_finish(s
);
390 err
= sysctl_handle_string(oidp
, buf
, sizeof (buf
), req
);
395 return (-generic_impl_setname(buf
));
401 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs
, zfs_
, blake3_impl
,
402 blake3_param_set
, blake3_param_get
, ZMOD_RW
, \
403 "Select BLAKE3 implementation.");