Update ABD stats for linear page Linux
[zfs.git] / module / icp / algs / blake3 / blake3_impl.c
blobf3f48c2dfa1ad33c8b497e9955b941a3e9c659f6
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 #include <sys/simd.h>
27 #include <sys/zfs_context.h>
28 #include <sys/zfs_impl.h>
29 #include <sys/blake3.h>
31 #include "blake3_impl.h"
33 #if defined(__aarch64__) || \
34 (defined(__x86_64) && defined(HAVE_SSE2)) || \
35 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
37 extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
38 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
39 uint64_t counter, uint8_t flags);
41 extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
42 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
43 uint64_t counter, uint8_t flags, uint8_t out[64]);
45 extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
46 size_t num_inputs, size_t blocks, const uint32_t key[8],
47 uint64_t counter, boolean_t increment_counter, uint8_t flags,
48 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
50 static void blake3_compress_in_place_sse2(uint32_t cv[8],
51 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
52 uint64_t counter, uint8_t flags) {
53 kfpu_begin();
54 zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
55 flags);
56 kfpu_end();
59 static void blake3_compress_xof_sse2(const uint32_t cv[8],
60 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
61 uint64_t counter, uint8_t flags, uint8_t out[64]) {
62 kfpu_begin();
63 zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
64 out);
65 kfpu_end();
68 static void blake3_hash_many_sse2(const uint8_t * const *inputs,
69 size_t num_inputs, size_t blocks, const uint32_t key[8],
70 uint64_t counter, boolean_t increment_counter, uint8_t flags,
71 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
72 kfpu_begin();
73 zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
74 increment_counter, flags, flags_start, flags_end, out);
75 kfpu_end();
78 static boolean_t blake3_is_sse2_supported(void)
80 #if defined(__x86_64)
81 return (kfpu_allowed() && zfs_sse2_available());
82 #elif defined(__PPC64__)
83 return (kfpu_allowed() && zfs_vsx_available());
84 #else
85 return (kfpu_allowed());
86 #endif
89 const blake3_ops_t blake3_sse2_impl = {
90 .compress_in_place = blake3_compress_in_place_sse2,
91 .compress_xof = blake3_compress_xof_sse2,
92 .hash_many = blake3_hash_many_sse2,
93 .is_supported = blake3_is_sse2_supported,
94 .degree = 4,
95 .name = "sse2"
97 #endif
99 #if defined(__aarch64__) || \
100 (defined(__x86_64) && defined(HAVE_SSE2)) || \
101 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
103 extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
104 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
105 uint64_t counter, uint8_t flags);
107 extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
108 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
109 uint64_t counter, uint8_t flags, uint8_t out[64]);
111 extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
112 size_t num_inputs, size_t blocks, const uint32_t key[8],
113 uint64_t counter, boolean_t increment_counter, uint8_t flags,
114 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
116 static void blake3_compress_in_place_sse41(uint32_t cv[8],
117 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
118 uint64_t counter, uint8_t flags) {
119 kfpu_begin();
120 zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
121 flags);
122 kfpu_end();
125 static void blake3_compress_xof_sse41(const uint32_t cv[8],
126 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
127 uint64_t counter, uint8_t flags, uint8_t out[64]) {
128 kfpu_begin();
129 zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
130 out);
131 kfpu_end();
134 static void blake3_hash_many_sse41(const uint8_t * const *inputs,
135 size_t num_inputs, size_t blocks, const uint32_t key[8],
136 uint64_t counter, boolean_t increment_counter, uint8_t flags,
137 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
138 kfpu_begin();
139 zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
140 increment_counter, flags, flags_start, flags_end, out);
141 kfpu_end();
144 static boolean_t blake3_is_sse41_supported(void)
146 #if defined(__x86_64)
147 return (kfpu_allowed() && zfs_sse4_1_available());
148 #elif defined(__PPC64__)
149 return (kfpu_allowed() && zfs_vsx_available());
150 #else
151 return (kfpu_allowed());
152 #endif
155 const blake3_ops_t blake3_sse41_impl = {
156 .compress_in_place = blake3_compress_in_place_sse41,
157 .compress_xof = blake3_compress_xof_sse41,
158 .hash_many = blake3_hash_many_sse41,
159 .is_supported = blake3_is_sse41_supported,
160 .degree = 4,
161 .name = "sse41"
163 #endif
165 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
166 extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
167 size_t num_inputs, size_t blocks, const uint32_t key[8],
168 uint64_t counter, boolean_t increment_counter, uint8_t flags,
169 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
171 static void blake3_hash_many_avx2(const uint8_t * const *inputs,
172 size_t num_inputs, size_t blocks, const uint32_t key[8],
173 uint64_t counter, boolean_t increment_counter, uint8_t flags,
174 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
175 kfpu_begin();
176 zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
177 increment_counter, flags, flags_start, flags_end, out);
178 kfpu_end();
181 static boolean_t blake3_is_avx2_supported(void)
183 return (kfpu_allowed() && zfs_sse4_1_available() &&
184 zfs_avx2_available());
187 const blake3_ops_t
188 blake3_avx2_impl = {
189 .compress_in_place = blake3_compress_in_place_sse41,
190 .compress_xof = blake3_compress_xof_sse41,
191 .hash_many = blake3_hash_many_avx2,
192 .is_supported = blake3_is_avx2_supported,
193 .degree = 8,
194 .name = "avx2"
196 #endif
198 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
199 extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
200 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
201 uint64_t counter, uint8_t flags);
203 extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
204 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
205 uint64_t counter, uint8_t flags, uint8_t out[64]);
207 extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
208 size_t num_inputs, size_t blocks, const uint32_t key[8],
209 uint64_t counter, boolean_t increment_counter, uint8_t flags,
210 uint8_t flags_start, uint8_t flags_end, uint8_t *out);
212 static void blake3_compress_in_place_avx512(uint32_t cv[8],
213 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
214 uint64_t counter, uint8_t flags) {
215 kfpu_begin();
216 zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
217 flags);
218 kfpu_end();
221 static void blake3_compress_xof_avx512(const uint32_t cv[8],
222 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
223 uint64_t counter, uint8_t flags, uint8_t out[64]) {
224 kfpu_begin();
225 zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
226 out);
227 kfpu_end();
230 static void blake3_hash_many_avx512(const uint8_t * const *inputs,
231 size_t num_inputs, size_t blocks, const uint32_t key[8],
232 uint64_t counter, boolean_t increment_counter, uint8_t flags,
233 uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
234 kfpu_begin();
235 zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
236 increment_counter, flags, flags_start, flags_end, out);
237 kfpu_end();
240 static boolean_t blake3_is_avx512_supported(void)
242 return (kfpu_allowed() && zfs_avx512f_available() &&
243 zfs_avx512vl_available());
246 const blake3_ops_t blake3_avx512_impl = {
247 .compress_in_place = blake3_compress_in_place_avx512,
248 .compress_xof = blake3_compress_xof_avx512,
249 .hash_many = blake3_hash_many_avx512,
250 .is_supported = blake3_is_avx512_supported,
251 .degree = 16,
252 .name = "avx512"
254 #endif
256 extern const blake3_ops_t blake3_generic_impl;
258 static const blake3_ops_t *const blake3_impls[] = {
259 &blake3_generic_impl,
260 #if defined(__aarch64__) || \
261 (defined(__x86_64) && defined(HAVE_SSE2)) || \
262 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
263 &blake3_sse2_impl,
264 #endif
265 #if defined(__aarch64__) || \
266 (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
267 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
268 &blake3_sse41_impl,
269 #endif
270 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
271 &blake3_avx2_impl,
272 #endif
273 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
274 &blake3_avx512_impl,
275 #endif
278 /* use the generic implementation functions */
279 #define IMPL_NAME "blake3"
280 #define IMPL_OPS_T blake3_ops_t
281 #define IMPL_ARRAY blake3_impls
282 #define IMPL_GET_OPS blake3_get_ops
283 #define ZFS_IMPL_OPS zfs_blake3_ops
284 #include <generic_impl.c>
286 #ifdef _KERNEL
287 void **blake3_per_cpu_ctx;
289 void
290 blake3_per_cpu_ctx_init(void)
293 * Create "The Godfather" ptr to hold all blake3 ctx
295 blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
296 for (int i = 0; i < max_ncpus; i++) {
297 blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
298 KM_SLEEP);
302 void
303 blake3_per_cpu_ctx_fini(void)
305 for (int i = 0; i < max_ncpus; i++) {
306 memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
307 kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
309 memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
310 kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
313 #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
315 #if defined(__linux__)
317 static int
318 blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
320 const uint32_t impl = IMPL_READ(generic_impl_chosen);
321 char *fmt;
322 int cnt = 0;
324 /* cycling */
325 fmt = IMPL_FMT(impl, IMPL_CYCLE);
326 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle");
328 /* list fastest */
329 fmt = IMPL_FMT(impl, IMPL_FASTEST);
330 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
332 /* list all supported implementations */
333 generic_impl_init();
334 for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
335 fmt = IMPL_FMT(impl, i);
336 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
337 blake3_impls[i]->name);
340 return (cnt);
343 static int
344 blake3_param_set(const char *val, zfs_kernel_param_t *unused)
346 (void) unused;
347 return (generic_impl_setname(val));
350 #elif defined(__FreeBSD__)
352 #include <sys/sbuf.h>
354 static int
355 blake3_param(ZFS_MODULE_PARAM_ARGS)
357 int err;
359 generic_impl_init();
360 if (req->newptr == NULL) {
361 const uint32_t impl = IMPL_READ(generic_impl_chosen);
362 const int init_buflen = 64;
363 const char *fmt;
364 struct sbuf *s;
366 s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
368 /* cycling */
369 fmt = IMPL_FMT(impl, IMPL_CYCLE);
370 (void) sbuf_printf(s, fmt, "cycle");
372 /* list fastest */
373 fmt = IMPL_FMT(impl, IMPL_FASTEST);
374 (void) sbuf_printf(s, fmt, "fastest");
376 /* list all supported implementations */
377 for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
378 fmt = IMPL_FMT(impl, i);
379 (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
382 err = sbuf_finish(s);
383 sbuf_delete(s);
385 return (err);
388 char buf[16];
390 err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
391 if (err) {
392 return (err);
395 return (-generic_impl_setname(buf));
397 #endif
399 #undef IMPL_FMT
401 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
402 blake3_param_set, blake3_param_get, ZMOD_RW, \
403 "Select BLAKE3 implementation.");
404 #endif