FreeBSD: Fix a pair of bugs in zfs_fhtovp()
[zfs.git] / module / zfs / zfs_chksum.c
blob74b4cb8d2e63a9634acff1110f2ec8beaeed2888
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 #include <sys/types.h>
27 #include <sys/spa.h>
28 #include <sys/zio_checksum.h>
29 #include <sys/zfs_context.h>
30 #include <sys/zfs_chksum.h>
32 #include <sys/blake3.h>
34 /* limit benchmarking to max 256KiB, when EdonR is slower than this: */
35 #define LIMIT_PERF_MBS 300
37 typedef struct {
38 const char *name;
39 const char *impl;
40 uint64_t bs1k;
41 uint64_t bs4k;
42 uint64_t bs16k;
43 uint64_t bs64k;
44 uint64_t bs256k;
45 uint64_t bs1m;
46 uint64_t bs4m;
47 uint64_t bs16m;
48 zio_cksum_salt_t salt;
49 zio_checksum_t *(func);
50 zio_checksum_tmpl_init_t *(init);
51 zio_checksum_tmpl_free_t *(free);
52 } chksum_stat_t;
54 static chksum_stat_t *chksum_stat_data = 0;
55 static int chksum_stat_cnt = 0;
56 static kstat_t *chksum_kstat = NULL;
59 * i3-1005G1 test output:
61 * implementation 1k 4k 16k 64k 256k 1m 4m
62 * fletcher-4 5421 15001 26468 32555 34720 32801 18847
63 * edonr-generic 1196 1602 1761 1749 1762 1759 1751
64 * skein-generic 546 591 608 615 619 612 616
65 * sha256-generic 246 270 274 274 277 275 276
66 * sha256-avx 262 296 304 307 307 307 306
67 * sha256-sha-ni 769 1072 1172 1220 1219 1232 1228
68 * sha256-openssl 240 300 316 314 304 285 276
69 * sha512-generic 333 374 385 392 391 393 392
70 * sha512-openssl 353 441 467 476 472 467 426
71 * sha512-avx 362 444 473 475 479 476 478
72 * sha512-avx2 394 500 530 538 543 545 542
73 * blake3-generic 308 313 313 313 312 313 312
74 * blake3-sse2 402 1289 1423 1446 1432 1458 1413
75 * blake3-sse41 427 1470 1625 1704 1679 1607 1629
76 * blake3-avx2 428 1920 3095 3343 3356 3318 3204
77 * blake3-avx512 473 2687 4905 5836 5844 5643 5374
/*
 * kstat raw-ops header callback: write the column-title line of the
 * benchmark table into buf.
 *
 * buf  - destination buffer
 * size - capacity of buf in bytes
 *
 * Always returns 0.  Output that does not fit is truncated by snprintf();
 * nothing is ever written past buf[size - 1], and nothing is written at
 * all when size is 0.
 */
static int
chksum_kstat_headers(char *buf, size_t size)
{
	static const char *const cols[] = {
		"1k", "4k", "16k", "64k", "256k", "1m", "4m", "16m"
	};
	const size_t ncols = sizeof (cols) / sizeof (cols[0]);
	size_t off = 0;
	size_t i;
	int n;

	if (size == 0)
		return (0);

	n = snprintf(buf, size, "%-23s", "implementation");
	for (i = 0; i < ncols; i++) {
		/*
		 * snprintf() returns the length it wanted to write, so the
		 * offset is advanced only when the previous field fit
		 * completely.  Otherwise "size - off" would wrap around
		 * and later fields would land outside of buf.
		 */
		if (n < 0 || (size_t)n >= size - off)
			return (0);
		off += (size_t)n;

		/* the last column also terminates the line */
		n = snprintf(buf + off, size - off, "%8s%s", cols[i],
		    (i + 1 == ncols) ? "\n" : "");
	}

	return (0);
}
97 static int
98 chksum_kstat_data(char *buf, size_t size, void *data)
100 chksum_stat_t *cs;
101 ssize_t off = 0;
102 char b[24];
104 cs = (chksum_stat_t *)data;
105 snprintf(b, 23, "%s-%s", cs->name, cs->impl);
106 off += snprintf(buf + off, size - off, "%-23s", b);
107 off += snprintf(buf + off, size - off, "%8llu",
108 (u_longlong_t)cs->bs1k);
109 off += snprintf(buf + off, size - off, "%8llu",
110 (u_longlong_t)cs->bs4k);
111 off += snprintf(buf + off, size - off, "%8llu",
112 (u_longlong_t)cs->bs16k);
113 off += snprintf(buf + off, size - off, "%8llu",
114 (u_longlong_t)cs->bs64k);
115 off += snprintf(buf + off, size - off, "%8llu",
116 (u_longlong_t)cs->bs256k);
117 off += snprintf(buf + off, size - off, "%8llu",
118 (u_longlong_t)cs->bs1m);
119 off += snprintf(buf + off, size - off, "%8llu",
120 (u_longlong_t)cs->bs4m);
121 (void) snprintf(buf + off, size - off, "%8llu\n",
122 (u_longlong_t)cs->bs16m);
124 return (0);
127 static void *
128 chksum_kstat_addr(kstat_t *ksp, loff_t n)
130 if (n < chksum_stat_cnt)
131 ksp->ks_private = (void *)(chksum_stat_data + n);
132 else
133 ksp->ks_private = NULL;
135 return (ksp->ks_private);
138 static void
139 chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
140 uint64_t *result)
142 hrtime_t start;
143 uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
144 uint32_t l, loops = 0;
145 zio_cksum_t zcp;
147 switch (round) {
148 case 1: /* 1k */
149 size = 1<<10; loops = 128; break;
150 case 2: /* 2k */
151 size = 1<<12; loops = 64; break;
152 case 3: /* 4k */
153 size = 1<<14; loops = 32; break;
154 case 4: /* 16k */
155 size = 1<<16; loops = 16; break;
156 case 5: /* 256k */
157 size = 1<<18; loops = 8; break;
158 case 6: /* 1m */
159 size = 1<<20; loops = 4; break;
160 case 7: /* 4m */
161 size = 1<<22; loops = 1; break;
162 case 8: /* 16m */
163 size = 1<<24; loops = 1; break;
166 kpreempt_disable();
167 start = gethrtime();
168 do {
169 for (l = 0; l < loops; l++, run_count++)
170 cs->func(abd, size, ctx, &zcp);
172 run_time_ns = gethrtime() - start;
173 } while (run_time_ns < MSEC2NSEC(1));
174 kpreempt_enable();
176 run_bw = size * run_count * NANOSEC;
177 run_bw /= run_time_ns; /* B/s */
178 *result = run_bw/1024/1024; /* MiB/s */
181 #define LIMIT_INIT 0
182 #define LIMIT_NEEDED 1
183 #define LIMIT_NOLIMIT 2
185 static void
186 chksum_benchit(chksum_stat_t *cs)
188 abd_t *abd;
189 void *ctx = 0;
190 void *salt = &cs->salt.zcs_bytes;
191 static int chksum_stat_limit = LIMIT_INIT;
193 memset(salt, 0, sizeof (cs->salt.zcs_bytes));
194 if (cs->init)
195 ctx = cs->init(&cs->salt);
197 /* allocate test memory via abd linear interface */
198 abd = abd_alloc_linear(1<<20, B_FALSE);
199 chksum_run(cs, abd, ctx, 1, &cs->bs1k);
200 chksum_run(cs, abd, ctx, 2, &cs->bs4k);
201 chksum_run(cs, abd, ctx, 3, &cs->bs16k);
202 chksum_run(cs, abd, ctx, 4, &cs->bs64k);
203 chksum_run(cs, abd, ctx, 5, &cs->bs256k);
205 /* check if we ran on a slow cpu */
206 if (chksum_stat_limit == LIMIT_INIT) {
207 if (cs->bs1k < LIMIT_PERF_MBS) {
208 chksum_stat_limit = LIMIT_NEEDED;
209 } else {
210 chksum_stat_limit = LIMIT_NOLIMIT;
214 /* skip benchmarks >= 1MiB when the CPU is to slow */
215 if (chksum_stat_limit == LIMIT_NEEDED)
216 goto abort;
218 chksum_run(cs, abd, ctx, 6, &cs->bs1m);
219 abd_free(abd);
221 /* allocate test memory via abd non linear interface */
222 abd = abd_alloc(1<<24, B_FALSE);
223 chksum_run(cs, abd, ctx, 7, &cs->bs4m);
224 chksum_run(cs, abd, ctx, 8, &cs->bs16m);
226 abort:
227 abd_free(abd);
229 /* free up temp memory */
230 if (cs->free)
231 cs->free(ctx);
235 * Initialize and benchmark all supported implementations.
237 static void
238 chksum_benchmark(void)
241 #ifndef _KERNEL
242 /* we need the benchmark only for the kernel module */
243 return;
244 #endif
246 chksum_stat_t *cs;
247 int cbid = 0;
248 uint64_t max = 0;
249 uint32_t id, id_save;
251 /* space for the benchmark times */
252 chksum_stat_cnt = 4;
253 chksum_stat_cnt += blake3_impl_getcnt();
254 chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
255 sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
257 /* edonr - needs to be the first one here (slow CPU check) */
258 cs = &chksum_stat_data[cbid++];
259 cs->init = abd_checksum_edonr_tmpl_init;
260 cs->func = abd_checksum_edonr_native;
261 cs->free = abd_checksum_edonr_tmpl_free;
262 cs->name = "edonr";
263 cs->impl = "generic";
264 chksum_benchit(cs);
266 /* skein */
267 cs = &chksum_stat_data[cbid++];
268 cs->init = abd_checksum_skein_tmpl_init;
269 cs->func = abd_checksum_skein_native;
270 cs->free = abd_checksum_skein_tmpl_free;
271 cs->name = "skein";
272 cs->impl = "generic";
273 chksum_benchit(cs);
275 /* sha256 */
276 cs = &chksum_stat_data[cbid++];
277 cs->init = 0;
278 cs->func = abd_checksum_SHA256;
279 cs->free = 0;
280 cs->name = "sha256";
281 cs->impl = "generic";
282 chksum_benchit(cs);
284 /* sha512 */
285 cs = &chksum_stat_data[cbid++];
286 cs->init = 0;
287 cs->func = abd_checksum_SHA512_native;
288 cs->free = 0;
289 cs->name = "sha512";
290 cs->impl = "generic";
291 chksum_benchit(cs);
293 /* blake3 */
294 id_save = blake3_impl_getid();
295 for (id = 0; id < blake3_impl_getcnt(); id++) {
296 blake3_impl_setid(id);
297 cs = &chksum_stat_data[cbid++];
298 cs->init = abd_checksum_blake3_tmpl_init;
299 cs->func = abd_checksum_blake3_native;
300 cs->free = abd_checksum_blake3_tmpl_free;
301 cs->name = "blake3";
302 cs->impl = blake3_impl_getname();
303 chksum_benchit(cs);
304 if (cs->bs256k > max) {
305 max = cs->bs256k;
306 blake3_impl_set_fastest(id);
310 /* restore initial value */
311 blake3_impl_setid(id_save);
314 void
315 chksum_init(void)
317 #ifdef _KERNEL
318 blake3_per_cpu_ctx_init();
319 #endif
321 /* Benchmark supported implementations */
322 chksum_benchmark();
324 /* Install kstats for all implementations */
325 chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
326 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
328 if (chksum_kstat != NULL) {
329 chksum_kstat->ks_data = NULL;
330 chksum_kstat->ks_ndata = UINT32_MAX;
331 kstat_set_raw_ops(chksum_kstat,
332 chksum_kstat_headers,
333 chksum_kstat_data,
334 chksum_kstat_addr);
335 kstat_install(chksum_kstat);
339 void
340 chksum_fini(void)
342 if (chksum_kstat != NULL) {
343 kstat_delete(chksum_kstat);
344 chksum_kstat = NULL;
347 if (chksum_stat_cnt) {
348 kmem_free(chksum_stat_data,
349 sizeof (chksum_stat_t) * chksum_stat_cnt);
350 chksum_stat_cnt = 0;
351 chksum_stat_data = 0;
354 #ifdef _KERNEL
355 blake3_per_cpu_ctx_fini();
356 #endif