Use _Noreturn (C11; GNU89) properly
[zfs.git] / cmd / raidz_test / raidz_test.c
blob 8bb38f2f72c7771c0e3b814b2553c676c4f046b1
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */
#include <sys/zfs_context.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/zio.h>
#include <umem.h>
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>
#include <assert.h>
#include <stdio.h>
#include "raidz_test.h"
static int *rand_data;
raidz_test_opts_t rto_opts;

static char pid_s[16];
static void sig_handler(int signo)
{
	int old_errno = errno;
	struct sigaction action;
	/*
	 * Restore default action and re-raise signal so SIGSEGV and
	 * SIGABRT can trigger a core dump.
	 */
	action.sa_handler = SIG_DFL;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;
	(void) sigaction(signo, &action, NULL);

	if (rto_opts.rto_gdb) {
		pid_t pid = fork();
		if (pid == 0) {
			execlp("gdb", "gdb", "-ex", "set pagination 0",
			    "-p", pid_s, NULL);
			_exit(-1);
		} else if (pid > 0)
			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
				;
	}

	raise(signo);
	errno = old_errno;
}
static void print_opts(raidz_test_opts_t *opts, boolean_t force)
{
	char *verbose;
	switch (opts->rto_v) {
	case D_ALL:
		verbose = "no";
		break;
	case D_INFO:
		verbose = "info";
		break;
	case D_DEBUG:
	default:
		verbose = "debug";
		break;
	}

	if (force || opts->rto_v >= D_INFO) {
		(void) fprintf(stdout, DBLSEP "Running with options:\n"
		    "  (-a) zio ashift                   : %zu\n"
		    "  (-o) zio offset                   : 1 << %zu\n"
		    "  (-e) expanded map                 : %s\n"
		    "  (-r) reflow offset                : %llx\n"
		    "  (-d) number of raidz data columns : %zu\n"
		    "  (-s) size of DATA                 : 1 << %zu\n"
		    "  (-S) sweep parameters             : %s \n"
		    "  (-v) verbose                      : %s \n\n",
		    opts->rto_ashift,				/* -a */
		    ilog2(opts->rto_offset),			/* -o */
		    opts->rto_expand ? "yes" : "no",		/* -e */
		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
		    opts->rto_dcols,				/* -d */
		    ilog2(opts->rto_dsize),			/* -s */
		    opts->rto_sweep ? "yes" : "no",		/* -S */
		    verbose);					/* -v */
	}
}
static void usage(boolean_t requested)
{
	const raidz_test_opts_t *o = &rto_opts_defaults;

	FILE *fp = requested ? stdout : stderr;

	(void) fprintf(fp, "Usage:\n"
	    "\t[-a zio ashift (default: %zu)]\n"
	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
	    "\t[-d number of raidz data columns (default: %zu)]\n"
	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
	    "\t[-S parameter sweep (default: %s)]\n"
	    "\t[-t timeout for parameter sweep test]\n"
	    "\t[-B benchmark all raidz implementations]\n"
	    "\t[-e use expanded raidz map (default: %s)]\n"
	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
	    "\t[-v increase verbosity (default: %d)]\n"
	    "\t[-h (print help)]\n"
	    "\t[-T test the test, see if failure would be detected]\n"
	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
	    "\n",
	    o->rto_ashift,				/* -a */
	    ilog2(o->rto_offset),			/* -o */
	    o->rto_dcols,				/* -d */
	    ilog2(o->rto_dsize),			/* -s */
	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
	    (u_longlong_t)o->rto_expand_offset,		/* -r */
	    o->rto_v);					/* -v */

	exit(requested ? 0 : 1);
}
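
/*
 * Illustrative invocation (editor's example, not part of the original
 * source):
 *
 *	raidz_test -a 12 -d 8 -s 17 -v
 *
 * runs the generation/reconstruction checks with ashift = 12, eight data
 * columns, and a 1 << 17 (128 KiB) zio, at "info" verbosity.
 */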
static void process_options(int argc, char **argv)
{
	size_t value;
	int opt;

	raidz_test_opts_t *o = &rto_opts;

	bcopy(&rto_opts_defaults, o, sizeof (*o));

	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
		value = 0;

		switch (opt) {
		case 'a':
			value = strtoull(optarg, NULL, 0);
			o->rto_ashift = MIN(13, MAX(9, value));
			break;
		case 'e':
			o->rto_expand = 1;
			break;
		case 'r':
			o->rto_expand_offset = strtoull(optarg, NULL, 0);
			break;
		case 'o':
			value = strtoull(optarg, NULL, 0);
			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
			break;
		case 'd':
			value = strtoull(optarg, NULL, 0);
			o->rto_dcols = MIN(255, MAX(1, value));
			break;
		case 's':
			value = strtoull(optarg, NULL, 0);
			o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
			    MAX(SPA_MINBLOCKSHIFT, value));
			break;
		case 't':
			value = strtoull(optarg, NULL, 0);
			o->rto_sweep_timeout = value;
			break;
		case 'v':
			o->rto_v++;
			break;
		case 'S':
			o->rto_sweep = 1;
			break;
		case 'B':
			o->rto_benchmark = 1;
			break;
		case 'D':
			o->rto_gdb = 1;
			break;
		case 'T':
			o->rto_sanity = 1;
			break;
		case 'h':
			usage(B_TRUE);
			break;
		case '?':
		default:
			usage(B_FALSE);
			break;
		}
	}
}
#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
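
/*
 * For illustration (editor's note): DATA_COL(rr, 0) expands to
 * (rr)->rr_col[(rr)->rr_firstdatacol + 0].rc_abd, i.e. the abd of the
 * first column after the parity columns, while CODE_COL(rr, 0) is the
 * abd of the first parity column itself.
 */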
static int
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
{
	int r, i, ret = 0;

	VERIFY(parity >= 1 && parity <= 3);

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t * const rr = rm->rm_row[r];
		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
		for (i = 0; i < parity; i++) {
			if (CODE_COL_SIZE(rrg, i) == 0) {
				VERIFY0(CODE_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(CODE_COL(rr, i),
			    CODE_COL(rrg, i)) != 0) {
				ret++;
				LOG_OPT(D_DEBUG, opts,
				    "\nParity block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}
static int
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
{
	int r, i, dcols, ret = 0;

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
		dcols = opts->rm_golden->rm_row[0]->rr_cols -
		    raidz_parity(opts->rm_golden);
		for (i = 0; i < dcols; i++) {
			if (DATA_COL_SIZE(rrg, i) == 0) {
				VERIFY0(DATA_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(DATA_COL(rrg, i),
			    DATA_COL(rr, i)) != 0) {
				ret++;

				LOG_OPT(D_DEBUG, opts,
				    "\nData block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}
static int
init_rand(void *data, size_t size, void *private)
{
	(void) private;
	memcpy(data, rand_data, size);
	return (0);
}
static void
corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
{
	for (int r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		for (int i = 0; i < cnt; i++) {
			raidz_col_t *col = &rr->rr_col[tgts[i]];
			abd_iterate_func(col->rc_abd, 0, col->rc_size,
			    init_rand, NULL);
		}
	}
}
void
init_zio_abd(zio_t *zio)
{
	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
}
static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{
	vdev_raidz_map_free(*rm);
	raidz_free((*zio)->io_abd, (*zio)->io_size);
	umem_free(*zio, sizeof (zio_t));

	*zio = NULL;
	*rm = NULL;
}
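
/*
 * Note (added for clarity): the "golden" map below is generated once with
 * the portable "original" implementation and kept as the reference that
 * every other implementation's parity and data are compared against via
 * cmp_code()/cmp_data().
 */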
static int
init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
{
	int err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;
	const size_t total_ncols = opts->rto_dcols + parity;

	if (opts->rm_golden) {
		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
	}

	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;

	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
	zio_test->io_abd = raidz_alloc(opts->rto_dsize);

	init_zio_abd(opts->zio_golden);
	init_zio_abd(zio_test);

	VERIFY0(vdev_raidz_impl_set("original"));

	if (opts->rto_expand) {
		opts->rm_golden =
		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
		    zio_test->io_size, zio_test->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
		    opts->rto_ashift, total_ncols, parity);
		rm_test = vdev_raidz_map_alloc(zio_test,
		    opts->rto_ashift, total_ncols, parity);
	}

	VERIFY(opts->zio_golden);
	VERIFY(opts->rm_golden);

	vdev_raidz_generate_parity(opts->rm_golden);
	vdev_raidz_generate_parity(rm_test);

	/* sanity check */
	err |= cmp_data(opts, rm_test);
	err |= cmp_code(opts, rm_test, parity);

	if (err)
		ERR("initializing the golden copy ... [FAIL]!\n");

	/* tear down raidz_map of test zio */
	fini_raidz_map(&zio_test, &rm_test);

	return (err);
}
/*
 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
 * For each row, if the row is entirely before reflow_offset, it will
 * come from the new location. Otherwise this row will come from the
 * old location. Therefore, rows that straddle the reflow_offset will
 * come from the old location.
 *
 * NOTE: Until raidz expansion is implemented this function is only
 * needed by raidz_test.c to test the multi-row raid_map_t functionality.
 */
raidz_map_t *
vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
    uint64_t nparity, uint64_t reflow_offset)
{
	/* The zio's size in units of the vdev's minimum sector size. */
	uint64_t s = size >> ashift;
	uint64_t q, r, bc, devidx, asize = 0, tot;

	/*
	 * "Quotient": The number of data sectors for this stripe on all but
	 * the "big column" child vdevs that also contain "remainder" data.
	 * AKA "full rows"
	 */
	q = s / (logical_cols - nparity);

	/*
	 * "Remainder": The number of partial stripe data sectors in this I/O.
	 * This will add a sector to some, but not all, child vdevs.
	 */
	r = s - q * (logical_cols - nparity);

	/* The number of "big columns" - those which contain remainder data. */
	bc = (r == 0 ? 0 : r + nparity);

	/*
	 * The total number of data and parity sectors associated with
	 * this I/O.
	 */
	tot = s + nparity * (q + (r == 0 ? 0 : 1));

	/* How many rows contain data (not skip) */
	uint64_t rows = howmany(tot, logical_cols);
	int cols = MIN(tot, logical_cols);
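
	/*
	 * Worked example (illustrative values, not from the original source):
	 * with s = 12 data sectors, logical_cols = 5 and nparity = 1,
	 * q = 12 / 4 = 3, r = 0, bc = 0, tot = 12 + 1 * 3 = 15,
	 * rows = howmany(15, 5) = 3 and cols = 5.
	 */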
	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
	    KM_SLEEP);
	rm->rm_nrows = rows;

	for (uint64_t row = 0; row < rows; row++) {
		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
		    rr_col[cols]), KM_SLEEP);
		rm->rm_row[row] = rr;

		/* The starting RAIDZ (parent) vdev sector of the row. */
		uint64_t b = (offset >> ashift) + row * logical_cols;

		/*
		 * If we are in the middle of a reflow, and any part of this
		 * row has not been copied, then use the old location of
		 * this row.
		 */
		int row_phys_cols = physical_cols;
		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
			row_phys_cols--;
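
		/*
		 * For example (illustrative values): if
		 * reflow_offset >> ashift == 10 and a row starting at b = 8
		 * spans logical_cols - nparity == 4 data sectors, then
		 * 8 + 4 > 10, so the row straddles the reflow point and is
		 * read from the old location, with one fewer physical column.
		 */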
		/* starting child of this row */
		uint64_t child_id = b % row_phys_cols;
		/* The starting byte offset on each child vdev. */
		uint64_t child_offset = (b / row_phys_cols) << ashift;

		/*
		 * We set cols to the entire width of the block, even
		 * if this row is shorter. This is needed because parity
		 * generation (for Q and R) needs to know the entire width,
		 * because it treats the short row as though it was
		 * full-width (and the "phantom" sectors were zero-filled).
		 *
		 * Another approach to this would be to set cols shorter
		 * (to just the number of columns that we might do i/o to)
		 * and have another mechanism to tell the parity generation
		 * about the "entire width". Reconstruction (at least
		 * vdev_raidz_reconstruct_general()) would also need to
		 * know about the "entire width".
		 */
		rr->rr_cols = cols;
		rr->rr_bigcols = bc;
		rr->rr_missingdata = 0;
		rr->rr_missingparity = 0;
		rr->rr_firstdatacol = nparity;
		rr->rr_abd_empty = NULL;
		rr->rr_nempty = 0;
		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
			if (child_id >= row_phys_cols) {
				child_id -= row_phys_cols;
				child_offset += 1ULL << ashift;
			}
			rr->rr_col[c].rc_devidx = child_id;
			rr->rr_col[c].rc_offset = child_offset;
			rr->rr_col[c].rc_orig_data = NULL;
			rr->rr_col[c].rc_error = 0;
			rr->rr_col[c].rc_tried = 0;
			rr->rr_col[c].rc_skipped = 0;
			rr->rr_col[c].rc_need_orig_restore = B_FALSE;

			uint64_t dc = c - rr->rr_firstdatacol;
			if (c < rr->rr_firstdatacol) {
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd =
				    abd_alloc_linear(rr->rr_col[c].rc_size,
				    B_TRUE);
			} else if (row == rows - 1 && bc != 0 && c >= bc) {
				/*
				 * Past the end; this sector exists only so
				 * that parity generation sees a full row.
				 */
				rr->rr_col[c].rc_size = 0;
				rr->rr_col[c].rc_abd = NULL;
			} else {
				/*
				 * "data column" (col excluding parity)
				 * Add an ASCII art diagram here
				 */
				uint64_t off;

				if (c < bc || r == 0) {
					off = dc * rows + row;
				} else {
					off = r * rows +
					    (dc - r) * (rows - 1) + row;
				}
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd = abd_get_offset_struct(
				    &rr->rr_col[c].rc_abdstruct,
				    abd, off << ashift, 1 << ashift);
			}
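
			/*
			 * Worked example (illustrative values): with rows = 3
			 * and r = 2, data column dc = 4 in row = 1 (c >= bc)
			 * maps to off = 2 * 3 + (4 - 2) * (3 - 1) + 1 = 11
			 * sectors into the zio's abd.
			 */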
			asize += rr->rr_col[c].rc_size;
		}
		/*
		 * If all data stored spans all columns, there's a danger that
		 * parity will always be on the same device and, since parity
		 * isn't read during normal operation, that device's I/O
		 * bandwidth won't be used effectively. We therefore switch
		 * the parity every 1MB.
		 *
		 * ...at least that was, ostensibly, the theory. As a practical
		 * matter unless we juggle the parity between all devices
		 * evenly, we won't see any benefit. Further, occasional writes
		 * that aren't a multiple of the LCM of the number of children
		 * and the minimum stripe width are sufficient to avoid pessimal
		 * behavior. Unfortunately, this decision created an implicit
		 * on-disk format requirement that we need to support for all
		 * eternity, but only for single-parity RAID-Z.
		 *
		 * If we intend to skip a sector in the zeroth column for
		 * padding we must make sure to note this swap. We will never
		 * intend to skip the first column since at least one data and
		 * one parity column must appear in each row.
		 */
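		/*
		 * Note (added for clarity): offset & (1ULL << 20) tests the
		 * 1 MiB bit of the parent offset, so for single-parity maps
		 * the P column and the first data column trade places every
		 * 1 MiB.
		 */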
		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
		    (offset & (1ULL << 20))) {
			ASSERT(rr->rr_cols >= 2);
			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);

			devidx = rr->rr_col[0].rc_devidx;
			uint64_t o = rr->rr_col[0].rc_offset;
			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
			rr->rr_col[1].rc_devidx = devidx;
			rr->rr_col[1].rc_offset = o;
		}
	}
	ASSERT3U(asize, ==, tot << ashift);

	/* init RAIDZ parity ops */
	rm->rm_ops = vdev_raidz_math_get_ops();

	return (rm);
}
static raidz_map_t *
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
{
	raidz_map_t *rm = NULL;
	const size_t alloc_dsize = opts->rto_dsize;
	const size_t total_ncols = opts->rto_dcols + parity;
	const int ccols[] = { 0, 1, 2 };

	VERIFY(zio);
	VERIFY(parity <= 3 && parity >= 1);

	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	(*zio)->io_offset = 0;
	(*zio)->io_size = alloc_dsize;
	(*zio)->io_abd = raidz_alloc(alloc_dsize);
	init_zio_abd(*zio);

	if (opts->rto_expand) {
		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
		    (*zio)->io_size, (*zio)->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
		    total_ncols, parity);
	}
	VERIFY(rm);

	/* Make sure code columns are destroyed */
	corrupt_colums(rm, ccols, parity);

	return (rm);
}
static int
run_gen_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	int fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (0 != err)
		return (err);

	LOG(D_INFO, DBLSEP);
	LOG(D_INFO, "Testing parity generation...\n");

	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, SEP);
		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (0 != vdev_raidz_impl_set(*impl_name)) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		} else {
			LOG(D_INFO, "[SUPPORTED]\n");
		}

		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {

			/* Check if should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			/* create suitable raidz_map */
			rm_test = init_raidz_map(opts, &zio_test, fn+1);
			VERIFY(rm_test);

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_gen_name[fn]);

			if (!opts->rto_sanity)
				vdev_raidz_generate_parity(rm_test);

			if (cmp_code(opts, rm_test, fn+1) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else
				LOG(D_INFO, "[PASS]\n");

			fini_raidz_map(&zio_test, &rm_test);
		}
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}
static int
run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
{
	int x0, x1, x2;
	int tgtidx[3];
	int err = 0;
	static const int rec_tgts[7][3] = {
		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2]	*/
	};

	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
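
	/*
	 * Note (added for clarity): rec_tgts[fn] holds absolute column
	 * indices of the columns to leave corrupted; the loops below
	 * overwrite the trailing entries with x + raidz_parity(rm) to aim
	 * at the data columns under test.
	 */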
	if (fn < RAIDZ_REC_PQ) {
		/* can reconstruct 1 failed data disk */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;

			/* Check if should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			LOG(D_DEBUG, "[%d] ", x0);

			tgtidx[2] = x0 + raidz_parity(rm);

			corrupt_colums(rm, tgtidx+2, 1);

			if (!opts->rto_sanity)
				vdev_raidz_reconstruct(rm, tgtidx, 3);

			if (cmp_data(opts, rm) != 0) {
				err++;
				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
			}
		}

	} else if (fn < RAIDZ_REC_PQR) {
		/* can reconstruct 2 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;

				/* Check if should stop */
				if (rto_opts.rto_should_stop)
					return (err);

				LOG(D_DEBUG, "[%d %d] ", x0, x1);

				tgtidx[1] = x0 + raidz_parity(rm);
				tgtidx[2] = x1 + raidz_parity(rm);

				corrupt_colums(rm, tgtidx+1, 2);

				if (!opts->rto_sanity)
					vdev_raidz_reconstruct(rm, tgtidx, 3);

				if (cmp_data(opts, rm) != 0) {
					err++;
					LOG(D_DEBUG, "\nREC D[%d %d]... "
					    "[FAIL]\n", x0, x1);
				}
			}
		}
	} else {
		/* can reconstruct 3 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;
				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
					if (x2 >= rm->rm_row[0]->rr_cols -
					    raidz_parity(rm))
						continue;

					/* Check if should stop */
					if (rto_opts.rto_should_stop)
						return (err);

					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);

					tgtidx[0] = x0 + raidz_parity(rm);
					tgtidx[1] = x1 + raidz_parity(rm);
					tgtidx[2] = x2 + raidz_parity(rm);

					corrupt_colums(rm, tgtidx, 3);

					if (!opts->rto_sanity)
						vdev_raidz_reconstruct(rm,
						    tgtidx, 3);

					if (cmp_data(opts, rm) != 0) {
						err++;
						LOG(D_DEBUG,
						    "\nREC D[%d %d %d]... "
						    "[FAIL]\n", x0, x1, x2);
					}
				}
			}
		}
	}

	return (err);
}
static int
run_rec_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	unsigned fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (0 != err)
		return (err);

	LOG(D_INFO, DBLSEP);
	LOG(D_INFO, "Testing data reconstruction...\n");

	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, SEP);
		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (vdev_raidz_impl_set(*impl_name) != 0) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		} else
			LOG(D_INFO, "[SUPPORTED]\n");

		/* create suitable raidz_map */
		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
		/* generate parity */
		vdev_raidz_generate_parity(rm_test);

		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_rec_name[fn]);

			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else
				LOG(D_INFO, "[PASS]\n");
		}

		/* tear down test raidz_map */
		fini_raidz_map(&zio_test, &rm_test);
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}
static int
run_test(raidz_test_opts_t *opts)
{
	int err = 0;

	if (opts == NULL)
		opts = &rto_opts;

	print_opts(opts, B_FALSE);

	err |= run_gen_check(opts);
	err |= run_rec_check(opts);

	return (err);
}
#define	SWEEP_RUNNING	0
#define	SWEEP_FINISHED	1
#define	SWEEP_ERROR	2
#define	SWEEP_TIMEOUT	3

static int sweep_state = 0;
static raidz_test_opts_t failed_opts;

static kmutex_t sem_mtx;
static kcondvar_t sem_cv;
static int max_free_slots;
static int free_slots;
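
/*
 * Note (added for clarity): sem_mtx, sem_cv and free_slots together act
 * as a counting semaphore that caps the number of concurrent
 * sweep_thread() workers at max_free_slots.
 */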
static _Noreturn void
sweep_thread(void *arg)
{
	int err = 0;
	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
	VERIFY(opts != NULL);

	err = run_test(opts);

	if (rto_opts.rto_sanity) {
		/* 25% chance that a sweep test fails */
		if (rand() < (RAND_MAX/4))
			err = 1;
	}

	if (0 != err) {
		mutex_enter(&sem_mtx);
		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
		sweep_state = SWEEP_ERROR;
		mutex_exit(&sem_mtx);
	}

	umem_free(opts, sizeof (raidz_test_opts_t));

	/* signal the next thread */
	mutex_enter(&sem_mtx);
	free_slots++;
	cv_signal(&sem_cv);
	mutex_exit(&sem_mtx);

	thread_exit();
}
static int
run_sweep(void)
{
	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
	static const size_t ashift_v[] = { 9, 12, 14 };
	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };

	(void) setvbuf(stdout, NULL, _IONBF, 0);

	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
	    ARRAY_SIZE(dcols_v);
	ulong_t tried_comb = 0;
	hrtime_t time_diff, start_time = gethrtime();
	raidz_test_opts_t *opts;
	int a, d, s;

	max_free_slots = free_slots = MAX(2, boot_ncpus);

	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);

	for (s = 0; s < ARRAY_SIZE(size_v); s++)
	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {

		if (size_v[s] < (1 << ashift_v[a])) {
			total_comb--;
			continue;
		}

		if (++tried_comb % 20 == 0)
			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);

		/* wait for signal to start new thread */
		mutex_enter(&sem_mtx);
		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
		    ddi_get_lbolt() + hz)) {

			/* check if should stop the test (timeout) */
			time_diff = (gethrtime() - start_time) / NANOSEC;
			if (rto_opts.rto_sweep_timeout > 0 &&
			    time_diff >= rto_opts.rto_sweep_timeout) {
				sweep_state = SWEEP_TIMEOUT;
				rto_opts.rto_should_stop = B_TRUE;
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* check if should stop the test (error) */
			if (sweep_state != SWEEP_RUNNING) {
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* exit loop if a slot is available */
			if (free_slots > 0) {
				break;
			}
		}

		free_slots--;
		mutex_exit(&sem_mtx);

		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
		opts->rto_ashift = ashift_v[a];
		opts->rto_dcols = dcols_v[d];
		opts->rto_offset = (1 << ashift_v[a]) * rand();
		opts->rto_dsize = size_v[s];
		opts->rto_expand = rto_opts.rto_expand;
		opts->rto_expand_offset = rto_opts.rto_expand_offset;
		opts->rto_v = 0; /* be quiet */

		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
	}
exit:
	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
	mutex_enter(&sem_mtx);
	VERIFY(free_slots <= max_free_slots);
	while (free_slots < max_free_slots) {
		(void) cv_wait(&sem_cv, &sem_mtx);
	}
	mutex_exit(&sem_mtx);

	if (sweep_state == SWEEP_ERROR) {
		ERR("Sweep test failed! Failed option: \n");
		print_opts(&failed_opts, B_TRUE);
	} else {
		if (sweep_state == SWEEP_TIMEOUT)
			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
			    (ulong_t)rto_opts.rto_sweep_timeout);

		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
		    (ulong_t)tried_comb);
	}

	mutex_destroy(&sem_mtx);

	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
}
int
main(int argc, char **argv)
{
	size_t i;
	struct sigaction action;
	int err = 0;

	/* init gdb pid string early */
	(void) sprintf(pid_s, "%d", getpid());

	action.sa_handler = sig_handler;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;

	if (sigaction(SIGSEGV, &action, NULL) < 0) {
		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
		exit(EXIT_FAILURE);
	}

	(void) setvbuf(stdout, NULL, _IOLBF, 0);

	dprintf_setup(&argc, argv);

	process_options(argc, argv);

	kernel_init(SPA_MODE_READ);

	/* setup random data because rand() is not reentrant */
	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	srand((unsigned)time(NULL) * getpid());
	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
		rand_data[i] = rand();

	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);

	if (rto_opts.rto_benchmark) {
		run_raidz_benchmark();
	} else if (rto_opts.rto_sweep) {
		err = run_sweep();
	} else {
		err = run_test(NULL);
	}

	umem_free(rand_data, SPA_MAXBLOCKSIZE);
	kernel_fini();

	return (err);
}