4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
26 #include <sys/zfs_context.h>
31 #include <sys/vdev_raidz.h>
32 #include <sys/vdev_raidz_impl.h>
35 #include "raidz_test.h"
37 static int *rand_data
;
38 raidz_test_opts_t rto_opts
;
40 static char pid_s
[16];
/*
 * Signal handler; main() installs it for SIGSEGV.
 *
 * Visible behaviour: saves errno, restores the default disposition of
 * `signo` with sigaction(), and, when rto_opts.rto_gdb is set, execs gdb
 * and waits on a pid with waitpid(), retrying while it fails with EINTR.
 *
 * NOTE(review): the lines that create the child, pass the remaining gdb
 * arguments, re-raise the signal and restore errno are not visible in
 * this view -- confirm against the full file.
 */
42 static void sig_handler(int signo
)
44 int old_errno
= errno
;
45 struct sigaction action
;
47 * Restore default action and re-raise signal so SIGSEGV and
48 * SIGABRT can trigger a core dump.
50 action
.sa_handler
= SIG_DFL
;
51 sigemptyset(&action
.sa_mask
);
53 (void) sigaction(signo
, &action
, NULL
);
55 if (rto_opts
.rto_gdb
) {
58 execlp("gdb", "gdb", "-ex", "set pagination 0",
62 while (waitpid(pid
, NULL
, 0) == -1 && errno
== EINTR
)
/*
 * Print the option set `opts` to stdout.
 *
 * The table is emitted when `force` is true or when opts->rto_v is at
 * least D_INFO.  Offsets and sizes are shown as base-2 exponents via
 * ilog2().
 *
 * NOTE(review): the cases of the switch on opts->rto_v and the argument
 * for the final "(-v) verbose" field are not visible in this view.
 */
70 static void print_opts(raidz_test_opts_t
*opts
, boolean_t force
)
73 switch (opts
->rto_v
) {
86 if (force
|| opts
->rto_v
>= D_INFO
) {
87 (void) fprintf(stdout
, DBLSEP
"Running with options:\n"
88 " (-a) zio ashift : %zu\n"
89 " (-o) zio offset : 1 << %zu\n"
90 " (-e) expanded map : %s\n"
91 " (-r) reflow offset : %llx\n"
92 " (-d) number of raidz data columns : %zu\n"
93 " (-s) size of DATA : 1 << %zu\n"
94 " (-S) sweep parameters : %s \n"
95 " (-v) verbose : %s \n\n",
96 opts
->rto_ashift
, /* -a */
97 ilog2(opts
->rto_offset
), /* -o */
98 opts
->rto_expand
? "yes" : "no", /* -e */
99 (u_longlong_t
)opts
->rto_expand_offset
, /* -r */
100 opts
->rto_dcols
, /* -d */
101 ilog2(opts
->rto_dsize
), /* -s */
102 opts
->rto_sweep
? "yes" : "no", /* -S */
/*
 * Print the command-line help and terminate the process.
 *
 * requested: true when help was asked for explicitly; output then goes to
 *            stdout and the exit status is 0.  Otherwise output goes to
 *            stderr and the exit status is 1.
 *
 * Default values shown in the text are read from rto_opts_defaults.
 * NOTE(review): the "-S" and "-e" defaults are read from rto_opts rather
 * than from `o` like the other fields -- confirm this is intended.
 */
107 static void usage(boolean_t requested
)
109 const raidz_test_opts_t
*o
= &rto_opts_defaults
;
111 FILE *fp
= requested
? stdout
: stderr
;
113 (void) fprintf(fp
, "Usage:\n"
114 "\t[-a zio ashift (default: %zu)]\n"
115 "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
116 "\t[-d number of raidz data columns (default: %zu)]\n"
117 "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
118 "\t[-S parameter sweep (default: %s)]\n"
119 "\t[-t timeout for parameter sweep test]\n"
120 "\t[-B benchmark all raidz implementations]\n"
121 "\t[-e use expanded raidz map (default: %s)]\n"
122 "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
123 "\t[-v increase verbosity (default: %d)]\n"
124 "\t[-h (print help)]\n"
125 "\t[-T test the test, see if failure would be detected]\n"
126 "\t[-D debug (attach gdb on SIGSEGV)]\n"
128 o
->rto_ashift
, /* -a */
129 ilog2(o
->rto_offset
), /* -o */
130 o
->rto_dcols
, /* -d */
131 ilog2(o
->rto_dsize
), /* -s */
132 rto_opts
.rto_sweep
? "yes" : "no", /* -S */
133 rto_opts
.rto_expand
? "yes" : "no", /* -e */
134 (u_longlong_t
)o
->rto_expand_offset
, /* -r */
137 exit(requested
? 0 : 1);
/*
 * Parse the command line into the global rto_opts.
 *
 * rto_opts is first overwritten with rto_opts_defaults, then getopt() is
 * run with the option string "TDBSvha:er:o:d:s:t:".  Numeric arguments are
 * parsed with strtoull() (base auto-detected) and clamped as they are
 * stored:
 *   rto_ashift        limited to 9..13
 *   rto_offset        1 << min(12, value), rounded down to a multiple of 512
 *   rto_dcols         limited to 1..255
 *   rto_dsize         1 << value, with value limited to
 *                     SPA_MINBLOCKSHIFT..SPA_MAXBLOCKSHIFT
 *   rto_expand_offset and rto_sweep_timeout are stored unmodified
 *
 * NOTE(review): the case labels are not visible in this view, so which
 * option letter feeds which field is inferred from the field names.
 * NOTE(review): strtoull() is called with a NULL end pointer, so malformed
 * numbers are not detected here.
 */
140 static void process_options(int argc
, char **argv
)
145 raidz_test_opts_t
*o
= &rto_opts
;
147 bcopy(&rto_opts_defaults
, o
, sizeof (*o
));
149 while ((opt
= getopt(argc
, argv
, "TDBSvha:er:o:d:s:t:")) != -1) {
154 value
= strtoull(optarg
, NULL
, 0);
155 o
->rto_ashift
= MIN(13, MAX(9, value
));
161 o
->rto_expand_offset
= strtoull(optarg
, NULL
, 0);
164 value
= strtoull(optarg
, NULL
, 0);
165 o
->rto_offset
= ((1ULL << MIN(12, value
)) >> 9) << 9;
168 value
= strtoull(optarg
, NULL
, 0);
169 o
->rto_dcols
= MIN(255, MAX(1, value
));
172 value
= strtoull(optarg
, NULL
, 0);
173 o
->rto_dsize
= 1ULL << MIN(SPA_MAXBLOCKSHIFT
,
174 MAX(SPA_MINBLOCKSHIFT
, value
));
177 value
= strtoull(optarg
, NULL
, 0);
178 o
->rto_sweep_timeout
= value
;
187 o
->rto_benchmark
= 1;
/*
 * Column accessors for a pointer `rr` to a raidz row.
 *
 * CODE_COL*() index rr_col[] directly: column `i` counted from the start
 * of the row.  DATA_COL*() index relative to rr_firstdatacol, so `i` counts
 * data columns only.  The *_SIZE variants yield rc_size, the others rc_abd.
 *
 * Every use of a macro argument is parenthesized so that an expression such
 * as `rows + 1` can be passed safely.  `rr` is evaluated twice by the
 * DATA_* macros, so it must not have side effects.
 */
#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
/*
 * Compare the parity ("code") columns of `rm` with those of the golden map
 * opts->rm_golden.
 *
 * opts:   test options; supplies the golden map and the log verbosity.
 * rm:     map under test.
 * parity: number of leading columns to compare per row; must be 1..3.
 *
 * For each row and each of the first `parity` columns: a golden column of
 * size 0 requires the tested column to be size 0 as well; otherwise the
 * two buffers are compared with abd_cmp() and a mismatch is logged at
 * D_DEBUG level.
 *
 * NOTE(review): the declarations, the mismatch bookkeeping and the return
 * statement are not visible in this view; callers treat a non-zero result
 * as failure.
 */
213 cmp_code(raidz_test_opts_t
*opts
, const raidz_map_t
*rm
, const int parity
)
217 VERIFY(parity
>= 1 && parity
<= 3);
219 for (r
= 0; r
< rm
->rm_nrows
; r
++) {
220 raidz_row_t
* const rr
= rm
->rm_row
[r
];
221 raidz_row_t
* const rrg
= opts
->rm_golden
->rm_row
[r
];
222 for (i
= 0; i
< parity
; i
++) {
223 if (CODE_COL_SIZE(rrg
, i
) == 0) {
224 VERIFY0(CODE_COL_SIZE(rr
, i
));
228 if (abd_cmp(CODE_COL(rr
, i
),
229 CODE_COL(rrg
, i
)) != 0) {
231 LOG_OPT(D_DEBUG
, opts
,
232 "\nParity block [%d] different!\n", i
);
/*
 * Compare the data columns of `rm` with those of the golden map
 * opts->rm_golden.
 *
 * The number of data columns is taken from the golden map: the column
 * count of its first row minus raidz_parity() of the golden map.  For each
 * row and data column, a golden column of size 0 requires the tested
 * column to be size 0; otherwise the buffers are compared with abd_cmp()
 * and a mismatch is logged at D_DEBUG level.
 *
 * NOTE(review): `dcols` does not depend on the row but is recomputed on
 * every iteration of the outer loop.
 * NOTE(review): the mismatch bookkeeping and the return statement are not
 * visible in this view; callers treat a non-zero result as failure.
 */
240 cmp_data(raidz_test_opts_t
*opts
, raidz_map_t
*rm
)
242 int r
, i
, dcols
, ret
= 0;
244 for (r
= 0; r
< rm
->rm_nrows
; r
++) {
245 raidz_row_t
*rr
= rm
->rm_row
[r
];
246 raidz_row_t
*rrg
= opts
->rm_golden
->rm_row
[r
];
247 dcols
= opts
->rm_golden
->rm_row
[0]->rr_cols
-
248 raidz_parity(opts
->rm_golden
);
249 for (i
= 0; i
< dcols
; i
++) {
250 if (DATA_COL_SIZE(rrg
, i
) == 0) {
251 VERIFY0(DATA_COL_SIZE(rr
, i
));
255 if (abd_cmp(DATA_COL(rrg
, i
),
256 DATA_COL(rr
, i
)) != 0) {
259 LOG_OPT(D_DEBUG
, opts
,
260 "\nData block [%d] different!\n", i
);
/*
 * abd_iterate_func() callback: copy `size` bytes from the start of the
 * rand_data pool into `data`.
 *
 * NOTE(review): `private` is not used in the visible line, and the return
 * statement is not visible in this view.
 * NOTE(review): assumes `size` never exceeds the SPA_MAXBLOCKSIZE bytes
 * allocated for rand_data in main() -- confirm.
 */
268 init_rand(void *data
, size_t size
, void *private)
271 memcpy(data
, rand_data
, size
);
/*
 * Overwrite selected columns in every row of `rm`.
 *
 * rm:   map whose columns are modified.
 * tgts: array of column indices into rr_col[].
 * cnt:  number of entries of `tgts` to process.
 *
 * For each row and each target, abd_iterate_func() is run over the whole
 * column buffer (offset 0, length rc_size).
 *
 * NOTE(review): the callback passed to abd_iterate_func() is on a line not
 * visible in this view.  The spelling "colums" is kept because callers use
 * this name.
 */
276 corrupt_colums(raidz_map_t
*rm
, const int *tgts
, const int cnt
)
278 for (int r
= 0; r
< rm
->rm_nrows
; r
++) {
279 raidz_row_t
*rr
= rm
->rm_row
[r
];
280 for (int i
= 0; i
< cnt
; i
++) {
281 raidz_col_t
*col
= &rr
->rr_col
[tgts
[i
]];
282 abd_iterate_func(col
->rc_abd
, 0, col
->rc_size
,
/*
 * Fill the whole data buffer of `zio` (zio->io_abd, io_size bytes starting
 * at offset 0) by running the init_rand() callback over it.
 */
289 init_zio_abd(zio_t
*zio
)
291 abd_iterate_func(zio
->io_abd
, 0, zio
->io_size
, init_rand
, NULL
);
/*
 * Release a raidz map together with the zio it was built from.
 *
 * zio: address of the zio pointer; its io_abd is released with raidz_free()
 *      and the zio_t itself with umem_free().
 * rm:  address of the map pointer; the map is released with
 *      vdev_raidz_map_free().
 *
 * The map is freed before the zio's buffer.
 * NOTE(review): whether *zio and *rm are reset afterwards is not visible in
 * this view.
 */
295 fini_raidz_map(zio_t
**zio
, raidz_map_t
**rm
)
297 vdev_raidz_map_free(*rm
);
298 raidz_free((*zio
)->io_abd
, (*zio
)->io_size
);
299 umem_free(*zio
, sizeof (zio_t
));
/*
 * Build the reference ("golden") zio and raidz map in `opts` and check it
 * against a second, independently built map.
 *
 * opts:   options; receives zio_golden and rm_golden.
 * parity: number of parity columns; the maps are built with
 *         opts->rto_dcols + parity columns.
 *
 * Steps visible here:
 *  - an existing golden map is released with fini_raidz_map();
 *  - a golden zio and a test zio are allocated with the same offset
 *    (rto_offset) and size (rto_dsize) and filled via init_zio_abd();
 *  - the "original" raidz implementation is selected;
 *  - both maps are allocated, using the expanded layout when
 *    opts->rto_expand is set, and parity is generated for both;
 *  - cmp_data() and cmp_code() results are OR-ed into `err`, and a failure
 *    message is printed with ERR();
 *  - the test zio and map are torn down again.
 *
 * NOTE(review): the return statement is not visible in this view; callers
 * store the result in `err`.
 */
306 init_raidz_golden_map(raidz_test_opts_t
*opts
, const int parity
)
310 raidz_map_t
*rm_test
;
311 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
313 if (opts
->rm_golden
) {
314 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
317 opts
->zio_golden
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
318 zio_test
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
320 opts
->zio_golden
->io_offset
= zio_test
->io_offset
= opts
->rto_offset
;
321 opts
->zio_golden
->io_size
= zio_test
->io_size
= opts
->rto_dsize
;
323 opts
->zio_golden
->io_abd
= raidz_alloc(opts
->rto_dsize
);
324 zio_test
->io_abd
= raidz_alloc(opts
->rto_dsize
);
326 init_zio_abd(opts
->zio_golden
);
327 init_zio_abd(zio_test
);
329 VERIFY0(vdev_raidz_impl_set("original"));
331 if (opts
->rto_expand
) {
333 vdev_raidz_map_alloc_expanded(opts
->zio_golden
->io_abd
,
334 opts
->zio_golden
->io_size
, opts
->zio_golden
->io_offset
,
335 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
336 parity
, opts
->rto_expand_offset
);
337 rm_test
= vdev_raidz_map_alloc_expanded(zio_test
->io_abd
,
338 zio_test
->io_size
, zio_test
->io_offset
,
339 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
340 parity
, opts
->rto_expand_offset
);
342 opts
->rm_golden
= vdev_raidz_map_alloc(opts
->zio_golden
,
343 opts
->rto_ashift
, total_ncols
, parity
);
344 rm_test
= vdev_raidz_map_alloc(zio_test
,
345 opts
->rto_ashift
, total_ncols
, parity
);
348 VERIFY(opts
->zio_golden
);
349 VERIFY(opts
->rm_golden
);
351 vdev_raidz_generate_parity(opts
->rm_golden
);
352 vdev_raidz_generate_parity(rm_test
);
355 err
|= cmp_data(opts
, rm_test
);
356 err
|= cmp_code(opts
, rm_test
, parity
);
359 ERR("initializing the golden copy ... [FAIL]!\n");
361 /* tear down raidz_map of test zio */
362 fini_raidz_map(&zio_test
, &rm_test
);
368 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
369 * For each row, if the row is entirely before reflow_offset, it will
370 * come from the new location. Otherwise this row will come from the
371 * old location. Therefore, rows that straddle the reflow_offset will
372 * come from the old location.
374 * NOTE: Until raidz expansion is implemented this function is only
375 * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
/*
 * Parameters, as used by the visible code:
 *   abd           buffer holding the I/O's data; data columns are created
 *                 as sub-ranges of it.
 *   size          I/O size in bytes (converted to sectors with >> ashift).
 *   offset        byte offset of the I/O on the raidz vdev.
 *   ashift        log2 of the sector size.
 *   physical_cols width used to place a row on the children by default.
 *   logical_cols  number of columns (data + parity) of one row.
 *   nparity       number of parity columns per row.
 *   reflow_offset byte offset compared against each row's position.
 *
 * NOTE(review): the statement executed when a row reaches past
 * reflow_offset, the assignment of rr_cols, and the return statement are
 * not visible in this view.
 */
378 vdev_raidz_map_alloc_expanded(abd_t
*abd
, uint64_t size
, uint64_t offset
,
379 uint64_t ashift
, uint64_t physical_cols
, uint64_t logical_cols
,
380 uint64_t nparity
, uint64_t reflow_offset
)
382 /* The zio's size in units of the vdev's minimum sector size. */
383 uint64_t s
= size
>> ashift
;
384 uint64_t q
, r
, bc
, devidx
, asize
= 0, tot
;
387 * "Quotient": The number of data sectors for this stripe on all but
388 * the "big column" child vdevs that also contain "remainder" data.
391 q
= s
/ (logical_cols
- nparity
);
394 * "Remainder": The number of partial stripe data sectors in this I/O.
395 * This will add a sector to some, but not all, child vdevs.
397 r
= s
- q
* (logical_cols
- nparity
);
399 /* The number of "big columns" - those which contain remainder data. */
400 bc
= (r
== 0 ? 0 : r
+ nparity
);
403 * The total number of data and parity sectors associated with
406 tot
= s
+ nparity
* (q
+ (r
== 0 ? 0 : 1));
408 /* How many rows contain data (not skip) */
409 uint64_t rows
= howmany(tot
, logical_cols
);
410 int cols
= MIN(tot
, logical_cols
);
412 raidz_map_t
*rm
= kmem_zalloc(offsetof(raidz_map_t
, rm_row
[rows
]),
416 for (uint64_t row
= 0; row
< rows
; row
++) {
417 raidz_row_t
*rr
= kmem_alloc(offsetof(raidz_row_t
,
418 rr_col
[cols
]), KM_SLEEP
);
419 rm
->rm_row
[row
] = rr
;
421 /* The starting RAIDZ (parent) vdev sector of the row. */
422 uint64_t b
= (offset
>> ashift
) + row
* logical_cols
;
425 * If we are in the middle of a reflow, and any part of this
426 * row has not been copied, then use the old location of
429 int row_phys_cols
= physical_cols
;
430 if (b
+ (logical_cols
- nparity
) > reflow_offset
>> ashift
)
433 /* starting child of this row */
434 uint64_t child_id
= b
% row_phys_cols
;
435 /* The starting byte offset on each child vdev. */
436 uint64_t child_offset
= (b
/ row_phys_cols
) << ashift
;
439 * We set cols to the entire width of the block, even
440 * if this row is shorter. This is needed because parity
441 * generation (for Q and R) needs to know the entire width,
442 * because it treats the short row as though it was
443 * full-width (and the "phantom" sectors were zero-filled).
445 * Another approach to this would be to set cols shorter
446 * (to just the number of columns that we might do i/o to)
447 * and have another mechanism to tell the parity generation
448 * about the "entire width". Reconstruction (at least
449 * vdev_raidz_reconstruct_general()) would also need to
450 * know about the "entire width".
454 rr
->rr_missingdata
= 0;
455 rr
->rr_missingparity
= 0;
456 rr
->rr_firstdatacol
= nparity
;
457 rr
->rr_abd_empty
= NULL
;
460 for (int c
= 0; c
< rr
->rr_cols
; c
++, child_id
++) {
461 if (child_id
>= row_phys_cols
) {
462 child_id
-= row_phys_cols
;
463 child_offset
+= 1ULL << ashift
;
465 rr
->rr_col
[c
].rc_devidx
= child_id
;
466 rr
->rr_col
[c
].rc_offset
= child_offset
;
467 rr
->rr_col
[c
].rc_orig_data
= NULL
;
468 rr
->rr_col
[c
].rc_error
= 0;
469 rr
->rr_col
[c
].rc_tried
= 0;
470 rr
->rr_col
[c
].rc_skipped
= 0;
471 rr
->rr_col
[c
].rc_need_orig_restore
= B_FALSE
;
/*
 * NOTE(review): dc is computed before the parity check, so it wraps around
 * for c < rr_firstdatacol; it is only read in the data-column branch below.
 */
473 uint64_t dc
= c
- rr
->rr_firstdatacol
;
474 if (c
< rr
->rr_firstdatacol
) {
475 rr
->rr_col
[c
].rc_size
= 1ULL << ashift
;
476 rr
->rr_col
[c
].rc_abd
=
477 abd_alloc_linear(rr
->rr_col
[c
].rc_size
,
479 } else if (row
== rows
- 1 && bc
!= 0 && c
>= bc
) {
481 * Past the end, this for parity generation.
483 rr
->rr_col
[c
].rc_size
= 0;
484 rr
->rr_col
[c
].rc_abd
= NULL
;
487 * "data column" (col excluding parity)
488 * Add an ASCII art diagram here
492 if (c
< bc
|| r
== 0) {
493 off
= dc
* rows
+ row
;
496 (dc
- r
) * (rows
- 1) + row
;
498 rr
->rr_col
[c
].rc_size
= 1ULL << ashift
;
/*
 * NOTE(review): the length below is written as 1 << ashift (int) while
 * rc_size above uses 1ULL << ashift; the values agree for small ashift.
 */
499 rr
->rr_col
[c
].rc_abd
= abd_get_offset_struct(
500 &rr
->rr_col
[c
].rc_abdstruct
,
501 abd
, off
<< ashift
, 1 << ashift
);
504 asize
+= rr
->rr_col
[c
].rc_size
;
507 * If all data stored spans all columns, there's a danger that
508 * parity will always be on the same device and, since parity
509 * isn't read during normal operation, that that device's I/O
510 * bandwidth won't be used effectively. We therefore switch
511 * the parity every 1MB.
513 * ...at least that was, ostensibly, the theory. As a practical
514 * matter unless we juggle the parity between all devices
515 * evenly, we won't see any benefit. Further, occasional writes
516 * that aren't a multiple of the LCM of the number of children
517 * and the minimum stripe width are sufficient to avoid pessimal
518 * behavior. Unfortunately, this decision created an implicit
519 * on-disk format requirement that we need to support for all
520 * eternity, but only for single-parity RAID-Z.
522 * If we intend to skip a sector in the zeroth column for
523 * padding we must make sure to note this swap. We will never
524 * intend to skip the first column since at least one data and
525 * one parity column must appear in each row.
527 if (rr
->rr_firstdatacol
== 1 && rr
->rr_cols
> 1 &&
528 (offset
& (1ULL << 20))) {
529 ASSERT(rr
->rr_cols
>= 2);
530 ASSERT(rr
->rr_col
[0].rc_size
== rr
->rr_col
[1].rc_size
);
531 devidx
= rr
->rr_col
[0].rc_devidx
;
532 uint64_t o
= rr
->rr_col
[0].rc_offset
;
533 rr
->rr_col
[0].rc_devidx
= rr
->rr_col
[1].rc_devidx
;
534 rr
->rr_col
[0].rc_offset
= rr
->rr_col
[1].rc_offset
;
535 rr
->rr_col
[1].rc_devidx
= devidx
;
536 rr
->rr_col
[1].rc_offset
= o
;
540 ASSERT3U(asize
, ==, tot
<< ashift
);
542 /* init RAIDZ parity ops */
543 rm
->rm_ops
= vdev_raidz_math_get_ops();
/*
 * Create a fresh zio and raidz map for one test run.
 *
 * opts:   options (data size, ashift, data column count, expansion flags).
 * zio:    out parameter; receives a newly allocated zio with offset 0,
 *         size opts->rto_dsize and a buffer from raidz_alloc().
 * parity: number of parity columns, 1..3.
 *
 * The map is built with opts->rto_dcols + parity columns, using the
 * expanded layout when opts->rto_expand is set.  The first `parity` columns
 * (indices 0, 1, 2 from `ccols`) are then overwritten with
 * corrupt_colums() so that stale parity cannot pass a later comparison.
 *
 * NOTE(review): the initialization of the zio buffer and the return
 * statement are not visible in this view; callers assign the result to a
 * raidz_map_t pointer.
 */
549 init_raidz_map(raidz_test_opts_t
*opts
, zio_t
**zio
, const int parity
)
551 raidz_map_t
*rm
= NULL
;
552 const size_t alloc_dsize
= opts
->rto_dsize
;
553 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
554 const int ccols
[] = { 0, 1, 2 };
557 VERIFY(parity
<= 3 && parity
>= 1);
559 *zio
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
561 (*zio
)->io_offset
= 0;
562 (*zio
)->io_size
= alloc_dsize
;
563 (*zio
)->io_abd
= raidz_alloc(alloc_dsize
);
566 if (opts
->rto_expand
) {
567 rm
= vdev_raidz_map_alloc_expanded((*zio
)->io_abd
,
568 (*zio
)->io_size
, (*zio
)->io_offset
,
569 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
570 parity
, opts
->rto_expand_offset
);
572 rm
= vdev_raidz_map_alloc(*zio
, opts
->rto_ashift
,
573 total_ncols
, parity
);
577 /* Make sure code columns are destroyed */
578 corrupt_colums(rm
, ccols
, parity
);
/*
 * Parity generation test.
 *
 * Builds the golden map with PARITY_PQR, then walks raidz_impl_names[]
 * starting at its second entry.  An implementation that
 * vdev_raidz_impl_set() rejects is reported as [SKIP].  For every
 * supported implementation and every generation method
 * fn = 0..RAIDZ_GEN_NUM-1 a test map with fn+1 parity columns is created,
 * parity is generated (unless opts->rto_sanity is set) and compared with
 * the golden parity via cmp_code().  The loop is left early when
 * rto_opts.rto_should_stop is set.  The golden map is released at the end.
 *
 * NOTE(review): error accounting and the return statement are not visible
 * in this view; run_test() ORs the result into its error value.
 */
584 run_gen_check(raidz_test_opts_t
*opts
)
589 raidz_map_t
*rm_test
;
591 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
596 LOG(D_INFO
, "Testing parity generation...\n");
598 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
602 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
604 if (0 != vdev_raidz_impl_set(*impl_name
)) {
605 LOG(D_INFO
, "[SKIP]\n");
608 LOG(D_INFO
, "[SUPPORTED]\n");
611 for (fn
= 0; fn
< RAIDZ_GEN_NUM
; fn
++) {
613 /* Check if should stop */
614 if (rto_opts
.rto_should_stop
)
617 /* create suitable raidz_map */
618 rm_test
= init_raidz_map(opts
, &zio_test
, fn
+1);
621 LOG(D_INFO
, "\t\tTesting method [%s] ...",
624 if (!opts
->rto_sanity
)
625 vdev_raidz_generate_parity(rm_test
);
627 if (cmp_code(opts
, rm_test
, fn
+1) != 0) {
628 LOG(D_INFO
, "[FAIL]\n");
631 LOG(D_INFO
, "[PASS]\n");
633 fini_raidz_map(&zio_test
, &rm_test
);
637 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
/*
 * Exercise one reconstruction method `fn` on the map `rm`.
 *
 * rec_tgts[fn] is copied into tgtidx as the initial set of three target
 * columns.  Depending on `fn`, one (fn < RAIDZ_REC_PQ), two
 * (fn < RAIDZ_REC_PQR) or three of the targets are data columns.  The
 * loops enumerate every combination of distinct data columns, bounded by
 * opts->rto_dcols and by the data column count of the map's first row.
 * For each combination the chosen data columns are overwritten with
 * corrupt_colums(), vdev_raidz_reconstruct() is run on the three targets
 * (unless opts->rto_sanity is set) and the result is compared with the
 * golden data via cmp_data().  Enumeration stops early when
 * rto_opts.rto_should_stop is set.
 *
 * NOTE(review): loop exits, error accounting and the return statement are
 * not visible in this view; the caller treats a non-zero result as failure.
 */
643 run_rec_check_impl(raidz_test_opts_t
*opts
, raidz_map_t
*rm
, const int fn
)
648 static const int rec_tgts
[7][3] = {
649 {1, 2, 3}, /* rec_p: bad QR & D[0] */
650 {0, 2, 3}, /* rec_q: bad PR & D[0] */
651 {0, 1, 3}, /* rec_r: bad PQ & D[0] */
652 {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
653 {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
654 {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
655 {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
658 memcpy(tgtidx
, rec_tgts
[fn
], sizeof (tgtidx
));
660 if (fn
< RAIDZ_REC_PQ
) {
661 /* can reconstruct 1 failed data disk */
662 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
663 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
666 /* Check if should stop */
667 if (rto_opts
.rto_should_stop
)
670 LOG(D_DEBUG
, "[%d] ", x0
);
672 tgtidx
[2] = x0
+ raidz_parity(rm
);
674 corrupt_colums(rm
, tgtidx
+2, 1);
676 if (!opts
->rto_sanity
)
677 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
679 if (cmp_data(opts
, rm
) != 0) {
681 LOG(D_DEBUG
, "\nREC D[%d]... [FAIL]\n", x0
);
685 } else if (fn
< RAIDZ_REC_PQR
) {
686 /* can reconstruct 2 failed data disks */
687 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
688 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
690 for (x1
= x0
+ 1; x1
< opts
->rto_dcols
; x1
++) {
691 if (x1
>= rm
->rm_row
[0]->rr_cols
-
695 /* Check if should stop */
696 if (rto_opts
.rto_should_stop
)
699 LOG(D_DEBUG
, "[%d %d] ", x0
, x1
);
701 tgtidx
[1] = x0
+ raidz_parity(rm
);
702 tgtidx
[2] = x1
+ raidz_parity(rm
);
704 corrupt_colums(rm
, tgtidx
+1, 2);
706 if (!opts
->rto_sanity
)
707 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
709 if (cmp_data(opts
, rm
) != 0) {
711 LOG(D_DEBUG
, "\nREC D[%d %d]... "
717 /* can reconstruct 3 failed data disks */
718 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
719 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
721 for (x1
= x0
+ 1; x1
< opts
->rto_dcols
; x1
++) {
722 if (x1
>= rm
->rm_row
[0]->rr_cols
-
725 for (x2
= x1
+ 1; x2
< opts
->rto_dcols
; x2
++) {
726 if (x2
>= rm
->rm_row
[0]->rr_cols
-
730 /* Check if should stop */
731 if (rto_opts
.rto_should_stop
)
734 LOG(D_DEBUG
, "[%d %d %d]", x0
, x1
, x2
);
736 tgtidx
[0] = x0
+ raidz_parity(rm
);
737 tgtidx
[1] = x1
+ raidz_parity(rm
);
738 tgtidx
[2] = x2
+ raidz_parity(rm
);
740 corrupt_colums(rm
, tgtidx
, 3);
742 if (!opts
->rto_sanity
)
743 vdev_raidz_reconstruct(rm
,
746 if (cmp_data(opts
, rm
) != 0) {
749 "\nREC D[%d %d %d]... "
750 "[FAIL]\n", x0
, x1
, x2
);
/*
 * Data reconstruction test.
 *
 * Builds the golden map with PARITY_PQR, then walks raidz_impl_names[]
 * starting at its second entry.  An implementation that
 * vdev_raidz_impl_set() rejects is reported as [SKIP].  For each supported
 * implementation a PARITY_PQR test map is created, its parity generated,
 * and run_rec_check_impl() is called for every reconstruction method
 * fn = 0..RAIDZ_REC_NUM-1, logging [PASS] or [FAIL].  The test map is torn
 * down per implementation and the golden map at the end.
 *
 * NOTE(review): error accounting and the return statement are not visible
 * in this view; run_test() ORs the result into its error value.
 */
760 run_rec_check(raidz_test_opts_t
*opts
)
763 unsigned fn
, err
= 0;
765 raidz_map_t
*rm_test
;
767 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
772 LOG(D_INFO
, "Testing data reconstruction...\n");
774 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
778 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
780 if (vdev_raidz_impl_set(*impl_name
) != 0) {
781 LOG(D_INFO
, "[SKIP]\n");
784 LOG(D_INFO
, "[SUPPORTED]\n");
787 /* create suitable raidz_map */
788 rm_test
= init_raidz_map(opts
, &zio_test
, PARITY_PQR
);
789 /* generate parity */
790 vdev_raidz_generate_parity(rm_test
);
792 for (fn
= 0; fn
< RAIDZ_REC_NUM
; fn
++) {
794 LOG(D_INFO
, "\t\tTesting method [%s] ...",
797 if (run_rec_check_impl(opts
, rm_test
, fn
) != 0) {
798 LOG(D_INFO
, "[FAIL]\n");
802 LOG(D_INFO
, "[PASS]\n");
805 /* tear down test raidz_map */
806 fini_raidz_map(&zio_test
, &rm_test
);
809 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
/*
 * Run one complete test for the option set `opts`: print the options
 * (not forced, so subject to the verbosity check in print_opts()), then
 * run the parity generation check followed by the reconstruction check.
 * Both results are OR-ed into `err`.
 *
 * NOTE(review): main() calls run_test(NULL), while the visible lines pass
 * `opts` straight to print_opts(); any NULL handling is on lines not
 * visible in this view -- confirm.
 */
815 run_test(raidz_test_opts_t
*opts
)
822 print_opts(opts
, B_FALSE
);
824 err
|= run_gen_check(opts
);
825 err
|= run_rec_check(opts
);
830 #define SWEEP_RUNNING 0
831 #define SWEEP_FINISHED 1
832 #define SWEEP_ERROR 2
833 #define SWEEP_TIMEOUT 3
835 static int sweep_state
= 0;
836 static raidz_test_opts_t failed_opts
;
838 static kmutex_t sem_mtx
;
839 static kcondvar_t sem_cv
;
840 static int max_free_slots
;
841 static int free_slots
;
/*
 * Worker thread body for the parameter sweep.
 *
 * arg: heap-allocated raidz_test_opts_t owned by this thread; it must not
 *      be NULL and is released with umem_free() before the thread ends.
 *
 * Runs run_test() on the options.  When rto_opts.rto_sanity is set, a
 * failure is additionally simulated with 25% probability.  On failure the
 * options are copied to failed_opts and sweep_state is set to SWEEP_ERROR
 * while holding sem_mtx.  Finally sem_mtx is taken again to hand the slot
 * back to the spawning loop.
 *
 * NOTE(review): the statements inside the final critical section and the
 * thread exit call are not visible in this view.
 */
843 static _Noreturn
void
844 sweep_thread(void *arg
)
847 raidz_test_opts_t
*opts
= (raidz_test_opts_t
*)arg
;
848 VERIFY(opts
!= NULL
);
850 err
= run_test(opts
);
852 if (rto_opts
.rto_sanity
) {
853 /* 25% chance that a sweep test fails */
854 if (rand() < (RAND_MAX
/4))
859 mutex_enter(&sem_mtx
);
860 memcpy(&failed_opts
, opts
, sizeof (raidz_test_opts_t
));
861 sweep_state
= SWEEP_ERROR
;
862 mutex_exit(&sem_mtx
);
865 umem_free(opts
, sizeof (raidz_test_opts_t
));
867 /* signal the next thread */
868 mutex_enter(&sem_mtx
);
871 mutex_exit(&sem_mtx
);
/*
 * Parameter sweep driver (the function's signature line is not visible in
 * this view).
 *
 * Iterates over every combination of size_v[] x ashift_v[] x dcols_v[],
 * skipping combinations whose size is smaller than one sector.  For each
 * combination it waits on sem_cv (re-checking about once per `hz` ticks)
 * until a worker slot is free, stopping early when the configured
 * rto_sweep_timeout has elapsed or when sweep_state is no longer
 * SWEEP_RUNNING.  It then allocates a private option set and starts a
 * sweep_thread() for it.  Afterwards it waits until all slots are free
 * again, reports the outcome and returns SWEEP_ERROR on failure, else 0.
 *
 * The number of slots is MAX(2, boot_ncpus).
 */
879 static const size_t dcols_v
[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
880 static const size_t ashift_v
[] = { 9, 12, 14 };
881 static const size_t size_v
[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
882 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE
};
884 (void) setvbuf(stdout
, NULL
, _IONBF
, 0);
886 ulong_t total_comb
= ARRAY_SIZE(size_v
) * ARRAY_SIZE(ashift_v
) *
888 ulong_t tried_comb
= 0;
889 hrtime_t time_diff
, start_time
= gethrtime();
890 raidz_test_opts_t
*opts
;
893 max_free_slots
= free_slots
= MAX(2, boot_ncpus
);
895 mutex_init(&sem_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
896 cv_init(&sem_cv
, NULL
, CV_DEFAULT
, NULL
);
898 for (s
= 0; s
< ARRAY_SIZE(size_v
); s
++)
899 for (a
= 0; a
< ARRAY_SIZE(ashift_v
); a
++)
900 for (d
= 0; d
< ARRAY_SIZE(dcols_v
); d
++) {
902 if (size_v
[s
] < (1 << ashift_v
[a
])) {
907 if (++tried_comb
% 20 == 0)
908 LOG(D_ALL
, "%lu/%lu... ", tried_comb
, total_comb
);
910 /* wait for signal to start new thread */
911 mutex_enter(&sem_mtx
);
912 while (cv_timedwait_sig(&sem_cv
, &sem_mtx
,
913 ddi_get_lbolt() + hz
)) {
915 /* check if should stop the test (timeout) */
916 time_diff
= (gethrtime() - start_time
) / NANOSEC
;
917 if (rto_opts
.rto_sweep_timeout
> 0 &&
918 time_diff
>= rto_opts
.rto_sweep_timeout
) {
919 sweep_state
= SWEEP_TIMEOUT
;
920 rto_opts
.rto_should_stop
= B_TRUE
;
921 mutex_exit(&sem_mtx
);
925 /* check if should stop the test (error) */
926 if (sweep_state
!= SWEEP_RUNNING
) {
927 mutex_exit(&sem_mtx
);
931 /* exit loop if a slot is available */
932 if (free_slots
> 0) {
938 mutex_exit(&sem_mtx
);
940 opts
= umem_zalloc(sizeof (raidz_test_opts_t
), UMEM_NOFAIL
);
941 opts
->rto_ashift
= ashift_v
[a
];
942 opts
->rto_dcols
= dcols_v
[d
];
/*
 * NOTE(review): (1 << ashift_v[a]) is an int and rand() returns int, so
 * the product below is computed in int and can exceed INT_MAX (signed
 * overflow) before it is stored -- confirm whether 1ULL << ... was
 * intended.
 */
943 opts
->rto_offset
= (1 << ashift_v
[a
]) * rand();
944 opts
->rto_dsize
= size_v
[s
];
945 opts
->rto_expand
= rto_opts
.rto_expand
;
946 opts
->rto_expand_offset
= rto_opts
.rto_expand_offset
;
947 opts
->rto_v
= 0; /* be quiet */
949 VERIFY3P(thread_create(NULL
, 0, sweep_thread
, (void *) opts
,
950 0, NULL
, TS_RUN
, defclsyspri
), !=, NULL
);
954 LOG(D_ALL
, "\nWaiting for test threads to finish...\n");
955 mutex_enter(&sem_mtx
);
956 VERIFY(free_slots
<= max_free_slots
);
957 while (free_slots
< max_free_slots
) {
958 (void) cv_wait(&sem_cv
, &sem_mtx
);
960 mutex_exit(&sem_mtx
);
962 if (sweep_state
== SWEEP_ERROR
) {
963 ERR("Sweep test failed! Failed option: \n");
964 print_opts(&failed_opts
, B_TRUE
);
966 if (sweep_state
== SWEEP_TIMEOUT
)
967 LOG(D_ALL
, "Test timeout (%lus). Stopping...\n",
968 (ulong_t
)rto_opts
.rto_sweep_timeout
);
970 LOG(D_ALL
, "Sweep test succeeded on %lu raidz maps!\n",
971 (ulong_t
)tried_comb
);
974 mutex_destroy(&sem_mtx
);
976 return (sweep_state
== SWEEP_ERROR
? SWEEP_ERROR
: 0);
/*
 * Program entry point.
 *
 * Visible sequence:
 *  - format the PID into pid_s and install sig_handler() for SIGSEGV;
 *  - make stdout line buffered, run dprintf_setup() and process_options();
 *  - kernel_init(SPA_MODE_READ);
 *  - allocate SPA_MAXBLOCKSIZE bytes for rand_data, seed rand() from the
 *    time and PID, fill the pool with rand() values and mark it read-only
 *    with mprotect();
 *  - run the benchmark, the sweep, or a single run_test(NULL) depending on
 *    rto_opts;
 *  - release rand_data.
 *
 * NOTE(review): the result of mprotect() is ignored, and rand_data is
 * still PROT_READ when it is passed to umem_free() -- confirm this is safe
 * for the allocator in use.
 * NOTE(review): sa_flags setup, kernel_fini() and the return statement are
 * not visible in this view.
 */
981 main(int argc
, char **argv
)
984 struct sigaction action
;
987 /* init gdb pid string early */
988 (void) sprintf(pid_s
, "%d", getpid());
990 action
.sa_handler
= sig_handler
;
991 sigemptyset(&action
.sa_mask
);
994 if (sigaction(SIGSEGV
, &action
, NULL
) < 0) {
995 ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno
));
999 (void) setvbuf(stdout
, NULL
, _IOLBF
, 0);
1001 dprintf_setup(&argc
, argv
);
1003 process_options(argc
, argv
);
1005 kernel_init(SPA_MODE_READ
);
1007 /* setup random data because rand() is not reentrant */
1008 rand_data
= (int *)umem_alloc(SPA_MAXBLOCKSIZE
, UMEM_NOFAIL
);
1009 srand((unsigned)time(NULL
) * getpid());
1010 for (i
= 0; i
< SPA_MAXBLOCKSIZE
/ sizeof (int); i
++)
1011 rand_data
[i
] = rand();
1013 mprotect(rand_data
, SPA_MAXBLOCKSIZE
, PROT_READ
);
1015 if (rto_opts
.rto_benchmark
) {
1016 run_raidz_benchmark();
1017 } else if (rto_opts
.rto_sweep
) {
1020 err
= run_test(NULL
);
1023 umem_free(rand_data
, SPA_MAXBLOCKSIZE
);