/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */
#include <sys/zfs_context.h>
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>

#include "raidz_test.h"
static int *rand_data;
raidz_test_opts_t rto_opts;

static char gdb[256];
static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";
static void sig_handler(int signo)
{
	struct sigaction action;

	/*
	 * Restore default action and re-raise signal so SIGSEGV and
	 * SIGABRT can trigger a core dump.
	 */
	action.sa_handler = SIG_DFL;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;
	(void) sigaction(signo, &action, NULL);

	if (rto_opts.rto_gdb)
		(void) system(gdb);

	raise(signo);
}
static void print_opts(raidz_test_opts_t *opts, boolean_t force)
{
	char *verbose;
	switch (opts->rto_v) {
	case D_ALL:
		verbose = "no";
		break;
	case D_INFO:
		verbose = "info";
		break;
	case D_DEBUG:
	default:
		verbose = "debug";
		break;
	}

	if (force || opts->rto_v >= D_INFO) {
		(void) fprintf(stdout, DBLSEP "Running with options:\n"
		    "  (-a) zio ashift                   : %zu\n"
		    "  (-o) zio offset                   : 1 << %zu\n"
		    "  (-e) expanded map                 : %s\n"
		    "  (-r) reflow offset                : %llx\n"
		    "  (-d) number of raidz data columns : %zu\n"
		    "  (-s) size of DATA                 : 1 << %zu\n"
		    "  (-S) sweep parameters             : %s \n"
		    "  (-v) verbose                      : %s \n\n",
		    opts->rto_ashift,				/* -a */
		    ilog2(opts->rto_offset),			/* -o */
		    opts->rto_expand ? "yes" : "no",		/* -e */
		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
		    opts->rto_dcols,				/* -d */
		    ilog2(opts->rto_dsize),			/* -s */
		    opts->rto_sweep ? "yes" : "no",		/* -S */
		    verbose);					/* -v */
	}
}
static void usage(boolean_t requested)
{
	const raidz_test_opts_t *o = &rto_opts_defaults;

	FILE *fp = requested ? stdout : stderr;

	(void) fprintf(fp, "Usage:\n"
	    "\t[-a zio ashift (default: %zu)]\n"
	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
	    "\t[-d number of raidz data columns (default: %zu)]\n"
	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
	    "\t[-S parameter sweep (default: %s)]\n"
	    "\t[-t timeout for parameter sweep test]\n"
	    "\t[-B benchmark all raidz implementations]\n"
	    "\t[-e use expanded raidz map (default: %s)]\n"
	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
	    "\t[-v increase verbosity (default: %zu)]\n"
	    "\t[-h (print help)]\n"
	    "\t[-T test the test, see if failure would be detected]\n"
	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
	    "\n",
	    o->rto_ashift,				/* -a */
	    ilog2(o->rto_offset),			/* -o */
	    o->rto_dcols,				/* -d */
	    ilog2(o->rto_dsize),			/* -s */
	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
	    (u_longlong_t)o->rto_expand_offset,		/* -r */
	    o->rto_v);					/* -v */

	exit(requested ? 0 : 1);
}
static void process_options(int argc, char **argv)
{
	size_t value;
	int opt;
	raidz_test_opts_t *o = &rto_opts;

	bcopy(&rto_opts_defaults, o, sizeof (*o));

	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
		switch (opt) {
		case 'a':
			value = strtoull(optarg, NULL, 0);
			o->rto_ashift = MIN(13, MAX(9, value));
			break;
		case 'e':
			o->rto_expand = 1;
			break;
		case 'r':
			o->rto_expand_offset = strtoull(optarg, NULL, 0);
			break;
		case 'o':
			value = strtoull(optarg, NULL, 0);
			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
			break;
		case 'd':
			value = strtoull(optarg, NULL, 0);
			o->rto_dcols = MIN(255, MAX(1, value));
			break;
		case 's':
			value = strtoull(optarg, NULL, 0);
			o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
			    MAX(SPA_MINBLOCKSHIFT, value));
			break;
		case 't':
			value = strtoull(optarg, NULL, 0);
			o->rto_sweep_timeout = value;
			break;
		case 'v':
			o->rto_v++;
			break;
		case 'S':
			o->rto_sweep = 1;
			break;
		case 'B':
			o->rto_benchmark = 1;
			break;
		case 'T':
			o->rto_sanity = 1;
			break;
		case 'D':
			o->rto_gdb = 1;
			break;
		case 'h':
			usage(B_TRUE);
			break;
		case '?':
		default:
			usage(B_FALSE);
			break;
		}
	}
}
#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
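
/*
 * Illustrative example (not from the original source): for a triple-parity
 * row, CODE_COL(rr, 0..2) address the P, Q and R parity columns, while
 * DATA_COL(rr, 0) addresses the first data column, i.e.
 * rr_col[rr_firstdatacol].
 */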
static int
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
{
	int r, i, ret = 0;

	VERIFY(parity >= 1 && parity <= 3);

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t * const rr = rm->rm_row[r];
		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
		for (i = 0; i < parity; i++) {
			if (CODE_COL_SIZE(rrg, i) == 0) {
				VERIFY0(CODE_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(CODE_COL(rr, i),
			    CODE_COL(rrg, i)) != 0) {
				ret++;
				LOG_OPT(D_DEBUG, opts,
				    "\nParity block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}
static int
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
{
	int r, i, dcols, ret = 0;

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
		dcols = opts->rm_golden->rm_row[0]->rr_cols -
		    raidz_parity(opts->rm_golden);
		for (i = 0; i < dcols; i++) {
			if (DATA_COL_SIZE(rrg, i) == 0) {
				VERIFY0(DATA_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(DATA_COL(rrg, i),
			    DATA_COL(rr, i)) != 0) {
				ret++;
				LOG_OPT(D_DEBUG, opts,
				    "\nData block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}
static int
init_rand(void *data, size_t size, void *private)
{
	size_t i;
	int *dst = (int *)data;

	for (i = 0; i < size / sizeof (int); i++)
		dst[i] = rand_data[i];

	return (0);
}
static void
corrupt_columns(raidz_map_t *rm, const int *tgts, const int cnt)
{
	for (int r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		for (int i = 0; i < cnt; i++) {
			raidz_col_t *col = &rr->rr_col[tgts[i]];
			abd_iterate_func(col->rc_abd, 0, col->rc_size,
			    init_rand, NULL);
		}
	}
}
static void
init_zio_abd(zio_t *zio)
{
	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
}
static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{
	vdev_raidz_map_free(*rm);
	raidz_free((*zio)->io_abd, (*zio)->io_size);
	umem_free(*zio, sizeof (zio_t));

	*zio = NULL;
	*rm = NULL;
}
static int
init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
{
	int err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;
	const size_t total_ncols = opts->rto_dcols + parity;

	if (opts->rm_golden) {
		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
	}

	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;

	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
	zio_test->io_abd = raidz_alloc(opts->rto_dsize);

	init_zio_abd(opts->zio_golden);
	init_zio_abd(zio_test);

	VERIFY0(vdev_raidz_impl_set("original"));

	if (opts->rto_expand) {
		opts->rm_golden =
		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
		    zio_test->io_size, zio_test->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
		    opts->rto_ashift, total_ncols, parity);
		rm_test = vdev_raidz_map_alloc(zio_test,
		    opts->rto_ashift, total_ncols, parity);
	}

	VERIFY(opts->zio_golden);
	VERIFY(opts->rm_golden);

	vdev_raidz_generate_parity(opts->rm_golden);
	vdev_raidz_generate_parity(rm_test);

	err |= cmp_data(opts, rm_test);
	err |= cmp_code(opts, rm_test, parity);

	if (err)
		ERR("initializing the golden copy ... [FAIL]!\n");

	/* tear down raidz_map of test zio */
	fini_raidz_map(&zio_test, &rm_test);

	return (err);
}
/*
 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
 * For each row, if the row is entirely before reflow_offset, it will
 * come from the new location. Otherwise this row will come from the
 * old location. Therefore, rows that straddle the reflow_offset will
 * come from the old location.
 *
 * NOTE: Until raidz expansion is implemented, this function is only
 * needed by raidz_test.c to test the multi-row raidz_map_t
 * functionality.
 */
static raidz_map_t *
vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
    uint64_t nparity, uint64_t reflow_offset)
{
	/* The zio's size in units of the vdev's minimum sector size. */
	uint64_t s = size >> ashift;
	uint64_t q, r, bc, devidx, asize = 0, tot;
381 * "Quotient": The number of data sectors for this stripe on all but
382 * the "big column" child vdevs that also contain "remainder" data.
385 q
= s
/ (logical_cols
- nparity
);
388 * "Remainder": The number of partial stripe data sectors in this I/O.
389 * This will add a sector to some, but not all, child vdevs.
391 r
= s
- q
* (logical_cols
- nparity
);
393 /* The number of "big columns" - those which contain remainder data. */
394 bc
= (r
== 0 ? 0 : r
+ nparity
);
397 * The total number of data and parity sectors associated with
400 tot
= s
+ nparity
* (q
+ (r
== 0 ? 0 : 1));
402 /* How many rows contain data (not skip) */
403 uint64_t rows
= howmany(tot
, logical_cols
);
404 int cols
= MIN(tot
, logical_cols
);
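
	/*
	 * Worked example (illustrative, not from the original source): with
	 * s = 11 sectors, logical_cols = 5 and nparity = 1 there are 4 data
	 * columns per row, so q = 11 / 4 = 2, r = 11 - 2 * 4 = 3,
	 * bc = 3 + 1 = 4, tot = 11 + 1 * (2 + 1) = 14,
	 * rows = howmany(14, 5) = 3 and cols = MIN(14, 5) = 5.
	 */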
	raidz_map_t *rm =
	    kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]), KM_SLEEP);
	rm->rm_nrows = rows;

	for (uint64_t row = 0; row < rows; row++) {
		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
		    rr_col[cols]), KM_SLEEP);
		rm->rm_row[row] = rr;
		/* The starting RAIDZ (parent) vdev sector of the row. */
		uint64_t b = (offset >> ashift) + row * logical_cols;

		/*
		 * If we are in the middle of a reflow, and any part of this
		 * row has not been copied, then use the old location of
		 * this row.
		 */
		int row_phys_cols = physical_cols;
		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
			row_phys_cols--;

		/* starting child of this row */
		uint64_t child_id = b % row_phys_cols;
		/* The starting byte offset on each child vdev. */
		uint64_t child_offset = (b / row_phys_cols) << ashift;
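
		/*
		 * Illustrative example (not from the original source): with
		 * physical_cols = 6 (post-expansion) and
		 * reflow_offset >> ashift = 100, a row whose data sectors
		 * would extend past sector 100 has not been fully copied
		 * yet, so it is addressed with the pre-expansion width of
		 * row_phys_cols = 5 children.
		 */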
		/*
		 * We set cols to the entire width of the block, even
		 * if this row is shorter. This is needed because parity
		 * generation (for Q and R) needs to know the entire width,
		 * because it treats the short row as though it was
		 * full-width (and the "phantom" sectors were zero-filled).
		 *
		 * Another approach to this would be to set cols shorter
		 * (to just the number of columns that we might do i/o to)
		 * and have another mechanism to tell the parity generation
		 * about the "entire width". Reconstruction (at least
		 * vdev_raidz_reconstruct_general()) would also need to
		 * know about the "entire width".
		 */
		rr->rr_cols = cols;
		rr->rr_bigcols = bc;
		rr->rr_missingdata = 0;
		rr->rr_missingparity = 0;
		rr->rr_firstdatacol = nparity;
		rr->rr_abd_copy = NULL;
		rr->rr_abd_empty = NULL;
		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
			if (child_id >= row_phys_cols) {
				child_id -= row_phys_cols;
				child_offset += 1ULL << ashift;
			}
			rr->rr_col[c].rc_devidx = child_id;
			rr->rr_col[c].rc_offset = child_offset;
			rr->rr_col[c].rc_gdata = NULL;
			rr->rr_col[c].rc_orig_data = NULL;
			rr->rr_col[c].rc_error = 0;
			rr->rr_col[c].rc_tried = 0;
			rr->rr_col[c].rc_skipped = 0;
			rr->rr_col[c].rc_need_orig_restore = B_FALSE;
			uint64_t dc = c - rr->rr_firstdatacol;
			if (c < rr->rr_firstdatacol) {
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd =
				    abd_alloc_linear(rr->rr_col[c].rc_size,
				    B_TRUE);
			} else if (row == rows - 1 && bc != 0 && c >= bc) {
				/*
				 * Past the end; this is only used for
				 * parity generation.
				 */
				rr->rr_col[c].rc_size = 0;
				rr->rr_col[c].rc_abd = NULL;
			} else {
				/*
				 * "data column" (col excluding parity)
				 * Add an ASCII art diagram here
				 */
				uint64_t off;

				if (c < bc || r == 0) {
					off = dc * rows + row;
				} else {
					off = r * rows +
					    (dc - r) * (rows - 1) + row;
				}
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd = abd_get_offset_struct(
				    &rr->rr_col[c].rc_abdstruct,
				    abd, off << ashift, 1 << ashift);
			}

			asize += rr->rr_col[c].rc_size;
		}
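
		/*
		 * Continuing the worked example above (rows = 3, r = 3),
		 * illustrative only: data column dc = 0 is a "big column"
		 * with a sector in every row, at source sectors
		 * off = dc * rows + row = 0, 1, 2. The first non-big data
		 * column, dc = 3, has only rows - 1 = 2 sectors, at
		 * off = r * rows + (dc - r) * (rows - 1) + row = 9 + row.
		 */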
		/*
		 * If all data stored spans all columns, there's a danger that
		 * parity will always be on the same device and, since parity
		 * isn't read during normal operation, that that device's I/O
		 * bandwidth won't be used effectively. We therefore switch
		 * the parity every 1MB.
		 *
		 * ...at least that was, ostensibly, the theory. As a practical
		 * matter unless we juggle the parity between all devices
		 * evenly, we won't see any benefit. Further, occasional writes
		 * that aren't a multiple of the LCM of the number of children
		 * and the minimum stripe width are sufficient to avoid pessimal
		 * behavior. Unfortunately, this decision created an implicit
		 * on-disk format requirement that we need to support for all
		 * eternity, but only for single-parity RAID-Z.
		 *
		 * If we intend to skip a sector in the zeroth column for
		 * padding we must make sure to note this swap. We will never
		 * intend to skip the first column since at least one data and
		 * one parity column must appear in each row.
		 */
		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
		    (offset & (1ULL << 20))) {
			ASSERT(rr->rr_cols >= 2);
			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);

			devidx = rr->rr_col[0].rc_devidx;
			uint64_t o = rr->rr_col[0].rc_offset;
			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
			rr->rr_col[1].rc_devidx = devidx;
			rr->rr_col[1].rc_offset = o;
		}
	}

	ASSERT3U(asize, ==, tot << ashift);
	/* init RAIDZ parity ops */
	rm->rm_ops = vdev_raidz_math_get_ops();

	return (rm);
}
static raidz_map_t *
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
{
	raidz_map_t *rm = NULL;
	const size_t alloc_dsize = opts->rto_dsize;
	const size_t total_ncols = opts->rto_dcols + parity;
	const int ccols[] = { 0, 1, 2 };

	VERIFY(zio);
	VERIFY(parity <= 3 && parity >= 1);

	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	(*zio)->io_offset = 0;
	(*zio)->io_size = alloc_dsize;
	(*zio)->io_abd = raidz_alloc(alloc_dsize);
	init_zio_abd(*zio);

	if (opts->rto_expand) {
		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
		    (*zio)->io_size, (*zio)->io_offset,
		    opts->rto_ashift, total_ncols+1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
		    total_ncols, parity);
	}
	VERIFY(rm);

	/* Make sure code columns are destroyed */
	corrupt_columns(rm, ccols, parity);

	return (rm);
}
static int
run_gen_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	int fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (err != 0)
		return (err);

	LOG(D_INFO, "Testing parity generation...\n");

	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (0 != vdev_raidz_impl_set(*impl_name)) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		}
		LOG(D_INFO, "[SUPPORTED]\n");

		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {

			/* Check if we should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			/* create a suitable raidz_map */
			rm_test = init_raidz_map(opts, &zio_test, fn+1);
			VERIFY(rm_test);

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_gen_name[fn]);

			if (!opts->rto_sanity)
				vdev_raidz_generate_parity(rm_test);

			if (cmp_code(opts, rm_test, fn+1) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else {
				LOG(D_INFO, "[PASS]\n");
			}

			fini_raidz_map(&zio_test, &rm_test);
		}
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}
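
/*
 * Note: with -T (rto_sanity) the parity generation step above is skipped on
 * purpose, so cmp_code() is expected to report mismatches; this verifies
 * that the test itself is able to detect failures.
 */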
static int
run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
{
	int x0, x1, x2;
	int tgtidx[3];
	int err = 0;
	static const int rec_tgts[7][3] = {
		{1, 2, 3},	/* rec_p:   bad QR & D[0] */
		{0, 2, 3},	/* rec_q:   bad PR & D[0] */
		{0, 1, 3},	/* rec_r:   bad PQ & D[0] */
		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1] */
		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1] */
		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1] */
		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
	};

	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
	if (fn < RAIDZ_REC_PQ) {
		/* can reconstruct 1 failed data disk */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;

			/* Check if we should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			LOG(D_DEBUG, "[%d] ", x0);

			tgtidx[2] = x0 + raidz_parity(rm);

			corrupt_columns(rm, tgtidx+2, 1);

			if (!opts->rto_sanity)
				vdev_raidz_reconstruct(rm, tgtidx, 3);

			if (cmp_data(opts, rm) != 0) {
				err++;
				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
			}
		}
	} else if (fn < RAIDZ_REC_PQR) {
		/* can reconstruct 2 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;

				/* Check if we should stop */
				if (rto_opts.rto_should_stop)
					return (err);

				LOG(D_DEBUG, "[%d %d] ", x0, x1);

				tgtidx[1] = x0 + raidz_parity(rm);
				tgtidx[2] = x1 + raidz_parity(rm);

				corrupt_columns(rm, tgtidx+1, 2);

				if (!opts->rto_sanity)
					vdev_raidz_reconstruct(rm, tgtidx, 3);

				if (cmp_data(opts, rm) != 0) {
					err++;
					LOG(D_DEBUG, "\nREC D[%d %d]... "
					    "[FAIL]\n", x0, x1);
				}
			}
		}
	} else {
		/* can reconstruct 3 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;
				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
					if (x2 >= rm->rm_row[0]->rr_cols -
					    raidz_parity(rm))
						continue;

					/* Check if we should stop */
					if (rto_opts.rto_should_stop)
						return (err);

					LOG(D_DEBUG, "[%d %d %d]",
					    x0, x1, x2);

					tgtidx[0] = x0 + raidz_parity(rm);
					tgtidx[1] = x1 + raidz_parity(rm);
					tgtidx[2] = x2 + raidz_parity(rm);

					corrupt_columns(rm, tgtidx, 3);

					if (!opts->rto_sanity)
						vdev_raidz_reconstruct(rm,
						    tgtidx, 3);

					if (cmp_data(opts, rm) != 0) {
						err++;
						LOG(D_DEBUG,
						    "\nREC D[%d %d %d]... "
						    "[FAIL]\n", x0, x1, x2);
					}
				}
			}
		}
	}
	return (err);
}
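
/*
 * Note: vdev_raidz_reconstruct() is always invoked with three targets above;
 * for the single- and double-failure cases the leading entries of tgtidx
 * keep the parity indices taken from rec_tgts[], and only the trailing
 * entries are replaced with data-column indices.
 */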
static int
run_rec_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	unsigned fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (err != 0)
		return (err);

	LOG(D_INFO, "Testing data reconstruction...\n");

	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (vdev_raidz_impl_set(*impl_name) != 0) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		}
		LOG(D_INFO, "[SUPPORTED]\n");

		/* create suitable raidz_map */
		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
		/* generate parity */
		vdev_raidz_generate_parity(rm_test);

		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_rec_name[fn]);

			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else {
				LOG(D_INFO, "[PASS]\n");
			}
		}
		/* tear down test raidz_map */
		fini_raidz_map(&zio_test, &rm_test);
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}
static int
run_test(raidz_test_opts_t *opts)
{
	int err = 0;

	if (opts == NULL)
		opts = &rto_opts;

	print_opts(opts, B_FALSE);

	err |= run_gen_check(opts);
	err |= run_rec_check(opts);

	return (err);
}
#define	SWEEP_RUNNING	0
#define	SWEEP_FINISHED	1
#define	SWEEP_ERROR	2
#define	SWEEP_TIMEOUT	3

static int sweep_state = 0;
static raidz_test_opts_t failed_opts;

static kmutex_t sem_mtx;
static kcondvar_t sem_cv;
static int max_free_slots;
static int free_slots;
static void
sweep_thread(void *arg)
{
	int err = 0;
	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
	VERIFY(opts != NULL);

	err = run_test(opts);

	if (rto_opts.rto_sanity) {
		/* 25% chance that a sweep test fails */
		if (rand() < (RAND_MAX/4))
			err = 1;
	}

	if (err != 0) {
		mutex_enter(&sem_mtx);
		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
		sweep_state = SWEEP_ERROR;
		mutex_exit(&sem_mtx);
	}

	umem_free(opts, sizeof (raidz_test_opts_t));

	/* signal the next thread */
	mutex_enter(&sem_mtx);
	free_slots++;
	cv_signal(&sem_cv);
	mutex_exit(&sem_mtx);

	thread_exit();
}
static int
run_sweep(void)
{
	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
	static const size_t ashift_v[] = { 9, 12, 14 };
	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
	    1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
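
	/*
	 * Illustrative count (not from the original source): the full sweep
	 * covers ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
	 * ARRAY_SIZE(dcols_v) = 6 * 3 * 11 = 198 combinations, minus those
	 * where the zio size is smaller than a single sector
	 * (size_v[s] < 1 << ashift_v[a]).
	 */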
	(void) setvbuf(stdout, NULL, _IONBF, 0);

	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
	    ARRAY_SIZE(dcols_v);
	ulong_t tried_comb = 0;
	hrtime_t time_diff, start_time = gethrtime();
	raidz_test_opts_t *opts;
	uint32_t a, d, s;

	max_free_slots = free_slots = MAX(2, boot_ncpus);

	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);

	for (s = 0; s < ARRAY_SIZE(size_v); s++)
	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {

		if (size_v[s] < (1 << ashift_v[a])) {
			total_comb--;
			continue;
		}

		if (++tried_comb % 20 == 0)
			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);

		/* wait for signal to start new thread */
		mutex_enter(&sem_mtx);
		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
		    ddi_get_lbolt() + hz)) {

			/* check if we should stop the test (timeout) */
			time_diff = (gethrtime() - start_time) / NANOSEC;
			if (rto_opts.rto_sweep_timeout > 0 &&
			    time_diff >= rto_opts.rto_sweep_timeout) {
				sweep_state = SWEEP_TIMEOUT;
				rto_opts.rto_should_stop = B_TRUE;
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* check if we should stop the test (error) */
			if (sweep_state != SWEEP_RUNNING) {
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* exit loop if a slot is available */
			if (free_slots > 0) {
				break;
			}
		}

		free_slots--;
		mutex_exit(&sem_mtx);

		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
		opts->rto_ashift = ashift_v[a];
		opts->rto_dcols = dcols_v[d];
		opts->rto_offset = (1 << ashift_v[a]) * rand();
		opts->rto_dsize = size_v[s];
		opts->rto_expand = rto_opts.rto_expand;
		opts->rto_expand_offset = rto_opts.rto_expand_offset;
		opts->rto_v = 0; /* be quiet */

		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
	}

exit:
	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
	mutex_enter(&sem_mtx);
	VERIFY(free_slots <= max_free_slots);
	while (free_slots < max_free_slots) {
		(void) cv_wait(&sem_cv, &sem_mtx);
	}
	mutex_exit(&sem_mtx);

	if (sweep_state == SWEEP_ERROR) {
		ERR("Sweep test failed! Failed option: \n");
		print_opts(&failed_opts, B_TRUE);
	} else {
		if (sweep_state == SWEEP_TIMEOUT)
			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
			    (ulong_t)rto_opts.rto_sweep_timeout);

		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
		    (ulong_t)tried_comb);
	}

	mutex_destroy(&sem_mtx);

	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
}
int
main(int argc, char **argv)
{
	size_t i;
	struct sigaction action;
	int err = 0;

	/* init gdb string early */
	(void) sprintf(gdb, gdb_tmpl, getpid());

	action.sa_handler = sig_handler;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;

	if (sigaction(SIGSEGV, &action, NULL) < 0) {
		ERR("raidz_test: cannot catch SIGSEGV: %s.\n",
		    strerror(errno));
		exit(EXIT_FAILURE);
	}

	(void) setvbuf(stdout, NULL, _IOLBF, 0);

	dprintf_setup(&argc, argv);

	process_options(argc, argv);

	kernel_init(SPA_MODE_READ);

	/* setup random data because rand() is not reentrant */
	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	srand((unsigned)time(NULL) * getpid());
	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
		rand_data[i] = rand();

	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);

	if (rto_opts.rto_benchmark) {
		run_raidz_benchmark();
	} else if (rto_opts.rto_sweep) {
		err = run_sweep();
	} else {
		err = run_test(NULL);
	}

	umem_free(rand_data, SPA_MAXBLOCKSIZE);
	kernel_fini();

	return (err);
}
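
/*
 * Example invocations (illustrative, derived from the options above):
 *
 *	raidz_test -B			benchmark all raidz implementations
 *	raidz_test -S -t 60		parameter sweep, 60 second timeout
 *	raidz_test -a 12 -d 8 -s 17	single run: ashift 12, 8 data
 *					columns, 1 << 17 (128 KiB) zio
 */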