4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
26 #include <sys/zfs_context.h>
31 #include <sys/vdev_raidz.h>
32 #include <sys/vdev_raidz_impl.h>
35 #include "raidz_test.h"
37 static int *rand_data
;
38 raidz_test_opts_t rto_opts
;
40 static char pid_s
[16];
42 static void sig_handler(int signo
)
44 int old_errno
= errno
;
45 struct sigaction action
;
47 * Restore default action and re-raise signal so SIGSEGV and
48 * SIGABRT can trigger a core dump.
50 action
.sa_handler
= SIG_DFL
;
51 sigemptyset(&action
.sa_mask
);
53 (void) sigaction(signo
, &action
, NULL
);
55 if (rto_opts
.rto_gdb
) {
58 execlp("gdb", "gdb", "-ex", "set pagination 0",
62 while (waitpid(pid
, NULL
, 0) == -1 && errno
== EINTR
)
70 static void print_opts(raidz_test_opts_t
*opts
, boolean_t force
)
73 switch (opts
->rto_v
) {
86 if (force
|| opts
->rto_v
>= D_INFO
) {
87 (void) fprintf(stdout
, DBLSEP
"Running with options:\n"
88 " (-a) zio ashift : %zu\n"
89 " (-o) zio offset : 1 << %zu\n"
90 " (-e) expanded map : %s\n"
91 " (-r) reflow offset : %llx\n"
92 " (-d) number of raidz data columns : %zu\n"
93 " (-s) size of DATA : 1 << %zu\n"
94 " (-S) sweep parameters : %s \n"
95 " (-v) verbose : %s \n\n",
96 opts
->rto_ashift
, /* -a */
97 ilog2(opts
->rto_offset
), /* -o */
98 opts
->rto_expand
? "yes" : "no", /* -e */
99 (u_longlong_t
)opts
->rto_expand_offset
, /* -r */
100 opts
->rto_dcols
, /* -d */
101 ilog2(opts
->rto_dsize
), /* -s */
102 opts
->rto_sweep
? "yes" : "no", /* -S */
107 static void usage(boolean_t requested
)
109 const raidz_test_opts_t
*o
= &rto_opts_defaults
;
111 FILE *fp
= requested
? stdout
: stderr
;
113 (void) fprintf(fp
, "Usage:\n"
114 "\t[-a zio ashift (default: %zu)]\n"
115 "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
116 "\t[-d number of raidz data columns (default: %zu)]\n"
117 "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
118 "\t[-S parameter sweep (default: %s)]\n"
119 "\t[-t timeout for parameter sweep test]\n"
120 "\t[-B benchmark all raidz implementations]\n"
121 "\t[-e use expanded raidz map (default: %s)]\n"
122 "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
123 "\t[-v increase verbosity (default: %d)]\n"
124 "\t[-h (print help)]\n"
125 "\t[-T test the test, see if failure would be detected]\n"
126 "\t[-D debug (attach gdb on SIGSEGV)]\n"
128 o
->rto_ashift
, /* -a */
129 ilog2(o
->rto_offset
), /* -o */
130 o
->rto_dcols
, /* -d */
131 ilog2(o
->rto_dsize
), /* -s */
132 rto_opts
.rto_sweep
? "yes" : "no", /* -S */
133 rto_opts
.rto_expand
? "yes" : "no", /* -e */
134 (u_longlong_t
)o
->rto_expand_offset
, /* -r */
137 exit(requested
? 0 : 1);
140 static void process_options(int argc
, char **argv
)
144 raidz_test_opts_t
*o
= &rto_opts
;
146 memcpy(o
, &rto_opts_defaults
, sizeof (*o
));
148 while ((opt
= getopt(argc
, argv
, "TDBSvha:er:o:d:s:t:")) != -1) {
151 value
= strtoull(optarg
, NULL
, 0);
152 o
->rto_ashift
= MIN(13, MAX(9, value
));
158 o
->rto_expand_offset
= strtoull(optarg
, NULL
, 0);
161 value
= strtoull(optarg
, NULL
, 0);
162 o
->rto_offset
= ((1ULL << MIN(12, value
)) >> 9) << 9;
165 value
= strtoull(optarg
, NULL
, 0);
166 o
->rto_dcols
= MIN(255, MAX(1, value
));
169 value
= strtoull(optarg
, NULL
, 0);
170 o
->rto_dsize
= 1ULL << MIN(SPA_MAXBLOCKSHIFT
,
171 MAX(SPA_MINBLOCKSHIFT
, value
));
174 value
= strtoull(optarg
, NULL
, 0);
175 o
->rto_sweep_timeout
= value
;
184 o
->rto_benchmark
= 1;
/*
 * Accessors for the columns of a raidz row `rr`.
 *
 * DATA_COL / DATA_COL_SIZE take an index `i` relative to the row's first
 * data column (rr_firstdatacol) and yield that column's rc_abd / rc_size.
 * CODE_COL / CODE_COL_SIZE index rr_col[] directly from 0; cmp_code() uses
 * them for the parity columns.
 *
 * Every use of `rr` and `i` is parenthesized so that expression arguments
 * (e.g. `rows + 1`) expand correctly.  Note that the DATA_* macros evaluate
 * `rr` twice, so the argument must be free of side effects.
 */
#define DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define DATA_COL_SIZE(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
210 cmp_code(raidz_test_opts_t
*opts
, const raidz_map_t
*rm
, const int parity
)
214 VERIFY(parity
>= 1 && parity
<= 3);
216 for (r
= 0; r
< rm
->rm_nrows
; r
++) {
217 raidz_row_t
* const rr
= rm
->rm_row
[r
];
218 raidz_row_t
* const rrg
= opts
->rm_golden
->rm_row
[r
];
219 for (i
= 0; i
< parity
; i
++) {
220 if (CODE_COL_SIZE(rrg
, i
) == 0) {
221 VERIFY0(CODE_COL_SIZE(rr
, i
));
225 if (abd_cmp(CODE_COL(rr
, i
),
226 CODE_COL(rrg
, i
)) != 0) {
228 LOG_OPT(D_DEBUG
, opts
,
229 "\nParity block [%d] different!\n", i
);
237 cmp_data(raidz_test_opts_t
*opts
, raidz_map_t
*rm
)
239 int r
, i
, dcols
, ret
= 0;
241 for (r
= 0; r
< rm
->rm_nrows
; r
++) {
242 raidz_row_t
*rr
= rm
->rm_row
[r
];
243 raidz_row_t
*rrg
= opts
->rm_golden
->rm_row
[r
];
244 dcols
= opts
->rm_golden
->rm_row
[0]->rr_cols
-
245 raidz_parity(opts
->rm_golden
);
246 for (i
= 0; i
< dcols
; i
++) {
247 if (DATA_COL_SIZE(rrg
, i
) == 0) {
248 VERIFY0(DATA_COL_SIZE(rr
, i
));
252 if (abd_cmp(DATA_COL(rrg
, i
),
253 DATA_COL(rr
, i
)) != 0) {
256 LOG_OPT(D_DEBUG
, opts
,
257 "\nData block [%d] different!\n", i
);
265 init_rand(void *data
, size_t size
, void *private)
268 memcpy(data
, rand_data
, size
);
273 corrupt_colums(raidz_map_t
*rm
, const int *tgts
, const int cnt
)
275 for (int r
= 0; r
< rm
->rm_nrows
; r
++) {
276 raidz_row_t
*rr
= rm
->rm_row
[r
];
277 for (int i
= 0; i
< cnt
; i
++) {
278 raidz_col_t
*col
= &rr
->rr_col
[tgts
[i
]];
279 abd_iterate_func(col
->rc_abd
, 0, col
->rc_size
,
286 init_zio_abd(zio_t
*zio
)
288 abd_iterate_func(zio
->io_abd
, 0, zio
->io_size
, init_rand
, NULL
);
292 fini_raidz_map(zio_t
**zio
, raidz_map_t
**rm
)
294 vdev_raidz_map_free(*rm
);
295 raidz_free((*zio
)->io_abd
, (*zio
)->io_size
);
296 umem_free(*zio
, sizeof (zio_t
));
303 init_raidz_golden_map(raidz_test_opts_t
*opts
, const int parity
)
307 raidz_map_t
*rm_test
;
308 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
310 if (opts
->rm_golden
) {
311 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
314 opts
->zio_golden
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
315 zio_test
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
317 opts
->zio_golden
->io_offset
= zio_test
->io_offset
= opts
->rto_offset
;
318 opts
->zio_golden
->io_size
= zio_test
->io_size
= opts
->rto_dsize
;
320 opts
->zio_golden
->io_abd
= raidz_alloc(opts
->rto_dsize
);
321 zio_test
->io_abd
= raidz_alloc(opts
->rto_dsize
);
323 init_zio_abd(opts
->zio_golden
);
324 init_zio_abd(zio_test
);
326 VERIFY0(vdev_raidz_impl_set("original"));
328 if (opts
->rto_expand
) {
330 vdev_raidz_map_alloc_expanded(opts
->zio_golden
->io_abd
,
331 opts
->zio_golden
->io_size
, opts
->zio_golden
->io_offset
,
332 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
333 parity
, opts
->rto_expand_offset
);
334 rm_test
= vdev_raidz_map_alloc_expanded(zio_test
->io_abd
,
335 zio_test
->io_size
, zio_test
->io_offset
,
336 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
337 parity
, opts
->rto_expand_offset
);
339 opts
->rm_golden
= vdev_raidz_map_alloc(opts
->zio_golden
,
340 opts
->rto_ashift
, total_ncols
, parity
);
341 rm_test
= vdev_raidz_map_alloc(zio_test
,
342 opts
->rto_ashift
, total_ncols
, parity
);
345 VERIFY(opts
->zio_golden
);
346 VERIFY(opts
->rm_golden
);
348 vdev_raidz_generate_parity(opts
->rm_golden
);
349 vdev_raidz_generate_parity(rm_test
);
352 err
|= cmp_data(opts
, rm_test
);
353 err
|= cmp_code(opts
, rm_test
, parity
);
356 ERR("initializing the golden copy ... [FAIL]!\n");
358 /* tear down raidz_map of test zio */
359 fini_raidz_map(&zio_test
, &rm_test
);
365 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
366 * For each row, if the row is entirely before reflow_offset, it will
367 * come from the new location. Otherwise this row will come from the
368 * old location. Therefore, rows that straddle the reflow_offset will
369 * come from the old location.
371 * NOTE: Until raidz expansion is implemented, this function is only
372 * needed by raidz_test.c to test the multi-row raidz_map_t functionality.
375 vdev_raidz_map_alloc_expanded(abd_t
*abd
, uint64_t size
, uint64_t offset
,
376 uint64_t ashift
, uint64_t physical_cols
, uint64_t logical_cols
,
377 uint64_t nparity
, uint64_t reflow_offset
)
379 /* The zio's size in units of the vdev's minimum sector size. */
380 uint64_t s
= size
>> ashift
;
381 uint64_t q
, r
, bc
, devidx
, asize
= 0, tot
;
384 * "Quotient": The number of data sectors for this stripe on all but
385 * the "big column" child vdevs that also contain "remainder" data.
388 q
= s
/ (logical_cols
- nparity
);
391 * "Remainder": The number of partial stripe data sectors in this I/O.
392 * This will add a sector to some, but not all, child vdevs.
394 r
= s
- q
* (logical_cols
- nparity
);
396 /* The number of "big columns" - those which contain remainder data. */
397 bc
= (r
== 0 ? 0 : r
+ nparity
);
400 * The total number of data and parity sectors associated with
403 tot
= s
+ nparity
* (q
+ (r
== 0 ? 0 : 1));
405 /* How many rows contain data (not skip) */
406 uint64_t rows
= howmany(tot
, logical_cols
);
407 int cols
= MIN(tot
, logical_cols
);
409 raidz_map_t
*rm
= kmem_zalloc(offsetof(raidz_map_t
, rm_row
[rows
]),
413 for (uint64_t row
= 0; row
< rows
; row
++) {
414 raidz_row_t
*rr
= kmem_alloc(offsetof(raidz_row_t
,
415 rr_col
[cols
]), KM_SLEEP
);
416 rm
->rm_row
[row
] = rr
;
418 /* The starting RAIDZ (parent) vdev sector of the row. */
419 uint64_t b
= (offset
>> ashift
) + row
* logical_cols
;
422 * If we are in the middle of a reflow, and any part of this
423 * row has not been copied, then use the old location of
426 int row_phys_cols
= physical_cols
;
427 if (b
+ (logical_cols
- nparity
) > reflow_offset
>> ashift
)
430 /* starting child of this row */
431 uint64_t child_id
= b
% row_phys_cols
;
432 /* The starting byte offset on each child vdev. */
433 uint64_t child_offset
= (b
/ row_phys_cols
) << ashift
;
436 * We set cols to the entire width of the block, even
437 * if this row is shorter. This is needed because parity
438 * generation (for Q and R) needs to know the entire width,
439 * because it treats the short row as though it was
440 * full-width (and the "phantom" sectors were zero-filled).
442 * Another approach to this would be to set cols shorter
443 * (to just the number of columns that we might do i/o to)
444 * and have another mechanism to tell the parity generation
445 * about the "entire width". Reconstruction (at least
446 * vdev_raidz_reconstruct_general()) would also need to
447 * know about the "entire width".
451 rr
->rr_missingdata
= 0;
452 rr
->rr_missingparity
= 0;
453 rr
->rr_firstdatacol
= nparity
;
454 rr
->rr_abd_empty
= NULL
;
457 for (int c
= 0; c
< rr
->rr_cols
; c
++, child_id
++) {
458 if (child_id
>= row_phys_cols
) {
459 child_id
-= row_phys_cols
;
460 child_offset
+= 1ULL << ashift
;
462 rr
->rr_col
[c
].rc_devidx
= child_id
;
463 rr
->rr_col
[c
].rc_offset
= child_offset
;
464 rr
->rr_col
[c
].rc_orig_data
= NULL
;
465 rr
->rr_col
[c
].rc_error
= 0;
466 rr
->rr_col
[c
].rc_tried
= 0;
467 rr
->rr_col
[c
].rc_skipped
= 0;
468 rr
->rr_col
[c
].rc_need_orig_restore
= B_FALSE
;
470 uint64_t dc
= c
- rr
->rr_firstdatacol
;
471 if (c
< rr
->rr_firstdatacol
) {
472 rr
->rr_col
[c
].rc_size
= 1ULL << ashift
;
473 rr
->rr_col
[c
].rc_abd
=
474 abd_alloc_linear(rr
->rr_col
[c
].rc_size
,
476 } else if (row
== rows
- 1 && bc
!= 0 && c
>= bc
) {
478 * Past the end; this is for parity generation.
480 rr
->rr_col
[c
].rc_size
= 0;
481 rr
->rr_col
[c
].rc_abd
= NULL
;
484 * "data column" (col excluding parity)
485 * Add an ASCII art diagram here
489 if (c
< bc
|| r
== 0) {
490 off
= dc
* rows
+ row
;
493 (dc
- r
) * (rows
- 1) + row
;
495 rr
->rr_col
[c
].rc_size
= 1ULL << ashift
;
496 rr
->rr_col
[c
].rc_abd
= abd_get_offset_struct(
497 &rr
->rr_col
[c
].rc_abdstruct
,
498 abd
, off
<< ashift
, 1 << ashift
);
501 asize
+= rr
->rr_col
[c
].rc_size
;
504 * If all data stored spans all columns, there's a danger that
505 * parity will always be on the same device and, since parity
506 * isn't read during normal operation, that that device's I/O
507 * bandwidth won't be used effectively. We therefore switch
508 * the parity every 1MB.
510 * ...at least that was, ostensibly, the theory. As a practical
511 * matter unless we juggle the parity between all devices
512 * evenly, we won't see any benefit. Further, occasional writes
513 * that aren't a multiple of the LCM of the number of children
514 * and the minimum stripe width are sufficient to avoid pessimal
515 * behavior. Unfortunately, this decision created an implicit
516 * on-disk format requirement that we need to support for all
517 * eternity, but only for single-parity RAID-Z.
519 * If we intend to skip a sector in the zeroth column for
520 * padding we must make sure to note this swap. We will never
521 * intend to skip the first column since at least one data and
522 * one parity column must appear in each row.
524 if (rr
->rr_firstdatacol
== 1 && rr
->rr_cols
> 1 &&
525 (offset
& (1ULL << 20))) {
526 ASSERT(rr
->rr_cols
>= 2);
527 ASSERT(rr
->rr_col
[0].rc_size
== rr
->rr_col
[1].rc_size
);
528 devidx
= rr
->rr_col
[0].rc_devidx
;
529 uint64_t o
= rr
->rr_col
[0].rc_offset
;
530 rr
->rr_col
[0].rc_devidx
= rr
->rr_col
[1].rc_devidx
;
531 rr
->rr_col
[0].rc_offset
= rr
->rr_col
[1].rc_offset
;
532 rr
->rr_col
[1].rc_devidx
= devidx
;
533 rr
->rr_col
[1].rc_offset
= o
;
537 ASSERT3U(asize
, ==, tot
<< ashift
);
539 /* init RAIDZ parity ops */
540 rm
->rm_ops
= vdev_raidz_math_get_ops();
546 init_raidz_map(raidz_test_opts_t
*opts
, zio_t
**zio
, const int parity
)
548 raidz_map_t
*rm
= NULL
;
549 const size_t alloc_dsize
= opts
->rto_dsize
;
550 const size_t total_ncols
= opts
->rto_dcols
+ parity
;
551 const int ccols
[] = { 0, 1, 2 };
554 VERIFY(parity
<= 3 && parity
>= 1);
556 *zio
= umem_zalloc(sizeof (zio_t
), UMEM_NOFAIL
);
558 (*zio
)->io_offset
= 0;
559 (*zio
)->io_size
= alloc_dsize
;
560 (*zio
)->io_abd
= raidz_alloc(alloc_dsize
);
563 if (opts
->rto_expand
) {
564 rm
= vdev_raidz_map_alloc_expanded((*zio
)->io_abd
,
565 (*zio
)->io_size
, (*zio
)->io_offset
,
566 opts
->rto_ashift
, total_ncols
+1, total_ncols
,
567 parity
, opts
->rto_expand_offset
);
569 rm
= vdev_raidz_map_alloc(*zio
, opts
->rto_ashift
,
570 total_ncols
, parity
);
574 /* Make sure code columns are destroyed */
575 corrupt_colums(rm
, ccols
, parity
);
581 run_gen_check(raidz_test_opts_t
*opts
)
586 raidz_map_t
*rm_test
;
588 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
593 LOG(D_INFO
, "Testing parity generation...\n");
595 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
599 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
601 if (0 != vdev_raidz_impl_set(*impl_name
)) {
602 LOG(D_INFO
, "[SKIP]\n");
605 LOG(D_INFO
, "[SUPPORTED]\n");
608 for (fn
= 0; fn
< RAIDZ_GEN_NUM
; fn
++) {
610 /* Check if should stop */
611 if (rto_opts
.rto_should_stop
)
614 /* create suitable raidz_map */
615 rm_test
= init_raidz_map(opts
, &zio_test
, fn
+1);
618 LOG(D_INFO
, "\t\tTesting method [%s] ...",
621 if (!opts
->rto_sanity
)
622 vdev_raidz_generate_parity(rm_test
);
624 if (cmp_code(opts
, rm_test
, fn
+1) != 0) {
625 LOG(D_INFO
, "[FAIL]\n");
628 LOG(D_INFO
, "[PASS]\n");
630 fini_raidz_map(&zio_test
, &rm_test
);
634 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
640 run_rec_check_impl(raidz_test_opts_t
*opts
, raidz_map_t
*rm
, const int fn
)
645 static const int rec_tgts
[7][3] = {
646 {1, 2, 3}, /* rec_p: bad QR & D[0] */
647 {0, 2, 3}, /* rec_q: bad PR & D[0] */
648 {0, 1, 3}, /* rec_r: bad PQ & D[0] */
649 {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
650 {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
651 {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
652 {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
655 memcpy(tgtidx
, rec_tgts
[fn
], sizeof (tgtidx
));
657 if (fn
< RAIDZ_REC_PQ
) {
658 /* can reconstruct 1 failed data disk */
659 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
660 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
663 /* Check if should stop */
664 if (rto_opts
.rto_should_stop
)
667 LOG(D_DEBUG
, "[%d] ", x0
);
669 tgtidx
[2] = x0
+ raidz_parity(rm
);
671 corrupt_colums(rm
, tgtidx
+2, 1);
673 if (!opts
->rto_sanity
)
674 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
676 if (cmp_data(opts
, rm
) != 0) {
678 LOG(D_DEBUG
, "\nREC D[%d]... [FAIL]\n", x0
);
682 } else if (fn
< RAIDZ_REC_PQR
) {
683 /* can reconstruct 2 failed data disks */
684 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
685 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
687 for (x1
= x0
+ 1; x1
< opts
->rto_dcols
; x1
++) {
688 if (x1
>= rm
->rm_row
[0]->rr_cols
-
692 /* Check if should stop */
693 if (rto_opts
.rto_should_stop
)
696 LOG(D_DEBUG
, "[%d %d] ", x0
, x1
);
698 tgtidx
[1] = x0
+ raidz_parity(rm
);
699 tgtidx
[2] = x1
+ raidz_parity(rm
);
701 corrupt_colums(rm
, tgtidx
+1, 2);
703 if (!opts
->rto_sanity
)
704 vdev_raidz_reconstruct(rm
, tgtidx
, 3);
706 if (cmp_data(opts
, rm
) != 0) {
708 LOG(D_DEBUG
, "\nREC D[%d %d]... "
714 /* can reconstruct 3 failed data disks */
715 for (x0
= 0; x0
< opts
->rto_dcols
; x0
++) {
716 if (x0
>= rm
->rm_row
[0]->rr_cols
- raidz_parity(rm
))
718 for (x1
= x0
+ 1; x1
< opts
->rto_dcols
; x1
++) {
719 if (x1
>= rm
->rm_row
[0]->rr_cols
-
722 for (x2
= x1
+ 1; x2
< opts
->rto_dcols
; x2
++) {
723 if (x2
>= rm
->rm_row
[0]->rr_cols
-
727 /* Check if should stop */
728 if (rto_opts
.rto_should_stop
)
731 LOG(D_DEBUG
, "[%d %d %d]", x0
, x1
, x2
);
733 tgtidx
[0] = x0
+ raidz_parity(rm
);
734 tgtidx
[1] = x1
+ raidz_parity(rm
);
735 tgtidx
[2] = x2
+ raidz_parity(rm
);
737 corrupt_colums(rm
, tgtidx
, 3);
739 if (!opts
->rto_sanity
)
740 vdev_raidz_reconstruct(rm
,
743 if (cmp_data(opts
, rm
) != 0) {
746 "\nREC D[%d %d %d]... "
747 "[FAIL]\n", x0
, x1
, x2
);
757 run_rec_check(raidz_test_opts_t
*opts
)
760 unsigned fn
, err
= 0;
762 raidz_map_t
*rm_test
;
764 err
= init_raidz_golden_map(opts
, PARITY_PQR
);
769 LOG(D_INFO
, "Testing data reconstruction...\n");
771 for (impl_name
= (char **)raidz_impl_names
+1; *impl_name
!= NULL
;
775 LOG(D_INFO
, "\tTesting [%s] implementation...", *impl_name
);
777 if (vdev_raidz_impl_set(*impl_name
) != 0) {
778 LOG(D_INFO
, "[SKIP]\n");
781 LOG(D_INFO
, "[SUPPORTED]\n");
784 /* create suitable raidz_map */
785 rm_test
= init_raidz_map(opts
, &zio_test
, PARITY_PQR
);
786 /* generate parity */
787 vdev_raidz_generate_parity(rm_test
);
789 for (fn
= 0; fn
< RAIDZ_REC_NUM
; fn
++) {
791 LOG(D_INFO
, "\t\tTesting method [%s] ...",
794 if (run_rec_check_impl(opts
, rm_test
, fn
) != 0) {
795 LOG(D_INFO
, "[FAIL]\n");
799 LOG(D_INFO
, "[PASS]\n");
802 /* tear down test raidz_map */
803 fini_raidz_map(&zio_test
, &rm_test
);
806 fini_raidz_map(&opts
->zio_golden
, &opts
->rm_golden
);
812 run_test(raidz_test_opts_t
*opts
)
819 print_opts(opts
, B_FALSE
);
821 err
|= run_gen_check(opts
);
822 err
|= run_rec_check(opts
);
827 #define SWEEP_RUNNING 0
828 #define SWEEP_FINISHED 1
829 #define SWEEP_ERROR 2
830 #define SWEEP_TIMEOUT 3
832 static int sweep_state
= 0;
833 static raidz_test_opts_t failed_opts
;
835 static kmutex_t sem_mtx
;
836 static kcondvar_t sem_cv
;
837 static int max_free_slots
;
838 static int free_slots
;
840 static __attribute__((noreturn
)) void
841 sweep_thread(void *arg
)
844 raidz_test_opts_t
*opts
= (raidz_test_opts_t
*)arg
;
845 VERIFY(opts
!= NULL
);
847 err
= run_test(opts
);
849 if (rto_opts
.rto_sanity
) {
850 /* 25% chance that a sweep test fails */
851 if (rand() < (RAND_MAX
/4))
856 mutex_enter(&sem_mtx
);
857 memcpy(&failed_opts
, opts
, sizeof (raidz_test_opts_t
));
858 sweep_state
= SWEEP_ERROR
;
859 mutex_exit(&sem_mtx
);
862 umem_free(opts
, sizeof (raidz_test_opts_t
));
864 /* signal the next thread */
865 mutex_enter(&sem_mtx
);
868 mutex_exit(&sem_mtx
);
876 static const size_t dcols_v
[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
877 static const size_t ashift_v
[] = { 9, 12, 14 };
878 static const size_t size_v
[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
879 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE
};
881 (void) setvbuf(stdout
, NULL
, _IONBF
, 0);
883 ulong_t total_comb
= ARRAY_SIZE(size_v
) * ARRAY_SIZE(ashift_v
) *
885 ulong_t tried_comb
= 0;
886 hrtime_t time_diff
, start_time
= gethrtime();
887 raidz_test_opts_t
*opts
;
890 max_free_slots
= free_slots
= MAX(2, boot_ncpus
);
892 mutex_init(&sem_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
893 cv_init(&sem_cv
, NULL
, CV_DEFAULT
, NULL
);
895 for (s
= 0; s
< ARRAY_SIZE(size_v
); s
++)
896 for (a
= 0; a
< ARRAY_SIZE(ashift_v
); a
++)
897 for (d
= 0; d
< ARRAY_SIZE(dcols_v
); d
++) {
899 if (size_v
[s
] < (1 << ashift_v
[a
])) {
904 if (++tried_comb
% 20 == 0)
905 LOG(D_ALL
, "%lu/%lu... ", tried_comb
, total_comb
);
907 /* wait for signal to start new thread */
908 mutex_enter(&sem_mtx
);
909 while (cv_timedwait_sig(&sem_cv
, &sem_mtx
,
910 ddi_get_lbolt() + hz
)) {
912 /* check if should stop the test (timeout) */
913 time_diff
= (gethrtime() - start_time
) / NANOSEC
;
914 if (rto_opts
.rto_sweep_timeout
> 0 &&
915 time_diff
>= rto_opts
.rto_sweep_timeout
) {
916 sweep_state
= SWEEP_TIMEOUT
;
917 rto_opts
.rto_should_stop
= B_TRUE
;
918 mutex_exit(&sem_mtx
);
922 /* check if should stop the test (error) */
923 if (sweep_state
!= SWEEP_RUNNING
) {
924 mutex_exit(&sem_mtx
);
928 /* exit loop if a slot is available */
929 if (free_slots
> 0) {
935 mutex_exit(&sem_mtx
);
937 opts
= umem_zalloc(sizeof (raidz_test_opts_t
), UMEM_NOFAIL
);
938 opts
->rto_ashift
= ashift_v
[a
];
939 opts
->rto_dcols
= dcols_v
[d
];
940 opts
->rto_offset
= (1ULL << ashift_v
[a
]) * rand();
941 opts
->rto_dsize
= size_v
[s
];
942 opts
->rto_expand
= rto_opts
.rto_expand
;
943 opts
->rto_expand_offset
= rto_opts
.rto_expand_offset
;
944 opts
->rto_v
= 0; /* be quiet */
946 VERIFY3P(thread_create(NULL
, 0, sweep_thread
, (void *) opts
,
947 0, NULL
, TS_RUN
, defclsyspri
), !=, NULL
);
951 LOG(D_ALL
, "\nWaiting for test threads to finish...\n");
952 mutex_enter(&sem_mtx
);
953 VERIFY(free_slots
<= max_free_slots
);
954 while (free_slots
< max_free_slots
) {
955 (void) cv_wait(&sem_cv
, &sem_mtx
);
957 mutex_exit(&sem_mtx
);
959 if (sweep_state
== SWEEP_ERROR
) {
960 ERR("Sweep test failed! Failed option: \n");
961 print_opts(&failed_opts
, B_TRUE
);
963 if (sweep_state
== SWEEP_TIMEOUT
)
964 LOG(D_ALL
, "Test timeout (%lus). Stopping...\n",
965 (ulong_t
)rto_opts
.rto_sweep_timeout
);
967 LOG(D_ALL
, "Sweep test succeeded on %lu raidz maps!\n",
968 (ulong_t
)tried_comb
);
971 mutex_destroy(&sem_mtx
);
973 return (sweep_state
== SWEEP_ERROR
? SWEEP_ERROR
: 0);
978 main(int argc
, char **argv
)
981 struct sigaction action
;
984 /* init gdb pid string early */
985 (void) sprintf(pid_s
, "%d", getpid());
987 action
.sa_handler
= sig_handler
;
988 sigemptyset(&action
.sa_mask
);
991 if (sigaction(SIGSEGV
, &action
, NULL
) < 0) {
992 ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno
));
996 (void) setvbuf(stdout
, NULL
, _IOLBF
, 0);
998 dprintf_setup(&argc
, argv
);
1000 process_options(argc
, argv
);
1002 kernel_init(SPA_MODE_READ
);
1004 /* set up random data because rand() is not reentrant */
1005 rand_data
= (int *)umem_alloc(SPA_MAXBLOCKSIZE
, UMEM_NOFAIL
);
1006 srand((unsigned)time(NULL
) * getpid());
1007 for (i
= 0; i
< SPA_MAXBLOCKSIZE
/ sizeof (int); i
++)
1008 rand_data
[i
] = rand();
1010 mprotect(rand_data
, SPA_MAXBLOCKSIZE
, PROT_READ
);
1012 if (rto_opts
.rto_benchmark
) {
1013 run_raidz_benchmark();
1014 } else if (rto_opts
.rto_sweep
) {
1017 err
= run_test(NULL
);
1020 umem_free(rand_data
, SPA_MAXBLOCKSIZE
);