4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
25 * Copyright (c) 2016, 2017 Intel Corporation.
26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
30 * Functions to convert between a list of vdevs and an nvlist representing the
31 * configuration. Each entry in the list can be one of:
34 * disk=(path=..., devid=...)
43 * While the underlying implementation supports it, group vdevs cannot contain
44 * other group vdevs. All userland verification of devices is contained within
45 * this file. If successful, the nvlist returned can be passed directly to the
46 * kernel; we've done as much verification as possible in userland.
48 * Hot spares are a special case, and passed down as an array of disk vdevs, at
49 * the same level as the root of the vdev tree.
51 * The only function exported by this file is 'make_root_vdev'. The
52 * function performs several passes:
54 * 1. Construct the vdev specification. Performs syntax validation and
55 * makes sure each device is valid.
56 * 2. Check for devices in use. Using libblkid to make sure that no
57 * devices are also in use. Some can be overridden using the 'force'
58 * flag, others cannot.
59 * 3. Check for replication errors if the 'force' flag is not specified.
60 * validates that the replication level is consistent across the
62 * 4. Call libzfs to label any whole disks with an EFI label.
70 #include <libnvpair.h>
77 #include "zpool_util.h"
78 #include <sys/zfs_context.h>
82 * For any given vdev specification, we can have multiple errors. The
83 * vdev_error() function keeps track of whether we have seen an error yet, and
84 * prints out a header if its the first error we've seen.
90 vdev_error(const char *fmt
, ...)
95 (void) fprintf(stderr
, gettext("invalid vdev specification\n"));
97 (void) fprintf(stderr
, gettext("use '-f' to override "
98 "the following errors:\n"));
100 (void) fprintf(stderr
, gettext("the following errors "
101 "must be manually repaired:\n"));
106 (void) vfprintf(stderr
, fmt
, ap
);
111 * Check that a file is valid. All we can do in this case is check that it's
112 * not in use by another pool, and not in use by swap.
115 check_file_generic(const char *file
, boolean_t force
, boolean_t isspare
)
123 if ((fd
= open(file
, O_RDONLY
)) < 0)
126 if (zpool_in_use(g_zfs
, fd
, &state
, &name
, &inuse
) == 0 && inuse
) {
130 case POOL_STATE_ACTIVE
:
131 desc
= gettext("active");
134 case POOL_STATE_EXPORTED
:
135 desc
= gettext("exported");
138 case POOL_STATE_POTENTIALLY_ACTIVE
:
139 desc
= gettext("potentially active");
143 desc
= gettext("unknown");
148 * Allow hot spares to be shared between pools.
150 if (state
== POOL_STATE_SPARE
&& isspare
) {
156 if (state
== POOL_STATE_ACTIVE
||
157 state
== POOL_STATE_SPARE
|| !force
) {
159 case POOL_STATE_SPARE
:
160 vdev_error(gettext("%s is reserved as a hot "
161 "spare for pool %s\n"), file
, name
);
164 vdev_error(gettext("%s is part of %s pool "
165 "'%s'\n"), file
, desc
, name
);
179 * This may be a shorthand device path or it could be total gibberish.
180 * Check to see if it is a known device available in zfs_vdev_paths.
181 * As part of this check, see if we've been given an entire disk
182 * (minus the slice number).
185 is_shorthand_path(const char *arg
, char *path
, size_t path_size
,
186 struct stat64
*statbuf
, boolean_t
*wholedisk
)
190 error
= zfs_resolve_shortname(arg
, path
, path_size
);
192 *wholedisk
= zfs_dev_is_whole_disk(path
);
193 if (*wholedisk
|| (stat64(path
, statbuf
) == 0))
197 strlcpy(path
, arg
, path_size
);
198 memset(statbuf
, 0, sizeof (*statbuf
));
199 *wholedisk
= B_FALSE
;
205 * Determine if the given path is a hot spare within the given configuration.
206 * If no configuration is given we rely solely on the label.
209 is_spare(nvlist_t
*config
, const char *path
)
215 uint64_t guid
, spareguid
;
221 if (zpool_is_draid_spare(path
))
224 if ((fd
= open(path
, O_RDONLY
|O_DIRECT
)) < 0)
227 if (zpool_in_use(g_zfs
, fd
, &state
, &name
, &inuse
) != 0 ||
229 state
!= POOL_STATE_SPARE
||
230 zpool_read_label(fd
, &label
, NULL
) != 0) {
238 if (config
== NULL
) {
243 verify(nvlist_lookup_uint64(label
, ZPOOL_CONFIG_GUID
, &guid
) == 0);
246 verify(nvlist_lookup_nvlist(config
, ZPOOL_CONFIG_VDEV_TREE
,
248 if (nvlist_lookup_nvlist_array(nvroot
, ZPOOL_CONFIG_SPARES
,
249 &spares
, &nspares
) == 0) {
250 for (i
= 0; i
< nspares
; i
++) {
251 verify(nvlist_lookup_uint64(spares
[i
],
252 ZPOOL_CONFIG_GUID
, &spareguid
) == 0);
253 if (spareguid
== guid
)
262 * Create a leaf vdev. Determine if this is a file or a device. If it's a
263 * device, fill in the device id to make a complete nvlist. Valid forms for a
266 * /dev/xxx Complete disk path
267 * /xxx Full path to file
268 * xxx Shorthand for <zfs_vdev_paths>/xxx
269 * draid* Virtual dRAID spare
272 make_leaf_vdev(nvlist_t
*props
, const char *arg
, boolean_t is_primary
)
274 char path
[MAXPATHLEN
];
275 struct stat64 statbuf
;
276 nvlist_t
*vdev
= NULL
;
277 const char *type
= NULL
;
278 boolean_t wholedisk
= B_FALSE
;
283 * Determine what type of vdev this is, and put the full path into
284 * 'path'. We detect whether this is a device of file afterwards by
285 * checking the st_mode of the file.
289 * Complete device or file path. Exact type is determined by
290 * examining the file descriptor afterwards. Symbolic links
291 * are resolved to their real paths to determine whole disk
292 * and S_ISBLK/S_ISREG type checks. However, we are careful
293 * to store the given path as ZPOOL_CONFIG_PATH to ensure we
294 * can leverage udev's persistent device labels.
296 if (realpath(arg
, path
) == NULL
) {
297 (void) fprintf(stderr
,
298 gettext("cannot resolve path '%s'\n"), arg
);
302 wholedisk
= zfs_dev_is_whole_disk(path
);
303 if (!wholedisk
&& (stat64(path
, &statbuf
) != 0)) {
304 (void) fprintf(stderr
,
305 gettext("cannot open '%s': %s\n"),
306 path
, strerror(errno
));
310 /* After whole disk check restore original passed path */
311 strlcpy(path
, arg
, sizeof (path
));
312 } else if (zpool_is_draid_spare(arg
)) {
314 (void) fprintf(stderr
,
315 gettext("cannot open '%s': dRAID spares can only "
316 "be used to replace primary vdevs\n"), arg
);
321 strlcpy(path
, arg
, sizeof (path
));
322 type
= VDEV_TYPE_DRAID_SPARE
;
324 err
= is_shorthand_path(arg
, path
, sizeof (path
),
325 &statbuf
, &wholedisk
);
328 * If we got ENOENT, then the user gave us
329 * gibberish, so try to direct them with a
330 * reasonable error message. Otherwise,
331 * regurgitate strerror() since it's the best we
335 (void) fprintf(stderr
,
336 gettext("cannot open '%s': no such "
337 "device in %s\n"), arg
, DISK_ROOT
);
338 (void) fprintf(stderr
,
339 gettext("must be a full path or "
340 "shorthand device name\n"));
343 (void) fprintf(stderr
,
344 gettext("cannot open '%s': %s\n"),
345 path
, strerror(errno
));
353 * Determine whether this is a device or a file.
355 if (wholedisk
|| S_ISBLK(statbuf
.st_mode
)) {
356 type
= VDEV_TYPE_DISK
;
357 } else if (S_ISREG(statbuf
.st_mode
)) {
358 type
= VDEV_TYPE_FILE
;
360 fprintf(stderr
, gettext("cannot use '%s': must "
361 "be a block device or regular file\n"), path
);
367 * Finally, we have the complete device or file, and we know that it is
368 * acceptable to use. Construct the nvlist to describe this vdev. All
369 * vdevs have a 'path' element, and devices also have a 'devid' element.
371 verify(nvlist_alloc(&vdev
, NV_UNIQUE_NAME
, 0) == 0);
372 verify(nvlist_add_string(vdev
, ZPOOL_CONFIG_PATH
, path
) == 0);
373 verify(nvlist_add_string(vdev
, ZPOOL_CONFIG_TYPE
, type
) == 0);
375 /* Lookup and add the enclosure sysfs path (if exists) */
376 update_vdev_config_dev_sysfs_path(vdev
, path
,
377 ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH
);
379 if (strcmp(type
, VDEV_TYPE_DISK
) == 0)
380 verify(nvlist_add_uint64(vdev
, ZPOOL_CONFIG_WHOLE_DISK
,
381 (uint64_t)wholedisk
) == 0);
384 * Override defaults if custom properties are provided.
387 const char *value
= NULL
;
389 if (nvlist_lookup_string(props
,
390 zpool_prop_to_name(ZPOOL_PROP_ASHIFT
), &value
) == 0) {
391 if (zfs_nicestrtonum(NULL
, value
, &ashift
) != 0) {
392 (void) fprintf(stderr
,
393 gettext("ashift must be a number.\n"));
397 (ashift
< ASHIFT_MIN
|| ashift
> ASHIFT_MAX
)) {
398 (void) fprintf(stderr
,
399 gettext("invalid 'ashift=%" PRIu64
"' "
400 "property: only values between %" PRId32
" "
401 "and %" PRId32
" are allowed.\n"),
402 ashift
, ASHIFT_MIN
, ASHIFT_MAX
);
409 * If the device is known to incorrectly report its physical sector
410 * size explicitly provide the known correct value.
415 if (check_sector_size_database(path
, §or_size
) == B_TRUE
)
416 ashift
= highbit64(sector_size
) - 1;
420 (void) nvlist_add_uint64(vdev
, ZPOOL_CONFIG_ASHIFT
, ashift
);
/*
 * Go through and verify the replication level of the pool is consistent.
 * Performs the following checks:
 *
 *	For the new spec, verifies that devices in mirrors and raidz are the
 *	same size.
 *
 *	If the current configuration already has inconsistent replication
 *	levels, ignore any other potential problems in the new spec.
 *
 *	Otherwise, make sure that the current spec (if there is one) and the new
 *	spec have consistent replication levels.
 *
 *	If there is no current spec (create), make sure new spec has at least
 *	one general purpose vdev.
 */
typedef struct replication_level {
    const char *zprl_type;	/* vdev type string (not owned) */
    uint64_t zprl_children;	/* number of leaves in the top-level vdev */
    uint64_t zprl_parity;	/* parity count for raidz/draid, else 0 */
} replication_level_t;

/* Devices in one top-level vdev may differ in size by at most this much. */
#define	ZPOOL_FUZZ	(16 * 1024 * 1024)
450 * N.B. For the purposes of comparing replication levels dRAID can be
451 * considered functionally equivalent to raidz.
454 is_raidz_mirror(replication_level_t
*a
, replication_level_t
*b
,
455 replication_level_t
**raidz
, replication_level_t
**mirror
)
457 if ((strcmp(a
->zprl_type
, "raidz") == 0 ||
458 strcmp(a
->zprl_type
, "draid") == 0) &&
459 strcmp(b
->zprl_type
, "mirror") == 0) {
468 * Comparison for determining if dRAID and raidz where passed in either order.
471 is_raidz_draid(replication_level_t
*a
, replication_level_t
*b
)
473 if ((strcmp(a
->zprl_type
, "raidz") == 0 ||
474 strcmp(a
->zprl_type
, "draid") == 0) &&
475 (strcmp(b
->zprl_type
, "raidz") == 0 ||
476 strcmp(b
->zprl_type
, "draid") == 0)) {
484 * Given a list of toplevel vdevs, return the current replication level. If
485 * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
486 * an error message will be displayed for each self-inconsistent vdev.
488 static replication_level_t
*
489 get_replication(nvlist_t
*nvroot
, boolean_t fatal
)
497 replication_level_t lastrep
= {0};
498 replication_level_t rep
;
499 replication_level_t
*ret
;
500 replication_level_t
*raidz
, *mirror
;
501 boolean_t dontreport
;
503 ret
= safe_malloc(sizeof (replication_level_t
));
505 verify(nvlist_lookup_nvlist_array(nvroot
, ZPOOL_CONFIG_CHILDREN
,
506 &top
, &toplevels
) == 0);
508 for (t
= 0; t
< toplevels
; t
++) {
509 uint64_t is_log
= B_FALSE
;
514 * For separate logs we ignore the top level vdev replication
517 (void) nvlist_lookup_uint64(nv
, ZPOOL_CONFIG_IS_LOG
, &is_log
);
522 * Ignore holes introduced by removing aux devices, along
523 * with indirect vdevs introduced by previously removed
526 verify(nvlist_lookup_string(nv
, ZPOOL_CONFIG_TYPE
, &type
) == 0);
527 if (strcmp(type
, VDEV_TYPE_HOLE
) == 0 ||
528 strcmp(type
, VDEV_TYPE_INDIRECT
) == 0)
531 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_CHILDREN
,
532 &child
, &children
) != 0) {
534 * This is a 'file' or 'disk' vdev.
536 rep
.zprl_type
= type
;
537 rep
.zprl_children
= 1;
543 * This is a mirror or RAID-Z vdev. Go through and make
544 * sure the contents are all the same (files vs. disks),
545 * keeping track of the number of elements in the
548 * We also check that the size of each vdev (if it can
549 * be determined) is the same.
551 rep
.zprl_type
= type
;
552 rep
.zprl_children
= 0;
554 if (strcmp(type
, VDEV_TYPE_RAIDZ
) == 0 ||
555 strcmp(type
, VDEV_TYPE_DRAID
) == 0) {
556 verify(nvlist_lookup_uint64(nv
,
557 ZPOOL_CONFIG_NPARITY
,
558 &rep
.zprl_parity
) == 0);
559 assert(rep
.zprl_parity
!= 0);
565 * The 'dontreport' variable indicates that we've
566 * already reported an error for this spec, so don't
567 * bother doing it again.
572 for (c
= 0; c
< children
; c
++) {
573 nvlist_t
*cnv
= child
[c
];
575 struct stat64 statbuf
;
577 const char *childtype
;
582 verify(nvlist_lookup_string(cnv
,
583 ZPOOL_CONFIG_TYPE
, &childtype
) == 0);
586 * If this is a replacing or spare vdev, then
587 * get the real first child of the vdev: do this
588 * in a loop because replacing and spare vdevs
591 while (strcmp(childtype
,
592 VDEV_TYPE_REPLACING
) == 0 ||
593 strcmp(childtype
, VDEV_TYPE_SPARE
) == 0) {
597 verify(nvlist_lookup_nvlist_array(cnv
,
598 ZPOOL_CONFIG_CHILDREN
, &rchild
,
600 assert(rchildren
== 2);
603 verify(nvlist_lookup_string(cnv
,
608 verify(nvlist_lookup_string(cnv
,
609 ZPOOL_CONFIG_PATH
, &path
) == 0);
612 * If we have a raidz/mirror that combines disks
613 * with files, report it as an error.
615 if (!dontreport
&& type
!= NULL
&&
616 strcmp(type
, childtype
) != 0) {
622 "mismatched replication "
623 "level: %s contains both "
624 "files and devices\n"),
632 * According to stat(2), the value of 'st_size'
633 * is undefined for block devices and character
634 * devices. But there is no effective way to
635 * determine the real size in userland.
637 * Instead, we'll take advantage of an
638 * implementation detail of spec_size(). If the
639 * device is currently open, then we (should)
640 * return a valid size.
642 * If we still don't get a valid size (indicated
643 * by a size of 0 or MAXOFFSET_T), then ignore
644 * this device altogether.
646 if ((fd
= open(path
, O_RDONLY
)) >= 0) {
647 err
= fstat64_blk(fd
, &statbuf
);
650 err
= stat64(path
, &statbuf
);
654 statbuf
.st_size
== 0 ||
655 statbuf
.st_size
== MAXOFFSET_T
)
658 size
= statbuf
.st_size
;
661 * Also make sure that devices and
662 * slices have a consistent size. If
663 * they differ by a significant amount
664 * (~16MB) then report an error.
667 (vdev_size
!= -1LL &&
668 (llabs(size
- vdev_size
) >
675 "%s contains devices of "
676 "different sizes\n"),
689 * At this point, we have the replication of the last toplevel
690 * vdev in 'rep'. Compare it to 'lastrep' to see if it is
693 if (lastrep
.zprl_type
!= NULL
) {
694 if (is_raidz_mirror(&lastrep
, &rep
, &raidz
, &mirror
) ||
695 is_raidz_mirror(&rep
, &lastrep
, &raidz
, &mirror
)) {
697 * Accepted raidz and mirror when they can
698 * handle the same number of disk failures.
700 if (raidz
->zprl_parity
!=
701 mirror
->zprl_children
- 1) {
707 "mismatched replication "
709 "%s and %s vdevs with "
710 "different redundancy, "
711 "%llu vs. %llu (%llu-way) "
718 mirror
->zprl_children
- 1,
720 mirror
->zprl_children
);
724 } else if (is_raidz_draid(&lastrep
, &rep
)) {
726 * Accepted raidz and draid when they can
727 * handle the same number of disk failures.
729 if (lastrep
.zprl_parity
!= rep
.zprl_parity
) {
735 "mismatched replication "
736 "level: %s and %s vdevs "
738 "redundancy, %llu vs. "
739 "%llu are present\n"),
749 } else if (strcmp(lastrep
.zprl_type
, rep
.zprl_type
) !=
756 "mismatched replication level: "
757 "both %s and %s vdevs are "
759 lastrep
.zprl_type
, rep
.zprl_type
);
762 } else if (lastrep
.zprl_parity
!= rep
.zprl_parity
) {
768 "mismatched replication level: "
769 "both %llu and %llu device parity "
770 "%s vdevs are present\n"),
773 (u_longlong_t
)rep
.zprl_parity
,
777 } else if (lastrep
.zprl_children
!= rep
.zprl_children
) {
783 "mismatched replication level: "
784 "both %llu-way and %llu-way %s "
785 "vdevs are present\n"),
787 lastrep
.zprl_children
,
805 * Check the replication level of the vdev spec against the current pool. Calls
806 * get_replication() to make sure the new spec is self-consistent. If the pool
807 * has a consistent replication level, then we ignore any errors. Otherwise,
808 * report any difference between the two.
811 check_replication(nvlist_t
*config
, nvlist_t
*newroot
)
815 replication_level_t
*current
= NULL
, *new;
816 replication_level_t
*raidz
, *mirror
;
820 * If we have a current pool configuration, check to see if it's
821 * self-consistent. If not, simply return success.
823 if (config
!= NULL
) {
826 verify(nvlist_lookup_nvlist(config
, ZPOOL_CONFIG_VDEV_TREE
,
828 if ((current
= get_replication(nvroot
, B_FALSE
)) == NULL
)
832 * for spares there may be no children, and therefore no
833 * replication level to check
835 if ((nvlist_lookup_nvlist_array(newroot
, ZPOOL_CONFIG_CHILDREN
,
836 &child
, &children
) != 0) || (children
== 0)) {
842 * If all we have is logs then there's no replication level to check.
844 if (num_logs(newroot
) == children
) {
850 * Get the replication level of the new vdev spec, reporting any
851 * inconsistencies found.
853 if ((new = get_replication(newroot
, B_TRUE
)) == NULL
) {
859 * Check to see if the new vdev spec matches the replication level of
863 if (current
!= NULL
) {
864 if (is_raidz_mirror(current
, new, &raidz
, &mirror
) ||
865 is_raidz_mirror(new, current
, &raidz
, &mirror
)) {
866 if (raidz
->zprl_parity
!= mirror
->zprl_children
- 1) {
868 "mismatched replication level: pool and "
869 "new vdev with different redundancy, %s "
870 "and %s vdevs, %llu vs. %llu (%llu-way)\n"),
873 (u_longlong_t
)raidz
->zprl_parity
,
874 (u_longlong_t
)mirror
->zprl_children
- 1,
875 (u_longlong_t
)mirror
->zprl_children
);
878 } else if (strcmp(current
->zprl_type
, new->zprl_type
) != 0) {
880 "mismatched replication level: pool uses %s "
881 "and new vdev is %s\n"),
882 current
->zprl_type
, new->zprl_type
);
884 } else if (current
->zprl_parity
!= new->zprl_parity
) {
886 "mismatched replication level: pool uses %llu "
887 "device parity and new vdev uses %llu\n"),
888 (u_longlong_t
)current
->zprl_parity
,
889 (u_longlong_t
)new->zprl_parity
);
891 } else if (current
->zprl_children
!= new->zprl_children
) {
893 "mismatched replication level: pool uses %llu-way "
894 "%s and new vdev uses %llu-way %s\n"),
895 (u_longlong_t
)current
->zprl_children
,
897 (u_longlong_t
)new->zprl_children
,
/*
 * Zero the first 4K of the device or file at 'path' so that stale
 * filesystem magic cannot be misidentified later (e.g. by libblkid).
 *
 * Returns 0 on success, -1 on any open/short-write failure (with a
 * diagnostic printed to stderr).
 */
static int
zero_label(const char *path)
{
    const int size = 4096;
    char buf[size];
    int err, fd;

    if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) {
        (void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
            path, strerror(errno));
        return (-1);
    }

    memset(buf, 0, size);
    err = write(fd, buf, size);
    (void) fdatasync(fd);
    (void) close(fd);

    if (err == -1) {
        (void) fprintf(stderr, gettext("cannot zero first %d bytes "
            "of '%s': %s\n"), size, path, strerror(errno));
        return (-1);
    }

    if (err != size) {
        (void) fprintf(stderr, gettext("could only zero %d/%d bytes "
            "of '%s'\n"), err, size, path);
        return (-1);
    }

    return (0);
}
/*
 * Print each of 'lines_cnt' strings from 'lines' to stderr, one per line.
 * Used to relay helper-script output to the user.
 */
static void
lines_to_stderr(char *lines[], int lines_cnt)
{
    int i;

    for (i = 0; i < lines_cnt; i++) {
        fprintf(stderr, "%s\n", lines[i]);
    }
}
953 * Go through and find any whole disks in the vdev specification, labelling them
954 * as appropriate. When constructing the vdev spec, we were unable to open this
955 * device in order to provide a devid. Now that we have labelled the disk and
956 * know that slice 0 is valid, we can construct the devid now.
958 * If the disk was already labeled with an EFI label, we will have gotten the
959 * devid already (because we were able to open the whole disk). Otherwise, we
960 * need to get the devid after we label the disk.
963 make_disks(zpool_handle_t
*zhp
, nvlist_t
*nv
, boolean_t replacing
)
967 const char *type
, *path
;
968 char devpath
[MAXPATHLEN
];
969 char udevpath
[MAXPATHLEN
];
971 struct stat64 statbuf
;
972 int is_exclusive
= 0;
976 verify(nvlist_lookup_string(nv
, ZPOOL_CONFIG_TYPE
, &type
) == 0);
978 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_CHILDREN
,
979 &child
, &children
) != 0) {
981 if (strcmp(type
, VDEV_TYPE_DISK
) != 0)
985 * We have a disk device. If this is a whole disk write
986 * out the efi partition table, otherwise write zero's to
987 * the first 4k of the partition. This is to ensure that
988 * libblkid will not misidentify the partition due to a
989 * magic value left by the previous filesystem.
991 verify(!nvlist_lookup_string(nv
, ZPOOL_CONFIG_PATH
, &path
));
992 verify(!nvlist_lookup_uint64(nv
, ZPOOL_CONFIG_WHOLE_DISK
,
997 * Update device id string for mpath nodes (Linux only)
999 if (is_mpath_whole_disk(path
))
1000 update_vdev_config_dev_strs(nv
);
1002 if (!is_spare(NULL
, path
))
1003 (void) zero_label(path
);
1007 if (realpath(path
, devpath
) == NULL
) {
1009 (void) fprintf(stderr
,
1010 gettext("cannot resolve path '%s'\n"), path
);
1015 * Remove any previously existing symlink from a udev path to
1016 * the device before labeling the disk. This ensures that
1017 * only newly created links are used. Otherwise there is a
1018 * window between when udev deletes and recreates the link
1019 * during which access attempts will fail with ENOENT.
1021 strlcpy(udevpath
, path
, MAXPATHLEN
);
1022 (void) zfs_append_partition(udevpath
, MAXPATHLEN
);
1024 fd
= open(devpath
, O_RDWR
|O_EXCL
);
1037 * If the partition exists, contains a valid spare label,
1038 * and is opened exclusively there is no need to partition
1039 * it. Hot spares have already been partitioned and are
1040 * held open exclusively by the kernel as a safety measure.
1042 * If the provided path is for a /dev/disk/ device its
1043 * symbolic link will be removed, partition table created,
1044 * and then block until udev creates the new link.
1046 if (!is_exclusive
&& !is_spare(NULL
, udevpath
)) {
1047 char *devnode
= strrchr(devpath
, '/') + 1;
1048 char **lines
= NULL
;
1051 ret
= strncmp(udevpath
, UDISK_ROOT
, strlen(UDISK_ROOT
));
1053 ret
= lstat64(udevpath
, &statbuf
);
1054 if (ret
== 0 && S_ISLNK(statbuf
.st_mode
))
1055 (void) unlink(udevpath
);
1059 * When labeling a pool the raw device node name
1060 * is provided as it appears under /dev/.
1062 * Note that 'zhp' will be NULL when we're creating a
1065 if (zpool_prepare_and_label_disk(g_zfs
, zhp
, devnode
,
1066 nv
, zhp
== NULL
? "create" :
1067 replacing
? "replace" : "add", &lines
,
1069 (void) fprintf(stderr
,
1071 "Error preparing/labeling disk.\n"));
1072 if (lines_cnt
> 0) {
1073 (void) fprintf(stderr
,
1074 gettext("zfs_prepare_disk output:\n"));
1075 lines_to_stderr(lines
, lines_cnt
);
1078 libzfs_free_str_array(lines
, lines_cnt
);
1081 libzfs_free_str_array(lines
, lines_cnt
);
1084 * Wait for udev to signal the device is available
1085 * by the provided path.
1087 ret
= zpool_label_disk_wait(udevpath
, DISK_LABEL_WAIT
);
1089 (void) fprintf(stderr
,
1090 gettext("missing link: %s was "
1091 "partitioned but %s is missing\n"),
1096 ret
= zero_label(udevpath
);
1102 * Update the path to refer to the partition. The presence of
1103 * the 'whole_disk' field indicates to the CLI that we should
1104 * chop off the partition number when displaying the device in
1107 verify(nvlist_add_string(nv
, ZPOOL_CONFIG_PATH
, udevpath
) == 0);
1110 * Update device id strings for whole disks (Linux only)
1112 update_vdev_config_dev_strs(nv
);
1117 for (c
= 0; c
< children
; c
++)
1118 if ((ret
= make_disks(zhp
, child
[c
], replacing
)) != 0)
1121 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_SPARES
,
1122 &child
, &children
) == 0)
1123 for (c
= 0; c
< children
; c
++)
1124 if ((ret
= make_disks(zhp
, child
[c
], replacing
)) != 0)
1127 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_L2CACHE
,
1128 &child
, &children
) == 0)
1129 for (c
= 0; c
< children
; c
++)
1130 if ((ret
= make_disks(zhp
, child
[c
], replacing
)) != 0)
1137 * Go through and find any devices that are in use. We rely on libdiskmgt for
1138 * the majority of this task.
1141 is_device_in_use(nvlist_t
*config
, nvlist_t
*nv
, boolean_t force
,
1142 boolean_t replacing
, boolean_t isspare
)
1146 const char *type
, *path
;
1148 char buf
[MAXPATHLEN
];
1149 uint64_t wholedisk
= B_FALSE
;
1150 boolean_t anyinuse
= B_FALSE
;
1152 verify(nvlist_lookup_string(nv
, ZPOOL_CONFIG_TYPE
, &type
) == 0);
1154 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_CHILDREN
,
1155 &child
, &children
) != 0) {
1157 verify(!nvlist_lookup_string(nv
, ZPOOL_CONFIG_PATH
, &path
));
1158 if (strcmp(type
, VDEV_TYPE_DISK
) == 0)
1159 verify(!nvlist_lookup_uint64(nv
,
1160 ZPOOL_CONFIG_WHOLE_DISK
, &wholedisk
));
1163 * As a generic check, we look to see if this is a replace of a
1164 * hot spare within the same pool. If so, we allow it
1165 * regardless of what libblkid or zpool_in_use() says.
1168 (void) strlcpy(buf
, path
, sizeof (buf
));
1170 ret
= zfs_append_partition(buf
, sizeof (buf
));
1175 if (is_spare(config
, buf
))
1179 if (strcmp(type
, VDEV_TYPE_DISK
) == 0)
1180 ret
= check_device(path
, force
, isspare
, wholedisk
);
1182 else if (strcmp(type
, VDEV_TYPE_FILE
) == 0)
1183 ret
= check_file(path
, force
, isspare
);
1188 for (c
= 0; c
< children
; c
++)
1189 if (is_device_in_use(config
, child
[c
], force
, replacing
,
1193 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_SPARES
,
1194 &child
, &children
) == 0)
1195 for (c
= 0; c
< children
; c
++)
1196 if (is_device_in_use(config
, child
[c
], force
, replacing
,
1200 if (nvlist_lookup_nvlist_array(nv
, ZPOOL_CONFIG_L2CACHE
,
1201 &child
, &children
) == 0)
1202 for (c
= 0; c
< children
; c
++)
1203 if (is_device_in_use(config
, child
[c
], force
, replacing
,
1211 * Returns the parity level extracted from a raidz or draid type.
1212 * If the parity cannot be determined zero is returned.
1215 get_parity(const char *type
)
1220 if (strncmp(type
, VDEV_TYPE_RAIDZ
, strlen(VDEV_TYPE_RAIDZ
)) == 0) {
1221 p
= type
+ strlen(VDEV_TYPE_RAIDZ
);
1224 /* when unspecified default to single parity */
1226 } else if (*p
== '0') {
1227 /* no zero prefixes allowed */
1230 /* 0-3, no suffixes allowed */
1233 parity
= strtol(p
, &end
, 10);
1234 if (errno
!= 0 || *end
!= '\0' ||
1235 parity
< 1 || parity
> VDEV_RAIDZ_MAXPARITY
) {
1239 } else if (strncmp(type
, VDEV_TYPE_DRAID
,
1240 strlen(VDEV_TYPE_DRAID
)) == 0) {
1241 p
= type
+ strlen(VDEV_TYPE_DRAID
);
1243 if (*p
== '\0' || *p
== ':') {
1244 /* when unspecified default to single parity */
1246 } else if (*p
== '0') {
1247 /* no zero prefixes allowed */
1250 /* 0-3, allowed suffixes: '\0' or ':' */
1253 parity
= strtol(p
, &end
, 10);
1255 parity
< 1 || parity
> VDEV_DRAID_MAXPARITY
||
1256 (*end
!= '\0' && *end
!= ':')) {
1262 return ((int)parity
);
1266 * Assign the minimum and maximum number of devices allowed for
1267 * the specified type. On error NULL is returned, otherwise the
1268 * type prefix is returned (raidz, mirror, etc).
1271 is_grouping(const char *type
, int *mindev
, int *maxdev
)
1275 if (strncmp(type
, VDEV_TYPE_RAIDZ
, strlen(VDEV_TYPE_RAIDZ
)) == 0 ||
1276 strncmp(type
, VDEV_TYPE_DRAID
, strlen(VDEV_TYPE_DRAID
)) == 0) {
1277 nparity
= get_parity(type
);
1281 *mindev
= nparity
+ 1;
1285 if (strncmp(type
, VDEV_TYPE_RAIDZ
,
1286 strlen(VDEV_TYPE_RAIDZ
)) == 0) {
1287 return (VDEV_TYPE_RAIDZ
);
1289 return (VDEV_TYPE_DRAID
);
1296 if (strcmp(type
, "mirror") == 0) {
1299 return (VDEV_TYPE_MIRROR
);
1302 if (strcmp(type
, "spare") == 0) {
1305 return (VDEV_TYPE_SPARE
);
1308 if (strcmp(type
, "log") == 0) {
1311 return (VDEV_TYPE_LOG
);
1314 if (strcmp(type
, VDEV_ALLOC_BIAS_SPECIAL
) == 0 ||
1315 strcmp(type
, VDEV_ALLOC_BIAS_DEDUP
) == 0) {
1321 if (strcmp(type
, "cache") == 0) {
1324 return (VDEV_TYPE_L2CACHE
);
1331 * Extract the configuration parameters encoded in the dRAID type and
1332 * use them to generate a dRAID configuration. The expected format is:
1334 * draid[<parity>][:<data><d|D>][:<children><c|C>][:<spares><s|S>]
1336 * The intent is to be able to generate a good configuration when no
1337 * additional information is provided. The only mandatory component
1338 * of the 'type' is the 'draid' prefix. If a value is not provided
1339 * then reasonable defaults are used. The optional components may
1340 * appear in any order but the d/s/c suffix is required.
1343 * - data: number of data devices per group (1-255)
1344 * - parity: number of parity blocks per group (1-3)
1345 * - spares: number of distributed spare (0-100)
1346 * - children: total number of devices (1-255)
1349 * - zpool create tank draid <devices...>
1350 * - zpool create tank draid2:8d:51c:2s <devices...>
1353 draid_config_by_type(nvlist_t
*nv
, const char *type
, uint64_t children
)
1355 uint64_t nparity
= 1;
1356 uint64_t nspares
= 0;
1357 uint64_t ndata
= UINT64_MAX
;
1358 uint64_t ngroups
= 1;
1361 if (strncmp(type
, VDEV_TYPE_DRAID
, strlen(VDEV_TYPE_DRAID
)) != 0)
1364 nparity
= (uint64_t)get_parity(type
);
1365 if (nparity
== 0 || nparity
> VDEV_DRAID_MAXPARITY
) {
1367 gettext("invalid dRAID parity level %llu; must be "
1368 "between 1 and %d\n"), (u_longlong_t
)nparity
,
1369 VDEV_DRAID_MAXPARITY
);
1373 char *p
= (char *)type
;
1374 while ((p
= strchr(p
, ':')) != NULL
) {
1380 if (!isdigit(p
[0])) {
1381 (void) fprintf(stderr
, gettext("invalid dRAID "
1382 "syntax; expected [:<number><c|d|s>] not '%s'\n"),
1387 /* Expected non-zero value with c/d/s suffix */
1388 value
= strtol(p
, &end
, 10);
1389 char suffix
= tolower(*end
);
1391 (suffix
!= 'c' && suffix
!= 'd' && suffix
!= 's')) {
1392 (void) fprintf(stderr
, gettext("invalid dRAID "
1393 "syntax; expected [:<number><c|d|s>] not '%s'\n"),
1398 if (suffix
== 'c') {
1399 if ((uint64_t)value
!= children
) {
1401 gettext("invalid number of dRAID children; "
1402 "%llu required but %llu provided\n"),
1403 (u_longlong_t
)value
,
1404 (u_longlong_t
)children
);
1407 } else if (suffix
== 'd') {
1408 ndata
= (uint64_t)value
;
1409 } else if (suffix
== 's') {
1410 nspares
= (uint64_t)value
;
1412 verify(0); /* Unreachable */
1417 * When a specific number of data disks is not provided limit a
1418 * redundancy group to 8 data disks. This value was selected to
1419 * provide a reasonable tradeoff between capacity and performance.
1421 if (ndata
== UINT64_MAX
) {
1422 if (children
> nspares
+ nparity
) {
1423 ndata
= MIN(children
- nspares
- nparity
, 8);
1425 fprintf(stderr
, gettext("request number of "
1426 "distributed spares %llu and parity level %llu\n"
1427 "leaves no disks available for data\n"),
1428 (u_longlong_t
)nspares
, (u_longlong_t
)nparity
);
1433 /* Verify the maximum allowed group size is never exceeded. */
1434 if (ndata
== 0 || (ndata
+ nparity
> children
- nspares
)) {
1435 fprintf(stderr
, gettext("requested number of dRAID data "
1436 "disks per group %llu is too high,\nat most %llu disks "
1437 "are available for data\n"), (u_longlong_t
)ndata
,
1438 (u_longlong_t
)(children
- nspares
- nparity
));
1443 * Verify the requested number of spares can be satisfied.
1444 * An arbitrary limit of 100 distributed spares is applied.
1446 if (nspares
> 100 || nspares
> (children
- (ndata
+ nparity
))) {
1448 gettext("invalid number of dRAID spares %llu; additional "
1449 "disks would be required\n"), (u_longlong_t
)nspares
);
1453 /* Verify the requested number children is sufficient. */
1454 if (children
< (ndata
+ nparity
+ nspares
)) {
1455 fprintf(stderr
, gettext("%llu disks were provided, but at "
1456 "least %llu disks are required for this config\n"),
1457 (u_longlong_t
)children
,
1458 (u_longlong_t
)(ndata
+ nparity
+ nspares
));
1461 if (children
> VDEV_DRAID_MAX_CHILDREN
) {
1462 fprintf(stderr
, gettext("%llu disks were provided, but "
1463 "dRAID only supports up to %u disks"),
1464 (u_longlong_t
)children
, VDEV_DRAID_MAX_CHILDREN
);
1468 * Calculate the minimum number of groups required to fill a slice.
1469 * This is the LCM of the stripe width (ndata + nparity) and the
1470 * number of data drives (children - nspares).
1472 while (ngroups
* (ndata
+ nparity
) % (children
- nspares
) != 0)
1475 /* Store the basic dRAID configuration. */
1476 fnvlist_add_uint64(nv
, ZPOOL_CONFIG_NPARITY
, nparity
);
1477 fnvlist_add_uint64(nv
, ZPOOL_CONFIG_DRAID_NDATA
, ndata
);
1478 fnvlist_add_uint64(nv
, ZPOOL_CONFIG_DRAID_NSPARES
, nspares
);
1479 fnvlist_add_uint64(nv
, ZPOOL_CONFIG_DRAID_NGROUPS
, ngroups
);
1485 * Construct a syntactically valid vdev specification,
1486 * and ensure that all devices and files exist and can be opened.
1487 * Note: we don't bother freeing anything in the error paths
1488 * because the program is just going to exit anyway.
1491 construct_spec(nvlist_t
*props
, int argc
, char **argv
)
1493 nvlist_t
*nvroot
, *nv
, **top
, **spares
, **l2cache
;
1494 int t
, toplevels
, mindev
, maxdev
, nspares
, nlogs
, nl2cache
;
1495 const char *type
, *fulltype
;
1496 boolean_t is_log
, is_special
, is_dedup
, is_spare
;
1497 boolean_t seen_logs
;
1506 is_log
= is_special
= is_dedup
= is_spare
= B_FALSE
;
1507 seen_logs
= B_FALSE
;
1515 * If it's a mirror, raidz, or draid the subsequent arguments
1516 * are its leaves -- until we encounter the next mirror,
1519 if ((type
= is_grouping(fulltype
, &mindev
, &maxdev
)) != NULL
) {
1520 nvlist_t
**child
= NULL
;
1521 int c
, children
= 0;
1523 if (strcmp(type
, VDEV_TYPE_SPARE
) == 0) {
1524 if (spares
!= NULL
) {
1525 (void) fprintf(stderr
,
1526 gettext("invalid vdev "
1527 "specification: 'spare' can be "
1528 "specified only once\n"));
1532 is_log
= is_special
= is_dedup
= B_FALSE
;
1535 if (strcmp(type
, VDEV_TYPE_LOG
) == 0) {
1537 (void) fprintf(stderr
,
1538 gettext("invalid vdev "
1539 "specification: 'log' can be "
1540 "specified only once\n"));
1545 is_special
= is_dedup
= is_spare
= B_FALSE
;
1549 * A log is not a real grouping device.
1550 * We just set is_log and continue.
1555 if (strcmp(type
, VDEV_ALLOC_BIAS_SPECIAL
) == 0) {
1556 is_special
= B_TRUE
;
1557 is_log
= is_dedup
= is_spare
= B_FALSE
;
1563 if (strcmp(type
, VDEV_ALLOC_BIAS_DEDUP
) == 0) {
1565 is_log
= is_special
= is_spare
= B_FALSE
;
1571 if (strcmp(type
, VDEV_TYPE_L2CACHE
) == 0) {
1572 if (l2cache
!= NULL
) {
1573 (void) fprintf(stderr
,
1574 gettext("invalid vdev "
1575 "specification: 'cache' can be "
1576 "specified only once\n"));
1579 is_log
= is_special
= B_FALSE
;
1580 is_dedup
= is_spare
= B_FALSE
;
1583 if (is_log
|| is_special
|| is_dedup
) {
1584 if (strcmp(type
, VDEV_TYPE_MIRROR
) != 0) {
1585 (void) fprintf(stderr
,
1586 gettext("invalid vdev "
1587 "specification: unsupported '%s' "
1588 "device: %s\n"), is_log
? "log" :
1595 for (c
= 1; c
< argc
; c
++) {
1596 if (is_grouping(argv
[c
], NULL
, NULL
) != NULL
)
1600 child
= realloc(child
,
1601 children
* sizeof (nvlist_t
*));
1604 if ((nv
= make_leaf_vdev(props
, argv
[c
],
1605 !(is_log
|| is_special
|| is_dedup
||
1606 is_spare
))) == NULL
) {
1607 for (c
= 0; c
< children
- 1; c
++)
1608 nvlist_free(child
[c
]);
1613 child
[children
- 1] = nv
;
1616 if (children
< mindev
) {
1617 (void) fprintf(stderr
, gettext("invalid vdev "
1618 "specification: %s requires at least %d "
1619 "devices\n"), argv
[0], mindev
);
1620 for (c
= 0; c
< children
; c
++)
1621 nvlist_free(child
[c
]);
1626 if (children
> maxdev
) {
1627 (void) fprintf(stderr
, gettext("invalid vdev "
1628 "specification: %s supports no more than "
1629 "%d devices\n"), argv
[0], maxdev
);
1630 for (c
= 0; c
< children
; c
++)
1631 nvlist_free(child
[c
]);
1639 if (strcmp(type
, VDEV_TYPE_SPARE
) == 0) {
1643 } else if (strcmp(type
, VDEV_TYPE_L2CACHE
) == 0) {
1645 nl2cache
= children
;
1648 /* create a top-level vdev with children */
1649 verify(nvlist_alloc(&nv
, NV_UNIQUE_NAME
,
1651 verify(nvlist_add_string(nv
, ZPOOL_CONFIG_TYPE
,
1653 verify(nvlist_add_uint64(nv
,
1654 ZPOOL_CONFIG_IS_LOG
, is_log
) == 0);
1656 verify(nvlist_add_string(nv
,
1657 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1658 VDEV_ALLOC_BIAS_LOG
) == 0);
1661 verify(nvlist_add_string(nv
,
1662 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1663 VDEV_ALLOC_BIAS_SPECIAL
) == 0);
1666 verify(nvlist_add_string(nv
,
1667 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1668 VDEV_ALLOC_BIAS_DEDUP
) == 0);
1670 if (strcmp(type
, VDEV_TYPE_RAIDZ
) == 0) {
1671 verify(nvlist_add_uint64(nv
,
1672 ZPOOL_CONFIG_NPARITY
,
1675 if (strcmp(type
, VDEV_TYPE_DRAID
) == 0) {
1676 if (draid_config_by_type(nv
,
1677 fulltype
, children
) != 0) {
1678 for (c
= 0; c
< children
; c
++)
1679 nvlist_free(child
[c
]);
1684 verify(nvlist_add_nvlist_array(nv
,
1685 ZPOOL_CONFIG_CHILDREN
,
1686 (const nvlist_t
**)child
, children
) == 0);
1688 for (c
= 0; c
< children
; c
++)
1689 nvlist_free(child
[c
]);
1694 * We have a device. Pass off to make_leaf_vdev() to
1695 * construct the appropriate nvlist describing the vdev.
1697 if ((nv
= make_leaf_vdev(props
, argv
[0], !(is_log
||
1698 is_special
|| is_dedup
|| is_spare
))) == NULL
)
1701 verify(nvlist_add_uint64(nv
,
1702 ZPOOL_CONFIG_IS_LOG
, is_log
) == 0);
1704 verify(nvlist_add_string(nv
,
1705 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1706 VDEV_ALLOC_BIAS_LOG
) == 0);
1711 verify(nvlist_add_string(nv
,
1712 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1713 VDEV_ALLOC_BIAS_SPECIAL
) == 0);
1716 verify(nvlist_add_string(nv
,
1717 ZPOOL_CONFIG_ALLOCATION_BIAS
,
1718 VDEV_ALLOC_BIAS_DEDUP
) == 0);
1725 top
= realloc(top
, toplevels
* sizeof (nvlist_t
*));
1728 top
[toplevels
- 1] = nv
;
1731 if (toplevels
== 0 && nspares
== 0 && nl2cache
== 0) {
1732 (void) fprintf(stderr
, gettext("invalid vdev "
1733 "specification: at least one toplevel vdev must be "
1738 if (seen_logs
&& nlogs
== 0) {
1739 (void) fprintf(stderr
, gettext("invalid vdev specification: "
1740 "log requires at least 1 device\n"));
1745 * Finally, create nvroot and add all top-level vdevs to it.
1747 verify(nvlist_alloc(&nvroot
, NV_UNIQUE_NAME
, 0) == 0);
1748 verify(nvlist_add_string(nvroot
, ZPOOL_CONFIG_TYPE
,
1749 VDEV_TYPE_ROOT
) == 0);
1750 verify(nvlist_add_nvlist_array(nvroot
, ZPOOL_CONFIG_CHILDREN
,
1751 (const nvlist_t
**)top
, toplevels
) == 0);
1753 verify(nvlist_add_nvlist_array(nvroot
, ZPOOL_CONFIG_SPARES
,
1754 (const nvlist_t
**)spares
, nspares
) == 0);
1756 verify(nvlist_add_nvlist_array(nvroot
, ZPOOL_CONFIG_L2CACHE
,
1757 (const nvlist_t
**)l2cache
, nl2cache
) == 0);
1760 for (t
= 0; t
< toplevels
; t
++)
1761 nvlist_free(top
[t
]);
1762 for (t
= 0; t
< nspares
; t
++)
1763 nvlist_free(spares
[t
]);
1764 for (t
= 0; t
< nl2cache
; t
++)
1765 nvlist_free(l2cache
[t
]);
1775 split_mirror_vdev(zpool_handle_t
*zhp
, char *newname
, nvlist_t
*props
,
1776 splitflags_t flags
, int argc
, char **argv
)
1778 nvlist_t
*newroot
= NULL
, **child
;
1782 if ((newroot
= construct_spec(props
, argc
, argv
)) == NULL
) {
1783 (void) fprintf(stderr
, gettext("Unable to build a "
1784 "pool from the specified devices\n"));
1788 if (!flags
.dryrun
&& make_disks(zhp
, newroot
, B_FALSE
) != 0) {
1789 nvlist_free(newroot
);
1793 /* avoid any tricks in the spec */
1794 verify(nvlist_lookup_nvlist_array(newroot
,
1795 ZPOOL_CONFIG_CHILDREN
, &child
, &children
) == 0);
1796 for (c
= 0; c
< children
; c
++) {
1801 verify(nvlist_lookup_string(child
[c
],
1802 ZPOOL_CONFIG_PATH
, &path
) == 0);
1803 if ((type
= is_grouping(path
, &min
, &max
)) != NULL
) {
1804 (void) fprintf(stderr
, gettext("Cannot use "
1805 "'%s' as a device for splitting\n"), type
);
1806 nvlist_free(newroot
);
1812 if (zpool_vdev_split(zhp
, newname
, &newroot
, props
, flags
) != 0) {
1813 nvlist_free(newroot
);
1821 num_normal_vdevs(nvlist_t
*nvroot
)
1824 uint_t t
, toplevels
, normal
= 0;
1826 verify(nvlist_lookup_nvlist_array(nvroot
, ZPOOL_CONFIG_CHILDREN
,
1827 &top
, &toplevels
) == 0);
1829 for (t
= 0; t
< toplevels
; t
++) {
1830 uint64_t log
= B_FALSE
;
1832 (void) nvlist_lookup_uint64(top
[t
], ZPOOL_CONFIG_IS_LOG
, &log
);
1835 if (nvlist_exists(top
[t
], ZPOOL_CONFIG_ALLOCATION_BIAS
))
1845 * Get and validate the contents of the given vdev specification. This ensures
1846 * that the nvlist returned is well-formed, that all the devices exist, and that
1847 * they are not currently in use by any other known consumer. The 'poolconfig'
1848 * parameter is the current configuration of the pool when adding devices to
1849 * an existing pool, and is used to perform additional checks, such as changing the
1850 * replication level of the pool. It can be 'NULL' to indicate that this is a
1851 * new pool. The 'force' flag controls whether devices should be forcefully
1852 * added, even if they appear in use.
1855 make_root_vdev(zpool_handle_t
*zhp
, nvlist_t
*props
, int force
, int check_rep
,
1856 boolean_t replacing
, boolean_t dryrun
, int argc
, char **argv
)
1859 nvlist_t
*poolconfig
= NULL
;
1863 * Construct the vdev specification. If this is successful, we know
1864 * that we have a valid specification, and that all devices can be
1867 if ((newroot
= construct_spec(props
, argc
, argv
)) == NULL
)
1870 if (zhp
&& ((poolconfig
= zpool_get_config(zhp
, NULL
)) == NULL
)) {
1871 nvlist_free(newroot
);
1876 * Validate each device to make sure that it's not shared with another
1877 * subsystem. We do this even if 'force' is set, because there are some
1878 * uses (such as a dedicated dump device) that even '-f' cannot
1881 if (is_device_in_use(poolconfig
, newroot
, force
, replacing
, B_FALSE
)) {
1882 nvlist_free(newroot
);
1887 * Check the replication level of the given vdevs and report any errors
1888 * found. We include the existing pool spec, if any, as we need to
1889 * catch changes against the existing replication level.
1891 if (check_rep
&& check_replication(poolconfig
, newroot
) != 0) {
1892 nvlist_free(newroot
);
1897 * On pool create the new vdev spec must have one normal vdev.
1899 if (poolconfig
== NULL
&& num_normal_vdevs(newroot
) == 0) {
1900 vdev_error(gettext("at least one general top-level vdev must "
1902 nvlist_free(newroot
);
1907 * Run through the vdev specification and label any whole disks found.
1909 if (!dryrun
&& make_disks(zhp
, newroot
, replacing
) != 0) {
1910 nvlist_free(newroot
);