4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright 2017 RackTop Systems.
26 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
27 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
28 * Copyright (c) 2019 Datto Inc.
32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
33 * It has the following characteristics:
35 * - Thread Safe. libzfs_core is accessible concurrently from multiple
36 * threads. This is accomplished primarily by avoiding global data
37 * (e.g. caching). Since it's thread-safe, there is no reason for a
38 * process to have multiple libzfs "instances". Therefore, we store
39 * our few pieces of data (e.g. the file descriptor) in global
40 * variables. The fd is reference-counted so that the libzfs_core
41 * library can be "initialized" multiple times (e.g. by different
42 * consumers within the same process).
44 * - Committed Interface. The libzfs_core interface will be committed,
45 * therefore consumers can compile against it and be confident that
46 * their code will continue to work on future releases of this code.
47 * Currently, the interface is Evolving (not Committed), but we intend
48 * to commit to it once it is more complete and we determine that it
49 * meets the needs of all consumers.
51 * - Programmatic Error Handling. libzfs_core communicates errors with
52 * defined error numbers, and doesn't print anything to stdout/stderr.
54 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
55 * to/from the kernel ioctls. There is generally a 1:1 correspondence
56 * between libzfs_core functions and ioctls to ZFS_DEV.
58 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
59 * with kernel ioctls, and kernel ioctls are generally atomic, each
60 * libzfs_core function is atomic. For example, creating multiple
61 * snapshots with a single call to lzc_snapshot() is atomic -- it
62 * can't fail with only some of the requested snapshots created, even
63 * in the event of power loss or system crash.
65 * - Continued libzfs Support. Some higher-level operations (e.g.
66 * support for "zfs send -R") are too complicated to fit the scope of
67 * libzfs_core. This functionality will continue to live in libzfs.
68 * Where appropriate, libzfs will use the underlying atomic operations
69 * of libzfs_core. For example, libzfs may implement "zfs send -R |
70 * zfs receive" by using individual "send one snapshot", rename,
71 * destroy, and "receive one snapshot" operations in libzfs_core.
72 * /sbin/zfs and /sbin/zpool will link with both libzfs and
73 * libzfs_core. Other consumers should aim to use only libzfs_core,
74 * since that will be the supported, stable interface going forwards.
77 #include <libzfs_core.h>
89 #include <sys/nvpair.h>
90 #include <sys/param.h>
91 #include <sys/types.h>
93 #include <sys/zfs_ioctl.h>
95 #define BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */
99 static pthread_mutex_t g_lock
= PTHREAD_MUTEX_INITIALIZER
;
100 static int g_refcount
;
103 static zfs_ioc_t fail_ioc_cmd
= ZFS_IOC_LAST
;
104 static zfs_errno_t fail_ioc_err
;
107 libzfs_core_debug_ioc(void)
110 * To test running newer user space binaries with kernels
111 * that don't yet support an ioctl or a new ioctl arg we
112 * provide an override to intentionally fail an ioctl.
115 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
117 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
118 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
120 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
121 * cannot checkpoint 'tank': the loaded zfs module does not support
122 * this operation. A reboot may be required to enable this operation.
124 if (fail_ioc_cmd
== ZFS_IOC_LAST
) {
125 char *ioc_test
= getenv("ZFS_IOC_TEST");
126 unsigned int ioc_num
= 0, ioc_err
= 0;
128 if (ioc_test
!= NULL
&&
129 sscanf(ioc_test
, "%i:%i", &ioc_num
, &ioc_err
) == 2 &&
130 ioc_num
< ZFS_IOC_LAST
) {
131 fail_ioc_cmd
= ioc_num
;
132 fail_ioc_err
= ioc_err
;
139 libzfs_core_init(void)
141 (void) pthread_mutex_lock(&g_lock
);
142 if (g_refcount
== 0) {
143 g_fd
= open(ZFS_DEV
, O_RDWR
|O_CLOEXEC
);
145 (void) pthread_mutex_unlock(&g_lock
);
152 libzfs_core_debug_ioc();
154 (void) pthread_mutex_unlock(&g_lock
);
159 libzfs_core_fini(void)
161 (void) pthread_mutex_lock(&g_lock
);
162 ASSERT3S(g_refcount
, >, 0);
166 if (g_refcount
== 0 && g_fd
!= -1) {
170 (void) pthread_mutex_unlock(&g_lock
);
174 lzc_ioctl(zfs_ioc_t ioc
, const char *name
,
175 nvlist_t
*source
, nvlist_t
**resultp
)
177 zfs_cmd_t zc
= {"\0"};
182 ASSERT3S(g_refcount
, >, 0);
183 VERIFY3S(g_fd
, !=, -1);
186 if (ioc
== fail_ioc_cmd
)
187 return (fail_ioc_err
);
191 (void) strlcpy(zc
.zc_name
, name
, sizeof (zc
.zc_name
));
193 if (source
!= NULL
) {
194 packed
= fnvlist_pack(source
, &size
);
195 zc
.zc_nvlist_src
= (uint64_t)(uintptr_t)packed
;
196 zc
.zc_nvlist_src_size
= size
;
199 if (resultp
!= NULL
) {
201 if (ioc
== ZFS_IOC_CHANNEL_PROGRAM
) {
202 zc
.zc_nvlist_dst_size
= fnvlist_lookup_uint64(source
,
205 zc
.zc_nvlist_dst_size
= MAX(size
* 2, 128 * 1024);
207 zc
.zc_nvlist_dst
= (uint64_t)(uintptr_t)
208 malloc(zc
.zc_nvlist_dst_size
);
209 if (zc
.zc_nvlist_dst
== (uint64_t)0) {
215 while (lzc_ioctl_fd(g_fd
, ioc
, &zc
) != 0) {
217 * If ioctl exited with ENOMEM, we retry the ioctl after
218 * increasing the size of the destination nvlist.
220 * Channel programs that exit with ENOMEM ran over the
221 * lua memory sandbox; they should not be retried.
223 if (errno
== ENOMEM
&& resultp
!= NULL
&&
224 ioc
!= ZFS_IOC_CHANNEL_PROGRAM
) {
225 free((void *)(uintptr_t)zc
.zc_nvlist_dst
);
226 zc
.zc_nvlist_dst_size
*= 2;
227 zc
.zc_nvlist_dst
= (uint64_t)(uintptr_t)
228 malloc(zc
.zc_nvlist_dst_size
);
229 if (zc
.zc_nvlist_dst
== (uint64_t)0) {
238 if (zc
.zc_nvlist_dst_filled
&& resultp
!= NULL
) {
239 *resultp
= fnvlist_unpack((void *)(uintptr_t)zc
.zc_nvlist_dst
,
240 zc
.zc_nvlist_dst_size
);
245 fnvlist_pack_free(packed
, size
);
246 free((void *)(uintptr_t)zc
.zc_nvlist_dst
);
251 lzc_scrub(zfs_ioc_t ioc
, const char *name
,
252 nvlist_t
*source
, nvlist_t
**resultp
)
254 return (lzc_ioctl(ioc
, name
, source
, resultp
));
258 lzc_create(const char *fsname
, enum lzc_dataset_type type
, nvlist_t
*props
,
259 uint8_t *wkeydata
, uint_t wkeylen
)
262 nvlist_t
*hidden_args
= NULL
;
263 nvlist_t
*args
= fnvlist_alloc();
265 fnvlist_add_int32(args
, "type", (dmu_objset_type_t
)type
);
267 fnvlist_add_nvlist(args
, "props", props
);
269 if (wkeydata
!= NULL
) {
270 hidden_args
= fnvlist_alloc();
271 fnvlist_add_uint8_array(hidden_args
, "wkeydata", wkeydata
,
273 fnvlist_add_nvlist(args
, ZPOOL_HIDDEN_ARGS
, hidden_args
);
276 error
= lzc_ioctl(ZFS_IOC_CREATE
, fsname
, args
, NULL
);
277 nvlist_free(hidden_args
);
283 lzc_clone(const char *fsname
, const char *origin
, nvlist_t
*props
)
286 nvlist_t
*hidden_args
= NULL
;
287 nvlist_t
*args
= fnvlist_alloc();
289 fnvlist_add_string(args
, "origin", origin
);
291 fnvlist_add_nvlist(args
, "props", props
);
292 error
= lzc_ioctl(ZFS_IOC_CLONE
, fsname
, args
, NULL
);
293 nvlist_free(hidden_args
);
299 lzc_promote(const char *fsname
, char *snapnamebuf
, int snapnamelen
)
302 * The promote ioctl is still legacy, so we need to construct our
303 * own zfs_cmd_t rather than using lzc_ioctl().
305 zfs_cmd_t zc
= {"\0"};
307 ASSERT3S(g_refcount
, >, 0);
308 VERIFY3S(g_fd
, !=, -1);
310 (void) strlcpy(zc
.zc_name
, fsname
, sizeof (zc
.zc_name
));
311 if (lzc_ioctl_fd(g_fd
, ZFS_IOC_PROMOTE
, &zc
) != 0) {
313 if (error
== EEXIST
&& snapnamebuf
!= NULL
)
314 (void) strlcpy(snapnamebuf
, zc
.zc_string
, snapnamelen
);
321 lzc_rename(const char *source
, const char *target
)
323 zfs_cmd_t zc
= {"\0"};
326 ASSERT3S(g_refcount
, >, 0);
327 VERIFY3S(g_fd
, !=, -1);
328 (void) strlcpy(zc
.zc_name
, source
, sizeof (zc
.zc_name
));
329 (void) strlcpy(zc
.zc_value
, target
, sizeof (zc
.zc_value
));
330 error
= lzc_ioctl_fd(g_fd
, ZFS_IOC_RENAME
, &zc
);
337 lzc_destroy(const char *fsname
)
340 nvlist_t
*args
= fnvlist_alloc();
341 error
= lzc_ioctl(ZFS_IOC_DESTROY
, fsname
, args
, NULL
);
349 * The keys in the snaps nvlist are the snapshots to be created.
350 * They must all be in the same pool.
352 * The props nvlist is properties to set. Currently only user properties
353 * are supported. { user:prop_name -> string value }
355 * The returned results nvlist will have an entry for each snapshot that failed.
356 * The value will be the (int32) error code.
358 * The return value will be 0 if all snapshots were created, otherwise it will
359 * be the errno of an (unspecified) snapshot that failed.
362 lzc_snapshot(nvlist_t
*snaps
, nvlist_t
*props
, nvlist_t
**errlist
)
367 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
371 /* determine the pool name */
372 elem
= nvlist_next_nvpair(snaps
, NULL
);
375 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
376 pool
[strcspn(pool
, "/@")] = '\0';
378 args
= fnvlist_alloc();
379 fnvlist_add_nvlist(args
, "snaps", snaps
);
381 fnvlist_add_nvlist(args
, "props", props
);
383 error
= lzc_ioctl(ZFS_IOC_SNAPSHOT
, pool
, args
, errlist
);
390 * Destroys snapshots.
392 * The keys in the snaps nvlist are the snapshots to be destroyed.
393 * They must all be in the same pool.
395 * Snapshots that do not exist will be silently ignored.
397 * If 'defer' is not set, and a snapshot has user holds or clones, the
398 * destroy operation will fail and none of the snapshots will be
401 * If 'defer' is set, and a snapshot has user holds or clones, it will be
402 * marked for deferred destruction, and will be destroyed when the last hold
403 * or clone is removed/destroyed.
405 * The return value will be 0 if all snapshots were destroyed (or marked for
406 * later destruction if 'defer' is set) or didn't exist to begin with.
408 * Otherwise the return value will be the errno of an (unspecified) snapshot
409 * that failed, no snapshots will be destroyed, and the errlist will have an
410 * entry for each snapshot that failed. The value in the errlist will be
411 * the (int32) error code.
414 lzc_destroy_snaps(nvlist_t
*snaps
, boolean_t defer
, nvlist_t
**errlist
)
419 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
421 /* determine the pool name */
422 elem
= nvlist_next_nvpair(snaps
, NULL
);
425 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
426 pool
[strcspn(pool
, "/@")] = '\0';
428 args
= fnvlist_alloc();
429 fnvlist_add_nvlist(args
, "snaps", snaps
);
431 fnvlist_add_boolean(args
, "defer");
433 error
= lzc_ioctl(ZFS_IOC_DESTROY_SNAPS
, pool
, args
, errlist
);
440 lzc_snaprange_space(const char *firstsnap
, const char *lastsnap
,
446 char fs
[ZFS_MAX_DATASET_NAME_LEN
];
449 /* determine the fs name */
450 (void) strlcpy(fs
, firstsnap
, sizeof (fs
));
451 atp
= strchr(fs
, '@');
456 args
= fnvlist_alloc();
457 fnvlist_add_string(args
, "firstsnap", firstsnap
);
459 err
= lzc_ioctl(ZFS_IOC_SPACE_SNAPS
, lastsnap
, args
, &result
);
462 *usedp
= fnvlist_lookup_uint64(result
, "used");
463 fnvlist_free(result
);
469 lzc_exists(const char *dataset
)
472 * The objset_stats ioctl is still legacy, so we need to construct our
473 * own zfs_cmd_t rather than using lzc_ioctl().
475 zfs_cmd_t zc
= {"\0"};
477 ASSERT3S(g_refcount
, >, 0);
478 VERIFY3S(g_fd
, !=, -1);
480 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
481 return (lzc_ioctl_fd(g_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) == 0);
486 * It was added to preserve the function signature in case it is
487 * needed in the future.
490 lzc_sync(const char *pool_name
, nvlist_t
*innvl
, nvlist_t
**outnvl
)
493 return (lzc_ioctl(ZFS_IOC_POOL_SYNC
, pool_name
, innvl
, NULL
));
497 * Create "user holds" on snapshots. If there is a hold on a snapshot,
498 * the snapshot cannot be destroyed. (However, it can be marked for deletion
499 * by lzc_destroy_snaps(defer=B_TRUE).)
501 * The keys in the nvlist are snapshot names.
502 * The snapshots must all be in the same pool.
503 * The value is the name of the hold (string type).
505 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
506 * In this case, when the cleanup_fd is closed (including on process
507 * termination), the holds will be released. If the system is shut down
508 * uncleanly, the holds will be released when the pool is next opened
511 * Holds for snapshots which don't exist will be skipped and have an entry
512 * added to errlist, but will not cause an overall failure.
514 * The return value will be 0 if all holds, for snapshots that existed,
515 * were successfully created.
517 * Otherwise the return value will be the errno of an (unspecified) hold that
518 * failed and no holds will be created.
520 * In all cases the errlist will have an entry for each hold that failed
521 * (name = snapshot), with its value being the error code (int32).
524 lzc_hold(nvlist_t
*holds
, int cleanup_fd
, nvlist_t
**errlist
)
526 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
531 /* determine the pool name */
532 elem
= nvlist_next_nvpair(holds
, NULL
);
535 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
536 pool
[strcspn(pool
, "/@")] = '\0';
538 args
= fnvlist_alloc();
539 fnvlist_add_nvlist(args
, "holds", holds
);
540 if (cleanup_fd
!= -1)
541 fnvlist_add_int32(args
, "cleanup_fd", cleanup_fd
);
543 error
= lzc_ioctl(ZFS_IOC_HOLD
, pool
, args
, errlist
);
549 * Release "user holds" on snapshots. If the snapshot has been marked for
550 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
551 * any clones, and all the user holds are removed, then the snapshot will be
554 * The keys in the nvlist are snapshot names.
555 * The snapshots must all be in the same pool.
556 * The value is an nvlist whose keys are the holds to remove.
558 * Holds which failed to release because they didn't exist will have an entry
559 * added to errlist, but will not cause an overall failure.
561 * The return value will be 0 if the holds nvlist was empty or all holds
562 * that existed were successfully removed.
564 * Otherwise the return value will be the errno of an (unspecified) hold that
565 * failed to release and no holds will be released.
567 * In all cases the errlist will have an entry for each hold that failed to
571 lzc_release(nvlist_t
*holds
, nvlist_t
**errlist
)
573 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
576 /* determine the pool name */
577 elem
= nvlist_next_nvpair(holds
, NULL
);
580 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
581 pool
[strcspn(pool
, "/@")] = '\0';
583 return (lzc_ioctl(ZFS_IOC_RELEASE
, pool
, holds
, errlist
));
587 * Retrieve list of user holds on the specified snapshot.
589 * On success, *holdsp will be set to an nvlist which the caller must free.
590 * The keys are the names of the holds, and the value is the creation time
591 * of the hold (uint64) in seconds since the epoch.
594 lzc_get_holds(const char *snapname
, nvlist_t
**holdsp
)
596 return (lzc_ioctl(ZFS_IOC_GET_HOLDS
, snapname
, NULL
, holdsp
));
600 lzc_get_props(const char *poolname
, nvlist_t
**props
)
602 return (lzc_ioctl(ZFS_IOC_POOL_GET_PROPS
, poolname
, NULL
, props
));
606 max_pipe_buffer(int infd
)
609 static unsigned int max
;
611 max
= 1048576; /* fs/pipe.c default */
613 FILE *procf
= fopen("/proc/sys/fs/pipe-max-size", "re");
615 if (fscanf(procf
, "%u", &max
) <= 0) {
616 /* ignore error: max untouched if parse fails */
622 unsigned int cur
= fcntl(infd
, F_GETPIPE_SZ
);
624 * Sadly, Linux has an unfixed deadlock if you do SETPIPE_SZ on a pipe
626 * cf. #13232, https://bugzilla.kernel.org/show_bug.cgi?id=212295
628 * And since the problem is in waking up the writer, there's nothing
629 * we can do about it from here.
631 * So if people want to, they can set this, but they
634 if (getenv("ZFS_SET_PIPE_MAX") == NULL
)
636 if (cur
< max
&& fcntl(infd
, F_SETPIPE_SZ
, max
) != -1)
640 /* FreeBSD automatically resizes */
642 return (BIG_PIPE_SIZE
);
647 struct send_worker_ctx
{
648 int from
; /* read end of pipe, with send data; closed on exit */
649 int to
; /* original arbitrary output fd; mustn't be a pipe */
653 send_worker(void *arg
)
655 struct send_worker_ctx
*ctx
= arg
;
656 unsigned int bufsiz
= max_pipe_buffer(ctx
->from
);
660 rd
= splice(ctx
->from
, NULL
, ctx
->to
, NULL
, bufsiz
,
661 SPLICE_F_MOVE
| SPLICE_F_MORE
);
662 if ((rd
== -1 && errno
!= EINTR
) || rd
== 0)
665 int err
= (rd
== -1) ? errno
: 0;
667 return ((void *)(uintptr_t)err
);
672 * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
673 * ("fs: don't allow kernel reads and writes without iter ops"),
674 * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c.
676 * This wrapper transparently executes func() with a pipe
677 * by spawning a thread to copy from that pipe to the original output
680 * Returns the error from func(), if nonzero,
681 * otherwise the error from the thread.
683 * No-op if orig_fd is -1, already a pipe (but the buffer size is bumped),
684 * and on not-Linux; as such, it is safe to wrap/call wrapped functions
685 * in a wrapped context.
688 lzc_send_wrapper(int (*func
)(int, void *), int orig_fd
, void *data
)
692 if (orig_fd
!= -1 && fstat(orig_fd
, &sb
) == -1)
694 if (orig_fd
== -1 || S_ISFIFO(sb
.st_mode
)) {
696 (void) max_pipe_buffer(orig_fd
);
697 return (func(orig_fd
, data
));
699 if ((fcntl(orig_fd
, F_GETFL
) & O_ACCMODE
) == O_RDONLY
)
700 return (errno
= EBADF
);
703 if (pipe2(rw
, O_CLOEXEC
) == -1)
707 pthread_t send_thread
;
708 struct send_worker_ctx ctx
= {.from
= rw
[0], .to
= orig_fd
};
709 if ((err
= pthread_create(&send_thread
, NULL
, send_worker
, &ctx
))
713 return (errno
= err
);
716 err
= func(rw
[1], data
);
720 pthread_join(send_thread
, &send_err
);
721 if (err
== 0 && send_err
!= 0)
722 errno
= err
= (uintptr_t)send_err
;
726 return (func(orig_fd
, data
));
731 * Generate a zfs send stream for the specified snapshot and write it to
732 * the specified file descriptor.
734 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
736 * If "from" is NULL, a full (non-incremental) stream will be sent.
737 * If "from" is non-NULL, it must be the full name of a snapshot or
738 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
739 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
740 * bookmark must represent an earlier point in the history of "snapname".
741 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
742 * or it can be the origin of "snapname"'s filesystem, or an earlier
743 * snapshot in the origin, etc.
745 * "fd" is the file descriptor to write the send stream to.
747 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
748 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
749 * records with drr_blksz > 128K.
751 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
752 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
753 * which the receiving system must support (as indicated by support
754 * for the "embedded_data" feature).
756 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
757 * compressed WRITE records for blocks which are compressed on disk and in
758 * memory. If the lz4_compress feature is active on the sending system, then
759 * the receiving system must have that feature enabled as well.
761 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
762 * datasets, by sending data exactly as it exists on disk. This allows backups
763 * to be taken even if encryption keys are not currently loaded.
766 lzc_send(const char *snapname
, const char *from
, int fd
,
767 enum lzc_send_flags flags
)
769 return (lzc_send_resume_redacted(snapname
, from
, fd
, flags
, 0, 0,
774 lzc_send_redacted(const char *snapname
, const char *from
, int fd
,
775 enum lzc_send_flags flags
, const char *redactbook
)
777 return (lzc_send_resume_redacted(snapname
, from
, fd
, flags
, 0, 0,
782 lzc_send_resume(const char *snapname
, const char *from
, int fd
,
783 enum lzc_send_flags flags
, uint64_t resumeobj
, uint64_t resumeoff
)
785 return (lzc_send_resume_redacted(snapname
, from
, fd
, flags
, resumeobj
,
790 * snapname: The name of the "tosnap", or the snapshot whose contents we are
792 * from: The name of the "fromsnap", or the incremental source.
793 * fd: File descriptor to write the stream to.
794 * flags: flags that determine features to be used by the stream.
795 * resumeobj: Object to resume from, for resuming send.
796 * resumeoff: Offset to resume from, for resuming send.
797 * redactnv: nvlist of string -> boolean(ignored) containing the names of all
798 * the snapshots that we should redact with respect to.
799 * redactbook: Name of the redaction bookmark to create.
804 lzc_send_resume_redacted_cb_impl(const char *snapname
, const char *from
, int fd
,
805 enum lzc_send_flags flags
, uint64_t resumeobj
, uint64_t resumeoff
,
806 const char *redactbook
)
811 args
= fnvlist_alloc();
812 fnvlist_add_int32(args
, "fd", fd
);
814 fnvlist_add_string(args
, "fromsnap", from
);
815 if (flags
& LZC_SEND_FLAG_LARGE_BLOCK
)
816 fnvlist_add_boolean(args
, "largeblockok");
817 if (flags
& LZC_SEND_FLAG_EMBED_DATA
)
818 fnvlist_add_boolean(args
, "embedok");
819 if (flags
& LZC_SEND_FLAG_COMPRESS
)
820 fnvlist_add_boolean(args
, "compressok");
821 if (flags
& LZC_SEND_FLAG_RAW
)
822 fnvlist_add_boolean(args
, "rawok");
823 if (flags
& LZC_SEND_FLAG_SAVED
)
824 fnvlist_add_boolean(args
, "savedok");
825 if (resumeobj
!= 0 || resumeoff
!= 0) {
826 fnvlist_add_uint64(args
, "resume_object", resumeobj
);
827 fnvlist_add_uint64(args
, "resume_offset", resumeoff
);
829 if (redactbook
!= NULL
)
830 fnvlist_add_string(args
, "redactbook", redactbook
);
832 err
= lzc_ioctl(ZFS_IOC_SEND_NEW
, snapname
, args
, NULL
);
837 struct lzc_send_resume_redacted
{
838 const char *snapname
;
840 enum lzc_send_flags flags
;
843 const char *redactbook
;
847 lzc_send_resume_redacted_cb(int fd
, void *arg
)
849 struct lzc_send_resume_redacted
*zsrr
= arg
;
850 return (lzc_send_resume_redacted_cb_impl(zsrr
->snapname
, zsrr
->from
,
851 fd
, zsrr
->flags
, zsrr
->resumeobj
, zsrr
->resumeoff
,
856 lzc_send_resume_redacted(const char *snapname
, const char *from
, int fd
,
857 enum lzc_send_flags flags
, uint64_t resumeobj
, uint64_t resumeoff
,
858 const char *redactbook
)
860 struct lzc_send_resume_redacted zsrr
= {
861 .snapname
= snapname
,
864 .resumeobj
= resumeobj
,
865 .resumeoff
= resumeoff
,
866 .redactbook
= redactbook
,
868 return (lzc_send_wrapper(lzc_send_resume_redacted_cb
, fd
, &zsrr
));
872 * "from" can be NULL, a snapshot, or a bookmark.
874 * If from is NULL, a full (non-incremental) stream will be estimated. This
875 * is calculated very efficiently.
877 * If from is a snapshot, lzc_send_space uses the deadlists attached to
878 * each snapshot to efficiently estimate the stream size.
880 * If from is a bookmark, the indirect blocks in the destination snapshot
881 * are traversed, looking for blocks with a birth time since the creation TXG of
882 * the snapshot this bookmark was created from. This will result in
883 * significantly more I/O and be less efficient than a send space estimation on
884 * an equivalent snapshot. This process is also used if redact_snaps is
890 lzc_send_space_resume_redacted_cb_impl(const char *snapname
, const char *from
,
891 enum lzc_send_flags flags
, uint64_t resumeobj
, uint64_t resumeoff
,
892 uint64_t resume_bytes
, const char *redactbook
, int fd
, uint64_t *spacep
)
898 args
= fnvlist_alloc();
900 fnvlist_add_string(args
, "from", from
);
901 if (flags
& LZC_SEND_FLAG_LARGE_BLOCK
)
902 fnvlist_add_boolean(args
, "largeblockok");
903 if (flags
& LZC_SEND_FLAG_EMBED_DATA
)
904 fnvlist_add_boolean(args
, "embedok");
905 if (flags
& LZC_SEND_FLAG_COMPRESS
)
906 fnvlist_add_boolean(args
, "compressok");
907 if (flags
& LZC_SEND_FLAG_RAW
)
908 fnvlist_add_boolean(args
, "rawok");
909 if (resumeobj
!= 0 || resumeoff
!= 0) {
910 fnvlist_add_uint64(args
, "resume_object", resumeobj
);
911 fnvlist_add_uint64(args
, "resume_offset", resumeoff
);
912 fnvlist_add_uint64(args
, "bytes", resume_bytes
);
914 if (redactbook
!= NULL
)
915 fnvlist_add_string(args
, "redactbook", redactbook
);
917 fnvlist_add_int32(args
, "fd", fd
);
919 err
= lzc_ioctl(ZFS_IOC_SEND_SPACE
, snapname
, args
, &result
);
922 *spacep
= fnvlist_lookup_uint64(result
, "space");
927 struct lzc_send_space_resume_redacted
{
928 const char *snapname
;
930 enum lzc_send_flags flags
;
933 uint64_t resume_bytes
;
934 const char *redactbook
;
939 lzc_send_space_resume_redacted_cb(int fd
, void *arg
)
941 struct lzc_send_space_resume_redacted
*zssrr
= arg
;
942 return (lzc_send_space_resume_redacted_cb_impl(zssrr
->snapname
,
943 zssrr
->from
, zssrr
->flags
, zssrr
->resumeobj
, zssrr
->resumeoff
,
944 zssrr
->resume_bytes
, zssrr
->redactbook
, fd
, zssrr
->spacep
));
948 lzc_send_space_resume_redacted(const char *snapname
, const char *from
,
949 enum lzc_send_flags flags
, uint64_t resumeobj
, uint64_t resumeoff
,
950 uint64_t resume_bytes
, const char *redactbook
, int fd
, uint64_t *spacep
)
952 struct lzc_send_space_resume_redacted zssrr
= {
953 .snapname
= snapname
,
956 .resumeobj
= resumeobj
,
957 .resumeoff
= resumeoff
,
958 .resume_bytes
= resume_bytes
,
959 .redactbook
= redactbook
,
962 return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb
,
967 lzc_send_space(const char *snapname
, const char *from
,
968 enum lzc_send_flags flags
, uint64_t *spacep
)
970 return (lzc_send_space_resume_redacted(snapname
, from
, flags
, 0, 0, 0,
975 recv_read(int fd
, void *buf
, int ilen
)
982 rv
= read(fd
, cp
, len
);
987 if (rv
< 0 || len
!= 0)
994 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
995 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
996 * stream options but is currently only used for resumable streams. This way
997 * updated user space utilities will interoperate with older kernel modules.
999 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
1002 recv_impl(const char *snapname
, nvlist_t
*recvdprops
, nvlist_t
*localprops
,
1003 uint8_t *wkeydata
, uint_t wkeylen
, const char *origin
, boolean_t force
,
1004 boolean_t heal
, boolean_t resumable
, boolean_t raw
, int input_fd
,
1005 const dmu_replay_record_t
*begin_record
, uint64_t *read_bytes
,
1006 uint64_t *errflags
, nvlist_t
**errors
)
1008 dmu_replay_record_t drr
;
1009 char fsname
[MAXPATHLEN
];
1012 boolean_t payload
= B_FALSE
;
1014 ASSERT3S(g_refcount
, >, 0);
1015 VERIFY3S(g_fd
, !=, -1);
1017 /* Set 'fsname' to the name of containing filesystem */
1018 (void) strlcpy(fsname
, snapname
, sizeof (fsname
));
1019 atp
= strchr(fsname
, '@');
1024 /* If the fs does not exist, try its parent. */
1025 if (!lzc_exists(fsname
)) {
1026 char *slashp
= strrchr(fsname
, '/');
1033 * It is not uncommon for gigabytes to be processed by zfs receive.
1034 * Speculatively increase the buffer size if supported by the platform.
1037 if (fstat(input_fd
, &sb
) == -1)
1039 if (S_ISFIFO(sb
.st_mode
))
1040 (void) max_pipe_buffer(input_fd
);
1043 * The begin_record is normally a non-byteswapped BEGIN record.
1044 * For resumable streams it may be set to any non-byteswapped
1045 * dmu_replay_record_t.
1047 if (begin_record
== NULL
) {
1048 error
= recv_read(input_fd
, &drr
, sizeof (drr
));
1052 drr
= *begin_record
;
1053 payload
= (begin_record
->drr_payloadlen
!= 0);
1057 * All receives with a payload should use the new interface.
1059 if (resumable
|| heal
|| raw
|| wkeydata
!= NULL
|| payload
) {
1060 nvlist_t
*outnvl
= NULL
;
1061 nvlist_t
*innvl
= fnvlist_alloc();
1063 fnvlist_add_string(innvl
, "snapname", snapname
);
1065 if (recvdprops
!= NULL
)
1066 fnvlist_add_nvlist(innvl
, "props", recvdprops
);
1068 if (localprops
!= NULL
)
1069 fnvlist_add_nvlist(innvl
, "localprops", localprops
);
1071 if (wkeydata
!= NULL
) {
1073 * wkeydata must be placed in the special
1074 * ZPOOL_HIDDEN_ARGS nvlist so that it
1075 * will not be printed to the zpool history.
1077 nvlist_t
*hidden_args
= fnvlist_alloc();
1078 fnvlist_add_uint8_array(hidden_args
, "wkeydata",
1080 fnvlist_add_nvlist(innvl
, ZPOOL_HIDDEN_ARGS
,
1082 nvlist_free(hidden_args
);
1085 if (origin
!= NULL
&& strlen(origin
))
1086 fnvlist_add_string(innvl
, "origin", origin
);
1088 fnvlist_add_byte_array(innvl
, "begin_record",
1089 (uchar_t
*)&drr
, sizeof (drr
));
1091 fnvlist_add_int32(innvl
, "input_fd", input_fd
);
1094 fnvlist_add_boolean(innvl
, "force");
1097 fnvlist_add_boolean(innvl
, "resumable");
1100 fnvlist_add_boolean(innvl
, "heal");
1102 error
= lzc_ioctl(ZFS_IOC_RECV_NEW
, fsname
, innvl
, &outnvl
);
1104 if (error
== 0 && read_bytes
!= NULL
)
1105 error
= nvlist_lookup_uint64(outnvl
, "read_bytes",
1108 if (error
== 0 && errflags
!= NULL
)
1109 error
= nvlist_lookup_uint64(outnvl
, "error_flags",
1112 if (error
== 0 && errors
!= NULL
) {
1114 error
= nvlist_lookup_nvlist(outnvl
, "errors", &nvl
);
1116 *errors
= fnvlist_dup(nvl
);
1119 fnvlist_free(innvl
);
1120 fnvlist_free(outnvl
);
1122 zfs_cmd_t zc
= {"\0"};
1123 char *rp_packed
= NULL
;
1124 char *lp_packed
= NULL
;
1127 ASSERT3S(g_refcount
, >, 0);
1129 (void) strlcpy(zc
.zc_name
, fsname
, sizeof (zc
.zc_name
));
1130 (void) strlcpy(zc
.zc_value
, snapname
, sizeof (zc
.zc_value
));
1132 if (recvdprops
!= NULL
) {
1133 rp_packed
= fnvlist_pack(recvdprops
, &size
);
1134 zc
.zc_nvlist_src
= (uint64_t)(uintptr_t)rp_packed
;
1135 zc
.zc_nvlist_src_size
= size
;
1138 if (localprops
!= NULL
) {
1139 lp_packed
= fnvlist_pack(localprops
, &size
);
1140 zc
.zc_nvlist_conf
= (uint64_t)(uintptr_t)lp_packed
;
1141 zc
.zc_nvlist_conf_size
= size
;
1145 (void) strlcpy(zc
.zc_string
, origin
,
1146 sizeof (zc
.zc_string
));
1148 ASSERT3S(drr
.drr_type
, ==, DRR_BEGIN
);
1149 zc
.zc_begin_record
= drr
.drr_u
.drr_begin
;
1151 zc
.zc_cookie
= input_fd
;
1152 zc
.zc_cleanup_fd
= -1;
1153 zc
.zc_action_handle
= 0;
1155 zc
.zc_nvlist_dst_size
= 128 * 1024;
1156 zc
.zc_nvlist_dst
= (uint64_t)(uintptr_t)
1157 malloc(zc
.zc_nvlist_dst_size
);
1159 error
= lzc_ioctl_fd(g_fd
, ZFS_IOC_RECV
, &zc
);
1163 if (read_bytes
!= NULL
)
1164 *read_bytes
= zc
.zc_cookie
;
1166 if (errflags
!= NULL
)
1167 *errflags
= zc
.zc_obj
;
1170 VERIFY0(nvlist_unpack(
1171 (void *)(uintptr_t)zc
.zc_nvlist_dst
,
1172 zc
.zc_nvlist_dst_size
, errors
, KM_SLEEP
));
1175 if (rp_packed
!= NULL
)
1176 fnvlist_pack_free(rp_packed
, size
);
1177 if (lp_packed
!= NULL
)
1178 fnvlist_pack_free(lp_packed
, size
);
1179 free((void *)(uintptr_t)zc
.zc_nvlist_dst
);
1186 * The simplest receive case: receive from the specified fd, creating the
1187 * specified snapshot. Apply the specified properties as "received" properties
1188 * (which can be overridden by locally-set properties). If the stream is a
1189 * clone, its origin snapshot must be specified by 'origin'. The 'force'
1190 * flag will cause the target filesystem to be rolled back or destroyed if
1191 * necessary to receive.
1193 * Return 0 on success or an errno on failure.
1195 * Note: this interface does not work on dedup'd streams
1196 * (those with DMU_BACKUP_FEATURE_DEDUP).
1199 lzc_receive(const char *snapname
, nvlist_t
*props
, const char *origin
,
1200 boolean_t force
, boolean_t raw
, int fd
)
1202 return (recv_impl(snapname
, props
, NULL
, NULL
, 0, origin
, force
,
1203 B_FALSE
, B_FALSE
, raw
, fd
, NULL
, NULL
, NULL
, NULL
));
1207 * Like lzc_receive, but if the receive fails due to premature stream
1208 * termination, the intermediate state will be preserved on disk. In this
1209 * case, ECKSUM will be returned. The receive may subsequently be resumed
1210 * with a resuming send stream generated by lzc_send_resume().
1213 lzc_receive_resumable(const char *snapname
, nvlist_t
*props
, const char *origin
,
1214 boolean_t force
, boolean_t raw
, int fd
)
1216 return (recv_impl(snapname
, props
, NULL
, NULL
, 0, origin
, force
,
1217 B_FALSE
, B_TRUE
, raw
, fd
, NULL
, NULL
, NULL
, NULL
));
1221 * Like lzc_receive, but allows the caller to read the begin record and then to
1222 * pass it in. That could be useful if the caller wants to derive, for example,
1223 * the snapname or the origin parameters based on the information contained in
1225 * The begin record must be in its original form as read from the stream,
1226 * in other words, it should not be byteswapped.
1228 * The 'resumable' parameter lets the caller obtain the same behavior as with
1229 * lzc_receive_resumable.
1232 lzc_receive_with_header(const char *snapname
, nvlist_t
*props
,
1233 const char *origin
, boolean_t force
, boolean_t resumable
, boolean_t raw
,
1234 int fd
, const dmu_replay_record_t
*begin_record
)
1236 if (begin_record
== NULL
)
1239 return (recv_impl(snapname
, props
, NULL
, NULL
, 0, origin
, force
,
1240 B_FALSE
, resumable
, raw
, fd
, begin_record
, NULL
, NULL
, NULL
));
1244 * Like lzc_receive, but allows the caller to pass all supported arguments
1245 * and retrieve all values returned. The only additional input parameter
1246 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
1248 * The following parameters all provide return values. Several may be set
1249 * in the failure case and will contain additional information.
1251 * The 'read_bytes' value will be set to the total number of bytes read.
1253 * The 'errflags' value will contain zprop_errflags_t flags which are
1254 * used to describe any failures.
1256 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
1258 * The 'errors' nvlist contains an entry for each unapplied received
1259 * property. Callers are responsible for freeing this nvlist.
1262 lzc_receive_one(const char *snapname
, nvlist_t
*props
,
1263 const char *origin
, boolean_t force
, boolean_t resumable
, boolean_t raw
,
1264 int input_fd
, const dmu_replay_record_t
*begin_record
, int cleanup_fd
,
1265 uint64_t *read_bytes
, uint64_t *errflags
, uint64_t *action_handle
,
1268 (void) action_handle
, (void) cleanup_fd
;
1269 return (recv_impl(snapname
, props
, NULL
, NULL
, 0, origin
, force
,
1270 B_FALSE
, resumable
, raw
, input_fd
, begin_record
,
1271 read_bytes
, errflags
, errors
));
1275 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1278 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1279 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1283 lzc_receive_with_cmdprops(const char *snapname
, nvlist_t
*props
,
1284 nvlist_t
*cmdprops
, uint8_t *wkeydata
, uint_t wkeylen
, const char *origin
,
1285 boolean_t force
, boolean_t resumable
, boolean_t raw
, int input_fd
,
1286 const dmu_replay_record_t
*begin_record
, int cleanup_fd
,
1287 uint64_t *read_bytes
, uint64_t *errflags
, uint64_t *action_handle
,
1290 (void) action_handle
, (void) cleanup_fd
;
1291 return (recv_impl(snapname
, props
, cmdprops
, wkeydata
, wkeylen
, origin
,
1292 force
, B_FALSE
, resumable
, raw
, input_fd
, begin_record
,
1293 read_bytes
, errflags
, errors
));
1297 * Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
1300 * The heal argument tells us to heal the provided snapshot using the provided
1303 int lzc_receive_with_heal(const char *snapname
, nvlist_t
*props
,
1304 nvlist_t
*cmdprops
, uint8_t *wkeydata
, uint_t wkeylen
, const char *origin
,
1305 boolean_t force
, boolean_t heal
, boolean_t resumable
, boolean_t raw
,
1306 int input_fd
, const dmu_replay_record_t
*begin_record
, int cleanup_fd
,
1307 uint64_t *read_bytes
, uint64_t *errflags
, uint64_t *action_handle
,
1310 (void) action_handle
, (void) cleanup_fd
;
1311 return (recv_impl(snapname
, props
, cmdprops
, wkeydata
, wkeylen
, origin
,
1312 force
, heal
, resumable
, raw
, input_fd
, begin_record
,
1313 read_bytes
, errflags
, errors
));
1317 * Roll back this filesystem or volume to its most recent snapshot.
1318 * If snapnamebuf is not NULL, it will be filled in with the name
1319 * of the most recent snapshot.
1320 * Note that the latest snapshot may change if a new one is concurrently
1321 * created or the current one is destroyed. lzc_rollback_to can be used
1322 * to roll back to a specific latest snapshot.
1324 * Return 0 on success or an errno on failure.
1327 lzc_rollback(const char *fsname
, char *snapnamebuf
, int snapnamelen
)
1333 args
= fnvlist_alloc();
1334 err
= lzc_ioctl(ZFS_IOC_ROLLBACK
, fsname
, args
, &result
);
1336 if (err
== 0 && snapnamebuf
!= NULL
) {
1337 const char *snapname
= fnvlist_lookup_string(result
, "target");
1338 (void) strlcpy(snapnamebuf
, snapname
, snapnamelen
);
1340 nvlist_free(result
);
1346 * Roll back this filesystem or volume to the specified snapshot,
1349 * Return 0 on success or an errno on failure.
1352 lzc_rollback_to(const char *fsname
, const char *snapname
)
1358 args
= fnvlist_alloc();
1359 fnvlist_add_string(args
, "target", snapname
);
1360 err
= lzc_ioctl(ZFS_IOC_ROLLBACK
, fsname
, args
, &result
);
1362 nvlist_free(result
);
1367 * Creates new bookmarks from existing snapshot or bookmark.
1369 * The bookmarks nvlist maps from the full name of the new bookmark to
1370 * the full name of the source snapshot or bookmark.
1371 * All the bookmarks and snapshots must be in the same pool.
1372 * The new bookmarks names must be unique.
1373 * => see function dsl_bookmark_create_nvl_validate
1375 * The returned results nvlist will have an entry for each bookmark that failed.
1376 * The value will be the (int32) error code.
1378 * The return value will be 0 if all bookmarks were created, otherwise it will
1379 * be the errno of an (undetermined) bookmark that failed.
1382 lzc_bookmark(nvlist_t
*bookmarks
, nvlist_t
**errlist
)
1386 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
1388 /* determine pool name from first bookmark */
1389 elem
= nvlist_next_nvpair(bookmarks
, NULL
);
1392 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
1393 pool
[strcspn(pool
, "/#")] = '\0';
1395 error
= lzc_ioctl(ZFS_IOC_BOOKMARK
, pool
, bookmarks
, errlist
);
1401 * Retrieve bookmarks.
1403 * Retrieve the list of bookmarks for the given file system. The props
1404 * parameter is an nvlist of property names (with no values) that will be
1405 * returned for each bookmark.
1407 * The following are valid properties on bookmarks, most of which are numbers
1408 * (represented as uint64 in the nvlist), except redact_snaps, which is a
1409 * uint64 array, and redact_complete, which is a boolean
1411 * "guid" - globally unique identifier of the snapshot it refers to
1412 * "createtxg" - txg when the snapshot it refers to was created
1413 * "creation" - timestamp when the snapshot it refers to was created
1414 * "ivsetguid" - IVset guid for identifying encrypted snapshots
1415 * "redact_snaps" - list of guids of the redaction snapshots for the specified
1416 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will
1417 * not contain an entry for this value. If it is redacted with respect to
1418 * no snapshots, it will contain value -> NULL uint64 array
1419 * "redact_complete" - boolean value; true if the redaction bookmark is
1420 * complete, false otherwise.
1422 * The format of the returned nvlist is as follows:
1423 * <short name of bookmark> -> {
1424 * <name of property> -> {
1428 * "redact_snaps" -> {
1429 * "value" -> uint64 array
1431 * "redact_complete" -> {
1432 * "value" -> boolean value
1437 lzc_get_bookmarks(const char *fsname
, nvlist_t
*props
, nvlist_t
**bmarks
)
1439 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS
, fsname
, props
, bmarks
));
1443 * Get bookmark properties.
1445 * Given a bookmark's full name, retrieve all properties for the bookmark.
1447 * The format of the returned property list is as follows:
1449 * <name of property> -> {
1453 * "redact_snaps" -> {
1454 * "value" -> uint64 array
1458 lzc_get_bookmark_props(const char *bookmark
, nvlist_t
**props
)
1462 nvlist_t
*innvl
= fnvlist_alloc();
1463 error
= lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS
, bookmark
, innvl
, props
);
1464 fnvlist_free(innvl
);
1470 * Destroys bookmarks.
1472 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1473 * They must all be in the same pool. Bookmarks are specified as
1476 * Bookmarks that do not exist will be silently ignored.
1478 * The return value will be 0 if all bookmarks that existed were destroyed.
1480 * Otherwise the return value will be the errno of an (undetermined) bookmark
1481 * that failed, no bookmarks will be destroyed, and the errlist will have an
1482 * entry for each bookmark that failed. The value in the errlist will be
1483 * the (int32) error code.
1486 lzc_destroy_bookmarks(nvlist_t
*bmarks
, nvlist_t
**errlist
)
1490 char pool
[ZFS_MAX_DATASET_NAME_LEN
];
1492 /* determine the pool name */
1493 elem
= nvlist_next_nvpair(bmarks
, NULL
);
1496 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
1497 pool
[strcspn(pool
, "/#")] = '\0';
1499 error
= lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS
, pool
, bmarks
, errlist
);
1505 lzc_channel_program_impl(const char *pool
, const char *program
, boolean_t sync
,
1506 uint64_t instrlimit
, uint64_t memlimit
, nvlist_t
*argnvl
, nvlist_t
**outnvl
)
1511 args
= fnvlist_alloc();
1512 fnvlist_add_string(args
, ZCP_ARG_PROGRAM
, program
);
1513 fnvlist_add_nvlist(args
, ZCP_ARG_ARGLIST
, argnvl
);
1514 fnvlist_add_boolean_value(args
, ZCP_ARG_SYNC
, sync
);
1515 fnvlist_add_uint64(args
, ZCP_ARG_INSTRLIMIT
, instrlimit
);
1516 fnvlist_add_uint64(args
, ZCP_ARG_MEMLIMIT
, memlimit
);
1517 error
= lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM
, pool
, args
, outnvl
);
1524 * Executes a channel program.
1526 * If this function returns 0 the channel program was successfully loaded and
1527 * ran without failing. Note that individual commands the channel program ran
1528 * may have failed and the channel program is responsible for reporting such
1529 * errors through outnvl if they are important.
1531 * This method may also return:
1533 * EINVAL The program contains syntax errors, or an invalid memory or time
1534 * limit was given. No part of the channel program was executed.
1535 * If caused by syntax errors, 'outnvl' contains information about the
1538 * ECHRNG The program was executed, but encountered a runtime error, such as
1539 * calling a function with incorrect arguments, invoking the error()
1540 * function directly, failing an assert() command, etc. Some portion
1541 * of the channel program may have executed and committed changes.
1542 * Information about the failure can be found in 'outnvl'.
1544 * ENOMEM The program fully executed, but the output buffer was not large
1545 * enough to store the returned value. No output is returned through
1548 * ENOSPC The program was terminated because it exceeded its memory usage
1549 * limit. Some portion of the channel program may have executed and
1550 * committed changes to disk. No output is returned through 'outnvl'.
1552 * ETIME The program was terminated because it exceeded its Lua instruction
1553 * limit. Some portion of the channel program may have executed and
1554 * committed changes to disk. No output is returned through 'outnvl'.
1557 lzc_channel_program(const char *pool
, const char *program
, uint64_t instrlimit
,
1558 uint64_t memlimit
, nvlist_t
*argnvl
, nvlist_t
**outnvl
)
1560 return (lzc_channel_program_impl(pool
, program
, B_TRUE
, instrlimit
,
1561 memlimit
, argnvl
, outnvl
));
1565 * Creates a checkpoint for the specified pool.
1567 * If this function returns 0 the pool was successfully checkpointed.
1569 * This method may also return:
1571 * ZFS_ERR_CHECKPOINT_EXISTS
1572 * The pool already has a checkpoint. A pool can only have one
1573 * checkpoint at most, at any given time.
1575 * ZFS_ERR_DISCARDING_CHECKPOINT
1576 * ZFS is in the middle of discarding a checkpoint for this pool.
1577 * The pool can be checkpointed again once the discard is done.
1579 * ZFS_ERR_DEVRM_IN_PROGRESS
1580 * A vdev is currently being removed. The pool cannot be
1581 * checkpointed until the device removal is done.
1584 * One or more top-level vdevs exceed the maximum vdev size
1585 * supported for this feature.
1588 lzc_pool_checkpoint(const char *pool
)
1592 nvlist_t
*result
= NULL
;
1593 nvlist_t
*args
= fnvlist_alloc();
1595 error
= lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT
, pool
, args
, &result
);
1598 fnvlist_free(result
);
1604 * Discard the checkpoint from the specified pool.
1606 * If this function returns 0 the checkpoint was successfully discarded.
1608 * This method may also return:
1610 * ZFS_ERR_NO_CHECKPOINT
1611 * The pool does not have a checkpoint.
1613 * ZFS_ERR_DISCARDING_CHECKPOINT
1614 * ZFS is already in the middle of discarding the checkpoint.
1617 lzc_pool_checkpoint_discard(const char *pool
)
1621 nvlist_t
*result
= NULL
;
1622 nvlist_t
*args
= fnvlist_alloc();
1624 error
= lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT
, pool
, args
, &result
);
1627 fnvlist_free(result
);
1633 * Load the requested data type for the specified pool.
1636 lzc_pool_prefetch(const char *pool
, zpool_prefetch_type_t type
)
1639 nvlist_t
*result
= NULL
;
1640 nvlist_t
*args
= fnvlist_alloc();
1642 fnvlist_add_int32(args
, ZPOOL_PREFETCH_TYPE
, type
);
1644 error
= lzc_ioctl(ZFS_IOC_POOL_PREFETCH
, pool
, args
, &result
);
1647 fnvlist_free(result
);
1653 * Executes a read-only channel program.
1655 * A read-only channel program works programmatically the same way as a
1656 * normal channel program executed with lzc_channel_program(). The only
1657 * difference is it runs exclusively in open-context and therefore can
1658 * return faster. The downside is that the program cannot change
1659 * on-disk state by calling functions from the zfs.sync submodule.
1661 * The return values of this function (and their meaning) are exactly the
1662 * same as the ones described in lzc_channel_program().
1665 lzc_channel_program_nosync(const char *pool
, const char *program
,
1666 uint64_t timeout
, uint64_t memlimit
, nvlist_t
*argnvl
, nvlist_t
**outnvl
)
1668 return (lzc_channel_program_impl(pool
, program
, B_FALSE
, timeout
,
1669 memlimit
, argnvl
, outnvl
));
1673 lzc_get_vdev_prop(const char *poolname
, nvlist_t
*innvl
, nvlist_t
**outnvl
)
1675 return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS
, poolname
, innvl
, outnvl
));
1679 lzc_set_vdev_prop(const char *poolname
, nvlist_t
*innvl
, nvlist_t
**outnvl
)
1681 return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS
, poolname
, innvl
, outnvl
));
1685 * Performs key management functions
1687 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
1688 * load or change a wrapping key, the key should be specified in the
1689 * hidden_args nvlist so that it is not logged.
1692 lzc_load_key(const char *fsname
, boolean_t noop
, uint8_t *wkeydata
,
1697 nvlist_t
*hidden_args
;
1699 if (wkeydata
== NULL
)
1702 ioc_args
= fnvlist_alloc();
1703 hidden_args
= fnvlist_alloc();
1704 fnvlist_add_uint8_array(hidden_args
, "wkeydata", wkeydata
, wkeylen
);
1705 fnvlist_add_nvlist(ioc_args
, ZPOOL_HIDDEN_ARGS
, hidden_args
);
1707 fnvlist_add_boolean(ioc_args
, "noop");
1708 error
= lzc_ioctl(ZFS_IOC_LOAD_KEY
, fsname
, ioc_args
, NULL
);
1709 nvlist_free(hidden_args
);
1710 nvlist_free(ioc_args
);
1716 lzc_unload_key(const char *fsname
)
1718 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY
, fsname
, NULL
, NULL
));
1722 lzc_change_key(const char *fsname
, uint64_t crypt_cmd
, nvlist_t
*props
,
1723 uint8_t *wkeydata
, uint_t wkeylen
)
1726 nvlist_t
*ioc_args
= fnvlist_alloc();
1727 nvlist_t
*hidden_args
= NULL
;
1729 fnvlist_add_uint64(ioc_args
, "crypt_cmd", crypt_cmd
);
1731 if (wkeydata
!= NULL
) {
1732 hidden_args
= fnvlist_alloc();
1733 fnvlist_add_uint8_array(hidden_args
, "wkeydata", wkeydata
,
1735 fnvlist_add_nvlist(ioc_args
, ZPOOL_HIDDEN_ARGS
, hidden_args
);
1739 fnvlist_add_nvlist(ioc_args
, "props", props
);
1741 error
= lzc_ioctl(ZFS_IOC_CHANGE_KEY
, fsname
, ioc_args
, NULL
);
1742 nvlist_free(hidden_args
);
1743 nvlist_free(ioc_args
);
1749 lzc_reopen(const char *pool_name
, boolean_t scrub_restart
)
1751 nvlist_t
*args
= fnvlist_alloc();
1754 fnvlist_add_boolean_value(args
, "scrub_restart", scrub_restart
);
1756 error
= lzc_ioctl(ZFS_IOC_POOL_REOPEN
, pool_name
, args
, NULL
);
1762 * Changes initializing state.
1764 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1765 * The key is ignored.
1767 * If there are errors related to vdev arguments, per-vdev errors are returned
1768 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1769 * guid is stringified with PRIu64, and errno is one of the following as
1771 * - ENODEV if the device was not found
1772 * - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1773 * - EROFS if the device is not writeable
1774 * - EBUSY start requested but the device is already being either
1775 * initialized or trimmed
1776 * - ESRCH cancel/suspend requested but device is not being initialized
1778 * If the errlist is empty, then return value will be:
1779 * - EINVAL if one or more arguments was invalid
1780 * - Other spa_open failures
1781 * - 0 if the operation succeeded
1784 lzc_initialize(const char *poolname
, pool_initialize_func_t cmd_type
,
1785 nvlist_t
*vdevs
, nvlist_t
**errlist
)
1789 nvlist_t
*args
= fnvlist_alloc();
1790 fnvlist_add_uint64(args
, ZPOOL_INITIALIZE_COMMAND
, (uint64_t)cmd_type
);
1791 fnvlist_add_nvlist(args
, ZPOOL_INITIALIZE_VDEVS
, vdevs
);
1793 error
= lzc_ioctl(ZFS_IOC_POOL_INITIALIZE
, poolname
, args
, errlist
);
1801 * Changes TRIM state.
1803 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1804 * The key is ignored.
1806 * If there are errors related to vdev arguments, per-vdev errors are returned
1807 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1808 * guid is stringified with PRIu64, and errno is one of the following as
1810 * - ENODEV if the device was not found
1811 * - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1812 * - EROFS if the device is not writeable
1813 * - EBUSY start requested but the device is already being either trimmed
1815 * - ESRCH cancel/suspend requested but device is not being trimmed
1816 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1818 * If the errlist is empty, then return value will be:
1819 * - EINVAL if one or more arguments was invalid
1820 * - Other spa_open failures
1821 * - 0 if the operation succeeded
1824 lzc_trim(const char *poolname
, pool_trim_func_t cmd_type
, uint64_t rate
,
1825 boolean_t secure
, nvlist_t
*vdevs
, nvlist_t
**errlist
)
1829 nvlist_t
*args
= fnvlist_alloc();
1830 fnvlist_add_uint64(args
, ZPOOL_TRIM_COMMAND
, (uint64_t)cmd_type
);
1831 fnvlist_add_nvlist(args
, ZPOOL_TRIM_VDEVS
, vdevs
);
1832 fnvlist_add_uint64(args
, ZPOOL_TRIM_RATE
, rate
);
1833 fnvlist_add_boolean_value(args
, ZPOOL_TRIM_SECURE
, secure
);
1835 error
= lzc_ioctl(ZFS_IOC_POOL_TRIM
, poolname
, args
, errlist
);
1843 * Create a redaction bookmark named bookname by redacting snapshot with respect
1844 * to all the snapshots in snapnv.
1847 lzc_redact(const char *snapshot
, const char *bookname
, nvlist_t
*snapnv
)
1849 nvlist_t
*args
= fnvlist_alloc();
1850 fnvlist_add_string(args
, "bookname", bookname
);
1851 fnvlist_add_nvlist(args
, "snapnv", snapnv
);
1852 int error
= lzc_ioctl(ZFS_IOC_REDACT
, snapshot
, args
, NULL
);
1858 wait_common(const char *pool
, zpool_wait_activity_t activity
, boolean_t use_tag
,
1859 uint64_t tag
, boolean_t
*waited
)
1861 nvlist_t
*args
= fnvlist_alloc();
1862 nvlist_t
*result
= NULL
;
1864 fnvlist_add_int32(args
, ZPOOL_WAIT_ACTIVITY
, activity
);
1866 fnvlist_add_uint64(args
, ZPOOL_WAIT_TAG
, tag
);
1868 int error
= lzc_ioctl(ZFS_IOC_WAIT
, pool
, args
, &result
);
1870 if (error
== 0 && waited
!= NULL
)
1871 *waited
= fnvlist_lookup_boolean_value(result
,
1875 fnvlist_free(result
);
1881 lzc_wait(const char *pool
, zpool_wait_activity_t activity
, boolean_t
*waited
)
1883 return (wait_common(pool
, activity
, B_FALSE
, 0, waited
));
1887 lzc_wait_tag(const char *pool
, zpool_wait_activity_t activity
, uint64_t tag
,
1890 return (wait_common(pool
, activity
, B_TRUE
, tag
, waited
));
1894 lzc_wait_fs(const char *fs
, zfs_wait_activity_t activity
, boolean_t
*waited
)
1896 nvlist_t
*args
= fnvlist_alloc();
1897 nvlist_t
*result
= NULL
;
1899 fnvlist_add_int32(args
, ZFS_WAIT_ACTIVITY
, activity
);
1901 int error
= lzc_ioctl(ZFS_IOC_WAIT_FS
, fs
, args
, &result
);
1903 if (error
== 0 && waited
!= NULL
)
1904 *waited
= fnvlist_lookup_boolean_value(result
,
1908 fnvlist_free(result
);
1914 * Set the bootenv contents for the given pool.
1917 lzc_set_bootenv(const char *pool
, const nvlist_t
*env
)
1919 return (lzc_ioctl(ZFS_IOC_SET_BOOTENV
, pool
, (nvlist_t
*)env
, NULL
));
1923 * Get the contents of the bootenv of the given pool.
1926 lzc_get_bootenv(const char *pool
, nvlist_t
**outnvl
)
1928 return (lzc_ioctl(ZFS_IOC_GET_BOOTENV
, pool
, NULL
, outnvl
));
1932 * Prune the specified amount from the pool's dedup table.
1935 lzc_ddt_prune(const char *pool
, zpool_ddt_prune_unit_t unit
, uint64_t amount
)
1939 nvlist_t
*result
= NULL
;
1940 nvlist_t
*args
= fnvlist_alloc();
1942 fnvlist_add_int32(args
, DDT_PRUNE_UNIT
, unit
);
1943 fnvlist_add_uint64(args
, DDT_PRUNE_AMOUNT
, amount
);
1945 error
= lzc_ioctl(ZFS_IOC_DDT_PRUNE
, pool
, args
, &result
);
1948 fnvlist_free(result
);