4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
27 #include <sys/zfs_context.h>
28 #include <sys/dsl_userhold.h>
29 #include <sys/dsl_dataset.h>
30 #include <sys/dsl_destroy.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/dmu_tx.h>
33 #include <sys/zfs_onexit.h>
34 #include <sys/dsl_pool.h>
35 #include <sys/dsl_dir.h>
36 #include <sys/zfs_ioctl.h>
39 typedef struct dsl_dataset_user_hold_arg
{
40 nvlist_t
*dduha_holds
;
41 nvlist_t
*dduha_chkholds
;
42 nvlist_t
*dduha_errlist
;
44 } dsl_dataset_user_hold_arg_t
;
47 * If you add new checks here, you may need to add additional checks to the
48 * "temporary" case in snapshot_check() in dmu_objset.c.
51 dsl_dataset_user_hold_check_one(dsl_dataset_t
*ds
, const char *htag
,
52 boolean_t temphold
, dmu_tx_t
*tx
)
54 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
55 objset_t
*mos
= dp
->dp_meta_objset
;
58 ASSERT(dsl_pool_config_held(dp
));
60 if (strlen(htag
) > MAXNAMELEN
)
61 return (SET_ERROR(E2BIG
));
62 /* Tempholds have a more restricted length */
63 if (temphold
&& strlen(htag
) + MAX_TAG_PREFIX_LEN
>= MAXNAMELEN
)
64 return (SET_ERROR(E2BIG
));
66 /* tags must be unique (if ds already exists) */
67 if (ds
!= NULL
&& dsl_dataset_phys(ds
)->ds_userrefs_obj
!= 0) {
70 error
= zap_lookup(mos
, dsl_dataset_phys(ds
)->ds_userrefs_obj
,
73 error
= SET_ERROR(EEXIST
);
74 else if (error
== ENOENT
)
82 dsl_dataset_user_hold_check(void *arg
, dmu_tx_t
*tx
)
84 dsl_dataset_user_hold_arg_t
*dduha
= arg
;
85 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
88 if (spa_version(dp
->dp_spa
) < SPA_VERSION_USERREFS
)
89 return (SET_ERROR(ENOTSUP
));
91 if (!dmu_tx_is_syncing(tx
))
95 * Ensure the list has no duplicates by copying name/values from
96 * non-unique dduha_holds to unique tmp_holds, and comparing counts.
98 tmp_holds
= fnvlist_alloc();
99 for (nvpair_t
*pair
= nvlist_next_nvpair(dduha
->dduha_holds
, NULL
);
100 pair
!= NULL
; pair
= nvlist_next_nvpair(dduha
->dduha_holds
, pair
)) {
101 size_t len
= strlen(nvpair_name(pair
)) +
102 strlen(fnvpair_value_string(pair
));
103 char *nameval
= kmem_zalloc(len
+ 2, KM_SLEEP
);
104 (void) strlcpy(nameval
, nvpair_name(pair
), len
+ 2);
105 (void) strlcat(nameval
, "@", len
+ 2);
106 (void) strlcat(nameval
, fnvpair_value_string(pair
), len
+ 2);
107 fnvlist_add_string(tmp_holds
, nameval
, "");
108 kmem_free(nameval
, len
+ 2);
110 size_t tmp_count
= fnvlist_num_pairs(tmp_holds
);
111 fnvlist_free(tmp_holds
);
112 if (tmp_count
!= fnvlist_num_pairs(dduha
->dduha_holds
))
113 return (SET_ERROR(EEXIST
));
114 for (nvpair_t
*pair
= nvlist_next_nvpair(dduha
->dduha_holds
, NULL
);
115 pair
!= NULL
; pair
= nvlist_next_nvpair(dduha
->dduha_holds
, pair
)) {
118 const char *htag
, *name
;
120 /* must be a snapshot */
121 name
= nvpair_name(pair
);
122 if (strchr(name
, '@') == NULL
)
123 error
= SET_ERROR(EINVAL
);
126 error
= nvpair_value_string(pair
, &htag
);
129 error
= dsl_dataset_hold(dp
, name
, FTAG
, &ds
);
132 error
= dsl_dataset_user_hold_check_one(ds
, htag
,
133 dduha
->dduha_minor
!= 0, tx
);
134 dsl_dataset_rele(ds
, FTAG
);
138 fnvlist_add_string(dduha
->dduha_chkholds
, name
, htag
);
141 * We register ENOENT errors so they can be correctly
142 * reported if needed, such as when all holds fail.
144 fnvlist_add_int32(dduha
->dduha_errlist
, name
, error
);
155 dsl_dataset_user_hold_sync_one_impl(nvlist_t
*tmpholds
, dsl_dataset_t
*ds
,
156 const char *htag
, minor_t minor
, uint64_t now
, dmu_tx_t
*tx
)
158 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
159 objset_t
*mos
= dp
->dp_meta_objset
;
162 ASSERT(RRW_WRITE_HELD(&dp
->dp_config_rwlock
));
164 if (dsl_dataset_phys(ds
)->ds_userrefs_obj
== 0) {
166 * This is the first user hold for this dataset. Create
167 * the userrefs zap object.
169 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
170 zapobj
= dsl_dataset_phys(ds
)->ds_userrefs_obj
=
171 zap_create(mos
, DMU_OT_USERREFS
, DMU_OT_NONE
, 0, tx
);
173 zapobj
= dsl_dataset_phys(ds
)->ds_userrefs_obj
;
177 VERIFY0(zap_add(mos
, zapobj
, htag
, 8, 1, &now
, tx
));
180 char name
[MAXNAMELEN
];
183 VERIFY0(dsl_pool_user_hold(dp
, ds
->ds_object
,
185 (void) snprintf(name
, sizeof (name
), "%llx",
186 (u_longlong_t
)ds
->ds_object
);
188 if (nvlist_lookup_nvlist(tmpholds
, name
, &tags
) != 0) {
189 tags
= fnvlist_alloc();
190 fnvlist_add_boolean(tags
, htag
);
191 fnvlist_add_nvlist(tmpholds
, name
, tags
);
194 fnvlist_add_boolean(tags
, htag
);
198 spa_history_log_internal_ds(ds
, "hold", tx
,
199 "tag=%s temp=%d refs=%llu",
200 htag
, minor
!= 0, (u_longlong_t
)ds
->ds_userrefs
);
203 typedef struct zfs_hold_cleanup_arg
{
204 char zhca_spaname
[ZFS_MAX_DATASET_NAME_LEN
];
205 uint64_t zhca_spa_load_guid
;
206 nvlist_t
*zhca_holds
;
207 } zfs_hold_cleanup_arg_t
;
210 dsl_dataset_user_release_onexit(void *arg
)
212 zfs_hold_cleanup_arg_t
*ca
= arg
;
216 error
= spa_open(ca
->zhca_spaname
, &spa
, FTAG
);
218 zfs_dbgmsg("couldn't release holds on pool=%s "
219 "because pool is no longer loaded",
223 if (spa_load_guid(spa
) != ca
->zhca_spa_load_guid
) {
224 zfs_dbgmsg("couldn't release holds on pool=%s "
225 "because pool is no longer loaded (guid doesn't match)",
227 spa_close(spa
, FTAG
);
231 (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa
), ca
->zhca_holds
);
232 fnvlist_free(ca
->zhca_holds
);
233 kmem_free(ca
, sizeof (zfs_hold_cleanup_arg_t
));
234 spa_close(spa
, FTAG
);
238 dsl_onexit_hold_cleanup(spa_t
*spa
, nvlist_t
*holds
, minor_t minor
)
240 zfs_hold_cleanup_arg_t
*ca
;
242 if (minor
== 0 || nvlist_empty(holds
)) {
248 ca
= kmem_alloc(sizeof (*ca
), KM_SLEEP
);
250 (void) strlcpy(ca
->zhca_spaname
, spa_name(spa
),
251 sizeof (ca
->zhca_spaname
));
252 ca
->zhca_spa_load_guid
= spa_load_guid(spa
);
253 ca
->zhca_holds
= holds
;
254 VERIFY0(zfs_onexit_add_cb(minor
,
255 dsl_dataset_user_release_onexit
, ca
, NULL
));
259 dsl_dataset_user_hold_sync_one(dsl_dataset_t
*ds
, const char *htag
,
260 minor_t minor
, uint64_t now
, dmu_tx_t
*tx
)
265 tmpholds
= fnvlist_alloc();
268 dsl_dataset_user_hold_sync_one_impl(tmpholds
, ds
, htag
, minor
, now
, tx
);
269 dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds
), tmpholds
, minor
);
273 dsl_dataset_user_hold_sync(void *arg
, dmu_tx_t
*tx
)
275 dsl_dataset_user_hold_arg_t
*dduha
= arg
;
276 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
278 uint64_t now
= gethrestime_sec();
280 if (dduha
->dduha_minor
!= 0)
281 tmpholds
= fnvlist_alloc();
284 for (nvpair_t
*pair
= nvlist_next_nvpair(dduha
->dduha_chkholds
, NULL
);
286 pair
= nvlist_next_nvpair(dduha
->dduha_chkholds
, pair
)) {
289 VERIFY0(dsl_dataset_hold(dp
, nvpair_name(pair
), FTAG
, &ds
));
290 dsl_dataset_user_hold_sync_one_impl(tmpholds
, ds
,
291 fnvpair_value_string(pair
), dduha
->dduha_minor
, now
, tx
);
292 dsl_dataset_rele(ds
, FTAG
);
294 dsl_onexit_hold_cleanup(dp
->dp_spa
, tmpholds
, dduha
->dduha_minor
);
298 * The full semantics of this function are described in the comment above
302 * holds is nvl of snapname -> holdname
303 * errlist will be filled in with snapname -> error
305 * The snapshots must all be in the same pool.
307 * Holds for snapshots that don't exist will be skipped.
309 * If none of the snapshots for requested holds exist then ENOENT will be
312 * If cleanup_minor is not 0, the holds will be temporary and will be cleaned
313 * up when the process exits.
315 * On success all the holds, for snapshots that existed, will be created and 0
318 * On failure no holds will be created, the errlist will be filled in,
319 * and an errno will be returned.
321 * In all cases the errlist will contain entries for holds where the snapshot
325 dsl_dataset_user_hold(nvlist_t
*holds
, minor_t cleanup_minor
, nvlist_t
*errlist
)
327 dsl_dataset_user_hold_arg_t dduha
;
331 pair
= nvlist_next_nvpair(holds
, NULL
);
335 dduha
.dduha_holds
= holds
;
336 /* chkholds can have non-unique name */
337 VERIFY(0 == nvlist_alloc(&dduha
.dduha_chkholds
, 0, KM_SLEEP
));
338 dduha
.dduha_errlist
= errlist
;
339 dduha
.dduha_minor
= cleanup_minor
;
341 ret
= dsl_sync_task(nvpair_name(pair
), dsl_dataset_user_hold_check
,
342 dsl_dataset_user_hold_sync
, &dduha
,
343 fnvlist_num_pairs(holds
), ZFS_SPACE_CHECK_RESERVED
);
344 fnvlist_free(dduha
.dduha_chkholds
);
349 typedef int (dsl_holdfunc_t
)(dsl_pool_t
*dp
, const char *name
, const void *tag
,
350 dsl_dataset_t
**dsp
);
352 typedef struct dsl_dataset_user_release_arg
{
353 dsl_holdfunc_t
*ddura_holdfunc
;
354 nvlist_t
*ddura_holds
;
355 nvlist_t
*ddura_todelete
;
356 nvlist_t
*ddura_errlist
;
357 nvlist_t
*ddura_chkholds
;
358 } dsl_dataset_user_release_arg_t
;
360 /* Place a dataset hold on the snapshot identified by passed dsobj string */
362 dsl_dataset_hold_obj_string(dsl_pool_t
*dp
, const char *dsobj
, const void *tag
,
365 return (dsl_dataset_hold_obj(dp
, zfs_strtonum(dsobj
, NULL
), tag
, dsp
));
369 dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t
*ddura
,
370 dsl_dataset_t
*ds
, nvlist_t
*holds
, const char *snapname
)
373 nvlist_t
*holds_found
;
377 if (!ds
->ds_is_snapshot
)
378 return (SET_ERROR(EINVAL
));
380 if (nvlist_empty(holds
))
384 mos
= ds
->ds_dir
->dd_pool
->dp_meta_objset
;
385 zapobj
= dsl_dataset_phys(ds
)->ds_userrefs_obj
;
386 VERIFY0(nvlist_alloc(&holds_found
, NV_UNIQUE_NAME
, KM_SLEEP
));
388 for (nvpair_t
*pair
= nvlist_next_nvpair(holds
, NULL
); pair
!= NULL
;
389 pair
= nvlist_next_nvpair(holds
, pair
)) {
392 const char *holdname
= nvpair_name(pair
);
395 error
= zap_lookup(mos
, zapobj
, holdname
, 8, 1, &tmp
);
397 error
= SET_ERROR(ENOENT
);
400 * Non-existent holds are put on the errlist, but don't
401 * cause an overall failure.
403 if (error
== ENOENT
) {
404 if (ddura
->ddura_errlist
!= NULL
) {
405 char *errtag
= kmem_asprintf("%s#%s",
407 fnvlist_add_int32(ddura
->ddura_errlist
, errtag
,
409 kmem_strfree(errtag
);
415 fnvlist_free(holds_found
);
419 fnvlist_add_boolean(holds_found
, holdname
);
423 if (DS_IS_DEFER_DESTROY(ds
) &&
424 dsl_dataset_phys(ds
)->ds_num_children
== 1 &&
425 ds
->ds_userrefs
== numholds
) {
426 /* we need to destroy the snapshot as well */
427 if (dsl_dataset_long_held(ds
)) {
428 fnvlist_free(holds_found
);
429 return (SET_ERROR(EBUSY
));
431 fnvlist_add_boolean(ddura
->ddura_todelete
, snapname
);
435 fnvlist_add_nvlist(ddura
->ddura_chkholds
, snapname
,
438 fnvlist_free(holds_found
);
444 dsl_dataset_user_release_check(void *arg
, dmu_tx_t
*tx
)
446 dsl_dataset_user_release_arg_t
*ddura
;
447 dsl_holdfunc_t
*holdfunc
;
450 if (!dmu_tx_is_syncing(tx
))
453 dp
= dmu_tx_pool(tx
);
455 ASSERT(RRW_WRITE_HELD(&dp
->dp_config_rwlock
));
458 holdfunc
= ddura
->ddura_holdfunc
;
460 for (nvpair_t
*pair
= nvlist_next_nvpair(ddura
->ddura_holds
, NULL
);
461 pair
!= NULL
; pair
= nvlist_next_nvpair(ddura
->ddura_holds
, pair
)) {
465 const char *snapname
= nvpair_name(pair
);
467 error
= nvpair_value_nvlist(pair
, &holds
);
469 error
= (SET_ERROR(EINVAL
));
471 error
= holdfunc(dp
, snapname
, FTAG
, &ds
);
473 error
= dsl_dataset_user_release_check_one(ddura
, ds
,
475 dsl_dataset_rele(ds
, FTAG
);
478 if (ddura
->ddura_errlist
!= NULL
) {
479 fnvlist_add_int32(ddura
->ddura_errlist
,
483 * Non-existent snapshots are put on the errlist,
484 * but don't cause an overall failure.
495 dsl_dataset_user_release_sync_one(dsl_dataset_t
*ds
, nvlist_t
*holds
,
498 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
499 objset_t
*mos
= dp
->dp_meta_objset
;
501 for (nvpair_t
*pair
= nvlist_next_nvpair(holds
, NULL
); pair
!= NULL
;
502 pair
= nvlist_next_nvpair(holds
, pair
)) {
504 const char *holdname
= nvpair_name(pair
);
506 /* Remove temporary hold if one exists. */
507 error
= dsl_pool_user_release(dp
, ds
->ds_object
, holdname
, tx
);
508 VERIFY(error
== 0 || error
== ENOENT
);
510 VERIFY0(zap_remove(mos
, dsl_dataset_phys(ds
)->ds_userrefs_obj
,
514 spa_history_log_internal_ds(ds
, "release", tx
,
515 "tag=%s refs=%lld", holdname
, (longlong_t
)ds
->ds_userrefs
);
520 dsl_dataset_user_release_sync(void *arg
, dmu_tx_t
*tx
)
522 dsl_dataset_user_release_arg_t
*ddura
= arg
;
523 dsl_holdfunc_t
*holdfunc
= ddura
->ddura_holdfunc
;
524 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
526 ASSERT(RRW_WRITE_HELD(&dp
->dp_config_rwlock
));
528 for (nvpair_t
*pair
= nvlist_next_nvpair(ddura
->ddura_chkholds
, NULL
);
529 pair
!= NULL
; pair
= nvlist_next_nvpair(ddura
->ddura_chkholds
,
532 const char *name
= nvpair_name(pair
);
534 VERIFY0(holdfunc(dp
, name
, FTAG
, &ds
));
536 dsl_dataset_user_release_sync_one(ds
,
537 fnvpair_value_nvlist(pair
), tx
);
538 if (nvlist_exists(ddura
->ddura_todelete
, name
)) {
539 ASSERT(ds
->ds_userrefs
== 0 &&
540 dsl_dataset_phys(ds
)->ds_num_children
== 1 &&
541 DS_IS_DEFER_DESTROY(ds
));
542 dsl_destroy_snapshot_sync_impl(ds
, B_FALSE
, tx
);
544 dsl_dataset_rele(ds
, FTAG
);
549 * The full semantics of this function are described in the comment above
553 * Releases holds specified in the nvl holds.
555 * holds is nvl of snapname -> { holdname, ... }
556 * errlist will be filled in with snapname -> error
558 * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
559 * otherwise they should be the names of snapshots.
561 * As a release may cause snapshots to be destroyed this tries to ensure they
564 * Releases of non-existent holds are skipped.
566 * At least one hold must have been released for this function to succeed
570 dsl_dataset_user_release_impl(nvlist_t
*holds
, nvlist_t
*errlist
,
573 dsl_dataset_user_release_arg_t ddura
;
578 pair
= nvlist_next_nvpair(holds
, NULL
);
583 * The release may cause snapshots to be destroyed; make sure they
587 /* Temporary holds are specified by dsobj string. */
588 ddura
.ddura_holdfunc
= dsl_dataset_hold_obj_string
;
589 pool
= spa_name(tmpdp
->dp_spa
);
591 for (pair
= nvlist_next_nvpair(holds
, NULL
); pair
!= NULL
;
592 pair
= nvlist_next_nvpair(holds
, pair
)) {
595 dsl_pool_config_enter(tmpdp
, FTAG
);
596 error
= dsl_dataset_hold_obj_string(tmpdp
,
597 nvpair_name(pair
), FTAG
, &ds
);
599 char name
[ZFS_MAX_DATASET_NAME_LEN
];
600 dsl_dataset_name(ds
, name
);
601 dsl_pool_config_exit(tmpdp
, FTAG
);
602 dsl_dataset_rele(ds
, FTAG
);
603 (void) zfs_unmount_snap(name
);
605 dsl_pool_config_exit(tmpdp
, FTAG
);
610 /* Non-temporary holds are specified by name. */
611 ddura
.ddura_holdfunc
= dsl_dataset_hold
;
612 pool
= nvpair_name(pair
);
614 for (pair
= nvlist_next_nvpair(holds
, NULL
); pair
!= NULL
;
615 pair
= nvlist_next_nvpair(holds
, pair
)) {
616 (void) zfs_unmount_snap(nvpair_name(pair
));
621 ddura
.ddura_holds
= holds
;
622 ddura
.ddura_errlist
= errlist
;
623 VERIFY0(nvlist_alloc(&ddura
.ddura_todelete
, NV_UNIQUE_NAME
,
625 VERIFY0(nvlist_alloc(&ddura
.ddura_chkholds
, NV_UNIQUE_NAME
,
628 error
= dsl_sync_task(pool
, dsl_dataset_user_release_check
,
629 dsl_dataset_user_release_sync
, &ddura
, 0,
630 ZFS_SPACE_CHECK_EXTRA_RESERVED
);
631 fnvlist_free(ddura
.ddura_todelete
);
632 fnvlist_free(ddura
.ddura_chkholds
);
638 * holds is nvl of snapname -> { holdname, ... }
639 * errlist will be filled in with snapname -> error
642 dsl_dataset_user_release(nvlist_t
*holds
, nvlist_t
*errlist
)
644 return (dsl_dataset_user_release_impl(holds
, errlist
, NULL
));
648 * holds is nvl of snapdsobj -> { holdname, ... }
651 dsl_dataset_user_release_tmp(struct dsl_pool
*dp
, nvlist_t
*holds
)
654 (void) dsl_dataset_user_release_impl(holds
, NULL
, dp
);
658 dsl_dataset_get_holds(const char *dsname
, nvlist_t
*nvl
)
664 err
= dsl_pool_hold(dsname
, FTAG
, &dp
);
667 err
= dsl_dataset_hold(dp
, dsname
, FTAG
, &ds
);
669 dsl_pool_rele(dp
, FTAG
);
673 if (dsl_dataset_phys(ds
)->ds_userrefs_obj
!= 0) {
677 za
= zap_attribute_alloc();
678 for (zap_cursor_init(&zc
, ds
->ds_dir
->dd_pool
->dp_meta_objset
,
679 dsl_dataset_phys(ds
)->ds_userrefs_obj
);
680 zap_cursor_retrieve(&zc
, za
) == 0;
681 zap_cursor_advance(&zc
)) {
682 fnvlist_add_uint64(nvl
, za
->za_name
,
683 za
->za_first_integer
);
685 zap_cursor_fini(&zc
);
686 zap_attribute_free(za
);
688 dsl_dataset_rele(ds
, FTAG
);
689 dsl_pool_rele(dp
, FTAG
);