/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_znode.h>
#include <sys/sunddi.h>
static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
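
/*
 * Note that dsl_reaper is only ever compared by address; a dataset
 * whose ds_owner points at this sentinel is one whose destruction has
 * been committed (see dsl_dataset_destroy_sync() below).
 */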
/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer.  If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
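
/*
 * For example, with ds_unique_bytes = 10M, ds_reserved = 25M and
 * delta = +5M: old_bytes = MAX(10M, 25M) = 25M and new_bytes =
 * MAX(15M, 25M) = 25M, so parent_delta() returns 0 -- the growth is
 * absorbed by space the refreservation has already charged to our
 * ancestors.
 */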
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}
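
/*
 * Note that dd_lock is taken before ds_lock here, and the kill path
 * below uses the same order, so the parent_delta() adjustment and the
 * matching dsl_dir space transfer always happen atomically with
 * respect to each other.
 */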
int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(pio != NULL);
	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset (dsl_dir).
		 */
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;
		int64_t delta;

		dprintf_bp(bp, "freeing: %s", "");
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}
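
/*
 * To summarize the two paths above: a block born after the most
 * recent snapshot can be freed immediately, since no snapshot can
 * reference it; an older block must instead be put on the deadlist,
 * because the previous snapshot still references it.
 */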
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}
boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}
/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}
static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}
static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}
static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}
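
/*
 * The lookup and remove paths share the same pattern: try the
 * normalizing ZAP operation first (MT_FIRST for case-insensitive
 * datasets), and fall back to the plain exact-match operation if the
 * ZAP does not support normalized matching (ENOTSUP).
 */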
static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}

			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
				dsl_dataset_t *origin;

				err = dsl_dataset_hold_obj(dp,
				    ds->ds_dir->dd_phys->dd_origin_obj,
				    FTAG, &origin);
				if (err == 0) {
					ds->ds_origin_txg =
					    origin->ds_phys->ds_creation_txg;
					dsl_dataset_rele(origin, FTAG);
				}
			}
		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
			err = dsl_dataset_get_snapname(ds);
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		*dsp = NULL;
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}
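
/*
 * A non-NULL "winner" from dmu_buf_set_user_ie() means another thread
 * raced us and attached its own dsl_dataset_t to the bonus buffer
 * first; in that case we tear down the copy we just built and adopt
 * the winner's instead.
 */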
static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want the rwlock lock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructability" of
	 * the dataset is determined.
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}
int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}
int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);

	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);

	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		return (EBUSY);
	}
	return (0);
}
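
/*
 * Holds are shared and reference-counted; owning additionally claims
 * the ds_owner field, so at most one owner can exist at a time.
 * Passing DS_MODE_INCONSISTENT is the only way to own a dataset that
 * is marked DS_FLAG_INCONSISTENT (see dsl_dataset_tryown() below).
 */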
int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}
int
dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, owner, dsp);

	if (err)
		return (err);
	if ((*dsp)->ds_phys->ds_num_children > 0 &&
	    !DS_MODE_IS_READONLY(flags)) {
		dsl_dataset_rele(*dsp, owner);
		return (EROFS);
	}
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		return (EBUSY);
	}
	return (0);
}
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}
static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}
	return (result);
}
void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}
void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
		rw_exit(&ds->ds_rwlock);
	dsl_dataset_drop_ref(ds, tag);
}
void
dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
{
	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, owner);
	else
		dsl_dataset_evict(ds->ds_dbuf, ds);
}
boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = owner;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}
void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);

	if (origin) {
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    origin->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	return (dsobj);
}
struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->dstg, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == 0) {
		dsl_dataset_make_exclusive(ds, da->dstg);
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
	} else if (err == ENOENT) {
		err = 0;
	} else {
		(void) strcpy(da->failed, name);
	}
	return (err);
}
/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			*strchr(fsname, '@') = '\0';
		}
		dsl_dataset_disown(ds, da.dstg);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}
/*
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it.
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, tag, 0);
		goto out;
	}

	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}

	dmu_objset_close(os);
	if (err != ESRCH)
		goto out;

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	if (ds->ds_user_ptr) {
		/*
		 * We need to sync out all in-flight IO before we try
		 * to evict (the dataset evict func is trying to clear
		 * the cached entries for this dataset in the ARC).
		 */
		txg_wait_synced(dd->dd_pool, 0);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_user_ptr) {
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, tag, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}
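
/*
 * Freeing the objects in open context above is purely an
 * optimization: the destroy sync task would free them anyway, but
 * pre-freeing keeps the amount of work done in the single-threaded
 * syncing phase small.
 */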
int
dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
{
	int err;

	ASSERT(ds->ds_owner);

	dsl_dataset_make_exclusive(ds, ds->ds_owner);
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, &ost, 0);
	/* drop exclusive access */
	mutex_enter(&ds->ds_lock);
	rw_exit(&ds->ds_rwlock);
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);
	return (err);
}
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}
/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}
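
/*
 * Worked example of the formula above (unique = used - (mrs_used -
 * dlused)): if the head references 100M, the most recent snapshot
 * used 80M, and 30M of the snapshot's blocks are on our deadlist
 * (freed since the snapshot), the snapshot still shares 50M with us,
 * so our unique space is 100M - 50M = 50M.
 */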
uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}
struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
	(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);

	return (0);
}
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;

	/*
	 * We can only roll back to emptiness if it is a ZPL objset.
	 */
	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/*
	 * Before the roll back destroy the zil.
	 */
	if (ds->ds_user_ptr != NULL) {
		zil_rollback_destroy(
		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);

		/*
		 * We need to make sure that the objset_impl_t is reopened
		 * after we do the rollback, otherwise it will have the wrong
		 * objset_phys_t.  Normally this would happen when this
		 * dataset-open is closed, thus causing the
		 * dataset to be immediately evicted.  But when doing "zfs recv
		 * -F", we reopen the objset before that, so that there is no
		 * window where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Transfer space that was freed since last snap back to the head. */
	{
		uint64_t used;

		VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
		    ds->ds_origin_txg, UINT64_MAX, &used));
		dsl_dir_transfer_space(ds->ds_dir, used,
		    DD_USED_SNAP, DD_USED_HEAD, tx);
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);
	}

	ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) ||
	    ds->ds_phys->ds_unique_bytes == 0);

	if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
		/* Change our contents to that of the prev snapshot */

		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
		    ds->ds_prev->ds_phys->ds_used_bytes);

		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;

		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		objset_impl_t *osi;

		ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);

		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
		    SPA_VERSION_UNIQUE_ACCURATE)
			ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

		osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
		if (*ost == DMU_OST_ZFS)
			zfs_create_fs(&osi->os, kcred, NULL, tx);
	}

	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}
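
/*
 * Rollback thus proceeds in five steps: destroy the ZIL, return the
 * space freed since the snapshot to the head, reset the deadlist,
 * free every block born after the snapshot, and finally either adopt
 * the previous snapshot's contents or, if there is no usable
 * snapshot, recreate an empty objset.
 */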
/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}
/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}
/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}
struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}
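
/*
 * dsl_dataset_drain_refs() swaps in an eviction callback that merely
 * signals a local condvar, drops our own hold, and then sleeps until
 * every remaining hold on the dataset's dbuf has been released.
 */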
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY(0 == zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * others.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(
			    &ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}
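
/*
 * The heart of snapshot destruction is the deadlist merge above: any
 * block on the next snapshot's deadlist that was born before our
 * previous snapshot is still referenced there and moves to the
 * deadlist that next inherits from us, while blocks unique to this
 * snapshot can finally be freed.
 */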
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
			VERIFY3U(0, ==, zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    add, 0, 0, tx);
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_drop_ref(ds->ds_prev, ds);
	VERIFY(0 == dsl_dataset_get_ref(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_pool_ds_snapshotted(ds, tx);

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}
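
/*
 * Note the handoff above: the new snapshot inherits the head's
 * current deadlist and block pointer, while the head gets a fresh,
 * empty deadlist and its unique space drops to zero -- everything the
 * head references is now shared with the snapshot just taken.
 */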
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
}
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t refd, avail, uobjs, aobjs;

	dsl_dir_stats(ds->ds_dir, nv);

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    ds->ds_phys->ds_guid);

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space and compression ratio.
		 */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;
	}

	/* clone origin is really a dsl_dir thing... */
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (dsl_dir_is_clone(ds->ds_dir)) {
		dsl_dataset_t *ods;

		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_drop_ref(ods, FTAG);
	} else {
		stat->dds_origin[0] = '\0';
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
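
/*
 * For example, with a 10G refquota and 7G referenced, available space
 * is capped at 3G even if the pool could provide more; once the
 * referenced space reaches the quota, available drops to zero.
 */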
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg)
		return (B_TRUE);
	return (B_FALSE);
}
/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
	dsl_dataset_rele(hds, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
		err = ENAMETOOLONG;

	return (err);
}
static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
    cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
	dsl_dataset_rele(hds, FTAG);
}
struct renamesnaparg {
	dsl_sync_task_group_t *dstg;
	char failed[MAXPATHLEN];
	char *oldsnap;
	char *newsnap;
};
static int
dsl_snapshot_rename_one(char *name, void *arg)
{
	struct renamesnaparg *ra = arg;
	dsl_dataset_t *ds = NULL;
	char *cp;
	int err;

	cp = name + strlen(name);
	*cp = '@';
	(void) strcpy(cp + 1, ra->oldsnap);

	/*
	 * For recursive snapshot renames the parent won't be changing
	 * so we just pass name for both the to/from argument.
	 */
	err = zfs_secpolicy_rename_perms(name, name, CRED());
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

	/*
	 * For all filesystems undergoing rename, we'll need to unmount it.
	 */
	(void) zfs_unmount_snap(name, NULL);

	err = dsl_dataset_hold(name, ra->dstg, &ds);
	*cp = '\0';
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);

	return (0);
}
static int
dsl_recursive_rename(char *oldname, const char *newname)
{
	int err;
	struct renamesnaparg *ra;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp, *fsname = spa_strdup(oldname);
	int len = strlen(oldname);

	/* truncate the snapshot name to get the fsname */
	cp = strchr(fsname, '@');
	*cp = '\0';

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(fsname, len + 1);
		return (err);
	}
	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	ra->oldsnap = strchr(oldname, '@') + 1;
	ra->newsnap = strchr(newname, '@') + 1;
	*ra->failed = '\0';

	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
	    DS_FIND_CHILDREN);
	kmem_free(fsname, len + 1);

	if (err == 0)
		err = dsl_sync_task_group_wait(ra->dstg);

	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_rele(ds, ra->dstg);
	}

	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}
static int
dsl_valid_rename(char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		int delta = strlen(newname) - strlen(oldname);

		/* if we're growing, validate child name lengths */
		if (delta > 0)
			err = dmu_objset_find(oldname, dsl_valid_rename,
			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);

		if (!err)
			err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	if (recursive) {
		err = dsl_recursive_rename(oldname, newname);
	} else {
		err = dsl_dataset_hold(oldname, FTAG, &ds);
		if (err)
			return (err);

		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_snapshot_rename_check,
		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

		dsl_dataset_rele(ds, FTAG);
	}

	return (err);
}
struct promotenode {
	list_node_t link;
	dsl_dataset_t *ds;
};

struct promotearg {
	list_t shared_snaps, origin_snaps, clone_snaps;
	dsl_dataset_t *origin_origin, *origin_head;
	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
};

static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	int err;

	/* Check that it is a real clone */
	if (!dsl_dir_is_clone(hds->ds_dir))
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
		return (EXDEV);

	/* compute origin's new unique space */
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	err = bplist_space_birthrange(&snap->ds->ds_deadlist,
	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
	if (err)
		return (err);
	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used for each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
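	/*
	 * Worked example with made-up numbers: for three snapshots with
	 * u0=10, u1=12, u2=15 and k0=1, k1=2, k2=3, the blocks born are
	 * (15-12+3) + (12-10+2) + (10-0+1) = 21 = u2+k2+k1+k0 = 15+3+2+1.
	 * Stopping at M=1 instead gives (15-12+3) + (12-10+2) = 10 =
	 * u2+k2+k1-u0 = 15+3+2-10, hence the subtraction of the clone
	 * origin's used space below.
	 */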
	pa->used = origin_ds->ds_phys->ds_used_bytes;
	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		/* Check that the snapshot name does not conflict */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0)
			return (EEXIST);
		if (err != ENOENT)
			return (err);

		/* The very first snapshot does not have a deadlist */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			continue;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			return (err);
		pa->used += dlused;
		pa->comp += dlcomp;
		pa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (pa->origin_origin) {
		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
		pa->uncomp -=
		    pa->origin_origin->ds_phys->ds_uncompressed_bytes;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    pa->used);
	if (err)
		return (err);

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> bplist_space_birthrange()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&pa->origin_snaps);
		err = snaplist_space(&pa->shared_snaps,
		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
		if (err)
			return (err);

		err = snaplist_space(&pa->clone_snaps,
		    snap->ds->ds_origin_txg, &space);
		if (err)
			return (err);
		pa->cloneusedsnap += space;
	}
	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&pa->origin_snaps,
		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
		if (err)
			return (err);
	}

	return (0);
}
static void
dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	snap = list_head(&pa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==,
	    origin_ds->ds_object);
	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (origin_ds->ds_phys->ds_next_clones_obj) {
		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    origin_ds->ds_phys->ds_next_snap_obj, tx));
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
	hds->ds_origin_txg = origin_head->ds_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;

	/* move snapshots to this dir */
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		dsl_dataset_t *ds = snap->ds;

		/* unregister props as dsl_dir is changing */
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		/* move snap name entry */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));
		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
	}

	/*
	 * Change space accounting.
	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */
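	/*
	 * Worked example with hypothetical numbers: if pa->used=21,
	 * pa->cloneusedsnap=5, and the clone's existing
	 * dd_used_breakdown[DD_USED_SNAP]=2, then delta=3 is credited
	 * to DD_USED_SNAP and the remaining 21-3=18 to DD_USED_HEAD;
	 * the origin's dir is debited symmetrically.
	 */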
	delta = pa->cloneusedsnap -
	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(pa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    pa->used - delta, pa->comp, pa->uncomp, tx);

	delta = pa->originusedsnap -
	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(pa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);

	origin_ds->ds_phys->ds_unique_bytes = pa->unique;

	/* log history record */
	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", hds->ds_object);

	dsl_dir_close(odd, FTAG);
}
static char *snaplist_tag = "snaplist";
/*
 * Make a list of dsl_dataset_t's for the snapshots between first_obj
 * (exclusive) and last_obj (inclusive).  The list will be in reverse
 * order (last_obj will be the list_head()).  If first_obj == 0, do all
 * snapshots back to this dataset's origin.
 */
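/*
 * For example (hypothetical objects): given snapshots s1 -> s2 -> s3
 * taken in that order, snaplist_make(dp, own, s1, s3, l) yields the
 * list s3 (head), s2 -- s1 itself is excluded, and the newest snapshot
 * sits at the head.
 */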
static int
snaplist_make(dsl_pool_t *dp, boolean_t own,
    uint64_t first_obj, uint64_t last_obj, list_t *l)
{
	uint64_t obj = last_obj;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));

	list_create(l, sizeof (struct promotenode),
	    offsetof(struct promotenode, link));

	while (obj != first_obj) {
		dsl_dataset_t *ds;
		struct promotenode *snap;
		int err;

		if (own) {
			err = dsl_dataset_own_obj(dp, obj,
			    0, snaplist_tag, &ds);
			if (err == 0)
				dsl_dataset_make_exclusive(ds, snaplist_tag);
		} else {
			err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
		}
		if (err == ENOENT) {
			/* lost race with snapshot destroy */
			struct promotenode *last = list_tail(l);
			ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
			obj = last->ds->ds_phys->ds_prev_snap_obj;
			continue;
		} else if (err) {
			return (err);
		}

		if (first_obj == 0)
			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;

		snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
		snap->ds = ds;
		list_insert_tail(l, snap);
		obj = ds->ds_phys->ds_prev_snap_obj;
	}

	return (0);
}
static int
snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
{
	struct promotenode *snap;

	*spacep = 0;
	for (snap = list_head(l); snap; snap = list_next(l, snap)) {
		uint64_t used;
		int err = bplist_space_birthrange(&snap->ds->ds_deadlist,
		    mintxg, UINT64_MAX, &used);
		if (err)
			return (err);
		*spacep += used;
	}
	return (0);
}
static void
snaplist_destroy(list_t *l, boolean_t own)
{
	struct promotenode *snap;

	if (!list_link_active(&l->list_head))
		return;

	while ((snap = list_tail(l)) != NULL) {
		list_remove(l, snap);
		if (own)
			dsl_dataset_disown(snap->ds, snaplist_tag);
		else
			dsl_dataset_rele(snap->ds, snaplist_tag);
		kmem_free(snap, sizeof (struct promotenode));
	}
	list_destroy(l);
}
/*
 * Promote a clone.  Nomenclature note:
 * "clone" or "cds": the original clone which is being promoted
 * "origin" or "ods": the snapshot which is originally clone's origin
 * "origin head" or "ohds": the dataset which is the head
 * (filesystem/volume) for the origin
 * "origin origin": the origin of the origin's filesystem (typically
 * NULL, indicating that the clone is not a clone of a clone).
 */
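/*
 * For example (hypothetical datasets): if "tank/fs@snap" was cloned to
 * "tank/clone", then when promoting "tank/clone" we have
 * cds = tank/clone, ods = tank/fs@snap, ohds = tank/fs, and
 * origin_origin is non-NULL only if tank/fs was itself a clone.
 */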
int
dsl_dataset_promote(const char *name)
{
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dmu_object_info_t doi;
	struct promotearg pa = { 0 };
	struct promotenode *snap;
	int err;

	err = dsl_dataset_hold(name, FTAG, &ds);
	if (err)
		return (err);

	dd = ds->ds_dir;
	dp = dd->dd_pool;

	err = dmu_object_info(dp->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, &doi);
	if (err) {
		dsl_dataset_rele(ds, FTAG);
		return (err);
	}

	if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * We are going to inherit all the snapshots taken before our
	 * origin (i.e., our new origin will be our parent's origin).
	 * Take ownership of them so that we can rename them into our
	 * namespace.
	 */
	rw_enter(&dp->dp_config_rwlock, RW_READER);

	err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
	    &pa.shared_snaps);
	if (err != 0)
		goto out;

	err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
	if (err != 0)
		goto out;

	snap = list_head(&pa.shared_snaps);
	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
	err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
	if (err != 0)
		goto out;

	if (dsl_dir_is_clone(snap->ds->ds_dir)) {
		err = dsl_dataset_own_obj(dp,
		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
		    0, FTAG, &pa.origin_origin);
		if (err != 0)
			goto out;
	}

out:
	rw_exit(&dp->dp_config_rwlock);

	/*
	 * Add in 128x the snapnames zapobj size, since we will be moving
	 * a bunch of snapnames to the promoted ds, and dirtying their
	 * bonus buffers.
	 */
	if (err == 0) {
		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
		    dsl_dataset_promote_sync, ds, &pa,
		    2 + 2 * doi.doi_physical_blks);
	}

	snaplist_destroy(&pa.shared_snaps, B_TRUE);
	snaplist_destroy(&pa.clone_snaps, B_FALSE);
	snaplist_destroy(&pa.origin_snaps, B_FALSE);
	if (pa.origin_origin)
		dsl_dataset_disown(pa.origin_origin, FTAG);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}
struct cloneswaparg {
	dsl_dataset_t *cds; /* clone dataset */
	dsl_dataset_t *ohds; /* origin's head dataset */
	boolean_t force;
	int64_t unused_refres_delta; /* change in unconsumed refreservation */
};
/* ARGSUSED */
static int
dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;

	/* they should both be heads */
	if (dsl_dataset_is_snapshot(csa->cds) ||
	    dsl_dataset_is_snapshot(csa->ohds))
		return (EINVAL);

	/* the branch point should be just before them */
	if (csa->cds->ds_prev != csa->ohds->ds_prev)
		return (EINVAL);

	/* cds should be the clone */
	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
	    csa->ohds->ds_object)
		return (EINVAL);

	/* the clone should be a child of the origin */
	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
		return (EINVAL);

	/* ohds shouldn't be modified unless 'force' */
	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
		return (ETXTBSY);

	/* adjust amount of any unconsumed refreservation */
	csa->unused_refres_delta =
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->ohds->ds_phys->ds_unique_bytes) -
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->cds->ds_phys->ds_unique_bytes);

	if (csa->unused_refres_delta > 0 &&
	    csa->unused_refres_delta >
	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
		return (ENOSPC);

	return (0);
}
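/*
 * Worked example with hypothetical numbers: with refreservation=100,
 * ohds unique=40 and cds unique=10, the swap leaves the head with
 * unique=10, so the unconsumed refreservation grows by
 * MIN(100,40) - MIN(100,10) = 30 bytes, which must be available in the
 * dir or the check above returns ENOSPC.
 */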
/* ARGSUSED */
static void
dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;
	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;

	ASSERT(csa->cds->ds_reserved == 0);
	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);

	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);

	if (csa->cds->ds_user_ptr != NULL) {
		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
		csa->cds->ds_user_ptr = NULL;
	}

	if (csa->ohds->ds_user_ptr != NULL) {
		csa->ohds->ds_user_evict_func(csa->ohds,
		    csa->ohds->ds_user_ptr);
		csa->ohds->ds_user_ptr = NULL;
	}

	/* reset origin's unique bytes */
	VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
	    csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX,
	    &csa->cds->ds_prev->ds_phys->ds_unique_bytes));

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = csa->ohds->ds_phys->ds_bp;
		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
		csa->cds->ds_phys->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(csa->cds->ds_dir->dd_phys->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
		    &cdl_comp, &cdl_uncomp));
		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
		    &odl_comp, &odl_uncomp));

		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
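		/*
		 * For illustration (hypothetical numbers): if cdl_used=8
		 * and odl_used=5 for blocks born since ds_origin_txg,
		 * the swap puts 3 more bytes of deadlist on the head, so
		 * 3 bytes move from DD_USED_HEAD to DD_USED_SNAP below.
		 */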
		VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
		    csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used));
		VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist,
		    csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used));
		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}
#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}
	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
	    csa->cds->ds_phys->ds_used_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
	    csa->cds->ds_phys->ds_compressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
	    csa->cds->ds_phys->ds_uncompressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
	    csa->cds->ds_phys->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
	    csa->unused_refres_delta, 0, 0, tx);

	/* swap deadlists */
	bplist_close(&csa->cds->ds_deadlist);
	bplist_close(&csa->ohds->ds_deadlist);
	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
	    csa->cds->ds_phys->ds_deadlist_obj);
	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
	    csa->cds->ds_phys->ds_deadlist_obj));
	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
	    csa->ohds->ds_phys->ds_deadlist_obj));

	dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
}
/*
 * Swap 'clone' with its origin head file system.  Used at the end
 * of "online recv" to swizzle the file system to the new version.
 */
int
dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
    boolean_t force)
{
	struct cloneswaparg csa;
	int error;

	ASSERT(clone->ds_owner);
	ASSERT(origin_head->ds_owner);
retry:
	/*
	 * Need exclusive access for the swap.  If we can't take both
	 * rwlocks in this order without blocking, drop what we hold and
	 * retry in the other order to avoid deadlocking with another
	 * thread acquiring them the opposite way.
	 */
	rw_enter(&clone->ds_rwlock, RW_WRITER);
	if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
		rw_exit(&clone->ds_rwlock);
		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
			rw_exit(&origin_head->ds_rwlock);
			goto retry;
		}
	}
	csa.cds = clone;
	csa.ohds = origin_head;
	csa.force = force;
	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
	    dsl_dataset_clone_swap_check,
	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
	return (error);
}
/*
 * Given a pool name and a dataset object number in that pool,
 * return the name of that dataset.
 */
int
dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
{
	spa_t *spa;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	if ((error = spa_open(pname, &spa, FTAG)) != 0)
		return (error);
	dp = spa_get_dsl(spa);
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
		dsl_dataset_name(ds, buf);
		dsl_dataset_rele(ds, FTAG);
	}
	rw_exit(&dp->dp_config_rwlock);
	spa_close(spa, FTAG);

	return (error);
}
int
dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
    uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
{
	int error = 0;

	ASSERT3S(asize, >, 0);

	/*
	 * *ref_rsrv is the portion of asize that will come from any
	 * unconsumed refreservation space.
	 */
	*ref_rsrv = 0;

	mutex_enter(&ds->ds_lock);
	/*
	 * Make a space adjustment for reserved bytes.
	 */
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
		ASSERT3U(*used, >=,
		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*ref_rsrv =
		    asize - MIN(asize, parent_delta(ds, asize + inflight));
	}
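	/*
	 * Worked example with hypothetical numbers: with ds_reserved=100
	 * and unique=60, a 50-byte write has parent_delta() = 10 (the
	 * amount by which unique+50 exceeds the reservation), so
	 * *ref_rsrv = 50 - 10 = 40 bytes of it come from the unconsumed
	 * refreservation.
	 */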
	if (!check_quota || ds->ds_quota == 0) {
		mutex_exit(&ds->ds_lock);
		return (0);
	}
	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
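	/*
	 * In other words: ERESTART ("try again") is returned while
	 * pending frees might still bring us under quota; EDQUOT is
	 * returned only once the on-disk usage alone is at quota with
	 * nothing in flight.
	 */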
	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
			error = ERESTART;
		else
			error = EDQUOT;
	}
	mutex_exit(&ds->ds_lock);

	return (error);
}
/* ARGSUSED */
static int
dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *quotap = arg2;
	uint64_t new_quota = *quotap;

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
		return (ENOTSUP);

	if (new_quota == 0)
		return (0);

	if (new_quota < ds->ds_phys->ds_used_bytes ||
	    new_quota < ds->ds_reserved)
		return (ENOSPC);

	return (0);
}
/* ARGSUSED */
void
dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *quotap = arg2;
	uint64_t new_quota = *quotap;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	ds->ds_quota = new_quota;

	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);

	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "%lld dataset = %llu ",
	    (longlong_t)new_quota, ds->ds_object);
}
int
dsl_dataset_set_quota(const char *dsname, uint64_t quota)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(dsname, FTAG, &ds);
	if (err)
		return (err);

	if (quota != ds->ds_quota) {
		/*
		 * If someone removes a file, then tries to set the quota, we
		 * want to make sure the file freeing takes effect.
		 */
		txg_wait_open(ds->ds_dir->dd_pool, 0);

		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
		    ds, &quota, 0);
	}

	dsl_dataset_rele(ds, FTAG);
	return (err);
}
static int
dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *reservationp = arg2;
	uint64_t new_reservation = *reservationp;
	uint64_t unique;
	int64_t delta;

	if (new_reservation > INT64_MAX)
		return (EOVERFLOW);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
	    SPA_VERSION_REFRESERVATION)
		return (ENOTSUP);

	if (dsl_dataset_is_snapshot(ds))
		return (EINVAL);

	/*
	 * If we are doing the preliminary check in open context, the
	 * space estimates may be inaccurate.
	 */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	mutex_enter(&ds->ds_lock);
	unique = dsl_dataset_unique(ds);
	delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
	mutex_exit(&ds->ds_lock);

	if (delta > 0 &&
	    delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (ENOSPC);
	if (delta > 0 && ds->ds_quota > 0 &&
	    new_reservation > ds->ds_quota)
		return (ENOSPC);

	return (0);
}
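/*
 * Worked example with hypothetical numbers: with unique=50,
 * ds_reserved=20 and new_reservation=80, the check's delta is
 * MAX(50,80) - MAX(50,20) = 30, i.e. only the 30 bytes of reservation
 * beyond what unique data already covers must be newly available.
 */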
/* ARGSUSED */
static void
dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *reservationp = arg2;
	uint64_t new_reservation = *reservationp;
	uint64_t unique;
	int64_t delta;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	unique = dsl_dataset_unique(ds);
	delta = MAX(0, (int64_t)(new_reservation - unique)) -
	    MAX(0, (int64_t)(ds->ds_reserved - unique));
	ds->ds_reserved = new_reservation;
	mutex_exit(&ds->ds_lock);

	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
	    new_reservation, cr, tx);

	spa_history_internal_log(LOG_DS_REFRESERV,
	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
	    (longlong_t)new_reservation, ds->ds_object);
}
int
dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(dsname, FTAG, &ds);
	if (err)
		return (err);

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_set_reservation_check,
	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}