4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 #include <sys/zfs_context.h>
28 #include <sys/dmu_objset.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_dataset.h>
31 #include <sys/dsl_prop.h>
32 #include <sys/dsl_pool.h>
33 #include <sys/dsl_synctask.h>
34 #include <sys/dsl_deleg.h>
35 #include <sys/dnode.h>
38 #include <sys/dmu_tx.h>
39 #include <sys/zio_checksum.h>
42 #include <sys/dmu_impl.h>
43 #include <sys/zfs_ioctl.h>
46 dmu_objset_spa(objset_t
*os
)
48 return (os
->os
->os_spa
);
52 dmu_objset_zil(objset_t
*os
)
54 return (os
->os
->os_zil
);
58 dmu_objset_pool(objset_t
*os
)
62 if ((ds
= os
->os
->os_dsl_dataset
) != NULL
&& ds
->ds_dir
)
63 return (ds
->ds_dir
->dd_pool
);
65 return (spa_get_dsl(os
->os
->os_spa
));
69 dmu_objset_ds(objset_t
*os
)
71 return (os
->os
->os_dsl_dataset
);
75 dmu_objset_type(objset_t
*os
)
77 return (os
->os
->os_phys
->os_type
);
81 dmu_objset_name(objset_t
*os
, char *buf
)
83 dsl_dataset_name(os
->os
->os_dsl_dataset
, buf
);
87 dmu_objset_id(objset_t
*os
)
89 dsl_dataset_t
*ds
= os
->os
->os_dsl_dataset
;
91 return (ds
? ds
->ds_object
: 0);
95 checksum_changed_cb(void *arg
, uint64_t newval
)
97 objset_impl_t
*osi
= arg
;
100 * Inheritance should have been done by now.
102 ASSERT(newval
!= ZIO_CHECKSUM_INHERIT
);
104 osi
->os_checksum
= zio_checksum_select(newval
, ZIO_CHECKSUM_ON_VALUE
);
108 compression_changed_cb(void *arg
, uint64_t newval
)
110 objset_impl_t
*osi
= arg
;
113 * Inheritance and range checking should have been done by now.
115 ASSERT(newval
!= ZIO_COMPRESS_INHERIT
);
117 osi
->os_compress
= zio_compress_select(newval
, ZIO_COMPRESS_ON_VALUE
);
121 copies_changed_cb(void *arg
, uint64_t newval
)
123 objset_impl_t
*osi
= arg
;
126 * Inheritance and range checking should have been done by now.
129 ASSERT(newval
<= spa_max_replication(osi
->os_spa
));
131 osi
->os_copies
= newval
;
135 primary_cache_changed_cb(void *arg
, uint64_t newval
)
137 objset_impl_t
*osi
= arg
;
140 * Inheritance and range checking should have been done by now.
142 ASSERT(newval
== ZFS_CACHE_ALL
|| newval
== ZFS_CACHE_NONE
||
143 newval
== ZFS_CACHE_METADATA
);
145 osi
->os_primary_cache
= newval
;
149 secondary_cache_changed_cb(void *arg
, uint64_t newval
)
151 objset_impl_t
*osi
= arg
;
154 * Inheritance and range checking should have been done by now.
156 ASSERT(newval
== ZFS_CACHE_ALL
|| newval
== ZFS_CACHE_NONE
||
157 newval
== ZFS_CACHE_METADATA
);
159 osi
->os_secondary_cache
= newval
;
163 dmu_objset_byteswap(void *buf
, size_t size
)
165 objset_phys_t
*osp
= buf
;
167 ASSERT(size
== sizeof (objset_phys_t
));
168 dnode_byteswap(&osp
->os_meta_dnode
);
169 byteswap_uint64_array(&osp
->os_zil_header
, sizeof (zil_header_t
));
170 osp
->os_type
= BSWAP_64(osp
->os_type
);
174 dmu_objset_open_impl(spa_t
*spa
, dsl_dataset_t
*ds
, blkptr_t
*bp
,
175 objset_impl_t
**osip
)
180 ASSERT(ds
== NULL
|| MUTEX_HELD(&ds
->ds_opening_lock
));
182 osi
= kmem_zalloc(sizeof (objset_impl_t
), KM_SLEEP
);
184 osi
->os_dsl_dataset
= ds
;
187 if (!BP_IS_HOLE(osi
->os_rootbp
)) {
188 uint32_t aflags
= ARC_WAIT
;
190 zb
.zb_objset
= ds
? ds
->ds_object
: 0;
194 if (DMU_OS_IS_L2CACHEABLE(osi
))
195 aflags
|= ARC_L2CACHE
;
197 dprintf_bp(osi
->os_rootbp
, "reading %s", "");
199 * NB: when bprewrite scrub can change the bp,
200 * and this is called from dmu_objset_open_ds_os, the bp
201 * could change, and we'll need a lock.
203 err
= arc_read_nolock(NULL
, spa
, osi
->os_rootbp
,
204 arc_getbuf_func
, &osi
->os_phys_buf
,
205 ZIO_PRIORITY_SYNC_READ
, ZIO_FLAG_CANFAIL
, &aflags
, &zb
);
207 kmem_free(osi
, sizeof (objset_impl_t
));
208 /* convert checksum errors into IO errors */
213 osi
->os_phys
= osi
->os_phys_buf
->b_data
;
215 osi
->os_phys_buf
= arc_buf_alloc(spa
, sizeof (objset_phys_t
),
216 &osi
->os_phys_buf
, ARC_BUFC_METADATA
);
217 osi
->os_phys
= osi
->os_phys_buf
->b_data
;
218 bzero(osi
->os_phys
, sizeof (objset_phys_t
));
222 * Note: the changed_cb will be called once before the register
223 * func returns, thus changing the checksum/compression from the
224 * default (fletcher2/off). Snapshots don't need to know about
225 * checksum/compression/copies.
228 err
= dsl_prop_register(ds
, "primarycache",
229 primary_cache_changed_cb
, osi
);
231 err
= dsl_prop_register(ds
, "secondarycache",
232 secondary_cache_changed_cb
, osi
);
233 if (!dsl_dataset_is_snapshot(ds
)) {
235 err
= dsl_prop_register(ds
, "checksum",
236 checksum_changed_cb
, osi
);
238 err
= dsl_prop_register(ds
, "compression",
239 compression_changed_cb
, osi
);
241 err
= dsl_prop_register(ds
, "copies",
242 copies_changed_cb
, osi
);
245 VERIFY(arc_buf_remove_ref(osi
->os_phys_buf
,
246 &osi
->os_phys_buf
) == 1);
247 kmem_free(osi
, sizeof (objset_impl_t
));
250 } else if (ds
== NULL
) {
251 /* It's the meta-objset. */
252 osi
->os_checksum
= ZIO_CHECKSUM_FLETCHER_4
;
253 osi
->os_compress
= ZIO_COMPRESS_LZJB
;
254 osi
->os_copies
= spa_max_replication(spa
);
255 osi
->os_primary_cache
= ZFS_CACHE_ALL
;
256 osi
->os_secondary_cache
= ZFS_CACHE_ALL
;
259 osi
->os_zil_header
= osi
->os_phys
->os_zil_header
;
260 osi
->os_zil
= zil_alloc(&osi
->os
, &osi
->os_zil_header
);
262 for (i
= 0; i
< TXG_SIZE
; i
++) {
263 list_create(&osi
->os_dirty_dnodes
[i
], sizeof (dnode_t
),
264 offsetof(dnode_t
, dn_dirty_link
[i
]));
265 list_create(&osi
->os_free_dnodes
[i
], sizeof (dnode_t
),
266 offsetof(dnode_t
, dn_dirty_link
[i
]));
268 list_create(&osi
->os_dnodes
, sizeof (dnode_t
),
269 offsetof(dnode_t
, dn_link
));
270 list_create(&osi
->os_downgraded_dbufs
, sizeof (dmu_buf_impl_t
),
271 offsetof(dmu_buf_impl_t
, db_link
));
273 mutex_init(&osi
->os_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
274 mutex_init(&osi
->os_obj_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
275 mutex_init(&osi
->os_user_ptr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
277 osi
->os_meta_dnode
= dnode_special_open(osi
,
278 &osi
->os_phys
->os_meta_dnode
, DMU_META_DNODE_OBJECT
);
281 * We should be the only thread trying to do this because we
282 * have ds_opening_lock
285 VERIFY(NULL
== dsl_dataset_set_user_ptr(ds
, osi
,
294 dmu_objset_open_ds_os(dsl_dataset_t
*ds
, objset_t
*os
, dmu_objset_type_t type
)
298 mutex_enter(&ds
->ds_opening_lock
);
299 osi
= dsl_dataset_get_user_ptr(ds
);
303 err
= dmu_objset_open_impl(dsl_dataset_get_spa(ds
),
304 ds
, &ds
->ds_phys
->ds_bp
, &osi
);
306 mutex_exit(&ds
->ds_opening_lock
);
310 mutex_exit(&ds
->ds_opening_lock
);
313 os
->os_mode
= DS_MODE_NOHOLD
;
315 if (type
!= DMU_OST_ANY
&& type
!= os
->os
->os_phys
->os_type
)
321 dmu_objset_open_ds(dsl_dataset_t
*ds
, dmu_objset_type_t type
, objset_t
**osp
)
326 os
= kmem_alloc(sizeof (objset_t
), KM_SLEEP
);
327 err
= dmu_objset_open_ds_os(ds
, os
, type
);
329 kmem_free(os
, sizeof (objset_t
));
335 /* called from zpl */
337 dmu_objset_open(const char *name
, dmu_objset_type_t type
, int mode
,
344 ASSERT(DS_MODE_TYPE(mode
) == DS_MODE_USER
||
345 DS_MODE_TYPE(mode
) == DS_MODE_OWNER
);
347 os
= kmem_alloc(sizeof (objset_t
), KM_SLEEP
);
348 if (DS_MODE_TYPE(mode
) == DS_MODE_USER
)
349 err
= dsl_dataset_hold(name
, os
, &ds
);
351 err
= dsl_dataset_own(name
, mode
, os
, &ds
);
353 kmem_free(os
, sizeof (objset_t
));
357 err
= dmu_objset_open_ds_os(ds
, os
, type
);
359 if (DS_MODE_TYPE(mode
) == DS_MODE_USER
)
360 dsl_dataset_rele(ds
, os
);
362 dsl_dataset_disown(ds
, os
);
363 kmem_free(os
, sizeof (objset_t
));
372 dmu_objset_close(objset_t
*os
)
374 ASSERT(DS_MODE_TYPE(os
->os_mode
) == DS_MODE_USER
||
375 DS_MODE_TYPE(os
->os_mode
) == DS_MODE_OWNER
||
376 DS_MODE_TYPE(os
->os_mode
) == DS_MODE_NOHOLD
);
378 if (DS_MODE_TYPE(os
->os_mode
) == DS_MODE_USER
)
379 dsl_dataset_rele(os
->os
->os_dsl_dataset
, os
);
380 else if (DS_MODE_TYPE(os
->os_mode
) == DS_MODE_OWNER
)
381 dsl_dataset_disown(os
->os
->os_dsl_dataset
, os
);
382 kmem_free(os
, sizeof (objset_t
));
386 dmu_objset_evict_dbufs(objset_t
*os
)
388 objset_impl_t
*osi
= os
->os
;
391 mutex_enter(&osi
->os_lock
);
393 /* process the mdn last, since the other dnodes have holds on it */
394 list_remove(&osi
->os_dnodes
, osi
->os_meta_dnode
);
395 list_insert_tail(&osi
->os_dnodes
, osi
->os_meta_dnode
);
398 * Find the first dnode with holds. We have to do this dance
399 * because dnode_add_ref() only works if you already have a
400 * hold. If there are no holds then it has no dbufs so OK to
403 for (dn
= list_head(&osi
->os_dnodes
);
404 dn
&& !dnode_add_ref(dn
, FTAG
);
405 dn
= list_next(&osi
->os_dnodes
, dn
))
409 dnode_t
*next_dn
= dn
;
412 next_dn
= list_next(&osi
->os_dnodes
, next_dn
);
413 } while (next_dn
&& !dnode_add_ref(next_dn
, FTAG
));
415 mutex_exit(&osi
->os_lock
);
416 dnode_evict_dbufs(dn
);
417 dnode_rele(dn
, FTAG
);
418 mutex_enter(&osi
->os_lock
);
421 mutex_exit(&osi
->os_lock
);
422 return (list_head(&osi
->os_dnodes
) != osi
->os_meta_dnode
);
426 dmu_objset_evict(dsl_dataset_t
*ds
, void *arg
)
428 objset_impl_t
*osi
= arg
;
432 for (i
= 0; i
< TXG_SIZE
; i
++) {
433 ASSERT(list_head(&osi
->os_dirty_dnodes
[i
]) == NULL
);
434 ASSERT(list_head(&osi
->os_free_dnodes
[i
]) == NULL
);
438 if (!dsl_dataset_is_snapshot(ds
)) {
439 VERIFY(0 == dsl_prop_unregister(ds
, "checksum",
440 checksum_changed_cb
, osi
));
441 VERIFY(0 == dsl_prop_unregister(ds
, "compression",
442 compression_changed_cb
, osi
));
443 VERIFY(0 == dsl_prop_unregister(ds
, "copies",
444 copies_changed_cb
, osi
));
446 VERIFY(0 == dsl_prop_unregister(ds
, "primarycache",
447 primary_cache_changed_cb
, osi
));
448 VERIFY(0 == dsl_prop_unregister(ds
, "secondarycache",
449 secondary_cache_changed_cb
, osi
));
453 * We should need only a single pass over the dnode list, since
454 * nothing can be added to the list at this point.
457 (void) dmu_objset_evict_dbufs(&os
);
459 ASSERT3P(list_head(&osi
->os_dnodes
), ==, osi
->os_meta_dnode
);
460 ASSERT3P(list_tail(&osi
->os_dnodes
), ==, osi
->os_meta_dnode
);
461 ASSERT3P(list_head(&osi
->os_meta_dnode
->dn_dbufs
), ==, NULL
);
463 dnode_special_close(osi
->os_meta_dnode
);
464 zil_free(osi
->os_zil
);
466 VERIFY(arc_buf_remove_ref(osi
->os_phys_buf
, &osi
->os_phys_buf
) == 1);
467 mutex_destroy(&osi
->os_lock
);
468 mutex_destroy(&osi
->os_obj_lock
);
469 mutex_destroy(&osi
->os_user_ptr_lock
);
470 kmem_free(osi
, sizeof (objset_impl_t
));
473 /* called from dsl for meta-objset */
475 dmu_objset_create_impl(spa_t
*spa
, dsl_dataset_t
*ds
, blkptr_t
*bp
,
476 dmu_objset_type_t type
, dmu_tx_t
*tx
)
481 ASSERT(dmu_tx_is_syncing(tx
));
483 mutex_enter(&ds
->ds_opening_lock
);
484 VERIFY(0 == dmu_objset_open_impl(spa
, ds
, bp
, &osi
));
486 mutex_exit(&ds
->ds_opening_lock
);
487 mdn
= osi
->os_meta_dnode
;
489 dnode_allocate(mdn
, DMU_OT_DNODE
, 1 << DNODE_BLOCK_SHIFT
,
490 DN_MAX_INDBLKSHIFT
, DMU_OT_NONE
, 0, tx
);
493 * We don't want to have to increase the meta-dnode's nlevels
494 * later, because then we could do it in quiescing context while
495 * we are also accessing it in open context.
497 * This precaution is not necessary for the MOS (ds == NULL),
498 * because the MOS is only updated in syncing context.
499 * This is most fortunate: the MOS is the only objset that
500 * needs to be synced multiple times as spa_sync() iterates
501 * to convergence, so minimizing its dn_nlevels matters.
507 * Determine the number of levels necessary for the meta-dnode
508 * to contain DN_MAX_OBJECT dnodes.
510 while ((uint64_t)mdn
->dn_nblkptr
<< (mdn
->dn_datablkshift
+
511 (levels
- 1) * (mdn
->dn_indblkshift
- SPA_BLKPTRSHIFT
)) <
512 DN_MAX_OBJECT
* sizeof (dnode_phys_t
))
515 mdn
->dn_next_nlevels
[tx
->tx_txg
& TXG_MASK
] =
516 mdn
->dn_nlevels
= levels
;
519 ASSERT(type
!= DMU_OST_NONE
);
520 ASSERT(type
!= DMU_OST_ANY
);
521 ASSERT(type
< DMU_OST_NUMTYPES
);
522 osi
->os_phys
->os_type
= type
;
524 dsl_dataset_dirty(ds
, tx
);
530 void (*userfunc
)(objset_t
*os
, void *arg
, cred_t
*cr
, dmu_tx_t
*tx
);
532 dsl_dataset_t
*clone_parent
;
533 const char *lastname
;
534 dmu_objset_type_t type
;
540 dmu_objset_create_check(void *arg1
, void *arg2
, dmu_tx_t
*tx
)
542 dsl_dir_t
*dd
= arg1
;
543 struct oscarg
*oa
= arg2
;
544 objset_t
*mos
= dd
->dd_pool
->dp_meta_objset
;
548 err
= zap_lookup(mos
, dd
->dd_phys
->dd_child_dir_zapobj
,
549 oa
->lastname
, sizeof (uint64_t), 1, &ddobj
);
551 return (err
? err
: EEXIST
);
553 if (oa
->clone_parent
!= NULL
) {
555 * You can't clone across pools.
557 if (oa
->clone_parent
->ds_dir
->dd_pool
!= dd
->dd_pool
)
561 * You can only clone snapshots, not the head datasets.
563 if (oa
->clone_parent
->ds_phys
->ds_num_children
== 0)
571 dmu_objset_create_sync(void *arg1
, void *arg2
, cred_t
*cr
, dmu_tx_t
*tx
)
573 dsl_dir_t
*dd
= arg1
;
574 struct oscarg
*oa
= arg2
;
579 ASSERT(dmu_tx_is_syncing(tx
));
581 dsobj
= dsl_dataset_create_sync(dd
, oa
->lastname
,
582 oa
->clone_parent
, oa
->flags
, cr
, tx
);
584 VERIFY(0 == dsl_dataset_hold_obj(dd
->dd_pool
, dsobj
, FTAG
, &ds
));
585 bp
= dsl_dataset_get_blkptr(ds
);
586 if (BP_IS_HOLE(bp
)) {
589 /* This is an empty dmu_objset; not a clone. */
590 osi
= dmu_objset_create_impl(dsl_dataset_get_spa(ds
),
591 ds
, bp
, oa
->type
, tx
);
594 oa
->userfunc(&osi
->os
, oa
->userarg
, cr
, tx
);
597 spa_history_internal_log(LOG_DS_CREATE
, dd
->dd_pool
->dp_spa
,
598 tx
, cr
, "dataset = %llu", dsobj
);
600 dsl_dataset_rele(ds
, FTAG
);
604 dmu_objset_create(const char *name
, dmu_objset_type_t type
,
605 objset_t
*clone_parent
, uint64_t flags
,
606 void (*func
)(objset_t
*os
, void *arg
, cred_t
*cr
, dmu_tx_t
*tx
), void *arg
)
611 struct oscarg oa
= { 0 };
613 ASSERT(strchr(name
, '@') == NULL
);
614 err
= dsl_dir_open(name
, FTAG
, &pdd
, &tail
);
618 dsl_dir_close(pdd
, FTAG
);
622 dprintf("name=%s\n", name
);
630 if (clone_parent
!= NULL
) {
632 * You can't clone to a different type.
634 if (clone_parent
->os
->os_phys
->os_type
!= type
) {
635 dsl_dir_close(pdd
, FTAG
);
638 oa
.clone_parent
= clone_parent
->os
->os_dsl_dataset
;
640 err
= dsl_sync_task_do(pdd
->dd_pool
, dmu_objset_create_check
,
641 dmu_objset_create_sync
, pdd
, &oa
, 5);
642 dsl_dir_close(pdd
, FTAG
);
647 dmu_objset_destroy(const char *name
)
653 * If it looks like we'll be able to destroy it, and there's
654 * an unplayed replay log sitting around, destroy the log.
655 * It would be nicer to do this in dsl_dataset_destroy_sync(),
656 * but the replay log objset is modified in open context.
658 error
= dmu_objset_open(name
, DMU_OST_ANY
,
659 DS_MODE_OWNER
|DS_MODE_READONLY
|DS_MODE_INCONSISTENT
, &os
);
661 dsl_dataset_t
*ds
= os
->os
->os_dsl_dataset
;
662 zil_destroy(dmu_objset_zil(os
), B_FALSE
);
664 error
= dsl_dataset_destroy(ds
, os
);
666 * dsl_dataset_destroy() closes the ds.
668 kmem_free(os
, sizeof (objset_t
));
675 * This will close the objset.
678 dmu_objset_rollback(objset_t
*os
)
683 ds
= os
->os
->os_dsl_dataset
;
685 if (!dsl_dataset_tryown(ds
, TRUE
, os
)) {
686 dmu_objset_close(os
);
690 err
= dsl_dataset_rollback(ds
, os
->os
->os_phys
->os_type
);
693 * NB: we close the objset manually because the rollback
694 * actually implicitly called dmu_objset_evict(), thus freeing
697 dsl_dataset_disown(ds
, os
);
698 kmem_free(os
, sizeof (objset_t
));
703 dsl_sync_task_group_t
*dstg
;
705 char failed
[MAXPATHLEN
];
706 boolean_t checkperms
;
716 dmu_objset_snapshot_one(char *name
, void *arg
)
718 struct snaparg
*sn
= arg
;
722 (void) strcpy(sn
->failed
, name
);
725 * Check permissions only when requested. This only applies when
726 * doing a recursive snapshot. The permission checks for the starting
727 * dataset have already been performed in zfs_secpolicy_snapshot()
729 if (sn
->checkperms
== B_TRUE
&&
730 (err
= zfs_secpolicy_snapshot_perms(name
, CRED())))
733 err
= dmu_objset_open(name
, DMU_OST_ANY
, DS_MODE_USER
, &os
);
737 /* If the objset is in an inconsistent state, return busy */
738 if (os
->os
->os_dsl_dataset
->ds_phys
->ds_flags
& DS_FLAG_INCONSISTENT
) {
739 dmu_objset_close(os
);
744 * NB: we need to wait for all in-flight changes to get to disk,
745 * so that we snapshot those changes. zil_suspend does this as
748 err
= zil_suspend(dmu_objset_zil(os
));
751 dsl_sync_task_create(sn
->dstg
, dsl_dataset_snapshot_check
,
752 dsl_dataset_snapshot_sync
, os
->os
->os_dsl_dataset
,
754 osn
= kmem_alloc(sizeof (struct osnode
), KM_SLEEP
);
756 list_insert_tail(&sn
->objsets
, osn
);
758 dmu_objset_close(os
);
765 dmu_objset_snapshot(char *fsname
, char *snapname
, boolean_t recursive
)
767 dsl_sync_task_t
*dst
;
769 struct snaparg sn
= { 0 };
773 (void) strcpy(sn
.failed
, fsname
);
775 err
= spa_open(fsname
, &spa
, FTAG
);
779 sn
.dstg
= dsl_sync_task_group_create(spa_get_dsl(spa
));
780 sn
.snapname
= snapname
;
781 list_create(&sn
.objsets
, sizeof (struct osnode
),
782 offsetof(struct osnode
, node
));
785 sn
.checkperms
= B_TRUE
;
786 err
= dmu_objset_find(fsname
,
787 dmu_objset_snapshot_one
, &sn
, DS_FIND_CHILDREN
);
789 sn
.checkperms
= B_FALSE
;
790 err
= dmu_objset_snapshot_one(fsname
, &sn
);
796 err
= dsl_sync_task_group_wait(sn
.dstg
);
798 for (dst
= list_head(&sn
.dstg
->dstg_tasks
); dst
;
799 dst
= list_next(&sn
.dstg
->dstg_tasks
, dst
)) {
800 dsl_dataset_t
*ds
= dst
->dst_arg1
;
802 dsl_dataset_name(ds
, sn
.failed
);
806 while (osn
= list_head(&sn
.objsets
)) {
807 list_remove(&sn
.objsets
, osn
);
808 zil_resume(dmu_objset_zil(osn
->os
));
809 dmu_objset_close(osn
->os
);
810 kmem_free(osn
, sizeof (struct osnode
));
812 list_destroy(&sn
.objsets
);
815 (void) strcpy(fsname
, sn
.failed
);
816 dsl_sync_task_group_destroy(sn
.dstg
);
817 spa_close(spa
, FTAG
);
822 dmu_objset_sync_dnodes(list_t
*list
, dmu_tx_t
*tx
)
826 while (dn
= list_head(list
)) {
827 ASSERT(dn
->dn_object
!= DMU_META_DNODE_OBJECT
);
828 ASSERT(dn
->dn_dbuf
->db_data_pending
);
830 * Initialize dn_zio outside dnode_sync()
831 * to accommodate meta-dnode
833 dn
->dn_zio
= dn
->dn_dbuf
->db_data_pending
->dr_zio
;
836 ASSERT3U(dn
->dn_nlevels
, <=, DN_MAX_LEVELS
);
837 list_remove(list
, dn
);
844 ready(zio_t
*zio
, arc_buf_t
*abuf
, void *arg
)
846 blkptr_t
*bp
= zio
->io_bp
;
847 blkptr_t
*bp_orig
= &zio
->io_bp_orig
;
848 objset_impl_t
*os
= arg
;
849 dnode_phys_t
*dnp
= &os
->os_phys
->os_meta_dnode
;
851 ASSERT(bp
== os
->os_rootbp
);
852 ASSERT(BP_GET_TYPE(bp
) == DMU_OT_OBJSET
);
853 ASSERT(BP_GET_LEVEL(bp
) == 0);
856 * Update rootbp fill count.
858 bp
->blk_fill
= 1; /* count the meta-dnode */
859 for (int i
= 0; i
< dnp
->dn_nblkptr
; i
++)
860 bp
->blk_fill
+= dnp
->dn_blkptr
[i
].blk_fill
;
862 if (zio
->io_flags
& ZIO_FLAG_IO_REWRITE
) {
863 ASSERT(DVA_EQUAL(BP_IDENTITY(bp
), BP_IDENTITY(bp_orig
)));
865 if (zio
->io_bp_orig
.blk_birth
== os
->os_synctx
->tx_txg
)
866 (void) dsl_dataset_block_kill(os
->os_dsl_dataset
,
867 &zio
->io_bp_orig
, zio
, os
->os_synctx
);
868 dsl_dataset_block_born(os
->os_dsl_dataset
, bp
, os
->os_synctx
);
872 /* called from dsl */
874 dmu_objset_sync(objset_impl_t
*os
, zio_t
*pio
, dmu_tx_t
*tx
)
878 writeprops_t wp
= { 0 };
881 dbuf_dirty_record_t
*dr
;
883 dprintf_ds(os
->os_dsl_dataset
, "txg=%llu\n", tx
->tx_txg
);
885 ASSERT(dmu_tx_is_syncing(tx
));
886 /* XXX the write_done callback should really give us the tx... */
889 if (os
->os_dsl_dataset
== NULL
) {
891 * This is the MOS. If we have upgraded,
892 * spa_max_replication() could change, so reset
895 os
->os_copies
= spa_max_replication(os
->os_spa
);
899 * Create the root block IO
901 zb
.zb_objset
= os
->os_dsl_dataset
? os
->os_dsl_dataset
->ds_object
: 0;
903 zb
.zb_level
= -1; /* for block ordering; it's level 0 on disk */
906 wp
.wp_type
= DMU_OT_OBJSET
;
907 wp
.wp_level
= 0; /* on-disk BP level; see above */
908 wp
.wp_copies
= os
->os_copies
;
909 wp
.wp_oschecksum
= os
->os_checksum
;
910 wp
.wp_oscompress
= os
->os_compress
;
912 if (BP_IS_OLDER(os
->os_rootbp
, tx
->tx_txg
)) {
913 (void) dsl_dataset_block_kill(os
->os_dsl_dataset
,
914 os
->os_rootbp
, pio
, tx
);
917 arc_release(os
->os_phys_buf
, &os
->os_phys_buf
);
918 zio
= arc_write(pio
, os
->os_spa
, &wp
, DMU_OS_IS_L2CACHEABLE(os
),
919 tx
->tx_txg
, os
->os_rootbp
, os
->os_phys_buf
, ready
, NULL
, os
,
920 ZIO_PRIORITY_ASYNC_WRITE
, ZIO_FLAG_MUSTSUCCEED
, &zb
);
923 * Sync meta-dnode - the parent IO for the sync is the root block
925 os
->os_meta_dnode
->dn_zio
= zio
;
926 dnode_sync(os
->os_meta_dnode
, tx
);
928 txgoff
= tx
->tx_txg
& TXG_MASK
;
930 dmu_objset_sync_dnodes(&os
->os_free_dnodes
[txgoff
], tx
);
931 dmu_objset_sync_dnodes(&os
->os_dirty_dnodes
[txgoff
], tx
);
933 list
= &os
->os_meta_dnode
->dn_dirty_records
[txgoff
];
934 while (dr
= list_head(list
)) {
935 ASSERT(dr
->dr_dbuf
->db_level
== 0);
936 list_remove(list
, dr
);
938 zio_nowait(dr
->dr_zio
);
941 * Free intent log blocks up to this tx.
943 zil_sync(os
->os_zil
, tx
);
944 os
->os_phys
->os_zil_header
= os
->os_zil_header
;
949 dmu_objset_space(objset_t
*os
, uint64_t *refdbytesp
, uint64_t *availbytesp
,
950 uint64_t *usedobjsp
, uint64_t *availobjsp
)
952 dsl_dataset_space(os
->os
->os_dsl_dataset
, refdbytesp
, availbytesp
,
953 usedobjsp
, availobjsp
);
957 dmu_objset_fsid_guid(objset_t
*os
)
959 return (dsl_dataset_fsid_guid(os
->os
->os_dsl_dataset
));
963 dmu_objset_fast_stat(objset_t
*os
, dmu_objset_stats_t
*stat
)
965 stat
->dds_type
= os
->os
->os_phys
->os_type
;
966 if (os
->os
->os_dsl_dataset
)
967 dsl_dataset_fast_stat(os
->os
->os_dsl_dataset
, stat
);
971 dmu_objset_stats(objset_t
*os
, nvlist_t
*nv
)
973 ASSERT(os
->os
->os_dsl_dataset
||
974 os
->os
->os_phys
->os_type
== DMU_OST_META
);
976 if (os
->os
->os_dsl_dataset
!= NULL
)
977 dsl_dataset_stats(os
->os
->os_dsl_dataset
, nv
);
979 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_TYPE
,
980 os
->os
->os_phys
->os_type
);
984 dmu_objset_is_snapshot(objset_t
*os
)
986 if (os
->os
->os_dsl_dataset
!= NULL
)
987 return (dsl_dataset_is_snapshot(os
->os
->os_dsl_dataset
));
993 dmu_snapshot_realname(objset_t
*os
, char *name
, char *real
, int maxlen
,
996 dsl_dataset_t
*ds
= os
->os
->os_dsl_dataset
;
999 if (ds
->ds_phys
->ds_snapnames_zapobj
== 0)
1002 return (zap_lookup_norm(ds
->ds_dir
->dd_pool
->dp_meta_objset
,
1003 ds
->ds_phys
->ds_snapnames_zapobj
, name
, 8, 1, &ignored
, MT_FIRST
,
1004 real
, maxlen
, conflict
));
1008 dmu_snapshot_list_next(objset_t
*os
, int namelen
, char *name
,
1009 uint64_t *idp
, uint64_t *offp
, boolean_t
*case_conflict
)
1011 dsl_dataset_t
*ds
= os
->os
->os_dsl_dataset
;
1012 zap_cursor_t cursor
;
1013 zap_attribute_t attr
;
1015 if (ds
->ds_phys
->ds_snapnames_zapobj
== 0)
1018 zap_cursor_init_serialized(&cursor
,
1019 ds
->ds_dir
->dd_pool
->dp_meta_objset
,
1020 ds
->ds_phys
->ds_snapnames_zapobj
, *offp
);
1022 if (zap_cursor_retrieve(&cursor
, &attr
) != 0) {
1023 zap_cursor_fini(&cursor
);
1027 if (strlen(attr
.za_name
) + 1 > namelen
) {
1028 zap_cursor_fini(&cursor
);
1029 return (ENAMETOOLONG
);
1032 (void) strcpy(name
, attr
.za_name
);
1034 *idp
= attr
.za_first_integer
;
1036 *case_conflict
= attr
.za_normalization_conflict
;
1037 zap_cursor_advance(&cursor
);
1038 *offp
= zap_cursor_serialize(&cursor
);
1039 zap_cursor_fini(&cursor
);
1045 dmu_dir_list_next(objset_t
*os
, int namelen
, char *name
,
1046 uint64_t *idp
, uint64_t *offp
)
1048 dsl_dir_t
*dd
= os
->os
->os_dsl_dataset
->ds_dir
;
1049 zap_cursor_t cursor
;
1050 zap_attribute_t attr
;
1052 /* there is no next dir on a snapshot! */
1053 if (os
->os
->os_dsl_dataset
->ds_object
!=
1054 dd
->dd_phys
->dd_head_dataset_obj
)
1057 zap_cursor_init_serialized(&cursor
,
1058 dd
->dd_pool
->dp_meta_objset
,
1059 dd
->dd_phys
->dd_child_dir_zapobj
, *offp
);
1061 if (zap_cursor_retrieve(&cursor
, &attr
) != 0) {
1062 zap_cursor_fini(&cursor
);
1066 if (strlen(attr
.za_name
) + 1 > namelen
) {
1067 zap_cursor_fini(&cursor
);
1068 return (ENAMETOOLONG
);
1071 (void) strcpy(name
, attr
.za_name
);
1073 *idp
= attr
.za_first_integer
;
1074 zap_cursor_advance(&cursor
);
1075 *offp
= zap_cursor_serialize(&cursor
);
1076 zap_cursor_fini(&cursor
);
1082 int (*func
)(char *, void *);
1088 findfunc(spa_t
*spa
, uint64_t dsobj
, const char *dsname
, void *arg
)
1090 struct findarg
*fa
= arg
;
1091 return (fa
->func((char *)dsname
, fa
->arg
));
1095 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1096 * Perhaps change all callers to use dmu_objset_find_spa()?
1099 dmu_objset_find(char *name
, int func(char *, void *), void *arg
, int flags
)
1104 return (dmu_objset_find_spa(NULL
, name
, findfunc
, &fa
, flags
));
1108 * Find all objsets under name, call func on each
1111 dmu_objset_find_spa(spa_t
*spa
, const char *name
,
1112 int func(spa_t
*, uint64_t, const char *, void *), void *arg
, int flags
)
1118 zap_attribute_t
*attr
;
1124 name
= spa_name(spa
);
1125 err
= dsl_dir_open_spa(spa
, name
, FTAG
, &dd
, NULL
);
1129 /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1130 if (dd
->dd_myname
[0] == '$') {
1131 dsl_dir_close(dd
, FTAG
);
1135 thisobj
= dd
->dd_phys
->dd_head_dataset_obj
;
1136 attr
= kmem_alloc(sizeof (zap_attribute_t
), KM_SLEEP
);
1140 * Iterate over all children.
1142 if (flags
& DS_FIND_CHILDREN
) {
1143 for (zap_cursor_init(&zc
, dp
->dp_meta_objset
,
1144 dd
->dd_phys
->dd_child_dir_zapobj
);
1145 zap_cursor_retrieve(&zc
, attr
) == 0;
1146 (void) zap_cursor_advance(&zc
)) {
1147 ASSERT(attr
->za_integer_length
== sizeof (uint64_t));
1148 ASSERT(attr
->za_num_integers
== 1);
1150 child
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
1151 (void) strcpy(child
, name
);
1152 (void) strcat(child
, "/");
1153 (void) strcat(child
, attr
->za_name
);
1154 err
= dmu_objset_find_spa(spa
, child
, func
, arg
, flags
);
1155 kmem_free(child
, MAXPATHLEN
);
1159 zap_cursor_fini(&zc
);
1162 dsl_dir_close(dd
, FTAG
);
1163 kmem_free(attr
, sizeof (zap_attribute_t
));
1169 * Iterate over all snapshots.
1171 if (flags
& DS_FIND_SNAPSHOTS
) {
1172 if (!dsl_pool_sync_context(dp
))
1173 rw_enter(&dp
->dp_config_rwlock
, RW_READER
);
1174 err
= dsl_dataset_hold_obj(dp
, thisobj
, FTAG
, &ds
);
1175 if (!dsl_pool_sync_context(dp
))
1176 rw_exit(&dp
->dp_config_rwlock
);
1179 uint64_t snapobj
= ds
->ds_phys
->ds_snapnames_zapobj
;
1180 dsl_dataset_rele(ds
, FTAG
);
1182 for (zap_cursor_init(&zc
, dp
->dp_meta_objset
, snapobj
);
1183 zap_cursor_retrieve(&zc
, attr
) == 0;
1184 (void) zap_cursor_advance(&zc
)) {
1185 ASSERT(attr
->za_integer_length
==
1187 ASSERT(attr
->za_num_integers
== 1);
1189 child
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
1190 (void) strcpy(child
, name
);
1191 (void) strcat(child
, "@");
1192 (void) strcat(child
, attr
->za_name
);
1193 err
= func(spa
, attr
->za_first_integer
,
1195 kmem_free(child
, MAXPATHLEN
);
1199 zap_cursor_fini(&zc
);
1203 dsl_dir_close(dd
, FTAG
);
1204 kmem_free(attr
, sizeof (zap_attribute_t
));
1210 * Apply to self if appropriate.
1212 err
= func(spa
, thisobj
, name
, arg
);
1218 dmu_objset_prefetch(char *name
, void *arg
)
1223 os
= kmem_alloc(sizeof (objset_t
), KM_SLEEP
);
1224 if (dsl_dataset_hold(name
, os
, &ds
)) {
1225 kmem_free(os
, sizeof (objset_t
));
1229 if (!BP_IS_HOLE(&ds
->ds_phys
->ds_bp
)) {
1230 uint32_t aflags
= ARC_NOWAIT
| ARC_PREFETCH
;
1233 zb
.zb_objset
= ds
->ds_object
;
1238 (void) arc_read_nolock(NULL
, dsl_dataset_get_spa(ds
),
1239 &ds
->ds_phys
->ds_bp
, NULL
, NULL
, ZIO_PRIORITY_ASYNC_READ
,
1240 ZIO_FLAG_CANFAIL
| ZIO_FLAG_SPECULATIVE
, &aflags
, &zb
);
1243 dsl_dataset_rele(ds
, os
);
1244 kmem_free(os
, sizeof (objset_t
));
1249 dmu_objset_set_user(objset_t
*os
, void *user_ptr
)
1251 ASSERT(MUTEX_HELD(&os
->os
->os_user_ptr_lock
));
1252 os
->os
->os_user_ptr
= user_ptr
;
1256 dmu_objset_get_user(objset_t
*os
)
1258 ASSERT(MUTEX_HELD(&os
->os
->os_user_ptr_lock
));
1259 return (os
->os
->os_user_ptr
);