4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
31 #include <sys/pathname.h>
32 #include <sys/vnode.h>
34 #include <sys/vfs_opreg.h>
35 #include <sys/mntent.h>
36 #include <sys/mount.h>
37 #include <sys/cmn_err.h>
38 #include <sys/zfs_znode.h>
39 #include <sys/zfs_dir.h>
41 #include <sys/fs/zfs.h>
43 #include <sys/dsl_prop.h>
44 #include <sys/dsl_dataset.h>
45 #include <sys/dsl_deleg.h>
48 #include <sys/varargs.h>
49 #include <sys/policy.h>
50 #include <sys/atomic.h>
51 #include <sys/mkdev.h>
52 #include <sys/modctl.h>
53 #include <sys/zfs_ioctl.h>
54 #include <sys/zfs_ctldir.h>
55 #include <sys/zfs_fuid.h>
56 #include <sys/sunddi.h>
58 #include <sys/dmu_objset.h>
59 #include <sys/spa_boot.h>
62 /* include ddi_name_to_major function is there better place for it ?*/
64 #include <sys/systm.h>
68 vfsops_t
*zfs_vfsops
= NULL
;
69 static major_t zfs_major
;
70 static minor_t zfs_minor
;
71 static kmutex_t zfs_dev_mtx
;
74 kmutex_t zfs_debug_mtx
;
76 /* XXX NetBSD static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);*/
77 static int zfs_mount(vfs_t
*vfsp
, const char *path
, void *data
, size_t *data_len
);
78 static int zfs_umount(vfs_t
*vfsp
, int fflag
);
79 static int zfs_root(vfs_t
*vfsp
, vnode_t
**vpp
);
80 static int zfs_statvfs(vfs_t
*vfsp
, struct statvfs
*statp
);
81 static int zfs_fhtovp(vfs_t
*vfsp
, fid_t
*fidp
, vnode_t
**vpp
);
82 static int zfs_vget(vfs_t
*vfsp
, ino_t ino
, vnode_t
**vpp
);
83 static int zfs_start(vfs_t
*vfsp
, int flags
);
84 static void zfs_freevfs(vfs_t
*vfsp
);
90 extern const struct vnodeopv_desc zfs_vnodeop_opv_desc
;
92 static const struct vnodeopv_desc
* const zfs_vnodeop_descs
[] = {
93 &zfs_vnodeop_opv_desc
,
97 static struct vfsops zfs_vfsops_template
= {
98 .vfs_name
= MOUNT_ZFS
,
99 .vfs_min_mount_data
= sizeof(struct zfs_args
),
100 .vfs_opv_descs
= zfs_vnodeop_descs
,
101 .vfs_mount
= zfs_mount
,
102 .vfs_unmount
= zfs_umount
,
103 .vfs_root
= zfs_root
,
104 .vfs_statvfs
= zfs_statvfs
,
105 .vfs_sync
= zfs_sync
,
106 .vfs_vget
= zfs_vget
,
107 .vfs_fhtovp
= zfs_fhtovp
,
108 .vfs_init
= zfs_init
,
109 .vfs_done
= zfs_fini
,
110 .vfs_start
= zfs_start
,
111 .vfs_renamelock_enter
= (void*)nullop
,
112 .vfs_renamelock_exit
= (void*)nullop
,
113 .vfs_reinit
= (void *)nullop
,
114 .vfs_vptofh
= (void *)eopnotsupp
,
115 .vfs_fhtovp
= (void *)eopnotsupp
,
116 .vfs_quotactl
= (void *)eopnotsupp
,
117 .vfs_extattrctl
= (void *)eopnotsupp
,
118 .vfs_snapshot
= (void *)eopnotsupp
,
119 .vfs_fsync
= (void *)eopnotsupp
,
123 * We need to keep a count of active fs's.
124 * This is necessary to prevent our module
125 * from being unloaded after a umount -f
127 static uint32_t zfs_active_fs_count
= 0;
129 static char *noatime_cancel
[] = { MNTOPT_ATIME
, NULL
};
130 static char *atime_cancel
[] = { MNTOPT_NOATIME
, NULL
};
131 static char *noxattr_cancel
[] = { MNTOPT_XATTR
, NULL
};
132 static char *xattr_cancel
[] = { MNTOPT_NOXATTR
, NULL
};
135 * MO_DEFAULT is not used since the default value is determined
136 * by the equivalent property.
138 static mntopt_t mntopts
[] = {
139 { MNTOPT_NOXATTR
, noxattr_cancel
, NULL
, 0, NULL
},
140 { MNTOPT_XATTR
, xattr_cancel
, NULL
, 0, NULL
},
141 { MNTOPT_NOATIME
, noatime_cancel
, NULL
, 0, NULL
},
142 { MNTOPT_ATIME
, atime_cancel
, NULL
, 0, NULL
}
145 static mntopts_t zfs_mntopts
= {
146 sizeof (mntopts
) / sizeof (mntopt_t
),
152 zfs_sync(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
154 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
156 vnode_t
*vp
, *nvp
, *mvp
;
164 * Data integrity is job one. We don't want a compromised kernel
165 * writing to the storage pool, so we never sync during panic.
170 /* Allocate a marker vnode. */
171 if ((mvp
= vnalloc(vfsp
)) == NULL
)
176 * On NetBSD, we need to push out atime updates. Solaris does
177 * this during VOP_INACTIVE, but that does not work well with the
178 * BSD VFS, so we do it in batch here.
180 mutex_enter(&mntvnode_lock
);
182 for (vp
= TAILQ_FIRST(&vfsp
->mnt_vnodelist
); vp
; vp
= nvp
) {
183 nvp
= TAILQ_NEXT(vp
, v_mntvnodes
);
185 * If the vnode that we are about to sync is no
186 * longer associated with this mount point, start
189 if (vp
->v_mount
!= vfsp
)
192 * Don't interfere with concurrent scans of this FS.
197 * Skip the vnode/inode if inaccessible, or if the
200 mutex_enter(&vp
->v_interlock
);
202 if (zp
== NULL
|| vp
->v_type
== VNON
||
203 (vp
->v_iflag
& (VI_XLOCK
| VI_CLEAN
)) != 0 ||
204 zp
->z_atime_dirty
== 0 || zp
->z_unlinked
) {
205 mutex_exit(&vp
->v_interlock
);
209 mutex_exit(&mntvnode_lock
);
210 error
= vget(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
);
212 mutex_enter(&mntvnode_lock
);
214 if (error
== ENOENT
) {
219 tx
= dmu_tx_create(zfsvfs
->z_os
);
220 dmu_tx_hold_bonus(tx
, zp
->z_id
);
221 error
= dmu_tx_assign(tx
, TXG_WAIT
);
225 dmu_buf_will_dirty(zp
->z_dbuf
, tx
);
226 mutex_enter(&zp
->z_lock
);
227 zp
->z_atime_dirty
= 0;
228 mutex_exit(&zp
->z_lock
);
232 mutex_enter(&mntvnode_lock
);
235 mutex_exit(&mntvnode_lock
);
238 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
239 * to sync metadata, which they would otherwise cache indefinitely.
240 * Semantically, the only requirement is that the sync be initiated.
241 * The DMU syncs out txgs frequently, so there's nothing to do.
243 if ((flag
& MNT_LAZY
) != 0)
248 * Sync a specific filesystem.
253 if (zfsvfs
->z_log
!= NULL
)
254 zil_commit(zfsvfs
->z_log
, UINT64_MAX
, 0);
256 txg_wait_synced(dmu_objset_pool(zfsvfs
->z_os
), 0);
260 * Sync all ZFS filesystems. This is what happens when you
261 * run sync(1M). Unlike other filesystems, ZFS honors the
262 * request by waiting for all pools to commit all dirty data.
273 zfs_create_unique_device(dev_t
*dev
)
278 ASSERT3U(zfs_minor
, <=, MAXMIN
);
279 minor_t start
= zfs_minor
;
281 mutex_enter(&zfs_dev_mtx
);
282 if (zfs_minor
>= MAXMIN
) {
284 * If we're still using the real major
285 * keep out of /dev/zfs and /dev/zvol minor
286 * number space. If we're using a getudev()'ed
287 * major number, we can use all of its minors.
289 if (zfs_major
== ddi_name_to_major(ZFS_DRIVER
))
290 zfs_minor
= ZFS_MIN_MINOR
;
296 *dev
= makedevice(zfs_major
, zfs_minor
);
297 mutex_exit(&zfs_dev_mtx
);
298 } while (vfs_devismounted(*dev
) && zfs_minor
!= start
);
301 if (zfs_minor
== start
) {
303 * We are using all ~262,000 minor numbers for the
304 * current major number. Create a new major number.
306 if ((new_major
= getudev()) == (major_t
)-1) {
308 "zfs_mount: Can't get unique major "
312 mutex_enter(&zfs_dev_mtx
);
313 zfs_major
= new_major
;
316 mutex_exit(&zfs_dev_mtx
);
320 /* CONSTANTCONDITION */
328 atime_changed_cb(void *arg
, uint64_t newval
)
330 zfsvfs_t
*zfsvfs
= arg
;
332 if (newval
== TRUE
) {
333 zfsvfs
->z_atime
= TRUE
;
334 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NOATIME
);
335 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_ATIME
, NULL
, 0);
337 zfsvfs
->z_atime
= FALSE
;
338 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_ATIME
);
339 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NOATIME
, NULL
, 0);
344 xattr_changed_cb(void *arg
, uint64_t newval
)
346 zfsvfs_t
*zfsvfs
= arg
;
348 if (newval
== TRUE
) {
349 /* XXX locking on vfs_flag? */
351 zfsvfs
->z_vfs
->vfs_flag
|= VFS_XATTR
;
353 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NOXATTR
);
354 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_XATTR
, NULL
, 0);
356 /* XXX locking on vfs_flag? */
358 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_XATTR
;
360 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_XATTR
);
361 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NOXATTR
, NULL
, 0);
366 blksz_changed_cb(void *arg
, uint64_t newval
)
368 zfsvfs_t
*zfsvfs
= arg
;
370 if (newval
< SPA_MINBLOCKSIZE
||
371 newval
> SPA_MAXBLOCKSIZE
|| !ISP2(newval
))
372 newval
= SPA_MAXBLOCKSIZE
;
374 zfsvfs
->z_max_blksz
= newval
;
375 zfsvfs
->z_vfs
->vfs_bsize
= newval
;
379 readonly_changed_cb(void *arg
, uint64_t newval
)
381 zfsvfs_t
*zfsvfs
= arg
;
384 /* XXX locking on vfs_flag? */
385 zfsvfs
->z_vfs
->vfs_flag
|= VFS_RDONLY
;
386 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_RW
);
387 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_RO
, NULL
, 0);
389 /* XXX locking on vfs_flag? */
390 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_RDONLY
;
391 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_RO
);
392 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_RW
, NULL
, 0);
397 devices_changed_cb(void *arg
, uint64_t newval
)
399 zfsvfs_t
*zfsvfs
= arg
;
401 if (newval
== FALSE
) {
402 zfsvfs
->z_vfs
->vfs_flag
|= VFS_NODEVICES
;
403 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_DEVICES
);
404 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NODEVICES
, NULL
, 0);
406 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_NODEVICES
;
407 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NODEVICES
);
408 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_DEVICES
, NULL
, 0);
413 setuid_changed_cb(void *arg
, uint64_t newval
)
415 zfsvfs_t
*zfsvfs
= arg
;
417 if (newval
== FALSE
) {
418 zfsvfs
->z_vfs
->vfs_flag
|= VFS_NOSETUID
;
419 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_SETUID
);
420 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NOSETUID
, NULL
, 0);
422 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_NOSETUID
;
423 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NOSETUID
);
424 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_SETUID
, NULL
, 0);
429 exec_changed_cb(void *arg
, uint64_t newval
)
431 zfsvfs_t
*zfsvfs
= arg
;
433 if (newval
== FALSE
) {
434 zfsvfs
->z_vfs
->vfs_flag
|= VFS_NOEXEC
;
435 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_EXEC
);
436 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NOEXEC
, NULL
, 0);
438 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_NOEXEC
;
439 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NOEXEC
);
440 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_EXEC
, NULL
, 0);
445 * The nbmand mount option can be changed at mount time.
446 * We can't allow it to be toggled on live file systems or incorrect
447 * behavior may be seen from cifs clients
449 * This property isn't registered via dsl_prop_register(), but this callback
450 * will be called when a file system is first mounted
453 nbmand_changed_cb(void *arg
, uint64_t newval
)
455 zfsvfs_t
*zfsvfs
= arg
;
456 if (newval
== FALSE
) {
457 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NBMAND
);
458 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NONBMAND
, NULL
, 0);
460 vfs_clearmntopt(zfsvfs
->z_vfs
, MNTOPT_NONBMAND
);
461 vfs_setmntopt(zfsvfs
->z_vfs
, MNTOPT_NBMAND
, NULL
, 0);
466 snapdir_changed_cb(void *arg
, uint64_t newval
)
468 zfsvfs_t
*zfsvfs
= arg
;
470 zfsvfs
->z_show_ctldir
= newval
;
474 vscan_changed_cb(void *arg
, uint64_t newval
)
476 zfsvfs_t
*zfsvfs
= arg
;
478 zfsvfs
->z_vscan
= newval
;
482 acl_mode_changed_cb(void *arg
, uint64_t newval
)
484 zfsvfs_t
*zfsvfs
= arg
;
486 zfsvfs
->z_acl_mode
= newval
;
490 acl_inherit_changed_cb(void *arg
, uint64_t newval
)
492 zfsvfs_t
*zfsvfs
= arg
;
494 zfsvfs
->z_acl_inherit
= newval
;
498 zfs_register_callbacks(vfs_t
*vfsp
)
500 struct dsl_dataset
*ds
= NULL
;
502 zfsvfs_t
*zfsvfs
= NULL
;
504 int readonly
, do_readonly
= B_FALSE
;
505 int setuid
, do_setuid
= B_FALSE
;
506 int exec
, do_exec
= B_FALSE
;
507 int devices
, do_devices
= B_FALSE
;
508 int xattr
, do_xattr
= B_FALSE
;
509 int atime
, do_atime
= B_FALSE
;
513 zfsvfs
= vfsp
->vfs_data
;
518 * The act of registering our callbacks will destroy any mount
519 * options we may have. In order to enable temporary overrides
520 * of mount options, we stash away the current values and
521 * restore them after we register the callbacks.
523 if (vfs_optionisset(vfsp
, MNTOPT_RO
, NULL
)) {
525 do_readonly
= B_TRUE
;
526 } else if (vfs_optionisset(vfsp
, MNTOPT_RW
, NULL
)) {
528 do_readonly
= B_TRUE
;
530 if (vfs_optionisset(vfsp
, MNTOPT_NOSUID
, NULL
)) {
536 if (vfs_optionisset(vfsp
, MNTOPT_NODEVICES
, NULL
)) {
539 } else if (vfs_optionisset(vfsp
, MNTOPT_DEVICES
, NULL
)) {
544 if (vfs_optionisset(vfsp
, MNTOPT_NOSETUID
, NULL
)) {
547 } else if (vfs_optionisset(vfsp
, MNTOPT_SETUID
, NULL
)) {
552 if (vfs_optionisset(vfsp
, MNTOPT_NOEXEC
, NULL
)) {
555 } else if (vfs_optionisset(vfsp
, MNTOPT_EXEC
, NULL
)) {
559 if (vfs_optionisset(vfsp
, MNTOPT_NOXATTR
, NULL
)) {
562 } else if (vfs_optionisset(vfsp
, MNTOPT_XATTR
, NULL
)) {
566 if (vfs_optionisset(vfsp
, MNTOPT_NOATIME
, NULL
)) {
569 } else if (vfs_optionisset(vfsp
, MNTOPT_ATIME
, NULL
)) {
575 * nbmand is a special property. It can only be changed at
578 * This is weird, but it is documented to only be changeable
581 if (vfs_optionisset(vfsp
, MNTOPT_NONBMAND
, NULL
)) {
583 } else if (vfs_optionisset(vfsp
, MNTOPT_NBMAND
, NULL
)) {
586 char osname
[MAXNAMELEN
];
588 dmu_objset_name(os
, osname
);
589 if (error
= dsl_prop_get_integer(osname
, "nbmand", &nbmand
,
596 * Register property callbacks.
598 * It would probably be fine to just check for i/o error from
599 * the first prop_register(), but I guess I like to go
602 ds
= dmu_objset_ds(os
);
603 error
= dsl_prop_register(ds
, "atime", atime_changed_cb
, zfsvfs
);
604 error
= error
? error
: dsl_prop_register(ds
,
605 "xattr", xattr_changed_cb
, zfsvfs
);
606 error
= error
? error
: dsl_prop_register(ds
,
607 "recordsize", blksz_changed_cb
, zfsvfs
);
608 error
= error
? error
: dsl_prop_register(ds
,
609 "readonly", readonly_changed_cb
, zfsvfs
);
610 error
= error
? error
: dsl_prop_register(ds
,
611 "devices", devices_changed_cb
, zfsvfs
);
612 error
= error
? error
: dsl_prop_register(ds
,
613 "setuid", setuid_changed_cb
, zfsvfs
);
614 error
= error
? error
: dsl_prop_register(ds
,
615 "exec", exec_changed_cb
, zfsvfs
);
616 error
= error
? error
: dsl_prop_register(ds
,
617 "snapdir", snapdir_changed_cb
, zfsvfs
);
618 error
= error
? error
: dsl_prop_register(ds
,
619 "aclmode", acl_mode_changed_cb
, zfsvfs
);
620 error
= error
? error
: dsl_prop_register(ds
,
621 "aclinherit", acl_inherit_changed_cb
, zfsvfs
);
622 error
= error
? error
: dsl_prop_register(ds
,
623 "vscan", vscan_changed_cb
, zfsvfs
);
628 * Invoke our callbacks to restore temporary mount options.
631 readonly_changed_cb(zfsvfs
, readonly
);
633 setuid_changed_cb(zfsvfs
, setuid
);
635 exec_changed_cb(zfsvfs
, exec
);
637 devices_changed_cb(zfsvfs
, devices
);
639 xattr_changed_cb(zfsvfs
, xattr
);
641 atime_changed_cb(zfsvfs
, atime
);
643 nbmand_changed_cb(zfsvfs
, nbmand
);
649 * We may attempt to unregister some callbacks that are not
650 * registered, but this is OK; it will simply return ENOMSG,
651 * which we will ignore.
653 (void) dsl_prop_unregister(ds
, "atime", atime_changed_cb
, zfsvfs
);
654 (void) dsl_prop_unregister(ds
, "xattr", xattr_changed_cb
, zfsvfs
);
655 (void) dsl_prop_unregister(ds
, "recordsize", blksz_changed_cb
, zfsvfs
);
656 (void) dsl_prop_unregister(ds
, "readonly", readonly_changed_cb
, zfsvfs
);
657 (void) dsl_prop_unregister(ds
, "devices", devices_changed_cb
, zfsvfs
);
658 (void) dsl_prop_unregister(ds
, "setuid", setuid_changed_cb
, zfsvfs
);
659 (void) dsl_prop_unregister(ds
, "exec", exec_changed_cb
, zfsvfs
);
660 (void) dsl_prop_unregister(ds
, "snapdir", snapdir_changed_cb
, zfsvfs
);
661 (void) dsl_prop_unregister(ds
, "aclmode", acl_mode_changed_cb
, zfsvfs
);
662 (void) dsl_prop_unregister(ds
, "aclinherit", acl_inherit_changed_cb
,
664 (void) dsl_prop_unregister(ds
, "vscan", vscan_changed_cb
, zfsvfs
);
670 zfsvfs_setup(zfsvfs_t
*zfsvfs
, boolean_t mounting
)
674 error
= zfs_register_callbacks(zfsvfs
->z_vfs
);
679 * Set the objset user_ptr to track its zfsvfs.
681 mutex_enter(&zfsvfs
->z_os
->os
->os_user_ptr_lock
);
682 dmu_objset_set_user(zfsvfs
->z_os
, zfsvfs
);
683 mutex_exit(&zfsvfs
->z_os
->os
->os_user_ptr_lock
);
686 * If we are not mounting (ie: online recv), then we don't
687 * have to worry about replaying the log as we blocked all
688 * operations out since we closed the ZIL.
694 * During replay we remove the read only flag to
695 * allow replays to succeed.
697 readonly
= zfsvfs
->z_vfs
->vfs_flag
& VFS_RDONLY
;
698 zfsvfs
->z_vfs
->vfs_flag
&= ~VFS_RDONLY
;
701 * Parse and replay the intent log.
703 zil_replay(zfsvfs
->z_os
, zfsvfs
, &zfsvfs
->z_assign
,
704 zfs_replay_vector
, zfs_unlinked_drain
);
706 zfs_unlinked_drain(zfsvfs
);
707 zfsvfs
->z_vfs
->vfs_flag
|= readonly
; /* restore readonly bit */
711 zfsvfs
->z_log
= zil_open(zfsvfs
->z_os
, zfs_get_data
);
717 zfs_freezfsvfs(zfsvfs_t
*zfsvfs
)
720 mutex_destroy(&zfsvfs
->z_znodes_lock
);
721 mutex_destroy(&zfsvfs
->z_online_recv_lock
);
722 list_destroy(&zfsvfs
->z_all_znodes
);
723 rrw_destroy(&zfsvfs
->z_teardown_lock
);
724 rw_destroy(&zfsvfs
->z_teardown_inactive_lock
);
725 rw_destroy(&zfsvfs
->z_fuid_lock
);
726 kmem_free(zfsvfs
, sizeof (zfsvfs_t
));
730 zfs_domount(vfs_t
*vfsp
, char *osname
)
733 uint64_t recordsize
, readonly
;
743 * Initialize the zfs-specific filesystem structure.
744 * Should probably make this a kmem cache, shuffle fields,
745 * and just bzero up to z_hold_mtx[].
747 zfsvfs
= kmem_zalloc(sizeof (zfsvfs_t
), KM_SLEEP
);
748 zfsvfs
->z_vfs
= vfsp
;
749 zfsvfs
->z_parent
= zfsvfs
;
750 zfsvfs
->z_assign
= TXG_NOWAIT
;
751 zfsvfs
->z_max_blksz
= SPA_MAXBLOCKSIZE
;
752 zfsvfs
->z_show_ctldir
= ZFS_SNAPDIR_VISIBLE
;
754 dprintf("Creating zfsvfs %p\n", zfsvfs
);
755 mutex_init(&zfsvfs
->z_znodes_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
756 mutex_init(&zfsvfs
->z_online_recv_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
757 list_create(&zfsvfs
->z_all_znodes
, sizeof (znode_t
),
758 offsetof(znode_t
, z_link_node
));
759 rrw_init(&zfsvfs
->z_teardown_lock
);
760 rw_init(&zfsvfs
->z_teardown_inactive_lock
, NULL
, RW_DEFAULT
, NULL
);
761 rw_init(&zfsvfs
->z_fuid_lock
, NULL
, RW_DEFAULT
, NULL
);
763 /* Initialize the generic filesystem structure. */
764 vfsp
->vfs_data
= NULL
;
766 if (zfs_create_unique_device(&mount_dev
) == -1) {
770 ASSERT(vfs_devismounted(mount_dev
) == 0);
772 if (error
= dsl_prop_get_integer(osname
, "recordsize", &recordsize
,
776 vfsp
->vfs_bsize
= DEV_BSIZE
;
777 vfsp
->vfs_flag
|= VFS_NOTRUNC
;
778 vfsp
->vfs_data
= zfsvfs
;
780 if (error
= dsl_prop_get_integer(osname
, "readonly", &readonly
, NULL
))
783 mode
= DS_MODE_OWNER
;
785 mode
|= DS_MODE_READONLY
;
787 error
= dmu_objset_open(osname
, DMU_OST_ZFS
, mode
, &zfsvfs
->z_os
);
788 if (error
== EROFS
) {
789 mode
= DS_MODE_OWNER
| DS_MODE_READONLY
;
790 error
= dmu_objset_open(osname
, DMU_OST_ZFS
, mode
,
797 if (error
= zfs_init_fs(zfsvfs
, &zp
))
800 dprintf("zfs_domount vrele before vfsp->vfs_count %d\n", vfsp
->vfs_count
);
801 /* The call to zfs_init_fs leaves the vnode held, release it here. */
804 dprintf("zfs_domount vrele after vfsp->vfs_count %d\n", vfsp
->vfs_count
);
806 * Set features for file system.
808 zfsvfs
->z_use_fuids
= USE_FUIDS(zfsvfs
->z_version
, zfsvfs
->z_os
);
809 if (zfsvfs
->z_use_fuids
) {
810 vfs_set_feature(vfsp
, VFSFT_XVATTR
);
811 vfs_set_feature(vfsp
, VFSFT_SYSATTR_VIEWS
);
812 vfs_set_feature(vfsp
, VFSFT_ACEMASKONACCESS
);
813 vfs_set_feature(vfsp
, VFSFT_ACLONCREATE
);
815 if (zfsvfs
->z_case
== ZFS_CASE_INSENSITIVE
) {
816 vfs_set_feature(vfsp
, VFSFT_DIRENTFLAGS
);
817 vfs_set_feature(vfsp
, VFSFT_CASEINSENSITIVE
);
818 vfs_set_feature(vfsp
, VFSFT_NOCASESENSITIVE
);
819 } else if (zfsvfs
->z_case
== ZFS_CASE_MIXED
) {
820 vfs_set_feature(vfsp
, VFSFT_DIRENTFLAGS
);
821 vfs_set_feature(vfsp
, VFSFT_CASEINSENSITIVE
);
824 if (dmu_objset_is_snapshot(zfsvfs
->z_os
)) {
826 ASSERT(mode
& DS_MODE_READONLY
);
827 atime_changed_cb(zfsvfs
, B_FALSE
);
828 readonly_changed_cb(zfsvfs
, B_TRUE
);
829 if (error
= dsl_prop_get_integer(osname
, "xattr", &pval
, NULL
))
831 xattr_changed_cb(zfsvfs
, pval
);
832 zfsvfs
->z_issnap
= B_TRUE
;
834 error
= zfsvfs_setup(zfsvfs
, B_TRUE
);
837 dprintf("zfs_vfsops.c zfs_domount called\n");
838 dprintf("vfsp->vfs_count %d\n", vfsp
->vfs_count
);
840 if (!zfsvfs
->z_issnap
)
841 zfsctl_create(zfsvfs
);
845 dmu_objset_close(zfsvfs
->z_os
);
846 zfs_freezfsvfs(zfsvfs
);
848 atomic_add_32(&zfs_active_fs_count
, 1);
854 zfs_unregister_callbacks(zfsvfs_t
*zfsvfs
)
856 objset_t
*os
= zfsvfs
->z_os
;
857 struct dsl_dataset
*ds
;
860 * Unregister properties.
862 if (!dmu_objset_is_snapshot(os
)) {
863 ds
= dmu_objset_ds(os
);
864 VERIFY(dsl_prop_unregister(ds
, "atime", atime_changed_cb
,
867 VERIFY(dsl_prop_unregister(ds
, "xattr", xattr_changed_cb
,
870 VERIFY(dsl_prop_unregister(ds
, "recordsize", blksz_changed_cb
,
873 VERIFY(dsl_prop_unregister(ds
, "readonly", readonly_changed_cb
,
876 VERIFY(dsl_prop_unregister(ds
, "devices", devices_changed_cb
,
879 VERIFY(dsl_prop_unregister(ds
, "setuid", setuid_changed_cb
,
882 VERIFY(dsl_prop_unregister(ds
, "exec", exec_changed_cb
,
885 VERIFY(dsl_prop_unregister(ds
, "snapdir", snapdir_changed_cb
,
888 VERIFY(dsl_prop_unregister(ds
, "aclmode", acl_mode_changed_cb
,
891 VERIFY(dsl_prop_unregister(ds
, "aclinherit",
892 acl_inherit_changed_cb
, zfsvfs
) == 0);
894 VERIFY(dsl_prop_unregister(ds
, "vscan",
895 vscan_changed_cb
, zfsvfs
) == 0);
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * str    - NUL-terminated string to parse; every character must be a
 *          decimal digit ('0'..'9').
 * objnum - receives the parsed value on success; left untouched on error.
 *
 * Returns 0 on success.  Returns EINVAL if the string contains a
 * non-digit character or if the value does not fit in a uint64_t
 * (previously the value wrapped around silently).  An empty string
 * parses as 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	uint64_t num = 0;

	while (*str) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);

		digit = (uint64_t)(*str++ - '0');

		/* Reject input where num * 10 + digit would overflow. */
		if (num > (UINT64_MAX - digit) / 10)
			return (EINVAL);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
919 * The boot path passed from the boot loader is in the form of
920 * "rootpool-name/root-filesystem-object-number'. Convert this
921 * string to a dataset name: "rootpool-name/root-filesystem-name".
924 zfs_parse_bootfs(char *bpath
, char *outpath
)
930 if (*bpath
== 0 || *bpath
== '/')
933 (void) strcpy(outpath
, bpath
);
935 slashp
= strchr(bpath
, '/');
937 /* if no '/', just return the pool name */
938 if (slashp
== NULL
) {
942 /* if not a number, just return the root dataset name */
943 if (str_to_uint64(slashp
+1, &objnum
)) {
948 error
= dsl_dsobj_to_dsname(bpath
, objnum
, outpath
);
956 zfs_mountroot(vfs_t
*vfsp
, enum whymountroot why
)
959 static int zfsrootdone
= 0;
960 zfsvfs_t
*zfsvfs
= NULL
;
969 * The filesystem that we mount as root is defined in the
970 * boot property "zfs-bootfs" with a format of
971 * "poolname/root-dataset-objnum".
973 if (why
== ROOT_INIT
) {
977 * the process of doing a spa_load will require the
978 * clock to be set before we could (for example) do
979 * something better by looking at the timestamp on
980 * an uberblock, so just set it to -1.
984 if ((zfs_bootfs
= spa_get_bootprop("zfs-bootfs")) == NULL
) {
985 cmn_err(CE_NOTE
, "spa_get_bootfs: can not get "
989 zfs_devid
= spa_get_bootprop("diskdevid");
990 error
= spa_import_rootpool(rootfs
.bo_name
, zfs_devid
);
992 spa_free_bootprop(zfs_devid
);
994 spa_free_bootprop(zfs_bootfs
);
995 cmn_err(CE_NOTE
, "spa_import_rootpool: error %d",
999 if (error
= zfs_parse_bootfs(zfs_bootfs
, rootfs
.bo_name
)) {
1000 spa_free_bootprop(zfs_bootfs
);
1001 cmn_err(CE_NOTE
, "zfs_parse_bootfs: error %d",
1006 spa_free_bootprop(zfs_bootfs
);
1008 if (error
= vfs_lock(vfsp
))
1011 if (error
= zfs_domount(vfsp
, rootfs
.bo_name
)) {
1012 cmn_err(CE_NOTE
, "zfs_domount: error %d", error
);
1016 zfsvfs
= (zfsvfs_t
*)vfsp
->vfs_data
;
1018 if (error
= zfs_zget(zfsvfs
, zfsvfs
->z_root
, &zp
)) {
1019 cmn_err(CE_NOTE
, "zfs_zget: error %d", error
);
1024 mutex_enter(&vp
->v_lock
);
1025 vp
->v_flag
|= VROOT
;
1026 mutex_exit(&vp
->v_lock
);
1030 * Leave rootvp held. The root file system is never unmounted.
1033 vfs_add((struct vnode
*)0, vfsp
,
1034 (vfsp
->vfs_flag
& VFS_RDONLY
) ? MS_RDONLY
: 0);
1038 } else if (why
== ROOT_REMOUNT
) {
1039 readonly_changed_cb(vfsp
->vfs_data
, B_FALSE
);
1040 vfsp
->vfs_flag
|= VFS_REMOUNT
;
1042 /* refresh mount options */
1043 zfs_unregister_callbacks(vfsp
->vfs_data
);
1044 return (zfs_register_callbacks(vfsp
));
1046 } else if (why
== ROOT_UNMOUNT
) {
1047 zfs_unregister_callbacks((zfsvfs_t
*)vfsp
->vfs_data
);
1048 (void) zfs_sync(vfsp
, 0, 0);
1053 * if "why" is equal to anything else other than ROOT_INIT,
1054 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1058 #endif /*__NetBSD__ */
1062 zfs_mount(vfs_t
*vfsp
, const char *path
, void *data
, size_t *data_len
)
1066 vnode_t
*mvp
= vfsp
->mnt_vnodecovered
;
1067 struct mounta
*uap
= data
;
1073 dprintf("zfs_vfsops.c zfs_mount called\n");
1074 dprintf("vfsp->vfs_count %d\n", vfsp
->vfs_count
);
1075 if (mvp
->v_type
!= VDIR
)
1078 mutex_enter(&mvp
->v_interlock
);
1079 if ((uap
->flags
& MS_REMOUNT
) == 0 &&
1080 (uap
->flags
& MS_OVERLAY
) == 0 &&
1081 (mvp
->v_count
!= 1 || (mvp
->v_flag
& VROOT
))) {
1082 mutex_exit(&mvp
->v_interlock
);
1085 mutex_exit(&mvp
->v_interlock
);
1088 * ZFS does not support passing unparsed data in via MS_DATA.
1089 * Users should use the MS_OPTIONSTR interface; this means
1090 * that all option parsing is already done and the options struct
1091 * can be interrogated.
1093 if ((uap
->flags
& MS_DATA
) && uap
->datalen
> 0)
1096 osname
= PNBUF_GET();
1098 strlcpy(osname
, uap
->fspec
, strlen(uap
->fspec
) + 1);
1101 * Check for mount privilege?
1103 * If we don't have privilege then see if
1104 * we have local permission to allow it
1106 error
= secpolicy_fs_mount(cr
, mvp
, vfsp
);
1108 error
= dsl_deleg_access(osname
, ZFS_DELEG_PERM_MOUNT
, cr
);
1113 * Make sure user is the owner of the mount point
1114 * or has sufficient privileges.
1117 vattr
.va_mask
= AT_UID
;
1119 if (error
= VOP_GETATTR(mvp
, &vattr
, 0, cr
, NULL
)) {
1123 if (secpolicy_vnode_owner(cr
, vattr
.va_uid
) != 0 &&
1124 VOP_ACCESS(mvp
, VWRITE
, cr
) != 0) {
1129 /* XXX NetBSD secpolicy_fs_mount_clearopts(cr, vfsp);*/
1136 * Refuse to mount a filesystem if we are in a local zone and the
1137 * dataset is not visible.
1139 if (!INGLOBALZONE(curproc
) &&
1140 (!zone_dataset_visible(osname
, &canwrite
) || !canwrite
)) {
1145 dprintf("uap->flags %d -- mflag %d -- MS_REMOUNT %d -- MTN_UPDATE %d\n", uap
->flags
, uap
->mflag
, MS_REMOUNT
, MNT_UPDATE
);
1147 * When doing a remount, we simply refresh our temporary properties
1148 * according to those options set in the current VFS options.
1150 if (uap
->flags
& MS_REMOUNT
) {
1151 /* refresh mount options */
1152 zfs_unregister_callbacks(vfsp
->vfs_data
);
1153 error
= zfs_register_callbacks(vfsp
);
1157 /* Mark ZFS as MP SAFE */
1158 vfsp
->mnt_iflag
|= IMNT_MPSAFE
;
1160 error
= zfs_domount(vfsp
, osname
);
1162 vfs_getnewfsid(vfsp
);
1164 /* setup zfs mount info */
1165 strlcpy(vfsp
->mnt_stat
.f_mntfromname
, osname
,
1166 sizeof(vfsp
->mnt_stat
.f_mntfromname
));
1167 set_statvfs_info(path
, UIO_USERSPACE
, vfsp
->mnt_stat
.f_mntfromname
,
1168 UIO_SYSSPACE
, vfsp
->mnt_op
->vfs_name
, vfsp
, curlwp
);
1177 zfs_statvfs(vfs_t
*vfsp
, struct statvfs
*statp
)
1179 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1181 uint64_t refdbytes
, availbytes
, usedobjs
, availobjs
;
1185 dmu_objset_space(zfsvfs
->z_os
,
1186 &refdbytes
, &availbytes
, &usedobjs
, &availobjs
);
1189 * The underlying storage pool actually uses multiple block sizes.
1190 * We report the fragsize as the smallest block size we support,
1191 * and we report our blocksize as the filesystem's maximum blocksize.
1193 statp
->f_frsize
= 1UL << SPA_MINBLOCKSHIFT
;
1194 statp
->f_bsize
= zfsvfs
->z_max_blksz
;
1197 * The following report "total" blocks of various kinds in the
1198 * file system, but reported in terms of f_frsize - the
1202 statp
->f_blocks
= (refdbytes
+ availbytes
) >> SPA_MINBLOCKSHIFT
;
1203 statp
->f_bfree
= availbytes
>> SPA_MINBLOCKSHIFT
;
1204 statp
->f_bavail
= statp
->f_bfree
; /* no root reservation */
1207 * statvfs() should really be called statufs(), because it assumes
1208 * static metadata. ZFS doesn't preallocate files, so the best
1209 * we can do is report the max that could possibly fit in f_files,
1210 * and that minus the number actually used in f_ffree.
1211 * For f_ffree, report the smaller of the number of object available
1212 * and the number of blocks (each object will take at least a block).
1214 statp
->f_ffree
= MIN(availobjs
, statp
->f_bfree
);
1215 statp
->f_favail
= statp
->f_ffree
; /* no "root reservation" */
1216 statp
->f_files
= statp
->f_ffree
+ usedobjs
;
1218 statp
->f_fsid
= vfsp
->mnt_stat
.f_fsidx
.__fsid_val
[0];
1221 * We're a zfs filesystem.
1223 (void) strlcpy(statp
->f_fstypename
, "zfs", sizeof(statp
->f_fstypename
));
1224 (void) strlcpy(statp
->f_mntfromname
, vfsp
->mnt_stat
.f_mntfromname
,
1225 sizeof(statp
->f_mntfromname
));
1226 (void) strlcpy(statp
->f_mntonname
, vfsp
->mnt_stat
.f_mntonname
,
1227 sizeof(statp
->f_mntonname
));
1229 statp
->f_namemax
= ZFS_MAXNAMELEN
;
1232 * We have all of 32 characters to stuff a string here.
1233 * Is there anything useful we could/should provide?
1236 bzero(statp
->f_fstr
, sizeof (statp
->f_fstr
));
1243 zfs_root(vfs_t
*vfsp
, vnode_t
**vpp
)
1245 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1250 dprintf("zfs_root called\n");
1251 error
= zfs_zget(zfsvfs
, zfsvfs
->z_root
, &rootzp
);
1253 *vpp
= ZTOV(rootzp
);
1254 dprintf("vpp -> %d, error %d -- %p\n", (*vpp
)->v_type
, error
, *vpp
);
1260 * Teardown the zfsvfs::z_os.
1262 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
1263 * and 'z_teardown_inactive_lock' held.
1266 zfsvfs_teardown(zfsvfs_t
*zfsvfs
, boolean_t unmounting
)
1270 rrw_enter(&zfsvfs
->z_teardown_lock
, RW_WRITER
, FTAG
);
1274 * We purge the parent filesystem's vfsp as the parent
1275 * filesystem and all of its snapshots have their vnode's
1276 * v_vfsp set to the parent's filesystem's vfsp. Note,
1277 * 'z_parent' is self referential for non-snapshots.
1279 (void) dnlc_purge_vfsp(zfsvfs
->z_parent
->z_vfs
, 0);
1283 * Close the zil. NB: Can't close the zil while zfs_inactive
1284 * threads are blocked as zil_close can call zfs_inactive.
1286 if (zfsvfs
->z_log
) {
1287 zil_close(zfsvfs
->z_log
);
1288 zfsvfs
->z_log
= NULL
;
1291 rw_enter(&zfsvfs
->z_teardown_inactive_lock
, RW_WRITER
);
1294 * If we are not unmounting (ie: online recv) and someone already
1295 * unmounted this file system while we were doing the switcheroo,
1296 * or a reopen of z_os failed then just bail out now.
1298 if (!unmounting
&& (zfsvfs
->z_unmounted
|| zfsvfs
->z_os
== NULL
)) {
1299 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
1300 rrw_exit(&zfsvfs
->z_teardown_lock
, FTAG
);
1305 * At this point there are no vops active, and any new vops will
1306 * fail with EIO since we have z_teardown_lock for writer (only
1307 * relavent for forced unmount).
1309 * Release all holds on dbufs.
1311 mutex_enter(&zfsvfs
->z_znodes_lock
);
1312 for (zp
= list_head(&zfsvfs
->z_all_znodes
); zp
!= NULL
;
1313 zp
= list_next(&zfsvfs
->z_all_znodes
, zp
))
1315 ASSERT(ZTOV(zp
)->v_count
> 0);
1316 zfs_znode_dmu_fini(zp
);
1318 mutex_exit(&zfsvfs
->z_znodes_lock
);
1321 * If we are unmounting, set the unmounted flag and let new vops
1322 * unblock. zfs_inactive will have the unmounted behavior, and all
1323 * other vops will fail with EIO.
1326 zfsvfs
->z_unmounted
= B_TRUE
;
1327 rrw_exit(&zfsvfs
->z_teardown_lock
, FTAG
);
1328 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
1332 * z_os will be NULL if there was an error in attempting to reopen
1333 * zfsvfs, so just return as the properties had already been
1334 * unregistered and cached data had been evicted before.
1336 if (zfsvfs
->z_os
== NULL
)
1340 * Unregister properties.
1342 zfs_unregister_callbacks(zfsvfs
);
1347 if (dmu_objset_evict_dbufs(zfsvfs
->z_os
)) {
1348 txg_wait_synced(dmu_objset_pool(zfsvfs
->z_os
), 0);
1349 (void) dmu_objset_evict_dbufs(zfsvfs
->z_os
);
/*
 * Unmount the ZFS file system attached to vfsp.
 *
 * vfsp  - VFS being unmounted; its vfs_data is the zfsvfs_t.
 * fflag - unmount flags; MS_FORCE skips the active-vnode busy check.
 *
 * Visible sequence: run the secpolicy_fs_unmount() and dsl_deleg_access()
 * permission checks, purge the DNLC for the parent file system, unmount
 * snapshots under .zfs via zfsctl_umount_snapshots(), check vfs_count (and
 * the v_count of the .zfs vnode) unless MS_FORCE is set, mark the VFS
 * VFS_UNMOUNTED, tear the zfsvfs down with zfsvfs_teardown(zfsvfs, B_TRUE),
 * clear the objset user pointer under os_user_ptr_lock, close the objset,
 * destroy the .zfs directory node, and finally call vflush().
 *
 * NOTE(review): flags gets FORCECLOSE set for MS_FORCE, but the visible
 * vflush() call passes a literal 0 rather than flags - confirm whether
 * that is intended.
 * NOTE(review): the conditions linking the two permission checks and the
 * statements executed on the failure paths are not visible in this listing.
 */
1357 zfs_umount(vfs_t
*vfsp
, int fflag
)
1359 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1369 dprintf("ZFS_UMOUNT called\n");
1371 /*TAILQ_FOREACH(vpp, &vfsp->mnt_vnodelist, v_mntvnodes) {
1372 printf("vnode list vnode number %d -- vnode address %p\n", counter, vpp);
1373 vprint("ZFS vfsp vnode list", vpp);
1379 ret
= secpolicy_fs_unmount(cr
, vfsp
);
1381 ret
= dsl_deleg_access((char *)refstr_value(vfsp
->vfs_resource
),
1382 ZFS_DELEG_PERM_MOUNT
, cr
);
1388 * We purge the parent filesystem's vfsp as the parent filesystem
1389 * and all of its snapshots have their vnode's v_vfsp set to the
1390 * parent's filesystem's vfsp. Note, 'z_parent' is self
1391 * referential for non-snapshots.
1393 (void) dnlc_purge_vfsp(zfsvfs
->z_parent
->z_vfs
, 0);
1396 * Unmount any snapshots mounted under .zfs before unmounting the
1399 if (zfsvfs
->z_ctldir
!= NULL
&&
1400 (ret
= zfsctl_umount_snapshots(vfsp
, fflag
, cr
)) != 0) {
1405 if (!(fflag
& MS_FORCE
)) {
1407 * Check the number of active vnodes in the file system.
1408 * Our count is maintained in the vfs structure, but the
1409 * number is off by 1 to indicate a hold on the vfs
1412 * The '.zfs' directory maintains a reference of its
1413 * own, and any active references underneath are
1414 * reflected in the vnode count.
1416 if (zfsvfs
->z_ctldir
== NULL
) {
1417 if (vfsp
->vfs_count
> 1){
1421 if (vfsp
->vfs_count
> 2 ||
1422 zfsvfs
->z_ctldir
->v_count
> 1) {
1428 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
1430 VERIFY(zfsvfs_teardown(zfsvfs
, B_TRUE
) == 0);
1434 * z_os will be NULL if there was an error in
1435 * attempting to reopen zfsvfs.
1439 * Unset the objset user_ptr.
1441 mutex_enter(&os
->os
->os_user_ptr_lock
);
1442 dmu_objset_set_user(os
, NULL
);
1443 mutex_exit(&os
->os
->os_user_ptr_lock
);
1446 * Finally release the objset
1448 dmu_objset_close(os
);
1452 * We can now safely destroy the '.zfs' directory node.
1454 if (zfsvfs
->z_ctldir
!= NULL
)
1455 zfsctl_destroy(zfsvfs
);
1457 if (fflag
& MS_FORCE
)
1458 flags
|= FORCECLOSE
;
1460 ret
= vflush(vfsp
, NULL
, 0);
/*
 * Look up the vnode for inode number ino on the file system vfsp.
 *
 * vfsp - VFS whose vfs_data is the zfsvfs_t to search.
 * ino  - object number handed to zfs_zget().
 * vpp  - out parameter for the resulting vnode.
 *
 * The znode is fetched with zfs_zget(); a successful lookup of a znode
 * whose z_unlinked flag is set is handled as a special case.
 *
 * NOTE(review): the body of the z_unlinked branch, the assignment to vpp
 * and the return statements are not visible in this listing.
 */
1468 zfs_vget(vfs_t
*vfsp
, ino_t ino
, vnode_t
**vpp
)
1470 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1474 dprintf("zfs_vget called\n");
1475 dprintf("vfsp->vfs_count %d\n", vfsp
->vfs_count
);
1478 err
= zfs_zget(zfsvfs
, ino
, &zp
);
1479 if (err
== 0 && zp
->z_unlinked
) {
1487 /* XXX NetBSD how to get flags for vn_lock ? */
/*
 * Translate the file handle fidp into a vnode returned through vpp.
 *
 * For a LONG_FID_LEN fid the objset id and set generation are assembled
 * byte by byte (least significant byte first) from zf_setid and zf_setgen,
 * and zfsctl_lookup_objset() is called with that objset id to update zfsvfs.
 * For SHORT_FID_LEN or LONG_FID_LEN fids the object number and generation
 * are assembled the same way from zf_object and zf_gen.
 *
 * Control-directory objects (ZFSCTL_INO_ROOT and ZFSCTL_INO_SNAPDIR) are
 * served from z_ctldir, with an extra zfsctl_root_lookup() for the
 * snapshot directory. Otherwise the znode is fetched with zfs_zget() and
 * its generation, masked by gen_mask, is compared against the fid
 * generation; the unlinked/mismatch case is logged via dprintf(). The vnode
 * is locked with vn_lock(LK_EXCLUSIVE | LK_RETRY) on both visible paths.
 *
 * NOTE(review): gen_mask is derived from the loop counter i as left by the
 * preceding decode loops - confirm i always equals sizeof (zf_gen) here.
 * NOTE(review): fid_gen is declared uint64_t but is printed with %u in
 * both dprintf() calls - confirm the format specifiers.
 * NOTE(review): error returns and several branch conditions are not
 * visible in this listing.
 */
1495 zfs_fhtovp(vfs_t
*vfsp
, fid_t
*fidp
, vnode_t
**vpp
)
1497 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1499 uint64_t object
= 0;
1500 uint64_t fid_gen
= 0;
1507 dprintf("zfs_fhtovp called\n");
1508 dprintf("vfsp->vfs_count %d\n", vfsp
->vfs_count
);
1512 if (fidp
->fid_len
== LONG_FID_LEN
) {
1513 zfid_long_t
*zlfid
= (zfid_long_t
*)fidp
;
1514 uint64_t objsetid
= 0;
1515 uint64_t setgen
= 0;
1517 for (i
= 0; i
< sizeof (zlfid
->zf_setid
); i
++)
1518 objsetid
|= ((uint64_t)zlfid
->zf_setid
[i
]) << (8 * i
);
1520 for (i
= 0; i
< sizeof (zlfid
->zf_setgen
); i
++)
1521 setgen
|= ((uint64_t)zlfid
->zf_setgen
[i
]) << (8 * i
);
1525 err
= zfsctl_lookup_objset(vfsp
, objsetid
, &zfsvfs
);
1531 if (fidp
->fid_len
== SHORT_FID_LEN
|| fidp
->fid_len
== LONG_FID_LEN
) {
1532 zfid_short_t
*zfid
= (zfid_short_t
*)fidp
;
1534 for (i
= 0; i
< sizeof (zfid
->zf_object
); i
++)
1535 object
|= ((uint64_t)zfid
->zf_object
[i
]) << (8 * i
);
1537 for (i
= 0; i
< sizeof (zfid
->zf_gen
); i
++)
1538 fid_gen
|= ((uint64_t)zfid
->zf_gen
[i
]) << (8 * i
);
1544 /* A zero fid_gen means we are in the .zfs control directories */
1546 (object
== ZFSCTL_INO_ROOT
|| object
== ZFSCTL_INO_SNAPDIR
)) {
1547 *vpp
= zfsvfs
->z_ctldir
;
1548 ASSERT(*vpp
!= NULL
);
1549 if (object
== ZFSCTL_INO_SNAPDIR
) {
1550 VERIFY(zfsctl_root_lookup(*vpp
, "snapshot", vpp
, NULL
,
1551 0, NULL
, NULL
, NULL
, NULL
, NULL
) == 0);
1556 /* XXX: LK_RETRY? */
1557 vn_lock(*vpp
, LK_EXCLUSIVE
| LK_RETRY
);
1561 gen_mask
= -1ULL >> (64 - 8 * i
);
1563 dprintf("getting %llu [%u mask %llx]\n", object
, fid_gen
, gen_mask
);
1564 if (err
= zfs_zget(zfsvfs
, object
, &zp
)) {
1568 zp_gen
= zp
->z_phys
->zp_gen
& gen_mask
;
1571 if (zp
->z_unlinked
|| zp_gen
!= fid_gen
) {
1572 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen
, fid_gen
);
1579 /* XXX: LK_RETRY? */
1580 vn_lock(*vpp
, LK_EXCLUSIVE
| LK_RETRY
);
1586 * Block out VOPs and close zfsvfs_t::z_os
1588 * Note, if successful, then we return with the 'z_teardown_lock' and
1589 * 'z_teardown_inactive_lock' write held.
/*
 * Suspend the file system described by zfsvfs.
 *
 * zfsvfs - file system to suspend.
 * name   - out buffer filled by dmu_objset_name() with the objset name.
 * mode   - out parameter receiving z_os->os_mode before the objset closes.
 *
 * Calls zfsvfs_teardown(zfsvfs, B_FALSE) first; once past the error check
 * the open mode and name of z_os are recorded and z_os is closed with
 * dmu_objset_close().
 *
 * NOTE(review): the statement taken when teardown fails and the final
 * return are not visible in this listing; presumably the error is returned.
 */
1592 zfs_suspend_fs(zfsvfs_t
*zfsvfs
, char *name
, int *mode
)
1596 if ((error
= zfsvfs_teardown(zfsvfs
, B_FALSE
)) != 0)
1599 *mode
= zfsvfs
->z_os
->os_mode
;
1600 dmu_objset_name(zfsvfs
->z_os
, name
);
1601 dmu_objset_close(zfsvfs
->z_os
);
1607 * Reopen zfsvfs_t::z_os and release VOPs.
/*
 * Resume a file system previously suspended with zfs_suspend_fs().
 *
 * zfsvfs - file system to resume; the caller must hold z_teardown_lock and
 *          z_teardown_inactive_lock for writing (both are asserted).
 * osname - objset name passed to dmu_objset_open().
 * mode   - open mode passed to dmu_objset_open().
 *
 * The objset is reopened into z_os. One visible path sets z_os to NULL;
 * the other runs zfsvfs_setup(zfsvfs, B_FALSE) and walks z_all_znodes
 * under z_znodes_lock calling zfs_rezget() on each znode, ignoring its
 * result. Both teardown locks are then released. The last visible
 * statements take vn_vfswlock() on the covered vnode and, if that returns
 * 0, call dounmount() with MS_FORCE.
 *
 * NOTE(review): the if/else lines selecting between these paths are not
 * visible in this listing; presumably they test err from dmu_objset_open().
 */
1610 zfs_resume_fs(zfsvfs_t
*zfsvfs
, const char *osname
, int mode
)
1614 ASSERT(RRW_WRITE_HELD(&zfsvfs
->z_teardown_lock
));
1615 ASSERT(RW_WRITE_HELD(&zfsvfs
->z_teardown_inactive_lock
));
1617 err
= dmu_objset_open(osname
, DMU_OST_ZFS
, mode
, &zfsvfs
->z_os
);
1619 zfsvfs
->z_os
= NULL
;
1623 VERIFY(zfsvfs_setup(zfsvfs
, B_FALSE
) == 0);
1626 * Attempt to re-establish all the active znodes with
1627 * their dbufs. If a zfs_rezget() fails, then we'll let
1628 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
1629 * when they try to use their znode.
1631 mutex_enter(&zfsvfs
->z_znodes_lock
);
1632 for (zp
= list_head(&zfsvfs
->z_all_znodes
); zp
;
1633 zp
= list_next(&zfsvfs
->z_all_znodes
, zp
)) {
1634 (void) zfs_rezget(zp
);
1636 mutex_exit(&zfsvfs
->z_znodes_lock
);
1640 /* release the VOPs */
1641 rw_exit(&zfsvfs
->z_teardown_inactive_lock
);
1642 rrw_exit(&zfsvfs
->z_teardown_lock
, FTAG
);
1646 * Since we couldn't reopen zfsvfs::z_os, force
1647 * unmount this file system.
1649 if (vn_vfswlock(zfsvfs
->z_vfs
->vfs_vnodecovered
) == 0)
1650 (void) dounmount(zfsvfs
->z_vfs
, MS_FORCE
, curlwp
);
/*
 * Release the per-mount ZFS state hanging off vfsp.
 *
 * vfsp - VFS whose vfs_data is the zfsvfs_t to free.
 *
 * Destroys each of the ZFS_OBJ_MTX_SZ mutexes in z_hold_mtx, calls
 * zfs_fuid_destroy() and zfs_freezfsvfs() on the zfsvfs, and then
 * decrements zfs_active_fs_count atomically.
 */
1656 zfs_freevfs(vfs_t
*vfsp
)
1658 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1661 for (i
= 0; i
!= ZFS_OBJ_MTX_SZ
; i
++)
1662 mutex_destroy(&zfsvfs
->z_hold_mtx
[i
]);
1664 zfs_fuid_destroy(zfsvfs
);
1665 zfs_freezfsvfs(zfsvfs
);
1667 atomic_add_32(&zfs_active_fs_count
, -1);
1671 * VFS_INIT() initialization. Note that there is no VFS_FINI(),
1672 * so we can't safely do any non-idempotent initialization here.
1673 * Leave that to zfs_init() and zfs_fini(), which are called
1674 * from the module's _init() and _fini() entry points.
/*
 * One-time VFS registration for ZFS.
 *
 * fstype - file system type index handed to vfs_setfsops().
 * name   - not referenced by any line visible in this listing.
 *
 * Installs the vfsops from zfs_vfsops_template into zfs_vfsops, then builds
 * the vnode op tables with zfs_create_op_tables(). The visible cleanup
 * path removes the op tables, logs a CE_WARN message and frees the vfsops
 * via vfs_freevfsops_by_type(). Afterwards zfs_dev_mtx and zfs_debug_mtx
 * are initialised, zfs_major is set from ddi_name_to_major(ZFS_DRIVER) and
 * zfs_minor starts at ZFS_MIN_MINOR.
 *
 * NOTE(review): the error checks guarding the cleanup path and the return
 * statements are not visible in this listing.
 */
1678 zfs_vfsinit(int fstype
, char *name
)
1685 * Setup vfsops and vnodeops tables.
1687 error
= vfs_setfsops(fstype
, zfs_vfsops_template
, &zfs_vfsops
);
1689 error
= zfs_create_op_tables();
1691 zfs_remove_op_tables();
1692 cmn_err(CE_WARN
, "zfs: bad vnode ops template");
1693 vfs_freevfsops_by_type(zfsfstype
);
1697 mutex_init(&zfs_dev_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
1698 mutex_init(&zfs_debug_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
1701 * Unique major number for all zfs mounts.
1702 * If we run out of 32-bit minors, we'll getudev() another major.
1704 zfs_major
= ddi_name_to_major(ZFS_DRIVER
);
1705 zfs_minor
= ZFS_MIN_MINOR
;
1715 err
= vfs_detach(&zfs_vfsops_template
);
1719 mutex_destroy(&zfs_debug_mtx
);
1720 mutex_destroy(&zfs_dev_mtx
);
1729 * Initialize .zfs directory structures
1734 * Initialize znode cache, vnode ops, etc...
1749 return (zfs_active_fs_count
!= 0);
/*
 * Set the ZPL version stored in the master node of the dataset name.
 *
 * name    - dataset name, opened with dmu_objset_open() as DS_MODE_OWNER.
 * newvers - requested version; checked against ZPL_VERSION_INITIAL and
 *           ZPL_VERSION.
 *
 * The current version is read with zap_lookup() of ZPL_VERSION_STR and
 * compared with newvers. A transaction holding the master node ZAP is then
 * created and assigned with TXG_WAIT, the entry is rewritten with
 * zap_update(), the change is recorded through spa_history_internal_log()
 * as LOG_DS_UPGRADE, and the objset is closed.
 *
 * NOTE(review): the error handling after each call, the action taken when
 * newvers is lower than the current version, and the transaction
 * commit/abort calls are not visible in this listing.
 */
1753 zfs_set_version(const char *name
, uint64_t newvers
)
1761 * XXX for now, require that the filesystem be unmounted. Would
1762 * be nice to find the zfsvfs_t and just update that if
1766 if (newvers
< ZPL_VERSION_INITIAL
|| newvers
> ZPL_VERSION
)
1769 error
= dmu_objset_open(name
, DMU_OST_ZFS
, DS_MODE_OWNER
, &os
);
1773 error
= zap_lookup(os
, MASTER_NODE_OBJ
, ZPL_VERSION_STR
,
1777 if (newvers
< curvers
) {
1782 tx
= dmu_tx_create(os
);
1783 dmu_tx_hold_zap(tx
, MASTER_NODE_OBJ
, 0, ZPL_VERSION_STR
);
1784 error
= dmu_tx_assign(tx
, TXG_WAIT
);
1789 error
= zap_update(os
, MASTER_NODE_OBJ
, ZPL_VERSION_STR
, 8, 1,
1792 spa_history_internal_log(LOG_DS_UPGRADE
,
1793 dmu_objset_spa(os
), tx
, CRED(),
1794 "oldver=%llu newver=%llu dataset = %llu", curvers
, newvers
,
1799 dmu_objset_close(os
);
1804 * Read a property stored within the master node.
/*
 * Fetch the ZPL property prop of objset os into value.
 *
 * os    - objset whose MASTER_NODE_OBJ ZAP is consulted.
 * prop  - property to read; ZFS_PROP_VERSION is looked up under
 *         ZPL_VERSION_STR, every other property under zfs_prop_to_name().
 * value - out parameter for the single 8-byte integer read by zap_lookup().
 *
 * When zap_lookup() reports ENOENT a default is substituted instead:
 * ZPL_VERSION for ZFS_PROP_VERSION, and ZFS_CASE_SENSITIVE in a branch
 * whose case label is not visible here.
 *
 * NOTE(review): the defaults for ZFS_PROP_NORMALIZE and ZFS_PROP_UTF8ONLY,
 * the default case and the return statements are not visible in this
 * listing.
 */
1807 zfs_get_zplprop(objset_t
*os
, zfs_prop_t prop
, uint64_t *value
)
1813 * Look up the file system's value for the property. For the
1814 * version property, we look up a slightly different string.
1816 if (prop
== ZFS_PROP_VERSION
)
1817 pname
= ZPL_VERSION_STR
;
1819 pname
= zfs_prop_to_name(prop
);
1822 error
= zap_lookup(os
, MASTER_NODE_OBJ
, pname
, 8, 1, value
);
1824 if (error
== ENOENT
) {
1825 /* No value set, use the default value */
1827 case ZFS_PROP_VERSION
:
1828 *value
= ZPL_VERSION
;
1830 case ZFS_PROP_NORMALIZE
:
1831 case ZFS_PROP_UTF8ONLY
:
1835 *value
= ZFS_CASE_SENSITIVE
;
1846 zfs_start(vfs_t
*vfsp
, int flags
)
1854 static vfsdef_t vfw
= {
1858 VSW_HASPROTO
|VSW_CANRWRO
|VSW_CANREMOUNT
|VSW_VOLATILEDEV
|VSW_STATS
|
1863 struct modlfs zfs_modlfs
= {
1864 &mod_fsops
, "ZFS filesystem version " SPA_VERSION_STRING
, &vfw