1 /* $NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $ */
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc, and by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (c) 1989, 1991, 1993, 1994
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $");
66 #if defined(_KERNEL_OPT)
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
81 #include <sys/device.h>
85 #include <sys/disklabel.h>
86 #include <sys/ioctl.h>
87 #include <sys/errno.h>
91 #include <sys/sysctl.h>
93 #include <sys/kauth.h>
94 #include <sys/wapbl.h>
95 #include <sys/fstrans.h>
96 #include <sys/module.h>
98 #include <miscfs/genfs/genfs.h>
99 #include <miscfs/specfs/specdev.h>
101 #include <ufs/ufs/quota.h>
102 #include <ufs/ufs/ufsmount.h>
103 #include <ufs/ufs/inode.h>
104 #include <ufs/ufs/dir.h>
105 #include <ufs/ufs/ufs_extern.h>
106 #include <ufs/ufs/ufs_bswap.h>
107 #include <ufs/ufs/ufs_wapbl.h>
109 #include <ufs/ffs/fs.h>
110 #include <ufs/ffs/ffs_extern.h>
/*
 * Declare the "ffs" kernel module with class MODULE_CLASS_VFS.
 * ffs_modcmd() in this file handles the MODULE_CMD_INIT and
 * MODULE_CMD_FINI commands for it.
 * NOTE(review): the third argument (NULL) is presumably the list of
 * required modules -- confirm against module(9).
 */
112 MODULE(MODULE_CLASS_VFS
, ffs
, NULL
);
/* Forward declaration of a file-local helper taking a vnode and an int. */
114 static int ffs_vfs_fsync(vnode_t
*, int);
/*
 * sysctl log handle: passed to every sysctl_createv() call made for
 * MODULE_CMD_INIT and to sysctl_teardown() for MODULE_CMD_FINI.
 */
116 static struct sysctllog
*ffs_sysctl_log
;
/*
 * Listener returned by kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
 * ffs_snapshot_cb, NULL) at module init.  It may be NULL if registration
 * failed; it is only passed to kauth_unlisten_scope() when non-NULL.
 */
118 static kauth_listener_t ffs_snapshot_listener
;
120 /* how many times ffs_init() was called */
121 int ffs_initcount
= 0;
/*
 * vnode operation vector descriptors defined outside this file; their
 * addresses populate the ffs_vnodeopv_descs[] table below.
 */
123 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc
;
124 extern const struct vnodeopv_desc ffs_specop_opv_desc
;
125 extern const struct vnodeopv_desc ffs_fifoop_opv_desc
;
127 const struct vnodeopv_desc
* const ffs_vnodeopv_descs
[] = {
128 &ffs_vnodeop_opv_desc
,
129 &ffs_specop_opv_desc
,
130 &ffs_fifoop_opv_desc
,
134 struct vfsops ffs_vfsops
= {
136 sizeof (struct ufs_args
),
154 genfs_renamelock_enter
,
155 genfs_renamelock_exit
,
162 static const struct genfs_ops ffs_genfsops
= {
163 .gop_size
= ffs_gop_size
,
164 .gop_alloc
= ufs_gop_alloc
,
165 .gop_write
= genfs_gop_write
,
166 .gop_markupdate
= ufs_gop_markupdate
,
169 static const struct ufs_ops ffs_ufsops
= {
170 .uo_itimes
= ffs_itimes
,
171 .uo_update
= ffs_update
,
172 .uo_truncate
= ffs_truncate
,
173 .uo_valloc
= ffs_valloc
,
174 .uo_vfree
= ffs_vfree
,
175 .uo_balloc
= ffs_balloc
,
176 .uo_snapgone
= ffs_snapgone
,
180 ffs_snapshot_cb(kauth_cred_t cred
, kauth_action_t action
, void *cookie
,
181 void *arg0
, void *arg1
, void *arg2
, void *arg3
)
184 int result
= KAUTH_RESULT_DEFER
;;
186 if (action
!= KAUTH_SYSTEM_FS_SNAPSHOT
)
189 if (VTOI(vp
)->i_uid
== kauth_cred_geteuid(cred
))
190 result
= KAUTH_RESULT_ALLOW
;
196 ffs_modcmd(modcmd_t cmd
, void *arg
)
201 extern int doasyncfree
;
204 extern int ufs_extattr_autocreate
;
206 extern int ffs_log_changeopt
;
209 case MODULE_CMD_INIT
:
210 error
= vfs_attach(&ffs_vfsops
);
214 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
216 CTLTYPE_NODE
, "vfs", NULL
,
219 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
222 SYSCTL_DESCR("Berkeley Fast File System"),
224 CTL_VFS
, 1, CTL_EOL
);
226 * @@@ should we even bother with these first three?
228 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
229 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
230 CTLTYPE_INT
, "doclusterread", NULL
,
231 sysctl_notavail
, 0, NULL
, 0,
232 CTL_VFS
, 1, FFS_CLUSTERREAD
, CTL_EOL
);
233 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
234 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
235 CTLTYPE_INT
, "doclusterwrite", NULL
,
236 sysctl_notavail
, 0, NULL
, 0,
237 CTL_VFS
, 1, FFS_CLUSTERWRITE
, CTL_EOL
);
238 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
239 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
240 CTLTYPE_INT
, "doreallocblks", NULL
,
241 sysctl_notavail
, 0, NULL
, 0,
242 CTL_VFS
, 1, FFS_REALLOCBLKS
, CTL_EOL
);
244 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
245 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
246 CTLTYPE_INT
, "doasyncfree",
247 SYSCTL_DESCR("Release dirty blocks asynchronously"),
248 NULL
, 0, &doasyncfree
, 0,
249 CTL_VFS
, 1, FFS_ASYNCFREE
, CTL_EOL
);
251 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
252 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
253 CTLTYPE_INT
, "log_changeopt",
254 SYSCTL_DESCR("Log changes in optimization strategy"),
255 NULL
, 0, &ffs_log_changeopt
, 0,
256 CTL_VFS
, 1, FFS_LOG_CHANGEOPT
, CTL_EOL
);
258 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
259 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
260 CTLTYPE_INT
, "extattr_autocreate",
261 SYSCTL_DESCR("Size of attribute for "
262 "backing file autocreation"),
263 NULL
, 0, &ufs_extattr_autocreate
, 0,
264 CTL_VFS
, 1, FFS_EXTATTR_AUTOCREATE
, CTL_EOL
);
266 #endif /* UFS_EXTATTR */
268 ffs_snapshot_listener
= kauth_listen_scope(KAUTH_SCOPE_SYSTEM
,
269 ffs_snapshot_cb
, NULL
);
270 if (ffs_snapshot_listener
== NULL
)
271 printf("ffs_modcmd: can't listen on system scope.\n");
274 case MODULE_CMD_FINI
:
275 error
= vfs_detach(&ffs_vfsops
);
278 sysctl_teardown(&ffs_sysctl_log
);
279 if (ffs_snapshot_listener
!= NULL
)
280 kauth_unlisten_scope(ffs_snapshot_listener
);
/*
 * Global pool caches.
 * NOTE(review): judging by the names these presumably back in-core inodes
 * and the UFS1/UFS2 on-disk inode copies; they are not referenced in the
 * visible part of this file -- confirm against ffs_init().
 */
290 pool_cache_t ffs_inode_cache
;
291 pool_cache_t ffs_dinode1_cache
;
292 pool_cache_t ffs_dinode2_cache
;
/*
 * Old-superblock compatibility helpers, defined later in this file.
 * ffs_oldfscompat_read() is called from ffs_reload() and ffs_mountfs()
 * with the location the superblock was read from; its counterpart
 * ffs_oldfscompat_write() unwinds those adjustments.
 */
294 static void ffs_oldfscompat_read(struct fs
*, struct ufsmount
*, daddr_t
);
295 static void ffs_oldfscompat_write(struct fs
*, struct ufsmount
*);
298 * Called by main() when ffs is going to be mounted as root.
306 struct lwp
*l
= curlwp
; /* XXX */
307 struct ufsmount
*ump
;
310 if (device_class(root_device
) != DV_DISK
)
313 if ((error
= vfs_rootmountalloc(MOUNT_FFS
, "root_device", &mp
))) {
319 * We always need to be able to mount the root file system.
321 mp
->mnt_flag
|= MNT_FORCE
;
322 if ((error
= ffs_mountfs(rootvp
, mp
, l
)) != 0) {
323 vfs_unbusy(mp
, false, NULL
);
327 mp
->mnt_flag
&= ~MNT_FORCE
;
328 mountlist_append(mp
);
331 memset(fs
->fs_fsmnt
, 0, sizeof(fs
->fs_fsmnt
));
332 (void)copystr(mp
->mnt_stat
.f_mntonname
, fs
->fs_fsmnt
, MNAMELEN
- 1, 0);
333 (void)ffs_statvfs(mp
, &mp
->mnt_stat
);
334 vfs_unbusy(mp
, false, NULL
);
335 setrootfstime((time_t)fs
->fs_time
);
345 ffs_mount(struct mount
*mp
, const char *path
, void *data
, size_t *data_len
)
347 struct lwp
*l
= curlwp
;
348 struct vnode
*devvp
= NULL
;
349 struct ufs_args
*args
= data
;
350 struct ufsmount
*ump
= NULL
;
352 int error
= 0, flags
, update
;
355 if (*data_len
< sizeof *args
)
358 if (mp
->mnt_flag
& MNT_GETARGS
) {
363 *data_len
= sizeof *args
;
367 update
= mp
->mnt_flag
& MNT_UPDATE
;
369 /* Check arguments */
370 if (args
->fspec
!= NULL
) {
372 * Look up the name and verify that it's sane.
374 error
= namei_simple_user(args
->fspec
,
375 NSM_FOLLOW_NOEMULROOT
, &devvp
);
381 * Be sure this is a valid block device
383 if (devvp
->v_type
!= VBLK
)
385 else if (bdevsw_lookup(devvp
->v_rdev
) == NULL
)
389 * Be sure we're still naming the same device
390 * used for our initial mount
393 if (devvp
!= ump
->um_devvp
) {
394 if (devvp
->v_rdev
!= ump
->um_devvp
->v_rdev
)
398 devvp
= ump
->um_devvp
;
405 /* New mounts must have a filename for the device */
408 /* Use the extant mount */
410 devvp
= ump
->um_devvp
;
416 * If mount by non-root, then verify that user has necessary
417 * permissions on the device.
419 * Permission to update a mount is checked higher, so here we presume
420 * updating the mount is okay (for example, as far as securelevel goes)
421 * which leaves us with the normal check.
426 (mp
->mnt_iflag
& IMNT_WANTRDWR
) != 0 :
427 (mp
->mnt_flag
& MNT_RDONLY
) == 0)
428 accessmode
|= VWRITE
;
429 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
430 error
= kauth_authorize_system(l
->l_cred
, KAUTH_SYSTEM_MOUNT
,
431 KAUTH_REQ_SYSTEM_MOUNT_DEVICE
, mp
, devvp
,
432 KAUTH_ARG(accessmode
));
442 /* WAPBL can only be enabled on a r/w mount. */
443 if ((mp
->mnt_flag
& MNT_RDONLY
) && !(mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
444 mp
->mnt_flag
&= ~MNT_LOG
;
447 mp
->mnt_flag
&= ~MNT_LOG
;
453 if (mp
->mnt_flag
& MNT_RDONLY
)
456 xflags
= FREAD
| FWRITE
;
457 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
458 error
= VOP_OPEN(devvp
, xflags
, FSCRED
);
462 error
= ffs_mountfs(devvp
, mp
, l
);
464 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
465 (void)VOP_CLOSE(devvp
, xflags
, NOCRED
);
478 * The initial mount got a reference on this
479 * device, so drop the one obtained via
486 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
488 * Changing from r/w to r/o
491 if (mp
->mnt_flag
& MNT_FORCE
)
493 error
= ffs_flushfiles(mp
, flags
, l
);
495 error
= UFS_WAPBL_BEGIN(mp
);
497 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
498 fs
->fs_clean
& FS_WASCLEAN
) {
499 if (mp
->mnt_flag
& MNT_SOFTDEP
)
500 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
501 fs
->fs_clean
= FS_ISCLEAN
;
502 (void) ffs_sbupdate(ump
, MNT_WAIT
);
511 if ((mp
->mnt_flag
& MNT_LOG
) == 0) {
512 error
= ffs_wapbl_stop(mp
, mp
->mnt_flag
& MNT_FORCE
);
518 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
520 * Finish change from r/w to r/o
526 if (mp
->mnt_flag
& MNT_RELOAD
) {
527 error
= ffs_reload(mp
, l
->l_cred
, l
);
532 if (fs
->fs_ronly
&& (mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
534 * Changing from read-only to read/write
537 if (fs
->fs_flags
& FS_DOQUOTA2
) {
538 ump
->um_flags
|= UFS_QUOTA2
;
539 uprintf("%s: options QUOTA2 not enabled%s\n",
540 mp
->mnt_stat
.f_mntonname
,
541 (mp
->mnt_flag
& MNT_FORCE
) ? "" :
550 if (fs
->fs_flags
& FS_DOWAPBL
) {
551 printf("%s: replaying log to disk\n",
552 mp
->mnt_stat
.f_mntonname
);
553 KDASSERT(mp
->mnt_wapbl_replay
);
554 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
559 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
560 fs
->fs_clean
= FS_WASCLEAN
;
563 if (fs
->fs_snapinum
[0] != 0)
564 ffs_snapshot_mount(mp
);
568 error
= ffs_wapbl_start(mp
);
575 error
= ffs_quota2_mount(mp
);
582 if ((mp
->mnt_flag
& MNT_DISCARD
) && !(ump
->um_discarddata
))
583 ump
->um_discarddata
= ffs_discard_init(devvp
, fs
);
585 if (args
->fspec
== NULL
)
589 error
= set_statvfs_info(path
, UIO_USERSPACE
, args
->fspec
,
590 UIO_USERSPACE
, mp
->mnt_op
->vfs_name
, mp
, l
);
592 (void)strncpy(fs
->fs_fsmnt
, mp
->mnt_stat
.f_mntonname
,
593 sizeof(fs
->fs_fsmnt
));
594 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
595 if (fs
->fs_fmod
!= 0) { /* XXX */
599 if (fs
->fs_clean
& FS_WASCLEAN
)
600 fs
->fs_time
= time_second
;
602 printf("%s: file system not clean (fs_clean=%#x); "
603 "please fsck(8)\n", mp
->mnt_stat
.f_mntfromname
,
605 printf("%s: lost blocks %" PRId64
" files %d\n",
606 mp
->mnt_stat
.f_mntfromname
, fs
->fs_pendingblocks
,
607 fs
->fs_pendinginodes
);
609 err
= UFS_WAPBL_BEGIN(mp
);
611 (void) ffs_cgupdate(ump
, MNT_WAIT
);
615 if ((mp
->mnt_flag
& MNT_SOFTDEP
) != 0) {
616 printf("%s: `-o softdep' is no longer supported, "
617 "consider `-o log'\n", mp
->mnt_stat
.f_mntfromname
);
618 mp
->mnt_flag
&= ~MNT_SOFTDEP
;
629 * Reload all incore data for a filesystem (used after running fsck on
630 * the root filesystem and finding things to fix). The filesystem must
631 * be mounted read-only.
633 * Things to do to update the mount:
634 * 1) invalidate all cached meta-data.
635 * 2) re-read superblock from disk.
636 * 3) re-read summary information from disk.
637 * 4) invalidate all inactive vnodes.
638 * 5) invalidate all cached file data.
639 * 6) re-read inode data for all active vnodes.
642 ffs_reload(struct mount
*mp
, kauth_cred_t cred
, struct lwp
*l
)
644 struct vnode
*vp
, *mvp
, *devvp
;
648 struct fs
*fs
, *newfs
;
649 struct dkwedge_info dkw
;
650 int i
, bsize
, blks
, error
;
652 struct ufsmount
*ump
;
655 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
660 * Step 1: invalidate all cached meta-data.
662 devvp
= ump
->um_devvp
;
663 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
664 error
= vinvalbuf(devvp
, 0, cred
, l
, 0, 0);
667 panic("ffs_reload: dirty1");
669 * Step 2: re-read superblock from disk.
673 /* XXX we don't handle possibility that superblock moved. */
674 error
= bread(devvp
, fs
->fs_sblockloc
/ DEV_BSIZE
, fs
->fs_sbsize
,
679 newfs
= kmem_alloc(fs
->fs_sbsize
, KM_SLEEP
);
680 memcpy(newfs
, bp
->b_data
, fs
->fs_sbsize
);
682 if (ump
->um_flags
& UFS_NEEDSWAP
) {
683 ffs_sb_swap((struct fs
*)bp
->b_data
, newfs
);
684 fs
->fs_flags
|= FS_SWAPPED
;
687 fs
->fs_flags
&= ~FS_SWAPPED
;
688 if ((newfs
->fs_magic
!= FS_UFS1_MAGIC
&&
689 newfs
->fs_magic
!= FS_UFS2_MAGIC
)||
690 newfs
->fs_bsize
> MAXBSIZE
||
691 newfs
->fs_bsize
< sizeof(struct fs
)) {
693 kmem_free(newfs
, fs
->fs_sbsize
);
694 return (EIO
); /* XXX needs translation */
696 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
697 sblockloc
= fs
->fs_sblockloc
;
699 * Copy pointer fields back into superblock before copying in XXX
700 * new superblock. These should really be in the ufsmount. XXX
701 * Note that important parameters (eg fs_ncg) are unchanged.
703 newfs
->fs_csp
= fs
->fs_csp
;
704 newfs
->fs_maxcluster
= fs
->fs_maxcluster
;
705 newfs
->fs_contigdirs
= fs
->fs_contigdirs
;
706 newfs
->fs_ronly
= fs
->fs_ronly
;
707 newfs
->fs_active
= fs
->fs_active
;
708 memcpy(fs
, newfs
, (u_int
)fs
->fs_sbsize
);
710 kmem_free(newfs
, fs
->fs_sbsize
);
712 /* Recheck for apple UFS filesystem */
713 ump
->um_flags
&= ~UFS_ISAPPLEUFS
;
714 /* First check to see if this is tagged as an Apple UFS filesystem
717 if (getdiskinfo(devvp
, &dkw
) == 0 &&
718 strcmp(dkw
.dkw_ptype
, DKW_PTYPE_APPLEUFS
) == 0)
719 ump
->um_flags
|= UFS_ISAPPLEUFS
;
722 /* Manually look for an apple ufs label, and if a valid one
723 * is found, then treat it like an Apple UFS filesystem anyway
725 * EINVAL is most probably a blocksize or alignment problem,
726 * it is unlikely that this is an Apple UFS filesystem then.
728 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ DEV_BSIZE
),
729 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
730 if (error
&& error
!= EINVAL
) {
734 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
735 (struct appleufslabel
*)bp
->b_data
, NULL
);
737 ump
->um_flags
|= UFS_ISAPPLEUFS
;
743 if (ump
->um_flags
& UFS_ISAPPLEUFS
)
747 if (UFS_MPISAPPLEUFS(ump
)) {
748 /* see comment about NeXT below */
749 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
750 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
751 mp
->mnt_iflag
|= IMNT_DTYPE
;
753 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
754 ump
->um_dirblksiz
= UFS_DIRBLKSIZ
;
755 if (ump
->um_maxsymlinklen
> 0)
756 mp
->mnt_iflag
|= IMNT_DTYPE
;
758 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
760 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
762 mutex_enter(&ump
->um_lock
);
763 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
764 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
765 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
766 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
767 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
768 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
769 mutex_exit(&ump
->um_lock
);
773 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
774 fs
->fs_pendingblocks
= 0;
775 fs
->fs_pendinginodes
= 0;
777 mutex_exit(&ump
->um_lock
);
779 ffs_statvfs(mp
, &mp
->mnt_stat
);
781 * Step 3: re-read summary information from disk.
783 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
785 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
786 bsize
= fs
->fs_bsize
;
787 if (i
+ fs
->fs_frag
> blks
)
788 bsize
= (blks
- i
) * fs
->fs_fsize
;
789 error
= bread(devvp
, FFS_FSBTODB(fs
, fs
->fs_csaddr
+ i
), bsize
,
795 if (UFS_FSNEEDSWAP(fs
))
796 ffs_csum_swap((struct csum
*)bp
->b_data
,
797 (struct csum
*)space
, bsize
);
800 memcpy(space
, bp
->b_data
, (size_t)bsize
);
801 space
= (char *)space
+ bsize
;
805 * We no longer know anything about clusters per cylinder group.
807 if (fs
->fs_contigsumsize
> 0) {
808 lp
= fs
->fs_maxcluster
;
809 for (i
= 0; i
< fs
->fs_ncg
; i
++)
810 *lp
++ = fs
->fs_contigsumsize
;
813 /* Allocate a marker vnode. */
816 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
817 * and vclean() can be called indirectly
819 mutex_enter(&mntvnode_lock
);
821 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= vunmark(mvp
)) {
823 if (vp
->v_mount
!= mp
|| vismarker(vp
))
826 * Step 4: invalidate all inactive vnodes.
828 if (vrecycle(vp
, &mntvnode_lock
)) {
829 mutex_enter(&mntvnode_lock
);
834 * Step 5: invalidate all cached file data.
836 mutex_enter(vp
->v_interlock
);
837 mutex_exit(&mntvnode_lock
);
838 if (vget(vp
, LK_EXCLUSIVE
)) {
842 if (vinvalbuf(vp
, 0, cred
, l
, 0, 0))
843 panic("ffs_reload: dirty2");
845 * Step 6: re-read inode data for all active vnodes.
848 error
= bread(devvp
, FFS_FSBTODB(fs
, ino_to_fsba(fs
, ip
->i_number
)),
849 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
855 ffs_load_inode(bp
, ip
, fs
, ip
->i_number
);
858 mutex_enter(&mntvnode_lock
);
860 mutex_exit(&mntvnode_lock
);
866 * Possible superblock locations ordered from most to least likely.
/*
 * Candidate superblock offsets, initialised from SBLOCKSEARCH.
 * ffs_mountfs() indexes this table, divides each entry by DEV_BSIZE to
 * obtain the block number handed to bread(), and treats an entry of -1
 * as the end of the list.
 */
868 static const int sblock_try
[] = SBLOCKSEARCH
;
871 * Common code for mount and mountroot
874 ffs_mountfs(struct vnode
*devvp
, struct mount
*mp
, struct lwp
*l
)
876 struct ufsmount
*ump
;
880 struct dkwedge_info dkw
;
882 daddr_t sblockloc
, fsblockloc
;
884 int error
, i
, bsize
, ronly
, bset
= 0;
886 int needswap
= 0; /* keep gcc happy */
890 u_int32_t sbsize
= 8192; /* keep gcc happy*/
891 u_int32_t allocsbsize
;
895 cred
= l
? l
->l_cred
: NOCRED
;
897 /* Flush out any old buffers remaining from a previous use. */
898 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
899 error
= vinvalbuf(devvp
, V_SAVE
, cred
, l
, 0, 0);
904 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
912 error
= fstrans_mount(mp
);
916 ump
= kmem_zalloc(sizeof(*ump
), KM_SLEEP
);
917 mutex_init(&ump
->um_lock
, MUTEX_DEFAULT
, IPL_NONE
);
918 error
= ffs_snapshot_init(ump
);
921 ump
->um_ops
= &ffs_ufsops
;
927 * Try reading the superblock in each of its possible locations.
931 brelse(bp
, BC_NOCACHE
);
934 if (sblock_try
[i
] == -1) {
939 error
= bread(devvp
, sblock_try
[i
] / DEV_BSIZE
, SBLOCKSIZE
, cred
,
945 fs
= (struct fs
*)bp
->b_data
;
946 fsblockloc
= sblockloc
= sblock_try
[i
];
947 if (fs
->fs_magic
== FS_UFS1_MAGIC
) {
948 sbsize
= fs
->fs_sbsize
;
950 fsbsize
= fs
->fs_bsize
;
953 } else if (fs
->fs_magic
== FS_UFS1_MAGIC_SWAPPED
) {
954 sbsize
= bswap32(fs
->fs_sbsize
);
956 fsbsize
= bswap32(fs
->fs_bsize
);
959 } else if (fs
->fs_magic
== FS_UFS2_MAGIC
) {
960 sbsize
= fs
->fs_sbsize
;
962 fsbsize
= fs
->fs_bsize
;
965 } else if (fs
->fs_magic
== FS_UFS2_MAGIC_SWAPPED
) {
966 sbsize
= bswap32(fs
->fs_sbsize
);
968 fsbsize
= bswap32(fs
->fs_bsize
);
975 /* fs->fs_sblockloc isn't defined for old filesystems */
976 if (fstype
== UFS1
&& !(fs
->fs_old_flags
& FS_FLAGS_UPDATED
)) {
977 if (sblockloc
== SBLOCK_UFS2
)
979 * This is likely to be the first alternate
980 * in a filesystem with 64k blocks.
984 fsblockloc
= sblockloc
;
986 fsblockloc
= fs
->fs_sblockloc
;
989 fsblockloc
= bswap64(fsblockloc
);
993 /* Check we haven't found an alternate superblock */
994 if (fsblockloc
!= sblockloc
)
997 /* Validate size of superblock */
998 if (sbsize
> MAXBSIZE
|| sbsize
< sizeof(struct fs
))
1001 /* Check that we can handle the file system blocksize */
1002 if (fsbsize
> MAXBSIZE
) {
1003 printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
1008 /* Ok seems to be a good superblock */
1012 fs
= kmem_alloc((u_long
)sbsize
, KM_SLEEP
);
1013 memcpy(fs
, bp
->b_data
, sbsize
);
1018 ffs_sb_swap((struct fs
*)bp
->b_data
, fs
);
1019 fs
->fs_flags
|= FS_SWAPPED
;
1022 fs
->fs_flags
&= ~FS_SWAPPED
;
1025 if ((mp
->mnt_wapbl_replay
== 0) && (fs
->fs_flags
& FS_DOWAPBL
)) {
1026 error
= ffs_wapbl_replay_start(mp
, fs
, devvp
);
1027 if (error
&& (mp
->mnt_flag
& MNT_FORCE
) == 0)
1031 /* XXX fsmnt may be stale. */
1032 printf("%s: replaying log to disk\n",
1034 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
1038 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1039 fs
->fs_clean
= FS_WASCLEAN
;
1041 /* XXX fsmnt may be stale */
1042 printf("%s: replaying log to memory\n",
1046 /* Force a re-read of the superblock */
1047 brelse(bp
, BC_INVAL
);
1049 kmem_free(fs
, sbsize
);
1055 if ((fs
->fs_flags
& FS_DOWAPBL
) && (mp
->mnt_flag
& MNT_FORCE
) == 0) {
1061 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
1062 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
1064 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
1065 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
1066 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
1067 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
1068 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
1074 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
1075 fs
->fs_pendingblocks
= 0;
1076 fs
->fs_pendinginodes
= 0;
1079 ump
->um_fstype
= fstype
;
1080 if (fs
->fs_sbsize
< SBLOCKSIZE
)
1081 brelse(bp
, BC_INVAL
);
1086 /* First check to see if this is tagged as an Apple UFS filesystem
1089 if (getdiskinfo(devvp
, &dkw
) == 0 &&
1090 strcmp(dkw
.dkw_ptype
, DKW_PTYPE_APPLEUFS
) == 0)
1091 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1094 /* Manually look for an apple ufs label, and if a valid one
1095 * is found, then treat it like an Apple UFS filesystem anyway
1097 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ DEV_BSIZE
),
1098 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
1101 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
1102 (struct appleufslabel
*)bp
->b_data
, NULL
);
1104 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1110 if (ump
->um_flags
& UFS_ISAPPLEUFS
) {
1118 * XXX This code changes the behaviour of mounting dirty filesystems, to
1119 * XXX require "mount -f ..." to mount them. This doesn't match what
1120 * XXX mount(8) describes and is disabled for now.
1123 * If the file system is not clean, don't allow it to be mounted
1124 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
1125 * for the root file system.)
1127 if (fs
->fs_flags
& FS_DOWAPBL
) {
1129 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1130 * bit is set, although there's a window in unmount where it
1131 * could be FS_ISCLEAN
1133 if ((mp
->mnt_flag
& MNT_FORCE
) == 0 &&
1134 (fs
->fs_clean
& (FS_WASCLEAN
| FS_ISCLEAN
)) == 0) {
1139 if ((fs
->fs_clean
& FS_ISCLEAN
) == 0 &&
1140 (mp
->mnt_flag
& MNT_FORCE
) == 0) {
1147 * verify that we can access the last block in the fs
1148 * if we're mounting read/write.
1152 error
= bread(devvp
, FFS_FSBTODB(fs
, fs
->fs_size
- 1), fs
->fs_fsize
,
1154 if (bp
->b_bcount
!= fs
->fs_fsize
)
1160 brelse(bp
, BC_INVAL
);
1164 fs
->fs_ronly
= ronly
;
1165 /* Don't bump fs_clean if we're replaying journal */
1166 if (!((fs
->fs_flags
& FS_DOWAPBL
) && (fs
->fs_clean
& FS_WASCLEAN
)))
1171 bsize
= fs
->fs_cssize
;
1172 blks
= howmany(bsize
, fs
->fs_fsize
);
1173 if (fs
->fs_contigsumsize
> 0)
1174 bsize
+= fs
->fs_ncg
* sizeof(int32_t);
1175 bsize
+= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1176 allocsbsize
= bsize
;
1177 space
= kmem_alloc((u_long
)allocsbsize
, KM_SLEEP
);
1179 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
1180 bsize
= fs
->fs_bsize
;
1181 if (i
+ fs
->fs_frag
> blks
)
1182 bsize
= (blks
- i
) * fs
->fs_fsize
;
1183 error
= bread(devvp
, FFS_FSBTODB(fs
, fs
->fs_csaddr
+ i
), bsize
,
1186 kmem_free(fs
->fs_csp
, allocsbsize
);
1191 ffs_csum_swap((struct csum
*)bp
->b_data
,
1192 (struct csum
*)space
, bsize
);
1195 memcpy(space
, bp
->b_data
, (u_int
)bsize
);
1197 space
= (char *)space
+ bsize
;
1201 if (fs
->fs_contigsumsize
> 0) {
1202 fs
->fs_maxcluster
= lp
= space
;
1203 for (i
= 0; i
< fs
->fs_ncg
; i
++)
1204 *lp
++ = fs
->fs_contigsumsize
;
1207 bsize
= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1208 fs
->fs_contigdirs
= space
;
1209 space
= (char *)space
+ bsize
;
1210 memset(fs
->fs_contigdirs
, 0, bsize
);
1211 /* Compatibility for old filesystems - XXX */
1212 if (fs
->fs_avgfilesize
<= 0)
1213 fs
->fs_avgfilesize
= AVFILESIZ
;
1214 if (fs
->fs_avgfpdir
<= 0)
1215 fs
->fs_avgfpdir
= AFPDIR
;
1216 fs
->fs_active
= NULL
;
1218 mp
->mnt_stat
.f_fsidx
.__fsid_val
[0] = (long)dev
;
1219 mp
->mnt_stat
.f_fsidx
.__fsid_val
[1] = makefstype(MOUNT_FFS
);
1220 mp
->mnt_stat
.f_fsid
= mp
->mnt_stat
.f_fsidx
.__fsid_val
[0];
1221 mp
->mnt_stat
.f_namemax
= FFS_MAXNAMLEN
;
1222 if (UFS_MPISAPPLEUFS(ump
)) {
1223 /* NeXT used to keep short symlinks in the inode even
1224 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
1225 * is probably -1, but we still need to be able to identify
1228 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
1229 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
1230 mp
->mnt_iflag
|= IMNT_DTYPE
;
1232 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
1233 ump
->um_dirblksiz
= UFS_DIRBLKSIZ
;
1234 if (ump
->um_maxsymlinklen
> 0)
1235 mp
->mnt_iflag
|= IMNT_DTYPE
;
1237 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
1239 mp
->mnt_fs_bshift
= fs
->fs_bshift
;
1240 mp
->mnt_dev_bshift
= DEV_BSHIFT
; /* XXX */
1241 mp
->mnt_flag
|= MNT_LOCAL
;
1242 mp
->mnt_iflag
|= IMNT_MPSAFE
;
1245 ump
->um_flags
|= UFS_NEEDSWAP
;
1247 ump
->um_mountp
= mp
;
1249 ump
->um_devvp
= devvp
;
1250 ump
->um_nindir
= fs
->fs_nindir
;
1251 ump
->um_lognindir
= ffs(fs
->fs_nindir
) - 1;
1252 ump
->um_bptrtodb
= fs
->fs_fshift
- DEV_BSHIFT
;
1253 ump
->um_seqinc
= fs
->fs_frag
;
1254 for (i
= 0; i
< MAXQUOTAS
; i
++)
1255 ump
->um_quotas
[i
] = NULLVP
;
1256 spec_node_setmountedfs(devvp
, mp
);
1257 if (ronly
== 0 && fs
->fs_snapinum
[0] != 0)
1258 ffs_snapshot_mount(mp
);
1261 KDASSERT(fs
->fs_ronly
== 0);
1263 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1264 * needs to create a new log file in-filesystem.
1266 ffs_statvfs(mp
, &mp
->mnt_stat
);
1268 error
= ffs_wapbl_start(mp
);
1270 kmem_free(fs
->fs_csp
, allocsbsize
);
1277 error
= ffs_quota2_mount(mp
);
1279 kmem_free(fs
->fs_csp
, allocsbsize
);
1283 if (fs
->fs_flags
& FS_DOQUOTA2
) {
1284 ump
->um_flags
|= UFS_QUOTA2
;
1285 uprintf("%s: options QUOTA2 not enabled%s\n",
1286 mp
->mnt_stat
.f_mntonname
,
1287 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
1288 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
1290 kmem_free(fs
->fs_csp
, allocsbsize
);
1298 * Initialize file-backed extended attributes on UFS1 file
1301 if (ump
->um_fstype
== UFS1
)
1302 ufs_extattr_uepm_init(&ump
->um_extattr
);
1303 #endif /* UFS_EXTATTR */
1305 if (mp
->mnt_flag
& MNT_DISCARD
)
1306 ump
->um_discarddata
= ffs_discard_init(devvp
, fs
);
1311 if (mp
->mnt_wapbl_replay
) {
1312 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1313 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1314 mp
->mnt_wapbl_replay
= 0;
1318 fstrans_unmount(mp
);
1320 kmem_free(fs
, fs
->fs_sbsize
);
1321 spec_node_setmountedfs(devvp
, NULL
);
1325 if (ump
->um_oldfscompat
)
1326 kmem_free(ump
->um_oldfscompat
, 512 + 3*sizeof(int32_t));
1327 mutex_destroy(&ump
->um_lock
);
1328 kmem_free(ump
, sizeof(*ump
));
1329 mp
->mnt_data
= NULL
;
1335 * Sanity checks for loading old filesystem superblocks.
1336 * See ffs_oldfscompat_write below for unwound actions.
1338 * XXX - Parts get retired eventually.
1339 * Unfortunately new bits get added.
1342 ffs_oldfscompat_read(struct fs
*fs
, struct ufsmount
*ump
, daddr_t sblockloc
)
1347 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1348 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1351 if (!ump
->um_oldfscompat
)
1352 ump
->um_oldfscompat
= kmem_alloc(512 + 3*sizeof(int32_t),
1355 memcpy(ump
->um_oldfscompat
, &fs
->fs_old_postbl_start
, 512);
1356 extrasave
= ump
->um_oldfscompat
;
1357 extrasave
+= 512/sizeof(int32_t);
1358 extrasave
[0] = fs
->fs_old_npsect
;
1359 extrasave
[1] = fs
->fs_old_interleave
;
1360 extrasave
[2] = fs
->fs_old_trackskew
;
1362 /* These fields will be overwritten by their
1363 * original values in fs_oldfscompat_write, so it is harmless
1364 * to modify them here.
1366 fs
->fs_cstotal
.cs_ndir
= fs
->fs_old_cstotal
.cs_ndir
;
1367 fs
->fs_cstotal
.cs_nbfree
= fs
->fs_old_cstotal
.cs_nbfree
;
1368 fs
->fs_cstotal
.cs_nifree
= fs
->fs_old_cstotal
.cs_nifree
;
1369 fs
->fs_cstotal
.cs_nffree
= fs
->fs_old_cstotal
.cs_nffree
;
1371 fs
->fs_maxbsize
= fs
->fs_bsize
;
1372 fs
->fs_time
= fs
->fs_old_time
;
1373 fs
->fs_size
= fs
->fs_old_size
;
1374 fs
->fs_dsize
= fs
->fs_old_dsize
;
1375 fs
->fs_csaddr
= fs
->fs_old_csaddr
;
1376 fs
->fs_sblockloc
= sblockloc
;
1378 fs
->fs_flags
= fs
->fs_old_flags
| (fs
->fs_flags
& FS_INTERNAL
);
1380 if (fs
->fs_old_postblformat
== FS_42POSTBLFMT
) {
1381 fs
->fs_old_nrpos
= 8;
1382 fs
->fs_old_npsect
= fs
->fs_old_nsect
;
1383 fs
->fs_old_interleave
= 1;
1384 fs
->fs_old_trackskew
= 0;
1387 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) {
1388 fs
->fs_maxfilesize
= (u_quad_t
) 1LL << 39;
1389 fs
->fs_qbmask
= ~fs
->fs_bmask
;
1390 fs
->fs_qfmask
= ~fs
->fs_fmask
;
1393 maxfilesize
= (u_int64_t
)0x80000000 * fs
->fs_bsize
- 1;
1394 if (fs
->fs_maxfilesize
> maxfilesize
)
1395 fs
->fs_maxfilesize
= maxfilesize
;
1397 /* Compatibility for old filesystems */
1398 if (fs
->fs_avgfilesize
<= 0)
1399 fs
->fs_avgfilesize
= AVFILESIZ
;
1400 if (fs
->fs_avgfpdir
<= 0)
1401 fs
->fs_avgfpdir
= AFPDIR
;
1405 fs
->fs_save_cgsize
= fs
->fs_cgsize
;
1406 fs
->fs_cgsize
= fs
->fs_bsize
;
1412 * Unwinding superblock updates for old filesystems.
1413 * See ffs_oldfscompat_read above for details.
1415 * XXX - Parts get retired eventually.
1416 * Unfortunately new bits get added.
1419 ffs_oldfscompat_write(struct fs
*fs
, struct ufsmount
*ump
)
1423 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1424 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1427 fs
->fs_old_time
= fs
->fs_time
;
1428 fs
->fs_old_cstotal
.cs_ndir
= fs
->fs_cstotal
.cs_ndir
;
1429 fs
->fs_old_cstotal
.cs_nbfree
= fs
->fs_cstotal
.cs_nbfree
;
1430 fs
->fs_old_cstotal
.cs_nifree
= fs
->fs_cstotal
.cs_nifree
;
1431 fs
->fs_old_cstotal
.cs_nffree
= fs
->fs_cstotal
.cs_nffree
;
1432 fs
->fs_old_flags
= fs
->fs_flags
;
1436 fs
->fs_cgsize
= fs
->fs_save_cgsize
;
1440 memcpy(&fs
->fs_old_postbl_start
, ump
->um_oldfscompat
, 512);
1441 extrasave
= ump
->um_oldfscompat
;
1442 extrasave
+= 512/sizeof(int32_t);
1443 fs
->fs_old_npsect
= extrasave
[0];
1444 fs
->fs_old_interleave
= extrasave
[1];
1445 fs
->fs_old_trackskew
= extrasave
[2];
1450 * unmount vfs operation
1453 ffs_unmount(struct mount
*mp
, int mntflags
)
1455 struct lwp
*l
= curlwp
;
1456 struct ufsmount
*ump
= VFSTOUFS(mp
);
1457 struct fs
*fs
= ump
->um_fs
;
1464 if (ump
->um_discarddata
) {
1465 ffs_discard_finish(ump
->um_discarddata
, mntflags
);
1466 ump
->um_discarddata
= NULL
;
1470 if (mntflags
& MNT_FORCE
)
1471 flags
|= FORCECLOSE
;
1472 if ((error
= ffs_flushfiles(mp
, flags
, l
)) != 0)
1474 error
= UFS_WAPBL_BEGIN(mp
);
1476 if (fs
->fs_ronly
== 0 &&
1477 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
1478 fs
->fs_clean
& FS_WASCLEAN
) {
1479 fs
->fs_clean
= FS_ISCLEAN
;
1481 (void) ffs_sbupdate(ump
, MNT_WAIT
);
1486 KASSERT(!(mp
->mnt_wapbl_replay
&& mp
->mnt_wapbl
));
1487 if (mp
->mnt_wapbl_replay
) {
1488 KDASSERT(fs
->fs_ronly
);
1489 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1490 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1491 mp
->mnt_wapbl_replay
= 0;
1493 error
= ffs_wapbl_stop(mp
, doforce
&& (mntflags
& MNT_FORCE
));
1499 if (ump
->um_devvp
->v_type
!= VBAD
)
1500 spec_node_setmountedfs(ump
->um_devvp
, NULL
);
1501 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1502 (void)VOP_CLOSE(ump
->um_devvp
, fs
->fs_ronly
? FREAD
: FREAD
| FWRITE
,
1504 vput(ump
->um_devvp
);
1506 bsize
= fs
->fs_cssize
;
1507 if (fs
->fs_contigsumsize
> 0)
1508 bsize
+= fs
->fs_ncg
* sizeof(int32_t);
1509 bsize
+= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1510 kmem_free(fs
->fs_csp
, bsize
);
1512 kmem_free(fs
, fs
->fs_sbsize
);
1513 if (ump
->um_oldfscompat
!= NULL
)
1514 kmem_free(ump
->um_oldfscompat
, 512 + 3*sizeof(int32_t));
1515 mutex_destroy(&ump
->um_lock
);
1516 ffs_snapshot_fini(ump
);
1517 kmem_free(ump
, sizeof(*ump
));
1518 mp
->mnt_data
= NULL
;
1519 mp
->mnt_flag
&= ~MNT_LOCAL
;
1520 fstrans_unmount(mp
);
1525 * Flush out all the files in a filesystem.
1528 ffs_flushfiles(struct mount
*mp
, int flags
, struct lwp
*l
)
1531 struct ufsmount
*ump
;
1535 flags
&= ~FORCECLOSE
;
1538 if ((error
= quota1_umount(mp
, flags
)) != 0)
1542 if ((error
= quota2_umount(mp
, flags
)) != 0)
1546 if (ump
->um_fstype
== UFS1
) {
1547 if (ump
->um_extattr
.uepm_flags
& UFS_EXTATTR_UEPM_STARTED
)
1548 ufs_extattr_stop(mp
, l
);
1549 if (ump
->um_extattr
.uepm_flags
& UFS_EXTATTR_UEPM_INITIALIZED
)
1550 ufs_extattr_uepm_destroy(&ump
->um_extattr
);
1553 if ((error
= vflush(mp
, 0, SKIPSYSTEM
| flags
)) != 0)
1555 ffs_snapshot_unmount(mp
);
1557 * Flush all the files.
1559 error
= vflush(mp
, NULLVP
, flags
);
1563 * Flush filesystem metadata.
1565 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1566 error
= VOP_FSYNC(ump
->um_devvp
, l
->l_cred
, FSYNC_WAIT
, 0, 0);
1567 VOP_UNLOCK(ump
->um_devvp
);
1568 if (flags
& FORCECLOSE
) /* XXXDBJ */
1574 if (mp
->mnt_wapbl
) {
1575 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
1576 if (flags
& FORCECLOSE
)
1585 * Get file system statistics.
1588 ffs_statvfs(struct mount
*mp
, struct statvfs
*sbp
)
1590 struct ufsmount
*ump
;
1595 mutex_enter(&ump
->um_lock
);
1596 sbp
->f_bsize
= fs
->fs_bsize
;
1597 sbp
->f_frsize
= fs
->fs_fsize
;
1598 sbp
->f_iosize
= fs
->fs_bsize
;
1599 sbp
->f_blocks
= fs
->fs_dsize
;
1600 sbp
->f_bfree
= ffs_blkstofrags(fs
, fs
->fs_cstotal
.cs_nbfree
) +
1601 fs
->fs_cstotal
.cs_nffree
+ FFS_DBTOFSB(fs
, fs
->fs_pendingblocks
);
1602 sbp
->f_bresvd
= ((u_int64_t
) fs
->fs_dsize
* (u_int64_t
)
1603 fs
->fs_minfree
) / (u_int64_t
) 100;
1604 if (sbp
->f_bfree
> sbp
->f_bresvd
)
1605 sbp
->f_bavail
= sbp
->f_bfree
- sbp
->f_bresvd
;
1608 sbp
->f_files
= fs
->fs_ncg
* fs
->fs_ipg
- UFS_ROOTINO
;
1609 sbp
->f_ffree
= fs
->fs_cstotal
.cs_nifree
+ fs
->fs_pendinginodes
;
1610 sbp
->f_favail
= sbp
->f_ffree
;
1612 mutex_exit(&ump
->um_lock
);
1613 copy_statvfs_info(sbp
, mp
);
1619 * Go through the disk queues to initiate sandbagged IO;
1620 * go through the inodes to write those that have been modified;
1621 * initiate the writing of the super block if it has been modified.
1623 * Note: we are always called with the filesystem marked `MPBUSY'.
1626 ffs_sync(struct mount
*mp
, int waitfor
, kauth_cred_t cred
)
1628 struct vnode
*vp
, *mvp
, *nvp
;
1630 struct ufsmount
*ump
= VFSTOUFS(mp
);
1632 int error
, allerror
= 0;
1636 if (fs
->fs_fmod
!= 0 && fs
->fs_ronly
!= 0) { /* XXX */
1637 printf("fs = %s\n", fs
->fs_fsmnt
);
1638 panic("update: rofs mod");
1641 /* Allocate a marker vnode. */
1644 fstrans_start(mp
, FSTRANS_SHARED
);
1645 is_suspending
= (fstrans_getstate(mp
) == FSTRANS_SUSPENDING
);
1647 * Write back each (modified) inode.
1649 mutex_enter(&mntvnode_lock
);
1652 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1653 * and vclean() can be called indirectly
1655 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= nvp
) {
1656 nvp
= TAILQ_NEXT(vp
, v_mntvnodes
);
1658 * If the vnode that we are about to sync is no longer
1659 * associated with this mount point, start over.
1661 if (vp
->v_mount
!= mp
)
1664 * Don't interfere with concurrent scans of this FS.
1668 mutex_enter(vp
->v_interlock
);
1672 * Skip the vnode/inode if inaccessible.
1674 if (ip
== NULL
|| (vp
->v_iflag
& (VI_XLOCK
| VI_CLEAN
)) != 0 ||
1675 vp
->v_type
== VNON
) {
1676 mutex_exit(vp
->v_interlock
);
1681 * We deliberately update inode times here. This will
1682 * prevent a massive queue of updates accumulating, only
1683 * to be handled by a call to unmount.
1685 * XXX It would be better to have the syncer trickle these
1686 * out. Adjustment needed to allow registering vnodes for
1687 * sync when the vnode is clean, but the inode dirty. Or
1688 * have ufs itself trickle out inode updates.
1690 * If doing a lazy sync, we don't care about metadata or
1691 * data updates, because they are handled by each vnode's
1692 * synclist entry. In this case we are only interested in
1693 * writing back modified inodes.
1695 if ((ip
->i_flag
& (IN_ACCESS
| IN_CHANGE
| IN_UPDATE
|
1696 IN_MODIFY
| IN_MODIFIED
| IN_ACCESSED
)) == 0 &&
1697 (waitfor
== MNT_LAZY
|| (LIST_EMPTY(&vp
->v_dirtyblkhd
) &&
1698 UVM_OBJ_IS_CLEAN(&vp
->v_uobj
)))) {
1699 mutex_exit(vp
->v_interlock
);
1702 if (vp
->v_type
== VBLK
&& is_suspending
) {
1703 mutex_exit(vp
->v_interlock
);
1707 mutex_exit(&mntvnode_lock
);
1708 error
= vget(vp
, LK_EXCLUSIVE
| LK_NOWAIT
);
1710 mutex_enter(&mntvnode_lock
);
1712 if (error
== ENOENT
) {
1717 if (waitfor
== MNT_LAZY
) {
1718 error
= UFS_WAPBL_BEGIN(vp
->v_mount
);
1720 error
= ffs_update(vp
, NULL
, NULL
,
1722 UFS_WAPBL_END(vp
->v_mount
);
1725 error
= VOP_FSYNC(vp
, cred
, FSYNC_NOLOG
|
1726 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0), 0, 0);
1731 mutex_enter(&mntvnode_lock
);
1734 mutex_exit(&mntvnode_lock
);
1736 * Force stale file system control information to be flushed.
1738 if (waitfor
!= MNT_LAZY
&& (ump
->um_devvp
->v_numoutput
> 0 ||
1739 !LIST_EMPTY(&ump
->um_devvp
->v_dirtyblkhd
))) {
1740 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1741 if ((error
= VOP_FSYNC(ump
->um_devvp
, cred
,
1742 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0) | FSYNC_NOLOG
,
1745 VOP_UNLOCK(ump
->um_devvp
);
1746 if (allerror
== 0 && waitfor
== MNT_WAIT
&& !mp
->mnt_wapbl
) {
1747 mutex_enter(&mntvnode_lock
);
1751 #if defined(QUOTA) || defined(QUOTA2)
1755 * Write back modified superblock.
1757 if (fs
->fs_fmod
!= 0) {
1759 fs
->fs_time
= time_second
;
1760 error
= UFS_WAPBL_BEGIN(mp
);
1764 if ((error
= ffs_cgupdate(ump
, waitfor
)))
1771 if (mp
->mnt_wapbl
) {
1772 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
1784 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1785 * in from disk. If it is in core, wait for the lock bit to clear, then
1786 * return the inode locked. Detection and handling of mount points must be
1787 * done by the calling routine.
1790 ffs_vget(struct mount
*mp
, ino_t ino
, struct vnode
**vpp
)
1794 struct ufsmount
*ump
;
1804 if ((*vpp
= ufs_ihashget(dev
, ino
, LK_EXCLUSIVE
)) != NULL
)
1807 /* Allocate a new vnode/inode. */
1808 error
= getnewvnode(VT_UFS
, mp
, ffs_vnodeop_p
, NULL
, &vp
);
1813 ip
= pool_cache_get(ffs_inode_cache
, PR_WAITOK
);
1816 * If someone beat us to it, put back the freshly allocated
1817 * vnode/inode pair and retry.
1819 mutex_enter(&ufs_hashlock
);
1820 if (ufs_ihashget(dev
, ino
, 0) != NULL
) {
1821 mutex_exit(&ufs_hashlock
);
1823 pool_cache_put(ffs_inode_cache
, ip
);
1827 vp
->v_vflag
|= VV_LOCKSWORK
;
1830 * XXX MFS ends up here, too, to allocate an inode. Should we
1831 * XXX create another pool for MFS inodes?
1834 memset(ip
, 0, sizeof(struct inode
));
1838 ip
->i_fs
= fs
= ump
->um_fs
;
1841 #if defined(QUOTA) || defined(QUOTA2)
1846 * Initialize genfs node, we might proceed to destroy it in
1849 genfs_node_init(vp
, &ffs_genfsops
);
1852 * Put it onto its hash chain and lock it so that other requests for
1853 * this inode will block if they arrive while we are sleeping waiting
1854 * for old data structures to be purged or for the contents of the
1855 * disk portion of this inode to be read.
1859 mutex_exit(&ufs_hashlock
);
1861 /* Read in the disk contents for the inode, copy into the inode. */
1862 error
= bread(ump
->um_devvp
, FFS_FSBTODB(fs
, ino_to_fsba(fs
, ino
)),
1863 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
1867 * The inode does not contain anything useful, so it would
1868 * be misleading to leave it on its hash chain. With mode
1869 * still zero, it will be unlinked and returned to the free
1877 if (ip
->i_ump
->um_fstype
== UFS1
)
1878 ip
->i_din
.ffs1_din
= pool_cache_get(ffs_dinode1_cache
,
1881 ip
->i_din
.ffs2_din
= pool_cache_get(ffs_dinode2_cache
,
1883 ffs_load_inode(bp
, ip
, fs
, ino
);
1887 * Initialize the vnode from the inode, check for aliases.
1888 * Note that the underlying vnode may have changed.
1891 ufs_vinit(mp
, ffs_specop_p
, ffs_fifoop_p
, &vp
);
1894 * Finish inode initialization now that aliasing has been resolved.
1897 ip
->i_devvp
= ump
->um_devvp
;
1901 * Ensure that uid and gid are correct. This is a temporary
1902 * fix until fsck has been changed to do the update.
1905 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) { /* XXX */
1906 ip
->i_uid
= ip
->i_ffs1_ouid
; /* XXX */
1907 ip
->i_gid
= ip
->i_ffs1_ogid
; /* XXX */
1909 uvm_vnp_setsize(vp
, ip
->i_size
);
1915 * File handle to vnode
1917 * Have to be really careful about stale file handles:
1918 * - check that the inode number is valid
1919 * - call ffs_vget() to get the locked inode
1920 * - check for an unallocated inode (i_mode == 0)
1921 * - check that the given client host has export rights and return
1922 * those rights via. exflagsp and credanonp
1925 ffs_fhtovp(struct mount
*mp
, struct fid
*fhp
, struct vnode
**vpp
)
1930 if (fhp
->fid_len
!= sizeof(struct ufid
))
1933 memcpy(&ufh
, fhp
, sizeof(ufh
));
1934 fs
= VFSTOUFS(mp
)->um_fs
;
1935 if (ufh
.ufid_ino
< UFS_ROOTINO
||
1936 ufh
.ufid_ino
>= fs
->fs_ncg
* fs
->fs_ipg
)
1938 return (ufs_fhtovp(mp
, &ufh
, vpp
));
1942 * Vnode pointer to File handle
1946 ffs_vptofh(struct vnode
*vp
, struct fid
*fhp
, size_t *fh_size
)
1951 if (*fh_size
< sizeof(struct ufid
)) {
1952 *fh_size
= sizeof(struct ufid
);
1956 *fh_size
= sizeof(struct ufid
);
1957 memset(&ufh
, 0, sizeof(ufh
));
1958 ufh
.ufid_len
= sizeof(struct ufid
);
1959 ufh
.ufid_ino
= ip
->i_number
;
1960 ufh
.ufid_gen
= ip
->i_gen
;
1961 memcpy(fhp
, &ufh
, sizeof(ufh
));
1968 if (ffs_initcount
++ > 0)
1971 ffs_inode_cache
= pool_cache_init(sizeof(struct inode
), 0, 0, 0,
1972 "ffsino", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1973 ffs_dinode1_cache
= pool_cache_init(sizeof(struct ufs1_dinode
), 0, 0, 0,
1974 "ffsdino1", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1975 ffs_dinode2_cache
= pool_cache_init(sizeof(struct ufs2_dinode
), 0, 0, 0,
1976 "ffsdino2", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1990 if (--ffs_initcount
> 0)
1994 pool_cache_destroy(ffs_dinode2_cache
);
1995 pool_cache_destroy(ffs_dinode1_cache
);
1996 pool_cache_destroy(ffs_inode_cache
);
2000 * Write a superblock and associated information back to disk.
2003 ffs_sbupdate(struct ufsmount
*mp
, int waitfor
)
2005 struct fs
*fs
= mp
->um_fs
;
2010 error
= ffs_getblk(mp
->um_devvp
,
2011 fs
->fs_sblockloc
/ DEV_BSIZE
, FFS_NOBLK
,
2012 fs
->fs_sbsize
, false, &bp
);
2015 saveflag
= fs
->fs_flags
& FS_INTERNAL
;
2016 fs
->fs_flags
&= ~FS_INTERNAL
;
2018 memcpy(bp
->b_data
, fs
, fs
->fs_sbsize
);
2020 ffs_oldfscompat_write((struct fs
*)bp
->b_data
, mp
);
2022 if (mp
->um_flags
& UFS_NEEDSWAP
)
2023 ffs_sb_swap((struct fs
*)bp
->b_data
, (struct fs
*)bp
->b_data
);
2025 fs
->fs_flags
|= saveflag
;
2027 if (waitfor
== MNT_WAIT
)
2035 ffs_cgupdate(struct ufsmount
*mp
, int waitfor
)
2037 struct fs
*fs
= mp
->um_fs
;
2041 int i
, size
, error
= 0, allerror
= 0;
2043 allerror
= ffs_sbupdate(mp
, waitfor
);
2044 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
2046 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
2047 size
= fs
->fs_bsize
;
2048 if (i
+ fs
->fs_frag
> blks
)
2049 size
= (blks
- i
) * fs
->fs_fsize
;
2050 error
= ffs_getblk(mp
->um_devvp
, FFS_FSBTODB(fs
, fs
->fs_csaddr
+ i
),
2051 FFS_NOBLK
, size
, false, &bp
);
2055 if (mp
->um_flags
& UFS_NEEDSWAP
)
2056 ffs_csum_swap((struct csum
*)space
,
2057 (struct csum
*)bp
->b_data
, size
);
2060 memcpy(bp
->b_data
, space
, (u_int
)size
);
2061 space
= (char *)space
+ size
;
2062 if (waitfor
== MNT_WAIT
)
2067 if (!allerror
&& error
)
2073 ffs_extattrctl(struct mount
*mp
, int cmd
, struct vnode
*vp
,
2074 int attrnamespace
, const char *attrname
)
2078 * File-backed extended attributes are only supported on UFS1.
2079 * UFS2 has native extended attributes.
2081 if (VFSTOUFS(mp
)->um_fstype
== UFS1
)
2082 return (ufs_extattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
2084 return (vfs_stdextattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
2088 ffs_suspendctl(struct mount
*mp
, int cmd
)
2091 struct lwp
*l
= curlwp
;
2094 case SUSPEND_SUSPEND
:
2095 if ((error
= fstrans_setstate(mp
, FSTRANS_SUSPENDING
)) != 0)
2097 error
= ffs_sync(mp
, MNT_WAIT
, l
->l_proc
->p_cred
);
2099 error
= fstrans_setstate(mp
, FSTRANS_SUSPENDED
);
2101 if (error
== 0 && mp
->mnt_wapbl
)
2102 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
2105 (void) fstrans_setstate(mp
, FSTRANS_NORMAL
);
2110 case SUSPEND_RESUME
:
2111 return fstrans_setstate(mp
, FSTRANS_NORMAL
);
2119 * Synch vnode for a mounted file system.
2122 ffs_vfs_fsync(vnode_t
*vp
, int flags
)
2124 int error
, i
, pflags
;
2129 KASSERT(vp
->v_type
== VBLK
);
2130 KASSERT(spec_node_getmountedfs(vp
) != NULL
);
2133 * Flush all dirty data associated with the vnode.
2135 pflags
= PGO_ALLPAGES
| PGO_CLEANIT
;
2136 if ((flags
& FSYNC_WAIT
) != 0)
2137 pflags
|= PGO_SYNCIO
;
2138 mutex_enter(vp
->v_interlock
);
2139 error
= VOP_PUTPAGES(vp
, 0, 0, pflags
);
2144 mp
= spec_node_getmountedfs(vp
);
2145 if (mp
&& mp
->mnt_wapbl
) {
2147 * Don't bother writing out metadata if the syncer is
2148 * making the request. We will let the sync vnode
2149 * write it out in a single burst through a call to
2152 if ((flags
& (FSYNC_DATAONLY
| FSYNC_LAZY
| FSYNC_NOLOG
)) != 0)
2156 * Don't flush the log if the vnode being flushed
2157 * contains no dirty buffers that could be in the log.
2159 if (!LIST_EMPTY(&vp
->v_dirtyblkhd
)) {
2160 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
2165 if ((flags
& FSYNC_WAIT
) != 0) {
2166 mutex_enter(vp
->v_interlock
);
2167 while (vp
->v_numoutput
)
2168 cv_wait(&vp
->v_cv
, vp
->v_interlock
);
2169 mutex_exit(vp
->v_interlock
);
2176 error
= vflushbuf(vp
, flags
);
2177 if (error
== 0 && (flags
& FSYNC_CACHE
) != 0) {
2179 (void)VOP_IOCTL(vp
, DIOCCACHESYNC
, &i
, FWRITE
,