1 /* $NetBSD: ffs_vfsops.c,v 1.271 2011/11/14 18:35:14 hannken Exp $ */
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc, and by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (c) 1989, 1991, 1993, 1994
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.271 2011/11/14 18:35:14 hannken Exp $");
66 #if defined(_KERNEL_OPT)
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
81 #include <sys/device.h>
85 #include <sys/disklabel.h>
86 #include <sys/ioctl.h>
87 #include <sys/errno.h>
88 #include <sys/malloc.h>
91 #include <sys/sysctl.h>
93 #include <sys/kauth.h>
94 #include <sys/wapbl.h>
95 #include <sys/fstrans.h>
96 #include <sys/module.h>
98 #include <miscfs/genfs/genfs.h>
99 #include <miscfs/specfs/specdev.h>
101 #include <ufs/ufs/quota.h>
102 #include <ufs/ufs/ufsmount.h>
103 #include <ufs/ufs/inode.h>
104 #include <ufs/ufs/dir.h>
105 #include <ufs/ufs/ufs_extern.h>
106 #include <ufs/ufs/ufs_bswap.h>
107 #include <ufs/ufs/ufs_wapbl.h>
109 #include <ufs/ffs/fs.h>
110 #include <ufs/ffs/ffs_extern.h>
112 MODULE(MODULE_CLASS_VFS
, ffs
, NULL
);
114 static int ffs_vfs_fsync(vnode_t
*, int);
116 static struct sysctllog
*ffs_sysctl_log
;
118 /* how many times ffs_init() was called */
119 int ffs_initcount
= 0;
121 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc
;
122 extern const struct vnodeopv_desc ffs_specop_opv_desc
;
123 extern const struct vnodeopv_desc ffs_fifoop_opv_desc
;
125 const struct vnodeopv_desc
* const ffs_vnodeopv_descs
[] = {
126 &ffs_vnodeop_opv_desc
,
127 &ffs_specop_opv_desc
,
128 &ffs_fifoop_opv_desc
,
132 struct vfsops ffs_vfsops
= {
134 sizeof (struct ufs_args
),
152 genfs_renamelock_enter
,
153 genfs_renamelock_exit
,
160 static const struct genfs_ops ffs_genfsops
= {
161 .gop_size
= ffs_gop_size
,
162 .gop_alloc
= ufs_gop_alloc
,
163 .gop_write
= genfs_gop_write
,
164 .gop_markupdate
= ufs_gop_markupdate
,
167 static const struct ufs_ops ffs_ufsops
= {
168 .uo_itimes
= ffs_itimes
,
169 .uo_update
= ffs_update
,
170 .uo_truncate
= ffs_truncate
,
171 .uo_valloc
= ffs_valloc
,
172 .uo_vfree
= ffs_vfree
,
173 .uo_balloc
= ffs_balloc
,
174 .uo_unmark_vnode
= (void (*)(vnode_t
*))nullop
,
178 ffs_modcmd(modcmd_t cmd
, void *arg
)
183 extern int doasyncfree
;
186 extern int ufs_extattr_autocreate
;
188 extern int ffs_log_changeopt
;
191 case MODULE_CMD_INIT
:
192 error
= vfs_attach(&ffs_vfsops
);
196 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
198 CTLTYPE_NODE
, "vfs", NULL
,
201 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
204 SYSCTL_DESCR("Berkeley Fast File System"),
206 CTL_VFS
, 1, CTL_EOL
);
208 * @@@ should we even bother with these first three?
210 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
211 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
212 CTLTYPE_INT
, "doclusterread", NULL
,
213 sysctl_notavail
, 0, NULL
, 0,
214 CTL_VFS
, 1, FFS_CLUSTERREAD
, CTL_EOL
);
215 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
216 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
217 CTLTYPE_INT
, "doclusterwrite", NULL
,
218 sysctl_notavail
, 0, NULL
, 0,
219 CTL_VFS
, 1, FFS_CLUSTERWRITE
, CTL_EOL
);
220 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
221 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
222 CTLTYPE_INT
, "doreallocblks", NULL
,
223 sysctl_notavail
, 0, NULL
, 0,
224 CTL_VFS
, 1, FFS_REALLOCBLKS
, CTL_EOL
);
226 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
227 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
228 CTLTYPE_INT
, "doasyncfree",
229 SYSCTL_DESCR("Release dirty blocks asynchronously"),
230 NULL
, 0, &doasyncfree
, 0,
231 CTL_VFS
, 1, FFS_ASYNCFREE
, CTL_EOL
);
233 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
234 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
235 CTLTYPE_INT
, "log_changeopt",
236 SYSCTL_DESCR("Log changes in optimization strategy"),
237 NULL
, 0, &ffs_log_changeopt
, 0,
238 CTL_VFS
, 1, FFS_LOG_CHANGEOPT
, CTL_EOL
);
240 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
241 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
242 CTLTYPE_INT
, "extattr_autocreate",
243 SYSCTL_DESCR("Size of attribute for "
244 "backing file autocreation"),
245 NULL
, 0, &ufs_extattr_autocreate
, 0,
246 CTL_VFS
, 1, FFS_EXTATTR_AUTOCREATE
, CTL_EOL
);
248 #endif /* UFS_EXTATTR */
251 case MODULE_CMD_FINI
:
252 error
= vfs_detach(&ffs_vfsops
);
255 sysctl_teardown(&ffs_sysctl_log
);
265 pool_cache_t ffs_inode_cache
;
266 pool_cache_t ffs_dinode1_cache
;
267 pool_cache_t ffs_dinode2_cache
;
269 static void ffs_oldfscompat_read(struct fs
*, struct ufsmount
*, daddr_t
);
270 static void ffs_oldfscompat_write(struct fs
*, struct ufsmount
*);
273 * Called by main() when ffs is going to be mounted as root.
281 struct lwp
*l
= curlwp
; /* XXX */
282 struct ufsmount
*ump
;
285 if (device_class(root_device
) != DV_DISK
)
288 if ((error
= vfs_rootmountalloc(MOUNT_FFS
, "root_device", &mp
))) {
294 * We always need to be able to mount the root file system.
296 mp
->mnt_flag
|= MNT_FORCE
;
297 if ((error
= ffs_mountfs(rootvp
, mp
, l
)) != 0) {
298 vfs_unbusy(mp
, false, NULL
);
302 mp
->mnt_flag
&= ~MNT_FORCE
;
303 mutex_enter(&mountlist_lock
);
304 CIRCLEQ_INSERT_TAIL(&mountlist
, mp
, mnt_list
);
305 mutex_exit(&mountlist_lock
);
308 memset(fs
->fs_fsmnt
, 0, sizeof(fs
->fs_fsmnt
));
309 (void)copystr(mp
->mnt_stat
.f_mntonname
, fs
->fs_fsmnt
, MNAMELEN
- 1, 0);
310 (void)ffs_statvfs(mp
, &mp
->mnt_stat
);
311 vfs_unbusy(mp
, false, NULL
);
312 setrootfstime((time_t)fs
->fs_time
);
322 ffs_mount(struct mount
*mp
, const char *path
, void *data
, size_t *data_len
)
324 struct lwp
*l
= curlwp
;
325 struct vnode
*devvp
= NULL
;
326 struct ufs_args
*args
= data
;
327 struct ufsmount
*ump
= NULL
;
329 int error
= 0, flags
, update
;
332 if (*data_len
< sizeof *args
)
335 if (mp
->mnt_flag
& MNT_GETARGS
) {
340 *data_len
= sizeof *args
;
344 update
= mp
->mnt_flag
& MNT_UPDATE
;
346 /* Check arguments */
347 if (args
->fspec
!= NULL
) {
349 * Look up the name and verify that it's sane.
351 error
= namei_simple_user(args
->fspec
,
352 NSM_FOLLOW_NOEMULROOT
, &devvp
);
358 * Be sure this is a valid block device
360 if (devvp
->v_type
!= VBLK
)
362 else if (bdevsw_lookup(devvp
->v_rdev
) == NULL
)
366 * Be sure we're still naming the same device
367 * used for our initial mount
370 if (devvp
!= ump
->um_devvp
) {
371 if (devvp
->v_rdev
!= ump
->um_devvp
->v_rdev
)
375 devvp
= ump
->um_devvp
;
382 /* New mounts must have a filename for the device */
385 /* Use the extant mount */
387 devvp
= ump
->um_devvp
;
393 * If mount by non-root, then verify that user has necessary
394 * permissions on the device.
396 * Permission to update a mount is checked higher, so here we presume
397 * updating the mount is okay (for example, as far as securelevel goes)
398 * which leaves us with the normal check.
403 (mp
->mnt_iflag
& IMNT_WANTRDWR
) != 0 :
404 (mp
->mnt_flag
& MNT_RDONLY
) == 0)
405 accessmode
|= VWRITE
;
406 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
407 error
= genfs_can_mount(devvp
, accessmode
, l
->l_cred
);
417 /* WAPBL can only be enabled on a r/w mount. */
418 if ((mp
->mnt_flag
& MNT_RDONLY
) && !(mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
419 mp
->mnt_flag
&= ~MNT_LOG
;
422 mp
->mnt_flag
&= ~MNT_LOG
;
428 if (mp
->mnt_flag
& MNT_RDONLY
)
431 xflags
= FREAD
| FWRITE
;
432 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
433 error
= VOP_OPEN(devvp
, xflags
, FSCRED
);
437 error
= ffs_mountfs(devvp
, mp
, l
);
439 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
440 (void)VOP_CLOSE(devvp
, xflags
, NOCRED
);
453 * The initial mount got a reference on this
454 * device, so drop the one obtained via
461 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
463 * Changing from r/w to r/o
466 if (mp
->mnt_flag
& MNT_FORCE
)
468 error
= ffs_flushfiles(mp
, flags
, l
);
470 error
= UFS_WAPBL_BEGIN(mp
);
472 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
473 fs
->fs_clean
& FS_WASCLEAN
) {
474 if (mp
->mnt_flag
& MNT_SOFTDEP
)
475 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
476 fs
->fs_clean
= FS_ISCLEAN
;
477 (void) ffs_sbupdate(ump
, MNT_WAIT
);
486 if ((mp
->mnt_flag
& MNT_LOG
) == 0) {
487 error
= ffs_wapbl_stop(mp
, mp
->mnt_flag
& MNT_FORCE
);
493 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
495 * Finish change from r/w to r/o
501 if (mp
->mnt_flag
& MNT_RELOAD
) {
502 error
= ffs_reload(mp
, l
->l_cred
, l
);
507 if (fs
->fs_ronly
&& (mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
509 * Changing from read-only to read/write
512 if (fs
->fs_flags
& FS_DOQUOTA2
) {
513 ump
->um_flags
|= UFS_QUOTA2
;
514 uprintf("%s: options QUOTA2 not enabled%s\n",
515 mp
->mnt_stat
.f_mntonname
,
516 (mp
->mnt_flag
& MNT_FORCE
) ? "" :
525 if (fs
->fs_flags
& FS_DOWAPBL
) {
526 printf("%s: replaying log to disk\n",
528 KDASSERT(mp
->mnt_wapbl_replay
);
529 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
534 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
535 fs
->fs_clean
= FS_WASCLEAN
;
538 if (fs
->fs_snapinum
[0] != 0)
539 ffs_snapshot_mount(mp
);
543 error
= ffs_wapbl_start(mp
);
550 error
= ffs_quota2_mount(mp
);
556 if (args
->fspec
== NULL
)
560 error
= set_statvfs_info(path
, UIO_USERSPACE
, args
->fspec
,
561 UIO_USERSPACE
, mp
->mnt_op
->vfs_name
, mp
, l
);
563 (void)strncpy(fs
->fs_fsmnt
, mp
->mnt_stat
.f_mntonname
,
564 sizeof(fs
->fs_fsmnt
));
565 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
566 if (fs
->fs_fmod
!= 0) { /* XXX */
570 if (fs
->fs_clean
& FS_WASCLEAN
)
571 fs
->fs_time
= time_second
;
573 printf("%s: file system not clean (fs_clean=%#x); "
574 "please fsck(8)\n", mp
->mnt_stat
.f_mntfromname
,
576 printf("%s: lost blocks %" PRId64
" files %d\n",
577 mp
->mnt_stat
.f_mntfromname
, fs
->fs_pendingblocks
,
578 fs
->fs_pendinginodes
);
580 err
= UFS_WAPBL_BEGIN(mp
);
582 (void) ffs_cgupdate(ump
, MNT_WAIT
);
586 if ((mp
->mnt_flag
& MNT_SOFTDEP
) != 0) {
587 printf("%s: `-o softdep' is no longer supported, "
588 "consider `-o log'\n", mp
->mnt_stat
.f_mntfromname
);
589 mp
->mnt_flag
&= ~MNT_SOFTDEP
;
600 * Reload all incore data for a filesystem (used after running fsck on
601 * the root filesystem and finding things to fix). The filesystem must
602 * be mounted read-only.
604 * Things to do to update the mount:
605 * 1) invalidate all cached meta-data.
606 * 2) re-read superblock from disk.
607 * 3) re-read summary information from disk.
608 * 4) invalidate all inactive vnodes.
609 * 5) invalidate all cached file data.
610 * 6) re-read inode data for all active vnodes.
613 ffs_reload(struct mount
*mp
, kauth_cred_t cred
, struct lwp
*l
)
615 struct vnode
*vp
, *mvp
, *devvp
;
619 struct fs
*fs
, *newfs
;
620 struct dkwedge_info dkw
;
621 int i
, bsize
, blks
, error
;
623 struct ufsmount
*ump
;
626 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
631 * Step 1: invalidate all cached meta-data.
633 devvp
= ump
->um_devvp
;
634 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
635 error
= vinvalbuf(devvp
, 0, cred
, l
, 0, 0);
638 panic("ffs_reload: dirty1");
640 * Step 2: re-read superblock from disk.
644 /* XXX we don't handle possibility that superblock moved. */
645 error
= bread(devvp
, fs
->fs_sblockloc
/ DEV_BSIZE
, fs
->fs_sbsize
,
651 newfs
= malloc(fs
->fs_sbsize
, M_UFSMNT
, M_WAITOK
);
652 memcpy(newfs
, bp
->b_data
, fs
->fs_sbsize
);
654 if (ump
->um_flags
& UFS_NEEDSWAP
) {
655 ffs_sb_swap((struct fs
*)bp
->b_data
, newfs
);
656 fs
->fs_flags
|= FS_SWAPPED
;
659 fs
->fs_flags
&= ~FS_SWAPPED
;
660 if ((newfs
->fs_magic
!= FS_UFS1_MAGIC
&&
661 newfs
->fs_magic
!= FS_UFS2_MAGIC
)||
662 newfs
->fs_bsize
> MAXBSIZE
||
663 newfs
->fs_bsize
< sizeof(struct fs
)) {
665 free(newfs
, M_UFSMNT
);
666 return (EIO
); /* XXX needs translation */
668 /* Store off old fs_sblockloc for fs_oldfscompat_read. */
669 sblockloc
= fs
->fs_sblockloc
;
671 * Copy pointer fields back into superblock before copying in XXX
672 * new superblock. These should really be in the ufsmount. XXX
673 * Note that important parameters (eg fs_ncg) are unchanged.
675 newfs
->fs_csp
= fs
->fs_csp
;
676 newfs
->fs_maxcluster
= fs
->fs_maxcluster
;
677 newfs
->fs_contigdirs
= fs
->fs_contigdirs
;
678 newfs
->fs_ronly
= fs
->fs_ronly
;
679 newfs
->fs_active
= fs
->fs_active
;
680 memcpy(fs
, newfs
, (u_int
)fs
->fs_sbsize
);
682 free(newfs
, M_UFSMNT
);
684 /* Recheck for apple UFS filesystem */
685 ump
->um_flags
&= ~UFS_ISAPPLEUFS
;
686 /* First check to see if this is tagged as an Apple UFS filesystem
689 if (getdiskinfo(devvp
, &dkw
) == 0 &&
690 strcmp(dkw
.dkw_ptype
, DKW_PTYPE_APPLEUFS
) == 0)
691 ump
->um_flags
|= UFS_ISAPPLEUFS
;
694 /* Manually look for an apple ufs label, and if a valid one
695 * is found, then treat it like an Apple UFS filesystem anyway
697 * EINVAL is most probably a blocksize or alignment problem,
698 * it is unlikely that this is an Apple UFS filesystem then.
700 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ DEV_BSIZE
),
701 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
702 if (error
&& error
!= EINVAL
) {
707 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
708 (struct appleufslabel
*)bp
->b_data
, NULL
);
710 ump
->um_flags
|= UFS_ISAPPLEUFS
;
716 if (ump
->um_flags
& UFS_ISAPPLEUFS
)
720 if (UFS_MPISAPPLEUFS(ump
)) {
721 /* see comment about NeXT below */
722 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
723 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
724 mp
->mnt_iflag
|= IMNT_DTYPE
;
726 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
727 ump
->um_dirblksiz
= DIRBLKSIZ
;
728 if (ump
->um_maxsymlinklen
> 0)
729 mp
->mnt_iflag
|= IMNT_DTYPE
;
731 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
733 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
735 mutex_enter(&ump
->um_lock
);
736 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
737 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
738 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
739 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
740 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
741 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
742 mutex_exit(&ump
->um_lock
);
746 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
747 fs
->fs_pendingblocks
= 0;
748 fs
->fs_pendinginodes
= 0;
750 mutex_exit(&ump
->um_lock
);
752 ffs_statvfs(mp
, &mp
->mnt_stat
);
754 * Step 3: re-read summary information from disk.
756 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
758 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
759 bsize
= fs
->fs_bsize
;
760 if (i
+ fs
->fs_frag
> blks
)
761 bsize
= (blks
- i
) * fs
->fs_fsize
;
762 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
), bsize
,
769 if (UFS_FSNEEDSWAP(fs
))
770 ffs_csum_swap((struct csum
*)bp
->b_data
,
771 (struct csum
*)space
, bsize
);
774 memcpy(space
, bp
->b_data
, (size_t)bsize
);
775 space
= (char *)space
+ bsize
;
778 if (fs
->fs_snapinum
[0] != 0)
779 ffs_snapshot_mount(mp
);
781 * We no longer know anything about clusters per cylinder group.
783 if (fs
->fs_contigsumsize
> 0) {
784 lp
= fs
->fs_maxcluster
;
785 for (i
= 0; i
< fs
->fs_ncg
; i
++)
786 *lp
++ = fs
->fs_contigsumsize
;
789 /* Allocate a marker vnode. */
792 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
793 * and vclean() can be called indirectly
795 mutex_enter(&mntvnode_lock
);
797 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= vunmark(mvp
)) {
799 if (vp
->v_mount
!= mp
|| vismarker(vp
))
802 * Step 4: invalidate all inactive vnodes.
804 if (vrecycle(vp
, &mntvnode_lock
, l
)) {
805 mutex_enter(&mntvnode_lock
);
810 * Step 5: invalidate all cached file data.
812 mutex_enter(vp
->v_interlock
);
813 mutex_exit(&mntvnode_lock
);
814 if (vget(vp
, LK_EXCLUSIVE
)) {
818 if (vinvalbuf(vp
, 0, cred
, l
, 0, 0))
819 panic("ffs_reload: dirty2");
821 * Step 6: re-read inode data for all active vnodes.
824 error
= bread(devvp
, fsbtodb(fs
, ino_to_fsba(fs
, ip
->i_number
)),
825 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
832 ffs_load_inode(bp
, ip
, fs
, ip
->i_number
);
835 mutex_enter(&mntvnode_lock
);
837 mutex_exit(&mntvnode_lock
);
843 * Possible superblock locations ordered from most to least likely.
845 static const int sblock_try
[] = SBLOCKSEARCH
;
848 * Common code for mount and mountroot
851 ffs_mountfs(struct vnode
*devvp
, struct mount
*mp
, struct lwp
*l
)
853 struct ufsmount
*ump
;
857 struct dkwedge_info dkw
;
859 daddr_t sblockloc
, fsblockloc
;
861 int error
, i
, bsize
, ronly
, bset
= 0;
863 int needswap
= 0; /* keep gcc happy */
867 u_int32_t sbsize
= 8192; /* keep gcc happy*/
871 cred
= l
? l
->l_cred
: NOCRED
;
873 /* Flush out any old buffers remaining from a previous use. */
874 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
875 error
= vinvalbuf(devvp
, V_SAVE
, cred
, l
, 0, 0);
880 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
888 error
= fstrans_mount(mp
);
892 ump
= malloc(sizeof *ump
, M_UFSMNT
, M_WAITOK
);
893 memset(ump
, 0, sizeof *ump
);
894 mutex_init(&ump
->um_lock
, MUTEX_DEFAULT
, IPL_NONE
);
895 error
= ffs_snapshot_init(ump
);
898 ump
->um_ops
= &ffs_ufsops
;
904 * Try reading the superblock in each of its possible locations.
908 brelse(bp
, BC_NOCACHE
);
911 if (sblock_try
[i
] == -1) {
916 error
= bread(devvp
, sblock_try
[i
] / DEV_BSIZE
, SBLOCKSIZE
, cred
,
922 fs
= (struct fs
*)bp
->b_data
;
923 fsblockloc
= sblockloc
= sblock_try
[i
];
924 if (fs
->fs_magic
== FS_UFS1_MAGIC
) {
925 sbsize
= fs
->fs_sbsize
;
927 fsbsize
= fs
->fs_bsize
;
930 } else if (fs
->fs_magic
== bswap32(FS_UFS1_MAGIC
)) {
931 sbsize
= bswap32(fs
->fs_sbsize
);
933 fsbsize
= bswap32(fs
->fs_bsize
);
936 } else if (fs
->fs_magic
== FS_UFS2_MAGIC
) {
937 sbsize
= fs
->fs_sbsize
;
939 fsbsize
= fs
->fs_bsize
;
942 } else if (fs
->fs_magic
== bswap32(FS_UFS2_MAGIC
)) {
943 sbsize
= bswap32(fs
->fs_sbsize
);
945 fsbsize
= bswap32(fs
->fs_bsize
);
952 /* fs->fs_sblockloc isn't defined for old filesystems */
953 if (fstype
== UFS1
&& !(fs
->fs_old_flags
& FS_FLAGS_UPDATED
)) {
954 if (sblockloc
== SBLOCK_UFS2
)
956 * This is likely to be the first alternate
957 * in a filesystem with 64k blocks.
961 fsblockloc
= sblockloc
;
963 fsblockloc
= fs
->fs_sblockloc
;
966 fsblockloc
= bswap64(fsblockloc
);
970 /* Check we haven't found an alternate superblock */
971 if (fsblockloc
!= sblockloc
)
974 /* Validate size of superblock */
975 if (sbsize
> MAXBSIZE
|| sbsize
< sizeof(struct fs
))
978 /* Check that we can handle the file system blocksize */
979 if (fsbsize
> MAXBSIZE
) {
980 printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
985 /* Ok seems to be a good superblock */
989 fs
= malloc((u_long
)sbsize
, M_UFSMNT
, M_WAITOK
);
990 memcpy(fs
, bp
->b_data
, sbsize
);
995 ffs_sb_swap((struct fs
*)bp
->b_data
, fs
);
996 fs
->fs_flags
|= FS_SWAPPED
;
999 fs
->fs_flags
&= ~FS_SWAPPED
;
1002 if ((mp
->mnt_wapbl_replay
== 0) && (fs
->fs_flags
& FS_DOWAPBL
)) {
1003 error
= ffs_wapbl_replay_start(mp
, fs
, devvp
);
1004 if (error
&& (mp
->mnt_flag
& MNT_FORCE
) == 0)
1008 /* XXX fsmnt may be stale. */
1009 printf("%s: replaying log to disk\n",
1011 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
1015 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1016 fs
->fs_clean
= FS_WASCLEAN
;
1018 /* XXX fsmnt may be stale */
1019 printf("%s: replaying log to memory\n",
1023 /* Force a re-read of the superblock */
1024 brelse(bp
, BC_INVAL
);
1032 if ((fs
->fs_flags
& FS_DOWAPBL
) && (mp
->mnt_flag
& MNT_FORCE
) == 0) {
1038 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
1039 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
1041 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
1042 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
1043 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
1044 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
1045 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
1051 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
1052 fs
->fs_pendingblocks
= 0;
1053 fs
->fs_pendinginodes
= 0;
1056 ump
->um_fstype
= fstype
;
1057 if (fs
->fs_sbsize
< SBLOCKSIZE
)
1058 brelse(bp
, BC_INVAL
);
1063 /* First check to see if this is tagged as an Apple UFS filesystem
1066 if (getdiskinfo(devvp
, &dkw
) == 0 &&
1067 strcmp(dkw
.dkw_ptype
, DKW_PTYPE_APPLEUFS
) == 0)
1068 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1071 /* Manually look for an apple ufs label, and if a valid one
1072 * is found, then treat it like an Apple UFS filesystem anyway
1074 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ DEV_BSIZE
),
1075 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
1078 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
1079 (struct appleufslabel
*)bp
->b_data
, NULL
);
1081 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1087 if (ump
->um_flags
& UFS_ISAPPLEUFS
) {
1095 * XXX This code changes the behaviour of mounting dirty filesystems, to
1096 * XXX require "mount -f ..." to mount them. This doesn't match what
1097 * XXX mount(8) describes and is disabled for now.
1100 * If the file system is not clean, don't allow it to be mounted
1101 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
1102 * for the root file system.)
1104 if (fs
->fs_flags
& FS_DOWAPBL
) {
1106 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1107 * bit is set, although there's a window in unmount where it
1108 * could be FS_ISCLEAN
1110 if ((mp
->mnt_flag
& MNT_FORCE
) == 0 &&
1111 (fs
->fs_clean
& (FS_WASCLEAN
| FS_ISCLEAN
)) == 0) {
1116 if ((fs
->fs_clean
& FS_ISCLEAN
) == 0 &&
1117 (mp
->mnt_flag
& MNT_FORCE
) == 0) {
1124 * verify that we can access the last block in the fs
1125 * if we're mounting read/write.
1129 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_size
- 1), fs
->fs_fsize
,
1131 if (bp
->b_bcount
!= fs
->fs_fsize
)
1137 brelse(bp
, BC_INVAL
);
1141 fs
->fs_ronly
= ronly
;
1142 /* Don't bump fs_clean if we're replaying journal */
1143 if (!((fs
->fs_flags
& FS_DOWAPBL
) && (fs
->fs_clean
& FS_WASCLEAN
)))
1148 bsize
= fs
->fs_cssize
;
1149 blks
= howmany(bsize
, fs
->fs_fsize
);
1150 if (fs
->fs_contigsumsize
> 0)
1151 bsize
+= fs
->fs_ncg
* sizeof(int32_t);
1152 bsize
+= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1153 space
= malloc((u_long
)bsize
, M_UFSMNT
, M_WAITOK
);
1155 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
1156 bsize
= fs
->fs_bsize
;
1157 if (i
+ fs
->fs_frag
> blks
)
1158 bsize
= (blks
- i
) * fs
->fs_fsize
;
1159 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
), bsize
,
1162 free(fs
->fs_csp
, M_UFSMNT
);
1167 ffs_csum_swap((struct csum
*)bp
->b_data
,
1168 (struct csum
*)space
, bsize
);
1171 memcpy(space
, bp
->b_data
, (u_int
)bsize
);
1173 space
= (char *)space
+ bsize
;
1177 if (fs
->fs_contigsumsize
> 0) {
1178 fs
->fs_maxcluster
= lp
= space
;
1179 for (i
= 0; i
< fs
->fs_ncg
; i
++)
1180 *lp
++ = fs
->fs_contigsumsize
;
1183 bsize
= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1184 fs
->fs_contigdirs
= space
;
1185 space
= (char *)space
+ bsize
;
1186 memset(fs
->fs_contigdirs
, 0, bsize
);
1187 /* Compatibility for old filesystems - XXX */
1188 if (fs
->fs_avgfilesize
<= 0)
1189 fs
->fs_avgfilesize
= AVFILESIZ
;
1190 if (fs
->fs_avgfpdir
<= 0)
1191 fs
->fs_avgfpdir
= AFPDIR
;
1192 fs
->fs_active
= NULL
;
1194 mp
->mnt_stat
.f_fsidx
.__fsid_val
[0] = (long)dev
;
1195 mp
->mnt_stat
.f_fsidx
.__fsid_val
[1] = makefstype(MOUNT_FFS
);
1196 mp
->mnt_stat
.f_fsid
= mp
->mnt_stat
.f_fsidx
.__fsid_val
[0];
1197 mp
->mnt_stat
.f_namemax
= FFS_MAXNAMLEN
;
1198 if (UFS_MPISAPPLEUFS(ump
)) {
1199 /* NeXT used to keep short symlinks in the inode even
1200 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
1201 * is probably -1, but we still need to be able to identify
1204 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
1205 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
1206 mp
->mnt_iflag
|= IMNT_DTYPE
;
1208 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
1209 ump
->um_dirblksiz
= DIRBLKSIZ
;
1210 if (ump
->um_maxsymlinklen
> 0)
1211 mp
->mnt_iflag
|= IMNT_DTYPE
;
1213 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
1215 mp
->mnt_fs_bshift
= fs
->fs_bshift
;
1216 mp
->mnt_dev_bshift
= DEV_BSHIFT
; /* XXX */
1217 mp
->mnt_flag
|= MNT_LOCAL
;
1218 mp
->mnt_iflag
|= IMNT_MPSAFE
;
1221 ump
->um_flags
|= UFS_NEEDSWAP
;
1223 ump
->um_mountp
= mp
;
1225 ump
->um_devvp
= devvp
;
1226 ump
->um_nindir
= fs
->fs_nindir
;
1227 ump
->um_lognindir
= ffs(fs
->fs_nindir
) - 1;
1228 ump
->um_bptrtodb
= fs
->fs_fshift
- DEV_BSHIFT
;
1229 ump
->um_seqinc
= fs
->fs_frag
;
1230 for (i
= 0; i
< MAXQUOTAS
; i
++)
1231 ump
->um_quotas
[i
] = NULLVP
;
1232 devvp
->v_specmountpoint
= mp
;
1233 if (ronly
== 0 && fs
->fs_snapinum
[0] != 0)
1234 ffs_snapshot_mount(mp
);
1237 KDASSERT(fs
->fs_ronly
== 0);
1239 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1240 * needs to create a new log file in-filesystem.
1242 ffs_statvfs(mp
, &mp
->mnt_stat
);
1244 error
= ffs_wapbl_start(mp
);
1246 free(fs
->fs_csp
, M_UFSMNT
);
1253 error
= ffs_quota2_mount(mp
);
1255 free(fs
->fs_csp
, M_UFSMNT
);
1259 if (fs
->fs_flags
& FS_DOQUOTA2
) {
1260 ump
->um_flags
|= UFS_QUOTA2
;
1261 uprintf("%s: options QUOTA2 not enabled%s\n",
1262 mp
->mnt_stat
.f_mntonname
,
1263 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
1264 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
1266 free(fs
->fs_csp
, M_UFSMNT
);
1274 * Initialize file-backed extended attributes on UFS1 file
1277 if (ump
->um_fstype
== UFS1
)
1278 ufs_extattr_uepm_init(&ump
->um_extattr
);
1279 #endif /* UFS_EXTATTR */
1284 if (mp
->mnt_wapbl_replay
) {
1285 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1286 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1287 mp
->mnt_wapbl_replay
= 0;
1291 fstrans_unmount(mp
);
1294 devvp
->v_specmountpoint
= NULL
;
1298 if (ump
->um_oldfscompat
)
1299 free(ump
->um_oldfscompat
, M_UFSMNT
);
1300 mutex_destroy(&ump
->um_lock
);
1301 free(ump
, M_UFSMNT
);
1302 mp
->mnt_data
= NULL
;
1308 * Sanity checks for loading old filesystem superblocks.
1309 * See ffs_oldfscompat_write below for unwound actions.
1311 * XXX - Parts get retired eventually.
1312 * Unfortunately new bits get added.
1315 ffs_oldfscompat_read(struct fs
*fs
, struct ufsmount
*ump
, daddr_t sblockloc
)
1320 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1321 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1324 if (!ump
->um_oldfscompat
)
1325 ump
->um_oldfscompat
= malloc(512 + 3*sizeof(int32_t),
1326 M_UFSMNT
, M_WAITOK
);
1328 memcpy(ump
->um_oldfscompat
, &fs
->fs_old_postbl_start
, 512);
1329 extrasave
= ump
->um_oldfscompat
;
1330 extrasave
+= 512/sizeof(int32_t);
1331 extrasave
[0] = fs
->fs_old_npsect
;
1332 extrasave
[1] = fs
->fs_old_interleave
;
1333 extrasave
[2] = fs
->fs_old_trackskew
;
1335 /* These fields will be overwritten by their
1336 * original values in fs_oldfscompat_write, so it is harmless
1337 * to modify them here.
1339 fs
->fs_cstotal
.cs_ndir
= fs
->fs_old_cstotal
.cs_ndir
;
1340 fs
->fs_cstotal
.cs_nbfree
= fs
->fs_old_cstotal
.cs_nbfree
;
1341 fs
->fs_cstotal
.cs_nifree
= fs
->fs_old_cstotal
.cs_nifree
;
1342 fs
->fs_cstotal
.cs_nffree
= fs
->fs_old_cstotal
.cs_nffree
;
1344 fs
->fs_maxbsize
= fs
->fs_bsize
;
1345 fs
->fs_time
= fs
->fs_old_time
;
1346 fs
->fs_size
= fs
->fs_old_size
;
1347 fs
->fs_dsize
= fs
->fs_old_dsize
;
1348 fs
->fs_csaddr
= fs
->fs_old_csaddr
;
1349 fs
->fs_sblockloc
= sblockloc
;
1351 fs
->fs_flags
= fs
->fs_old_flags
| (fs
->fs_flags
& FS_INTERNAL
);
1353 if (fs
->fs_old_postblformat
== FS_42POSTBLFMT
) {
1354 fs
->fs_old_nrpos
= 8;
1355 fs
->fs_old_npsect
= fs
->fs_old_nsect
;
1356 fs
->fs_old_interleave
= 1;
1357 fs
->fs_old_trackskew
= 0;
1360 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) {
1361 fs
->fs_maxfilesize
= (u_quad_t
) 1LL << 39;
1362 fs
->fs_qbmask
= ~fs
->fs_bmask
;
1363 fs
->fs_qfmask
= ~fs
->fs_fmask
;
1366 maxfilesize
= (u_int64_t
)0x80000000 * fs
->fs_bsize
- 1;
1367 if (fs
->fs_maxfilesize
> maxfilesize
)
1368 fs
->fs_maxfilesize
= maxfilesize
;
1370 /* Compatibility for old filesystems */
1371 if (fs
->fs_avgfilesize
<= 0)
1372 fs
->fs_avgfilesize
= AVFILESIZ
;
1373 if (fs
->fs_avgfpdir
<= 0)
1374 fs
->fs_avgfpdir
= AFPDIR
;
1378 fs
->fs_save_cgsize
= fs
->fs_cgsize
;
1379 fs
->fs_cgsize
= fs
->fs_bsize
;
1385 * Unwinding superblock updates for old filesystems.
1386 * See ffs_oldfscompat_read above for details.
1388 * XXX - Parts get retired eventually.
1389 * Unfortunately new bits get added.
1392 ffs_oldfscompat_write(struct fs
*fs
, struct ufsmount
*ump
)
1396 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1397 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1400 fs
->fs_old_time
= fs
->fs_time
;
1401 fs
->fs_old_cstotal
.cs_ndir
= fs
->fs_cstotal
.cs_ndir
;
1402 fs
->fs_old_cstotal
.cs_nbfree
= fs
->fs_cstotal
.cs_nbfree
;
1403 fs
->fs_old_cstotal
.cs_nifree
= fs
->fs_cstotal
.cs_nifree
;
1404 fs
->fs_old_cstotal
.cs_nffree
= fs
->fs_cstotal
.cs_nffree
;
1405 fs
->fs_old_flags
= fs
->fs_flags
;
1409 fs
->fs_cgsize
= fs
->fs_save_cgsize
;
1413 memcpy(&fs
->fs_old_postbl_start
, ump
->um_oldfscompat
, 512);
1414 extrasave
= ump
->um_oldfscompat
;
1415 extrasave
+= 512/sizeof(int32_t);
1416 fs
->fs_old_npsect
= extrasave
[0];
1417 fs
->fs_old_interleave
= extrasave
[1];
1418 fs
->fs_old_trackskew
= extrasave
[2];
1423 * unmount vfs operation
/*
 * ffs_unmount: flush the files of `mp', optionally mark the file system
 * clean and rewrite the superblock, shut down WAPBL replay/log state and
 * UFS1 extended attributes, close the device vnode and free the per-mount
 * structures.
 * NOTE(review): error-return statements, #ifdef lines and some declarations
 * of the original are not visible in this listing.
 */
1426 ffs_unmount(struct mount
*mp
, int mntflags
)
1428 struct lwp
*l
= curlwp
;
1429 struct ufsmount
*ump
= VFSTOUFS(mp
);
1430 struct fs
*fs
= ump
->um_fs
;
/* MNT_FORCE from the caller is translated into FORCECLOSE for the flush. */
1437 if (mntflags
& MNT_FORCE
)
1438 flags
|= FORCECLOSE
;
/* Flush all files; the statement taken on failure is not shown here. */
1439 if ((error
= ffs_flushfiles(mp
, flags
, l
)) != 0)
1441 error
= UFS_WAPBL_BEGIN(mp
);
/*
 * On a read-write file system, when the cylinder-group update succeeds and
 * FS_WASCLEAN is set in fs_clean, mark the superblock FS_ISCLEAN and write
 * it out synchronously (result deliberately ignored).
 */
1443 if (fs
->fs_ronly
== 0 &&
1444 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
1445 fs
->fs_clean
& FS_WASCLEAN
) {
1446 fs
->fs_clean
= FS_ISCLEAN
;
1448 (void) ffs_sbupdate(ump
, MNT_WAIT
);
/* Replay state and an active log must not both be present. */
1453 KASSERT(!(mp
->mnt_wapbl_replay
&& mp
->mnt_wapbl
));
/* Tear down leftover replay state; the debug assert expects fs_ronly. */
1454 if (mp
->mnt_wapbl_replay
) {
1455 KDASSERT(fs
->fs_ronly
);
1456 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1457 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1458 mp
->mnt_wapbl_replay
= 0;
/* Stop the log; the second argument is true only with doforce and MNT_FORCE. */
1460 error
= ffs_wapbl_stop(mp
, doforce
&& (mntflags
& MNT_FORCE
));
/* Extended-attribute shutdown is done for UFS1 mounts only. */
1466 if (ump
->um_fstype
== UFS1
) {
1467 ufs_extattr_stop(mp
, l
);
1468 ufs_extattr_uepm_destroy(&ump
->um_extattr
);
1470 #endif /* UFS_EXTATTR */
/* Detach the mount from the device vnode unless that vnode is VBAD. */
1472 if (ump
->um_devvp
->v_type
!= VBAD
)
1473 ump
->um_devvp
->v_specmountpoint
= NULL
;
/*
 * Lock and close the device vnode with the mode matching how the file
 * system was used (FREAD for read-only, FREAD|FWRITE otherwise), then
 * vput() it.
 */
1474 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1475 (void)VOP_CLOSE(ump
->um_devvp
, fs
->fs_ronly
? FREAD
: FREAD
| FWRITE
,
1477 vput(ump
->um_devvp
);
/* Release the fs_csp allocation and, if present, the oldfscompat save area. */
1478 free(fs
->fs_csp
, M_UFSMNT
);
1480 if (ump
->um_oldfscompat
!= NULL
)
1481 free(ump
->um_oldfscompat
, M_UFSMNT
);
/* Destroy the mount lock, finish snapshot state, free the ufsmount itself. */
1482 mutex_destroy(&ump
->um_lock
);
1483 ffs_snapshot_fini(ump
);
1484 free(ump
, M_UFSMNT
);
/* The mount no longer has private data and is no longer flagged local. */
1485 mp
->mnt_data
= NULL
;
1486 mp
->mnt_flag
&= ~MNT_LOCAL
;
1487 fstrans_unmount(mp
);
1492 * Flush out all the files in a filesystem.
/*
 * ffs_flushfiles: shut down quotas, flush the vnodes of `mp' (non-system
 * vnodes first, then everything), sync the device vnode and flush the
 * WAPBL log when one is attached.
 * NOTE(review): guards, #ifdef lines and return statements of the original
 * are not visible in this listing.
 */
1495 ffs_flushfiles(struct mount
*mp
, int flags
, struct lwp
*l
)
1498 struct ufsmount
*ump
;
/*
 * FORCECLOSE is stripped from flags here.  The condition guarding this
 * statement is not shown -- confirm against the full file.
 */
1502 flags
&= ~FORCECLOSE
;
/* Quota shutdown for both quota flavours; failure handling is not shown. */
1505 if ((error
= quota1_umount(mp
, flags
)) != 0)
1509 if ((error
= quota2_umount(mp
, flags
)) != 0)
/* First pass skips system vnodes (SKIPSYSTEM), then snapshots are unmounted. */
1512 if ((error
= vflush(mp
, 0, SKIPSYSTEM
| flags
)) != 0)
1514 ffs_snapshot_unmount(mp
);
1516 * Flush all the files.
1518 error
= vflush(mp
, NULLVP
, flags
);
1522 * Flush filesystem metadata.
/* Synchronous fsync of the device vnode, done with the vnode locked. */
1524 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1525 error
= VOP_FSYNC(ump
->um_devvp
, l
->l_cred
, FSYNC_WAIT
, 0, 0);
1526 VOP_UNLOCK(ump
->um_devvp
);
/* The statement guarded by this FORCECLOSE test is not shown in the listing. */
1527 if (flags
& FORCECLOSE
) /* XXXDBJ */
/* With a log attached, flush it with second argument 1. */
1533 if (mp
->mnt_wapbl
) {
1534 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
/* As above, the FORCECLOSE-guarded statement is not shown. */
1535 if (flags
& FORCECLOSE
)
1544 * Get file system statistics.
/*
 * ffs_statvfs: fill `sbp' with block and inode statistics taken from the
 * superblock of `mp', reading the counters under um_lock.
 * NOTE(review): the assignments of `ump' and `fs', the else-branch of the
 * f_bavail computation and the return statement are not visible in this
 * listing.
 */
1547 ffs_statvfs(struct mount
*mp
, struct statvfs
*sbp
)
1549 struct ufsmount
*ump
;
/* um_lock is held from here until all counters have been read. */
1554 mutex_enter(&ump
->um_lock
);
/* Block size and I/O size both come from fs_bsize; fragment size from fs_fsize. */
1555 sbp
->f_bsize
= fs
->fs_bsize
;
1556 sbp
->f_frsize
= fs
->fs_fsize
;
1557 sbp
->f_iosize
= fs
->fs_bsize
;
1558 sbp
->f_blocks
= fs
->fs_dsize
;
/*
 * Free space = free blocks converted with blkstofrags() + free fragments
 * + fs_pendingblocks converted with dbtofsb().
 */
1559 sbp
->f_bfree
= blkstofrags(fs
, fs
->fs_cstotal
.cs_nbfree
) +
1560 fs
->fs_cstotal
.cs_nffree
+ dbtofsb(fs
, fs
->fs_pendingblocks
);
/* Reserved space = fs_dsize * fs_minfree / 100, computed in 64 bits. */
1561 sbp
->f_bresvd
= ((u_int64_t
) fs
->fs_dsize
* (u_int64_t
)
1562 fs
->fs_minfree
) / (u_int64_t
) 100;
/* Available = free - reserved when free exceeds reserved (else-branch not shown). */
1563 if (sbp
->f_bfree
> sbp
->f_bresvd
)
1564 sbp
->f_bavail
= sbp
->f_bfree
- sbp
->f_bresvd
;
/* Inode totals: ncg * ipg less ROOTINO; free inodes include fs_pendinginodes. */
1567 sbp
->f_files
= fs
->fs_ncg
* fs
->fs_ipg
- ROOTINO
;
1568 sbp
->f_ffree
= fs
->fs_cstotal
.cs_nifree
+ fs
->fs_pendinginodes
;
1569 sbp
->f_favail
= sbp
->f_ffree
;
/* copy_statvfs_info() is called after um_lock is released. */
1571 mutex_exit(&ump
->um_lock
);
1572 copy_statvfs_info(sbp
, mp
);
1578 * Go through the disk queues to initiate sandbagged IO;
1579 * go through the inodes to write those that have been modified;
1580 * initiate the writing of the super block if it has been modified.
1582 * Note: we are always called with the filesystem marked `MPBUSY'.
/*
 * ffs_sync: walk the vnode list of `mp' writing back modified inodes, then
 * flush the device vnode, write a modified superblock and flush the WAPBL
 * log.  `waitfor' is compared against MNT_LAZY and MNT_WAIT to choose how
 * much is written and whether writes are waited for.
 * NOTE(review): loop-control statements (continue/goto), several
 * declarations and #ifdef lines of the original are not visible in this
 * listing, so the bodies of some tests below appear empty.
 */
1585 ffs_sync(struct mount
*mp
, int waitfor
, kauth_cred_t cred
)
1587 struct vnode
*vp
, *mvp
, *nvp
;
1589 struct ufsmount
*ump
= VFSTOUFS(mp
);
1591 int error
, allerror
= 0;
/* A modified superblock on a read-only file system is fatal. */
1595 if (fs
->fs_fmod
!= 0 && fs
->fs_ronly
!= 0) { /* XXX */
1596 printf("fs = %s\n", fs
->fs_fsmnt
);
1597 panic("update: rofs mod");
1600 /* Allocate a marker vnode. */
/* Enter a shared fstrans and record whether the file system is suspending. */
1603 fstrans_start(mp
, FSTRANS_SHARED
);
1604 is_suspending
= (fstrans_getstate(mp
) == FSTRANS_SUSPENDING
);
1606 * Write back each (modified) inode.
/* The vnode list is walked with mntvnode_lock held. */
1608 mutex_enter(&mntvnode_lock
);
1611 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1612 * and vclean() can be called indirectly
1614 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= nvp
) {
1615 nvp
= TAILQ_NEXT(vp
, v_mntvnodes
);
1617 * If the vnode that we are about to sync is no longer
1618 * associated with this mount point, start over.
1620 if (vp
->v_mount
!= mp
)
1623 * Don't interfere with concurrent scans of this FS.
1627 mutex_enter(vp
->v_interlock
);
1631 * Skip the vnode/inode if inaccessible.
/* Inaccessible here means: no inode, VI_XLOCK or VI_CLEAN set, or type VNON. */
1633 if (ip
== NULL
|| (vp
->v_iflag
& (VI_XLOCK
| VI_CLEAN
)) != 0 ||
1634 vp
->v_type
== VNON
) {
1635 mutex_exit(vp
->v_interlock
);
1640 * We deliberately update inode times here. This will
1641 * prevent a massive queue of updates accumulating, only
1642 * to be handled by a call to unmount.
1644 * XXX It would be better to have the syncer trickle these
1645 * out. Adjustment needed to allow registering vnodes for
1646 * sync when the vnode is clean, but the inode dirty. Or
1647 * have ufs itself trickle out inode updates.
1649 * If doing a lazy sync, we don't care about metadata or
1650 * data updates, because they are handled by each vnode's
1651 * synclist entry. In this case we are only interested in
1652 * writing back modified inodes.
/*
 * Nothing to do for this vnode when none of the listed inode flags is set
 * and either the sync is lazy or the vnode has no dirty buffers and a
 * clean UVM object.
 */
1654 if ((ip
->i_flag
& (IN_ACCESS
| IN_CHANGE
| IN_UPDATE
|
1655 IN_MODIFY
| IN_MODIFIED
| IN_ACCESSED
)) == 0 &&
1656 (waitfor
== MNT_LAZY
|| (LIST_EMPTY(&vp
->v_dirtyblkhd
) &&
1657 UVM_OBJ_IS_CLEAN(&vp
->v_uobj
)))) {
1658 mutex_exit(vp
->v_interlock
);
/* Block-device vnodes are passed over while the file system is suspending. */
1661 if (vp
->v_type
== VBLK
&& is_suspending
) {
1662 mutex_exit(vp
->v_interlock
);
/* The list lock is dropped around vget(), which is tried without waiting. */
1666 mutex_exit(&mntvnode_lock
);
1667 error
= vget(vp
, LK_EXCLUSIVE
| LK_NOWAIT
);
1669 mutex_enter(&mntvnode_lock
);
/* ENOENT from vget() is tested separately; its handling is not shown. */
1671 if (error
== ENOENT
) {
/*
 * Lazy sync: only the inode is updated via ffs_update(), bracketed by
 * UFS_WAPBL_BEGIN/END.  Otherwise the vnode is fsync'ed with FSYNC_NOLOG,
 * adding FSYNC_WAIT when waitfor is MNT_WAIT.
 */
1676 if (waitfor
== MNT_LAZY
) {
1677 error
= UFS_WAPBL_BEGIN(vp
->v_mount
);
1679 error
= ffs_update(vp
, NULL
, NULL
,
1681 UFS_WAPBL_END(vp
->v_mount
);
1684 error
= VOP_FSYNC(vp
, cred
, FSYNC_NOLOG
|
1685 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0), 0, 0);
1690 mutex_enter(&mntvnode_lock
);
1693 mutex_exit(&mntvnode_lock
);
1695 * Force stale file system control information to be flushed.
/*
 * Done only for non-lazy syncs, and only when the device vnode has output
 * in flight or dirty buffers; the fsync runs with the device vnode locked.
 */
1697 if (waitfor
!= MNT_LAZY
&& (ump
->um_devvp
->v_numoutput
> 0 ||
1698 !LIST_EMPTY(&ump
->um_devvp
->v_dirtyblkhd
))) {
1699 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1700 if ((error
= VOP_FSYNC(ump
->um_devvp
, cred
,
1701 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0) | FSYNC_NOLOG
,
1704 VOP_UNLOCK(ump
->um_devvp
);
/*
 * With no error so far, MNT_WAIT and no log attached, mntvnode_lock is
 * retaken.  What follows is not shown -- presumably another pass over the
 * vnode list; confirm against the full file.
 */
1705 if (allerror
== 0 && waitfor
== MNT_WAIT
&& !mp
->mnt_wapbl
) {
1706 mutex_enter(&mntvnode_lock
);
1710 #if defined(QUOTA) || defined(QUOTA2)
1714 * Write back modified superblock.
/* fs_time is stamped with time_second before the cylinder-group update. */
1716 if (fs
->fs_fmod
!= 0) {
1718 fs
->fs_time
= time_second
;
1719 error
= UFS_WAPBL_BEGIN(mp
);
1723 if ((error
= ffs_cgupdate(ump
, waitfor
)))
/* With a log attached, flush it with second argument 0. */
1730 if (mp
->mnt_wapbl
) {
1731 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
1743 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1744 * in from disk. If it is in core, wait for the lock bit to clear, then
1745 * return the inode locked. Detection and handling of mount points must be
1746 * done by the calling routine.
/*
 * ffs_vget: return in *vpp the vnode for inode number `ino' on `mp'.
 * The inode hash is consulted first; on a miss a new vnode and inode are
 * allocated, hashed, filled from the on-disk inode block and initialised.
 * NOTE(review): the retry jump, error paths, several declarations and the
 * final assignment/return are not visible in this listing.
 */
1749 ffs_vget(struct mount
*mp
, ino_t ino
, struct vnode
**vpp
)
1753 struct ufsmount
*ump
;
/* Hash lookup requesting an exclusive lock; the hit path is not shown. */
1763 if ((*vpp
= ufs_ihashget(dev
, ino
, LK_EXCLUSIVE
)) != NULL
)
1766 /* Allocate a new vnode/inode. */
1767 error
= getnewvnode(VT_UFS
, mp
, ffs_vnodeop_p
, NULL
, &vp
);
/* The in-core inode comes from ffs_inode_cache and may sleep (PR_WAITOK). */
1772 ip
= pool_cache_get(ffs_inode_cache
, PR_WAITOK
);
1775 * If someone beat us to it, put back the freshly allocated
1776 * vnode/inode pair and retry.
/* The re-check is made with ufs_hashlock held and without taking a lock (0). */
1778 mutex_enter(&ufs_hashlock
);
1779 if (ufs_ihashget(dev
, ino
, 0) != NULL
) {
1780 mutex_exit(&ufs_hashlock
);
1782 pool_cache_put(ffs_inode_cache
, ip
);
1786 vp
->v_vflag
|= VV_LOCKSWORK
;
1789 * XXX MFS ends up here, too, to allocate an inode. Should we
1790 * XXX create another pool for MFS inodes?
/* Start from a zeroed inode, then point it at the mount's superblock. */
1793 memset(ip
, 0, sizeof(struct inode
));
1797 ip
->i_fs
= fs
= ump
->um_fs
;
1800 #if defined(QUOTA) || defined(QUOTA2)
1805 * Initialize genfs node, we might proceed to destroy it in
1808 genfs_node_init(vp
, &ffs_genfsops
);
1811 * Put it onto its hash chain and lock it so that other requests for
1812 * this inode will block if they arrive while we are sleeping waiting
1813 * for old data structures to be purged or for the contents of the
1814 * disk portion of this inode to be read.
/* The hash insertion itself is not shown; ufs_hashlock is released here. */
1818 mutex_exit(&ufs_hashlock
);
1820 /* Read in the disk contents for the inode, copy into the inode. */
/* One fs_bsize block is read at the device address derived from `ino'. */
1821 error
= bread(ump
->um_devvp
, fsbtodb(fs
, ino_to_fsba(fs
, ino
)),
1822 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
1826 * The inode does not contain anything useful, so it would
1827 * be misleading to leave it on its hash chain. With mode
1828 * still zero, it will be unlinked and returned to the free
/*
 * The dinode buffer is taken from the UFS1 cache for UFS1 mounts; the
 * other assignment uses the UFS2 cache (the else line is not shown).
 */
1837 if (ip
->i_ump
->um_fstype
== UFS1
)
1838 ip
->i_din
.ffs1_din
= pool_cache_get(ffs_dinode1_cache
,
1841 ip
->i_din
.ffs2_din
= pool_cache_get(ffs_dinode2_cache
,
/* Fill the in-core inode from the buffer just read. */
1843 ffs_load_inode(bp
, ip
, fs
, ino
);
1847 * Initialize the vnode from the inode, check for aliases.
1848 * Note that the underlying vnode may have changed.
/* vp is passed by address, so ufs_vinit() may replace it. */
1851 ufs_vinit(mp
, ffs_specop_p
, ffs_fifoop_p
, &vp
);
1854 * Finish inode initialization now that aliasing has been resolved.
1857 ip
->i_devvp
= ump
->um_devvp
;
1861 * Ensure that uid and gid are correct. This is a temporary
1862 * fix until fsck has been changed to do the update.
/* For inode formats older than FS_44INODEFMT the old uid/gid fields are used. */
1865 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) { /* XXX */
1866 ip
->i_uid
= ip
->i_ffs1_ouid
; /* XXX */
1867 ip
->i_gid
= ip
->i_ffs1_ogid
; /* XXX */
/* Tell UVM the file size recorded in the inode. */
1869 uvm_vnp_setsize(vp
, ip
->i_size
);
1875 * File handle to vnode
1877 * Have to be really careful about stale file handles:
1878 * - check that the inode number is valid
1879 * - call ffs_vget() to get the locked inode
1880 * - check for an unallocated inode (i_mode == 0)
1881 * - check that the given client host has export rights and return
1882 * those rights via exflagsp and credanonp
/*
 * ffs_fhtovp: validate the file handle `fhp' and hand it to ufs_fhtovp()
 * to obtain the vnode in *vpp.
 * NOTE(review): declarations and the error returns taken by the two checks
 * below are not visible in this listing.
 */
1885 ffs_fhtovp(struct mount
*mp
, struct fid
*fhp
, struct vnode
**vpp
)
/* The handle must be exactly the size of a struct ufid. */
1890 if (fhp
->fid_len
!= sizeof(struct ufid
))
/* Work on a local copy of the handle. */
1893 memcpy(&ufh
, fhp
, sizeof(ufh
));
1894 fs
= VFSTOUFS(mp
)->um_fs
;
/* The inode number must lie in [ROOTINO, fs_ncg * fs_ipg). */
1895 if (ufh
.ufid_ino
< ROOTINO
||
1896 ufh
.ufid_ino
>= fs
->fs_ncg
* fs
->fs_ipg
)
1898 return (ufs_fhtovp(mp
, &ufh
, vpp
));
1902 * Vnode pointer to File handle
/*
 * ffs_vptofh: build a struct ufid file handle for `vp' in the caller's
 * buffer `fhp'; *fh_size is set to sizeof(struct ufid) in both paths shown.
 * NOTE(review): declarations, the assignment of `ip' and the return
 * statements are not visible in this listing.
 */
1906 ffs_vptofh(struct vnode
*vp
, struct fid
*fhp
, size_t *fh_size
)
/* Buffer too small: report the required size (the return is not shown). */
1911 if (*fh_size
< sizeof(struct ufid
)) {
1912 *fh_size
= sizeof(struct ufid
);
1916 *fh_size
= sizeof(struct ufid
);
/* The handle is assembled in a zeroed local and then copied out whole. */
1917 memset(&ufh
, 0, sizeof(ufh
));
1918 ufh
.ufid_len
= sizeof(struct ufid
);
1919 ufh
.ufid_ino
= ip
->i_number
;
1920 ufh
.ufid_gen
= ip
->i_gen
;
1921 memcpy(fhp
, &ufh
, sizeof(ufh
));
/*
 * Reference-counted set-up of the three FFS pool caches.
 * NOTE(review): the enclosing function signature is not visible in this
 * listing (presumably the FFS init routine), nor is the statement guarded
 * by the counter test -- presumably an early return when already
 * initialised.
 */
1928 if (ffs_initcount
++ > 0)
/* Cache for in-core inodes. */
1931 ffs_inode_cache
= pool_cache_init(sizeof(struct inode
), 0, 0, 0,
1932 "ffsino", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
/* Cache for UFS1 on-disk inode copies. */
1933 ffs_dinode1_cache
= pool_cache_init(sizeof(struct ufs1_dinode
), 0, 0, 0,
1934 "ffsdino1", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
/* Cache for UFS2 on-disk inode copies. */
1935 ffs_dinode2_cache
= pool_cache_init(sizeof(struct ufs2_dinode
), 0, 0, 0,
1936 "ffsdino2", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
/*
 * Reference-counted teardown of the three FFS pool caches.
 * NOTE(review): the enclosing function signature is not visible in this
 * listing (presumably the FFS teardown routine), nor is the statement
 * guarded by the counter test -- presumably an early return while other
 * users remain.
 */
1950 if (--ffs_initcount
> 0)
/* Destroy order: dinode2, dinode1, then the inode cache. */
1954 pool_cache_destroy(ffs_dinode2_cache
);
1955 pool_cache_destroy(ffs_dinode1_cache
);
1956 pool_cache_destroy(ffs_inode_cache
);
1960 * Write a superblock and associated information back to disk.
/*
 * ffs_sbupdate: copy the in-core superblock of `mp' into a buffer for the
 * superblock location, convert the copy for disk and write it.
 * NOTE(review): declarations, the write calls and the return statement are
 * not visible in this listing.
 */
1963 ffs_sbupdate(struct ufsmount
*mp
, int waitfor
)
1965 struct fs
*fs
= mp
->um_fs
;
/* Buffer of fs_sbsize bytes at fs_sblockloc, expressed in DEV_BSIZE units. */
1970 error
= ffs_getblk(mp
->um_devvp
,
1971 fs
->fs_sblockloc
/ DEV_BSIZE
, FFS_NOBLK
,
1972 fs
->fs_sbsize
, false, &bp
);
/* FS_INTERNAL bits are removed for the copy and put back afterwards. */
1975 saveflag
= fs
->fs_flags
& FS_INTERNAL
;
1976 fs
->fs_flags
&= ~FS_INTERNAL
;
1978 memcpy(bp
->b_data
, fs
, fs
->fs_sbsize
);
/* Old-format fix-ups are applied to the buffer copy, not to the in-core fs. */
1980 ffs_oldfscompat_write((struct fs
*)bp
->b_data
, mp
);
/* Byte-swap the buffer copy in place for mounts flagged UFS_NEEDSWAP. */
1982 if (mp
->um_flags
& UFS_NEEDSWAP
)
1983 ffs_sb_swap((struct fs
*)bp
->b_data
, (struct fs
*)bp
->b_data
);
1985 fs
->fs_flags
|= saveflag
;
/* The statements selected by this test are not shown in the listing. */
1987 if (waitfor
== MNT_WAIT
)
/*
 * ffs_cgupdate: write the superblock via ffs_sbupdate(), then copy the
 * summary area into buffers starting at fs_csaddr, one fs_frag-sized step
 * at a time.
 * NOTE(review): declarations, the initial value of `space', the `else'
 * between swap and memcpy, the write calls and the return statement are
 * not visible in this listing.
 */
1995 ffs_cgupdate(struct ufsmount
*mp
, int waitfor
)
1997 struct fs
*fs
= mp
->um_fs
;
2001 int i
, size
, error
= 0, allerror
= 0;
/* The superblock goes first; its result seeds allerror. */
2003 allerror
= ffs_sbupdate(mp
, waitfor
);
/* Number of fragments needed to hold fs_cssize bytes. */
2004 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
2006 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
/* Full block by default; the last step covers only the remaining fragments. */
2007 size
= fs
->fs_bsize
;
2008 if (i
+ fs
->fs_frag
> blks
)
2009 size
= (blks
- i
) * fs
->fs_fsize
;
2010 error
= ffs_getblk(mp
->um_devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
),
2011 FFS_NOBLK
, size
, false, &bp
);
/* UFS_NEEDSWAP mounts convert into the buffer; a plain memcpy also appears below. */
2015 if (mp
->um_flags
& UFS_NEEDSWAP
)
2016 ffs_csum_swap((struct csum
*)space
,
2017 (struct csum
*)bp
->b_data
, size
);
2020 memcpy(bp
->b_data
, space
, (u_int
)size
);
/* Advance the source pointer by the number of bytes just handled. */
2021 space
= (char *)space
+ size
;
/* The statements selected by this test are not shown in the listing. */
2022 if (waitfor
== MNT_WAIT
)
/* Tests for a new error while allerror is still zero; the guarded statement is not shown. */
2027 if (!allerror
&& error
)
/*
 * ffs_extattrctl: dispatch an extended-attribute control request.
 * UFS1 mounts go to ufs_extattrctl(); the other return shown goes to
 * vfs_stdextattrctl().  All arguments are passed through unchanged.
 * NOTE(review): braces and any preprocessor guards of the original are not
 * visible in this listing.
 */
2033 ffs_extattrctl(struct mount
*mp
, int cmd
, struct vnode
*vp
,
2034 int attrnamespace
, const char *attrname
)
2038 * File-backed extended attributes are only supported on UFS1.
2039 * UFS2 has native extended attributes.
2041 if (VFSTOUFS(mp
)->um_fstype
== UFS1
)
2042 return (ufs_extattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
2044 return (vfs_stdextattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
/*
 * ffs_suspendctl: suspend or resume `mp' according to `cmd'.
 * SUSPEND_SUSPEND moves the file system through FSTRANS_SUSPENDING, a
 * synchronous ffs_sync() and FSTRANS_SUSPENDED, then flushes the WAPBL log;
 * SUSPEND_RESUME returns it to FSTRANS_NORMAL.
 * NOTE(review): the switch statement, the tests between the steps and the
 * return statements are not visible in this listing.
 */
2048 ffs_suspendctl(struct mount
*mp
, int cmd
)
2051 struct lwp
*l
= curlwp
;
2054 case SUSPEND_SUSPEND
:
/* Enter the suspending state; failure handling is not shown. */
2055 if ((error
= fstrans_setstate(mp
, FSTRANS_SUSPENDING
)) != 0)
/* Sync everything, waiting for completion, with the calling process's credentials. */
2057 error
= ffs_sync(mp
, MNT_WAIT
, l
->l_proc
->p_cred
);
2059 error
= fstrans_setstate(mp
, FSTRANS_SUSPENDED
);
/* The log is flushed only when no error occurred and a log is attached. */
2061 if (error
== 0 && mp
->mnt_wapbl
)
2062 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
/* State reset to normal, result ignored; presumably the failure path -- its guard is not shown. */
2065 (void) fstrans_setstate(mp
, FSTRANS_NORMAL
);
2070 case SUSPEND_RESUME
:
2071 return fstrans_setstate(mp
, FSTRANS_NORMAL
);
2079 * Synch vnode for a mounted file system.
2082 ffs_vfs_fsync(vnode_t
*vp
, int flags
)
2084 int error
, i
, pflags
;
2089 KASSERT(vp
->v_type
== VBLK
);
2090 KASSERT(vp
->v_specmountpoint
!= NULL
);
2093 * Flush all dirty data associated with the vnode.
2095 pflags
= PGO_ALLPAGES
| PGO_CLEANIT
;
2096 if ((flags
& FSYNC_WAIT
) != 0)
2097 pflags
|= PGO_SYNCIO
;
2098 mutex_enter(vp
->v_interlock
);
2099 error
= VOP_PUTPAGES(vp
, 0, 0, pflags
);
2104 mp
= vp
->v_specmountpoint
;
2105 if (mp
&& mp
->mnt_wapbl
) {
2107 * Don't bother writing out metadata if the syncer is
2108 * making the request. We will let the sync vnode
2109 * write it out in a single burst through a call to
2112 if ((flags
& (FSYNC_DATAONLY
| FSYNC_LAZY
| FSYNC_NOLOG
)) != 0)
2116 * Don't flush the log if the vnode being flushed
2117 * contains no dirty buffers that could be in the log.
2119 if (!LIST_EMPTY(&vp
->v_dirtyblkhd
)) {
2120 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
2125 if ((flags
& FSYNC_WAIT
) != 0) {
2126 mutex_enter(vp
->v_interlock
);
2127 while (vp
->v_numoutput
)
2128 cv_wait(&vp
->v_cv
, vp
->v_interlock
);
2129 mutex_exit(vp
->v_interlock
);
2136 error
= vflushbuf(vp
, (flags
& FSYNC_WAIT
) != 0);
2137 if (error
== 0 && (flags
& FSYNC_CACHE
) != 0) {
2139 (void)VOP_IOCTL(vp
, DIOCCACHESYNC
, &i
, FWRITE
,