1 /* $NetBSD: ffs_vfsops.c,v 1.253 2009/11/04 09:45:05 hannken Exp $ */
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc, and by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (c) 1989, 1991, 1993, 1994
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.253 2009/11/04 09:45:05 hannken Exp $");
66 #if defined(_KERNEL_OPT)
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
81 #include <sys/device.h>
84 #include <sys/disklabel.h>
85 #include <sys/ioctl.h>
86 #include <sys/errno.h>
87 #include <sys/malloc.h>
90 #include <sys/sysctl.h>
92 #include <sys/kauth.h>
93 #include <sys/wapbl.h>
94 #include <sys/fstrans.h>
95 #include <sys/module.h>
97 #include <miscfs/genfs/genfs.h>
98 #include <miscfs/specfs/specdev.h>
100 #include <ufs/ufs/quota.h>
101 #include <ufs/ufs/ufsmount.h>
102 #include <ufs/ufs/inode.h>
103 #include <ufs/ufs/dir.h>
104 #include <ufs/ufs/ufs_extern.h>
105 #include <ufs/ufs/ufs_bswap.h>
106 #include <ufs/ufs/ufs_wapbl.h>
108 #include <ufs/ffs/fs.h>
109 #include <ufs/ffs/ffs_extern.h>
111 MODULE(MODULE_CLASS_VFS
, ffs
, NULL
);
113 static int ffs_vfs_fsync(vnode_t
*, int);
/*
 * sysctl log handle for this module: its address is passed to every
 * sysctl_createv() call made from ffs_modcmd(), and to sysctl_teardown()
 * in the MODULE_CMD_FINI path.
 */
115 static struct sysctllog
*ffs_sysctl_log
;
117 /* how many times ffs_init() was called */
118 int ffs_initcount
= 0;
120 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc
;
121 extern const struct vnodeopv_desc ffs_specop_opv_desc
;
122 extern const struct vnodeopv_desc ffs_fifoop_opv_desc
;
124 const struct vnodeopv_desc
* const ffs_vnodeopv_descs
[] = {
125 &ffs_vnodeop_opv_desc
,
126 &ffs_specop_opv_desc
,
127 &ffs_fifoop_opv_desc
,
131 struct vfsops ffs_vfsops
= {
133 sizeof (struct ufs_args
),
151 genfs_renamelock_enter
,
152 genfs_renamelock_exit
,
159 static const struct genfs_ops ffs_genfsops
= {
160 .gop_size
= ffs_gop_size
,
161 .gop_alloc
= ufs_gop_alloc
,
162 .gop_write
= genfs_gop_write
,
163 .gop_markupdate
= ufs_gop_markupdate
,
166 static const struct ufs_ops ffs_ufsops
= {
167 .uo_itimes
= ffs_itimes
,
168 .uo_update
= ffs_update
,
169 .uo_truncate
= ffs_truncate
,
170 .uo_valloc
= ffs_valloc
,
171 .uo_vfree
= ffs_vfree
,
172 .uo_balloc
= ffs_balloc
,
173 .uo_unmark_vnode
= (void (*)(vnode_t
*))nullop
,
177 ffs_modcmd(modcmd_t cmd
, void *arg
)
182 extern int doasyncfree
;
184 extern int ffs_log_changeopt
;
187 case MODULE_CMD_INIT
:
188 error
= vfs_attach(&ffs_vfsops
);
192 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
194 CTLTYPE_NODE
, "vfs", NULL
,
197 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
200 SYSCTL_DESCR("Berkeley Fast File System"),
202 CTL_VFS
, 1, CTL_EOL
);
205 * @@@ should we even bother with these first three?
207 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
208 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
209 CTLTYPE_INT
, "doclusterread", NULL
,
210 sysctl_notavail
, 0, NULL
, 0,
211 CTL_VFS
, 1, FFS_CLUSTERREAD
, CTL_EOL
);
212 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
213 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
214 CTLTYPE_INT
, "doclusterwrite", NULL
,
215 sysctl_notavail
, 0, NULL
, 0,
216 CTL_VFS
, 1, FFS_CLUSTERWRITE
, CTL_EOL
);
217 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
218 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
219 CTLTYPE_INT
, "doreallocblks", NULL
,
220 sysctl_notavail
, 0, NULL
, 0,
221 CTL_VFS
, 1, FFS_REALLOCBLKS
, CTL_EOL
);
223 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
224 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
225 CTLTYPE_INT
, "doasyncfree",
226 SYSCTL_DESCR("Release dirty blocks asynchronously"),
227 NULL
, 0, &doasyncfree
, 0,
228 CTL_VFS
, 1, FFS_ASYNCFREE
, CTL_EOL
);
230 sysctl_createv(&ffs_sysctl_log
, 0, NULL
, NULL
,
231 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
,
232 CTLTYPE_INT
, "log_changeopt",
233 SYSCTL_DESCR("Log changes in optimization strategy"),
234 NULL
, 0, &ffs_log_changeopt
, 0,
235 CTL_VFS
, 1, FFS_LOG_CHANGEOPT
, CTL_EOL
);
237 case MODULE_CMD_FINI
:
238 error
= vfs_detach(&ffs_vfsops
);
241 sysctl_teardown(&ffs_sysctl_log
);
/*
 * Global pool caches.
 * NOTE(review): judging by the names these presumably back in-core inodes
 * and the UFS1/UFS2 on-disk inode copies; confirm against the code that
 * creates and destroys them.
 */
251 pool_cache_t ffs_inode_cache
;
252 pool_cache_t ffs_dinode1_cache
;
253 pool_cache_t ffs_dinode2_cache
;
/*
 * Forward declarations of the file-local helpers for old-format
 * superblocks: ffs_oldfscompat_read() adjusts a superblock after it has
 * been loaded, and ffs_oldfscompat_write() unwinds those adjustments.
 * Both are defined later in this file.
 */
255 static void ffs_oldfscompat_read(struct fs
*, struct ufsmount
*, daddr_t
);
256 static void ffs_oldfscompat_write(struct fs
*, struct ufsmount
*);
259 * Called by main() when ffs is going to be mounted as root.
267 struct lwp
*l
= curlwp
; /* XXX */
268 struct ufsmount
*ump
;
271 if (device_class(root_device
) != DV_DISK
)
274 if ((error
= vfs_rootmountalloc(MOUNT_FFS
, "root_device", &mp
))) {
280 * We always need to be able to mount the root file system.
282 mp
->mnt_flag
|= MNT_FORCE
;
283 if ((error
= ffs_mountfs(rootvp
, mp
, l
)) != 0) {
284 vfs_unbusy(mp
, false, NULL
);
288 mp
->mnt_flag
&= ~MNT_FORCE
;
289 mutex_enter(&mountlist_lock
);
290 CIRCLEQ_INSERT_TAIL(&mountlist
, mp
, mnt_list
);
291 mutex_exit(&mountlist_lock
);
294 memset(fs
->fs_fsmnt
, 0, sizeof(fs
->fs_fsmnt
));
295 (void)copystr(mp
->mnt_stat
.f_mntonname
, fs
->fs_fsmnt
, MNAMELEN
- 1, 0);
296 (void)ffs_statvfs(mp
, &mp
->mnt_stat
);
297 vfs_unbusy(mp
, false, NULL
);
298 setrootfstime((time_t)fs
->fs_time
);
308 ffs_mount(struct mount
*mp
, const char *path
, void *data
, size_t *data_len
)
310 struct lwp
*l
= curlwp
;
311 struct vnode
*devvp
= NULL
;
312 struct ufs_args
*args
= data
;
313 struct ufsmount
*ump
= NULL
;
315 int error
= 0, flags
, update
;
318 if (*data_len
< sizeof *args
)
321 if (mp
->mnt_flag
& MNT_GETARGS
) {
326 *data_len
= sizeof *args
;
330 update
= mp
->mnt_flag
& MNT_UPDATE
;
332 /* Check arguments */
333 if (args
->fspec
!= NULL
) {
335 * Look up the name and verify that it's sane.
337 error
= namei_simple_user(args
->fspec
,
338 NSM_FOLLOW_NOEMULROOT
, &devvp
);
344 * Be sure this is a valid block device
346 if (devvp
->v_type
!= VBLK
)
348 else if (bdevsw_lookup(devvp
->v_rdev
) == NULL
)
352 * Be sure we're still naming the same device
353 * used for our initial mount
356 if (devvp
!= ump
->um_devvp
) {
357 if (devvp
->v_rdev
!= ump
->um_devvp
->v_rdev
)
361 devvp
= ump
->um_devvp
;
368 /* New mounts must have a filename for the device */
371 /* Use the extant mount */
373 devvp
= ump
->um_devvp
;
379 * If mount by non-root, then verify that user has necessary
380 * permissions on the device.
382 * Permission to update a mount is checked higher, so here we presume
383 * updating the mount is okay (for example, as far as securelevel goes)
384 * which leaves us with the normal check.
389 (mp
->mnt_iflag
& IMNT_WANTRDWR
) != 0 :
390 (mp
->mnt_flag
& MNT_RDONLY
) == 0)
391 accessmode
|= VWRITE
;
392 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
393 error
= genfs_can_mount(devvp
, accessmode
, l
->l_cred
);
394 VOP_UNLOCK(devvp
, 0);
403 /* WAPBL can only be enabled on a r/w mount. */
404 if ((mp
->mnt_flag
& MNT_RDONLY
) && !(mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
405 mp
->mnt_flag
&= ~MNT_LOG
;
408 mp
->mnt_flag
&= ~MNT_LOG
;
414 if (mp
->mnt_flag
& MNT_RDONLY
)
417 xflags
= FREAD
| FWRITE
;
418 error
= VOP_OPEN(devvp
, xflags
, FSCRED
);
421 error
= ffs_mountfs(devvp
, mp
, l
);
423 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
424 (void)VOP_CLOSE(devvp
, xflags
, NOCRED
);
425 VOP_UNLOCK(devvp
, 0);
437 * The initial mount got a reference on this
438 * device, so drop the one obtained via
445 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
447 * Changing from r/w to r/o
450 if (mp
->mnt_flag
& MNT_FORCE
)
452 error
= ffs_flushfiles(mp
, flags
, l
);
454 error
= UFS_WAPBL_BEGIN(mp
);
456 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
457 fs
->fs_clean
& FS_WASCLEAN
) {
458 if (mp
->mnt_flag
& MNT_SOFTDEP
)
459 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
460 fs
->fs_clean
= FS_ISCLEAN
;
461 (void) ffs_sbupdate(ump
, MNT_WAIT
);
470 if ((mp
->mnt_flag
& MNT_LOG
) == 0) {
471 error
= ffs_wapbl_stop(mp
, mp
->mnt_flag
& MNT_FORCE
);
477 if (fs
->fs_ronly
== 0 && (mp
->mnt_flag
& MNT_RDONLY
)) {
479 * Finish change from r/w to r/o
485 if (mp
->mnt_flag
& MNT_RELOAD
) {
486 error
= ffs_reload(mp
, l
->l_cred
, l
);
491 if (fs
->fs_ronly
&& (mp
->mnt_iflag
& IMNT_WANTRDWR
)) {
493 * Changing from read-only to read/write
499 if (fs
->fs_flags
& FS_DOWAPBL
) {
500 printf("%s: replaying log to disk\n",
502 KDASSERT(mp
->mnt_wapbl_replay
);
503 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
508 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
509 fs
->fs_clean
= FS_WASCLEAN
;
512 if (fs
->fs_snapinum
[0] != 0)
513 ffs_snapshot_mount(mp
);
517 error
= ffs_wapbl_start(mp
);
522 if (args
->fspec
== NULL
)
526 error
= set_statvfs_info(path
, UIO_USERSPACE
, args
->fspec
,
527 UIO_USERSPACE
, mp
->mnt_op
->vfs_name
, mp
, l
);
529 (void)strncpy(fs
->fs_fsmnt
, mp
->mnt_stat
.f_mntonname
,
530 sizeof(fs
->fs_fsmnt
));
531 fs
->fs_flags
&= ~FS_DOSOFTDEP
;
532 if (fs
->fs_fmod
!= 0) { /* XXX */
536 if (fs
->fs_clean
& FS_WASCLEAN
)
537 fs
->fs_time
= time_second
;
539 printf("%s: file system not clean (fs_clean=%#x); "
540 "please fsck(8)\n", mp
->mnt_stat
.f_mntfromname
,
542 printf("%s: lost blocks %" PRId64
" files %d\n",
543 mp
->mnt_stat
.f_mntfromname
, fs
->fs_pendingblocks
,
544 fs
->fs_pendinginodes
);
546 err
= UFS_WAPBL_BEGIN(mp
);
548 (void) ffs_cgupdate(ump
, MNT_WAIT
);
552 if ((mp
->mnt_flag
& MNT_SOFTDEP
) != 0) {
553 printf("%s: `-o softdep' is no longer supported, "
554 "consider `-o log'\n", mp
->mnt_stat
.f_mntfromname
);
555 mp
->mnt_flag
&= ~MNT_SOFTDEP
;
566 * Reload all incore data for a filesystem (used after running fsck on
567 * the root filesystem and finding things to fix). The filesystem must
568 * be mounted read-only.
570 * Things to do to update the mount:
571 * 1) invalidate all cached meta-data.
572 * 2) re-read superblock from disk.
573 * 3) re-read summary information from disk.
574 * 4) invalidate all inactive vnodes.
575 * 5) invalidate all cached file data.
576 * 6) re-read inode data for all active vnodes.
579 ffs_reload(struct mount
*mp
, kauth_cred_t cred
, struct lwp
*l
)
581 struct vnode
*vp
, *mvp
, *devvp
;
585 struct fs
*fs
, *newfs
;
586 struct partinfo dpart
;
587 int i
, blks
, size
, error
;
589 struct ufsmount
*ump
;
592 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
597 * Step 1: invalidate all cached meta-data.
599 devvp
= ump
->um_devvp
;
600 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
601 error
= vinvalbuf(devvp
, 0, cred
, l
, 0, 0);
602 VOP_UNLOCK(devvp
, 0);
604 panic("ffs_reload: dirty1");
606 * Step 2: re-read superblock from disk.
609 if (VOP_IOCTL(devvp
, DIOCGPART
, &dpart
, FREAD
, NOCRED
) != 0)
612 size
= dpart
.disklab
->d_secsize
;
613 /* XXX we don't handle possibility that superblock moved. */
614 error
= bread(devvp
, fs
->fs_sblockloc
/ size
, fs
->fs_sbsize
,
620 newfs
= malloc(fs
->fs_sbsize
, M_UFSMNT
, M_WAITOK
);
621 memcpy(newfs
, bp
->b_data
, fs
->fs_sbsize
);
623 if (ump
->um_flags
& UFS_NEEDSWAP
) {
624 ffs_sb_swap((struct fs
*)bp
->b_data
, newfs
);
625 fs
->fs_flags
|= FS_SWAPPED
;
628 fs
->fs_flags
&= ~FS_SWAPPED
;
629 if ((newfs
->fs_magic
!= FS_UFS1_MAGIC
&&
630 newfs
->fs_magic
!= FS_UFS2_MAGIC
)||
631 newfs
->fs_bsize
> MAXBSIZE
||
632 newfs
->fs_bsize
< sizeof(struct fs
)) {
634 free(newfs
, M_UFSMNT
);
635 return (EIO
); /* XXX needs translation */
637 /* Save the old fs_sblockloc for ffs_oldfscompat_read(). */
638 sblockloc
= fs
->fs_sblockloc
;
640 * Copy pointer fields back into superblock before copying in XXX
641 * new superblock. These should really be in the ufsmount. XXX
642 * Note that important parameters (eg fs_ncg) are unchanged.
644 newfs
->fs_csp
= fs
->fs_csp
;
645 newfs
->fs_maxcluster
= fs
->fs_maxcluster
;
646 newfs
->fs_contigdirs
= fs
->fs_contigdirs
;
647 newfs
->fs_ronly
= fs
->fs_ronly
;
648 newfs
->fs_active
= fs
->fs_active
;
649 memcpy(fs
, newfs
, (u_int
)fs
->fs_sbsize
);
651 free(newfs
, M_UFSMNT
);
653 /* Recheck for apple UFS filesystem */
654 ump
->um_flags
&= ~UFS_ISAPPLEUFS
;
655 /* First check to see if this is tagged as an Apple UFS filesystem
658 if ((VOP_IOCTL(devvp
, DIOCGPART
, &dpart
, FREAD
, cred
) == 0) &&
659 (dpart
.part
->p_fstype
== FS_APPLEUFS
)) {
660 ump
->um_flags
|= UFS_ISAPPLEUFS
;
664 /* Manually look for an apple ufs label, and if a valid one
665 * is found, then treat it like an Apple UFS filesystem anyway
667 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ size
),
668 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
673 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
674 (struct appleufslabel
*)bp
->b_data
, NULL
);
676 ump
->um_flags
|= UFS_ISAPPLEUFS
;
681 if (ump
->um_flags
& UFS_ISAPPLEUFS
)
685 if (UFS_MPISAPPLEUFS(ump
)) {
686 /* see comment about NeXT below */
687 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
688 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
689 mp
->mnt_iflag
|= IMNT_DTYPE
;
691 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
692 ump
->um_dirblksiz
= DIRBLKSIZ
;
693 if (ump
->um_maxsymlinklen
> 0)
694 mp
->mnt_iflag
|= IMNT_DTYPE
;
696 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
698 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
700 mutex_enter(&ump
->um_lock
);
701 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
702 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
703 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
704 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
705 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
706 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
707 mutex_exit(&ump
->um_lock
);
711 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
712 fs
->fs_pendingblocks
= 0;
713 fs
->fs_pendinginodes
= 0;
715 mutex_exit(&ump
->um_lock
);
717 ffs_statvfs(mp
, &mp
->mnt_stat
);
719 * Step 3: re-read summary information from disk.
721 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
723 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
725 if (i
+ fs
->fs_frag
> blks
)
726 size
= (blks
- i
) * fs
->fs_fsize
;
727 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
), size
,
734 if (UFS_FSNEEDSWAP(fs
))
735 ffs_csum_swap((struct csum
*)bp
->b_data
,
736 (struct csum
*)space
, size
);
739 memcpy(space
, bp
->b_data
, (size_t)size
);
740 space
= (char *)space
+ size
;
743 if (fs
->fs_snapinum
[0] != 0)
744 ffs_snapshot_mount(mp
);
746 * We no longer know anything about clusters per cylinder group.
748 if (fs
->fs_contigsumsize
> 0) {
749 lp
= fs
->fs_maxcluster
;
750 for (i
= 0; i
< fs
->fs_ncg
; i
++)
751 *lp
++ = fs
->fs_contigsumsize
;
754 /* Allocate a marker vnode. */
755 if ((mvp
= vnalloc(mp
)) == NULL
)
758 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
759 * and vclean() can be called indirectly
761 mutex_enter(&mntvnode_lock
);
763 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= vunmark(mvp
)) {
765 if (vp
->v_mount
!= mp
|| vismarker(vp
))
768 * Step 4: invalidate all inactive vnodes.
770 if (vrecycle(vp
, &mntvnode_lock
, l
)) {
771 mutex_enter(&mntvnode_lock
);
776 * Step 5: invalidate all cached file data.
778 mutex_enter(&vp
->v_interlock
);
779 mutex_exit(&mntvnode_lock
);
780 if (vget(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
)) {
784 if (vinvalbuf(vp
, 0, cred
, l
, 0, 0))
785 panic("ffs_reload: dirty2");
787 * Step 6: re-read inode data for all active vnodes.
790 error
= bread(devvp
, fsbtodb(fs
, ino_to_fsba(fs
, ip
->i_number
)),
791 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
798 ffs_load_inode(bp
, ip
, fs
, ip
->i_number
);
801 mutex_enter(&mntvnode_lock
);
803 mutex_exit(&mntvnode_lock
);
809 * Possible superblock locations ordered from most to least likely.
811 static const int sblock_try
[] = SBLOCKSEARCH
;
814 * Common code for mount and mountroot
817 ffs_mountfs(struct vnode
*devvp
, struct mount
*mp
, struct lwp
*l
)
819 struct ufsmount
*ump
;
823 struct partinfo dpart
;
825 daddr_t sblockloc
, fsblockloc
;
827 int error
, i
, size
, ronly
, bset
= 0;
829 int needswap
= 0; /* keep gcc happy */
833 u_int32_t sbsize
= 8192; /* keep gcc happy*/
836 cred
= l
? l
->l_cred
: NOCRED
;
838 /* Flush out any old buffers remaining from a previous use. */
839 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
840 error
= vinvalbuf(devvp
, V_SAVE
, cred
, l
, 0, 0);
841 VOP_UNLOCK(devvp
, 0);
845 ronly
= (mp
->mnt_flag
& MNT_RDONLY
) != 0;
846 if (VOP_IOCTL(devvp
, DIOCGPART
, &dpart
, FREAD
, cred
) != 0)
849 size
= dpart
.disklab
->d_secsize
;
857 error
= fstrans_mount(mp
);
861 ump
= malloc(sizeof *ump
, M_UFSMNT
, M_WAITOK
);
862 memset(ump
, 0, sizeof *ump
);
863 mutex_init(&ump
->um_lock
, MUTEX_DEFAULT
, IPL_NONE
);
864 error
= ffs_snapshot_init(ump
);
867 ump
->um_ops
= &ffs_ufsops
;
873 * Try reading the superblock in each of its possible locations.
877 brelse(bp
, BC_NOCACHE
);
880 if (sblock_try
[i
] == -1) {
885 error
= bread(devvp
, sblock_try
[i
] / size
, SBLOCKSIZE
, cred
,
891 fs
= (struct fs
*)bp
->b_data
;
892 fsblockloc
= sblockloc
= sblock_try
[i
];
893 if (fs
->fs_magic
== FS_UFS1_MAGIC
) {
894 sbsize
= fs
->fs_sbsize
;
898 } else if (fs
->fs_magic
== bswap32(FS_UFS1_MAGIC
)) {
899 sbsize
= bswap32(fs
->fs_sbsize
);
903 } else if (fs
->fs_magic
== FS_UFS2_MAGIC
) {
904 sbsize
= fs
->fs_sbsize
;
908 } else if (fs
->fs_magic
== bswap32(FS_UFS2_MAGIC
)) {
909 sbsize
= bswap32(fs
->fs_sbsize
);
917 /* fs->fs_sblockloc isn't defined for old filesystems */
918 if (fstype
== UFS1
&& !(fs
->fs_old_flags
& FS_FLAGS_UPDATED
)) {
919 if (sblockloc
== SBLOCK_UFS2
)
921 * This is likely to be the first alternate
922 * in a filesystem with 64k blocks.
926 fsblockloc
= sblockloc
;
928 fsblockloc
= fs
->fs_sblockloc
;
931 fsblockloc
= bswap64(fsblockloc
);
935 /* Check we haven't found an alternate superblock */
936 if (fsblockloc
!= sblockloc
)
939 /* Validate size of superblock */
940 if (sbsize
> MAXBSIZE
|| sbsize
< sizeof(struct fs
))
943 /* Ok seems to be a good superblock */
947 fs
= malloc((u_long
)sbsize
, M_UFSMNT
, M_WAITOK
);
948 memcpy(fs
, bp
->b_data
, sbsize
);
953 ffs_sb_swap((struct fs
*)bp
->b_data
, fs
);
954 fs
->fs_flags
|= FS_SWAPPED
;
957 fs
->fs_flags
&= ~FS_SWAPPED
;
960 if ((mp
->mnt_wapbl_replay
== 0) && (fs
->fs_flags
& FS_DOWAPBL
)) {
961 error
= ffs_wapbl_replay_start(mp
, fs
, devvp
);
962 if (error
&& (mp
->mnt_flag
& MNT_FORCE
) == 0)
966 /* XXX fsmnt may be stale. */
967 printf("%s: replaying log to disk\n",
969 error
= wapbl_replay_write(mp
->mnt_wapbl_replay
,
973 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
974 fs
->fs_clean
= FS_WASCLEAN
;
976 /* XXX fsmnt may be stale */
977 printf("%s: replaying log to memory\n",
981 /* Force a re-read of the superblock */
982 brelse(bp
, BC_INVAL
);
990 if ((fs
->fs_flags
& FS_DOWAPBL
) && (mp
->mnt_flag
& MNT_FORCE
) == 0) {
996 ffs_oldfscompat_read(fs
, ump
, sblockloc
);
997 ump
->um_maxfilesize
= fs
->fs_maxfilesize
;
999 if (fs
->fs_flags
& ~(FS_KNOWN_FLAGS
| FS_INTERNAL
)) {
1000 uprintf("%s: unknown ufs flags: 0x%08"PRIx32
"%s\n",
1001 mp
->mnt_stat
.f_mntonname
, fs
->fs_flags
,
1002 (mp
->mnt_flag
& MNT_FORCE
) ? "" : ", not mounting");
1003 if ((mp
->mnt_flag
& MNT_FORCE
) == 0) {
1009 if (fs
->fs_pendingblocks
!= 0 || fs
->fs_pendinginodes
!= 0) {
1010 fs
->fs_pendingblocks
= 0;
1011 fs
->fs_pendinginodes
= 0;
1014 ump
->um_fstype
= fstype
;
1015 if (fs
->fs_sbsize
< SBLOCKSIZE
)
1016 brelse(bp
, BC_INVAL
);
1021 /* First check to see if this is tagged as an Apple UFS filesystem
1024 if ((VOP_IOCTL(devvp
, DIOCGPART
, &dpart
, FREAD
, cred
) == 0) &&
1025 (dpart
.part
->p_fstype
== FS_APPLEUFS
)) {
1026 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1030 /* Manually look for an apple ufs label, and if a valid one
1031 * is found, then treat it like an Apple UFS filesystem anyway
1033 error
= bread(devvp
, (daddr_t
)(APPLEUFS_LABEL_OFFSET
/ size
),
1034 APPLEUFS_LABEL_SIZE
, cred
, 0, &bp
);
1037 error
= ffs_appleufs_validate(fs
->fs_fsmnt
,
1038 (struct appleufslabel
*)bp
->b_data
, NULL
);
1040 ump
->um_flags
|= UFS_ISAPPLEUFS
;
1046 if (ump
->um_flags
& UFS_ISAPPLEUFS
) {
1054 * XXX This code changes the behaviour of mounting dirty filesystems, to
1055 * XXX require "mount -f ..." to mount them. This doesn't match what
1056 * XXX mount(8) describes and is disabled for now.
1059 * If the file system is not clean, don't allow it to be mounted
1060 * unless MNT_FORCE is specified. (Note: MNT_FORCE is always set
1061 * for the root file system.)
1063 if (fs
->fs_flags
& FS_DOWAPBL
) {
1065 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1066 * bit is set, although there's a window in unmount where it
1067 * could be FS_ISCLEAN
1069 if ((mp
->mnt_flag
& MNT_FORCE
) == 0 &&
1070 (fs
->fs_clean
& (FS_WASCLEAN
| FS_ISCLEAN
)) == 0) {
1075 if ((fs
->fs_clean
& FS_ISCLEAN
) == 0 &&
1076 (mp
->mnt_flag
& MNT_FORCE
) == 0) {
1083 * verify that we can access the last block in the fs
1084 * if we're mounting read/write.
1088 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_size
- 1), fs
->fs_fsize
,
1090 if (bp
->b_bcount
!= fs
->fs_fsize
)
1096 brelse(bp
, BC_INVAL
);
1100 fs
->fs_ronly
= ronly
;
1101 /* Don't bump fs_clean if we're replaying journal */
1102 if (!((fs
->fs_flags
& FS_DOWAPBL
) && (fs
->fs_clean
& FS_WASCLEAN
)))
1107 size
= fs
->fs_cssize
;
1108 blks
= howmany(size
, fs
->fs_fsize
);
1109 if (fs
->fs_contigsumsize
> 0)
1110 size
+= fs
->fs_ncg
* sizeof(int32_t);
1111 size
+= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1112 space
= malloc((u_long
)size
, M_UFSMNT
, M_WAITOK
);
1114 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
1115 size
= fs
->fs_bsize
;
1116 if (i
+ fs
->fs_frag
> blks
)
1117 size
= (blks
- i
) * fs
->fs_fsize
;
1118 error
= bread(devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
), size
,
1121 free(fs
->fs_csp
, M_UFSMNT
);
1126 ffs_csum_swap((struct csum
*)bp
->b_data
,
1127 (struct csum
*)space
, size
);
1130 memcpy(space
, bp
->b_data
, (u_int
)size
);
1132 space
= (char *)space
+ size
;
1136 if (fs
->fs_contigsumsize
> 0) {
1137 fs
->fs_maxcluster
= lp
= space
;
1138 for (i
= 0; i
< fs
->fs_ncg
; i
++)
1139 *lp
++ = fs
->fs_contigsumsize
;
1142 size
= fs
->fs_ncg
* sizeof(*fs
->fs_contigdirs
);
1143 fs
->fs_contigdirs
= space
;
1144 space
= (char *)space
+ size
;
1145 memset(fs
->fs_contigdirs
, 0, size
);
1146 /* Compatibility for old filesystems - XXX */
1147 if (fs
->fs_avgfilesize
<= 0)
1148 fs
->fs_avgfilesize
= AVFILESIZ
;
1149 if (fs
->fs_avgfpdir
<= 0)
1150 fs
->fs_avgfpdir
= AFPDIR
;
1151 fs
->fs_active
= NULL
;
1153 mp
->mnt_stat
.f_fsidx
.__fsid_val
[0] = (long)dev
;
1154 mp
->mnt_stat
.f_fsidx
.__fsid_val
[1] = makefstype(MOUNT_FFS
);
1155 mp
->mnt_stat
.f_fsid
= mp
->mnt_stat
.f_fsidx
.__fsid_val
[0];
1156 mp
->mnt_stat
.f_namemax
= FFS_MAXNAMLEN
;
1157 if (UFS_MPISAPPLEUFS(ump
)) {
1158 /* NeXT used to keep short symlinks in the inode even
1159 * when using FS_42INODEFMT. In that case fs->fs_maxsymlinklen
1160 * is probably -1, but we still need to be able to identify
1163 ump
->um_maxsymlinklen
= APPLEUFS_MAXSYMLINKLEN
;
1164 ump
->um_dirblksiz
= APPLEUFS_DIRBLKSIZ
;
1165 mp
->mnt_iflag
|= IMNT_DTYPE
;
1167 ump
->um_maxsymlinklen
= fs
->fs_maxsymlinklen
;
1168 ump
->um_dirblksiz
= DIRBLKSIZ
;
1169 if (ump
->um_maxsymlinklen
> 0)
1170 mp
->mnt_iflag
|= IMNT_DTYPE
;
1172 mp
->mnt_iflag
&= ~IMNT_DTYPE
;
1174 mp
->mnt_fs_bshift
= fs
->fs_bshift
;
1175 mp
->mnt_dev_bshift
= DEV_BSHIFT
; /* XXX */
1176 mp
->mnt_flag
|= MNT_LOCAL
;
1177 mp
->mnt_iflag
|= IMNT_MPSAFE
;
1180 ump
->um_flags
|= UFS_NEEDSWAP
;
1182 ump
->um_mountp
= mp
;
1184 ump
->um_devvp
= devvp
;
1185 ump
->um_nindir
= fs
->fs_nindir
;
1186 ump
->um_lognindir
= ffs(fs
->fs_nindir
) - 1;
1187 ump
->um_bptrtodb
= fs
->fs_fsbtodb
;
1188 ump
->um_seqinc
= fs
->fs_frag
;
1189 for (i
= 0; i
< MAXQUOTAS
; i
++)
1190 ump
->um_quotas
[i
] = NULLVP
;
1191 devvp
->v_specmountpoint
= mp
;
1192 if (ronly
== 0 && fs
->fs_snapinum
[0] != 0)
1193 ffs_snapshot_mount(mp
);
1197 KDASSERT(fs
->fs_ronly
== 0);
1199 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1200 * needs to create a new log file in-filesystem.
1202 ffs_statvfs(mp
, &mp
->mnt_stat
);
1204 error
= ffs_wapbl_start(mp
);
1206 free(fs
->fs_csp
, M_UFSMNT
);
1213 * Initialize file-backed extended attributes on UFS1 file
1216 if (ump
->um_fstype
== UFS1
) {
1217 ufs_extattr_uepm_init(&ump
->um_extattr
);
1218 #ifdef UFS_EXTATTR_AUTOSTART
1220 * XXX Just ignore errors. Not clear that we should
1221 * XXX fail the mount in this case.
1223 (void) ufs_extattr_autostart(mp
, l
);
1226 #endif /* UFS_EXTATTR */
1230 if (mp
->mnt_wapbl_replay
) {
1231 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1232 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1233 mp
->mnt_wapbl_replay
= 0;
1237 fstrans_unmount(mp
);
1240 devvp
->v_specmountpoint
= NULL
;
1244 if (ump
->um_oldfscompat
)
1245 free(ump
->um_oldfscompat
, M_UFSMNT
);
1246 mutex_destroy(&ump
->um_lock
);
1247 free(ump
, M_UFSMNT
);
1248 mp
->mnt_data
= NULL
;
1254 * Sanity checks for loading old filesystem superblocks.
1255 * See ffs_oldfscompat_write below for unwound actions.
1257 * XXX - Parts get retired eventually.
1258 * Unfortunately new bits get added.
1261 ffs_oldfscompat_read(struct fs
*fs
, struct ufsmount
*ump
, daddr_t sblockloc
)
1266 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1267 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1270 if (!ump
->um_oldfscompat
)
1271 ump
->um_oldfscompat
= malloc(512 + 3*sizeof(int32_t),
1272 M_UFSMNT
, M_WAITOK
);
1274 memcpy(ump
->um_oldfscompat
, &fs
->fs_old_postbl_start
, 512);
1275 extrasave
= ump
->um_oldfscompat
;
1276 extrasave
+= 512/sizeof(int32_t);
1277 extrasave
[0] = fs
->fs_old_npsect
;
1278 extrasave
[1] = fs
->fs_old_interleave
;
1279 extrasave
[2] = fs
->fs_old_trackskew
;
1281 /* These fields will be overwritten by their
1282 * original values in ffs_oldfscompat_write, so it is harmless
1283 * to modify them here.
1285 fs
->fs_cstotal
.cs_ndir
= fs
->fs_old_cstotal
.cs_ndir
;
1286 fs
->fs_cstotal
.cs_nbfree
= fs
->fs_old_cstotal
.cs_nbfree
;
1287 fs
->fs_cstotal
.cs_nifree
= fs
->fs_old_cstotal
.cs_nifree
;
1288 fs
->fs_cstotal
.cs_nffree
= fs
->fs_old_cstotal
.cs_nffree
;
1290 fs
->fs_maxbsize
= fs
->fs_bsize
;
1291 fs
->fs_time
= fs
->fs_old_time
;
1292 fs
->fs_size
= fs
->fs_old_size
;
1293 fs
->fs_dsize
= fs
->fs_old_dsize
;
1294 fs
->fs_csaddr
= fs
->fs_old_csaddr
;
1295 fs
->fs_sblockloc
= sblockloc
;
1297 fs
->fs_flags
= fs
->fs_old_flags
| (fs
->fs_flags
& FS_INTERNAL
);
1299 if (fs
->fs_old_postblformat
== FS_42POSTBLFMT
) {
1300 fs
->fs_old_nrpos
= 8;
1301 fs
->fs_old_npsect
= fs
->fs_old_nsect
;
1302 fs
->fs_old_interleave
= 1;
1303 fs
->fs_old_trackskew
= 0;
1306 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) {
1307 fs
->fs_maxfilesize
= (u_quad_t
) 1LL << 39;
1308 fs
->fs_qbmask
= ~fs
->fs_bmask
;
1309 fs
->fs_qfmask
= ~fs
->fs_fmask
;
1312 maxfilesize
= (u_int64_t
)0x80000000 * fs
->fs_bsize
- 1;
1313 if (fs
->fs_maxfilesize
> maxfilesize
)
1314 fs
->fs_maxfilesize
= maxfilesize
;
1316 /* Compatibility for old filesystems */
1317 if (fs
->fs_avgfilesize
<= 0)
1318 fs
->fs_avgfilesize
= AVFILESIZ
;
1319 if (fs
->fs_avgfpdir
<= 0)
1320 fs
->fs_avgfpdir
= AFPDIR
;
1324 fs
->fs_save_cgsize
= fs
->fs_cgsize
;
1325 fs
->fs_cgsize
= fs
->fs_bsize
;
1331 * Unwinding superblock updates for old filesystems.
1332 * See ffs_oldfscompat_read above for details.
1334 * XXX - Parts get retired eventually.
1335 * Unfortunately new bits get added.
1338 ffs_oldfscompat_write(struct fs
*fs
, struct ufsmount
*ump
)
1342 if ((fs
->fs_magic
!= FS_UFS1_MAGIC
) ||
1343 (fs
->fs_old_flags
& FS_FLAGS_UPDATED
))
1346 fs
->fs_old_time
= fs
->fs_time
;
1347 fs
->fs_old_cstotal
.cs_ndir
= fs
->fs_cstotal
.cs_ndir
;
1348 fs
->fs_old_cstotal
.cs_nbfree
= fs
->fs_cstotal
.cs_nbfree
;
1349 fs
->fs_old_cstotal
.cs_nifree
= fs
->fs_cstotal
.cs_nifree
;
1350 fs
->fs_old_cstotal
.cs_nffree
= fs
->fs_cstotal
.cs_nffree
;
1351 fs
->fs_old_flags
= fs
->fs_flags
;
1355 fs
->fs_cgsize
= fs
->fs_save_cgsize
;
1359 memcpy(&fs
->fs_old_postbl_start
, ump
->um_oldfscompat
, 512);
1360 extrasave
= ump
->um_oldfscompat
;
1361 extrasave
+= 512/sizeof(int32_t);
1362 fs
->fs_old_npsect
= extrasave
[0];
1363 fs
->fs_old_interleave
= extrasave
[1];
1364 fs
->fs_old_trackskew
= extrasave
[2];
1369 * unmount vfs operation
1372 ffs_unmount(struct mount
*mp
, int mntflags
)
1374 struct lwp
*l
= curlwp
;
1375 struct ufsmount
*ump
= VFSTOUFS(mp
);
1376 struct fs
*fs
= ump
->um_fs
;
1383 if (mntflags
& MNT_FORCE
)
1384 flags
|= FORCECLOSE
;
1385 if ((error
= ffs_flushfiles(mp
, flags
, l
)) != 0)
1387 error
= UFS_WAPBL_BEGIN(mp
);
1389 if (fs
->fs_ronly
== 0 &&
1390 ffs_cgupdate(ump
, MNT_WAIT
) == 0 &&
1391 fs
->fs_clean
& FS_WASCLEAN
) {
1392 fs
->fs_clean
= FS_ISCLEAN
;
1394 (void) ffs_sbupdate(ump
, MNT_WAIT
);
1399 KASSERT(!(mp
->mnt_wapbl_replay
&& mp
->mnt_wapbl
));
1400 if (mp
->mnt_wapbl_replay
) {
1401 KDASSERT(fs
->fs_ronly
);
1402 wapbl_replay_stop(mp
->mnt_wapbl_replay
);
1403 wapbl_replay_free(mp
->mnt_wapbl_replay
);
1404 mp
->mnt_wapbl_replay
= 0;
1406 error
= ffs_wapbl_stop(mp
, doforce
&& (mntflags
& MNT_FORCE
));
1412 if (ump
->um_fstype
== UFS1
) {
1413 ufs_extattr_stop(mp
, l
);
1414 ufs_extattr_uepm_destroy(&ump
->um_extattr
);
1416 #endif /* UFS_EXTATTR */
1418 if (ump
->um_devvp
->v_type
!= VBAD
)
1419 ump
->um_devvp
->v_specmountpoint
= NULL
;
1420 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1421 (void)VOP_CLOSE(ump
->um_devvp
, fs
->fs_ronly
? FREAD
: FREAD
| FWRITE
,
1423 vput(ump
->um_devvp
);
1424 free(fs
->fs_csp
, M_UFSMNT
);
1426 if (ump
->um_oldfscompat
!= NULL
)
1427 free(ump
->um_oldfscompat
, M_UFSMNT
);
1428 mutex_destroy(&ump
->um_lock
);
1429 ffs_snapshot_fini(ump
);
1430 free(ump
, M_UFSMNT
);
1431 mp
->mnt_data
= NULL
;
1432 mp
->mnt_flag
&= ~MNT_LOCAL
;
1433 fstrans_unmount(mp
);
1438 * Flush out all the files in a filesystem.
1441 ffs_flushfiles(struct mount
*mp
, int flags
, struct lwp
*l
)
1444 struct ufsmount
*ump
;
1448 flags
&= ~FORCECLOSE
;
1451 if (mp
->mnt_flag
& MNT_QUOTA
) {
1453 if ((error
= vflush(mp
, NULLVP
, SKIPSYSTEM
| flags
)) != 0)
1455 for (i
= 0; i
< MAXQUOTAS
; i
++) {
1456 if (ump
->um_quotas
[i
] == NULLVP
)
1461 * Here we fall through to vflush again to ensure
1462 * that we have gotten rid of all the system vnodes.
1466 if ((error
= vflush(mp
, 0, SKIPSYSTEM
| flags
)) != 0)
1468 ffs_snapshot_unmount(mp
);
1470 * Flush all the files.
1472 error
= vflush(mp
, NULLVP
, flags
);
1476 * Flush filesystem metadata.
1478 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1479 error
= VOP_FSYNC(ump
->um_devvp
, l
->l_cred
, FSYNC_WAIT
, 0, 0);
1480 VOP_UNLOCK(ump
->um_devvp
, 0);
1481 if (flags
& FORCECLOSE
) /* XXXDBJ */
1487 if (mp
->mnt_wapbl
) {
1488 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
1489 if (flags
& FORCECLOSE
)
1498 * Get file system statistics.
1501 ffs_statvfs(struct mount
*mp
, struct statvfs
*sbp
)
1503 struct ufsmount
*ump
;
1508 mutex_enter(&ump
->um_lock
);
1509 sbp
->f_bsize
= fs
->fs_bsize
;
1510 sbp
->f_frsize
= fs
->fs_fsize
;
1511 sbp
->f_iosize
= fs
->fs_bsize
;
1512 sbp
->f_blocks
= fs
->fs_dsize
;
1513 sbp
->f_bfree
= blkstofrags(fs
, fs
->fs_cstotal
.cs_nbfree
) +
1514 fs
->fs_cstotal
.cs_nffree
+ dbtofsb(fs
, fs
->fs_pendingblocks
);
1515 sbp
->f_bresvd
= ((u_int64_t
) fs
->fs_dsize
* (u_int64_t
)
1516 fs
->fs_minfree
) / (u_int64_t
) 100;
1517 if (sbp
->f_bfree
> sbp
->f_bresvd
)
1518 sbp
->f_bavail
= sbp
->f_bfree
- sbp
->f_bresvd
;
1521 sbp
->f_files
= fs
->fs_ncg
* fs
->fs_ipg
- ROOTINO
;
1522 sbp
->f_ffree
= fs
->fs_cstotal
.cs_nifree
+ fs
->fs_pendinginodes
;
1523 sbp
->f_favail
= sbp
->f_ffree
;
1525 mutex_exit(&ump
->um_lock
);
1526 copy_statvfs_info(sbp
, mp
);
1532 * Go through the disk queues to initiate sandbagged IO;
1533 * go through the inodes to write those that have been modified;
1534 * initiate the writing of the super block if it has been modified.
1536 * Note: we are always called with the filesystem marked `MPBUSY'.
1539 ffs_sync(struct mount
*mp
, int waitfor
, kauth_cred_t cred
)
1541 struct vnode
*vp
, *mvp
, *nvp
;
1543 struct ufsmount
*ump
= VFSTOUFS(mp
);
1545 int lk_flags
, error
, allerror
= 0;
1549 if (fs
->fs_fmod
!= 0 && fs
->fs_ronly
!= 0) { /* XXX */
1550 printf("fs = %s\n", fs
->fs_fsmnt
);
1551 panic("update: rofs mod");
1554 /* Allocate a marker vnode. */
1555 if ((mvp
= vnalloc(mp
)) == NULL
)
1558 fstrans_start(mp
, FSTRANS_SHARED
);
1559 is_suspending
= (fstrans_getstate(mp
) == FSTRANS_SUSPENDING
);
1561 * We can't lock vnodes while the file system is suspending because
1562 * threads waiting on fstrans may have locked vnodes.
1565 lk_flags
= LK_INTERLOCK
;
1567 lk_flags
= LK_EXCLUSIVE
| LK_NOWAIT
| LK_INTERLOCK
;
1569 * Write back each (modified) inode.
1571 mutex_enter(&mntvnode_lock
);
1574 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1575 * and vclean() can be called indirectly
1577 for (vp
= TAILQ_FIRST(&mp
->mnt_vnodelist
); vp
; vp
= nvp
) {
1578 nvp
= TAILQ_NEXT(vp
, v_mntvnodes
);
1580 * If the vnode that we are about to sync is no longer
1581 * associated with this mount point, start over.
1583 if (vp
->v_mount
!= mp
)
1586 * Don't interfere with concurrent scans of this FS.
1590 mutex_enter(&vp
->v_interlock
);
1594 * Skip the vnode/inode if inaccessible.
1596 if (ip
== NULL
|| (vp
->v_iflag
& (VI_XLOCK
| VI_CLEAN
)) != 0 ||
1597 vp
->v_type
== VNON
) {
1598 mutex_exit(&vp
->v_interlock
);
1603 * We deliberately update inode times here. This will
1604 * prevent a massive queue of updates accumulating, only
1605 * to be handled by a call to unmount.
1607 * XXX It would be better to have the syncer trickle these
1608 * out. Adjustment needed to allow registering vnodes for
1609 * sync when the vnode is clean, but the inode dirty. Or
1610 * have ufs itself trickle out inode updates.
1612 * If doing a lazy sync, we don't care about metadata or
1613 * data updates, because they are handled by each vnode's
1614 * synclist entry. In this case we are only interested in
1615 * writing back modified inodes.
1617 if ((ip
->i_flag
& (IN_ACCESS
| IN_CHANGE
| IN_UPDATE
|
1618 IN_MODIFY
| IN_MODIFIED
| IN_ACCESSED
)) == 0 &&
1619 (waitfor
== MNT_LAZY
|| (LIST_EMPTY(&vp
->v_dirtyblkhd
) &&
1620 UVM_OBJ_IS_CLEAN(&vp
->v_uobj
)))) {
1621 mutex_exit(&vp
->v_interlock
);
1624 if (vp
->v_type
== VBLK
&& is_suspending
) {
1625 mutex_exit(&vp
->v_interlock
);
1629 mutex_exit(&mntvnode_lock
);
1630 error
= vget(vp
, lk_flags
);
1632 mutex_enter(&mntvnode_lock
);
1634 if (error
== ENOENT
) {
1639 if (waitfor
== MNT_LAZY
) {
1640 error
= UFS_WAPBL_BEGIN(vp
->v_mount
);
1642 error
= ffs_update(vp
, NULL
, NULL
,
1644 UFS_WAPBL_END(vp
->v_mount
);
1647 error
= VOP_FSYNC(vp
, cred
, FSYNC_NOLOG
|
1648 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0), 0, 0);
1656 mutex_enter(&mntvnode_lock
);
1659 mutex_exit(&mntvnode_lock
);
1661 * Force stale file system control information to be flushed.
1663 if (waitfor
!= MNT_LAZY
&& (ump
->um_devvp
->v_numoutput
> 0 ||
1664 !LIST_EMPTY(&ump
->um_devvp
->v_dirtyblkhd
))) {
1665 vn_lock(ump
->um_devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1666 if ((error
= VOP_FSYNC(ump
->um_devvp
, cred
,
1667 (waitfor
== MNT_WAIT
? FSYNC_WAIT
: 0) | FSYNC_NOLOG
,
1670 VOP_UNLOCK(ump
->um_devvp
, 0);
1671 if (allerror
== 0 && waitfor
== MNT_WAIT
&& !mp
->mnt_wapbl
) {
1672 mutex_enter(&mntvnode_lock
);
1680 * Write back modified superblock.
1682 if (fs
->fs_fmod
!= 0) {
1684 fs
->fs_time
= time_second
;
1685 error
= UFS_WAPBL_BEGIN(mp
);
1689 if ((error
= ffs_cgupdate(ump
, waitfor
)))
1696 if (mp
->mnt_wapbl
) {
1697 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
1709 * Look up an FFS dinode number to find its in-core vnode; otherwise read it
1710 * in from disk. If it is in core, wait for the lock bit to clear, then
1711 * return the inode locked. Detection and handling of mount points must be
1712 * done by the calling routine.
1715 ffs_vget(struct mount
*mp
, ino_t ino
, struct vnode
**vpp
)
1719 struct ufsmount
*ump
;
1729 if ((*vpp
= ufs_ihashget(dev
, ino
, LK_EXCLUSIVE
)) != NULL
)
1732 /* Allocate a new vnode/inode. */
1733 if ((error
= getnewvnode(VT_UFS
, mp
, ffs_vnodeop_p
, &vp
)) != 0) {
1737 ip
= pool_cache_get(ffs_inode_cache
, PR_WAITOK
);
1740 * If someone beat us to it, put back the freshly allocated
1741 * vnode/inode pair and retry.
1743 mutex_enter(&ufs_hashlock
);
1744 if (ufs_ihashget(dev
, ino
, 0) != NULL
) {
1745 mutex_exit(&ufs_hashlock
);
1747 pool_cache_put(ffs_inode_cache
, ip
);
1751 vp
->v_vflag
|= VV_LOCKSWORK
;
1754 * XXX MFS ends up here, too, to allocate an inode. Should we
1755 * XXX create another pool for MFS inodes?
1758 memset(ip
, 0, sizeof(struct inode
));
1762 ip
->i_fs
= fs
= ump
->um_fs
;
1770 * Initialize genfs node, we might proceed to destroy it in
1773 genfs_node_init(vp
, &ffs_genfsops
);
1776 * Put it onto its hash chain and lock it so that other requests for
1777 * this inode will block if they arrive while we are sleeping waiting
1778 * for old data structures to be purged or for the contents of the
1779 * disk portion of this inode to be read.
1783 mutex_exit(&ufs_hashlock
);
1785 /* Read in the disk contents for the inode, copy into the inode. */
1786 error
= bread(ump
->um_devvp
, fsbtodb(fs
, ino_to_fsba(fs
, ino
)),
1787 (int)fs
->fs_bsize
, NOCRED
, 0, &bp
);
1791 * The inode does not contain anything useful, so it would
1792 * be misleading to leave it on its hash chain. With mode
1793 * still zero, it will be unlinked and returned to the free
1802 if (ip
->i_ump
->um_fstype
== UFS1
)
1803 ip
->i_din
.ffs1_din
= pool_cache_get(ffs_dinode1_cache
,
1806 ip
->i_din
.ffs2_din
= pool_cache_get(ffs_dinode2_cache
,
1808 ffs_load_inode(bp
, ip
, fs
, ino
);
1812 * Initialize the vnode from the inode, check for aliases.
1813 * Note that the underlying vnode may have changed.
1816 ufs_vinit(mp
, ffs_specop_p
, ffs_fifoop_p
, &vp
);
1819 * Finish inode initialization now that aliasing has been resolved.
1822 ip
->i_devvp
= ump
->um_devvp
;
1826 * Ensure that uid and gid are correct. This is a temporary
1827 * fix until fsck has been changed to do the update.
1830 if (fs
->fs_old_inodefmt
< FS_44INODEFMT
) { /* XXX */
1831 ip
->i_uid
= ip
->i_ffs1_ouid
; /* XXX */
1832 ip
->i_gid
= ip
->i_ffs1_ogid
; /* XXX */
1834 uvm_vnp_setsize(vp
, ip
->i_size
);
1840 * File handle to vnode
1842 * Have to be really careful about stale file handles:
1843 * - check that the inode number is valid
1844 * - call ffs_vget() to get the locked inode
1845 * - check for an unallocated inode (i_mode == 0)
1846 * - check that the given client host has export rights and return
1847 * those rights via exflagsp and credanonp
1850 ffs_fhtovp(struct mount
*mp
, struct fid
*fhp
, struct vnode
**vpp
)
1855 if (fhp
->fid_len
!= sizeof(struct ufid
))
1858 memcpy(&ufh
, fhp
, sizeof(ufh
));
1859 fs
= VFSTOUFS(mp
)->um_fs
;
1860 if (ufh
.ufid_ino
< ROOTINO
||
1861 ufh
.ufid_ino
>= fs
->fs_ncg
* fs
->fs_ipg
)
1863 return (ufs_fhtovp(mp
, &ufh
, vpp
));
1867 * Vnode pointer to File handle
1871 ffs_vptofh(struct vnode
*vp
, struct fid
*fhp
, size_t *fh_size
)
1876 if (*fh_size
< sizeof(struct ufid
)) {
1877 *fh_size
= sizeof(struct ufid
);
1881 *fh_size
= sizeof(struct ufid
);
1882 memset(&ufh
, 0, sizeof(ufh
));
1883 ufh
.ufid_len
= sizeof(struct ufid
);
1884 ufh
.ufid_ino
= ip
->i_number
;
1885 ufh
.ufid_gen
= ip
->i_gen
;
1886 memcpy(fhp
, &ufh
, sizeof(ufh
));
1893 if (ffs_initcount
++ > 0)
1896 ffs_inode_cache
= pool_cache_init(sizeof(struct inode
), 0, 0, 0,
1897 "ffsino", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1898 ffs_dinode1_cache
= pool_cache_init(sizeof(struct ufs1_dinode
), 0, 0, 0,
1899 "ffsdino1", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1900 ffs_dinode2_cache
= pool_cache_init(sizeof(struct ufs2_dinode
), 0, 0, 0,
1901 "ffsdino2", NULL
, IPL_NONE
, NULL
, NULL
, NULL
);
1915 if (--ffs_initcount
> 0)
1919 pool_cache_destroy(ffs_dinode2_cache
);
1920 pool_cache_destroy(ffs_dinode1_cache
);
1921 pool_cache_destroy(ffs_inode_cache
);
1925 * Write a superblock and associated information back to disk.
1928 ffs_sbupdate(struct ufsmount
*mp
, int waitfor
)
1930 struct fs
*fs
= mp
->um_fs
;
1935 error
= ffs_getblk(mp
->um_devvp
,
1936 fs
->fs_sblockloc
>> (fs
->fs_fshift
- fs
->fs_fsbtodb
), FFS_NOBLK
,
1937 fs
->fs_sbsize
, false, &bp
);
1940 saveflag
= fs
->fs_flags
& FS_INTERNAL
;
1941 fs
->fs_flags
&= ~FS_INTERNAL
;
1943 memcpy(bp
->b_data
, fs
, fs
->fs_sbsize
);
1945 ffs_oldfscompat_write((struct fs
*)bp
->b_data
, mp
);
1947 if (mp
->um_flags
& UFS_NEEDSWAP
)
1948 ffs_sb_swap((struct fs
*)bp
->b_data
, (struct fs
*)bp
->b_data
);
1950 fs
->fs_flags
|= saveflag
;
1952 if (waitfor
== MNT_WAIT
)
1960 ffs_cgupdate(struct ufsmount
*mp
, int waitfor
)
1962 struct fs
*fs
= mp
->um_fs
;
1966 int i
, size
, error
= 0, allerror
= 0;
1968 allerror
= ffs_sbupdate(mp
, waitfor
);
1969 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
1971 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
1972 size
= fs
->fs_bsize
;
1973 if (i
+ fs
->fs_frag
> blks
)
1974 size
= (blks
- i
) * fs
->fs_fsize
;
1975 error
= ffs_getblk(mp
->um_devvp
, fsbtodb(fs
, fs
->fs_csaddr
+ i
),
1976 FFS_NOBLK
, size
, false, &bp
);
1980 if (mp
->um_flags
& UFS_NEEDSWAP
)
1981 ffs_csum_swap((struct csum
*)space
,
1982 (struct csum
*)bp
->b_data
, size
);
1985 memcpy(bp
->b_data
, space
, (u_int
)size
);
1986 space
= (char *)space
+ size
;
1987 if (waitfor
== MNT_WAIT
)
1992 if (!allerror
&& error
)
1998 ffs_extattrctl(struct mount
*mp
, int cmd
, struct vnode
*vp
,
1999 int attrnamespace
, const char *attrname
)
2003 * File-backed extended attributes are only supported on UFS1.
2004 * UFS2 has native extended attributes.
2006 if (VFSTOUFS(mp
)->um_fstype
== UFS1
)
2007 return (ufs_extattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
2009 return (vfs_stdextattrctl(mp
, cmd
, vp
, attrnamespace
, attrname
));
2013 ffs_suspendctl(struct mount
*mp
, int cmd
)
2016 struct lwp
*l
= curlwp
;
2019 case SUSPEND_SUSPEND
:
2020 if ((error
= fstrans_setstate(mp
, FSTRANS_SUSPENDING
)) != 0)
2022 error
= ffs_sync(mp
, MNT_WAIT
, l
->l_proc
->p_cred
);
2024 error
= fstrans_setstate(mp
, FSTRANS_SUSPENDED
);
2026 if (error
== 0 && mp
->mnt_wapbl
)
2027 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
2030 (void) fstrans_setstate(mp
, FSTRANS_NORMAL
);
2035 case SUSPEND_RESUME
:
2036 return fstrans_setstate(mp
, FSTRANS_NORMAL
);
2044 * Synchronize a vnode for a mounted file system. This is called for foreign
2045 * vnodes, i.e. non-FFS vnodes.
2048 ffs_vfs_fsync(vnode_t
*vp
, int flags
)
2050 int error
, passes
, skipmeta
, i
, pflags
;
2056 KASSERT(vp
->v_type
== VBLK
);
2057 KASSERT(vp
->v_specmountpoint
!= NULL
);
2060 * Flush all dirty data associated with the vnode.
2062 pflags
= PGO_ALLPAGES
| PGO_CLEANIT
;
2063 if ((flags
& FSYNC_WAIT
) != 0)
2064 pflags
|= PGO_SYNCIO
;
2065 mutex_enter(&vp
->v_interlock
);
2066 error
= VOP_PUTPAGES(vp
, 0, 0, pflags
);
2071 mp
= vp
->v_specmountpoint
;
2072 if (mp
&& mp
->mnt_wapbl
) {
2074 * Don't bother writing out metadata if the syncer is
2075 * making the request. We will let the sync vnode
2076 * write it out in a single burst through a call to
2079 if ((flags
& (FSYNC_DATAONLY
| FSYNC_LAZY
| FSYNC_NOLOG
)) != 0)
2083 * Don't flush the log if the vnode being flushed
2084 * contains no dirty buffers that could be in the log.
2086 if (!LIST_EMPTY(&vp
->v_dirtyblkhd
)) {
2087 error
= wapbl_flush(mp
->mnt_wapbl
, 0);
2092 if ((flags
& FSYNC_WAIT
) != 0) {
2093 mutex_enter(&vp
->v_interlock
);
2094 while (vp
->v_numoutput
)
2095 cv_wait(&vp
->v_cv
, &vp
->v_interlock
);
2096 mutex_exit(&vp
->v_interlock
);
2104 * Write out metadata for non-logging file systems. XXX This block
2105 * should be simplified now that softdep is gone.
2107 passes
= NIADDR
+ 1;
2109 if (flags
& FSYNC_WAIT
)
2113 mutex_enter(&bufcache_lock
);
2114 LIST_FOREACH(bp
, &vp
->v_dirtyblkhd
, b_vnbufs
) {
2115 bp
->b_cflags
&= ~BC_SCANNED
;
2117 for (bp
= LIST_FIRST(&vp
->v_dirtyblkhd
); bp
; bp
= nbp
) {
2118 nbp
= LIST_NEXT(bp
, b_vnbufs
);
2119 if (bp
->b_cflags
& (BC_BUSY
| BC_SCANNED
))
2121 if ((bp
->b_oflags
& BO_DELWRI
) == 0)
2122 panic("ffs_fsync: not dirty");
2123 if (skipmeta
&& bp
->b_lblkno
< 0)
2125 bp
->b_cflags
|= BC_BUSY
| BC_VFLUSH
| BC_SCANNED
;
2126 mutex_exit(&bufcache_lock
);
2128 * On our final pass through, do all I/O synchronously
2129 * so that we can find out if our flush is failing
2130 * because of write errors.
2132 if (passes
> 0 || !(flags
& FSYNC_WAIT
))
2134 else if ((error
= bwrite(bp
)) != 0)
2137 * Since we unlocked during the I/O, we need
2138 * to start from a known point.
2140 mutex_enter(&bufcache_lock
);
2141 nbp
= LIST_FIRST(&vp
->v_dirtyblkhd
);
2143 mutex_exit(&bufcache_lock
);
2149 if ((flags
& FSYNC_WAIT
) != 0) {
2150 mutex_enter(&vp
->v_interlock
);
2151 while (vp
->v_numoutput
) {
2152 cv_wait(&vp
->v_cv
, &vp
->v_interlock
);
2154 mutex_exit(&vp
->v_interlock
);
2156 if (!LIST_EMPTY(&vp
->v_dirtyblkhd
)) {
2158 * Block devices associated with filesystems may
2159 * have new I/O requests posted for them even if
2160 * the vnode is locked, so no amount of trying will
2161 * get them clean. Thus we give block devices a
2162 * good effort, then just give up. For all other file
2163 * types, go around and try again until it is clean.
2170 if (vp
->v_type
!= VBLK
)
2171 vprint("ffs_fsync: dirty", vp
);
2176 if (error
== 0 && (flags
& FSYNC_CACHE
) != 0) {
2177 (void)VOP_IOCTL(vp
, DIOCCACHESYNC
, &i
, FWRITE
,