sys/ufs/ffs/ffs_vfsops.c

   1 /*      $NetBSD: ffs_vfsops.c,v 1.335 2015/07/24 13:02:52 maxv Exp $    */
   2
   3 /*-
   4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to The NetBSD Foundation
   8  * by Wasabi Systems, Inc, and by Andrew Doran.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  * Copyright (c) 1989, 1991, 1993, 1994
  34  *      The Regents of the University of California.  All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  61  */
  62
  63 #include <sys/cdefs.h>
  64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.335 2015/07/24 13:02:52 maxv Exp $");
  65
  66 #if defined(_KERNEL_OPT)
  67 #include "opt_ffs.h"
  68 #include "opt_quota.h"
  69 #include "opt_wapbl.h"
  70 #endif
  71
  72 #include <sys/param.h>
  73 #include <sys/systm.h>
  74 #include <sys/namei.h>
  75 #include <sys/proc.h>
  76 #include <sys/kernel.h>
  77 #include <sys/vnode.h>
  78 #include <sys/socket.h>
  79 #include <sys/mount.h>
  80 #include <sys/buf.h>
  81 #include <sys/device.h>
  82 #include <sys/disk.h>
  83 #include <sys/mbuf.h>
  84 #include <sys/file.h>
  85 #include <sys/disklabel.h>
  86 #include <sys/ioctl.h>
  87 #include <sys/errno.h>
  88 #include <sys/kmem.h>
  89 #include <sys/pool.h>
  90 #include <sys/lock.h>
  91 #include <sys/sysctl.h>
  92 #include <sys/conf.h>
  93 #include <sys/kauth.h>
  94 #include <sys/wapbl.h>
  95 #include <sys/fstrans.h>
  96 #include <sys/module.h>
  97
  98 #include <miscfs/genfs/genfs.h>
  99 #include <miscfs/specfs/specdev.h>
 100
 101 #include <ufs/ufs/quota.h>
 102 #include <ufs/ufs/ufsmount.h>
 103 #include <ufs/ufs/inode.h>
 104 #include <ufs/ufs/dir.h>
 105 #include <ufs/ufs/ufs_extern.h>
 106 #include <ufs/ufs/ufs_bswap.h>
 107 #include <ufs/ufs/ufs_wapbl.h>
 108
 109 #include <ufs/ffs/fs.h>
 110 #include <ufs/ffs/ffs_extern.h>
 111
 112 MODULE(MODULE_CLASS_VFS, ffs, NULL);
 113
 114 static int ffs_vfs_fsync(vnode_t *, int);
 115 static int ffs_superblock_validate(struct fs *);
 116 static int ffs_is_appleufs(struct vnode *, struct fs *);
 117
 118 static int ffs_init_vnode(struct ufsmount *, struct vnode *, ino_t);
 119 static void ffs_deinit_vnode(struct ufsmount *, struct vnode *);
 120
 121 static struct sysctllog *ffs_sysctl_log;
 122
 123 static kauth_listener_t ffs_snapshot_listener;
 124
 125 /* how many times ffs_init() was called */
 126 int ffs_initcount = 0;
 127
 128 #ifdef DEBUG_FFS_MOUNT
 129 #define DPRINTF(_fmt, args...)  printf("%s: " _fmt "\n", __func__, ##args)
 130 #else
 131 #define DPRINTF(_fmt, args...)  do {} while (/*CONSTCOND*/0)
 132 #endif
 133
 134 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
 135 extern const struct vnodeopv_desc ffs_specop_opv_desc;
 136 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
 137
 138 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
 139         &ffs_vnodeop_opv_desc,
 140         &ffs_specop_opv_desc,
 141         &ffs_fifoop_opv_desc,
 142         NULL,
 143 };
 144
 145 struct vfsops ffs_vfsops = {
 146         .vfs_name = MOUNT_FFS,
 147         .vfs_min_mount_data = sizeof (struct ufs_args),
 148         .vfs_mount = ffs_mount,
 149         .vfs_start = ufs_start,
 150         .vfs_unmount = ffs_unmount,
 151         .vfs_root = ufs_root,
 152         .vfs_quotactl = ufs_quotactl,
 153         .vfs_statvfs = ffs_statvfs,
 154         .vfs_sync = ffs_sync,
 155         .vfs_vget = ufs_vget,
 156         .vfs_loadvnode = ffs_loadvnode,
 157         .vfs_newvnode = ffs_newvnode,
 158         .vfs_fhtovp = ffs_fhtovp,
 159         .vfs_vptofh = ffs_vptofh,
 160         .vfs_init = ffs_init,
 161         .vfs_reinit = ffs_reinit,
 162         .vfs_done = ffs_done,
 163         .vfs_mountroot = ffs_mountroot,
 164         .vfs_snapshot = ffs_snapshot,
 165         .vfs_extattrctl = ffs_extattrctl,
 166         .vfs_suspendctl = ffs_suspendctl,
 167         .vfs_renamelock_enter = genfs_renamelock_enter,
 168         .vfs_renamelock_exit = genfs_renamelock_exit,
 169         .vfs_fsync = ffs_vfs_fsync,
 170         .vfs_opv_descs = ffs_vnodeopv_descs
 171 };
 172
 173 static const struct genfs_ops ffs_genfsops = {
 174         .gop_size = ffs_gop_size,
 175         .gop_alloc = ufs_gop_alloc,
 176         .gop_write = genfs_gop_write,
 177         .gop_markupdate = ufs_gop_markupdate,
 178 };
 179
 180 static const struct ufs_ops ffs_ufsops = {
 181         .uo_itimes = ffs_itimes,
 182         .uo_update = ffs_update,
 183         .uo_truncate = ffs_truncate,
 184         .uo_balloc = ffs_balloc,
 185         .uo_snapgone = ffs_snapgone,
 186         .uo_bufrd = ffs_bufrd,
 187         .uo_bufwr = ffs_bufwr,
 188 };
 189
 190 static int
 191 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
 192     void *arg0, void *arg1, void *arg2, void *arg3)
 193 {
 194         vnode_t *vp = arg2;
 195         int result = KAUTH_RESULT_DEFER;
 196
 197         if (action != KAUTH_SYSTEM_FS_SNAPSHOT)
 198                 return result;
 199
 200         if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred))
 201                 result = KAUTH_RESULT_ALLOW;
 202
 203         return result;
 204 }
 205
 206 static int
 207 ffs_modcmd(modcmd_t cmd, void *arg)
 208 {
 209         int error;
 210
 211 #if 0
 212         extern int doasyncfree;
 213 #endif
 214 #ifdef UFS_EXTATTR
 215         extern int ufs_extattr_autocreate;
 216 #endif
 217         extern int ffs_log_changeopt;
 218
 219         switch (cmd) {
 220         case MODULE_CMD_INIT:
 221                 error = vfs_attach(&ffs_vfsops);
 222                 if (error != 0)
 223                         break;
 224
 225                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 226                                CTLFLAG_PERMANENT,
 227                                CTLTYPE_NODE, "ffs",
 228                                SYSCTL_DESCR("Berkeley Fast File System"),
 229                                NULL, 0, NULL, 0,
 230                                CTL_VFS, 1, CTL_EOL);
 231                 /*
 232                  * @@@ should we even bother with these first three?
 233                  */
 234                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 235                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 236                                CTLTYPE_INT, "doclusterread", NULL,
 237                                sysctl_notavail, 0, NULL, 0,
 238                                CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
 239                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 240                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 241                                CTLTYPE_INT, "doclusterwrite", NULL,
 242                                sysctl_notavail, 0, NULL, 0,
 243                                CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
 244                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 245                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 246                                CTLTYPE_INT, "doreallocblks", NULL,
 247                                sysctl_notavail, 0, NULL, 0,
 248                                CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
 249 #if 0
 250                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 251                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 252                                CTLTYPE_INT, "doasyncfree",
 253                                SYSCTL_DESCR("Release dirty blocks asynchronously"),
 254                                NULL, 0, &doasyncfree, 0,
 255                                CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
 256 #endif
 257                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 258                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 259                                CTLTYPE_INT, "log_changeopt",
 260                                SYSCTL_DESCR("Log changes in optimization strategy"),
 261                                NULL, 0, &ffs_log_changeopt, 0,
 262                                CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
 263 #ifdef UFS_EXTATTR
 264                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 265                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 266                                CTLTYPE_INT, "extattr_autocreate",
 267                                SYSCTL_DESCR("Size of attribute for "
 268                                             "backing file autocreation"),
 269                                NULL, 0, &ufs_extattr_autocreate, 0,
 270                                CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL);
 271
 272 #endif /* UFS_EXTATTR */
 273
 274                 ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
 275                     ffs_snapshot_cb, NULL);
 276                 if (ffs_snapshot_listener == NULL)
 277                         printf("ffs_modcmd: can't listen on system scope.\n");
 278
 279                 break;
 280         case MODULE_CMD_FINI:
 281                 error = vfs_detach(&ffs_vfsops);
 282                 if (error != 0)
 283                         break;
 284                 sysctl_teardown(&ffs_sysctl_log);
 285                 if (ffs_snapshot_listener != NULL)
 286                         kauth_unlisten_scope(ffs_snapshot_listener);
 287                 break;
 288         default:
 289                 error = ENOTTY;
 290                 break;
 291         }
 292
 293         return (error);
 294 }
 295
 296 pool_cache_t ffs_inode_cache;
 297 pool_cache_t ffs_dinode1_cache;
 298 pool_cache_t ffs_dinode2_cache;
 299
 300 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
 301 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
 302
 303 /*
 304  * Called by main() when ffs is going to be mounted as root.
 305  */
 306
 307 int
 308 ffs_mountroot(void)
 309 {
 310         struct fs *fs;
 311         struct mount *mp;
 312         struct lwp *l = curlwp;                 /* XXX */
 313         struct ufsmount *ump;
 314         int error;
 315
 316         if (device_class(root_device) != DV_DISK)
 317                 return (ENODEV);
 318
 319         if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
 320                 vrele(rootvp);
 321                 return (error);
 322         }
 323
 324         /*
 325          * We always need to be able to mount the root file system.
 326          */
 327         mp->mnt_flag |= MNT_FORCE;
 328         if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
 329                 vfs_unbusy(mp, false, NULL);
 330                 vfs_destroy(mp);
 331                 return (error);
 332         }
 333         mp->mnt_flag &= ~MNT_FORCE;
 334         mountlist_append(mp);
 335         ump = VFSTOUFS(mp);
 336         fs = ump->um_fs;
 337         memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
 338         (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 339         (void)ffs_statvfs(mp, &mp->mnt_stat);
 340         vfs_unbusy(mp, false, NULL);
 341         setrootfstime((time_t)fs->fs_time);
 342         return (0);
 343 }
 344
 345 /*
 346  * VFS Operations.
 347  *
 348  * mount system call
 349  */
 350 int
 351 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
 352 {
 353         struct lwp *l = curlwp;
 354         struct vnode *devvp = NULL;
 355         struct ufs_args *args = data;
 356         struct ufsmount *ump = NULL;
 357         struct fs *fs;
 358         int error = 0, flags, update;
 359         mode_t accessmode;
 360
 361         if (args == NULL) {
 362                 DPRINTF("NULL args");
 363                 return EINVAL;
 364         }
 365         if (*data_len < sizeof(*args)) {
 366                 DPRINTF("bad size args %zu != %zu", *data_len, sizeof(*args));
 367                 return EINVAL;
 368         }
 369
 370         if (mp->mnt_flag & MNT_GETARGS) {
 371                 ump = VFSTOUFS(mp);
 372                 if (ump == NULL) {
 373                         DPRINTF("no ump");
 374                         return EIO;
 375                 }
 376                 args->fspec = NULL;
 377                 *data_len = sizeof *args;
 378                 return 0;
 379         }
 380
 381         update = mp->mnt_flag & MNT_UPDATE;
 382
 383         /* Check arguments */
 384         if (args->fspec != NULL) {
 385                 /*
 386                  * Look up the name and verify that it's sane.
 387                  */
 388                 error = namei_simple_user(args->fspec,
 389                     NSM_FOLLOW_NOEMULROOT, &devvp);
 390                 if (error != 0) {
 391                         DPRINTF("namei_simple_user returned %d", error);
 392                         return error;
 393                 }
 394
 395                 if (!update) {
 396                         /*
 397                          * Be sure this is a valid block device
 398                          */
 399                         if (devvp->v_type != VBLK) {
 400                                 DPRINTF("non block device %d", devvp->v_type);
 401                                 error = ENOTBLK;
 402                         } else if (bdevsw_lookup(devvp->v_rdev) == NULL) {
 403                                 DPRINTF("can't find block device 0x%jx",
 404                                     devvp->v_rdev);
 405                                 error = ENXIO;
 406                         }
 407                 } else {
 408                         /*
 409                          * Be sure we're still naming the same device
 410                          * used for our initial mount
 411                          */
 412                         ump = VFSTOUFS(mp);
 413                         if (devvp != ump->um_devvp) {
 414                                 if (devvp->v_rdev != ump->um_devvp->v_rdev) {
 415                                         DPRINTF("wrong device 0x%jx != 0x%jx",
 416                                             (uintmax_t)devvp->v_rdev,
 417                                             (uintmax_t)ump->um_devvp->v_rdev);
 418                                         error = EINVAL;
 419                                 } else {
 420                                         vrele(devvp);
 421                                         devvp = ump->um_devvp;
 422                                         vref(devvp);
 423                                 }
 424                         }
 425                 }
 426         } else {
 427                 if (!update) {
 428                         /* New mounts must have a filename for the device */
 429                         DPRINTF("no filename for mount");
 430                         return EINVAL;
 431                 } else {
 432                         /* Use the extant mount */
 433                         ump = VFSTOUFS(mp);
 434                         devvp = ump->um_devvp;
 435                         vref(devvp);
 436                 }
 437         }
 438
 439         /*
 440          * If mount by non-root, then verify that user has necessary
 441          * permissions on the device.
 442          *
 443          * Permission to update a mount is checked higher, so here we presume
 444          * updating the mount is okay (for example, as far as securelevel goes)
 445          * which leaves us with the normal check.
 446          */
 447         if (error == 0) {
 448                 accessmode = VREAD;
 449                 if (update ?
 450                     (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
 451                     (mp->mnt_flag & MNT_RDONLY) == 0)
 452                         accessmode |= VWRITE;
 453                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 454                 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
 455                     KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
 456                     KAUTH_ARG(accessmode));
 457                 if (error) {
 458                         DPRINTF("kauth returned %d", error);
 459                 }
 460                 VOP_UNLOCK(devvp);
 461         }
 462
 463         if (error) {
 464                 vrele(devvp);
 465                 return (error);
 466         }
 467
 468 #ifdef WAPBL
 469         /* WAPBL can only be enabled on a r/w mount. */
 470         if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
 471                 mp->mnt_flag &= ~MNT_LOG;
 472         }
 473 #else /* !WAPBL */
 474         mp->mnt_flag &= ~MNT_LOG;
 475 #endif /* !WAPBL */
 476
 477         if (!update) {
 478                 int xflags;
 479
 480                 if (mp->mnt_flag & MNT_RDONLY)
 481                         xflags = FREAD;
 482                 else
 483                         xflags = FREAD | FWRITE;
 484                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 485                 error = VOP_OPEN(devvp, xflags, FSCRED);
 486                 VOP_UNLOCK(devvp);
 487                 if (error) {
 488                         DPRINTF("VOP_OPEN returned %d", error);
 489                         goto fail;
 490                 }
 491                 error = ffs_mountfs(devvp, mp, l);
 492                 if (error) {
 493                         DPRINTF("ffs_mountfs returned %d", error);
 494                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 495                         (void)VOP_CLOSE(devvp, xflags, NOCRED);
 496                         VOP_UNLOCK(devvp);
 497                         goto fail;
 498                 }
 499
 500                 ump = VFSTOUFS(mp);
 501                 fs = ump->um_fs;
 502         } else {
 503                 /*
 504                  * Update the mount.
 505                  */
 506
 507                 /*
 508                  * The initial mount got a reference on this
 509                  * device, so drop the one obtained via
 510                  * namei(), above.
 511                  */
 512                 vrele(devvp);
 513
 514                 ump = VFSTOUFS(mp);
 515                 fs = ump->um_fs;
 516                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 517                         /*
 518                          * Changing from r/w to r/o
 519                          */
 520                         flags = WRITECLOSE;
 521                         if (mp->mnt_flag & MNT_FORCE)
 522                                 flags |= FORCECLOSE;
 523                         error = ffs_flushfiles(mp, flags, l);
 524                         if (error == 0)
 525                                 error = UFS_WAPBL_BEGIN(mp);
 526                         if (error == 0 &&
 527                             ffs_cgupdate(ump, MNT_WAIT) == 0 &&
 528                             fs->fs_clean & FS_WASCLEAN) {
 529                                 if (mp->mnt_flag & MNT_SOFTDEP)
 530                                         fs->fs_flags &= ~FS_DOSOFTDEP;
 531                                 fs->fs_clean = FS_ISCLEAN;
 532                                 (void) ffs_sbupdate(ump, MNT_WAIT);
 533                         }
 534                         if (error) {
 535                                 DPRINTF("wapbl %d", error);
 536                                 return error;
 537                         }
 538                         UFS_WAPBL_END(mp);
 539                 }
 540
 541 #ifdef WAPBL
 542                 if ((mp->mnt_flag & MNT_LOG) == 0) {
 543                         error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
 544                         if (error) {
 545                                 DPRINTF("ffs_wapbl_stop returned %d", error);
 546                                 return error;
 547                         }
 548                 }
 549 #endif /* WAPBL */
 550
 551                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 552                         /*
 553                          * Finish change from r/w to r/o
 554                          */
 555                         fs->fs_ronly = 1;
 556                         fs->fs_fmod = 0;
 557                 }
 558
 559                 if (mp->mnt_flag & MNT_RELOAD) {
 560                         error = ffs_reload(mp, l->l_cred, l);
 561                         if (error) {
 562                                 DPRINTF("ffs_reload returned %d", error);
 563                                 return error;
 564                         }
 565                 }
 566
 567                 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
 568                         /*
 569                          * Changing from read-only to read/write
 570                          */
 571 #ifndef QUOTA2
 572                         if (fs->fs_flags & FS_DOQUOTA2) {
 573                                 ump->um_flags |= UFS_QUOTA2;
 574                                 uprintf("%s: options QUOTA2 not enabled%s\n",
 575                                     mp->mnt_stat.f_mntonname,
 576                                     (mp->mnt_flag & MNT_FORCE) ? "" :
 577                                     ", not mounting");
 578                                 DPRINTF("ffs_quota2 %d", EINVAL);
 579                                 return EINVAL;
 580                         }
 581 #endif
 582                         fs->fs_ronly = 0;
 583                         fs->fs_clean <<= 1;
 584                         fs->fs_fmod = 1;
 585 #ifdef WAPBL
 586                         if (fs->fs_flags & FS_DOWAPBL) {
 587                                 const char *nm = mp->mnt_stat.f_mntonname;
 588                                 if (!mp->mnt_wapbl_replay) {
 589                                         printf("%s: log corrupted;"
 590                                             " replay cancelled\n", nm);
 591                                         return EFTYPE;
 592                                 }
 593                                 printf("%s: replaying log to disk\n", nm);
 594                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
 595                                     devvp);
 596                                 if (error) {
 597                                         DPRINTF("%s: wapbl_replay_write %d",
 598                                             nm, error);
 599                                         return error;
 600                                 }
 601                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
 602                                 fs->fs_clean = FS_WASCLEAN;
 603                         }
 604 #endif /* WAPBL */
 605                         if (fs->fs_snapinum[0] != 0)
 606                                 ffs_snapshot_mount(mp);
 607                 }
 608
 609 #ifdef WAPBL
 610                 error = ffs_wapbl_start(mp);
 611                 if (error) {
 612                         DPRINTF("ffs_wapbl_start returned %d", error);
 613                         return error;
 614                 }
 615 #endif /* WAPBL */
 616
 617 #ifdef QUOTA2
 618                 if (!fs->fs_ronly) {
 619                         error = ffs_quota2_mount(mp);
 620                         if (error) {
 621                                 DPRINTF("ffs_quota2_mount returned %d", error);
 622                                 return error;
 623                         }
 624                 }
 625 #endif
 626
 627                 if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata))
 628                         ump->um_discarddata = ffs_discard_init(devvp, fs);
 629
 630                 if (args->fspec == NULL)
 631                         return 0;
 632         }
 633
 634         error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
 635             UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
 636         if (error == 0)
 637                 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
 638                     sizeof(fs->fs_fsmnt));
 639         else {
 640             DPRINTF("set_statvfs_info returned %d", error);
 641         }
 642         fs->fs_flags &= ~FS_DOSOFTDEP;
 643         if (fs->fs_fmod != 0) { /* XXX */
 644                 int err;
 645
 646                 fs->fs_fmod = 0;
 647                 if (fs->fs_clean & FS_WASCLEAN)
 648                         fs->fs_time = time_second;
 649                 else {
 650                         printf("%s: file system not clean (fs_clean=%#x); "
 651                             "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
 652                             fs->fs_clean);
 653                         printf("%s: lost blocks %" PRId64 " files %d\n",
 654                             mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
 655                             fs->fs_pendinginodes);
 656                 }
 657                 err = UFS_WAPBL_BEGIN(mp);
 658                 if (err == 0) {
 659                         (void) ffs_cgupdate(ump, MNT_WAIT);
 660                         UFS_WAPBL_END(mp);
 661                 }
 662         }
 663         if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
 664                 printf("%s: `-o softdep' is no longer supported, "
 665                     "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
 666                 mp->mnt_flag &= ~MNT_SOFTDEP;
 667         }
 668
 669         return (error);
 670
 671 fail:
 672         vrele(devvp);
 673         return (error);
 674 }
 675
 676 /*
 677  * Reload all incore data for a filesystem (used after running fsck on
 678  * the root filesystem and finding things to fix). The filesystem must
 679  * be mounted read-only.
 680  *
 681  * Things to do to update the mount:
 682  *      1) invalidate all cached meta-data.
 683  *      2) re-read superblock from disk.
 684  *      3) re-read summary information from disk.
 685  *      4) invalidate all inactive vnodes.
 686  *      5) invalidate all cached file data.
 687  *      6) re-read inode data for all active vnodes.
 688  */
 689 int
 690 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
 691 {
 692         struct vnode *vp, *devvp;
 693         struct inode *ip;
 694         void *space;
 695         struct buf *bp;
 696         struct fs *fs, *newfs;
 697         int i, bsize, blks, error;
 698         int32_t *lp, fs_sbsize;
 699         struct ufsmount *ump;
 700         daddr_t sblockloc;
 701         struct vnode_iterator *marker;
 702
 703         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 704                 return (EINVAL);
 705
 706         ump = VFSTOUFS(mp);
 707
 708         /*
 709          * Step 1: invalidate all cached meta-data.
 710          */
 711         devvp = ump->um_devvp;
 712         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 713         error = vinvalbuf(devvp, 0, cred, l, 0, 0);
 714         VOP_UNLOCK(devvp);
 715         if (error)
 716                 panic("ffs_reload: dirty1");
 717
 718         /*
 719          * Step 2: re-read superblock from disk. XXX: We don't handle
 720          * possibility that superblock moved. Which implies that we don't
 721          * want its size to change either.
 722          */
 723         fs = ump->um_fs;
 724         fs_sbsize = fs->fs_sbsize;
 725         error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs_sbsize,
 726                       0, &bp);
 727         if (error)
 728                 return (error);
 729         newfs = kmem_alloc(fs_sbsize, KM_SLEEP);
 730         memcpy(newfs, bp->b_data, fs_sbsize);
 731
 732 #ifdef FFS_EI
 733         if (ump->um_flags & UFS_NEEDSWAP) {
 734                 ffs_sb_swap((struct fs *)bp->b_data, newfs);
 735                 newfs->fs_flags |= FS_SWAPPED;
 736         } else
 737 #endif
 738                 newfs->fs_flags &= ~FS_SWAPPED;
 739
 740         brelse(bp, 0);
 741
 742         if ((newfs->fs_magic != FS_UFS1_MAGIC) &&
 743             (newfs->fs_magic != FS_UFS2_MAGIC)) {
 744                 kmem_free(newfs, fs_sbsize);
 745                 return (EIO);           /* XXX needs translation */
 746         }
 747         if (!ffs_superblock_validate(newfs)) {
 748                 kmem_free(newfs, fs_sbsize);
 749                 return (EINVAL);
 750         }
 751
 752         /*
 753          * The current implementation doesn't handle the possibility that
 754          * these values may have changed.
 755          */
 756         if ((newfs->fs_sbsize != fs_sbsize) ||
 757             (newfs->fs_cssize != fs->fs_cssize) ||
 758             (newfs->fs_contigsumsize != fs->fs_contigsumsize) ||
 759             (newfs->fs_ncg != fs->fs_ncg)) {
 760                 kmem_free(newfs, fs_sbsize);
 761                 return (EINVAL);
 762         }
 763
 764         /* Store off old fs_sblockloc for fs_oldfscompat_read. */
 765         sblockloc = fs->fs_sblockloc;
 766         /*
 767          * Copy pointer fields back into superblock before copying in   XXX
 768          * new superblock. These should really be in the ufsmount.      XXX
 769          * Note that important parameters (eg fs_ncg) are unchanged.
 770          */
 771         newfs->fs_csp = fs->fs_csp;
 772         newfs->fs_maxcluster = fs->fs_maxcluster;
 773         newfs->fs_contigdirs = fs->fs_contigdirs;
 774         newfs->fs_ronly = fs->fs_ronly;
 775         newfs->fs_active = fs->fs_active;
 776         memcpy(fs, newfs, (u_int)fs_sbsize);
 777         kmem_free(newfs, fs_sbsize);
 778
 779         /*
 780          * Recheck for Apple UFS filesystem.
 781          */
 782         ump->um_flags &= ~UFS_ISAPPLEUFS;
 783         if (ffs_is_appleufs(devvp, fs)) {
 784 #ifdef APPLE_UFS
 785                 ump->um_flags |= UFS_ISAPPLEUFS;
 786 #else
 787                 DPRINTF("AppleUFS not supported");
 788                 return (EIO); /* XXX: really? */
 789 #endif
 790         }
 791
 792         if (UFS_MPISAPPLEUFS(ump)) {
 793                 /* see comment about NeXT below */
 794                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
 795                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
 796                 mp->mnt_iflag |= IMNT_DTYPE;
 797         } else {
 798                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
 799                 ump->um_dirblksiz = UFS_DIRBLKSIZ;
 800                 if (ump->um_maxsymlinklen > 0)
 801                         mp->mnt_iflag |= IMNT_DTYPE;
 802                 else
 803                         mp->mnt_iflag &= ~IMNT_DTYPE;
 804         }
 805         ffs_oldfscompat_read(fs, ump, sblockloc);
 806
 807         mutex_enter(&ump->um_lock);
 808         ump->um_maxfilesize = fs->fs_maxfilesize;
 809         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
 810                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
 811                     mp->mnt_stat.f_mntonname, fs->fs_flags,
 812                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
 813                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
 814                         mutex_exit(&ump->um_lock);
 815                         return (EINVAL);
 816                 }
 817         }
 818         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 819                 fs->fs_pendingblocks = 0;
 820                 fs->fs_pendinginodes = 0;
 821         }
 822         mutex_exit(&ump->um_lock);
 823
 824         ffs_statvfs(mp, &mp->mnt_stat);
 825         /*
 826          * Step 3: re-read summary information from disk.
 827          */
 828         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 829         space = fs->fs_csp;
 830         for (i = 0; i < blks; i += fs->fs_frag) {
 831                 bsize = fs->fs_bsize;
 832                 if (i + fs->fs_frag > blks)
 833                         bsize = (blks - i) * fs->fs_fsize;
 834                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
 835                               0, &bp);
 836                 if (error) {
 837                         return (error);
 838                 }
 839 #ifdef FFS_EI
 840                 if (UFS_FSNEEDSWAP(fs))
 841                         ffs_csum_swap((struct csum *)bp->b_data,
 842                             (struct csum *)space, bsize);
 843                 else
 844 #endif
 845                         memcpy(space, bp->b_data, (size_t)bsize);
 846                 space = (char *)space + bsize;
 847                 brelse(bp, 0);
 848         }
 849         /*
 850          * We no longer know anything about clusters per cylinder group.
 851          */
 852         if (fs->fs_contigsumsize > 0) {
 853                 lp = fs->fs_maxcluster;
 854                 for (i = 0; i < fs->fs_ncg; i++)
 855                         *lp++ = fs->fs_contigsumsize;
 856         }
 857
 858         vfs_vnode_iterator_init(mp, &marker);
 859         while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) {
 860                 /*
 861                  * Step 4: invalidate all inactive vnodes.
 862                  */
 863                 if (vrecycle(vp))
 864                         continue;
 865                 /*
 866                  * Step 5: invalidate all cached file data.
 867                  */
 868                 if (vn_lock(vp, LK_EXCLUSIVE)) {
 869                         vrele(vp);
 870                         continue;
 871                 }
 872                 if (vinvalbuf(vp, 0, cred, l, 0, 0))
 873                         panic("ffs_reload: dirty2");
 874                 /*
 875                  * Step 6: re-read inode data for all active vnodes.
 876                  */
 877                 ip = VTOI(vp);
 878                 error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)),
 879                               (int)fs->fs_bsize, 0, &bp);
 880                 if (error) {
 881                         vput(vp);
 882                         break;
 883                 }
 884                 ffs_load_inode(bp, ip, fs, ip->i_number);
 885                 brelse(bp, 0);
 886                 vput(vp);
 887         }
 888         vfs_vnode_iterator_destroy(marker);
 889         return (error);
 890 }
 891
 892 /*
 893  * Possible superblock locations ordered from most to least likely.
      * Entries are byte offsets on the device: ffs_mountfs() divides each
      * one by DEV_BSIZE to obtain the block to read, and gives up when it
      * reaches an entry equal to -1.
 894  */
 895 static const int sblock_try[] = SBLOCKSEARCH;
 896
 897
 898 static int
 899 ffs_superblock_validate(struct fs *fs)
 900 {
 901         int32_t i, fs_bshift = 0, fs_fshift = 0, fs_fragshift = 0, fs_frag;
 902         int32_t fs_inopb, fs_cgsize;
 903
 904         /* Check the superblock size */
 905         if (fs->fs_sbsize > SBLOCKSIZE || fs->fs_sbsize < sizeof(struct fs))
 906                 return 0;
 907
 908         /* Check the file system blocksize */
 909         if (fs->fs_bsize > MAXBSIZE || fs->fs_bsize < MINBSIZE)
 910                 return 0;
 911         if (!powerof2(fs->fs_bsize))
 912                 return 0;
 913
 914         /* Check the size of frag blocks */
 915         if (!powerof2(fs->fs_fsize))
 916                 return 0;
 917         if (fs->fs_fsize == 0)
 918                 return 0;
 919
 920         /*
 921          * XXX: these values are just zero-checked to prevent obvious
 922          * bugs. We need more strict checks.
 923          */
 924         if (fs->fs_size == 0)
 925                 return 0;
 926         if (fs->fs_cssize == 0)
 927                 return 0;
 928         if (fs->fs_ipg == 0)
 929                 return 0;
 930         if (fs->fs_fpg == 0)
 931                 return 0;
 932         if (fs->fs_ncg == 0)
 933                 return 0;
 934         if (fs->fs_maxbpg == 0)
 935                 return 0;
 936
 937         /* Check the number of inodes per block */
 938         if (fs->fs_magic == FS_UFS1_MAGIC)
 939                 fs_inopb = fs->fs_bsize / sizeof(struct ufs1_dinode);
 940         else /* fs->fs_magic == FS_UFS2_MAGIC */
 941                 fs_inopb = fs->fs_bsize / sizeof(struct ufs2_dinode);
 942         if (fs->fs_inopb != fs_inopb)
 943                 return 0;
 944
 945         /* Block size cannot be smaller than fragment size */
 946         if (fs->fs_bsize < fs->fs_fsize)
 947                 return 0;
 948
 949         /* Compute fs_bshift and ensure it is consistent */
 950         for (i = fs->fs_bsize; i > 1; i >>= 1)
 951                 fs_bshift++;
 952         if (fs->fs_bshift != fs_bshift)
 953                 return 0;
 954
 955         /* Compute fs_fshift and ensure it is consistent */
 956         for (i = fs->fs_fsize; i > 1; i >>= 1)
 957                 fs_fshift++;
 958         if (fs->fs_fshift != fs_fshift)
 959                 return 0;
 960
 961         /* Compute fs_fragshift and ensure it is consistent */
 962         for (i = fs->fs_frag; i > 1; i >>= 1)
 963                 fs_fragshift++;
 964         if (fs->fs_fragshift != fs_fragshift)
 965                 return 0;
 966
 967         /* Check the masks */
 968         if (fs->fs_bmask != ~(fs->fs_bsize - 1))
 969                 return 0;
 970         if (fs->fs_fmask != ~(fs->fs_fsize - 1))
 971                 return 0;
 972
 973         /*
 974          * Now that the shifts and masks are sanitized, we can use the ffs_ API.
 975          */
 976
 977         /* Check the number of frag blocks */
 978         if ((fs_frag = ffs_numfrags(fs, fs->fs_bsize)) > MAXFRAG)
 979                 return 0;
 980         if (fs->fs_frag != fs_frag)
 981                 return 0;
 982
 983         /* Check the size of cylinder groups */
 984         fs_cgsize = ffs_fragroundup(fs, CGSIZE(fs));
 985         if (fs->fs_cgsize != fs_cgsize) {
 986                 if (fs->fs_cgsize+1 == CGSIZE(fs)) {
 987                         printf("CGSIZE(fs) miscalculated by one - this file "
 988                             "system may have been created by\n"
 989                             "  an old (buggy) userland, see\n"
 990                             "  http://www.NetBSD.org/"
 991                             "docs/ffsv1badsuperblock.html\n");
 992                 } else {
 993                         printf("ERROR: cylinder group size mismatch: "
 994                             "fs_cgsize = 0x%zx, "
 995                             "fs->fs_cgsize = 0x%zx, CGSIZE(fs) = 0x%zx\n",
 996                             (size_t)fs_cgsize, (size_t)fs->fs_cgsize,
 997                             (size_t)CGSIZE(fs));
 998                         return 0;
 999                 }
1000         }
1001
1002         return 1;
1003 }
1004
1005 static int
1006 ffs_is_appleufs(struct vnode *devvp, struct fs *fs)
1007 {
1008         struct dkwedge_info dkw;
1009         int ret = 0;
1010
1011         /*
1012          * First check to see if this is tagged as an Apple UFS filesystem
1013          * in the disklabel.
1014          */
1015         if (getdiskinfo(devvp, &dkw) == 0 &&
1016             strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
1017                 ret = 1;
1018 #ifdef APPLE_UFS
1019         else {
1020                 struct appleufslabel *applefs;
1021                 struct buf *bp;
1022                 daddr_t blkno = APPLEUFS_LABEL_OFFSET / DEV_BSIZE;
1023                 int error;
1024
1025                 /*
1026                  * Manually look for an Apple UFS label, and if a valid one
1027                  * is found, then treat it like an Apple UFS filesystem anyway.
1028                  */
1029                 error = bread(devvp, blkno, APPLEUFS_LABEL_SIZE, 0, &bp);
1030                 if (error) {
1031                         DPRINTF("bread@0x%jx returned %d", (intmax_t)blkno, error);
1032                         return 0;
1033                 }
1034                 applefs = (struct appleufslabel *)bp->b_data;
1035                 error = ffs_appleufs_validate(fs->fs_fsmnt, applefs, NULL);
1036                 if (error == 0)
1037                         ret = 1;
1038                 brelse(bp, 0);
1039         }
1040 #endif
1041
1042         return ret;
1043 }
1044
 1045 /*
 1046  * Common code for mount and mountroot
       *
       * Locates and validates the superblock on devvp, handles a flagged
       * WAPBL journal (replay on WAPBL kernels), loads the cylinder group
       * summary area and fills in the ufsmount that gets attached to mp.
       *
       * devvp: vnode of the device holding the file system.
       * mp:    mount structure being set up; mnt_data is pointed at the
       *        new ufsmount.
       * l:     calling lwp, or NULL, in which case NOCRED is used.
       *
       * Returns 0 on success.  Otherwise an errno value is returned; failures
       * after the ufsmount has been allocated leave through out/out1, which
       * release the superblock copy, the summary area, any held buffer and
       * the ufsmount itself.
 1047  */
 1048 int
 1049 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
 1050 {
 1051         struct ufsmount *ump = NULL;
 1052         struct buf *bp = NULL;
 1053         struct fs *fs = NULL;
 1054         dev_t dev;
 1055         void *space;
 1056         daddr_t sblockloc = 0;
 1057         int blks, fstype = 0;
 1058         int error, i, bsize, ronly, bset = 0;
 1059 #ifdef FFS_EI
 1060         int needswap = 0;               /* keep gcc happy */
 1061 #endif
 1062         int32_t *lp;
 1063         kauth_cred_t cred;
 1064         u_int32_t allocsbsize, fs_sbsize = 0;
 1065
 1066         dev = devvp->v_rdev;
 1067         cred = l ? l->l_cred : NOCRED;
 1068
 1069         /* Flush out any old buffers remaining from a previous use. */
 1070         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 1071         error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
 1072         VOP_UNLOCK(devvp);
 1073         if (error) {
 1074                 DPRINTF("vinvalbuf returned %d", error);
 1075                 return error;
 1076         }
 1077
 1078         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 1079
 1080         error = fstrans_mount(mp);
 1081         if (error) {
 1082                 DPRINTF("fstrans_mount returned %d", error);
 1083                 return error;
 1084         }
 1085
              /*
               * From here on every failure leaves through out/out1 so that the
               * ufsmount and whatever has been attached to it are released.
               */
 1086         ump = kmem_zalloc(sizeof(*ump), KM_SLEEP);
 1087         mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
 1088         error = ffs_snapshot_init(ump);
 1089         if (error) {
 1090                 DPRINTF("ffs_snapshot_init returned %d", error);
 1091                 goto out;
 1092         }
 1093         ump->um_ops = &ffs_ufsops;
 1094
 1095 #ifdef WAPBL
 1096  sbagain:
 1097 #endif
 1098         /*
 1099          * Try reading the superblock in each of its possible locations.
 1100          */
 1101         for (i = 0; ; i++) {
 1102                 daddr_t fs_sblockloc;
 1103
                      /* Release the buffer of the previously probed location. */
 1104                 if (bp != NULL) {
 1105                         brelse(bp, BC_NOCACHE);
 1106                         bp = NULL;
 1107                 }
                      /* -1 ends the list: every candidate location was tried. */
 1108                 if (sblock_try[i] == -1) {
 1109                         DPRINTF("no superblock found");
 1110                         error = EINVAL;
 1111                         fs = NULL;
 1112                         goto out;
 1113                 }
 1114
 1115                 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
 1116                     0, &bp);
 1117                 if (error) {
 1118                         DPRINTF("bread@0x%x returned %d",
 1119                             sblock_try[i] / DEV_BSIZE, error);
 1120                         fs = NULL;
 1121                         goto out;
 1122                 }
                      /* fs aliases the buffer for now; a private copy is made below. */
 1123                 fs = (struct fs *)bp->b_data;
 1124
 1125                 sblockloc = sblock_try[i];
 1126                 DPRINTF("fs_magic 0x%x", fs->fs_magic);
 1127
 1128                 /*
 1129                  * Swap: here, we swap fs->fs_sbsize in order to get the correct
 1130                  * size to read the superblock. Once read, we swap the whole
 1131                  * superblock structure.
 1132                  */
 1133                 if (fs->fs_magic == FS_UFS1_MAGIC) {
 1134                         fs_sbsize = fs->fs_sbsize;
 1135                         fstype = UFS1;
 1136 #ifdef FFS_EI
 1137                         needswap = 0;
 1138                 } else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) {
 1139                         fs_sbsize = bswap32(fs->fs_sbsize);
 1140                         fstype = UFS1;
 1141                         needswap = 1;
 1142 #endif
 1143                 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
 1144                         fs_sbsize = fs->fs_sbsize;
 1145                         fstype = UFS2;
 1146 #ifdef FFS_EI
 1147                         needswap = 0;
 1148                 } else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) {
 1149                         fs_sbsize = bswap32(fs->fs_sbsize);
 1150                         fstype = UFS2;
 1151                         needswap = 1;
 1152 #endif
 1153                 } else
 1154                         continue;
 1155
 1156                 /* fs->fs_sblockloc isn't defined for old filesystems */
 1157                 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
 1158                         if (sblockloc == SBLOCK_UFS2)
 1159                                 /*
 1160                                  * This is likely to be the first alternate
 1161                                  * in a filesystem with 64k blocks.
 1162                                  * Don't use it.
 1163                                  */
 1164                                 continue;
 1165                         fs_sblockloc = sblockloc;
 1166                 } else {
 1167                         fs_sblockloc = fs->fs_sblockloc;
 1168 #ifdef FFS_EI
 1169                         if (needswap)
 1170                                 fs_sblockloc = bswap64(fs_sblockloc);
 1171 #endif
 1172                 }
 1173
 1174                 /* Check we haven't found an alternate superblock */
 1175                 if (fs_sblockloc != sblockloc)
 1176                         continue;
 1177
 1178                 /* Check the superblock size */
 1179                 if (fs_sbsize > SBLOCKSIZE || fs_sbsize < sizeof(struct fs))
 1180                         continue;
                      /* Take a private copy; fs no longer points into bp after this. */
 1181                 fs = kmem_alloc((u_long)fs_sbsize, KM_SLEEP);
 1182                 memcpy(fs, bp->b_data, fs_sbsize);
 1183
 1184                 /* Swap the whole superblock structure, if necessary. */
 1185 #ifdef FFS_EI
 1186                 if (needswap) {
 1187                         ffs_sb_swap((struct fs*)bp->b_data, fs);
 1188                         fs->fs_flags |= FS_SWAPPED;
 1189                 } else
 1190 #endif
 1191                         fs->fs_flags &= ~FS_SWAPPED;
 1192
 1193                 /*
 1194                  * Now that everything is swapped, the superblock is ready to
 1195                  * be sanitized.
 1196                  */
 1197                 if (!ffs_superblock_validate(fs)) {
 1198                         kmem_free(fs, fs_sbsize);
 1199                         continue;
 1200                 }
 1201
 1202                 /* Ok seems to be a good superblock */
 1203                 break;
 1204         }
 1205
 1206         ump->um_fs = fs;
 1207
 1208 #ifdef WAPBL
 1209         if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
 1210                 error = ffs_wapbl_replay_start(mp, fs, devvp);
 1211                 if (error && (mp->mnt_flag & MNT_FORCE) == 0) {
 1212                         DPRINTF("ffs_wapbl_replay_start returned %d", error);
 1213                         goto out;
 1214                 }
 1215                 if (!error) {
 1216                         if (!ronly) {
 1217                                 /* XXX fsmnt may be stale. */
 1218                                 printf("%s: replaying log to disk\n",
 1219                                     fs->fs_fsmnt);
 1220                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
 1221                                     devvp);
 1222                                 if (error) {
 1223                                         DPRINTF("wapbl_replay_write returned %d",
 1224                                             error);
 1225                                         goto out;
 1226                                 }
 1227                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
 1228                                 fs->fs_clean = FS_WASCLEAN;
 1229                         } else {
 1230                                 /* XXX fsmnt may be stale */
 1231                                 printf("%s: replaying log to memory\n",
 1232                                     fs->fs_fsmnt);
 1233                         }
 1234
 1235                         /* Force a re-read of the superblock */
 1236                         brelse(bp, BC_INVAL);
 1237                         bp = NULL;
 1238                         kmem_free(fs, fs_sbsize);
 1239                         fs = NULL;
 1240                         goto sbagain;
 1241                 }
 1242         }
 1243 #else /* !WAPBL */
              /* Without WAPBL support a journalled file system needs MNT_FORCE. */
 1244         if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
 1245                 error = EPERM;
 1246                 DPRINTF("no force %d", error);
 1247                 goto out;
 1248         }
 1249 #endif /* !WAPBL */
 1250
 1251         ffs_oldfscompat_read(fs, ump, sblockloc);
 1252         ump->um_maxfilesize = fs->fs_maxfilesize;
 1253
 1254         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
 1255                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
 1256                     mp->mnt_stat.f_mntonname, fs->fs_flags,
 1257                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
 1258                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
 1259                         error = EINVAL;
 1260                         DPRINTF("no force %d", error);
 1261                         goto out;
 1262                 }
 1263         }
 1264
 1265         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 1266                 fs->fs_pendingblocks = 0;
 1267                 fs->fs_pendinginodes = 0;
 1268         }
 1269
 1270         ump->um_fstype = fstype;
              /*
               * Done with the SBLOCKSIZE-byte buffer the superblock was read into.
               * NOTE(review): it is invalidated when the real superblock is
               * smaller, presumably so that a later read of fs_sbsize bytes at
               * this location does not find a cached buffer of a different
               * size -- confirm.
               */
 1271         if (fs->fs_sbsize < SBLOCKSIZE)
 1272                 brelse(bp, BC_INVAL);
 1273         else
 1274                 brelse(bp, 0);
 1275         bp = NULL;
 1276
 1277         if (ffs_is_appleufs(devvp, fs)) {
 1278 #ifdef APPLE_UFS
 1279                 ump->um_flags |= UFS_ISAPPLEUFS;
 1280 #else
 1281                 DPRINTF("AppleUFS not supported");
 1282                 error = EINVAL;
 1283                 goto out;
 1284 #endif
 1285         }
 1286
 1287 #if 0
 1288 /*
 1289  * XXX This code changes the behaviour of mounting dirty filesystems, to
 1290  * XXX require "mount -f ..." to mount them.  This doesn't match what
 1291  * XXX mount(8) describes and is disabled for now.
 1292  */
 1293         /*
 1294          * If the file system is not clean, don't allow it to be mounted
 1295          * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
 1296          * for the root file system.)
 1297          */
 1298         if (fs->fs_flags & FS_DOWAPBL) {
 1299                 /*
 1300                  * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
 1301                  * bit is set, although there's a window in unmount where it
 1302                  * could be FS_ISCLEAN
 1303                  */
 1304                 if ((mp->mnt_flag & MNT_FORCE) == 0 &&
 1305                     (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
 1306                         error = EPERM;
 1307                         goto out;
 1308                 }
 1309         } else
 1310                 if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
 1311                     (mp->mnt_flag & MNT_FORCE) == 0) {
 1312                         error = EPERM;
 1313                         goto out;
 1314                 }
 1315 #endif
 1316
 1317         /*
 1318          * Verify that we can access the last block in the fs
 1319          * if we're mounting read/write.
 1320          */
 1321         if (!ronly) {
 1322                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1),
 1323                     fs->fs_fsize, 0, &bp);
 1324                 if (error) {
 1325                         DPRINTF("bread@0x%jx returned %d",
 1326                             (intmax_t)FFS_FSBTODB(fs, fs->fs_size - 1),
 1327                             error);
 1328                         bset = BC_INVAL;
 1329                         goto out;
 1330                 }
 1331                 if (bp->b_bcount != fs->fs_fsize) {
 1332                         DPRINTF("bcount %x != fsize %x", bp->b_bcount,
 1333                             fs->fs_fsize);
 1334                         error = EINVAL;
 1335                         bset = BC_INVAL;
 1336                         goto out;
 1337                 }
 1338                 brelse(bp, BC_INVAL);
 1339                 bp = NULL;
 1340         }
 1341
 1342         fs->fs_ronly = ronly;
 1343         /* Don't bump fs_clean if we're replaying journal */
 1344         if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN))) {
 1345                 if (ronly == 0) {
 1346                         fs->fs_clean <<= 1;
 1347                         fs->fs_fmod = 1;
 1348                 }
 1349         }
 1350
              /*
               * One allocation holds, in this order: the summary area, the
               * fs_maxcluster array (one int32_t per cylinder group, only when
               * fs_contigsumsize > 0) and the fs_contigdirs array (one element
               * per cylinder group).  blks is the number of fragments of
               * summary data to read.
               *
               * NOTE(review): the allocation reserves fs_cssize bytes for the
               * summary area, but the loop below copies blks * fs_fsize bytes
               * and places the arrays after them.  The two only agree when
               * fs_cssize is a multiple of fs_fsize.
               */
 1351         bsize = fs->fs_cssize;
 1352         blks = howmany(bsize, fs->fs_fsize);
 1353         if (fs->fs_contigsumsize > 0)
 1354                 bsize += fs->fs_ncg * sizeof(int32_t);
 1355         bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
 1356         allocsbsize = bsize;
 1357         space = kmem_alloc((u_long)allocsbsize, KM_SLEEP);
 1358         fs->fs_csp = space;
 1359
              /* Read the summary area one block at a time; the last may be short. */
 1360         for (i = 0; i < blks; i += fs->fs_frag) {
 1361                 bsize = fs->fs_bsize;
 1362                 if (i + fs->fs_frag > blks)
 1363                         bsize = (blks - i) * fs->fs_fsize;
 1364                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
 1365                               0, &bp);
 1366                 if (error) {
 1367                         DPRINTF("bread@0x%jx %d",
 1368                             (intmax_t)FFS_FSBTODB(fs, fs->fs_csaddr + i),
 1369                             error);
 1370                         goto out1;
 1371                 }
 1372 #ifdef FFS_EI
 1373                 if (needswap)
 1374                         ffs_csum_swap((struct csum *)bp->b_data,
 1375                                 (struct csum *)space, bsize);
 1376                 else
 1377 #endif
 1378                         memcpy(space, bp->b_data, (u_int)bsize);
 1379
 1380                 space = (char *)space + bsize;
 1381                 brelse(bp, 0);
 1382                 bp = NULL;
 1383         }
              /* space now points just past the summary data that was read. */
 1384         if (fs->fs_contigsumsize > 0) {
 1385                 fs->fs_maxcluster = lp = space;
 1386                 for (i = 0; i < fs->fs_ncg; i++)
 1387                         *lp++ = fs->fs_contigsumsize;
 1388                 space = lp;
 1389         }
 1390         bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
 1391         fs->fs_contigdirs = space;
 1392         space = (char *)space + bsize;
 1393         memset(fs->fs_contigdirs, 0, bsize);
 1394
 1395         /* Compatibility for old filesystems - XXX */
 1396         if (fs->fs_avgfilesize <= 0)
 1397                 fs->fs_avgfilesize = AVFILESIZ;
 1398         if (fs->fs_avgfpdir <= 0)
 1399                 fs->fs_avgfpdir = AFPDIR;
 1400         fs->fs_active = NULL;
 1401
              /* Publish the ufsmount and fill in the mount-wide parameters. */
 1402         mp->mnt_data = ump;
 1403         mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
 1404         mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
 1405         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
 1406         mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
 1407         if (UFS_MPISAPPLEUFS(ump)) {
 1408                 /* NeXT used to keep short symlinks in the inode even
 1409                  * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
 1410                  * is probably -1, but we still need to be able to identify
 1411                  * short symlinks.
 1412                  */
 1413                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
 1414                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
 1415                 mp->mnt_iflag |= IMNT_DTYPE;
 1416         } else {
 1417                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
 1418                 ump->um_dirblksiz = UFS_DIRBLKSIZ;
 1419                 if (ump->um_maxsymlinklen > 0)
 1420                         mp->mnt_iflag |= IMNT_DTYPE;
 1421                 else
 1422                         mp->mnt_iflag &= ~IMNT_DTYPE;
 1423         }
 1424         mp->mnt_fs_bshift = fs->fs_bshift;
 1425         mp->mnt_dev_bshift = DEV_BSHIFT;        /* XXX */
 1426         mp->mnt_flag |= MNT_LOCAL;
 1427         mp->mnt_iflag |= IMNT_MPSAFE;
 1428 #ifdef FFS_EI
 1429         if (needswap)
 1430                 ump->um_flags |= UFS_NEEDSWAP;
 1431 #endif
 1432         ump->um_mountp = mp;
 1433         ump->um_dev = dev;
 1434         ump->um_devvp = devvp;
 1435         ump->um_nindir = fs->fs_nindir;
 1436         ump->um_lognindir = ffs(fs->fs_nindir) - 1;
 1437         ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
 1438         ump->um_seqinc = fs->fs_frag;
 1439         for (i = 0; i < MAXQUOTAS; i++)
 1440                 ump->um_quotas[i] = NULLVP;
 1441         spec_node_setmountedfs(devvp, mp);
 1442         if (ronly == 0 && fs->fs_snapinum[0] != 0)
 1443                 ffs_snapshot_mount(mp);
 1444 #ifdef WAPBL
 1445         if (!ronly) {
 1446                 KDASSERT(fs->fs_ronly == 0);
 1447                 /*
 1448                  * ffs_wapbl_start() needs mp->mnt_stat initialised if it
 1449                  * needs to create a new log file in-filesystem.
 1450                  */
 1451                 error = ffs_statvfs(mp, &mp->mnt_stat);
 1452                 if (error) {
 1453                         DPRINTF("ffs_statvfs returned %d", error);
 1454                         goto out1;
 1455                 }
 1456
 1457                 error = ffs_wapbl_start(mp);
 1458                 if (error) {
 1459                         DPRINTF("ffs_wapbl_start returned %d", error);
 1460                         goto out1;
 1461                 }
 1462         }
 1463 #endif /* WAPBL */
 1464         if (ronly == 0) {
 1465 #ifdef QUOTA2
 1466                 error = ffs_quota2_mount(mp);
 1467                 if (error) {
 1468                         DPRINTF("ffs_quota2_mount returned %d", error);
 1469                         goto out1;
 1470                 }
 1471 #else
 1472                 if (fs->fs_flags & FS_DOQUOTA2) {
 1473                         ump->um_flags |= UFS_QUOTA2;
 1474                         uprintf("%s: options QUOTA2 not enabled%s\n",
 1475                             mp->mnt_stat.f_mntonname,
 1476                             (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
 1477                         if ((mp->mnt_flag & MNT_FORCE) == 0) {
 1478                                 error = EINVAL;
 1479                                 DPRINTF("quota disabled %d", error);
 1480                                 goto out1;
 1481                         }
 1482                 }
 1483 #endif
 1484          }
 1485
 1486         if (mp->mnt_flag & MNT_DISCARD)
 1487                 ump->um_discarddata = ffs_discard_init(devvp, fs);
 1488
 1489         return (0);
      /* Failure after the summary area was allocated: free it, then fall into out. */
 1490 out1:
 1491         kmem_free(fs->fs_csp, allocsbsize);
      /* Common failure path: undo everything set up since fstrans_mount(). */
 1492 out:
 1493 #ifdef WAPBL
 1494         if (mp->mnt_wapbl_replay) {
 1495                 wapbl_replay_stop(mp->mnt_wapbl_replay);
 1496                 wapbl_replay_free(mp->mnt_wapbl_replay);
 1497                 mp->mnt_wapbl_replay = 0;
 1498         }
 1499 #endif
 1500
 1501         fstrans_unmount(mp);
              /*
               * fs is non-NULL here only when it is the private kmem copy: the
               * paths that jump here while fs still points into bp set it to
               * NULL first.
               */
 1502         if (fs)
 1503                 kmem_free(fs, fs->fs_sbsize);
 1504         spec_node_setmountedfs(devvp, NULL);
 1505         if (bp)
 1506                 brelse(bp, bset);
 1507         if (ump) {
 1508                 if (ump->um_oldfscompat)
 1509                         kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
 1510                 mutex_destroy(&ump->um_lock);
 1511                 kmem_free(ump, sizeof(*ump));
 1512                 mp->mnt_data = NULL;
 1513         }
 1514         return (error);
 1515 }
1516
1517 /*
1518  * Sanity checks for loading old filesystem superblocks.
1519  * See ffs_oldfscompat_write below for unwound actions.
1520  *
1521  * XXX - Parts get retired eventually.
1522  * Unfortunately new bits get added.
1523  */
1524 static void
1525 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1526 {
1527         off_t maxfilesize;
1528         int32_t *extrasave;
1529
1530         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1531             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1532                 return;
1533
1534         if (!ump->um_oldfscompat)
1535                 ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t),
1536                     KM_SLEEP);
1537
1538         memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1539         extrasave = ump->um_oldfscompat;
1540         extrasave += 512/sizeof(int32_t);
1541         extrasave[0] = fs->fs_old_npsect;
1542         extrasave[1] = fs->fs_old_interleave;
1543         extrasave[2] = fs->fs_old_trackskew;
1544
1545         /* These fields will be overwritten by their
1546          * original values in fs_oldfscompat_write, so it is harmless
1547          * to modify them here.
1548          */
1549         fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1550         fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1551         fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1552         fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1553
1554         fs->fs_maxbsize = fs->fs_bsize;
1555         fs->fs_time = fs->fs_old_time;
1556         fs->fs_size = fs->fs_old_size;
1557         fs->fs_dsize = fs->fs_old_dsize;
1558         fs->fs_csaddr = fs->fs_old_csaddr;
1559         fs->fs_sblockloc = sblockloc;
1560
1561         fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1562
1563         if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1564                 fs->fs_old_nrpos = 8;
1565                 fs->fs_old_npsect = fs->fs_old_nsect;
1566                 fs->fs_old_interleave = 1;
1567                 fs->fs_old_trackskew = 0;
1568         }
1569
1570         if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1571                 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1572                 fs->fs_qbmask = ~fs->fs_bmask;
1573                 fs->fs_qfmask = ~fs->fs_fmask;
1574         }
1575
1576         maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1577         if (fs->fs_maxfilesize > maxfilesize)
1578                 fs->fs_maxfilesize = maxfilesize;
1579
1580         /* Compatibility for old filesystems */
1581         if (fs->fs_avgfilesize <= 0)
1582                 fs->fs_avgfilesize = AVFILESIZ;
1583         if (fs->fs_avgfpdir <= 0)
1584                 fs->fs_avgfpdir = AFPDIR;
1585
1586 #if 0
1587         if (bigcgs) {
1588                 fs->fs_save_cgsize = fs->fs_cgsize;
1589                 fs->fs_cgsize = fs->fs_bsize;
1590         }
1591 #endif
1592 }
1593
1594 /*
1595  * Unwinding superblock updates for old filesystems.
1596  * See ffs_oldfscompat_read above for details.
1597  *
1598  * XXX - Parts get retired eventually.
1599  * Unfortunately new bits get added.
1600  */
1601 static void
1602 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1603 {
1604         int32_t *extrasave;
1605
1606         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1607             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1608                 return;
1609
1610         fs->fs_old_time = fs->fs_time;
1611         fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1612         fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1613         fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1614         fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1615         fs->fs_old_flags = fs->fs_flags;
1616
1617 #if 0
1618         if (bigcgs) {
1619                 fs->fs_cgsize = fs->fs_save_cgsize;
1620         }
1621 #endif
1622
1623         memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1624         extrasave = ump->um_oldfscompat;
1625         extrasave += 512/sizeof(int32_t);
1626         fs->fs_old_npsect = extrasave[0];
1627         fs->fs_old_interleave = extrasave[1];
1628         fs->fs_old_trackskew = extrasave[2];
1629
1630 }
1631
1632 /*
1633  * unmount vfs operation
1634  */
1635 int
1636 ffs_unmount(struct mount *mp, int mntflags)
1637 {
1638         struct lwp *l = curlwp;
1639         struct ufsmount *ump = VFSTOUFS(mp);
1640         struct fs *fs = ump->um_fs;
1641         int error, flags;
1642         u_int32_t bsize;
1643 #ifdef WAPBL
1644         extern int doforce;
1645 #endif
1646
1647         if (ump->um_discarddata) {
1648                 ffs_discard_finish(ump->um_discarddata, mntflags);
1649                 ump->um_discarddata = NULL;
1650         }
1651
1652         flags = 0;
1653         if (mntflags & MNT_FORCE)
1654                 flags |= FORCECLOSE;
1655         if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1656                 return (error);
1657         error = UFS_WAPBL_BEGIN(mp);
1658         if (error == 0)
1659                 if (fs->fs_ronly == 0 &&
1660                     ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1661                     fs->fs_clean & FS_WASCLEAN) {
1662                         fs->fs_clean = FS_ISCLEAN;
1663                         fs->fs_fmod = 0;
1664                         (void) ffs_sbupdate(ump, MNT_WAIT);
1665                 }
1666         if (error == 0)
1667                 UFS_WAPBL_END(mp);
1668 #ifdef WAPBL
1669         KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1670         if (mp->mnt_wapbl_replay) {
1671                 KDASSERT(fs->fs_ronly);
1672                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1673                 wapbl_replay_free(mp->mnt_wapbl_replay);
1674                 mp->mnt_wapbl_replay = 0;
1675         }
1676         error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1677         if (error) {
1678                 return error;
1679         }
1680 #endif /* WAPBL */
1681
1682         if (ump->um_devvp->v_type != VBAD)
1683                 spec_node_setmountedfs(ump->um_devvp, NULL);
1684         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1685         (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1686                 NOCRED);
1687         vput(ump->um_devvp);
1688
1689         bsize = fs->fs_cssize;
1690         if (fs->fs_contigsumsize > 0)
1691                 bsize += fs->fs_ncg * sizeof(int32_t);
1692         bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1693         kmem_free(fs->fs_csp, bsize);
1694
1695         kmem_free(fs, fs->fs_sbsize);
1696         if (ump->um_oldfscompat != NULL)
1697                 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1698         mutex_destroy(&ump->um_lock);
1699         ffs_snapshot_fini(ump);
1700         kmem_free(ump, sizeof(*ump));
1701         mp->mnt_data = NULL;
1702         mp->mnt_flag &= ~MNT_LOCAL;
1703         fstrans_unmount(mp);
1704         return (0);
1705 }
1706
1707 /*
1708  * Flush out all the files in a filesystem.
1709  */
1710 int
1711 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1712 {
1713         extern int doforce;
1714         struct ufsmount *ump;
1715         int error;
1716
1717         if (!doforce)
1718                 flags &= ~FORCECLOSE;
1719         ump = VFSTOUFS(mp);
1720 #ifdef QUOTA
1721         if ((error = quota1_umount(mp, flags)) != 0)
1722                 return (error);
1723 #endif
1724 #ifdef QUOTA2
1725         if ((error = quota2_umount(mp, flags)) != 0)
1726                 return (error);
1727 #endif
1728 #ifdef UFS_EXTATTR
1729         if (ump->um_fstype == UFS1) {
1730                 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)
1731                         ufs_extattr_stop(mp, l);
1732                 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)
1733                         ufs_extattr_uepm_destroy(&ump->um_extattr);
1734                 mp->mnt_flag &= ~MNT_EXTATTR;
1735         }
1736 #endif
1737         if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1738                 return (error);
1739         ffs_snapshot_unmount(mp);
1740         /*
1741          * Flush all the files.
1742          */
1743         error = vflush(mp, NULLVP, flags);
1744         if (error)
1745                 return (error);
1746         /*
1747          * Flush filesystem metadata.
1748          */
1749         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1750         error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1751         VOP_UNLOCK(ump->um_devvp);
1752         if (flags & FORCECLOSE) /* XXXDBJ */
1753                 error = 0;
1754
1755 #ifdef WAPBL
1756         if (error)
1757                 return error;
1758         if (mp->mnt_wapbl) {
1759                 error = wapbl_flush(mp->mnt_wapbl, 1);
1760                 if (flags & FORCECLOSE)
1761                         error = 0;
1762         }
1763 #endif
1764
1765         return (error);
1766 }
1767
1768 /*
1769  * Get file system statistics.
1770  */
1771 int
1772 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1773 {
1774         struct ufsmount *ump;
1775         struct fs *fs;
1776
1777         ump = VFSTOUFS(mp);
1778         fs = ump->um_fs;
1779         mutex_enter(&ump->um_lock);
1780         sbp->f_bsize = fs->fs_bsize;
1781         sbp->f_frsize = fs->fs_fsize;
1782         sbp->f_iosize = fs->fs_bsize;
1783         sbp->f_blocks = fs->fs_dsize;
1784         sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1785             fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks);
1786         sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1787             fs->fs_minfree) / (u_int64_t) 100;
1788         if (sbp->f_bfree > sbp->f_bresvd)
1789                 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1790         else
1791                 sbp->f_bavail = 0;
1792         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
1793         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1794         sbp->f_favail = sbp->f_ffree;
1795         sbp->f_fresvd = 0;
1796         mutex_exit(&ump->um_lock);
1797         copy_statvfs_info(sbp, mp);
1798
1799         return (0);
1800 }
1801
/*
 * Context passed by ffs_sync() to ffs_sync_selector() through the
 * vnode iterator.
 */
struct ffs_sync_ctx {
	int waitfor;		/* waitfor argument given to ffs_sync() */
	bool is_suspending;	/* fstrans state was FSTRANS_SUSPENDING */
};
1806
1807 static bool
1808 ffs_sync_selector(void *cl, struct vnode *vp)
1809 {
1810         struct ffs_sync_ctx *c = cl;
1811         struct inode *ip;
1812
1813         ip = VTOI(vp);
1814         /*
1815          * Skip the vnode/inode if inaccessible.
1816          */
1817         if (ip == NULL || vp->v_type == VNON)
1818                 return false;
1819
1820         /*
1821          * We deliberately update inode times here.  This will
1822          * prevent a massive queue of updates accumulating, only
1823          * to be handled by a call to unmount.
1824          *
1825          * XXX It would be better to have the syncer trickle these
1826          * out.  Adjustment needed to allow registering vnodes for
1827          * sync when the vnode is clean, but the inode dirty.  Or
1828          * have ufs itself trickle out inode updates.
1829          *
1830          * If doing a lazy sync, we don't care about metadata or
1831          * data updates, because they are handled by each vnode's
1832          * synclist entry.  In this case we are only interested in
1833          * writing back modified inodes.
1834          */
1835         if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1836             IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1837             (c->waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1838             UVM_OBJ_IS_CLEAN(&vp->v_uobj))))
1839                 return false;
1840
1841         if (vp->v_type == VBLK && c->is_suspending)
1842                 return false;
1843
1844         return true;
1845 }
1846
1847 /*
1848  * Go through the disk queues to initiate sandbagged IO;
1849  * go through the inodes to write those that have been modified;
1850  * initiate the writing of the super block if it has been modified.
1851  *
1852  * Note: we are always called with the filesystem marked `MPBUSY'.
1853  */
1854 int
1855 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1856 {
1857         struct vnode *vp;
1858         struct ufsmount *ump = VFSTOUFS(mp);
1859         struct fs *fs;
1860         struct vnode_iterator *marker;
1861         int error, allerror = 0;
1862         bool is_suspending;
1863         struct ffs_sync_ctx ctx;
1864
1865         fs = ump->um_fs;
1866         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1867                 printf("fs = %s\n", fs->fs_fsmnt);
1868                 panic("update: rofs mod");
1869         }
1870
1871         fstrans_start(mp, FSTRANS_SHARED);
1872         is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1873         /*
1874          * Write back each (modified) inode.
1875          */
1876         vfs_vnode_iterator_init(mp, &marker);
1877
1878         ctx.waitfor = waitfor;
1879         ctx.is_suspending = is_suspending;
1880         while ((vp = vfs_vnode_iterator_next(marker, ffs_sync_selector, &ctx)))
1881         {
1882                 error = vn_lock(vp, LK_EXCLUSIVE);
1883                 if (error) {
1884                         vrele(vp);
1885                         continue;
1886                 }
1887                 if (waitfor == MNT_LAZY) {
1888                         error = UFS_WAPBL_BEGIN(vp->v_mount);
1889                         if (!error) {
1890                                 error = ffs_update(vp, NULL, NULL,
1891                                     UPDATE_CLOSE);
1892                                 UFS_WAPBL_END(vp->v_mount);
1893                         }
1894                 } else {
1895                         error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1896                             (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1897                 }
1898                 if (error)
1899                         allerror = error;
1900                 vput(vp);
1901         }
1902         vfs_vnode_iterator_destroy(marker);
1903
1904         /*
1905          * Force stale file system control information to be flushed.
1906          */
1907         if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1908             !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1909                 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1910                 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1911                     (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1912                     0, 0)) != 0)
1913                         allerror = error;
1914                 VOP_UNLOCK(ump->um_devvp);
1915         }
1916 #if defined(QUOTA) || defined(QUOTA2)
1917         qsync(mp);
1918 #endif
1919         /*
1920          * Write back modified superblock.
1921          */
1922         if (fs->fs_fmod != 0) {
1923                 fs->fs_fmod = 0;
1924                 fs->fs_time = time_second;
1925                 error = UFS_WAPBL_BEGIN(mp);
1926                 if (error)
1927                         allerror = error;
1928                 else {
1929                         if ((error = ffs_cgupdate(ump, waitfor)))
1930                                 allerror = error;
1931                         UFS_WAPBL_END(mp);
1932                 }
1933         }
1934
1935 #ifdef WAPBL
1936         if (mp->mnt_wapbl) {
1937                 error = wapbl_flush(mp->mnt_wapbl, 0);
1938                 if (error)
1939                         allerror = error;
1940         }
1941 #endif
1942
1943         fstrans_done(mp);
1944         return (allerror);
1945 }
1946
1947 /*
1948  * Load inode from disk and initialize vnode.
1949  */
1950 static int
1951 ffs_init_vnode(struct ufsmount *ump, struct vnode *vp, ino_t ino)
1952 {
1953         struct fs *fs;
1954         struct inode *ip;
1955         struct buf *bp;
1956         int error;
1957
1958         fs = ump->um_fs;
1959
1960         /* Read in the disk contents for the inode. */
1961         error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)),
1962                       (int)fs->fs_bsize, 0, &bp);
1963         if (error)
1964                 return error;
1965
1966         /* Allocate and initialize inode. */
1967         ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1968         memset(ip, 0, sizeof(struct inode));
1969         ip->i_ump = ump;
1970         ip->i_fs = fs;
1971         ip->i_dev = ump->um_dev;
1972         ip->i_number = ino;
1973         if (ump->um_fstype == UFS1)
1974                 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1975                     PR_WAITOK);
1976         else
1977                 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1978                     PR_WAITOK);
1979         ffs_load_inode(bp, ip, fs, ino);
1980         brelse(bp, 0);
1981         ip->i_vnode = vp;
1982 #if defined(QUOTA) || defined(QUOTA2)
1983         ufsquota_init(ip);
1984 #endif
1985
1986         /* Initialise vnode with this inode. */
1987         vp->v_tag = VT_UFS;
1988         vp->v_op = ffs_vnodeop_p;
1989         vp->v_vflag |= VV_LOCKSWORK;
1990         vp->v_data = ip;
1991
1992         /* Initialize genfs node. */
1993         genfs_node_init(vp, &ffs_genfsops);
1994
1995         return 0;
1996 }
1997
1998 /*
1999  * Undo ffs_init_vnode().
2000  */
2001 static void
2002 ffs_deinit_vnode(struct ufsmount *ump, struct vnode *vp)
2003 {
2004         struct inode *ip = VTOI(vp);
2005
2006         if (ump->um_fstype == UFS1)
2007                 pool_cache_put(ffs_dinode1_cache, ip->i_din.ffs1_din);
2008         else
2009                 pool_cache_put(ffs_dinode2_cache, ip->i_din.ffs2_din);
2010         pool_cache_put(ffs_inode_cache, ip);
2011
2012         genfs_node_destroy(vp);
2013         vp->v_data = NULL;
2014 }
2015
2016 /*
2017  * Read an inode from disk and initialize this vnode / inode pair.
2018  * Caller assures no other thread will try to load this inode.
2019  */
2020 int
2021 ffs_loadvnode(struct mount *mp, struct vnode *vp,
2022     const void *key, size_t key_len, const void **new_key)
2023 {
2024         ino_t ino;
2025         struct fs *fs;
2026         struct inode *ip;
2027         struct ufsmount *ump;
2028         int error;
2029
2030         KASSERT(key_len == sizeof(ino));
2031         memcpy(&ino, key, key_len);
2032         ump = VFSTOUFS(mp);
2033         fs = ump->um_fs;
2034
2035         error = ffs_init_vnode(ump, vp, ino);
2036         if (error)
2037                 return error;
2038
2039         ip = VTOI(vp);
2040         if (ip->i_mode == 0) {
2041                 ffs_deinit_vnode(ump, vp);
2042
2043                 return ENOENT;
2044         }
2045
2046         /* Initialize the vnode from the inode. */
2047         ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
2048
2049         /* Finish inode initialization.  */
2050         ip->i_devvp = ump->um_devvp;
2051         vref(ip->i_devvp);
2052
2053         /*
2054          * Ensure that uid and gid are correct. This is a temporary
2055          * fix until fsck has been changed to do the update.
2056          */
2057
2058         if (fs->fs_old_inodefmt < FS_44INODEFMT) {              /* XXX */
2059                 ip->i_uid = ip->i_ffs1_ouid;                    /* XXX */
2060                 ip->i_gid = ip->i_ffs1_ogid;                    /* XXX */
2061         }                                                       /* XXX */
2062         uvm_vnp_setsize(vp, ip->i_size);
2063         *new_key = &ip->i_number;
2064         return 0;
2065 }
2066
2067 /*
2068  * Create a new inode on disk and initialize this vnode / inode pair.
2069  */
2070 int
2071 ffs_newvnode(struct mount *mp, struct vnode *dvp, struct vnode *vp,
2072     struct vattr *vap, kauth_cred_t cred,
2073     size_t *key_len, const void **new_key)
2074 {
2075         ino_t ino;
2076         struct fs *fs;
2077         struct inode *ip;
2078         struct timespec ts;
2079         struct ufsmount *ump;
2080         int error, mode;
2081
2082         KASSERT(dvp->v_mount == mp);
2083         KASSERT(vap->va_type != VNON);
2084
2085         *key_len = sizeof(ino);
2086         ump = VFSTOUFS(mp);
2087         fs = ump->um_fs;
2088         mode = MAKEIMODE(vap->va_type, vap->va_mode);
2089
2090         /* Allocate fresh inode. */
2091         error = ffs_valloc(dvp, mode, cred, &ino);
2092         if (error)
2093                 return error;
2094
2095         /* Attach inode to vnode. */
2096         error = ffs_init_vnode(ump, vp, ino);
2097         if (error) {
2098                 if (UFS_WAPBL_BEGIN(mp) == 0) {
2099                         ffs_vfree(dvp, ino, mode);
2100                         UFS_WAPBL_END(mp);
2101                 }
2102                 return error;
2103         }
2104
2105         ip = VTOI(vp);
2106         if (ip->i_mode || DIP(ip, size) || DIP(ip, blocks)) {
2107                 printf("free ino %" PRId64 " on %s:\n", ino, fs->fs_fsmnt);
2108                 printf("dmode %x mode %x dgen %x gen %x\n",
2109                     DIP(ip, mode), ip->i_mode,
2110                     DIP(ip, gen), ip->i_gen);
2111                 printf("size %" PRIx64 " blocks %" PRIx64 "\n",
2112                     DIP(ip, size), DIP(ip, blocks));
2113                 panic("ffs_init_vnode: dup alloc");
2114         }
2115
2116         /* Set uid / gid. */
2117         if (cred == NOCRED || cred == FSCRED) {
2118                 ip->i_gid = 0;
2119                 ip->i_uid = 0;
2120         } else {
2121                 ip->i_gid = VTOI(dvp)->i_gid;
2122                 ip->i_uid = kauth_cred_geteuid(cred);
2123         }
2124         DIP_ASSIGN(ip, gid, ip->i_gid);
2125         DIP_ASSIGN(ip, uid, ip->i_uid);
2126
2127 #if defined(QUOTA) || defined(QUOTA2)
2128         error = UFS_WAPBL_BEGIN(mp);
2129         if (error) {
2130                 ffs_deinit_vnode(ump, vp);
2131
2132                 return error;
2133         }
2134         error = chkiq(ip, 1, cred, 0);
2135         if (error) {
2136                 ffs_vfree(dvp, ino, mode);
2137                 UFS_WAPBL_END(mp);
2138                 ffs_deinit_vnode(ump, vp);
2139
2140                 return error;
2141         }
2142         UFS_WAPBL_END(mp);
2143 #endif
2144
2145         /* Set type and finalize. */
2146         ip->i_flags = 0;
2147         DIP_ASSIGN(ip, flags, 0);
2148         ip->i_mode = mode;
2149         DIP_ASSIGN(ip, mode, mode);
2150         if (vap->va_rdev != VNOVAL) {
2151                 /*
2152                  * Want to be able to use this to make badblock
2153                  * inodes, so don't truncate the dev number.
2154                  */
2155                 if (ump->um_fstype == UFS1)
2156                         ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev,
2157                             UFS_MPNEEDSWAP(ump));
2158                 else
2159                         ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev,
2160                             UFS_MPNEEDSWAP(ump));
2161         }
2162         ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
2163         ip->i_devvp = ump->um_devvp;
2164         vref(ip->i_devvp);
2165
2166         /* Set up a new generation number for this inode.  */
2167         ip->i_gen++;
2168         DIP_ASSIGN(ip, gen, ip->i_gen);
2169         if (fs->fs_magic == FS_UFS2_MAGIC) {
2170                 vfs_timestamp(&ts);
2171                 ip->i_ffs2_birthtime = ts.tv_sec;
2172                 ip->i_ffs2_birthnsec = ts.tv_nsec;
2173         }
2174
2175         uvm_vnp_setsize(vp, ip->i_size);
2176         *new_key = &ip->i_number;
2177         return 0;
2178 }
2179
2180 /*
2181  * File handle to vnode
2182  *
2183  * Have to be really careful about stale file handles:
2184  * - check that the inode number is valid
2185  * - call ffs_vget() to get the locked inode
2186  * - check for an unallocated inode (i_mode == 0)
2187  * - check that the given client host has export rights and return
2188  *   those rights via. exflagsp and credanonp
2189  */
2190 int
2191 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
2192 {
2193         struct ufid ufh;
2194         struct fs *fs;
2195
2196         if (fhp->fid_len != sizeof(struct ufid))
2197                 return EINVAL;
2198
2199         memcpy(&ufh, fhp, sizeof(ufh));
2200         fs = VFSTOUFS(mp)->um_fs;
2201         if (ufh.ufid_ino < UFS_ROOTINO ||
2202             ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
2203                 return (ESTALE);
2204         return (ufs_fhtovp(mp, &ufh, vpp));
2205 }
2206
2207 /*
2208  * Vnode pointer to File handle
2209  */
2210 /* ARGSUSED */
2211 int
2212 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
2213 {
2214         struct inode *ip;
2215         struct ufid ufh;
2216
2217         if (*fh_size < sizeof(struct ufid)) {
2218                 *fh_size = sizeof(struct ufid);
2219                 return E2BIG;
2220         }
2221         ip = VTOI(vp);
2222         *fh_size = sizeof(struct ufid);
2223         memset(&ufh, 0, sizeof(ufh));
2224         ufh.ufid_len = sizeof(struct ufid);
2225         ufh.ufid_ino = ip->i_number;
2226         ufh.ufid_gen = ip->i_gen;
2227         memcpy(fhp, &ufh, sizeof(ufh));
2228         return (0);
2229 }
2230
2231 void
2232 ffs_init(void)
2233 {
2234         if (ffs_initcount++ > 0)
2235                 return;
2236
2237         ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
2238             "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
2239         ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
2240             "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
2241         ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
2242             "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
2243         ufs_init();
2244 }
2245
/*
 * Reinitialisation hook: FFS keeps no state of its own to redo here
 * and simply forwards to ufs_reinit().
 */
void
ffs_reinit(void)
{

	ufs_reinit();
}
2251
2252 void
2253 ffs_done(void)
2254 {
2255         if (--ffs_initcount > 0)
2256                 return;
2257
2258         ufs_done();
2259         pool_cache_destroy(ffs_dinode2_cache);
2260         pool_cache_destroy(ffs_dinode1_cache);
2261         pool_cache_destroy(ffs_inode_cache);
2262 }
2263
2264 /*
2265  * Write a superblock and associated information back to disk.
2266  */
2267 int
2268 ffs_sbupdate(struct ufsmount *mp, int waitfor)
2269 {
2270         struct fs *fs = mp->um_fs;
2271         struct buf *bp;
2272         int error;
2273         u_int32_t saveflag;
2274
2275         error = ffs_getblk(mp->um_devvp,
2276             fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
2277             fs->fs_sbsize, false, &bp);
2278         if (error)
2279                 return error;
2280         saveflag = fs->fs_flags & FS_INTERNAL;
2281         fs->fs_flags &= ~FS_INTERNAL;
2282
2283         memcpy(bp->b_data, fs, fs->fs_sbsize);
2284
2285         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
2286 #ifdef FFS_EI
2287         if (mp->um_flags & UFS_NEEDSWAP)
2288                 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
2289 #endif
2290         fs->fs_flags |= saveflag;
2291
2292         if (waitfor == MNT_WAIT)
2293                 error = bwrite(bp);
2294         else
2295                 bawrite(bp);
2296         return (error);
2297 }
2298
2299 int
2300 ffs_cgupdate(struct ufsmount *mp, int waitfor)
2301 {
2302         struct fs *fs = mp->um_fs;
2303         struct buf *bp;
2304         int blks;
2305         void *space;
2306         int i, size, error = 0, allerror = 0;
2307
2308         allerror = ffs_sbupdate(mp, waitfor);
2309         blks = howmany(fs->fs_cssize, fs->fs_fsize);
2310         space = fs->fs_csp;
2311         for (i = 0; i < blks; i += fs->fs_frag) {
2312                 size = fs->fs_bsize;
2313                 if (i + fs->fs_frag > blks)
2314                         size = (blks - i) * fs->fs_fsize;
2315                 error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i),
2316                     FFS_NOBLK, size, false, &bp);
2317                 if (error)
2318                         break;
2319 #ifdef FFS_EI
2320                 if (mp->um_flags & UFS_NEEDSWAP)
2321                         ffs_csum_swap((struct csum*)space,
2322                             (struct csum*)bp->b_data, size);
2323                 else
2324 #endif
2325                         memcpy(bp->b_data, space, (u_int)size);
2326                 space = (char *)space + size;
2327                 if (waitfor == MNT_WAIT)
2328                         error = bwrite(bp);
2329                 else
2330                         bawrite(bp);
2331         }
2332         if (!allerror && error)
2333                 allerror = error;
2334         return (allerror);
2335 }
2336
/*
 * Extended attribute control.
 *
 * With UFS_EXTATTR configured, requests on a UFS1 mount go to
 * ufs_extattrctl(); everything else is handled by
 * vfs_stdextattrctl().  The chosen handler's result is returned.
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (VFSTOUFS(mp)->um_fstype == UFS1) {
		return ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname);
	}
#endif
	return vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname);
}
2351
2352 int
2353 ffs_suspendctl(struct mount *mp, int cmd)
2354 {
2355         int error;
2356         struct lwp *l = curlwp;
2357
2358         switch (cmd) {
2359         case SUSPEND_SUSPEND:
2360                 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2361                         return error;
2362                 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2363                 if (error == 0)
2364                         error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2365 #ifdef WAPBL
2366                 if (error == 0 && mp->mnt_wapbl)
2367                         error = wapbl_flush(mp->mnt_wapbl, 1);
2368 #endif
2369                 if (error != 0) {
2370                         (void) fstrans_setstate(mp, FSTRANS_NORMAL);
2371                         return error;
2372                 }
2373                 return 0;
2374
2375         case SUSPEND_RESUME:
2376                 return fstrans_setstate(mp, FSTRANS_NORMAL);
2377
2378         default:
2379                 return EINVAL;
2380         }
2381 }
2382
2383 /*
2384  * Synch vnode for a mounted file system.
2385  */
2386 static int
2387 ffs_vfs_fsync(vnode_t *vp, int flags)
2388 {
2389         int error, i, pflags;
2390 #ifdef WAPBL
2391         struct mount *mp;
2392 #endif
2393
2394         KASSERT(vp->v_type == VBLK);
2395         KASSERT(spec_node_getmountedfs(vp) != NULL);
2396
2397         /*
2398          * Flush all dirty data associated with the vnode.
2399          */
2400         pflags = PGO_ALLPAGES | PGO_CLEANIT;
2401         if ((flags & FSYNC_WAIT) != 0)
2402                 pflags |= PGO_SYNCIO;
2403         mutex_enter(vp->v_interlock);
2404         error = VOP_PUTPAGES(vp, 0, 0, pflags);
2405         if (error)
2406                 return error;
2407
2408 #ifdef WAPBL
2409         mp = spec_node_getmountedfs(vp);
2410         if (mp && mp->mnt_wapbl) {
2411                 /*
2412                  * Don't bother writing out metadata if the syncer is
2413                  * making the request.  We will let the sync vnode
2414                  * write it out in a single burst through a call to
2415                  * VFS_SYNC().
2416                  */
2417                 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2418                         return 0;
2419
2420                 /*
2421                  * Don't flush the log if the vnode being flushed
2422                  * contains no dirty buffers that could be in the log.
2423                  */
2424                 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2425                         error = wapbl_flush(mp->mnt_wapbl, 0);
2426                         if (error)
2427                                 return error;
2428                 }
2429
2430                 if ((flags & FSYNC_WAIT) != 0) {
2431                         mutex_enter(vp->v_interlock);
2432                         while (vp->v_numoutput)
2433                                 cv_wait(&vp->v_cv, vp->v_interlock);
2434                         mutex_exit(vp->v_interlock);
2435                 }
2436
2437                 return 0;
2438         }
2439 #endif /* WAPBL */
2440
2441         error = vflushbuf(vp, flags);
2442         if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2443                 i = 1;
2444                 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2445                     kauth_cred_get());
2446         }
2447
2448         return error;
2449 }