sys/ufs/ffs/ffs_vfsops.c

   1 /*      $NetBSD: ffs_vfsops.c,v 1.271 2011/11/14 18:35:14 hannken Exp $ */
   2
   3 /*-
   4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to The NetBSD Foundation
   8  * by Wasabi Systems, Inc, and by Andrew Doran.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  * Copyright (c) 1989, 1991, 1993, 1994
  34  *      The Regents of the University of California.  All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  61  */
  62
  63 #include <sys/cdefs.h>
  64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.271 2011/11/14 18:35:14 hannken Exp $");
  65
  66 #if defined(_KERNEL_OPT)
  67 #include "opt_ffs.h"
  68 #include "opt_quota.h"
  69 #include "opt_wapbl.h"
  70 #endif
  71
  72 #include <sys/param.h>
  73 #include <sys/systm.h>
  74 #include <sys/namei.h>
  75 #include <sys/proc.h>
  76 #include <sys/kernel.h>
  77 #include <sys/vnode.h>
  78 #include <sys/socket.h>
  79 #include <sys/mount.h>
  80 #include <sys/buf.h>
  81 #include <sys/device.h>
  82 #include <sys/disk.h>
  83 #include <sys/mbuf.h>
  84 #include <sys/file.h>
  85 #include <sys/disklabel.h>
  86 #include <sys/ioctl.h>
  87 #include <sys/errno.h>
  88 #include <sys/malloc.h>
  89 #include <sys/pool.h>
  90 #include <sys/lock.h>
  91 #include <sys/sysctl.h>
  92 #include <sys/conf.h>
  93 #include <sys/kauth.h>
  94 #include <sys/wapbl.h>
  95 #include <sys/fstrans.h>
  96 #include <sys/module.h>
  97
  98 #include <miscfs/genfs/genfs.h>
  99 #include <miscfs/specfs/specdev.h>
 100
 101 #include <ufs/ufs/quota.h>
 102 #include <ufs/ufs/ufsmount.h>
 103 #include <ufs/ufs/inode.h>
 104 #include <ufs/ufs/dir.h>
 105 #include <ufs/ufs/ufs_extern.h>
 106 #include <ufs/ufs/ufs_bswap.h>
 107 #include <ufs/ufs/ufs_wapbl.h>
 108
 109 #include <ufs/ffs/fs.h>
 110 #include <ufs/ffs/ffs_extern.h>
 111
 /*
  * Declare this file as a kernel module of class MODULE_CLASS_VFS named
  * "ffs".  NOTE(review): the NULL third argument presumably means "no
  * required modules" -- confirm against module(9).
  */
 112 MODULE(MODULE_CLASS_VFS, ffs, NULL);
 113
 /* Forward declaration; installed as an entry of ffs_vfsops below. */
 114 static int      ffs_vfs_fsync(vnode_t *, int);
 115
 /*
  * Log of the sysctl nodes created by ffs_modcmd(MODULE_CMD_INIT); handed
  * to sysctl_teardown() on MODULE_CMD_FINI.
  */
 116 static struct sysctllog *ffs_sysctl_log;
 117
 118 /* how many times ffs_init() was called */
 119 int ffs_initcount = 0;
 120
 /* Vnode operation vector descriptors; defined outside this file. */
 121 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
 122 extern const struct vnodeopv_desc ffs_specop_opv_desc;
 123 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
 124
 /*
  * NULL-terminated list of the three descriptors above.  It is referenced
  * from ffs_vfsops, so it travels with the vfsops passed to vfs_attach().
  */
 125 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
 126         &ffs_vnodeop_opv_desc,
 127         &ffs_specop_opv_desc,
 128         &ffs_fifoop_opv_desc,
 129         NULL,
 130 };
 131
 /*
  * VFS operations switch for FFS, attached and detached by ffs_modcmd().
  *
  * This is a positional initializer: the order of the entries must match
  * the member order of struct vfsops, which is declared outside this file.
  * FFS-specific entry points (ffs_*) are mixed with shared UFS (ufs_*) and
  * genfs (genfs_*) implementations.
  */
 132 struct vfsops ffs_vfsops = {
 133         MOUNT_FFS,
 134         sizeof (struct ufs_args),
 135         ffs_mount,
 136         ufs_start,
 137         ffs_unmount,
 138         ufs_root,
 139         ufs_quotactl,
 140         ffs_statvfs,
 141         ffs_sync,
 142         ffs_vget,
 143         ffs_fhtovp,
 144         ffs_vptofh,
 145         ffs_init,
 146         ffs_reinit,
 147         ffs_done,
 148         ffs_mountroot,
 149         ffs_snapshot,
 150         ffs_extattrctl,
 151         ffs_suspendctl,
 152         genfs_renamelock_enter,
 153         genfs_renamelock_exit,
 154         ffs_vfs_fsync,
 155         ffs_vnodeopv_descs,
 156         0,                      /* NOTE(review): presumably a reference count -- confirm against struct vfsops */
 157         { NULL, NULL },         /* NOTE(review): presumably list linkage -- confirm against struct vfsops */
 158 };
 159
 /*
  * genfs callbacks for FFS: only the size hook is FFS-specific, the
  * others are the shared ufs_* / genfs_* implementations.  This table is
  * not referenced in the visible part of the file.
  */
 160 static const struct genfs_ops ffs_genfsops = {
 161         .gop_size = ffs_gop_size,
 162         .gop_alloc = ufs_gop_alloc,
 163         .gop_write = genfs_gop_write,
 164         .gop_markupdate = ufs_gop_markupdate,
 165 };
 166
 /*
  * Hooks the shared UFS code uses to call back into FFS.  This table is
  * not referenced in the visible part of the file.
  */
 167 static const struct ufs_ops ffs_ufsops = {
 168         .uo_itimes = ffs_itimes,
 169         .uo_update = ffs_update,
 170         .uo_truncate = ffs_truncate,
 171         .uo_valloc = ffs_valloc,
 172         .uo_vfree = ffs_vfree,
 173         .uo_balloc = ffs_balloc,
 /* nullop is cast to this hook's signature; presumably a no-op for FFS. */
 174         .uo_unmark_vnode = (void (*)(vnode_t *))nullop,
 175 };
 176
 177 static int
 178 ffs_modcmd(modcmd_t cmd, void *arg)
 179 {
 180         int error;
 181
 182 #if 0
 183         extern int doasyncfree;
 184 #endif
 185 #ifdef UFS_EXTATTR
 186         extern int ufs_extattr_autocreate;
 187 #endif
 188         extern int ffs_log_changeopt;
 189
 190         switch (cmd) {
 191         case MODULE_CMD_INIT:
 192                 error = vfs_attach(&ffs_vfsops);
 193                 if (error != 0)
 194                         break;
 195
 196                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 197                                CTLFLAG_PERMANENT,
 198                                CTLTYPE_NODE, "vfs", NULL,
 199                                NULL, 0, NULL, 0,
 200                                CTL_VFS, CTL_EOL);
 201                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 202                                CTLFLAG_PERMANENT,
 203                                CTLTYPE_NODE, "ffs",
 204                                SYSCTL_DESCR("Berkeley Fast File System"),
 205                                NULL, 0, NULL, 0,
 206                                CTL_VFS, 1, CTL_EOL);
 207                 /*
 208                  * @@@ should we even bother with these first three?
 209                  */
 210                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 211                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 212                                CTLTYPE_INT, "doclusterread", NULL,
 213                                sysctl_notavail, 0, NULL, 0,
 214                                CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
 215                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 216                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 217                                CTLTYPE_INT, "doclusterwrite", NULL,
 218                                sysctl_notavail, 0, NULL, 0,
 219                                CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
 220                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 221                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 222                                CTLTYPE_INT, "doreallocblks", NULL,
 223                                sysctl_notavail, 0, NULL, 0,
 224                                CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
 225 #if 0
 226                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 227                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 228                                CTLTYPE_INT, "doasyncfree",
 229                                SYSCTL_DESCR("Release dirty blocks asynchronously"),
 230                                NULL, 0, &doasyncfree, 0,
 231                                CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
 232 #endif
 233                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 234                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 235                                CTLTYPE_INT, "log_changeopt",
 236                                SYSCTL_DESCR("Log changes in optimization strategy"),
 237                                NULL, 0, &ffs_log_changeopt, 0,
 238                                CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
 239 #ifdef UFS_EXTATTR
 240                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 241                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 242                                CTLTYPE_INT, "extattr_autocreate",
 243                                SYSCTL_DESCR("Size of attribute for "
 244                                             "backing file autocreation"),
 245                                NULL, 0, &ufs_extattr_autocreate, 0,
 246                                CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL);
 247
 248 #endif /* UFS_EXTATTR */
 249
 250                 break;
 251         case MODULE_CMD_FINI:
 252                 error = vfs_detach(&ffs_vfsops);
 253                 if (error != 0)
 254                         break;
 255                 sysctl_teardown(&ffs_sysctl_log);
 256                 break;
 257         default:
 258                 error = ENOTTY;
 259                 break;
 260         }
 261
 262         return (error);
 263 }
 264
 /*
  * Pool caches used by FFS; they are not set up in the visible part of
  * the file.  NOTE(review): going by the names these hold in-core inodes
  * and UFS1 / UFS2 on-disk inodes -- confirm at the initialisation site.
  */
 265 pool_cache_t ffs_inode_cache;
 266 pool_cache_t ffs_dinode1_cache;
 267 pool_cache_t ffs_dinode2_cache;
 268
 /* Superblock compatibility helpers defined outside the visible part of the file. */
 269 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
 270 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
 271
 272 /*
 273  * Called by main() when ffs is going to be mounted as root.
 274  */
 275
 276 int
 277 ffs_mountroot(void)
 278 {
 279         struct fs *fs;
 280         struct mount *mp;
 281         struct lwp *l = curlwp;                 /* XXX */
 282         struct ufsmount *ump;
 283         int error;
 284
 285         if (device_class(root_device) != DV_DISK)
 286                 return (ENODEV);
 287
 288         if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
 289                 vrele(rootvp);
 290                 return (error);
 291         }
 292
 293         /*
 294          * We always need to be able to mount the root file system.
 295          */
 296         mp->mnt_flag |= MNT_FORCE;
 297         if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
 298                 vfs_unbusy(mp, false, NULL);
 299                 vfs_destroy(mp);
 300                 return (error);
 301         }
 302         mp->mnt_flag &= ~MNT_FORCE;
 303         mutex_enter(&mountlist_lock);
 304         CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 305         mutex_exit(&mountlist_lock);
 306         ump = VFSTOUFS(mp);
 307         fs = ump->um_fs;
 308         memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
 309         (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 310         (void)ffs_statvfs(mp, &mp->mnt_stat);
 311         vfs_unbusy(mp, false, NULL);
 312         setrootfstime((time_t)fs->fs_time);
 313         return (0);
 314 }
 315
 316 /*
 317  * VFS Operations.
 318  *
 319  * mount system call
      *
      * Serves three kinds of request, selected by mp->mnt_flag:
      *  - MNT_GETARGS: report the mount arguments (args->fspec is returned
      *    as NULL).
      *  - new mount (MNT_UPDATE clear): look up the block device named by
      *    args->fspec, open it and call ffs_mountfs().
      *  - MNT_UPDATE: change an existing mount (r/w -> r/o, MNT_RELOAD,
      *    r/o -> r/w, WAPBL log stop/start, quota2).
      *
      * mp        mount being created, queried or updated
      * path      mount point path; passed to set_statvfs_info() as a
      *           UIO_USERSPACE pointer
      * data      struct ufs_args supplied by the caller
      * data_len  in: size of *data, must be at least sizeof(struct ufs_args);
      *           out: set to sizeof(struct ufs_args) for MNT_GETARGS
      *
      * Returns 0 on success, otherwise an errno value.
 320  */
 321 int
 322 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
 323 {
 324         struct lwp *l = curlwp;
 325         struct vnode *devvp = NULL;
 326         struct ufs_args *args = data;
 327         struct ufsmount *ump = NULL;
 328         struct fs *fs;
 329         int error = 0, flags, update;
 330         mode_t accessmode;
 331
             /* The argument buffer must be able to hold a struct ufs_args. */
 332         if (*data_len < sizeof *args)
 333                 return EINVAL;
 334
             /* Query only: needs an existing ufsmount, touches nothing else. */
 335         if (mp->mnt_flag & MNT_GETARGS) {
 336                 ump = VFSTOUFS(mp);
 337                 if (ump == NULL)
 338                         return EIO;
 339                 args->fspec = NULL;
 340                 *data_len = sizeof *args;
 341                 return 0;
 342         }
 343
             /* Non-zero when this call modifies an already mounted file system. */
 344         update = mp->mnt_flag & MNT_UPDATE;
 345
 346         /* Check arguments */
 347         if (args->fspec != NULL) {
 348                 /*
 349                  * Look up the name and verify that it's sane.
 350                  */
 351                 error = namei_simple_user(args->fspec,
 352                                         NSM_FOLLOW_NOEMULROOT, &devvp);
 353                 if (error != 0)
 354                         return (error);
 355
 356                 if (!update) {
 357                         /*
 358                          * Be sure this is a valid block device
 359                          */
 360                         if (devvp->v_type != VBLK)
 361                                 error = ENOTBLK;
 362                         else if (bdevsw_lookup(devvp->v_rdev) == NULL)
 363                                 error = ENXIO;
 364                 } else {
 365                         /*
 366                          * Be sure we're still naming the same device
 367                          * used for our initial mount
 368                          */
 369                         ump = VFSTOUFS(mp);
 370                         if (devvp != ump->um_devvp) {
 371                                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 372                                         error = EINVAL;
 373                                 else {
                                             /*
                                              * Same device number, different vnode:
                                              * move our reference over to the vnode
                                              * recorded in the ufsmount.
                                              */
 374                                         vrele(devvp);
 375                                         devvp = ump->um_devvp;
 376                                         vref(devvp);
 377                                 }
 378                         }
 379                 }
 380         } else {
 381                 if (!update) {
 382                         /* New mounts must have a filename for the device */
 383                         return (EINVAL);
 384                 } else {
 385                         /* Use the extant mount */
 386                         ump = VFSTOUFS(mp);
 387                         devvp = ump->um_devvp;
 388                         vref(devvp);
 389                 }
 390         }
 391
 392         /*
 393          * If mount by non-root, then verify that user has necessary
 394          * permissions on the device.
 395          *
 396          * Permission to update a mount is checked higher, so here we presume
 397          * updating the mount is okay (for example, as far as securelevel goes)
 398          * which leaves us with the normal check.
 399          */
 400         if (error == 0) {
                     /*
                      * Read access is always required.  Write access is
                      * required when an update asks for r/w (IMNT_WANTRDWR)
                      * or when a new mount is not MNT_RDONLY.
                      */
 401                 accessmode = VREAD;
 402                 if (update ?
 403                     (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
 404                     (mp->mnt_flag & MNT_RDONLY) == 0)
 405                         accessmode |= VWRITE;
 406                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 407                 error = genfs_can_mount(devvp, accessmode, l->l_cred);
 408                 VOP_UNLOCK(devvp);
 409         }
 410
             /* Any error so far: drop the devvp reference taken above. */
 411         if (error) {
 412                 vrele(devvp);
 413                 return (error);
 414         }
 415
 416 #ifdef WAPBL
 417         /* WAPBL can only be enabled on a r/w mount. */
 418         if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
 419                 mp->mnt_flag &= ~MNT_LOG;
 420         }
 421 #else /* !WAPBL */
 422         mp->mnt_flag &= ~MNT_LOG;
 423 #endif /* !WAPBL */
 424
 425         if (!update) {
                     /*
                      * New mount: open the device read-only or read/write to
                      * match MNT_RDONLY, then mount.  If ffs_mountfs() fails
                      * the device is closed again with the same flags.
                      */
 426                 int xflags;
 427
 428                 if (mp->mnt_flag & MNT_RDONLY)
 429                         xflags = FREAD;
 430                 else
 431                         xflags = FREAD | FWRITE;
 432                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 433                 error = VOP_OPEN(devvp, xflags, FSCRED);
 434                 VOP_UNLOCK(devvp);
 435                 if (error)
 436                         goto fail;
 437                 error = ffs_mountfs(devvp, mp, l);
 438                 if (error) {
 439                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 440                         (void)VOP_CLOSE(devvp, xflags, NOCRED);
 441                         VOP_UNLOCK(devvp);
 442                         goto fail;
 443                 }
 444
 445                 ump = VFSTOUFS(mp);
 446                 fs = ump->um_fs;
 447         } else {
 448                 /*
 449                  * Update the mount.
 450                  */
 451
 452                 /*
 453                  * The initial mount got a reference on this
 454                  * device, so drop the one obtained via
 455                  * namei(), above.
 456                  */
 457                 vrele(devvp);
 458
 459                 ump = VFSTOUFS(mp);
 460                 fs = ump->um_fs;
 461                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 462                         /*
 463                          * Changing from r/w to r/o
 464                          */
 465                         flags = WRITECLOSE;
 466                         if (mp->mnt_flag & MNT_FORCE)
 467                                 flags |= FORCECLOSE;
 468                         error = ffs_flushfiles(mp, flags, l);
 469                         if (error == 0)
 470                                 error = UFS_WAPBL_BEGIN(mp);
                             /*
                              * The file system is marked FS_ISCLEAN only if
                              * ffs_cgupdate() succeeded and FS_WASCLEAN is
                              * set.  A failing ffs_cgupdate() is not stored
                              * in `error'; it only skips the marking.
                              */
 471                         if (error == 0 &&
 472                             ffs_cgupdate(ump, MNT_WAIT) == 0 &&
 473                             fs->fs_clean & FS_WASCLEAN) {
 474                                 if (mp->mnt_flag & MNT_SOFTDEP)
 475                                         fs->fs_flags &= ~FS_DOSOFTDEP;
 476                                 fs->fs_clean = FS_ISCLEAN;
 477                                 (void) ffs_sbupdate(ump, MNT_WAIT);
 478                         }
 479                         if (error == 0)
 480                                 UFS_WAPBL_END(mp);
 481                         if (error)
 482                                 return (error);
 483                 }
 484
 485 #ifdef WAPBL
                     /* Logging not (or no longer) requested: stop the log. */
 486                 if ((mp->mnt_flag & MNT_LOG) == 0) {
 487                         error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
 488                         if (error)
 489                                 return error;
 490                 }
 491 #endif /* WAPBL */
 492
 493                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 494                         /*
 495                          * Finish change from r/w to r/o
 496                          */
 497                         fs->fs_ronly = 1;
 498                         fs->fs_fmod = 0;
 499                 }
 500
 501                 if (mp->mnt_flag & MNT_RELOAD) {
 502                         error = ffs_reload(mp, l->l_cred, l);
 503                         if (error)
 504                                 return (error);
 505                 }
 506
 507                 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
 508                         /*
 509                          * Changing from read-only to read/write
 510                          */
 511 #ifndef QUOTA2
                             /*
                              * The superblock asks for quota2 but this kernel
                              * was built without it: the upgrade is refused
                              * with EINVAL regardless of MNT_FORCE (MNT_FORCE
                              * only changes the message text).
                              */
 512                         if (fs->fs_flags & FS_DOQUOTA2) {
 513                                 ump->um_flags |= UFS_QUOTA2;
 514                                 uprintf("%s: options QUOTA2 not enabled%s\n",
 515                                     mp->mnt_stat.f_mntonname,
 516                                     (mp->mnt_flag & MNT_FORCE) ? "" :
 517                                     ", not mounting");
 518                                 return EINVAL;
 519                         }
 520 #endif
 521                         fs->fs_ronly = 0;
                             /*
                              * NOTE(review): the shift presumably turns
                              * FS_ISCLEAN into FS_WASCLEAN -- confirm against
                              * the flag values in fs.h.
                              */
 522                         fs->fs_clean <<= 1;
 523                         fs->fs_fmod = 1;
 524 #ifdef WAPBL
 525                         if (fs->fs_flags & FS_DOWAPBL) {
 526                                 printf("%s: replaying log to disk\n",
 527                                     fs->fs_fsmnt);
 528                                 KDASSERT(mp->mnt_wapbl_replay);
                                     /*
                                      * NOTE(review): the reference on devvp
                                      * taken in this function was released by
                                      * the vrele() above; this use relies on
                                      * the reference held by the mount itself.
                                      */
 529                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
 530                                                            devvp);
 531                                 if (error) {
 532                                         return error;
 533                                 }
 534                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
 535                                 fs->fs_clean = FS_WASCLEAN;
 536                         }
 537 #endif /* WAPBL */
 538                         if (fs->fs_snapinum[0] != 0)
 539                                 ffs_snapshot_mount(mp);
 540                 }
 541
 542 #ifdef WAPBL
 543                 error = ffs_wapbl_start(mp);
 544                 if (error)
 545                         return error;
 546 #endif /* WAPBL */
 547
 548 #ifdef QUOTA2
 549                 if (!fs->fs_ronly) {
 550                         error = ffs_quota2_mount(mp);
 551                         if (error) {
 552                                 return error;
 553                         }
 554                 }
 555 #endif
                     /*
                      * An update that named no device is complete here; the
                      * statvfs and superblock bookkeeping below is skipped.
                      */
 556                 if (args->fspec == NULL)
 557                         return 0;
 558         }
 559
             /*
              * Record the mount point and device names in mp->mnt_stat and,
              * if that worked, copy the mount point name into the superblock.
              * NOTE(review): strncpy() with the full destination size leaves
              * fs_fsmnt without a terminating NUL when the name is at least
              * that long -- confirm whether that is acceptable for this field.
              */
 560         error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
 561             UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
 562         if (error == 0)
 563                 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
 564                     sizeof(fs->fs_fsmnt));
             /* The soft dependency flag is always cleared in the superblock. */
 565         fs->fs_flags &= ~FS_DOSOFTDEP;
 566         if (fs->fs_fmod != 0) { /* XXX */
 567                 int err;
 568
                     /*
                      * Superblock was marked modified: clear the mark, stamp
                      * fs_time if the file system was clean (otherwise warn),
                      * then call ffs_cgupdate() inside a WAPBL transaction.
                      * Failures in this step are not reported to the caller.
                      */
 569                 fs->fs_fmod = 0;
 570                 if (fs->fs_clean & FS_WASCLEAN)
 571                         fs->fs_time = time_second;
 572                 else {
 573                         printf("%s: file system not clean (fs_clean=%#x); "
 574                             "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
 575                             fs->fs_clean);
 576                         printf("%s: lost blocks %" PRId64 " files %d\n",
 577                             mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
 578                             fs->fs_pendinginodes);
 579                 }
 580                 err = UFS_WAPBL_BEGIN(mp);
 581                 if (err == 0) {
 582                         (void) ffs_cgupdate(ump, MNT_WAIT);
 583                         UFS_WAPBL_END(mp);
 584                 }
 585         }
             /* MNT_SOFTDEP is only diagnosed and then dropped from the mount flags. */
 586         if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
 587                 printf("%s: `-o softdep' is no longer supported, "
 588                     "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
 589                 mp->mnt_flag &= ~MNT_SOFTDEP;
 590         }
 591
             /*
              * NOTE(review): `error' here is the result of set_statvfs_info();
              * a failure is returned without undoing a new mount that
              * ffs_mountfs() already completed -- confirm this is intended.
              */
 592         return (error);
 593
             /* Failed new mount: drop the reference on the device vnode. */
 594 fail:
 595         vrele(devvp);
 596         return (error);
 597 }
 598
 599 /*
 600  * Reload all incore data for a filesystem (used after running fsck on
 601  * the root filesystem and finding things to fix). The filesystem must
 602  * be mounted read-only.
 603  *
 604  * Things to do to update the mount:
 605  *      1) invalidate all cached meta-data.
 606  *      2) re-read superblock from disk.
 607  *      3) re-read summary information from disk.
 608  *      4) invalidate all inactive vnodes.
 609  *      5) invalidate all cached file data.
 610  *      6) re-read inode data for all active vnodes.
 611  */
 612 int
 613 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
 614 {
 615         struct vnode *vp, *mvp, *devvp;
 616         struct inode *ip;
 617         void *space;
 618         struct buf *bp;
 619         struct fs *fs, *newfs;
 620         struct dkwedge_info dkw;
 621         int i, bsize, blks, error;
 622         int32_t *lp;
 623         struct ufsmount *ump;
 624         daddr_t sblockloc;
 625
 626         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 627                 return (EINVAL);
 628
 629         ump = VFSTOUFS(mp);
 630         /*
 631          * Step 1: invalidate all cached meta-data.
 632          */
 633         devvp = ump->um_devvp;
 634         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 635         error = vinvalbuf(devvp, 0, cred, l, 0, 0);
 636         VOP_UNLOCK(devvp);
 637         if (error)
 638                 panic("ffs_reload: dirty1");
 639         /*
 640          * Step 2: re-read superblock from disk.
 641          */
 642         fs = ump->um_fs;
 643
 644         /* XXX we don't handle possibility that superblock moved. */
 645         error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize,
 646                       NOCRED, 0, &bp);
 647         if (error) {
 648                 brelse(bp, 0);
 649                 return (error);
 650         }
 651         newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
 652         memcpy(newfs, bp->b_data, fs->fs_sbsize);
 653 #ifdef FFS_EI
 654         if (ump->um_flags & UFS_NEEDSWAP) {
 655                 ffs_sb_swap((struct fs*)bp->b_data, newfs);
 656                 fs->fs_flags |= FS_SWAPPED;
 657         } else
 658 #endif
 659                 fs->fs_flags &= ~FS_SWAPPED;
 660         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 661              newfs->fs_magic != FS_UFS2_MAGIC)||
 662              newfs->fs_bsize > MAXBSIZE ||
 663              newfs->fs_bsize < sizeof(struct fs)) {
 664                 brelse(bp, 0);
 665                 free(newfs, M_UFSMNT);
 666                 return (EIO);           /* XXX needs translation */
 667         }
 668         /* Store off old fs_sblockloc for ffs_oldfscompat_read(). */
 669         sblockloc = fs->fs_sblockloc;
 670         /*
 671          * Copy pointer fields back into superblock before copying in   XXX
 672          * new superblock. These should really be in the ufsmount.      XXX
 673          * Note that important parameters (eg fs_ncg) are unchanged.
 674          */
 675         newfs->fs_csp = fs->fs_csp;
 676         newfs->fs_maxcluster = fs->fs_maxcluster;
 677         newfs->fs_contigdirs = fs->fs_contigdirs;
 678         newfs->fs_ronly = fs->fs_ronly;
 679         newfs->fs_active = fs->fs_active;
 680         memcpy(fs, newfs, (u_int)fs->fs_sbsize);
 681         brelse(bp, 0);
 682         free(newfs, M_UFSMNT);
 683
 684         /* Recheck for apple UFS filesystem */
 685         ump->um_flags &= ~UFS_ISAPPLEUFS;
 686         /* First check to see if this is tagged as an Apple UFS filesystem
 687          * in the disklabel
 688          */
 689         if (getdiskinfo(devvp, &dkw) == 0 &&
 690             strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
 691                 ump->um_flags |= UFS_ISAPPLEUFS;
 692 #ifdef APPLE_UFS
 693         else {
 694                 /* Manually look for an apple ufs label, and if a valid one
 695                  * is found, then treat it like an Apple UFS filesystem anyway
 696                  *
 697                  * EINVAL is most probably a blocksize or alignment problem,
 698                  * it is unlikely that this is an Apple UFS filesystem then.
 699                  */
 700                 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
 701                         APPLEUFS_LABEL_SIZE, cred, 0, &bp);
 702                 if (error && error != EINVAL) {
 703                         brelse(bp, 0);
 704                         return (error);
 705                 }
 706                 if (error == 0) {
 707                         error = ffs_appleufs_validate(fs->fs_fsmnt,
 708                                 (struct appleufslabel *)bp->b_data, NULL);
 709                         if (error == 0)
 710                                 ump->um_flags |= UFS_ISAPPLEUFS;
 711                 }
 712                 brelse(bp, 0);
 713                 bp = NULL;
 714         }
 715 #else
 716         if (ump->um_flags & UFS_ISAPPLEUFS)
 717                 return (EIO);
 718 #endif
 719
 720         if (UFS_MPISAPPLEUFS(ump)) {
 721                 /* see comment about NeXT below */
 722                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
 723                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
 724                 mp->mnt_iflag |= IMNT_DTYPE;
 725         } else {
 726                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
 727                 ump->um_dirblksiz = DIRBLKSIZ;
 728                 if (ump->um_maxsymlinklen > 0)
 729                         mp->mnt_iflag |= IMNT_DTYPE;
 730                 else
 731                         mp->mnt_iflag &= ~IMNT_DTYPE;
 732         }
 733         ffs_oldfscompat_read(fs, ump, sblockloc);
 734
 735         mutex_enter(&ump->um_lock);
 736         ump->um_maxfilesize = fs->fs_maxfilesize;
 737         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
 738                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
 739                     mp->mnt_stat.f_mntonname, fs->fs_flags,
 740                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
 741                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
 742                         mutex_exit(&ump->um_lock);
 743                         return (EINVAL);
 744                 }
 745         }
 746         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 747                 fs->fs_pendingblocks = 0;
 748                 fs->fs_pendinginodes = 0;
 749         }
 750         mutex_exit(&ump->um_lock);
 751
 752         ffs_statvfs(mp, &mp->mnt_stat);
 753         /*
 754          * Step 3: re-read summary information from disk.
 755          */
 756         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 757         space = fs->fs_csp;
 758         for (i = 0; i < blks; i += fs->fs_frag) {
 759                 bsize = fs->fs_bsize;
 760                 if (i + fs->fs_frag > blks)
 761                         bsize = (blks - i) * fs->fs_fsize;
 762                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
 763                               NOCRED, 0, &bp);
 764                 if (error) {
 765                         brelse(bp, 0);
 766                         return (error);
 767                 }
 768 #ifdef FFS_EI
 769                 if (UFS_FSNEEDSWAP(fs))
 770                         ffs_csum_swap((struct csum *)bp->b_data,
 771                             (struct csum *)space, bsize);
 772                 else
 773 #endif
 774                         memcpy(space, bp->b_data, (size_t)bsize);
 775                 space = (char *)space + bsize;
 776                 brelse(bp, 0);
 777         }
 778         if (fs->fs_snapinum[0] != 0)
 779                 ffs_snapshot_mount(mp);
 780         /*
 781          * We no longer know anything about clusters per cylinder group.
 782          */
 783         if (fs->fs_contigsumsize > 0) {
 784                 lp = fs->fs_maxcluster;
 785                 for (i = 0; i < fs->fs_ncg; i++)
 786                         *lp++ = fs->fs_contigsumsize;
 787         }
 788
 789         /* Allocate a marker vnode. */
 790         mvp = vnalloc(mp);
 791         /*
 792          * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
 793          * and vclean() can be called indirectly
 794          */
 795         mutex_enter(&mntvnode_lock);
 796  loop:
 797         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
 798                 vmark(mvp, vp);
 799                 if (vp->v_mount != mp || vismarker(vp))
 800                         continue;
 801                 /*
 802                  * Step 4: invalidate all inactive vnodes.
 803                  */
 804                 if (vrecycle(vp, &mntvnode_lock, l)) {
 805                         mutex_enter(&mntvnode_lock);
 806                         (void)vunmark(mvp);
 807                         goto loop;
 808                 }
 809                 /*
 810                  * Step 5: invalidate all cached file data.
 811                  */
 812                 mutex_enter(vp->v_interlock);
 813                 mutex_exit(&mntvnode_lock);
 814                 if (vget(vp, LK_EXCLUSIVE)) {
 815                         (void)vunmark(mvp);
 816                         goto loop;
 817                 }
 818                 if (vinvalbuf(vp, 0, cred, l, 0, 0))
 819                         panic("ffs_reload: dirty2");
 820                 /*
 821                  * Step 6: re-read inode data for all active vnodes.
 822                  */
 823                 ip = VTOI(vp);
 824                 error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 825                               (int)fs->fs_bsize, NOCRED, 0, &bp);
 826                 if (error) {
 827                         brelse(bp, 0);
 828                         vput(vp);
 829                         (void)vunmark(mvp);
 830                         break;
 831                 }
 832                 ffs_load_inode(bp, ip, fs, ip->i_number);
 833                 brelse(bp, 0);
 834                 vput(vp);
 835                 mutex_enter(&mntvnode_lock);
 836         }
 837         mutex_exit(&mntvnode_lock);
 838         vnfree(mvp);
 839         return (error);
 840 }
 841
/*
 * Possible superblock locations ordered from most to least likely.
 *
 * Each entry is a byte offset on the device: ffs_mountfs() divides it by
 * DEV_BSIZE before handing it to bread(), and stops searching when it
 * reaches an entry equal to -1.
 */
static const int sblock_try[] = SBLOCKSEARCH;
 846
 847 /*
 848  * Common code for mount and mountroot
 849  */
 850 int
 851 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
 852 {
 853         struct ufsmount *ump;
 854         struct buf *bp;
 855         struct fs *fs;
 856         dev_t dev;
 857         struct dkwedge_info dkw;
 858         void *space;
 859         daddr_t sblockloc, fsblockloc;
 860         int blks, fstype;
 861         int error, i, bsize, ronly, bset = 0;
 862 #ifdef FFS_EI
 863         int needswap = 0;               /* keep gcc happy */
 864 #endif
 865         int32_t *lp;
 866         kauth_cred_t cred;
 867         u_int32_t sbsize = 8192;        /* keep gcc happy*/
 868         int32_t fsbsize;
 869
 870         dev = devvp->v_rdev;
 871         cred = l ? l->l_cred : NOCRED;
 872
 873         /* Flush out any old buffers remaining from a previous use. */
 874         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 875         error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
 876         VOP_UNLOCK(devvp);
 877         if (error)
 878                 return (error);
 879
 880         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 881
 882         bp = NULL;
 883         ump = NULL;
 884         fs = NULL;
 885         sblockloc = 0;
 886         fstype = 0;
 887
 888         error = fstrans_mount(mp);
 889         if (error)
 890                 return error;
 891
 892         ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
 893         memset(ump, 0, sizeof *ump);
 894         mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
 895         error = ffs_snapshot_init(ump);
 896         if (error)
 897                 goto out;
 898         ump->um_ops = &ffs_ufsops;
 899
 900 #ifdef WAPBL
 901  sbagain:
 902 #endif
 903         /*
 904          * Try reading the superblock in each of its possible locations.
 905          */
 906         for (i = 0; ; i++) {
 907                 if (bp != NULL) {
 908                         brelse(bp, BC_NOCACHE);
 909                         bp = NULL;
 910                 }
 911                 if (sblock_try[i] == -1) {
 912                         error = EINVAL;
 913                         fs = NULL;
 914                         goto out;
 915                 }
 916                 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred,
 917                               0, &bp);
 918                 if (error) {
 919                         fs = NULL;
 920                         goto out;
 921                 }
 922                 fs = (struct fs*)bp->b_data;
 923                 fsblockloc = sblockloc = sblock_try[i];
 924                 if (fs->fs_magic == FS_UFS1_MAGIC) {
 925                         sbsize = fs->fs_sbsize;
 926                         fstype = UFS1;
 927                         fsbsize = fs->fs_bsize;
 928 #ifdef FFS_EI
 929                         needswap = 0;
 930                 } else if (fs->fs_magic == bswap32(FS_UFS1_MAGIC)) {
 931                         sbsize = bswap32(fs->fs_sbsize);
 932                         fstype = UFS1;
 933                         fsbsize = bswap32(fs->fs_bsize);
 934                         needswap = 1;
 935 #endif
 936                 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
 937                         sbsize = fs->fs_sbsize;
 938                         fstype = UFS2;
 939                         fsbsize = fs->fs_bsize;
 940 #ifdef FFS_EI
 941                         needswap = 0;
 942                 } else if (fs->fs_magic == bswap32(FS_UFS2_MAGIC)) {
 943                         sbsize = bswap32(fs->fs_sbsize);
 944                         fstype = UFS2;
 945                         fsbsize = bswap32(fs->fs_bsize);
 946                         needswap = 1;
 947 #endif
 948                 } else
 949                         continue;
 950
 951
 952                 /* fs->fs_sblockloc isn't defined for old filesystems */
 953                 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
 954                         if (sblockloc == SBLOCK_UFS2)
 955                                 /*
 956                                  * This is likely to be the first alternate
 957                                  * in a filesystem with 64k blocks.
 958                                  * Don't use it.
 959                                  */
 960                                 continue;
 961                         fsblockloc = sblockloc;
 962                 } else {
 963                         fsblockloc = fs->fs_sblockloc;
 964 #ifdef FFS_EI
 965                         if (needswap)
 966                                 fsblockloc = bswap64(fsblockloc);
 967 #endif
 968                 }
 969
 970                 /* Check we haven't found an alternate superblock */
 971                 if (fsblockloc != sblockloc)
 972                         continue;
 973
 974                 /* Validate size of superblock */
 975                 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
 976                         continue;
 977
 978                 /* Check that we can handle the file system blocksize */
 979                 if (fsbsize > MAXBSIZE) {
 980                         printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
 981                             fsbsize, MAXBSIZE);
 982                         continue;
 983                 }
 984
 985                 /* Ok seems to be a good superblock */
 986                 break;
 987         }
 988
 989         fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
 990         memcpy(fs, bp->b_data, sbsize);
 991         ump->um_fs = fs;
 992
 993 #ifdef FFS_EI
 994         if (needswap) {
 995                 ffs_sb_swap((struct fs*)bp->b_data, fs);
 996                 fs->fs_flags |= FS_SWAPPED;
 997         } else
 998 #endif
 999                 fs->fs_flags &= ~FS_SWAPPED;
1000
1001 #ifdef WAPBL
1002         if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
1003                 error = ffs_wapbl_replay_start(mp, fs, devvp);
1004                 if (error && (mp->mnt_flag & MNT_FORCE) == 0)
1005                         goto out;
1006                 if (!error) {
1007                         if (!ronly) {
1008                                 /* XXX fsmnt may be stale. */
1009                                 printf("%s: replaying log to disk\n",
1010                                     fs->fs_fsmnt);
1011                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
1012                                     devvp);
1013                                 if (error)
1014                                         goto out;
1015                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1016                                 fs->fs_clean = FS_WASCLEAN;
1017                         } else {
1018                                 /* XXX fsmnt may be stale */
1019                                 printf("%s: replaying log to memory\n",
1020                                     fs->fs_fsmnt);
1021                         }
1022
1023                         /* Force a re-read of the superblock */
1024                         brelse(bp, BC_INVAL);
1025                         bp = NULL;
1026                         free(fs, M_UFSMNT);
1027                         fs = NULL;
1028                         goto sbagain;
1029                 }
1030         }
1031 #else /* !WAPBL */
1032         if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1033                 error = EPERM;
1034                 goto out;
1035         }
1036 #endif /* !WAPBL */
1037
1038         ffs_oldfscompat_read(fs, ump, sblockloc);
1039         ump->um_maxfilesize = fs->fs_maxfilesize;
1040
1041         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1042                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1043                     mp->mnt_stat.f_mntonname, fs->fs_flags,
1044                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1045                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
1046                         error = EINVAL;
1047                         goto out;
1048                 }
1049         }
1050
1051         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1052                 fs->fs_pendingblocks = 0;
1053                 fs->fs_pendinginodes = 0;
1054         }
1055
1056         ump->um_fstype = fstype;
1057         if (fs->fs_sbsize < SBLOCKSIZE)
1058                 brelse(bp, BC_INVAL);
1059         else
1060                 brelse(bp, 0);
1061         bp = NULL;
1062
1063         /* First check to see if this is tagged as an Apple UFS filesystem
1064          * in the disklabel
1065          */
1066         if (getdiskinfo(devvp, &dkw) == 0 &&
1067             strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
1068                 ump->um_flags |= UFS_ISAPPLEUFS;
1069 #ifdef APPLE_UFS
1070         else {
1071                 /* Manually look for an apple ufs label, and if a valid one
1072                  * is found, then treat it like an Apple UFS filesystem anyway
1073                  */
1074                 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
1075                         APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1076                 if (error)
1077                         goto out;
1078                 error = ffs_appleufs_validate(fs->fs_fsmnt,
1079                         (struct appleufslabel *)bp->b_data, NULL);
1080                 if (error == 0) {
1081                         ump->um_flags |= UFS_ISAPPLEUFS;
1082                 }
1083                 brelse(bp, 0);
1084                 bp = NULL;
1085         }
1086 #else
1087         if (ump->um_flags & UFS_ISAPPLEUFS) {
1088                 error = EINVAL;
1089                 goto out;
1090         }
1091 #endif
1092
1093 #if 0
1094 /*
1095  * XXX This code changes the behaviour of mounting dirty filesystems, to
1096  * XXX require "mount -f ..." to mount them.  This doesn't match what
1097  * XXX mount(8) describes and is disabled for now.
1098  */
1099         /*
1100          * If the file system is not clean, don't allow it to be mounted
1101          * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1102          * for the root file system.)
1103          */
1104         if (fs->fs_flags & FS_DOWAPBL) {
1105                 /*
1106                  * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1107                  * bit is set, although there's a window in unmount where it
1108                  * could be FS_ISCLEAN
1109                  */
1110                 if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1111                     (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1112                         error = EPERM;
1113                         goto out;
1114                 }
1115         } else
1116                 if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1117                     (mp->mnt_flag & MNT_FORCE) == 0) {
1118                         error = EPERM;
1119                         goto out;
1120                 }
1121 #endif
1122
1123         /*
1124          * verify that we can access the last block in the fs
1125          * if we're mounting read/write.
1126          */
1127
1128         if (!ronly) {
1129                 error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
1130                     cred, 0, &bp);
1131                 if (bp->b_bcount != fs->fs_fsize)
1132                         error = EINVAL;
1133                 if (error) {
1134                         bset = BC_INVAL;
1135                         goto out;
1136                 }
1137                 brelse(bp, BC_INVAL);
1138                 bp = NULL;
1139         }
1140
1141         fs->fs_ronly = ronly;
1142         /* Don't bump fs_clean if we're replaying journal */
1143         if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1144                 if (ronly == 0) {
1145                         fs->fs_clean <<= 1;
1146                         fs->fs_fmod = 1;
1147                 }
1148         bsize = fs->fs_cssize;
1149         blks = howmany(bsize, fs->fs_fsize);
1150         if (fs->fs_contigsumsize > 0)
1151                 bsize += fs->fs_ncg * sizeof(int32_t);
1152         bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1153         space = malloc((u_long)bsize, M_UFSMNT, M_WAITOK);
1154         fs->fs_csp = space;
1155         for (i = 0; i < blks; i += fs->fs_frag) {
1156                 bsize = fs->fs_bsize;
1157                 if (i + fs->fs_frag > blks)
1158                         bsize = (blks - i) * fs->fs_fsize;
1159                 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
1160                               cred, 0, &bp);
1161                 if (error) {
1162                         free(fs->fs_csp, M_UFSMNT);
1163                         goto out;
1164                 }
1165 #ifdef FFS_EI
1166                 if (needswap)
1167                         ffs_csum_swap((struct csum *)bp->b_data,
1168                                 (struct csum *)space, bsize);
1169                 else
1170 #endif
1171                         memcpy(space, bp->b_data, (u_int)bsize);
1172
1173                 space = (char *)space + bsize;
1174                 brelse(bp, 0);
1175                 bp = NULL;
1176         }
1177         if (fs->fs_contigsumsize > 0) {
1178                 fs->fs_maxcluster = lp = space;
1179                 for (i = 0; i < fs->fs_ncg; i++)
1180                         *lp++ = fs->fs_contigsumsize;
1181                 space = lp;
1182         }
1183         bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1184         fs->fs_contigdirs = space;
1185         space = (char *)space + bsize;
1186         memset(fs->fs_contigdirs, 0, bsize);
1187                 /* Compatibility for old filesystems - XXX */
1188         if (fs->fs_avgfilesize <= 0)
1189                 fs->fs_avgfilesize = AVFILESIZ;
1190         if (fs->fs_avgfpdir <= 0)
1191                 fs->fs_avgfpdir = AFPDIR;
1192         fs->fs_active = NULL;
1193         mp->mnt_data = ump;
1194         mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1195         mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1196         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1197         mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1198         if (UFS_MPISAPPLEUFS(ump)) {
1199                 /* NeXT used to keep short symlinks in the inode even
1200                  * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1201                  * is probably -1, but we still need to be able to identify
1202                  * short symlinks.
1203                  */
1204                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1205                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1206                 mp->mnt_iflag |= IMNT_DTYPE;
1207         } else {
1208                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1209                 ump->um_dirblksiz = DIRBLKSIZ;
1210                 if (ump->um_maxsymlinklen > 0)
1211                         mp->mnt_iflag |= IMNT_DTYPE;
1212                 else
1213                         mp->mnt_iflag &= ~IMNT_DTYPE;
1214         }
1215         mp->mnt_fs_bshift = fs->fs_bshift;
1216         mp->mnt_dev_bshift = DEV_BSHIFT;        /* XXX */
1217         mp->mnt_flag |= MNT_LOCAL;
1218         mp->mnt_iflag |= IMNT_MPSAFE;
1219 #ifdef FFS_EI
1220         if (needswap)
1221                 ump->um_flags |= UFS_NEEDSWAP;
1222 #endif
1223         ump->um_mountp = mp;
1224         ump->um_dev = dev;
1225         ump->um_devvp = devvp;
1226         ump->um_nindir = fs->fs_nindir;
1227         ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1228         ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
1229         ump->um_seqinc = fs->fs_frag;
1230         for (i = 0; i < MAXQUOTAS; i++)
1231                 ump->um_quotas[i] = NULLVP;
1232         devvp->v_specmountpoint = mp;
1233         if (ronly == 0 && fs->fs_snapinum[0] != 0)
1234                 ffs_snapshot_mount(mp);
1235 #ifdef WAPBL
1236         if (!ronly) {
1237                 KDASSERT(fs->fs_ronly == 0);
1238                 /*
1239                  * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1240                  * needs to create a new log file in-filesystem.
1241                  */
1242                 ffs_statvfs(mp, &mp->mnt_stat);
1243
1244                 error = ffs_wapbl_start(mp);
1245                 if (error) {
1246                         free(fs->fs_csp, M_UFSMNT);
1247                         goto out;
1248                 }
1249         }
1250 #endif /* WAPBL */
1251         if (ronly == 0) {
1252 #ifdef QUOTA2
1253                 error = ffs_quota2_mount(mp);
1254                 if (error) {
1255                         free(fs->fs_csp, M_UFSMNT);
1256                         goto out;
1257                 }
1258 #else
1259                 if (fs->fs_flags & FS_DOQUOTA2) {
1260                         ump->um_flags |= UFS_QUOTA2;
1261                         uprintf("%s: options QUOTA2 not enabled%s\n",
1262                             mp->mnt_stat.f_mntonname,
1263                             (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1264                         if ((mp->mnt_flag & MNT_FORCE) == 0) {
1265                                 error = EINVAL;
1266                                 free(fs->fs_csp, M_UFSMNT);
1267                                 goto out;
1268                         }
1269                 }
1270 #endif
1271          }
1272 #ifdef UFS_EXTATTR
1273         /*
1274          * Initialize file-backed extended attributes on UFS1 file
1275          * systems.
1276          */
1277         if (ump->um_fstype == UFS1)
1278                 ufs_extattr_uepm_init(&ump->um_extattr);
1279 #endif /* UFS_EXTATTR */
1280
1281         return (0);
1282 out:
1283 #ifdef WAPBL
1284         if (mp->mnt_wapbl_replay) {
1285                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1286                 wapbl_replay_free(mp->mnt_wapbl_replay);
1287                 mp->mnt_wapbl_replay = 0;
1288         }
1289 #endif
1290
1291         fstrans_unmount(mp);
1292         if (fs)
1293                 free(fs, M_UFSMNT);
1294         devvp->v_specmountpoint = NULL;
1295         if (bp)
1296                 brelse(bp, bset);
1297         if (ump) {
1298                 if (ump->um_oldfscompat)
1299                         free(ump->um_oldfscompat, M_UFSMNT);
1300                 mutex_destroy(&ump->um_lock);
1301                 free(ump, M_UFSMNT);
1302                 mp->mnt_data = NULL;
1303         }
1304         return (error);
1305 }
1306
1307 /*
1308  * Sanity checks for loading old filesystem superblocks.
1309  * See ffs_oldfscompat_write below for unwound actions.
1310  *
1311  * XXX - Parts get retired eventually.
1312  * Unfortunately new bits get added.
1313  */
1314 static void
1315 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1316 {
1317         off_t maxfilesize;
1318         int32_t *extrasave;
1319
1320         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1321             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1322                 return;
1323
1324         if (!ump->um_oldfscompat)
1325                 ump->um_oldfscompat = malloc(512 + 3*sizeof(int32_t),
1326                     M_UFSMNT, M_WAITOK);
1327
1328         memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1329         extrasave = ump->um_oldfscompat;
1330         extrasave += 512/sizeof(int32_t);
1331         extrasave[0] = fs->fs_old_npsect;
1332         extrasave[1] = fs->fs_old_interleave;
1333         extrasave[2] = fs->fs_old_trackskew;
1334
1335         /* These fields will be overwritten by their
1336          * original values in fs_oldfscompat_write, so it is harmless
1337          * to modify them here.
1338          */
1339         fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1340         fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1341         fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1342         fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1343
1344         fs->fs_maxbsize = fs->fs_bsize;
1345         fs->fs_time = fs->fs_old_time;
1346         fs->fs_size = fs->fs_old_size;
1347         fs->fs_dsize = fs->fs_old_dsize;
1348         fs->fs_csaddr = fs->fs_old_csaddr;
1349         fs->fs_sblockloc = sblockloc;
1350
1351         fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1352
1353         if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1354                 fs->fs_old_nrpos = 8;
1355                 fs->fs_old_npsect = fs->fs_old_nsect;
1356                 fs->fs_old_interleave = 1;
1357                 fs->fs_old_trackskew = 0;
1358         }
1359
1360         if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1361                 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1362                 fs->fs_qbmask = ~fs->fs_bmask;
1363                 fs->fs_qfmask = ~fs->fs_fmask;
1364         }
1365
1366         maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1367         if (fs->fs_maxfilesize > maxfilesize)
1368                 fs->fs_maxfilesize = maxfilesize;
1369
1370         /* Compatibility for old filesystems */
1371         if (fs->fs_avgfilesize <= 0)
1372                 fs->fs_avgfilesize = AVFILESIZ;
1373         if (fs->fs_avgfpdir <= 0)
1374                 fs->fs_avgfpdir = AFPDIR;
1375
1376 #if 0
1377         if (bigcgs) {
1378                 fs->fs_save_cgsize = fs->fs_cgsize;
1379                 fs->fs_cgsize = fs->fs_bsize;
1380         }
1381 #endif
1382 }
1383
1384 /*
1385  * Unwinding superblock updates for old filesystems.
1386  * See ffs_oldfscompat_read above for details.
1387  *
1388  * XXX - Parts get retired eventually.
1389  * Unfortunately new bits get added.
1390  */
1391 static void
1392 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1393 {
1394         int32_t *extrasave;
1395
1396         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1397             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1398                 return;
1399
1400         fs->fs_old_time = fs->fs_time;
1401         fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1402         fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1403         fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1404         fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1405         fs->fs_old_flags = fs->fs_flags;
1406
1407 #if 0
1408         if (bigcgs) {
1409                 fs->fs_cgsize = fs->fs_save_cgsize;
1410         }
1411 #endif
1412
1413         memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1414         extrasave = ump->um_oldfscompat;
1415         extrasave += 512/sizeof(int32_t);
1416         fs->fs_old_npsect = extrasave[0];
1417         fs->fs_old_interleave = extrasave[1];
1418         fs->fs_old_trackskew = extrasave[2];
1419
1420 }
1421
1422 /*
1423  * unmount vfs operation
1424  */
1425 int
1426 ffs_unmount(struct mount *mp, int mntflags)
1427 {
1428         struct lwp *l = curlwp;
1429         struct ufsmount *ump = VFSTOUFS(mp);
1430         struct fs *fs = ump->um_fs;
1431         int error, flags;
1432 #ifdef WAPBL
1433         extern int doforce;
1434 #endif
1435
1436         flags = 0;
1437         if (mntflags & MNT_FORCE)
1438                 flags |= FORCECLOSE;
1439         if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1440                 return (error);
1441         error = UFS_WAPBL_BEGIN(mp);
1442         if (error == 0)
1443                 if (fs->fs_ronly == 0 &&
1444                     ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1445                     fs->fs_clean & FS_WASCLEAN) {
1446                         fs->fs_clean = FS_ISCLEAN;
1447                         fs->fs_fmod = 0;
1448                         (void) ffs_sbupdate(ump, MNT_WAIT);
1449                 }
1450         if (error == 0)
1451                 UFS_WAPBL_END(mp);
1452 #ifdef WAPBL
1453         KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1454         if (mp->mnt_wapbl_replay) {
1455                 KDASSERT(fs->fs_ronly);
1456                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1457                 wapbl_replay_free(mp->mnt_wapbl_replay);
1458                 mp->mnt_wapbl_replay = 0;
1459         }
1460         error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1461         if (error) {
1462                 return error;
1463         }
1464 #endif /* WAPBL */
1465 #ifdef UFS_EXTATTR
1466         if (ump->um_fstype == UFS1) {
1467                 ufs_extattr_stop(mp, l);
1468                 ufs_extattr_uepm_destroy(&ump->um_extattr);
1469         }
1470 #endif /* UFS_EXTATTR */
1471
1472         if (ump->um_devvp->v_type != VBAD)
1473                 ump->um_devvp->v_specmountpoint = NULL;
1474         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1475         (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1476                 NOCRED);
1477         vput(ump->um_devvp);
1478         free(fs->fs_csp, M_UFSMNT);
1479         free(fs, M_UFSMNT);
1480         if (ump->um_oldfscompat != NULL)
1481                 free(ump->um_oldfscompat, M_UFSMNT);
1482         mutex_destroy(&ump->um_lock);
1483         ffs_snapshot_fini(ump);
1484         free(ump, M_UFSMNT);
1485         mp->mnt_data = NULL;
1486         mp->mnt_flag &= ~MNT_LOCAL;
1487         fstrans_unmount(mp);
1488         return (0);
1489 }
1490
1491 /*
1492  * Flush out all the files in a filesystem.
1493  */
1494 int
1495 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1496 {
1497         extern int doforce;
1498         struct ufsmount *ump;
1499         int error;
1500
1501         if (!doforce)
1502                 flags &= ~FORCECLOSE;
1503         ump = VFSTOUFS(mp);
1504 #ifdef QUOTA
1505         if ((error = quota1_umount(mp, flags)) != 0)
1506                 return (error);
1507 #endif
1508 #ifdef QUOTA2
1509         if ((error = quota2_umount(mp, flags)) != 0)
1510                 return (error);
1511 #endif
1512         if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1513                 return (error);
1514         ffs_snapshot_unmount(mp);
1515         /*
1516          * Flush all the files.
1517          */
1518         error = vflush(mp, NULLVP, flags);
1519         if (error)
1520                 return (error);
1521         /*
1522          * Flush filesystem metadata.
1523          */
1524         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1525         error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1526         VOP_UNLOCK(ump->um_devvp);
1527         if (flags & FORCECLOSE) /* XXXDBJ */
1528                 error = 0;
1529
1530 #ifdef WAPBL
1531         if (error)
1532                 return error;
1533         if (mp->mnt_wapbl) {
1534                 error = wapbl_flush(mp->mnt_wapbl, 1);
1535                 if (flags & FORCECLOSE)
1536                         error = 0;
1537         }
1538 #endif
1539
1540         return (error);
1541 }
1542
1543 /*
1544  * Get file system statistics.
1545  */
1546 int
1547 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1548 {
1549         struct ufsmount *ump;
1550         struct fs *fs;
1551
1552         ump = VFSTOUFS(mp);
1553         fs = ump->um_fs;
1554         mutex_enter(&ump->um_lock);
1555         sbp->f_bsize = fs->fs_bsize;
1556         sbp->f_frsize = fs->fs_fsize;
1557         sbp->f_iosize = fs->fs_bsize;
1558         sbp->f_blocks = fs->fs_dsize;
1559         sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1560             fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1561         sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1562             fs->fs_minfree) / (u_int64_t) 100;
1563         if (sbp->f_bfree > sbp->f_bresvd)
1564                 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1565         else
1566                 sbp->f_bavail = 0;
1567         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1568         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1569         sbp->f_favail = sbp->f_ffree;
1570         sbp->f_fresvd = 0;
1571         mutex_exit(&ump->um_lock);
1572         copy_statvfs_info(sbp, mp);
1573
1574         return (0);
1575 }
1576
1577 /*
1578  * Go through the disk queues to initiate sandbagged IO;
1579  * go through the inodes to write those that have been modified;
1580  * initiate the writing of the super block if it has been modified.
1581  *
1582  * Note: we are always called with the filesystem marked `MPBUSY'.
1583  */
1584 int
1585 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1586 {
1587         struct vnode *vp, *mvp, *nvp;
1588         struct inode *ip;
1589         struct ufsmount *ump = VFSTOUFS(mp);
1590         struct fs *fs;
1591         int error, allerror = 0;
1592         bool is_suspending;
1593
1594         fs = ump->um_fs;
1595         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1596                 printf("fs = %s\n", fs->fs_fsmnt);
1597                 panic("update: rofs mod");
1598         }
1599
1600         /* Allocate a marker vnode. */
1601         mvp = vnalloc(mp);
1602
1603         fstrans_start(mp, FSTRANS_SHARED);
1604         is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1605         /*
1606          * Write back each (modified) inode.
1607          */
1608         mutex_enter(&mntvnode_lock);
1609 loop:
1610         /*
1611          * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1612          * and vclean() can be called indirectly
1613          */
1614         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1615                 nvp = TAILQ_NEXT(vp, v_mntvnodes);
1616                 /*
1617                  * If the vnode that we are about to sync is no longer
1618                  * associated with this mount point, start over.
1619                  */
1620                 if (vp->v_mount != mp)
1621                         goto loop;
1622                 /*
1623                  * Don't interfere with concurrent scans of this FS.
1624                  */
1625                 if (vismarker(vp))
1626                         continue;
1627                 mutex_enter(vp->v_interlock);
1628                 ip = VTOI(vp);
1629
1630                 /*
1631                  * Skip the vnode/inode if inaccessible.
1632                  */
1633                 if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1634                     vp->v_type == VNON) {
1635                         mutex_exit(vp->v_interlock);
1636                         continue;
1637                 }
1638
1639                 /*
1640                  * We deliberately update inode times here.  This will
1641                  * prevent a massive queue of updates accumulating, only
1642                  * to be handled by a call to unmount.
1643                  *
1644                  * XXX It would be better to have the syncer trickle these
1645                  * out.  Adjustment needed to allow registering vnodes for
1646                  * sync when the vnode is clean, but the inode dirty.  Or
1647                  * have ufs itself trickle out inode updates.
1648                  *
1649                  * If doing a lazy sync, we don't care about metadata or
1650                  * data updates, because they are handled by each vnode's
1651                  * synclist entry.  In this case we are only interested in
1652                  * writing back modified inodes.
1653                  */
1654                 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1655                     IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1656                     (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1657                     UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1658                         mutex_exit(vp->v_interlock);
1659                         continue;
1660                 }
1661                 if (vp->v_type == VBLK && is_suspending) {
1662                         mutex_exit(vp->v_interlock);
1663                         continue;
1664                 }
1665                 vmark(mvp, vp);
1666                 mutex_exit(&mntvnode_lock);
1667                 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
1668                 if (error) {
1669                         mutex_enter(&mntvnode_lock);
1670                         nvp = vunmark(mvp);
1671                         if (error == ENOENT) {
1672                                 goto loop;
1673                         }
1674                         continue;
1675                 }
1676                 if (waitfor == MNT_LAZY) {
1677                         error = UFS_WAPBL_BEGIN(vp->v_mount);
1678                         if (!error) {
1679                                 error = ffs_update(vp, NULL, NULL,
1680                                     UPDATE_CLOSE);
1681                                 UFS_WAPBL_END(vp->v_mount);
1682                         }
1683                 } else {
1684                         error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1685                             (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1686                 }
1687                 if (error)
1688                         allerror = error;
1689                 vput(vp);
1690                 mutex_enter(&mntvnode_lock);
1691                 nvp = vunmark(mvp);
1692         }
1693         mutex_exit(&mntvnode_lock);
1694         /*
1695          * Force stale file system control information to be flushed.
1696          */
1697         if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1698             !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1699                 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1700                 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1701                     (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1702                     0, 0)) != 0)
1703                         allerror = error;
1704                 VOP_UNLOCK(ump->um_devvp);
1705                 if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1706                         mutex_enter(&mntvnode_lock);
1707                         goto loop;
1708                 }
1709         }
1710 #if defined(QUOTA) || defined(QUOTA2)
1711         qsync(mp);
1712 #endif
1713         /*
1714          * Write back modified superblock.
1715          */
1716         if (fs->fs_fmod != 0) {
1717                 fs->fs_fmod = 0;
1718                 fs->fs_time = time_second;
1719                 error = UFS_WAPBL_BEGIN(mp);
1720                 if (error)
1721                         allerror = error;
1722                 else {
1723                         if ((error = ffs_cgupdate(ump, waitfor)))
1724                                 allerror = error;
1725                         UFS_WAPBL_END(mp);
1726                 }
1727         }
1728
1729 #ifdef WAPBL
1730         if (mp->mnt_wapbl) {
1731                 error = wapbl_flush(mp->mnt_wapbl, 0);
1732                 if (error)
1733                         allerror = error;
1734         }
1735 #endif
1736
1737         fstrans_done(mp);
1738         vnfree(mvp);
1739         return (allerror);
1740 }
1741
1742 /*
1743  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1744  * in from disk.  If it is in core, wait for the lock bit to clear, then
1745  * return the inode locked.  Detection and handling of mount points must be
1746  * done by the calling routine.
1747  */
1748 int
1749 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1750 {
1751         struct fs *fs;
1752         struct inode *ip;
1753         struct ufsmount *ump;
1754         struct buf *bp;
1755         struct vnode *vp;
1756         dev_t dev;
1757         int error;
1758
1759         ump = VFSTOUFS(mp);
1760         dev = ump->um_dev;
1761
1762  retry:
1763         if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1764                 return (0);
1765
1766         /* Allocate a new vnode/inode. */
1767         error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, NULL, &vp);
1768         if (error) {
1769                 *vpp = NULL;
1770                 return (error);
1771         }
1772         ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1773
1774         /*
1775          * If someone beat us to it, put back the freshly allocated
1776          * vnode/inode pair and retry.
1777          */
1778         mutex_enter(&ufs_hashlock);
1779         if (ufs_ihashget(dev, ino, 0) != NULL) {
1780                 mutex_exit(&ufs_hashlock);
1781                 ungetnewvnode(vp);
1782                 pool_cache_put(ffs_inode_cache, ip);
1783                 goto retry;
1784         }
1785
1786         vp->v_vflag |= VV_LOCKSWORK;
1787
1788         /*
1789          * XXX MFS ends up here, too, to allocate an inode.  Should we
1790          * XXX create another pool for MFS inodes?
1791          */
1792
1793         memset(ip, 0, sizeof(struct inode));
1794         vp->v_data = ip;
1795         ip->i_vnode = vp;
1796         ip->i_ump = ump;
1797         ip->i_fs = fs = ump->um_fs;
1798         ip->i_dev = dev;
1799         ip->i_number = ino;
1800 #if defined(QUOTA) || defined(QUOTA2)
1801         ufsquota_init(ip);
1802 #endif
1803
1804         /*
1805          * Initialize genfs node, we might proceed to destroy it in
1806          * error branches.
1807          */
1808         genfs_node_init(vp, &ffs_genfsops);
1809
1810         /*
1811          * Put it onto its hash chain and lock it so that other requests for
1812          * this inode will block if they arrive while we are sleeping waiting
1813          * for old data structures to be purged or for the contents of the
1814          * disk portion of this inode to be read.
1815          */
1816
1817         ufs_ihashins(ip);
1818         mutex_exit(&ufs_hashlock);
1819
1820         /* Read in the disk contents for the inode, copy into the inode. */
1821         error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1822                       (int)fs->fs_bsize, NOCRED, 0, &bp);
1823         if (error) {
1824
1825                 /*
1826                  * The inode does not contain anything useful, so it would
1827                  * be misleading to leave it on its hash chain. With mode
1828                  * still zero, it will be unlinked and returned to the free
1829                  * list by vput().
1830                  */
1831
1832                 vput(vp);
1833                 brelse(bp, 0);
1834                 *vpp = NULL;
1835                 return (error);
1836         }
1837         if (ip->i_ump->um_fstype == UFS1)
1838                 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1839                     PR_WAITOK);
1840         else
1841                 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1842                     PR_WAITOK);
1843         ffs_load_inode(bp, ip, fs, ino);
1844         brelse(bp, 0);
1845
1846         /*
1847          * Initialize the vnode from the inode, check for aliases.
1848          * Note that the underlying vnode may have changed.
1849          */
1850
1851         ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1852
1853         /*
1854          * Finish inode initialization now that aliasing has been resolved.
1855          */
1856
1857         ip->i_devvp = ump->um_devvp;
1858         vref(ip->i_devvp);
1859
1860         /*
1861          * Ensure that uid and gid are correct. This is a temporary
1862          * fix until fsck has been changed to do the update.
1863          */
1864
1865         if (fs->fs_old_inodefmt < FS_44INODEFMT) {              /* XXX */
1866                 ip->i_uid = ip->i_ffs1_ouid;                    /* XXX */
1867                 ip->i_gid = ip->i_ffs1_ogid;                    /* XXX */
1868         }                                                       /* XXX */
1869         uvm_vnp_setsize(vp, ip->i_size);
1870         *vpp = vp;
1871         return (0);
1872 }
1873
1874 /*
1875  * File handle to vnode
1876  *
1877  * Have to be really careful about stale file handles:
1878  * - check that the inode number is valid
1879  * - call ffs_vget() to get the locked inode
1880  * - check for an unallocated inode (i_mode == 0)
1881  * - check that the given client host has export rights and return
1882  *   those rights via. exflagsp and credanonp
1883  */
1884 int
1885 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1886 {
1887         struct ufid ufh;
1888         struct fs *fs;
1889
1890         if (fhp->fid_len != sizeof(struct ufid))
1891                 return EINVAL;
1892
1893         memcpy(&ufh, fhp, sizeof(ufh));
1894         fs = VFSTOUFS(mp)->um_fs;
1895         if (ufh.ufid_ino < ROOTINO ||
1896             ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1897                 return (ESTALE);
1898         return (ufs_fhtovp(mp, &ufh, vpp));
1899 }
1900
1901 /*
1902  * Vnode pointer to File handle
1903  */
1904 /* ARGSUSED */
1905 int
1906 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1907 {
1908         struct inode *ip;
1909         struct ufid ufh;
1910
1911         if (*fh_size < sizeof(struct ufid)) {
1912                 *fh_size = sizeof(struct ufid);
1913                 return E2BIG;
1914         }
1915         ip = VTOI(vp);
1916         *fh_size = sizeof(struct ufid);
1917         memset(&ufh, 0, sizeof(ufh));
1918         ufh.ufid_len = sizeof(struct ufid);
1919         ufh.ufid_ino = ip->i_number;
1920         ufh.ufid_gen = ip->i_gen;
1921         memcpy(fhp, &ufh, sizeof(ufh));
1922         return (0);
1923 }
1924
1925 void
1926 ffs_init(void)
1927 {
1928         if (ffs_initcount++ > 0)
1929                 return;
1930
1931         ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1932             "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1933         ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1934             "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1935         ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1936             "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1937         ufs_init();
1938 }
1939
/*
 * Reinitialize FFS; all of the work is done by ufs_reinit().
 */
void
ffs_reinit(void)
{
	ufs_reinit();
}
1946
1947 void
1948 ffs_done(void)
1949 {
1950         if (--ffs_initcount > 0)
1951                 return;
1952
1953         ufs_done();
1954         pool_cache_destroy(ffs_dinode2_cache);
1955         pool_cache_destroy(ffs_dinode1_cache);
1956         pool_cache_destroy(ffs_inode_cache);
1957 }
1958
1959 /*
1960  * Write a superblock and associated information back to disk.
1961  */
1962 int
1963 ffs_sbupdate(struct ufsmount *mp, int waitfor)
1964 {
1965         struct fs *fs = mp->um_fs;
1966         struct buf *bp;
1967         int error = 0;
1968         u_int32_t saveflag;
1969
1970         error = ffs_getblk(mp->um_devvp,
1971             fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
1972             fs->fs_sbsize, false, &bp);
1973         if (error)
1974                 return error;
1975         saveflag = fs->fs_flags & FS_INTERNAL;
1976         fs->fs_flags &= ~FS_INTERNAL;
1977
1978         memcpy(bp->b_data, fs, fs->fs_sbsize);
1979
1980         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1981 #ifdef FFS_EI
1982         if (mp->um_flags & UFS_NEEDSWAP)
1983                 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1984 #endif
1985         fs->fs_flags |= saveflag;
1986
1987         if (waitfor == MNT_WAIT)
1988                 error = bwrite(bp);
1989         else
1990                 bawrite(bp);
1991         return (error);
1992 }
1993
1994 int
1995 ffs_cgupdate(struct ufsmount *mp, int waitfor)
1996 {
1997         struct fs *fs = mp->um_fs;
1998         struct buf *bp;
1999         int blks;
2000         void *space;
2001         int i, size, error = 0, allerror = 0;
2002
2003         allerror = ffs_sbupdate(mp, waitfor);
2004         blks = howmany(fs->fs_cssize, fs->fs_fsize);
2005         space = fs->fs_csp;
2006         for (i = 0; i < blks; i += fs->fs_frag) {
2007                 size = fs->fs_bsize;
2008                 if (i + fs->fs_frag > blks)
2009                         size = (blks - i) * fs->fs_fsize;
2010                 error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
2011                     FFS_NOBLK, size, false, &bp);
2012                 if (error)
2013                         break;
2014 #ifdef FFS_EI
2015                 if (mp->um_flags & UFS_NEEDSWAP)
2016                         ffs_csum_swap((struct csum*)space,
2017                             (struct csum*)bp->b_data, size);
2018                 else
2019 #endif
2020                         memcpy(bp->b_data, space, (u_int)size);
2021                 space = (char *)space + size;
2022                 if (waitfor == MNT_WAIT)
2023                         error = bwrite(bp);
2024                 else
2025                         bawrite(bp);
2026         }
2027         if (!allerror && error)
2028                 allerror = error;
2029         return (allerror);
2030 }
2031
/*
 * Extended attribute control.
 *
 * With UFS_EXTATTR, requests for a UFS1 file system go to
 * ufs_extattrctl(): file-backed extended attributes are only supported
 * on UFS1, UFS2 has native extended attributes.  Everything else is
 * passed to vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	const struct ufsmount *ump = VFSTOUFS(mp);

	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname));
	}
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}
2046
2047 int
2048 ffs_suspendctl(struct mount *mp, int cmd)
2049 {
2050         int error;
2051         struct lwp *l = curlwp;
2052
2053         switch (cmd) {
2054         case SUSPEND_SUSPEND:
2055                 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2056                         return error;
2057                 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2058                 if (error == 0)
2059                         error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2060 #ifdef WAPBL
2061                 if (error == 0 && mp->mnt_wapbl)
2062                         error = wapbl_flush(mp->mnt_wapbl, 1);
2063 #endif
2064                 if (error != 0) {
2065                         (void) fstrans_setstate(mp, FSTRANS_NORMAL);
2066                         return error;
2067                 }
2068                 return 0;
2069
2070         case SUSPEND_RESUME:
2071                 return fstrans_setstate(mp, FSTRANS_NORMAL);
2072
2073         default:
2074                 return EINVAL;
2075         }
2076 }
2077
2078 /*
2079  * Synch vnode for a mounted file system.
2080  */
2081 static int
2082 ffs_vfs_fsync(vnode_t *vp, int flags)
2083 {
2084         int error, i, pflags;
2085 #ifdef WAPBL
2086         struct mount *mp;
2087 #endif
2088
2089         KASSERT(vp->v_type == VBLK);
2090         KASSERT(vp->v_specmountpoint != NULL);
2091
2092         /*
2093          * Flush all dirty data associated with the vnode.
2094          */
2095         pflags = PGO_ALLPAGES | PGO_CLEANIT;
2096         if ((flags & FSYNC_WAIT) != 0)
2097                 pflags |= PGO_SYNCIO;
2098         mutex_enter(vp->v_interlock);
2099         error = VOP_PUTPAGES(vp, 0, 0, pflags);
2100         if (error)
2101                 return error;
2102
2103 #ifdef WAPBL
2104         mp = vp->v_specmountpoint;
2105         if (mp && mp->mnt_wapbl) {
2106                 /*
2107                  * Don't bother writing out metadata if the syncer is
2108                  * making the request.  We will let the sync vnode
2109                  * write it out in a single burst through a call to
2110                  * VFS_SYNC().
2111                  */
2112                 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2113                         return 0;
2114
2115                 /*
2116                  * Don't flush the log if the vnode being flushed
2117                  * contains no dirty buffers that could be in the log.
2118                  */
2119                 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2120                         error = wapbl_flush(mp->mnt_wapbl, 0);
2121                         if (error)
2122                                 return error;
2123                 }
2124
2125                 if ((flags & FSYNC_WAIT) != 0) {
2126                         mutex_enter(vp->v_interlock);
2127                         while (vp->v_numoutput)
2128                                 cv_wait(&vp->v_cv, vp->v_interlock);
2129                         mutex_exit(vp->v_interlock);
2130                 }
2131
2132                 return 0;
2133         }
2134 #endif /* WAPBL */
2135
2136         error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
2137         if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2138                 i = 1;
2139                 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2140                     kauth_cred_get());
2141         }
2142
2143         return error;
2144 }