sys/ufs/ffs/ffs_vfsops.c

   1 /*      $NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $        */
   2
   3 /*-
   4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to The NetBSD Foundation
   8  * by Wasabi Systems, Inc, and by Andrew Doran.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  * Copyright (c) 1989, 1991, 1993, 1994
  34  *      The Regents of the University of California.  All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  61  */
  62
  63 #include <sys/cdefs.h>
  64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $");
  65
  66 #if defined(_KERNEL_OPT)
  67 #include "opt_ffs.h"
  68 #include "opt_quota.h"
  69 #include "opt_wapbl.h"
  70 #endif
  71
  72 #include <sys/param.h>
  73 #include <sys/systm.h>
  74 #include <sys/namei.h>
  75 #include <sys/proc.h>
  76 #include <sys/kernel.h>
  77 #include <sys/vnode.h>
  78 #include <sys/socket.h>
  79 #include <sys/mount.h>
  80 #include <sys/buf.h>
  81 #include <sys/device.h>
  82 #include <sys/disk.h>
  83 #include <sys/mbuf.h>
  84 #include <sys/file.h>
  85 #include <sys/disklabel.h>
  86 #include <sys/ioctl.h>
  87 #include <sys/errno.h>
  88 #include <sys/kmem.h>
  89 #include <sys/pool.h>
  90 #include <sys/lock.h>
  91 #include <sys/sysctl.h>
  92 #include <sys/conf.h>
  93 #include <sys/kauth.h>
  94 #include <sys/wapbl.h>
  95 #include <sys/fstrans.h>
  96 #include <sys/module.h>
  97
  98 #include <miscfs/genfs/genfs.h>
  99 #include <miscfs/specfs/specdev.h>
 100
 101 #include <ufs/ufs/quota.h>
 102 #include <ufs/ufs/ufsmount.h>
 103 #include <ufs/ufs/inode.h>
 104 #include <ufs/ufs/dir.h>
 105 #include <ufs/ufs/ufs_extern.h>
 106 #include <ufs/ufs/ufs_bswap.h>
 107 #include <ufs/ufs/ufs_wapbl.h>
 108
 109 #include <ufs/ffs/fs.h>
 110 #include <ufs/ffs/ffs_extern.h>
 111
 112 MODULE(MODULE_CLASS_VFS, ffs, NULL);
 113
 114 static int      ffs_vfs_fsync(vnode_t *, int);
 115
 116 static struct sysctllog *ffs_sysctl_log;
 117
 118 static kauth_listener_t ffs_snapshot_listener;
 119
 120 /* how many times ffs_init() was called */
 121 int ffs_initcount = 0;
 122
 123 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
 124 extern const struct vnodeopv_desc ffs_specop_opv_desc;
 125 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
 126
 127 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
 128         &ffs_vnodeop_opv_desc,
 129         &ffs_specop_opv_desc,
 130         &ffs_fifoop_opv_desc,
 131         NULL,
 132 };
 133
 134 struct vfsops ffs_vfsops = {
 135         MOUNT_FFS,
 136         sizeof (struct ufs_args),
 137         ffs_mount,
 138         ufs_start,
 139         ffs_unmount,
 140         ufs_root,
 141         ufs_quotactl,
 142         ffs_statvfs,
 143         ffs_sync,
 144         ffs_vget,
 145         ffs_fhtovp,
 146         ffs_vptofh,
 147         ffs_init,
 148         ffs_reinit,
 149         ffs_done,
 150         ffs_mountroot,
 151         ffs_snapshot,
 152         ffs_extattrctl,
 153         ffs_suspendctl,
 154         genfs_renamelock_enter,
 155         genfs_renamelock_exit,
 156         ffs_vfs_fsync,
 157         ffs_vnodeopv_descs,
 158         0,
 159         { NULL, NULL },
 160 };
 161
 162 static const struct genfs_ops ffs_genfsops = {
 163         .gop_size = ffs_gop_size,
 164         .gop_alloc = ufs_gop_alloc,
 165         .gop_write = genfs_gop_write,
 166         .gop_markupdate = ufs_gop_markupdate,
 167 };
 168
 169 static const struct ufs_ops ffs_ufsops = {
 170         .uo_itimes = ffs_itimes,
 171         .uo_update = ffs_update,
 172         .uo_truncate = ffs_truncate,
 173         .uo_valloc = ffs_valloc,
 174         .uo_vfree = ffs_vfree,
 175         .uo_balloc = ffs_balloc,
 176         .uo_snapgone = ffs_snapgone,
 177 };
 178
 179 static int
 180 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
 181     void *arg0, void *arg1, void *arg2, void *arg3)
 182 {
 183         vnode_t *vp = arg2;
 184         int result = KAUTH_RESULT_DEFER;;
 185
 186         if (action != KAUTH_SYSTEM_FS_SNAPSHOT)
 187                 return result;
 188
 189         if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred))
 190                 result = KAUTH_RESULT_ALLOW;
 191
 192         return result;
 193 }
 194
 195 static int
 196 ffs_modcmd(modcmd_t cmd, void *arg)
 197 {
 198         int error;
 199
 200 #if 0
 201         extern int doasyncfree;
 202 #endif
 203 #ifdef UFS_EXTATTR
 204         extern int ufs_extattr_autocreate;
 205 #endif
 206         extern int ffs_log_changeopt;
 207
 208         switch (cmd) {
 209         case MODULE_CMD_INIT:
 210                 error = vfs_attach(&ffs_vfsops);
 211                 if (error != 0)
 212                         break;
 213
 214                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 215                                CTLFLAG_PERMANENT,
 216                                CTLTYPE_NODE, "vfs", NULL,
 217                                NULL, 0, NULL, 0,
 218                                CTL_VFS, CTL_EOL);
 219                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 220                                CTLFLAG_PERMANENT,
 221                                CTLTYPE_NODE, "ffs",
 222                                SYSCTL_DESCR("Berkeley Fast File System"),
 223                                NULL, 0, NULL, 0,
 224                                CTL_VFS, 1, CTL_EOL);
 225                 /*
 226                  * @@@ should we even bother with these first three?
 227                  */
 228                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 229                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 230                                CTLTYPE_INT, "doclusterread", NULL,
 231                                sysctl_notavail, 0, NULL, 0,
 232                                CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
 233                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 234                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 235                                CTLTYPE_INT, "doclusterwrite", NULL,
 236                                sysctl_notavail, 0, NULL, 0,
 237                                CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
 238                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 239                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 240                                CTLTYPE_INT, "doreallocblks", NULL,
 241                                sysctl_notavail, 0, NULL, 0,
 242                                CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
 243 #if 0
 244                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 245                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 246                                CTLTYPE_INT, "doasyncfree",
 247                                SYSCTL_DESCR("Release dirty blocks asynchronously"),
 248                                NULL, 0, &doasyncfree, 0,
 249                                CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
 250 #endif
 251                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 252                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 253                                CTLTYPE_INT, "log_changeopt",
 254                                SYSCTL_DESCR("Log changes in optimization strategy"),
 255                                NULL, 0, &ffs_log_changeopt, 0,
 256                                CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
 257 #ifdef UFS_EXTATTR
 258                 sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
 259                                CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 260                                CTLTYPE_INT, "extattr_autocreate",
 261                                SYSCTL_DESCR("Size of attribute for "
 262                                             "backing file autocreation"),
 263                                NULL, 0, &ufs_extattr_autocreate, 0,
 264                                CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL);
 265
 266 #endif /* UFS_EXTATTR */
 267
 268                 ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
 269                     ffs_snapshot_cb, NULL);
 270                 if (ffs_snapshot_listener == NULL)
 271                         printf("ffs_modcmd: can't listen on system scope.\n");
 272
 273                 break;
 274         case MODULE_CMD_FINI:
 275                 error = vfs_detach(&ffs_vfsops);
 276                 if (error != 0)
 277                         break;
 278                 sysctl_teardown(&ffs_sysctl_log);
 279                 if (ffs_snapshot_listener != NULL)
 280                         kauth_unlisten_scope(ffs_snapshot_listener);
 281                 break;
 282         default:
 283                 error = ENOTTY;
 284                 break;
 285         }
 286
 287         return (error);
 288 }
 289
 290 pool_cache_t ffs_inode_cache;
 291 pool_cache_t ffs_dinode1_cache;
 292 pool_cache_t ffs_dinode2_cache;
 293
 294 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
 295 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
 296
 297 /*
 298  * Called by main() when ffs is going to be mounted as root.
 299  */
 300
 301 int
 302 ffs_mountroot(void)
 303 {
 304         struct fs *fs;
 305         struct mount *mp;
 306         struct lwp *l = curlwp;                 /* XXX */
 307         struct ufsmount *ump;
 308         int error;
 309
 310         if (device_class(root_device) != DV_DISK)
 311                 return (ENODEV);
 312
 313         if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
 314                 vrele(rootvp);
 315                 return (error);
 316         }
 317
 318         /*
 319          * We always need to be able to mount the root file system.
 320          */
 321         mp->mnt_flag |= MNT_FORCE;
 322         if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
 323                 vfs_unbusy(mp, false, NULL);
 324                 vfs_destroy(mp);
 325                 return (error);
 326         }
 327         mp->mnt_flag &= ~MNT_FORCE;
 328         mountlist_append(mp);
 329         ump = VFSTOUFS(mp);
 330         fs = ump->um_fs;
 331         memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
 332         (void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
 333         (void)ffs_statvfs(mp, &mp->mnt_stat);
 334         vfs_unbusy(mp, false, NULL);
 335         setrootfstime((time_t)fs->fs_time);
 336         return (0);
 337 }
 338
 339 /*
 340  * VFS Operations.
 341  *
 342  * mount system call
 343  */
 344 int
 345 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
 346 {
 347         struct lwp *l = curlwp;
 348         struct vnode *devvp = NULL;
 349         struct ufs_args *args = data;
 350         struct ufsmount *ump = NULL;
 351         struct fs *fs;
 352         int error = 0, flags, update;
 353         mode_t accessmode;
 354
 355         if (*data_len < sizeof *args)
 356                 return EINVAL;
 357
 358         if (mp->mnt_flag & MNT_GETARGS) {
 359                 ump = VFSTOUFS(mp);
 360                 if (ump == NULL)
 361                         return EIO;
 362                 args->fspec = NULL;
 363                 *data_len = sizeof *args;
 364                 return 0;
 365         }
 366
 367         update = mp->mnt_flag & MNT_UPDATE;
 368
 369         /* Check arguments */
 370         if (args->fspec != NULL) {
 371                 /*
 372                  * Look up the name and verify that it's sane.
 373                  */
 374                 error = namei_simple_user(args->fspec,
 375                                         NSM_FOLLOW_NOEMULROOT, &devvp);
 376                 if (error != 0)
 377                         return (error);
 378
 379                 if (!update) {
 380                         /*
 381                          * Be sure this is a valid block device
 382                          */
 383                         if (devvp->v_type != VBLK)
 384                                 error = ENOTBLK;
 385                         else if (bdevsw_lookup(devvp->v_rdev) == NULL)
 386                                 error = ENXIO;
 387                 } else {
 388                         /*
 389                          * Be sure we're still naming the same device
 390                          * used for our initial mount
 391                          */
 392                         ump = VFSTOUFS(mp);
 393                         if (devvp != ump->um_devvp) {
 394                                 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 395                                         error = EINVAL;
 396                                 else {
 397                                         vrele(devvp);
 398                                         devvp = ump->um_devvp;
 399                                         vref(devvp);
 400                                 }
 401                         }
 402                 }
 403         } else {
 404                 if (!update) {
 405                         /* New mounts must have a filename for the device */
 406                         return (EINVAL);
 407                 } else {
 408                         /* Use the extant mount */
 409                         ump = VFSTOUFS(mp);
 410                         devvp = ump->um_devvp;
 411                         vref(devvp);
 412                 }
 413         }
 414
 415         /*
 416          * If mount by non-root, then verify that user has necessary
 417          * permissions on the device.
 418          *
 419          * Permission to update a mount is checked higher, so here we presume
 420          * updating the mount is okay (for example, as far as securelevel goes)
 421          * which leaves us with the normal check.
 422          */
 423         if (error == 0) {
 424                 accessmode = VREAD;
 425                 if (update ?
 426                     (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
 427                     (mp->mnt_flag & MNT_RDONLY) == 0)
 428                         accessmode |= VWRITE;
 429                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 430                 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
 431                     KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
 432                     KAUTH_ARG(accessmode));
 433                 VOP_UNLOCK(devvp);
 434         }
 435
 436         if (error) {
 437                 vrele(devvp);
 438                 return (error);
 439         }
 440
 441 #ifdef WAPBL
 442         /* WAPBL can only be enabled on a r/w mount. */
 443         if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
 444                 mp->mnt_flag &= ~MNT_LOG;
 445         }
 446 #else /* !WAPBL */
 447         mp->mnt_flag &= ~MNT_LOG;
 448 #endif /* !WAPBL */
 449
 450         if (!update) {
 451                 int xflags;
 452
 453                 if (mp->mnt_flag & MNT_RDONLY)
 454                         xflags = FREAD;
 455                 else
 456                         xflags = FREAD | FWRITE;
 457                 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 458                 error = VOP_OPEN(devvp, xflags, FSCRED);
 459                 VOP_UNLOCK(devvp);
 460                 if (error)
 461                         goto fail;
 462                 error = ffs_mountfs(devvp, mp, l);
 463                 if (error) {
 464                         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 465                         (void)VOP_CLOSE(devvp, xflags, NOCRED);
 466                         VOP_UNLOCK(devvp);
 467                         goto fail;
 468                 }
 469
 470                 ump = VFSTOUFS(mp);
 471                 fs = ump->um_fs;
 472         } else {
 473                 /*
 474                  * Update the mount.
 475                  */
 476
 477                 /*
 478                  * The initial mount got a reference on this
 479                  * device, so drop the one obtained via
 480                  * namei(), above.
 481                  */
 482                 vrele(devvp);
 483
 484                 ump = VFSTOUFS(mp);
 485                 fs = ump->um_fs;
 486                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 487                         /*
 488                          * Changing from r/w to r/o
 489                          */
 490                         flags = WRITECLOSE;
 491                         if (mp->mnt_flag & MNT_FORCE)
 492                                 flags |= FORCECLOSE;
 493                         error = ffs_flushfiles(mp, flags, l);
 494                         if (error == 0)
 495                                 error = UFS_WAPBL_BEGIN(mp);
 496                         if (error == 0 &&
 497                             ffs_cgupdate(ump, MNT_WAIT) == 0 &&
 498                             fs->fs_clean & FS_WASCLEAN) {
 499                                 if (mp->mnt_flag & MNT_SOFTDEP)
 500                                         fs->fs_flags &= ~FS_DOSOFTDEP;
 501                                 fs->fs_clean = FS_ISCLEAN;
 502                                 (void) ffs_sbupdate(ump, MNT_WAIT);
 503                         }
 504                         if (error == 0)
 505                                 UFS_WAPBL_END(mp);
 506                         if (error)
 507                                 return (error);
 508                 }
 509
 510 #ifdef WAPBL
 511                 if ((mp->mnt_flag & MNT_LOG) == 0) {
 512                         error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
 513                         if (error)
 514                                 return error;
 515                 }
 516 #endif /* WAPBL */
 517
 518                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 519                         /*
 520                          * Finish change from r/w to r/o
 521                          */
 522                         fs->fs_ronly = 1;
 523                         fs->fs_fmod = 0;
 524                 }
 525
 526                 if (mp->mnt_flag & MNT_RELOAD) {
 527                         error = ffs_reload(mp, l->l_cred, l);
 528                         if (error)
 529                                 return (error);
 530                 }
 531
 532                 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
 533                         /*
 534                          * Changing from read-only to read/write
 535                          */
 536 #ifndef QUOTA2
 537                         if (fs->fs_flags & FS_DOQUOTA2) {
 538                                 ump->um_flags |= UFS_QUOTA2;
 539                                 uprintf("%s: options QUOTA2 not enabled%s\n",
 540                                     mp->mnt_stat.f_mntonname,
 541                                     (mp->mnt_flag & MNT_FORCE) ? "" :
 542                                     ", not mounting");
 543                                 return EINVAL;
 544                         }
 545 #endif
 546                         fs->fs_ronly = 0;
 547                         fs->fs_clean <<= 1;
 548                         fs->fs_fmod = 1;
 549 #ifdef WAPBL
 550                         if (fs->fs_flags & FS_DOWAPBL) {
 551                                 printf("%s: replaying log to disk\n",
 552                                     mp->mnt_stat.f_mntonname);
 553                                 KDASSERT(mp->mnt_wapbl_replay);
 554                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
 555                                                            devvp);
 556                                 if (error) {
 557                                         return error;
 558                                 }
 559                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
 560                                 fs->fs_clean = FS_WASCLEAN;
 561                         }
 562 #endif /* WAPBL */
 563                         if (fs->fs_snapinum[0] != 0)
 564                                 ffs_snapshot_mount(mp);
 565                 }
 566
 567 #ifdef WAPBL
 568                 error = ffs_wapbl_start(mp);
 569                 if (error)
 570                         return error;
 571 #endif /* WAPBL */
 572
 573 #ifdef QUOTA2
 574                 if (!fs->fs_ronly) {
 575                         error = ffs_quota2_mount(mp);
 576                         if (error) {
 577                                 return error;
 578                         }
 579                 }
 580 #endif
 581
 582                 if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata))
 583                         ump->um_discarddata = ffs_discard_init(devvp, fs);
 584
 585                 if (args->fspec == NULL)
 586                         return 0;
 587         }
 588
 589         error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
 590             UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
 591         if (error == 0)
 592                 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
 593                     sizeof(fs->fs_fsmnt));
 594         fs->fs_flags &= ~FS_DOSOFTDEP;
 595         if (fs->fs_fmod != 0) { /* XXX */
 596                 int err;
 597
 598                 fs->fs_fmod = 0;
 599                 if (fs->fs_clean & FS_WASCLEAN)
 600                         fs->fs_time = time_second;
 601                 else {
 602                         printf("%s: file system not clean (fs_clean=%#x); "
 603                             "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
 604                             fs->fs_clean);
 605                         printf("%s: lost blocks %" PRId64 " files %d\n",
 606                             mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
 607                             fs->fs_pendinginodes);
 608                 }
 609                 err = UFS_WAPBL_BEGIN(mp);
 610                 if (err == 0) {
 611                         (void) ffs_cgupdate(ump, MNT_WAIT);
 612                         UFS_WAPBL_END(mp);
 613                 }
 614         }
 615         if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
 616                 printf("%s: `-o softdep' is no longer supported, "
 617                     "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
 618                 mp->mnt_flag &= ~MNT_SOFTDEP;
 619         }
 620
 621         return (error);
 622
 623 fail:
 624         vrele(devvp);
 625         return (error);
 626 }
 627
 628 /*
 629  * Reload all incore data for a filesystem (used after running fsck on
 630  * the root filesystem and finding things to fix). The filesystem must
 631  * be mounted read-only.
 632  *
 633  * Things to do to update the mount:
 634  *      1) invalidate all cached meta-data.
 635  *      2) re-read superblock from disk.
 636  *      3) re-read summary information from disk.
 637  *      4) invalidate all inactive vnodes.
 638  *      5) invalidate all cached file data.
 639  *      6) re-read inode data for all active vnodes.
 640  */
 641 int
 642 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
 643 {
 644         struct vnode *vp, *mvp, *devvp;
 645         struct inode *ip;
 646         void *space;
 647         struct buf *bp;
 648         struct fs *fs, *newfs;
 649         struct dkwedge_info dkw;
 650         int i, bsize, blks, error;
 651         int32_t *lp;
 652         struct ufsmount *ump;
 653         daddr_t sblockloc;
 654
 655         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 656                 return (EINVAL);
 657
 658         ump = VFSTOUFS(mp);
 659         /*
 660          * Step 1: invalidate all cached meta-data.
 661          */
 662         devvp = ump->um_devvp;
 663         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 664         error = vinvalbuf(devvp, 0, cred, l, 0, 0);
 665         VOP_UNLOCK(devvp);
 666         if (error)
 667                 panic("ffs_reload: dirty1");
 668         /*
 669          * Step 2: re-read superblock from disk.
 670          */
 671         fs = ump->um_fs;
 672
 673         /* XXX we don't handle possibility that superblock moved. */
 674         error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize,
 675                       NOCRED, 0, &bp);
 676         if (error) {
 677                 return (error);
 678         }
 679         newfs = kmem_alloc(fs->fs_sbsize, KM_SLEEP);
 680         memcpy(newfs, bp->b_data, fs->fs_sbsize);
 681 #ifdef FFS_EI
 682         if (ump->um_flags & UFS_NEEDSWAP) {
 683                 ffs_sb_swap((struct fs*)bp->b_data, newfs);
 684                 fs->fs_flags |= FS_SWAPPED;
 685         } else
 686 #endif
 687                 fs->fs_flags &= ~FS_SWAPPED;
 688         if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 689              newfs->fs_magic != FS_UFS2_MAGIC)||
 690              newfs->fs_bsize > MAXBSIZE ||
 691              newfs->fs_bsize < sizeof(struct fs)) {
 692                 brelse(bp, 0);
 693                 kmem_free(newfs, fs->fs_sbsize);
 694                 return (EIO);           /* XXX needs translation */
 695         }
 696         /* Store off old fs_sblockloc for fs_oldfscompat_read. */
 697         sblockloc = fs->fs_sblockloc;
 698         /*
 699          * Copy pointer fields back into superblock before copying in   XXX
 700          * new superblock. These should really be in the ufsmount.      XXX
 701          * Note that important parameters (eg fs_ncg) are unchanged.
 702          */
 703         newfs->fs_csp = fs->fs_csp;
 704         newfs->fs_maxcluster = fs->fs_maxcluster;
 705         newfs->fs_contigdirs = fs->fs_contigdirs;
 706         newfs->fs_ronly = fs->fs_ronly;
 707         newfs->fs_active = fs->fs_active;
 708         memcpy(fs, newfs, (u_int)fs->fs_sbsize);
 709         brelse(bp, 0);
 710         kmem_free(newfs, fs->fs_sbsize);
 711
 712         /* Recheck for apple UFS filesystem */
 713         ump->um_flags &= ~UFS_ISAPPLEUFS;
 714         /* First check to see if this is tagged as an Apple UFS filesystem
 715          * in the disklabel
 716          */
 717         if (getdiskinfo(devvp, &dkw) == 0 &&
 718             strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
 719                 ump->um_flags |= UFS_ISAPPLEUFS;
 720 #ifdef APPLE_UFS
 721         else {
 722                 /* Manually look for an apple ufs label, and if a valid one
 723                  * is found, then treat it like an Apple UFS filesystem anyway
 724                  *
 725                  * EINVAL is most probably a blocksize or alignment problem,
 726                  * it is unlikely that this is an Apple UFS filesystem then.
 727                  */
 728                 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
 729                         APPLEUFS_LABEL_SIZE, cred, 0, &bp);
 730                 if (error && error != EINVAL) {
 731                         return (error);
 732                 }
 733                 if (error == 0) {
 734                         error = ffs_appleufs_validate(fs->fs_fsmnt,
 735                                 (struct appleufslabel *)bp->b_data, NULL);
 736                         if (error == 0)
 737                                 ump->um_flags |= UFS_ISAPPLEUFS;
 738                         brelse(bp, 0);
 739                 }
 740                 bp = NULL;
 741         }
 742 #else
 743         if (ump->um_flags & UFS_ISAPPLEUFS)
 744                 return (EIO);
 745 #endif
 746
 747         if (UFS_MPISAPPLEUFS(ump)) {
 748                 /* see comment about NeXT below */
 749                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
 750                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
 751                 mp->mnt_iflag |= IMNT_DTYPE;
 752         } else {
 753                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
 754                 ump->um_dirblksiz = UFS_DIRBLKSIZ;
 755                 if (ump->um_maxsymlinklen > 0)
 756                         mp->mnt_iflag |= IMNT_DTYPE;
 757                 else
 758                         mp->mnt_iflag &= ~IMNT_DTYPE;
 759         }
 760         ffs_oldfscompat_read(fs, ump, sblockloc);
 761
 762         mutex_enter(&ump->um_lock);
 763         ump->um_maxfilesize = fs->fs_maxfilesize;
 764         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
 765                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
 766                     mp->mnt_stat.f_mntonname, fs->fs_flags,
 767                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
 768                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
 769                         mutex_exit(&ump->um_lock);
 770                         return (EINVAL);
 771                 }
 772         }
 773         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 774                 fs->fs_pendingblocks = 0;
 775                 fs->fs_pendinginodes = 0;
 776         }
 777         mutex_exit(&ump->um_lock);
 778
 779         ffs_statvfs(mp, &mp->mnt_stat);
 780         /*
 781          * Step 3: re-read summary information from disk.
 782          */
 783         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 784         space = fs->fs_csp;
 785         for (i = 0; i < blks; i += fs->fs_frag) {
 786                 bsize = fs->fs_bsize;
 787                 if (i + fs->fs_frag > blks)
 788                         bsize = (blks - i) * fs->fs_fsize;
 789                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
 790                               NOCRED, 0, &bp);
 791                 if (error) {
 792                         return (error);
 793                 }
 794 #ifdef FFS_EI
 795                 if (UFS_FSNEEDSWAP(fs))
 796                         ffs_csum_swap((struct csum *)bp->b_data,
 797                             (struct csum *)space, bsize);
 798                 else
 799 #endif
 800                         memcpy(space, bp->b_data, (size_t)bsize);
 801                 space = (char *)space + bsize;
 802                 brelse(bp, 0);
 803         }
 804         /*
 805          * We no longer know anything about clusters per cylinder group.
 806          */
 807         if (fs->fs_contigsumsize > 0) {
 808                 lp = fs->fs_maxcluster;
 809                 for (i = 0; i < fs->fs_ncg; i++)
 810                         *lp++ = fs->fs_contigsumsize;
 811         }
 812
 813         /* Allocate a marker vnode. */
 814         mvp = vnalloc(mp);
 815         /*
 816          * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
 817          * and vclean() can be called indirectly
 818          */
 819         mutex_enter(&mntvnode_lock);
 820  loop:
 821         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
 822                 vmark(mvp, vp);
 823                 if (vp->v_mount != mp || vismarker(vp))
 824                         continue;
 825                 /*
 826                  * Step 4: invalidate all inactive vnodes.
 827                  */
 828                 if (vrecycle(vp, &mntvnode_lock)) {
 829                         mutex_enter(&mntvnode_lock);
 830                         (void)vunmark(mvp);
 831                         goto loop;
 832                 }
 833                 /*
 834                  * Step 5: invalidate all cached file data.
 835                  */
 836                 mutex_enter(vp->v_interlock);
 837                 mutex_exit(&mntvnode_lock);
 838                 if (vget(vp, LK_EXCLUSIVE)) {
 839                         (void)vunmark(mvp);
 840                         goto loop;
 841                 }
 842                 if (vinvalbuf(vp, 0, cred, l, 0, 0))
 843                         panic("ffs_reload: dirty2");
 844                 /*
 845                  * Step 6: re-read inode data for all active vnodes.
 846                  */
 847                 ip = VTOI(vp);
 848                 error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)),
 849                               (int)fs->fs_bsize, NOCRED, 0, &bp);
 850                 if (error) {
 851                         vput(vp);
 852                         (void)vunmark(mvp);
 853                         break;
 854                 }
 855                 ffs_load_inode(bp, ip, fs, ip->i_number);
 856                 brelse(bp, 0);
 857                 vput(vp);
 858                 mutex_enter(&mntvnode_lock);
 859         }
 860         mutex_exit(&mntvnode_lock);
 861         vnfree(mvp);
 862         return (error);
 863 }
 864
 865 /*
 866  * Possible superblock locations ordered from most to least likely.
 867  */
 868 static const int sblock_try[] = SBLOCKSEARCH;	/* ffs_mountfs() stops probing at an entry equal to -1 */
 869
 870 /*
 871  * Common code for mount and mountroot
 872  */
 873 int
 874 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
 875 {
 876         struct ufsmount *ump;
 877         struct buf *bp;
 878         struct fs *fs;
 879         dev_t dev;
 880         struct dkwedge_info dkw;
 881         void *space;
 882         daddr_t sblockloc, fsblockloc;
 883         int blks, fstype;
 884         int error, i, bsize, ronly, bset = 0;
 885 #ifdef FFS_EI
 886         int needswap = 0;               /* keep gcc happy */
 887 #endif
 888         int32_t *lp;
 889         kauth_cred_t cred;
 890         u_int32_t sbsize = 8192;        /* keep gcc happy*/
 891         u_int32_t allocsbsize;
 892         int32_t fsbsize;
 893
 894         dev = devvp->v_rdev;
 895         cred = l ? l->l_cred : NOCRED;
 896
 897         /* Flush out any old buffers remaining from a previous use. */
 898         vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 899         error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
 900         VOP_UNLOCK(devvp);
 901         if (error)
 902                 return (error);
 903
 904         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 905
 906         bp = NULL;
 907         ump = NULL;
 908         fs = NULL;
 909         sblockloc = 0;
 910         fstype = 0;
 911
 912         error = fstrans_mount(mp);
 913         if (error)
 914                 return error;
 915
 916         ump = kmem_zalloc(sizeof(*ump), KM_SLEEP);
 917         mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
 918         error = ffs_snapshot_init(ump);
 919         if (error)
 920                 goto out;
 921         ump->um_ops = &ffs_ufsops;
 922
 923 #ifdef WAPBL
 924  sbagain:
 925 #endif
 926         /*
 927          * Try reading the superblock in each of its possible locations.
 928          */
 929         for (i = 0; ; i++) {
 930                 if (bp != NULL) {
 931                         brelse(bp, BC_NOCACHE);
 932                         bp = NULL;
 933                 }
 934                 if (sblock_try[i] == -1) {
 935                         error = EINVAL;
 936                         fs = NULL;
 937                         goto out;
 938                 }
 939                 error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred,
 940                               0, &bp);
 941                 if (error) {
 942                         fs = NULL;
 943                         goto out;
 944                 }
 945                 fs = (struct fs*)bp->b_data;
 946                 fsblockloc = sblockloc = sblock_try[i];
 947                 if (fs->fs_magic == FS_UFS1_MAGIC) {
 948                         sbsize = fs->fs_sbsize;
 949                         fstype = UFS1;
 950                         fsbsize = fs->fs_bsize;
 951 #ifdef FFS_EI
 952                         needswap = 0;
 953                 } else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) {
 954                         sbsize = bswap32(fs->fs_sbsize);
 955                         fstype = UFS1;
 956                         fsbsize = bswap32(fs->fs_bsize);
 957                         needswap = 1;
 958 #endif
 959                 } else if (fs->fs_magic == FS_UFS2_MAGIC) {
 960                         sbsize = fs->fs_sbsize;
 961                         fstype = UFS2;
 962                         fsbsize = fs->fs_bsize;
 963 #ifdef FFS_EI
 964                         needswap = 0;
 965                 } else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) {
 966                         sbsize = bswap32(fs->fs_sbsize);
 967                         fstype = UFS2;
 968                         fsbsize = bswap32(fs->fs_bsize);
 969                         needswap = 1;
 970 #endif
 971                 } else
 972                         continue;
 973
 974
 975                 /* fs->fs_sblockloc isn't defined for old filesystems */
 976                 if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
 977                         if (sblockloc == SBLOCK_UFS2)
 978                                 /*
 979                                  * This is likely to be the first alternate
 980                                  * in a filesystem with 64k blocks.
 981                                  * Don't use it.
 982                                  */
 983                                 continue;
 984                         fsblockloc = sblockloc;
 985                 } else {
 986                         fsblockloc = fs->fs_sblockloc;
 987 #ifdef FFS_EI
 988                         if (needswap)
 989                                 fsblockloc = bswap64(fsblockloc);
 990 #endif
 991                 }
 992
 993                 /* Check we haven't found an alternate superblock */
 994                 if (fsblockloc != sblockloc)
 995                         continue;
 996
 997                 /* Validate size of superblock */
 998                 if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
 999                         continue;
1000
1001                 /* Check that we can handle the file system blocksize */
1002                 if (fsbsize > MAXBSIZE) {
1003                         printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
1004                             fsbsize, MAXBSIZE);
1005                         continue;
1006                 }
1007
1008                 /* Ok seems to be a good superblock */
1009                 break;
1010         }
1011
1012         fs = kmem_alloc((u_long)sbsize, KM_SLEEP);
1013         memcpy(fs, bp->b_data, sbsize);
1014         ump->um_fs = fs;
1015
1016 #ifdef FFS_EI
1017         if (needswap) {
1018                 ffs_sb_swap((struct fs*)bp->b_data, fs);
1019                 fs->fs_flags |= FS_SWAPPED;
1020         } else
1021 #endif
1022                 fs->fs_flags &= ~FS_SWAPPED;
1023
1024 #ifdef WAPBL
1025         if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
1026                 error = ffs_wapbl_replay_start(mp, fs, devvp);
1027                 if (error && (mp->mnt_flag & MNT_FORCE) == 0)
1028                         goto out;
1029                 if (!error) {
1030                         if (!ronly) {
1031                                 /* XXX fsmnt may be stale. */
1032                                 printf("%s: replaying log to disk\n",
1033                                     fs->fs_fsmnt);
1034                                 error = wapbl_replay_write(mp->mnt_wapbl_replay,
1035                                     devvp);
1036                                 if (error)
1037                                         goto out;
1038                                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1039                                 fs->fs_clean = FS_WASCLEAN;
1040                         } else {
1041                                 /* XXX fsmnt may be stale */
1042                                 printf("%s: replaying log to memory\n",
1043                                     fs->fs_fsmnt);
1044                         }
1045
1046                         /* Force a re-read of the superblock */
1047                         brelse(bp, BC_INVAL);
1048                         bp = NULL;
1049                         kmem_free(fs, sbsize);
1050                         fs = NULL;
1051                         goto sbagain;
1052                 }
1053         }
1054 #else /* !WAPBL */
1055         if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1056                 error = EPERM;
1057                 goto out;
1058         }
1059 #endif /* !WAPBL */
1060
1061         ffs_oldfscompat_read(fs, ump, sblockloc);
1062         ump->um_maxfilesize = fs->fs_maxfilesize;
1063
1064         if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1065                 uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1066                     mp->mnt_stat.f_mntonname, fs->fs_flags,
1067                     (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1068                 if ((mp->mnt_flag & MNT_FORCE) == 0) {
1069                         error = EINVAL;
1070                         goto out;
1071                 }
1072         }
1073
1074         if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1075                 fs->fs_pendingblocks = 0;
1076                 fs->fs_pendinginodes = 0;
1077         }
1078
1079         ump->um_fstype = fstype;
1080         if (fs->fs_sbsize < SBLOCKSIZE)
1081                 brelse(bp, BC_INVAL);
1082         else
1083                 brelse(bp, 0);
1084         bp = NULL;
1085
1086         /* First check to see if this is tagged as an Apple UFS filesystem
1087          * in the disklabel
1088          */
1089         if (getdiskinfo(devvp, &dkw) == 0 &&
1090             strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
1091                 ump->um_flags |= UFS_ISAPPLEUFS;
1092 #ifdef APPLE_UFS
1093         else {
1094                 /* Manually look for an apple ufs label, and if a valid one
1095                  * is found, then treat it like an Apple UFS filesystem anyway
1096                  */
1097                 error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
1098                         APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1099                 if (error)
1100                         goto out;
1101                 error = ffs_appleufs_validate(fs->fs_fsmnt,
1102                         (struct appleufslabel *)bp->b_data, NULL);
1103                 if (error == 0) {
1104                         ump->um_flags |= UFS_ISAPPLEUFS;
1105                 }
1106                 brelse(bp, 0);
1107                 bp = NULL;
1108         }
1109 #else
1110         if (ump->um_flags & UFS_ISAPPLEUFS) {
1111                 error = EINVAL;
1112                 goto out;
1113         }
1114 #endif
1115
1116 #if 0
1117 /*
1118  * XXX This code changes the behaviour of mounting dirty filesystems, to
1119  * XXX require "mount -f ..." to mount them.  This doesn't match what
1120  * XXX mount(8) describes and is disabled for now.
1121  */
1122         /*
1123          * If the file system is not clean, don't allow it to be mounted
1124          * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1125          * for the root file system.)
1126          */
1127         if (fs->fs_flags & FS_DOWAPBL) {
1128                 /*
1129                  * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1130                  * bit is set, although there's a window in unmount where it
1131                  * could be FS_ISCLEAN
1132                  */
1133                 if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1134                     (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1135                         error = EPERM;
1136                         goto out;
1137                 }
1138         } else
1139                 if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1140                     (mp->mnt_flag & MNT_FORCE) == 0) {
1141                         error = EPERM;
1142                         goto out;
1143                 }
1144 #endif
1145
1146         /*
1147          * verify that we can access the last block in the fs
1148          * if we're mounting read/write.
1149          */
1150
1151         if (!ronly) {
1152                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1), fs->fs_fsize,
1153                     cred, 0, &bp);
1154                 if (bp->b_bcount != fs->fs_fsize)
1155                         error = EINVAL;
1156                 if (error) {
1157                         bset = BC_INVAL;
1158                         goto out;
1159                 }
1160                 brelse(bp, BC_INVAL);
1161                 bp = NULL;
1162         }
1163
1164         fs->fs_ronly = ronly;
1165         /* Don't bump fs_clean if we're replaying journal */
1166         if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1167                 if (ronly == 0) {
1168                         fs->fs_clean <<= 1;
1169                         fs->fs_fmod = 1;
1170                 }
1171         bsize = fs->fs_cssize;
1172         blks = howmany(bsize, fs->fs_fsize);
1173         if (fs->fs_contigsumsize > 0)
1174                 bsize += fs->fs_ncg * sizeof(int32_t);
1175         bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1176         allocsbsize = bsize;
1177         space = kmem_alloc((u_long)allocsbsize, KM_SLEEP);
1178         fs->fs_csp = space;
1179         for (i = 0; i < blks; i += fs->fs_frag) {
1180                 bsize = fs->fs_bsize;
1181                 if (i + fs->fs_frag > blks)
1182                         bsize = (blks - i) * fs->fs_fsize;
1183                 error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
1184                               cred, 0, &bp);
1185                 if (error) {
1186                         kmem_free(fs->fs_csp, allocsbsize);
1187                         goto out;
1188                 }
1189 #ifdef FFS_EI
1190                 if (needswap)
1191                         ffs_csum_swap((struct csum *)bp->b_data,
1192                                 (struct csum *)space, bsize);
1193                 else
1194 #endif
1195                         memcpy(space, bp->b_data, (u_int)bsize);
1196
1197                 space = (char *)space + bsize;
1198                 brelse(bp, 0);
1199                 bp = NULL;
1200         }
1201         if (fs->fs_contigsumsize > 0) {
1202                 fs->fs_maxcluster = lp = space;
1203                 for (i = 0; i < fs->fs_ncg; i++)
1204                         *lp++ = fs->fs_contigsumsize;
1205                 space = lp;
1206         }
1207         bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1208         fs->fs_contigdirs = space;
1209         space = (char *)space + bsize;
1210         memset(fs->fs_contigdirs, 0, bsize);
1211                 /* Compatibility for old filesystems - XXX */
1212         if (fs->fs_avgfilesize <= 0)
1213                 fs->fs_avgfilesize = AVFILESIZ;
1214         if (fs->fs_avgfpdir <= 0)
1215                 fs->fs_avgfpdir = AFPDIR;
1216         fs->fs_active = NULL;
1217         mp->mnt_data = ump;
1218         mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1219         mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1220         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1221         mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1222         if (UFS_MPISAPPLEUFS(ump)) {
1223                 /* NeXT used to keep short symlinks in the inode even
1224                  * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1225                  * is probably -1, but we still need to be able to identify
1226                  * short symlinks.
1227                  */
1228                 ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1229                 ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1230                 mp->mnt_iflag |= IMNT_DTYPE;
1231         } else {
1232                 ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1233                 ump->um_dirblksiz = UFS_DIRBLKSIZ;
1234                 if (ump->um_maxsymlinklen > 0)
1235                         mp->mnt_iflag |= IMNT_DTYPE;
1236                 else
1237                         mp->mnt_iflag &= ~IMNT_DTYPE;
1238         }
1239         mp->mnt_fs_bshift = fs->fs_bshift;
1240         mp->mnt_dev_bshift = DEV_BSHIFT;        /* XXX */
1241         mp->mnt_flag |= MNT_LOCAL;
1242         mp->mnt_iflag |= IMNT_MPSAFE;
1243 #ifdef FFS_EI
1244         if (needswap)
1245                 ump->um_flags |= UFS_NEEDSWAP;
1246 #endif
1247         ump->um_mountp = mp;
1248         ump->um_dev = dev;
1249         ump->um_devvp = devvp;
1250         ump->um_nindir = fs->fs_nindir;
1251         ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1252         ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
1253         ump->um_seqinc = fs->fs_frag;
1254         for (i = 0; i < MAXQUOTAS; i++)
1255                 ump->um_quotas[i] = NULLVP;
1256         spec_node_setmountedfs(devvp, mp);
1257         if (ronly == 0 && fs->fs_snapinum[0] != 0)
1258                 ffs_snapshot_mount(mp);
1259 #ifdef WAPBL
1260         if (!ronly) {
1261                 KDASSERT(fs->fs_ronly == 0);
1262                 /*
1263                  * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1264                  * needs to create a new log file in-filesystem.
1265                  */
1266                 ffs_statvfs(mp, &mp->mnt_stat);
1267
1268                 error = ffs_wapbl_start(mp);
1269                 if (error) {
1270                         kmem_free(fs->fs_csp, allocsbsize);
1271                         goto out;
1272                 }
1273         }
1274 #endif /* WAPBL */
1275         if (ronly == 0) {
1276 #ifdef QUOTA2
1277                 error = ffs_quota2_mount(mp);
1278                 if (error) {
1279                         kmem_free(fs->fs_csp, allocsbsize);
1280                         goto out;
1281                 }
1282 #else
1283                 if (fs->fs_flags & FS_DOQUOTA2) {
1284                         ump->um_flags |= UFS_QUOTA2;
1285                         uprintf("%s: options QUOTA2 not enabled%s\n",
1286                             mp->mnt_stat.f_mntonname,
1287                             (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1288                         if ((mp->mnt_flag & MNT_FORCE) == 0) {
1289                                 error = EINVAL;
1290                                 kmem_free(fs->fs_csp, allocsbsize);
1291                                 goto out;
1292                         }
1293                 }
1294 #endif
1295          }
1296 #ifdef UFS_EXTATTR
1297         /*
1298          * Initialize file-backed extended attributes on UFS1 file
1299          * systems.
1300          */
1301         if (ump->um_fstype == UFS1)
1302                 ufs_extattr_uepm_init(&ump->um_extattr);
1303 #endif /* UFS_EXTATTR */
1304
1305         if (mp->mnt_flag & MNT_DISCARD)
1306                 ump->um_discarddata = ffs_discard_init(devvp, fs);
1307
1308         return (0);
1309 out:
1310 #ifdef WAPBL
1311         if (mp->mnt_wapbl_replay) {
1312                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1313                 wapbl_replay_free(mp->mnt_wapbl_replay);
1314                 mp->mnt_wapbl_replay = 0;
1315         }
1316 #endif
1317
1318         fstrans_unmount(mp);
1319         if (fs)
1320                 kmem_free(fs, fs->fs_sbsize);
1321         spec_node_setmountedfs(devvp, NULL);
1322         if (bp)
1323                 brelse(bp, bset);
1324         if (ump) {
1325                 if (ump->um_oldfscompat)
1326                         kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1327                 mutex_destroy(&ump->um_lock);
1328                 kmem_free(ump, sizeof(*ump));
1329                 mp->mnt_data = NULL;
1330         }
1331         return (error);
1332 }
1333
1334 /*
1335  * Sanity checks for loading old filesystem superblocks.
1336  * See ffs_oldfscompat_write below for unwound actions.
1337  *
1338  * XXX - Parts get retired eventually.
1339  * Unfortunately new bits get added.
1340  */
1341 static void
1342 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1343 {
1344         off_t maxfilesize;
1345         int32_t *extrasave;
1346
1347         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1348             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1349                 return;
1350
1351         if (!ump->um_oldfscompat)
1352                 ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t),
1353                     KM_SLEEP);
1354
1355         memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1356         extrasave = ump->um_oldfscompat;
1357         extrasave += 512/sizeof(int32_t);
1358         extrasave[0] = fs->fs_old_npsect;
1359         extrasave[1] = fs->fs_old_interleave;
1360         extrasave[2] = fs->fs_old_trackskew;
1361
1362         /* These fields will be overwritten by their
1363          * original values in fs_oldfscompat_write, so it is harmless
1364          * to modify them here.
1365          */
1366         fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1367         fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1368         fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1369         fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1370
1371         fs->fs_maxbsize = fs->fs_bsize;
1372         fs->fs_time = fs->fs_old_time;
1373         fs->fs_size = fs->fs_old_size;
1374         fs->fs_dsize = fs->fs_old_dsize;
1375         fs->fs_csaddr = fs->fs_old_csaddr;
1376         fs->fs_sblockloc = sblockloc;
1377
1378         fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1379
1380         if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1381                 fs->fs_old_nrpos = 8;
1382                 fs->fs_old_npsect = fs->fs_old_nsect;
1383                 fs->fs_old_interleave = 1;
1384                 fs->fs_old_trackskew = 0;
1385         }
1386
1387         if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1388                 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1389                 fs->fs_qbmask = ~fs->fs_bmask;
1390                 fs->fs_qfmask = ~fs->fs_fmask;
1391         }
1392
1393         maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1394         if (fs->fs_maxfilesize > maxfilesize)
1395                 fs->fs_maxfilesize = maxfilesize;
1396
1397         /* Compatibility for old filesystems */
1398         if (fs->fs_avgfilesize <= 0)
1399                 fs->fs_avgfilesize = AVFILESIZ;
1400         if (fs->fs_avgfpdir <= 0)
1401                 fs->fs_avgfpdir = AFPDIR;
1402
1403 #if 0
1404         if (bigcgs) {
1405                 fs->fs_save_cgsize = fs->fs_cgsize;
1406                 fs->fs_cgsize = fs->fs_bsize;
1407         }
1408 #endif
1409 }
1410
1411 /*
1412  * Unwinding superblock updates for old filesystems.
1413  * See ffs_oldfscompat_read above for details.
1414  *
1415  * XXX - Parts get retired eventually.
1416  * Unfortunately new bits get added.
1417  */
1418 static void
1419 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1420 {
1421         int32_t *extrasave;
1422
1423         if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1424             (fs->fs_old_flags & FS_FLAGS_UPDATED))
1425                 return;
1426
1427         fs->fs_old_time = fs->fs_time;
1428         fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1429         fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1430         fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1431         fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1432         fs->fs_old_flags = fs->fs_flags;
1433
1434 #if 0
1435         if (bigcgs) {
1436                 fs->fs_cgsize = fs->fs_save_cgsize;
1437         }
1438 #endif
1439
1440         memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1441         extrasave = ump->um_oldfscompat;
1442         extrasave += 512/sizeof(int32_t);
1443         fs->fs_old_npsect = extrasave[0];
1444         fs->fs_old_interleave = extrasave[1];
1445         fs->fs_old_trackskew = extrasave[2];
1446
1447 }
1448
1449 /*
1450  * unmount vfs operation
1451  */
1452 int
1453 ffs_unmount(struct mount *mp, int mntflags)
1454 {
1455         struct lwp *l = curlwp;
1456         struct ufsmount *ump = VFSTOUFS(mp);
1457         struct fs *fs = ump->um_fs;
1458         int error, flags;
1459         u_int32_t bsize;
1460 #ifdef WAPBL
1461         extern int doforce;
1462 #endif
1463
1464         if (ump->um_discarddata) {
1465                 ffs_discard_finish(ump->um_discarddata, mntflags);
1466                 ump->um_discarddata = NULL;
1467         }
1468
1469         flags = 0;
1470         if (mntflags & MNT_FORCE)
1471                 flags |= FORCECLOSE;
1472         if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1473                 return (error);
1474         error = UFS_WAPBL_BEGIN(mp);
1475         if (error == 0)
1476                 if (fs->fs_ronly == 0 &&
1477                     ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1478                     fs->fs_clean & FS_WASCLEAN) {
1479                         fs->fs_clean = FS_ISCLEAN;
1480                         fs->fs_fmod = 0;
1481                         (void) ffs_sbupdate(ump, MNT_WAIT);
1482                 }
1483         if (error == 0)
1484                 UFS_WAPBL_END(mp);
1485 #ifdef WAPBL
1486         KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1487         if (mp->mnt_wapbl_replay) {
1488                 KDASSERT(fs->fs_ronly);
1489                 wapbl_replay_stop(mp->mnt_wapbl_replay);
1490                 wapbl_replay_free(mp->mnt_wapbl_replay);
1491                 mp->mnt_wapbl_replay = 0;
1492         }
1493         error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1494         if (error) {
1495                 return error;
1496         }
1497 #endif /* WAPBL */
1498
1499         if (ump->um_devvp->v_type != VBAD)
1500                 spec_node_setmountedfs(ump->um_devvp, NULL);
1501         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1502         (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1503                 NOCRED);
1504         vput(ump->um_devvp);
1505
1506         bsize = fs->fs_cssize;
1507         if (fs->fs_contigsumsize > 0)
1508                 bsize += fs->fs_ncg * sizeof(int32_t);
1509         bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1510         kmem_free(fs->fs_csp, bsize);
1511
1512         kmem_free(fs, fs->fs_sbsize);
1513         if (ump->um_oldfscompat != NULL)
1514                 kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1515         mutex_destroy(&ump->um_lock);
1516         ffs_snapshot_fini(ump);
1517         kmem_free(ump, sizeof(*ump));
1518         mp->mnt_data = NULL;
1519         mp->mnt_flag &= ~MNT_LOCAL;
1520         fstrans_unmount(mp);
1521         return (0);
1522 }
1523
1524 /*
1525  * Flush out all the files in a filesystem.
1526  */
1527 int
1528 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1529 {
1530         extern int doforce;
1531         struct ufsmount *ump;
1532         int error;
1533
1534         if (!doforce)
1535                 flags &= ~FORCECLOSE;
1536         ump = VFSTOUFS(mp);
1537 #ifdef QUOTA
1538         if ((error = quota1_umount(mp, flags)) != 0)
1539                 return (error);
1540 #endif
1541 #ifdef QUOTA2
1542         if ((error = quota2_umount(mp, flags)) != 0)
1543                 return (error);
1544 #endif
1545 #ifdef UFS_EXTATTR
1546         if (ump->um_fstype == UFS1) {
1547                 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)
1548                         ufs_extattr_stop(mp, l);
1549                 if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)
1550                         ufs_extattr_uepm_destroy(&ump->um_extattr);
1551         }
1552 #endif
1553         if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1554                 return (error);
1555         ffs_snapshot_unmount(mp);
1556         /*
1557          * Flush all the files.
1558          */
1559         error = vflush(mp, NULLVP, flags);
1560         if (error)
1561                 return (error);
1562         /*
1563          * Flush filesystem metadata.
1564          */
1565         vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1566         error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1567         VOP_UNLOCK(ump->um_devvp);
1568         if (flags & FORCECLOSE) /* XXXDBJ */
1569                 error = 0;
1570
1571 #ifdef WAPBL
1572         if (error)
1573                 return error;
1574         if (mp->mnt_wapbl) {
1575                 error = wapbl_flush(mp->mnt_wapbl, 1);
1576                 if (flags & FORCECLOSE)
1577                         error = 0;
1578         }
1579 #endif
1580
1581         return (error);
1582 }
1583
1584 /*
1585  * Get file system statistics.
1586  */
1587 int
1588 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1589 {
1590         struct ufsmount *ump;
1591         struct fs *fs;
1592
1593         ump = VFSTOUFS(mp);
1594         fs = ump->um_fs;
1595         mutex_enter(&ump->um_lock);
1596         sbp->f_bsize = fs->fs_bsize;
1597         sbp->f_frsize = fs->fs_fsize;
1598         sbp->f_iosize = fs->fs_bsize;
1599         sbp->f_blocks = fs->fs_dsize;
1600         sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1601             fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks);
1602         sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1603             fs->fs_minfree) / (u_int64_t) 100;
1604         if (sbp->f_bfree > sbp->f_bresvd)
1605                 sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1606         else
1607                 sbp->f_bavail = 0;
1608         sbp->f_files =  fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
1609         sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1610         sbp->f_favail = sbp->f_ffree;
1611         sbp->f_fresvd = 0;
1612         mutex_exit(&ump->um_lock);
1613         copy_statvfs_info(sbp, mp);
1614
1615         return (0);
1616 }
1617
1618 /*
1619  * Go through the disk queues to initiate sandbagged IO;
1620  * go through the inodes to write those that have been modified;
1621  * initiate the writing of the super block if it has been modified.
1622  *
1623  * Note: we are always called with the filesystem marked `MPBUSY'.
1624  */
1625 int
1626 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1627 {
1628         struct vnode *vp, *mvp, *nvp;
1629         struct inode *ip;
1630         struct ufsmount *ump = VFSTOUFS(mp);
1631         struct fs *fs;
1632         int error, allerror = 0;
1633         bool is_suspending;
1634
1635         fs = ump->um_fs;
1636         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1637                 printf("fs = %s\n", fs->fs_fsmnt);
1638                 panic("update: rofs mod");
1639         }
1640
1641         /* Allocate a marker vnode. */
1642         mvp = vnalloc(mp);
1643
1644         fstrans_start(mp, FSTRANS_SHARED);
1645         is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1646         /*
1647          * Write back each (modified) inode.
1648          */
1649         mutex_enter(&mntvnode_lock);
1650 loop:
1651         /*
1652          * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1653          * and vclean() can be called indirectly
1654          */
1655         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1656                 nvp = TAILQ_NEXT(vp, v_mntvnodes);
1657                 /*
1658                  * If the vnode that we are about to sync is no longer
1659                  * associated with this mount point, start over.
1660                  */
1661                 if (vp->v_mount != mp)
1662                         goto loop;
1663                 /*
1664                  * Don't interfere with concurrent scans of this FS.
1665                  */
1666                 if (vismarker(vp))
1667                         continue;
1668                 mutex_enter(vp->v_interlock);
1669                 ip = VTOI(vp);
1670
1671                 /*
1672                  * Skip the vnode/inode if inaccessible.
1673                  */
1674                 if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1675                     vp->v_type == VNON) {
1676                         mutex_exit(vp->v_interlock);
1677                         continue;
1678                 }
1679
1680                 /*
1681                  * We deliberately update inode times here.  This will
1682                  * prevent a massive queue of updates accumulating, only
1683                  * to be handled by a call to unmount.
1684                  *
1685                  * XXX It would be better to have the syncer trickle these
1686                  * out.  Adjustment needed to allow registering vnodes for
1687                  * sync when the vnode is clean, but the inode dirty.  Or
1688                  * have ufs itself trickle out inode updates.
1689                  *
1690                  * If doing a lazy sync, we don't care about metadata or
1691                  * data updates, because they are handled by each vnode's
1692                  * synclist entry.  In this case we are only interested in
1693                  * writing back modified inodes.
1694                  */
1695                 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1696                     IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1697                     (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1698                     UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1699                         mutex_exit(vp->v_interlock);
1700                         continue;
1701                 }
1702                 if (vp->v_type == VBLK && is_suspending) {
1703                         mutex_exit(vp->v_interlock);
1704                         continue;
1705                 }
1706                 vmark(mvp, vp);
1707                 mutex_exit(&mntvnode_lock);
1708                 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
1709                 if (error) {
1710                         mutex_enter(&mntvnode_lock);
1711                         nvp = vunmark(mvp);
1712                         if (error == ENOENT) {
1713                                 goto loop;
1714                         }
1715                         continue;
1716                 }
1717                 if (waitfor == MNT_LAZY) {
1718                         error = UFS_WAPBL_BEGIN(vp->v_mount);
1719                         if (!error) {
1720                                 error = ffs_update(vp, NULL, NULL,
1721                                     UPDATE_CLOSE);
1722                                 UFS_WAPBL_END(vp->v_mount);
1723                         }
1724                 } else {
1725                         error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1726                             (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1727                 }
1728                 if (error)
1729                         allerror = error;
1730                 vput(vp);
1731                 mutex_enter(&mntvnode_lock);
1732                 nvp = vunmark(mvp);
1733         }
1734         mutex_exit(&mntvnode_lock);
1735         /*
1736          * Force stale file system control information to be flushed.
1737          */
1738         if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1739             !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1740                 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1741                 if ((error = VOP_FSYNC(ump->um_devvp, cred,
1742                     (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1743                     0, 0)) != 0)
1744                         allerror = error;
1745                 VOP_UNLOCK(ump->um_devvp);
1746                 if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1747                         mutex_enter(&mntvnode_lock);
1748                         goto loop;
1749                 }
1750         }
1751 #if defined(QUOTA) || defined(QUOTA2)
1752         qsync(mp);
1753 #endif
1754         /*
1755          * Write back modified superblock.
1756          */
1757         if (fs->fs_fmod != 0) {
1758                 fs->fs_fmod = 0;
1759                 fs->fs_time = time_second;
1760                 error = UFS_WAPBL_BEGIN(mp);
1761                 if (error)
1762                         allerror = error;
1763                 else {
1764                         if ((error = ffs_cgupdate(ump, waitfor)))
1765                                 allerror = error;
1766                         UFS_WAPBL_END(mp);
1767                 }
1768         }
1769
1770 #ifdef WAPBL
1771         if (mp->mnt_wapbl) {
1772                 error = wapbl_flush(mp->mnt_wapbl, 0);
1773                 if (error)
1774                         allerror = error;
1775         }
1776 #endif
1777
1778         fstrans_done(mp);
1779         vnfree(mvp);
1780         return (allerror);
1781 }
1782
1783 /*
1784  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1785  * in from disk.  If it is in core, wait for the lock bit to clear, then
1786  * return the inode locked.  Detection and handling of mount points must be
1787  * done by the calling routine.
1788  */
1789 int
1790 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1791 {
1792         struct fs *fs;
1793         struct inode *ip;
1794         struct ufsmount *ump;
1795         struct buf *bp;
1796         struct vnode *vp;
1797         dev_t dev;
1798         int error;
1799
1800         ump = VFSTOUFS(mp);
1801         dev = ump->um_dev;
1802
1803  retry:
1804         if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1805                 return (0);
1806
1807         /* Allocate a new vnode/inode. */
1808         error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, NULL, &vp);
1809         if (error) {
1810                 *vpp = NULL;
1811                 return (error);
1812         }
1813         ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1814
1815         /*
1816          * If someone beat us to it, put back the freshly allocated
1817          * vnode/inode pair and retry.
1818          */
1819         mutex_enter(&ufs_hashlock);
1820         if (ufs_ihashget(dev, ino, 0) != NULL) {
1821                 mutex_exit(&ufs_hashlock);
1822                 ungetnewvnode(vp);
1823                 pool_cache_put(ffs_inode_cache, ip);
1824                 goto retry;
1825         }
1826
1827         vp->v_vflag |= VV_LOCKSWORK;
1828
1829         /*
1830          * XXX MFS ends up here, too, to allocate an inode.  Should we
1831          * XXX create another pool for MFS inodes?
1832          */
1833
1834         memset(ip, 0, sizeof(struct inode));
1835         vp->v_data = ip;
1836         ip->i_vnode = vp;
1837         ip->i_ump = ump;
1838         ip->i_fs = fs = ump->um_fs;
1839         ip->i_dev = dev;
1840         ip->i_number = ino;
1841 #if defined(QUOTA) || defined(QUOTA2)
1842         ufsquota_init(ip);
1843 #endif
1844
1845         /*
1846          * Initialize genfs node, we might proceed to destroy it in
1847          * error branches.
1848          */
1849         genfs_node_init(vp, &ffs_genfsops);
1850
1851         /*
1852          * Put it onto its hash chain and lock it so that other requests for
1853          * this inode will block if they arrive while we are sleeping waiting
1854          * for old data structures to be purged or for the contents of the
1855          * disk portion of this inode to be read.
1856          */
1857
1858         ufs_ihashins(ip);
1859         mutex_exit(&ufs_hashlock);
1860
1861         /* Read in the disk contents for the inode, copy into the inode. */
1862         error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)),
1863                       (int)fs->fs_bsize, NOCRED, 0, &bp);
1864         if (error) {
1865
1866                 /*
1867                  * The inode does not contain anything useful, so it would
1868                  * be misleading to leave it on its hash chain. With mode
1869                  * still zero, it will be unlinked and returned to the free
1870                  * list by vput().
1871                  */
1872
1873                 vput(vp);
1874                 *vpp = NULL;
1875                 return (error);
1876         }
1877         if (ip->i_ump->um_fstype == UFS1)
1878                 ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1879                     PR_WAITOK);
1880         else
1881                 ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1882                     PR_WAITOK);
1883         ffs_load_inode(bp, ip, fs, ino);
1884         brelse(bp, 0);
1885
1886         /*
1887          * Initialize the vnode from the inode, check for aliases.
1888          * Note that the underlying vnode may have changed.
1889          */
1890
1891         ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1892
1893         /*
1894          * Finish inode initialization now that aliasing has been resolved.
1895          */
1896
1897         ip->i_devvp = ump->um_devvp;
1898         vref(ip->i_devvp);
1899
1900         /*
1901          * Ensure that uid and gid are correct. This is a temporary
1902          * fix until fsck has been changed to do the update.
1903          */
1904
1905         if (fs->fs_old_inodefmt < FS_44INODEFMT) {              /* XXX */
1906                 ip->i_uid = ip->i_ffs1_ouid;                    /* XXX */
1907                 ip->i_gid = ip->i_ffs1_ogid;                    /* XXX */
1908         }                                                       /* XXX */
1909         uvm_vnp_setsize(vp, ip->i_size);
1910         *vpp = vp;
1911         return (0);
1912 }
1913
1914 /*
1915  * File handle to vnode
1916  *
1917  * Have to be really careful about stale file handles:
1918  * - check that the inode number is valid
1919  * - call ffs_vget() to get the locked inode
1920  * - check for an unallocated inode (i_mode == 0)
1921  * - check that the given client host has export rights and return
1922  *   those rights via. exflagsp and credanonp
1923  */
1924 int
1925 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1926 {
1927         struct ufid ufh;
1928         struct fs *fs;
1929
1930         if (fhp->fid_len != sizeof(struct ufid))
1931                 return EINVAL;
1932
1933         memcpy(&ufh, fhp, sizeof(ufh));
1934         fs = VFSTOUFS(mp)->um_fs;
1935         if (ufh.ufid_ino < UFS_ROOTINO ||
1936             ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1937                 return (ESTALE);
1938         return (ufs_fhtovp(mp, &ufh, vpp));
1939 }
1940
1941 /*
1942  * Vnode pointer to File handle
1943  */
1944 /* ARGSUSED */
1945 int
1946 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1947 {
1948         struct inode *ip;
1949         struct ufid ufh;
1950
1951         if (*fh_size < sizeof(struct ufid)) {
1952                 *fh_size = sizeof(struct ufid);
1953                 return E2BIG;
1954         }
1955         ip = VTOI(vp);
1956         *fh_size = sizeof(struct ufid);
1957         memset(&ufh, 0, sizeof(ufh));
1958         ufh.ufid_len = sizeof(struct ufid);
1959         ufh.ufid_ino = ip->i_number;
1960         ufh.ufid_gen = ip->i_gen;
1961         memcpy(fhp, &ufh, sizeof(ufh));
1962         return (0);
1963 }
1964
1965 void
1966 ffs_init(void)
1967 {
1968         if (ffs_initcount++ > 0)
1969                 return;
1970
1971         ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1972             "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1973         ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1974             "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1975         ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1976             "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1977         ufs_init();
1978 }
1979
/*
 * Reinitialise FFS; there is no FFS-specific state to redo here, so
 * this only forwards to the common UFS layer.
 */
void
ffs_reinit(void)
{
	ufs_reinit();
}
1986
1987 void
1988 ffs_done(void)
1989 {
1990         if (--ffs_initcount > 0)
1991                 return;
1992
1993         ufs_done();
1994         pool_cache_destroy(ffs_dinode2_cache);
1995         pool_cache_destroy(ffs_dinode1_cache);
1996         pool_cache_destroy(ffs_inode_cache);
1997 }
1998
1999 /*
2000  * Write a superblock and associated information back to disk.
2001  */
2002 int
2003 ffs_sbupdate(struct ufsmount *mp, int waitfor)
2004 {
2005         struct fs *fs = mp->um_fs;
2006         struct buf *bp;
2007         int error = 0;
2008         u_int32_t saveflag;
2009
2010         error = ffs_getblk(mp->um_devvp,
2011             fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
2012             fs->fs_sbsize, false, &bp);
2013         if (error)
2014                 return error;
2015         saveflag = fs->fs_flags & FS_INTERNAL;
2016         fs->fs_flags &= ~FS_INTERNAL;
2017
2018         memcpy(bp->b_data, fs, fs->fs_sbsize);
2019
2020         ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
2021 #ifdef FFS_EI
2022         if (mp->um_flags & UFS_NEEDSWAP)
2023                 ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
2024 #endif
2025         fs->fs_flags |= saveflag;
2026
2027         if (waitfor == MNT_WAIT)
2028                 error = bwrite(bp);
2029         else
2030                 bawrite(bp);
2031         return (error);
2032 }
2033
2034 int
2035 ffs_cgupdate(struct ufsmount *mp, int waitfor)
2036 {
2037         struct fs *fs = mp->um_fs;
2038         struct buf *bp;
2039         int blks;
2040         void *space;
2041         int i, size, error = 0, allerror = 0;
2042
2043         allerror = ffs_sbupdate(mp, waitfor);
2044         blks = howmany(fs->fs_cssize, fs->fs_fsize);
2045         space = fs->fs_csp;
2046         for (i = 0; i < blks; i += fs->fs_frag) {
2047                 size = fs->fs_bsize;
2048                 if (i + fs->fs_frag > blks)
2049                         size = (blks - i) * fs->fs_fsize;
2050                 error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i),
2051                     FFS_NOBLK, size, false, &bp);
2052                 if (error)
2053                         break;
2054 #ifdef FFS_EI
2055                 if (mp->um_flags & UFS_NEEDSWAP)
2056                         ffs_csum_swap((struct csum*)space,
2057                             (struct csum*)bp->b_data, size);
2058                 else
2059 #endif
2060                         memcpy(bp->b_data, space, (u_int)size);
2061                 space = (char *)space + size;
2062                 if (waitfor == MNT_WAIT)
2063                         error = bwrite(bp);
2064                 else
2065                         bawrite(bp);
2066         }
2067         if (!allerror && error)
2068                 allerror = error;
2069         return (allerror);
2070 }
2071
/*
 * Extended attribute control.
 *
 * File-backed extended attributes exist only for UFS1 (UFS2 has native
 * ones), so with UFS_EXTATTR compiled in a UFS1 mount is handled by
 * ufs_extattrctl().  Everything else goes to vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	struct ufsmount *ump = VFSTOUFS(mp);

	if (ump->um_fstype == UFS1) {
		return ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname);
	}
#endif
	return vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname);
}
2086
2087 int
2088 ffs_suspendctl(struct mount *mp, int cmd)
2089 {
2090         int error;
2091         struct lwp *l = curlwp;
2092
2093         switch (cmd) {
2094         case SUSPEND_SUSPEND:
2095                 if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2096                         return error;
2097                 error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2098                 if (error == 0)
2099                         error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2100 #ifdef WAPBL
2101                 if (error == 0 && mp->mnt_wapbl)
2102                         error = wapbl_flush(mp->mnt_wapbl, 1);
2103 #endif
2104                 if (error != 0) {
2105                         (void) fstrans_setstate(mp, FSTRANS_NORMAL);
2106                         return error;
2107                 }
2108                 return 0;
2109
2110         case SUSPEND_RESUME:
2111                 return fstrans_setstate(mp, FSTRANS_NORMAL);
2112
2113         default:
2114                 return EINVAL;
2115         }
2116 }
2117
2118 /*
2119  * Synch vnode for a mounted file system.
2120  */
2121 static int
2122 ffs_vfs_fsync(vnode_t *vp, int flags)
2123 {
2124         int error, i, pflags;
2125 #ifdef WAPBL
2126         struct mount *mp;
2127 #endif
2128
2129         KASSERT(vp->v_type == VBLK);
2130         KASSERT(spec_node_getmountedfs(vp) != NULL);
2131
2132         /*
2133          * Flush all dirty data associated with the vnode.
2134          */
2135         pflags = PGO_ALLPAGES | PGO_CLEANIT;
2136         if ((flags & FSYNC_WAIT) != 0)
2137                 pflags |= PGO_SYNCIO;
2138         mutex_enter(vp->v_interlock);
2139         error = VOP_PUTPAGES(vp, 0, 0, pflags);
2140         if (error)
2141                 return error;
2142
2143 #ifdef WAPBL
2144         mp = spec_node_getmountedfs(vp);
2145         if (mp && mp->mnt_wapbl) {
2146                 /*
2147                  * Don't bother writing out metadata if the syncer is
2148                  * making the request.  We will let the sync vnode
2149                  * write it out in a single burst through a call to
2150                  * VFS_SYNC().
2151                  */
2152                 if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2153                         return 0;
2154
2155                 /*
2156                  * Don't flush the log if the vnode being flushed
2157                  * contains no dirty buffers that could be in the log.
2158                  */
2159                 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2160                         error = wapbl_flush(mp->mnt_wapbl, 0);
2161                         if (error)
2162                                 return error;
2163                 }
2164
2165                 if ((flags & FSYNC_WAIT) != 0) {
2166                         mutex_enter(vp->v_interlock);
2167                         while (vp->v_numoutput)
2168                                 cv_wait(&vp->v_cv, vp->v_interlock);
2169                         mutex_exit(vp->v_interlock);
2170                 }
2171
2172                 return 0;
2173         }
2174 #endif /* WAPBL */
2175
2176         error = vflushbuf(vp, flags);
2177         if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2178                 i = 1;
2179                 (void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2180                     kauth_cred_get());
2181         }
2182
2183         return error;
2184 }