sys/ufs/lfs/lfs_syscalls.c

   1 /*      $NetBSD: lfs_syscalls.c,v 1.139 2011/06/12 03:36:01 rmind Exp $ */
   2
   3 /*-
   4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007, 2008
   5  *    The NetBSD Foundation, Inc.
   6  * All rights reserved.
   7  *
   8  * This code is derived from software contributed to The NetBSD Foundation
   9  * by Konrad E. Schroder <perseant@hhhh.org>.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30  * POSSIBILITY OF SUCH DAMAGE.
  31  */
  32 /*-
  33  * Copyright (c) 1991, 1993, 1994
  34  *      The Regents of the University of California.  All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)lfs_syscalls.c      8.10 (Berkeley) 5/14/95
  61  */
  62
  63 #include <sys/cdefs.h>
  64 __KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.139 2011/06/12 03:36:01 rmind Exp $");
  65
  66 #ifndef LFS
  67 # define LFS            /* for prototypes in syscallargs.h */
  68 #endif
  69
  70 #include <sys/param.h>
  71 #include <sys/systm.h>
  72 #include <sys/proc.h>
  73 #include <sys/buf.h>
  74 #include <sys/mount.h>
  75 #include <sys/vnode.h>
  76 #include <sys/kernel.h>
  77 #include <sys/kauth.h>
  78 #include <sys/syscallargs.h>
  79
  80 #include <ufs/ufs/inode.h>
  81 #include <ufs/ufs/ufsmount.h>
  82 #include <ufs/ufs/ufs_extern.h>
  83
  84 #include <ufs/lfs/lfs.h>
  85 #include <ufs/lfs/lfs_extern.h>
  86
  87 struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, void *);
  88 int lfs_fasthashget(dev_t, ino_t, struct vnode **);
  89
  90 pid_t lfs_cleaner_pid = 0;
  91
  92 /*
  93  * sys_lfs_markv:
  94  *
  95  * This will mark inodes and blocks dirty, so they are written into the log.
  96  * It will block until all the blocks have been written.  The segment create
  97  * time passed in the block_info and inode_info structures is used to decide
  98  * if the data is valid for each block (in case some process dirtied a block
  99  * or inode that is being cleaned between the determination that a block is
 100  * live and the lfs_markv call).
 101  *
 102  *  0 on success
 103  * -1/errno is return on error.
 104  */
 105 #ifdef USE_64BIT_SYSCALLS
 106 int
 107 sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
 108 {
 109         /* {
 110                 syscallarg(fsid_t *) fsidp;
 111                 syscallarg(struct block_info *) blkiov;
 112                 syscallarg(int) blkcnt;
 113         } */
 114         BLOCK_INFO *blkiov;
 115         int blkcnt, error;
 116         fsid_t fsid;
 117         struct lfs *fs;
 118         struct mount *mntp;
 119
 120         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 121             NULL)) != 0)
 122                 return (error);
 123
 124         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 125                 return (error);
 126
 127         if ((mntp = vfs_getvfs(fsidp)) == NULL)
 128                 return (ENOENT);
 129         fs = VFSTOUFS(mntp)->um_lfs;
 130
 131         blkcnt = SCARG(uap, blkcnt);
 132         if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
 133                 return (EINVAL);
 134
 135         KERNEL_LOCK(1, NULL);
 136         blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
 137         if ((error = copyin(SCARG(uap, blkiov), blkiov,
 138                             blkcnt * sizeof(BLOCK_INFO))) != 0)
 139                 goto out;
 140
 141         if ((error = lfs_markv(p, &fsid, blkiov, blkcnt)) == 0)
 142                 copyout(blkiov, SCARG(uap, blkiov),
 143                         blkcnt * sizeof(BLOCK_INFO));
 144     out:
 145         lfs_free(fs, blkiov, LFS_NB_BLKIOV);
 146         KERNEL_UNLOCK_ONE(NULL);
 147         return error;
 148 }
 149 #else
 150 int
 151 sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap, register_t *retval)
 152 {
 153         /* {
 154                 syscallarg(fsid_t *) fsidp;
 155                 syscallarg(struct block_info *) blkiov;
 156                 syscallarg(int) blkcnt;
 157         } */
 158         BLOCK_INFO *blkiov;
 159         BLOCK_INFO_15 *blkiov15;
 160         int i, blkcnt, error;
 161         fsid_t fsid;
 162         struct lfs *fs;
 163         struct mount *mntp;
 164
 165         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 166             NULL)) != 0)
 167                 return (error);
 168
 169         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 170                 return (error);
 171
 172         if ((mntp = vfs_getvfs(&fsid)) == NULL)
 173                 return (ENOENT);
 174         fs = VFSTOUFS(mntp)->um_lfs;
 175
 176         blkcnt = SCARG(uap, blkcnt);
 177         if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
 178                 return (EINVAL);
 179
 180         KERNEL_LOCK(1, NULL);
 181         blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
 182         blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
 183         if ((error = copyin(SCARG(uap, blkiov), blkiov15,
 184                             blkcnt * sizeof(BLOCK_INFO_15))) != 0)
 185                 goto out;
 186
 187         for (i = 0; i < blkcnt; i++) {
 188                 blkiov[i].bi_inode     = blkiov15[i].bi_inode;
 189                 blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
 190                 blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
 191                 blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
 192                 blkiov[i].bi_version   = blkiov15[i].bi_version;
 193                 blkiov[i].bi_bp        = blkiov15[i].bi_bp;
 194                 blkiov[i].bi_size      = blkiov15[i].bi_size;
 195         }
 196
 197         if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
 198                 for (i = 0; i < blkcnt; i++) {
 199                         blkiov15[i].bi_inode     = blkiov[i].bi_inode;
 200                         blkiov15[i].bi_lbn       = blkiov[i].bi_lbn;
 201                         blkiov15[i].bi_daddr     = blkiov[i].bi_daddr;
 202                         blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
 203                         blkiov15[i].bi_version   = blkiov[i].bi_version;
 204                         blkiov15[i].bi_bp        = blkiov[i].bi_bp;
 205                         blkiov15[i].bi_size      = blkiov[i].bi_size;
 206                 }
 207                 copyout(blkiov15, SCARG(uap, blkiov),
 208                         blkcnt * sizeof(BLOCK_INFO_15));
 209         }
 210     out:
 211         lfs_free(fs, blkiov, LFS_NB_BLKIOV);
 212         lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
 213         KERNEL_UNLOCK_ONE(NULL);
 214         return error;
 215 }
 216 #endif
 217
 218 #define LFS_MARKV_MAX_BLOCKS    (LFS_MAX_BUFS)
 219
 220 int
 221 lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov,
 222     int blkcnt)
 223 {
 224         BLOCK_INFO *blkp;
 225         IFILE *ifp;
 226         struct buf *bp;
 227         struct inode *ip = NULL;
 228         struct lfs *fs;
 229         struct mount *mntp;
 230         struct vnode *vp = NULL;
 231         ino_t lastino;
 232         daddr_t b_daddr, v_daddr;
 233         int cnt, error;
 234         int do_again = 0;
 235         int numrefed = 0;
 236         ino_t maxino;
 237         size_t obsize;
 238
 239         /* number of blocks/inodes that we have already bwrite'ed */
 240         int nblkwritten, ninowritten;
 241
 242         if ((mntp = vfs_getvfs(fsidp)) == NULL)
 243                 return (ENOENT);
 244
 245         fs = VFSTOUFS(mntp)->um_lfs;
 246
 247         if (fs->lfs_ronly)
 248                 return EROFS;
 249
 250         maxino = (fragstoblks(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks) -
 251                       fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;
 252
 253         cnt = blkcnt;
 254
 255         if ((error = vfs_busy(mntp, NULL)) != 0)
 256                 return (error);
 257
 258         /*
 259          * This seglock is just to prevent the fact that we might have to sleep
 260          * from allowing the possibility that our blocks might become
 261          * invalid.
 262          *
 263          * It is also important to note here that unless we specify SEGM_CKP,
 264          * any Ifile blocks that we might be asked to clean will never get
 265          * to the disk.
 266          */
 267         lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);
 268
 269         /* Mark blocks/inodes dirty.  */
 270         error = 0;
 271
 272         /* these were inside the initialization for the for loop */
 273         v_daddr = LFS_UNUSED_DADDR;
 274         lastino = LFS_UNUSED_INUM;
 275         nblkwritten = ninowritten = 0;
 276         for (blkp = blkiov; cnt--; ++blkp)
 277         {
 278                 /* Bounds-check incoming data, avoid panic for failed VGET */
 279                 if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
 280                         error = EINVAL;
 281                         goto err3;
 282                 }
 283                 /*
 284                  * Get the IFILE entry (only once) and see if the file still
 285                  * exists.
 286                  */
 287                 if (lastino != blkp->bi_inode) {
 288                         /*
 289                          * Finish the old file, if there was one.  The presence
 290                          * of a usable vnode in vp is signaled by a valid v_daddr.
 291                          */
 292                         if (v_daddr != LFS_UNUSED_DADDR) {
 293                                 lfs_vunref(vp);
 294                                 numrefed--;
 295                         }
 296
 297                         /*
 298                          * Start a new file
 299                          */
 300                         lastino = blkp->bi_inode;
 301                         if (blkp->bi_inode == LFS_IFILE_INUM)
 302                                 v_daddr = fs->lfs_idaddr;
 303                         else {
 304                                 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
 305                                 /* XXX fix for force write */
 306                                 v_daddr = ifp->if_daddr;
 307                                 brelse(bp, 0);
 308                         }
 309                         if (v_daddr == LFS_UNUSED_DADDR)
 310                                 continue;
 311
 312                         /* Get the vnode/inode. */
 313                         error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
 314                                            &vp,
 315                                            (blkp->bi_lbn == LFS_UNUSED_LBN
 316                                             ? blkp->bi_bp
 317                                             : NULL));
 318
 319                         if (!error) {
 320                                 numrefed++;
 321                         }
 322                         if (error) {
 323                                 DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
 324                                       " failed with %d (ino %d, segment %d)\n",
 325                                       error, blkp->bi_inode,
 326                                       dtosn(fs, blkp->bi_daddr)));
 327                                 /*
 328                                  * If we got EAGAIN, that means that the
 329                                  * Inode was locked.  This is
 330                                  * recoverable: just clean the rest of
 331                                  * this segment, and let the cleaner try
 332                                  * again with another.  (When the
 333                                  * cleaner runs again, this segment will
 334                                  * sort high on the list, since it is
 335                                  * now almost entirely empty.) But, we
 336                                  * still set v_daddr = LFS_UNUSED_ADDR
 337                                  * so as not to test this over and over
 338                                  * again.
 339                                  */
 340                                 if (error == EAGAIN) {
 341                                         error = 0;
 342                                         do_again++;
 343                                 }
 344 #ifdef DIAGNOSTIC
 345                                 else if (error != ENOENT)
 346                                         panic("lfs_markv VFS_VGET FAILED");
 347 #endif
 348                                 /* lastino = LFS_UNUSED_INUM; */
 349                                 v_daddr = LFS_UNUSED_DADDR;
 350                                 vp = NULL;
 351                                 ip = NULL;
 352                                 continue;
 353                         }
 354                         ip = VTOI(vp);
 355                         ninowritten++;
 356                 } else if (v_daddr == LFS_UNUSED_DADDR) {
 357                         /*
 358                          * This can only happen if the vnode is dead (or
 359                          * in any case we can't get it...e.g., it is
 360                          * inlocked).  Keep going.
 361                          */
 362                         continue;
 363                 }
 364
 365                 /* Past this point we are guaranteed that vp, ip are valid. */
 366
 367                 /* Can't clean VU_DIROP directories in case of truncation */
 368                 /* XXX - maybe we should mark removed dirs specially? */
 369                 if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
 370                         do_again++;
 371                         continue;
 372                 }
 373
 374                 /* If this BLOCK_INFO didn't contain a block, keep going. */
 375                 if (blkp->bi_lbn == LFS_UNUSED_LBN) {
 376                         /* XXX need to make sure that the inode gets written in this case */
 377                         /* XXX but only write the inode if it's the right one */
 378                         if (blkp->bi_inode != LFS_IFILE_INUM) {
 379                                 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
 380                                 if (ifp->if_daddr == blkp->bi_daddr) {
 381                                         mutex_enter(&lfs_lock);
 382                                         LFS_SET_UINO(ip, IN_CLEANING);
 383                                         mutex_exit(&lfs_lock);
 384                                 }
 385                                 brelse(bp, 0);
 386                         }
 387                         continue;
 388                 }
 389
 390                 b_daddr = 0;
 391                 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
 392                     dbtofsb(fs, b_daddr) != blkp->bi_daddr)
 393                 {
 394                         if (dtosn(fs, dbtofsb(fs, b_daddr)) ==
 395                             dtosn(fs, blkp->bi_daddr))
 396                         {
 397                                 DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %llx vs %llx\n",
 398                                       (long long)blkp->bi_daddr, (long long)dbtofsb(fs, b_daddr)));
 399                         }
 400                         do_again++;
 401                         continue;
 402                 }
 403
 404                 /*
 405                  * Check block sizes.  The blocks being cleaned come from
 406                  * disk, so they should have the same size as their on-disk
 407                  * counterparts.
 408                  */
 409                 if (blkp->bi_lbn >= 0)
 410                         obsize = blksize(fs, ip, blkp->bi_lbn);
 411                 else
 412                         obsize = fs->lfs_bsize;
 413                 /* Check for fragment size change */
 414                 if (blkp->bi_lbn >= 0 && blkp->bi_lbn < NDADDR) {
 415                         obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
 416                 }
 417                 if (obsize != blkp->bi_size) {
 418                         DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %lld wrong"
 419                               " size (%ld != %d), try again\n",
 420                               blkp->bi_inode, (long long)blkp->bi_lbn,
 421                               (long) obsize, blkp->bi_size));
 422                         do_again++;
 423                         continue;
 424                 }
 425
 426                 /*
 427                  * If we get to here, then we are keeping the block.  If
 428                  * it is an indirect block, we want to actually put it
 429                  * in the buffer cache so that it can be updated in the
 430                  * finish_meta section.  If it's not, we need to
 431                  * allocate a fake buffer so that writeseg can perform
 432                  * the copyin and write the buffer.
 433                  */
 434                 if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
 435                         /* Data Block */
 436                         bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
 437                                          blkp->bi_size, blkp->bi_bp);
 438                         /* Pretend we used bread() to get it */
 439                         bp->b_blkno = fsbtodb(fs, blkp->bi_daddr);
 440                 } else {
 441                         /* Indirect block or ifile */
 442                         if (blkp->bi_size != fs->lfs_bsize &&
 443                             ip->i_number != LFS_IFILE_INUM)
 444                                 panic("lfs_markv: partial indirect block?"
 445                                     " size=%d\n", blkp->bi_size);
 446                         bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
 447                         if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
 448                                 /*
 449                                  * The block in question was not found
 450                                  * in the cache; i.e., the block that
 451                                  * getblk() returned is empty.  So, we
 452                                  * can (and should) copy in the
 453                                  * contents, because we've already
 454                                  * determined that this was the right
 455                                  * version of this block on disk.
 456                                  *
 457                                  * And, it can't have changed underneath
 458                                  * us, because we have the segment lock.
 459                                  */
 460                                 error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
 461                                 if (error)
 462                                         goto err2;
 463                         }
 464                 }
 465                 if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
 466                         goto err2;
 467
 468                 nblkwritten++;
 469                 /*
 470                  * XXX should account indirect blocks and ifile pages as well
 471                  */
 472                 if (nblkwritten + lblkno(fs, ninowritten * sizeof (struct ufs1_dinode))
 473                     > LFS_MARKV_MAX_BLOCKS) {
 474                         DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
 475                               nblkwritten, ninowritten));
 476                         lfs_segwrite(mntp, SEGM_CLEAN);
 477                         nblkwritten = ninowritten = 0;
 478                 }
 479         }
 480
 481         /*
 482          * Finish the old file, if there was one
 483          */
 484         if (v_daddr != LFS_UNUSED_DADDR) {
 485                 lfs_vunref(vp);
 486                 numrefed--;
 487         }
 488
 489 #ifdef DIAGNOSTIC
 490         if (numrefed != 0)
 491                 panic("lfs_markv: numrefed=%d", numrefed);
 492 #endif
 493         DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
 494               nblkwritten, ninowritten));
 495
 496         /*
 497          * The last write has to be SEGM_SYNC, because of calling semantics.
 498          * It also has to be SEGM_CKP, because otherwise we could write
 499          * over the newly cleaned data contained in a checkpoint, and then
 500          * we'd be unhappy at recovery time.
 501          */
 502         lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);
 503
 504         lfs_segunlock(fs);
 505
 506         vfs_unbusy(mntp, false, NULL);
 507         if (error)
 508                 return (error);
 509         else if (do_again)
 510                 return EAGAIN;
 511
 512         return 0;
 513
 514 err2:
 515         DLOG((DLOG_CLEAN, "lfs_markv err2\n"));
 516
 517         /*
 518          * XXX we're here because copyin() failed.
 519          * XXX it means that we can't trust the cleanerd.  too bad.
 520          * XXX how can we recover from this?
 521          */
 522
 523 err3:
 524         KERNEL_UNLOCK_ONE(NULL);
 525         /*
 526          * XXX should do segwrite here anyway?
 527          */
 528
 529         if (v_daddr != LFS_UNUSED_DADDR) {
 530                 lfs_vunref(vp);
 531                 --numrefed;
 532         }
 533
 534         lfs_segunlock(fs);
 535         vfs_unbusy(mntp, false, NULL);
 536 #ifdef DIAGNOSTIC
 537         if (numrefed != 0)
 538                 panic("lfs_markv: numrefed=%d", numrefed);
 539 #endif
 540
 541         return (error);
 542 }
 543
 544 /*
 545  * sys_lfs_bmapv:
 546  *
 547  * This will fill in the current disk address for arrays of blocks.
 548  *
 549  *  0 on success
 550  * -1/errno is return on error.
 551  */
 552 #ifdef USE_64BIT_SYSCALLS
 553 int
 554 sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
 555 {
 556         /* {
 557                 syscallarg(fsid_t *) fsidp;
 558                 syscallarg(struct block_info *) blkiov;
 559                 syscallarg(int) blkcnt;
 560         } */
 561         BLOCK_INFO *blkiov;
 562         int blkcnt, error;
 563         fsid_t fsid;
 564         struct lfs *fs;
 565         struct mount *mntp;
 566
 567         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 568             NULL)) != 0)
 569                 return (error);
 570
 571         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 572                 return (error);
 573
 574         if ((mntp = vfs_getvfs(&fsid)) == NULL)
 575                 return (ENOENT);
 576         fs = VFSTOUFS(mntp)->um_lfs;
 577
 578         blkcnt = SCARG(uap, blkcnt);
 579         if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
 580                 return (EINVAL);
 581         KERNEL_LOCK(1, NULL);
 582         blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
 583         if ((error = copyin(SCARG(uap, blkiov), blkiov,
 584                             blkcnt * sizeof(BLOCK_INFO))) != 0)
 585                 goto out;
 586
 587         if ((error = lfs_bmapv(p, &fsid, blkiov, blkcnt)) == 0)
 588                 copyout(blkiov, SCARG(uap, blkiov),
 589                         blkcnt * sizeof(BLOCK_INFO));
 590     out:
 591         lfs_free(fs, blkiov, LFS_NB_BLKIOV);
 592         KERNEL_UNLOCK_ONE(NULL);
 593         return error;
 594 }
 595 #else
 596 int
 597 sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval)
 598 {
 599         /* {
 600                 syscallarg(fsid_t *) fsidp;
 601                 syscallarg(struct block_info *) blkiov;
 602                 syscallarg(int) blkcnt;
 603         } */
 604         BLOCK_INFO *blkiov;
 605         BLOCK_INFO_15 *blkiov15;
 606         int i, blkcnt, error;
 607         fsid_t fsid;
 608         struct lfs *fs;
 609         struct mount *mntp;
 610
 611         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 612             NULL)) != 0)
 613                 return (error);
 614
 615         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 616                 return (error);
 617
 618         if ((mntp = vfs_getvfs(&fsid)) == NULL)
 619                 return (ENOENT);
 620         fs = VFSTOUFS(mntp)->um_lfs;
 621
 622         blkcnt = SCARG(uap, blkcnt);
 623         if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
 624                 return (EINVAL);
 625         KERNEL_LOCK(1, NULL);
 626         blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
 627         blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
 628         if ((error = copyin(SCARG(uap, blkiov), blkiov15,
 629                             blkcnt * sizeof(BLOCK_INFO_15))) != 0)
 630                 goto out;
 631
 632         for (i = 0; i < blkcnt; i++) {
 633                 blkiov[i].bi_inode     = blkiov15[i].bi_inode;
 634                 blkiov[i].bi_lbn       = blkiov15[i].bi_lbn;
 635                 blkiov[i].bi_daddr     = blkiov15[i].bi_daddr;
 636                 blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
 637                 blkiov[i].bi_version   = blkiov15[i].bi_version;
 638                 blkiov[i].bi_bp        = blkiov15[i].bi_bp;
 639                 blkiov[i].bi_size      = blkiov15[i].bi_size;
 640         }
 641
 642         if ((error = lfs_bmapv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) {
 643                 for (i = 0; i < blkcnt; i++) {
 644                         blkiov15[i].bi_inode     = blkiov[i].bi_inode;
 645                         blkiov15[i].bi_lbn       = blkiov[i].bi_lbn;
 646                         blkiov15[i].bi_daddr     = blkiov[i].bi_daddr;
 647                         blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
 648                         blkiov15[i].bi_version   = blkiov[i].bi_version;
 649                         blkiov15[i].bi_bp        = blkiov[i].bi_bp;
 650                         blkiov15[i].bi_size      = blkiov[i].bi_size;
 651                 }
 652                 copyout(blkiov15, SCARG(uap, blkiov),
 653                         blkcnt * sizeof(BLOCK_INFO_15));
 654         }
 655     out:
 656         lfs_free(fs, blkiov, LFS_NB_BLKIOV);
 657         lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
 658         KERNEL_UNLOCK_ONE(NULL);
 659         return error;
 660 }
 661 #endif
 662
 663 int
 664 lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
 665 {
 666         BLOCK_INFO *blkp;
 667         IFILE *ifp;
 668         struct buf *bp;
 669         struct inode *ip = NULL;
 670         struct lfs *fs;
 671         struct mount *mntp;
 672         struct ufsmount *ump;
 673         struct vnode *vp;
 674         ino_t lastino;
 675         daddr_t v_daddr;
 676         int cnt, error;
 677         int numrefed = 0;
 678
 679         lfs_cleaner_pid = p->p_pid;
 680
 681         if ((mntp = vfs_getvfs(fsidp)) == NULL)
 682                 return (ENOENT);
 683
 684         ump = VFSTOUFS(mntp);
 685         if ((error = vfs_busy(mntp, NULL)) != 0)
 686                 return (error);
 687
 688         cnt = blkcnt;
 689
 690         fs = VFSTOUFS(mntp)->um_lfs;
 691
 692         error = 0;
 693
 694         /* these were inside the initialization for the for loop */
 695         v_daddr = LFS_UNUSED_DADDR;
 696         lastino = LFS_UNUSED_INUM;
 697         for (blkp = blkiov; cnt--; ++blkp)
 698         {
 699                 /*
 700                  * Get the IFILE entry (only once) and see if the file still
 701                  * exists.
 702                  */
 703                 if (lastino != blkp->bi_inode) {
 704                         /*
 705                          * Finish the old file, if there was one.  The presence
 706                          * of a usable vnode in vp is signaled by a valid
 707                          * v_daddr.
 708                          */
 709                         if (v_daddr != LFS_UNUSED_DADDR) {
 710                                 lfs_vunref(vp);
 711                                 numrefed--;
 712                         }
 713
 714                         /*
 715                          * Start a new file
 716                          */
 717                         lastino = blkp->bi_inode;
 718                         if (blkp->bi_inode == LFS_IFILE_INUM)
 719                                 v_daddr = fs->lfs_idaddr;
 720                         else {
 721                                 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
 722                                 v_daddr = ifp->if_daddr;
 723                                 brelse(bp, 0);
 724                         }
 725                         if (v_daddr == LFS_UNUSED_DADDR) {
 726                                 blkp->bi_daddr = LFS_UNUSED_DADDR;
 727                                 continue;
 728                         }
 729                         /*
 730                          * A regular call to VFS_VGET could deadlock
 731                          * here.  Instead, we try an unlocked access.
 732                          */
 733                         mutex_enter(&ufs_ihash_lock);
 734                         vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
 735                         if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) {
 736                                 ip = VTOI(vp);
 737                                 mutex_enter(vp->v_interlock);
 738                                 mutex_exit(&ufs_ihash_lock);
 739                                 if (lfs_vref(vp)) {
 740                                         v_daddr = LFS_UNUSED_DADDR;
 741                                         continue;
 742                                 }
 743                                 numrefed++;
 744                         } else {
 745                                 mutex_exit(&ufs_ihash_lock);
 746                                 /*
 747                                  * Don't VFS_VGET if we're being unmounted,
 748                                  * since we hold vfs_busy().
 749                                  */
 750                                 if (mntp->mnt_iflag & IMNT_UNMOUNT) {
 751                                         v_daddr = LFS_UNUSED_DADDR;
 752                                         continue;
 753                                 }
 754                                 error = VFS_VGET(mntp, blkp->bi_inode, &vp);
 755                                 if (error) {
 756                                         DLOG((DLOG_CLEAN, "lfs_bmapv: vget ino"
 757                                               "%d failed with %d",
 758                                               blkp->bi_inode,error));
 759                                         v_daddr = LFS_UNUSED_DADDR;
 760                                         continue;
 761                                 } else {
 762                                         KASSERT(VOP_ISLOCKED(vp));
 763                                         VOP_UNLOCK(vp);
 764                                         numrefed++;
 765                                 }
 766                         }
 767                         ip = VTOI(vp);
 768                 } else if (v_daddr == LFS_UNUSED_DADDR) {
 769                         /*
 770                          * This can only happen if the vnode is dead.
 771                          * Keep going.  Note that we DO NOT set the
 772                          * bi_addr to anything -- if we failed to get
 773                          * the vnode, for example, we want to assume
 774                          * conservatively that all of its blocks *are*
 775                          * located in the segment in question.
 776                          * lfs_markv will throw them out if we are
 777                          * wrong.
 778                          */
 779                         /* blkp->bi_daddr = LFS_UNUSED_DADDR; */
 780                         continue;
 781                 }
 782
 783                 /* Past this point we are guaranteed that vp, ip are valid. */
 784
 785                 if (blkp->bi_lbn == LFS_UNUSED_LBN) {
 786                         /*
 787                          * We just want the inode address, which is
 788                          * conveniently in v_daddr.
 789                          */
 790                         blkp->bi_daddr = v_daddr;
 791                 } else {
 792                         daddr_t bi_daddr;
 793
 794                         /* XXX ondisk32 */
 795                         error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
 796                                          &bi_daddr, NULL);
 797                         if (error)
 798                         {
 799                                 blkp->bi_daddr = LFS_UNUSED_DADDR;
 800                                 continue;
 801                         }
 802                         blkp->bi_daddr = dbtofsb(fs, bi_daddr);
 803                         /* Fill in the block size, too */
 804                         if (blkp->bi_lbn >= 0)
 805                                 blkp->bi_size = blksize(fs, ip, blkp->bi_lbn);
 806                         else
 807                                 blkp->bi_size = fs->lfs_bsize;
 808                 }
 809         }
 810
 811         /*
 812          * Finish the old file, if there was one.  The presence
 813          * of a usable vnode in vp is signaled by a valid v_daddr.
 814          */
 815         if (v_daddr != LFS_UNUSED_DADDR) {
 816                 lfs_vunref(vp);
 817                 numrefed--;
 818         }
 819
 820 #ifdef DIAGNOSTIC
 821         if (numrefed != 0)
 822                 panic("lfs_bmapv: numrefed=%d", numrefed);
 823 #endif
 824
 825         vfs_unbusy(mntp, false, NULL);
 826
 827         return 0;
 828 }
 829
 830 /*
 831  * sys_lfs_segclean:
 832  *
 833  * Mark the segment clean.
 834  *
 835  *  0 on success
 836  * -1/errno is return on error.
 837  */
 838 int
 839 sys_lfs_segclean(struct lwp *l, const struct sys_lfs_segclean_args *uap, register_t *retval)
 840 {
 841         /* {
 842                 syscallarg(fsid_t *) fsidp;
 843                 syscallarg(u_long) segment;
 844         } */
 845         struct lfs *fs;
 846         struct mount *mntp;
 847         fsid_t fsid;
 848         int error;
 849         unsigned long segnum;
 850
 851         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 852             NULL)) != 0)
 853                 return (error);
 854
 855         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 856                 return (error);
 857         if ((mntp = vfs_getvfs(&fsid)) == NULL)
 858                 return (ENOENT);
 859
 860         fs = VFSTOUFS(mntp)->um_lfs;
 861         segnum = SCARG(uap, segment);
 862
 863         if ((error = vfs_busy(mntp, NULL)) != 0)
 864                 return (error);
 865
 866         KERNEL_LOCK(1, NULL);
 867         lfs_seglock(fs, SEGM_PROT);
 868         error = lfs_do_segclean(fs, segnum);
 869         lfs_segunlock(fs);
 870         KERNEL_UNLOCK_ONE(NULL);
 871         vfs_unbusy(mntp, false, NULL);
 872         return error;
 873 }
 874
 875 /*
 876  * Actually mark the segment clean.
 877  * Must be called with the segment lock held.
 878  */
 879 int
 880 lfs_do_segclean(struct lfs *fs, unsigned long segnum)
 881 {
 882         extern int lfs_dostats;
 883         struct buf *bp;
 884         CLEANERINFO *cip;
 885         SEGUSE *sup;
 886
 887         if (dtosn(fs, fs->lfs_curseg) == segnum) {
 888                 return (EBUSY);
 889         }
 890
 891         LFS_SEGENTRY(sup, fs, segnum, bp);
 892         if (sup->su_nbytes) {
 893                 DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
 894                       " %d live bytes\n", segnum, sup->su_nbytes));
 895                 brelse(bp, 0);
 896                 return (EBUSY);
 897         }
 898         if (sup->su_flags & SEGUSE_ACTIVE) {
 899                 DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
 900                       " segment is active\n", segnum));
 901                 brelse(bp, 0);
 902                 return (EBUSY);
 903         }
 904         if (!(sup->su_flags & SEGUSE_DIRTY)) {
 905                 DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
 906                       " segment is already clean\n", segnum));
 907                 brelse(bp, 0);
 908                 return (EALREADY);
 909         }
 910
 911         fs->lfs_avail += segtod(fs, 1);
 912         if (sup->su_flags & SEGUSE_SUPERBLOCK)
 913                 fs->lfs_avail -= btofsb(fs, LFS_SBPAD);
 914         if (fs->lfs_version > 1 && segnum == 0 &&
 915             fs->lfs_start < btofsb(fs, LFS_LABELPAD))
 916                 fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start;
 917         mutex_enter(&lfs_lock);
 918         fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
 919                 btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
 920         fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) +
 921                 btofsb(fs, sup->su_ninos * fs->lfs_ibsize);
 922         if (fs->lfs_dmeta < 0)
 923                 fs->lfs_dmeta = 0;
 924         mutex_exit(&lfs_lock);
 925         sup->su_flags &= ~SEGUSE_DIRTY;
 926         LFS_WRITESEGENTRY(sup, fs, segnum, bp);
 927
 928         LFS_CLEANERINFO(cip, fs, bp);
 929         ++cip->clean;
 930         --cip->dirty;
 931         fs->lfs_nclean = cip->clean;
 932         cip->bfree = fs->lfs_bfree;
 933         mutex_enter(&lfs_lock);
 934         cip->avail = fs->lfs_avail - fs->lfs_ravail - fs->lfs_favail;
 935         wakeup(&fs->lfs_avail);
 936         mutex_exit(&lfs_lock);
 937         (void) LFS_BWRITE_LOG(bp);
 938
 939         if (lfs_dostats)
 940                 ++lfs_stats.segs_reclaimed;
 941
 942         return (0);
 943 }
 944
 945 /*
 946  * This will block until a segment in file system fsid is written.  A timeout
 947  * in milliseconds may be specified which will awake the cleaner automatically.
 948  * An fsid of -1 means any file system, and a timeout of 0 means forever.
 949  */
 950 int
 951 lfs_segwait(fsid_t *fsidp, struct timeval *tv)
 952 {
 953         struct mount *mntp;
 954         void *addr;
 955         u_long timeout;
 956         int error;
 957
 958         KERNEL_LOCK(1, NULL);
 959         if (fsidp == NULL || (mntp = vfs_getvfs(fsidp)) == NULL)
 960                 addr = &lfs_allclean_wakeup;
 961         else
 962                 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
 963         /*
 964          * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
 965          * XXX IS THAT WHAT IS INTENDED?
 966          */
 967         timeout = tvtohz(tv);
 968         error = tsleep(addr, PCATCH | PVFS, "segment", timeout);
 969         KERNEL_UNLOCK_ONE(NULL);
 970         return (error == ERESTART ? EINTR : 0);
 971 }
 972
 973 /*
 974  * sys_lfs_segwait:
 975  *
 976  * System call wrapper around lfs_segwait().
 977  *
 978  *  0 on success
 979  *  1 on timeout
 980  * -1/errno is return on error.
 981  */
 982 int
 983 sys___lfs_segwait50(struct lwp *l, const struct sys___lfs_segwait50_args *uap,
 984     register_t *retval)
 985 {
 986         /* {
 987                 syscallarg(fsid_t *) fsidp;
 988                 syscallarg(struct timeval *) tv;
 989         } */
 990         struct timeval atv;
 991         fsid_t fsid;
 992         int error;
 993
 994         /* XXX need we be su to segwait? */
 995         if ((error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
 996             NULL)) != 0)
 997                 return (error);
 998         if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
 999                 return (error);
1000
1001         if (SCARG(uap, tv)) {
1002                 error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
1003                 if (error)
1004                         return (error);
1005                 if (itimerfix(&atv))
1006                         return (EINVAL);
1007         } else /* NULL or invalid */
1008                 atv.tv_sec = atv.tv_usec = 0;
1009         return lfs_segwait(&fsid, &atv);
1010 }
1011
1012 /*
1013  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
1014  * daddr from the ifile, so don't look it up again.  If the cleaner is
1015  * processing IINFO structures, it may have the ondisk inode already, so
1016  * don't go retrieving it again.
1017  *
1018  * we lfs_vref, and it is the caller's responsibility to lfs_vunref
1019  * when finished.
1020  */
1021
1022 int
1023 lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp)
1024 {
1025         struct vnode *vp;
1026
1027         mutex_enter(&ufs_ihash_lock);
1028         if ((vp = ufs_ihashlookup(dev, ino)) != NULL) {
1029                 mutex_enter(vp->v_interlock);
1030                 mutex_exit(&ufs_ihash_lock);
1031                 if (vp->v_iflag & VI_XLOCK) {
1032                         DLOG((DLOG_CLEAN, "lfs_fastvget: ino %d VI_XLOCK\n",
1033                               ino));
1034                         lfs_stats.clean_vnlocked++;
1035                         mutex_exit(vp->v_interlock);
1036                         return EAGAIN;
1037                 }
1038                 if (lfs_vref(vp)) {
1039                         DLOG((DLOG_CLEAN, "lfs_fastvget: lfs_vref failed"
1040                               " for ino %d\n", ino));
1041                         lfs_stats.clean_inlocked++;
1042                         return EAGAIN;
1043                 }
1044         } else {
1045                 mutex_exit(&ufs_ihash_lock);
1046         }
1047         *vpp = vp;
1048
1049         return (0);
1050 }
1051
1052 int
1053 lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp,
1054              struct ufs1_dinode *dinp)
1055 {
1056         struct inode *ip;
1057         struct ufs1_dinode *dip;
1058         struct vnode *vp;
1059         struct ufsmount *ump;
1060         dev_t dev;
1061         int error, retries;
1062         struct buf *bp;
1063         struct lfs *fs;
1064
1065         ump = VFSTOUFS(mp);
1066         dev = ump->um_dev;
1067         fs = ump->um_lfs;
1068
1069         /*
1070          * Wait until the filesystem is fully mounted before allowing vget
1071          * to complete.  This prevents possible problems with roll-forward.
1072          */
1073         mutex_enter(&lfs_lock);
1074         while (fs->lfs_flags & LFS_NOTYET) {
1075                 mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0,
1076                         &lfs_lock);
1077         }
1078         mutex_exit(&lfs_lock);
1079
1080         /*
1081          * This is playing fast and loose.  Someone may have the inode
1082          * locked, in which case they are going to be distinctly unhappy
1083          * if we trash something.
1084          */
1085
1086         error = lfs_fasthashget(dev, ino, vpp);
1087         if (error != 0 || *vpp != NULL)
1088                 return (error);
1089
1090         /*
1091          * getnewvnode(9) will call vfs_busy, which will block if the
1092          * filesystem is being unmounted; but umount(9) is waiting for
1093          * us because we're already holding the fs busy.
1094          * XXXMP
1095          */
1096         if (mp->mnt_iflag & IMNT_UNMOUNT) {
1097                 *vpp = NULL;
1098                 return EDEADLK;
1099         }
1100         error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, NULL, &vp);
1101         if (error) {
1102                 *vpp = NULL;
1103                 return (error);
1104         }
1105
1106         mutex_enter(&ufs_hashlock);
1107         error = lfs_fasthashget(dev, ino, vpp);
1108         if (error != 0 || *vpp != NULL) {
1109                 mutex_exit(&ufs_hashlock);
1110                 ungetnewvnode(vp);
1111                 return (error);
1112         }
1113
1114         /* Allocate new vnode/inode. */
1115         lfs_vcreate(mp, ino, vp);
1116
1117         /*
1118          * Put it onto its hash chain and lock it so that other requests for
1119          * this inode will block if they arrive while we are sleeping waiting
1120          * for old data structures to be purged or for the contents of the
1121          * disk portion of this inode to be read.
1122          */
1123         ip = VTOI(vp);
1124         ufs_ihashins(ip);
1125         mutex_exit(&ufs_hashlock);
1126
1127         /*
1128          * XXX
1129          * This may not need to be here, logically it should go down with
1130          * the i_devvp initialization.
1131          * Ask Kirk.
1132          */
1133         ip->i_lfs = fs;
1134
1135         /* Read in the disk contents for the inode, copy into the inode. */
1136         if (dinp) {
1137                 error = copyin(dinp, ip->i_din.ffs1_din, sizeof (struct ufs1_dinode));
1138                 if (error) {
1139                         DLOG((DLOG_CLEAN, "lfs_fastvget: dinode copyin failed"
1140                               " for ino %d\n", ino));
1141                         ufs_ihashrem(ip);
1142
1143                         /* Unlock and discard unneeded inode. */
1144                         VOP_UNLOCK(vp);
1145                         lfs_vunref(vp);
1146                         *vpp = NULL;
1147                         return (error);
1148                 }
1149                 if (ip->i_number != ino)
1150                         panic("lfs_fastvget: I was fed the wrong inode!");
1151         } else {
1152                 retries = 0;
1153             again:
1154                 error = bread(ump->um_devvp, fsbtodb(fs, daddr), fs->lfs_ibsize,
1155                               NOCRED, 0, &bp);
1156                 if (error) {
1157                         DLOG((DLOG_CLEAN, "lfs_fastvget: bread failed (%d)\n",
1158                               error));
1159                         /*
1160                          * The inode does not contain anything useful, so it
1161                          * would be misleading to leave it on its hash chain.
1162                          * Iput() will return it to the free list.
1163                          */
1164                         ufs_ihashrem(ip);
1165
1166                         /* Unlock and discard unneeded inode. */
1167                         VOP_UNLOCK(vp);
1168                         lfs_vunref(vp);
1169                         brelse(bp, 0);
1170                         *vpp = NULL;
1171                         return (error);
1172                 }
1173                 dip = lfs_ifind(ump->um_lfs, ino, bp);
1174                 if (dip == NULL) {
1175                         /* Assume write has not completed yet; try again */
1176                         brelse(bp, BC_INVAL);
1177                         ++retries;
1178                         if (retries > LFS_IFIND_RETRIES)
1179                                 panic("lfs_fastvget: dinode not found");
1180                         DLOG((DLOG_CLEAN, "lfs_fastvget: dinode not found,"
1181                               " retrying...\n"));
1182                         goto again;
1183                 }
1184                 *ip->i_din.ffs1_din = *dip;
1185                 brelse(bp, 0);
1186         }
1187         lfs_vinit(mp, &vp);
1188
1189         *vpp = vp;
1190
1191         KASSERT(VOP_ISLOCKED(vp));
1192         VOP_UNLOCK(vp);
1193
1194         return (0);
1195 }
1196
1197 /*
1198  * Make up a "fake" cleaner buffer, copy the data from userland into it.
1199  */
1200 struct buf *
1201 lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, void *uaddr)
1202 {
1203         struct buf *bp;
1204         int error;
1205
1206         KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);
1207
1208         bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
1209         error = copyin(uaddr, bp->b_data, size);
1210         if (error) {
1211                 lfs_freebuf(fs, bp);
1212                 return NULL;
1213         }
1214         KDASSERT(bp->b_iodone == lfs_callback);
1215
1216 #if 0
1217         mutex_enter(&lfs_lock);
1218         ++fs->lfs_iocount;
1219         mutex_exit(&lfs_lock);
1220 #endif
1221         bp->b_bufsize = size;
1222         bp->b_bcount = size;
1223         return (bp);
1224 }