sbin/fsck_lfs/segwrite.c

   1 /* $NetBSD: segwrite.c,v 1.18 2008/04/28 20:23:08 martin Exp $ */
   2 /*-
   3  * Copyright (c) 2003 The NetBSD Foundation, Inc.
   4  * All rights reserved.
   5  *
   6  * This code is derived from software contributed to The NetBSD Foundation
   7  * by Konrad E. Schroder <perseant@hhhh.org>.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  */
  30 /*
  31  * Copyright (c) 1991, 1993
  32  *      The Regents of the University of California.  All rights reserved.
  33  *
  34  * Redistribution and use in source and binary forms, with or without
  35  * modification, are permitted provided that the following conditions
  36  * are met:
  37  * 1. Redistributions of source code must retain the above copyright
  38  *    notice, this list of conditions and the following disclaimer.
  39  * 2. Redistributions in binary form must reproduce the above copyright
  40  *    notice, this list of conditions and the following disclaimer in the
  41  *    documentation and/or other materials provided with the distribution.
  42  * 3. Neither the name of the University nor the names of its contributors
  43  *    may be used to endorse or promote products derived from this software
  44  *    without specific prior written permission.
  45  *
  46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  56  * SUCH DAMAGE.
  57  *
  58  *      @(#)lfs_segment.c       8.10 (Berkeley) 6/10/95
  59  */
  60
  61 /*
  62  * Partial segment writer, taken from the kernel and adapted for userland.
  63  */
  64 #include <sys/types.h>
  65 #include <sys/param.h>
  66 #include <sys/time.h>
  67 #include <sys/buf.h>
  68 #include <sys/mount.h>
  69
  70 #include <ufs/ufs/inode.h>
  71 #include <ufs/ufs/ufsmount.h>
  72
  73 /* Override certain things to make <ufs/lfs/lfs.h> work */
  74 #define vnode uvnode
  75 #define buf ubuf
  76 #define panic call_panic
  77
  78 #include <ufs/lfs/lfs.h>
  79
  80 #include <assert.h>
  81 #include <stdio.h>
  82 #include <stdlib.h>
  83 #include <string.h>
  84 #include <err.h>
  85 #include <errno.h>
  86 #include <util.h>
  87
  88 #include "bufcache.h"
  89 #include "vnode.h"
  90 #include "lfs_user.h"
  91 #include "segwrite.h"
  92
  93 /* Compatibility definitions */
  94 extern off_t locked_queue_bytes;
  95 int locked_queue_count;
  96 off_t written_bytes = 0;
  97 off_t written_data = 0;
  98 off_t written_indir = 0;
  99 off_t written_dev = 0;
 100 int written_inodes = 0;
 101
 102 /* Global variables */
 103 time_t write_time;
 104
 105 extern u_int32_t cksum(void *, size_t);
 106 extern u_int32_t lfs_sb_cksum(struct dlfs *);
 107 extern int preen;
 108
 109 /*
 110  * Logical block number match routines used when traversing the dirty block
 111  * chain.
 112  */
 113 int
 114 lfs_match_data(struct lfs * fs, struct ubuf * bp)
 115 {
 116         return (bp->b_lblkno >= 0);
 117 }
 118
 119 int
 120 lfs_match_indir(struct lfs * fs, struct ubuf * bp)
 121 {
 122         daddr_t lbn;
 123
 124         lbn = bp->b_lblkno;
 125         return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
 126 }
 127
 128 int
 129 lfs_match_dindir(struct lfs * fs, struct ubuf * bp)
 130 {
 131         daddr_t lbn;
 132
 133         lbn = bp->b_lblkno;
 134         return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
 135 }
 136
 137 int
 138 lfs_match_tindir(struct lfs * fs, struct ubuf * bp)
 139 {
 140         daddr_t lbn;
 141
 142         lbn = bp->b_lblkno;
 143         return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
 144 }
 145
 146 /*
 147  * Do a checkpoint.
 148  */
 149 int
 150 lfs_segwrite(struct lfs * fs, int flags)
 151 {
 152         struct inode *ip;
 153         struct segment *sp;
 154         struct uvnode *vp;
 155         int redo;
 156
 157         lfs_seglock(fs, flags | SEGM_CKP);
 158         sp = fs->lfs_sp;
 159
 160         lfs_writevnodes(fs, sp, VN_REG);
 161         lfs_writevnodes(fs, sp, VN_DIROP);
 162         ((SEGSUM *) (sp->segsum))->ss_flags &= ~(SS_CONT);
 163
 164         do {
 165                 vp = fs->lfs_ivnode;
 166                 fs->lfs_flags &= ~LFS_IFDIRTY;
 167                 ip = VTOI(vp);
 168                 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || fs->lfs_idaddr <= 0)
 169                         lfs_writefile(fs, sp, vp);
 170
 171                 redo = lfs_writeinode(fs, sp, ip);
 172                 redo += lfs_writeseg(fs, sp);
 173                 redo += (fs->lfs_flags & LFS_IFDIRTY);
 174         } while (redo);
 175
 176         lfs_segunlock(fs);
 177 #if 0
 178         printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n",
 179                 written_bytes, (ufs_daddr_t)btofsb(fs, written_bytes));
 180         printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n",
 181                 written_data, (ufs_daddr_t)btofsb(fs, written_data));
 182         printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n",
 183                 written_indir, (ufs_daddr_t)btofsb(fs, written_indir));
 184         printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n",
 185                 written_dev, (ufs_daddr_t)btofsb(fs, written_dev));
 186         printf("wrote %d inodes (%" PRId32 " fsb)\n",
 187                 written_inodes, btofsb(fs, written_inodes * fs->lfs_ibsize));
 188 #endif
 189         return 0;
 190 }
 191
 192 /*
 193  * Write the dirty blocks associated with a vnode.
 194  */
 195 void
 196 lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp)
 197 {
 198         struct ubuf *bp;
 199         struct finfo *fip;
 200         struct inode *ip;
 201         IFILE *ifp;
 202
 203         ip = VTOI(vp);
 204
 205         if (sp->seg_bytes_left < fs->lfs_bsize ||
 206             sp->sum_bytes_left < sizeof(struct finfo))
 207                 (void) lfs_writeseg(fs, sp);
 208
 209         sp->sum_bytes_left -= FINFOSIZE;
 210         ++((SEGSUM *) (sp->segsum))->ss_nfinfo;
 211
 212         if (vp->v_uflag & VU_DIROP)
 213                 ((SEGSUM *) (sp->segsum))->ss_flags |= (SS_DIROP | SS_CONT);
 214
 215         fip = sp->fip;
 216         fip->fi_nblocks = 0;
 217         fip->fi_ino = ip->i_number;
 218         LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
 219         fip->fi_version = ifp->if_version;
 220         brelse(bp, 0);
 221
 222         lfs_gather(fs, sp, vp, lfs_match_data);
 223         lfs_gather(fs, sp, vp, lfs_match_indir);
 224         lfs_gather(fs, sp, vp, lfs_match_dindir);
 225         lfs_gather(fs, sp, vp, lfs_match_tindir);
 226
 227         fip = sp->fip;
 228         if (fip->fi_nblocks != 0) {
 229                 sp->fip = (FINFO *) ((caddr_t) fip + FINFOSIZE +
 230                     sizeof(ufs_daddr_t) * (fip->fi_nblocks));
 231                 sp->start_lbp = &sp->fip->fi_blocks[0];
 232         } else {
 233                 sp->sum_bytes_left += FINFOSIZE;
 234                 --((SEGSUM *) (sp->segsum))->ss_nfinfo;
 235         }
 236 }
 237
 238 int
 239 lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip)
 240 {
 241         struct ubuf *bp, *ibp;
 242         struct ufs1_dinode *cdp;
 243         IFILE *ifp;
 244         SEGUSE *sup;
 245         daddr_t daddr;
 246         ino_t ino;
 247         int error, i, ndx, fsb = 0;
 248         int redo_ifile = 0;
 249         struct timespec ts;
 250         int gotblk = 0;
 251
 252         /* Allocate a new inode block if necessary. */
 253         if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) &&
 254             sp->ibp == NULL) {
 255                 /* Allocate a new segment if necessary. */
 256                 if (sp->seg_bytes_left < fs->lfs_ibsize ||
 257                     sp->sum_bytes_left < sizeof(ufs_daddr_t))
 258                         (void) lfs_writeseg(fs, sp);
 259
 260                 /* Get next inode block. */
 261                 daddr = fs->lfs_offset;
 262                 fs->lfs_offset += btofsb(fs, fs->lfs_ibsize);
 263                 sp->ibp = *sp->cbpp++ =
 264                     getblk(fs->lfs_devvp, fsbtodb(fs, daddr),
 265                     fs->lfs_ibsize);
 266                 sp->ibp->b_flags |= B_GATHERED;
 267                 gotblk++;
 268
 269                 /* Zero out inode numbers */
 270                 for (i = 0; i < INOPB(fs); ++i)
 271                         ((struct ufs1_dinode *) sp->ibp->b_data)[i].di_inumber = 0;
 272
 273                 ++sp->start_bpp;
 274                 fs->lfs_avail -= btofsb(fs, fs->lfs_ibsize);
 275                 /* Set remaining space counters. */
 276                 sp->seg_bytes_left -= fs->lfs_ibsize;
 277                 sp->sum_bytes_left -= sizeof(ufs_daddr_t);
 278                 ndx = fs->lfs_sumsize / sizeof(ufs_daddr_t) -
 279                     sp->ninodes / INOPB(fs) - 1;
 280                 ((ufs_daddr_t *) (sp->segsum))[ndx] = daddr;
 281         }
 282         /* Update the inode times and copy the inode onto the inode page. */
 283         ts.tv_nsec = 0;
 284         ts.tv_sec = write_time;
 285         /* XXX kludge --- don't redirty the ifile just to put times on it */
 286         if (ip->i_number != LFS_IFILE_INUM)
 287                 LFS_ITIMES(ip, &ts, &ts, &ts);
 288
 289         /*
 290          * If this is the Ifile, and we've already written the Ifile in this
 291          * partial segment, just overwrite it (it's not on disk yet) and
 292          * continue.
 293          *
 294          * XXX we know that the bp that we get the second time around has
 295          * already been gathered.
 296          */
 297         if (ip->i_number == LFS_IFILE_INUM && sp->idp) {
 298                 *(sp->idp) = *ip->i_din.ffs1_din;
 299                 ip->i_lfs_osize = ip->i_ffs1_size;
 300                 return 0;
 301         }
 302         bp = sp->ibp;
 303         cdp = ((struct ufs1_dinode *) bp->b_data) + (sp->ninodes % INOPB(fs));
 304         *cdp = *ip->i_din.ffs1_din;
 305
 306         /* If all blocks are goig to disk, update the "size on disk" */
 307         ip->i_lfs_osize = ip->i_ffs1_size;
 308
 309         if (ip->i_number == LFS_IFILE_INUM)     /* We know sp->idp == NULL */
 310                 sp->idp = ((struct ufs1_dinode *) bp->b_data) +
 311                     (sp->ninodes % INOPB(fs));
 312         if (gotblk) {
 313                 LFS_LOCK_BUF(bp);
 314                 assert(!(bp->b_flags & B_INVAL));
 315                 brelse(bp, 0);
 316         }
 317         /* Increment inode count in segment summary block. */
 318         ++((SEGSUM *) (sp->segsum))->ss_ninos;
 319
 320         /* If this page is full, set flag to allocate a new page. */
 321         if (++sp->ninodes % INOPB(fs) == 0)
 322                 sp->ibp = NULL;
 323
 324         /*
 325          * If updating the ifile, update the super-block.  Update the disk
 326          * address and access times for this inode in the ifile.
 327          */
 328         ino = ip->i_number;
 329         if (ino == LFS_IFILE_INUM) {
 330                 daddr = fs->lfs_idaddr;
 331                 fs->lfs_idaddr = dbtofsb(fs, bp->b_blkno);
 332                 sbdirty();
 333         } else {
 334                 LFS_IENTRY(ifp, fs, ino, ibp);
 335                 daddr = ifp->if_daddr;
 336                 ifp->if_daddr = dbtofsb(fs, bp->b_blkno) + fsb;
 337                 error = LFS_BWRITE_LOG(ibp);    /* Ifile */
 338         }
 339
 340         /*
 341          * Account the inode: it no longer belongs to its former segment,
 342          * though it will not belong to the new segment until that segment
 343          * is actually written.
 344          */
 345         if (daddr != LFS_UNUSED_DADDR) {
 346                 u_int32_t oldsn = dtosn(fs, daddr);
 347                 LFS_SEGENTRY(sup, fs, oldsn, bp);
 348                 sup->su_nbytes -= DINODE1_SIZE;
 349                 redo_ifile =
 350                     (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
 351                 if (redo_ifile)
 352                         fs->lfs_flags |= LFS_IFDIRTY;
 353                 LFS_WRITESEGENTRY(sup, fs, oldsn, bp);  /* Ifile */
 354         }
 355         return redo_ifile;
 356 }
 357
 358 int
 359 lfs_gatherblock(struct segment * sp, struct ubuf * bp)
 360 {
 361         struct lfs *fs;
 362         int version;
 363         int j, blksinblk;
 364
 365         /*
 366          * If full, finish this segment.  We may be doing I/O, so
 367          * release and reacquire the splbio().
 368          */
 369         fs = sp->fs;
 370         blksinblk = howmany(bp->b_bcount, fs->lfs_bsize);
 371         if (sp->sum_bytes_left < sizeof(ufs_daddr_t) * blksinblk ||
 372             sp->seg_bytes_left < bp->b_bcount) {
 373                 lfs_updatemeta(sp);
 374
 375                 version = sp->fip->fi_version;
 376                 (void) lfs_writeseg(fs, sp);
 377
 378                 sp->fip->fi_version = version;
 379                 sp->fip->fi_ino = VTOI(sp->vp)->i_number;
 380                 /* Add the current file to the segment summary. */
 381                 ++((SEGSUM *) (sp->segsum))->ss_nfinfo;
 382                 sp->sum_bytes_left -= FINFOSIZE;
 383
 384                 return 1;
 385         }
 386         /* Insert into the buffer list, update the FINFO block. */
 387         bp->b_flags |= B_GATHERED;
 388         /* bp->b_flags &= ~B_DONE; */
 389
 390         *sp->cbpp++ = bp;
 391         for (j = 0; j < blksinblk; j++)
 392                 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + j;
 393
 394         sp->sum_bytes_left -= sizeof(ufs_daddr_t) * blksinblk;
 395         sp->seg_bytes_left -= bp->b_bcount;
 396         return 0;
 397 }
 398
 399 int
 400 lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *))
 401 {
 402         struct ubuf *bp, *nbp;
 403         int count = 0;
 404
 405         sp->vp = vp;
 406 loop:
 407         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 408                 nbp = LIST_NEXT(bp, b_vnbufs);
 409
 410                 assert(bp->b_flags & B_DELWRI);
 411                 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) {
 412                         continue;
 413                 }
 414                 if (lfs_gatherblock(sp, bp)) {
 415                         goto loop;
 416                 }
 417                 count++;
 418         }
 419
 420         lfs_updatemeta(sp);
 421         sp->vp = NULL;
 422         return count;
 423 }
 424
 425
 426 /*
 427  * Change the given block's address to ndaddr, finding its previous
 428  * location using ufs_bmaparray().
 429  *
 430  * Account for this change in the segment table.
 431  */
 432 void
 433 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn,
 434     ufs_daddr_t ndaddr, int size)
 435 {
 436         SEGUSE *sup;
 437         struct ubuf *bp;
 438         struct indir a[NIADDR + 2], *ap;
 439         struct inode *ip;
 440         struct uvnode *vp;
 441         daddr_t daddr, ooff;
 442         int num, error;
 443         int bb, osize, obb;
 444
 445         vp = sp->vp;
 446         ip = VTOI(vp);
 447
 448         error = ufs_bmaparray(fs, vp, lbn, &daddr, a, &num);
 449         if (error)
 450                 errx(1, "lfs_updatemeta: ufs_bmaparray returned %d looking up lbn %" PRId64 "\n", error, lbn);
 451         if (daddr > 0)
 452                 daddr = dbtofsb(fs, daddr);
 453
 454         bb = fragstofsb(fs, numfrags(fs, size));
 455         switch (num) {
 456         case 0:
 457                 ooff = ip->i_ffs1_db[lbn];
 458                 if (ooff == UNWRITTEN)
 459                         ip->i_ffs1_blocks += bb;
 460                 else {
 461                         /* possible fragment truncation or extension */
 462                         obb = btofsb(fs, ip->i_lfs_fragsize[lbn]);
 463                         ip->i_ffs1_blocks += (bb - obb);
 464                 }
 465                 ip->i_ffs1_db[lbn] = ndaddr;
 466                 break;
 467         case 1:
 468                 ooff = ip->i_ffs1_ib[a[0].in_off];
 469                 if (ooff == UNWRITTEN)
 470                         ip->i_ffs1_blocks += bb;
 471                 ip->i_ffs1_ib[a[0].in_off] = ndaddr;
 472                 break;
 473         default:
 474                 ap = &a[num - 1];
 475                 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NULL, 0, &bp))
 476                         errx(1, "lfs_updatemeta: bread bno %" PRId64,
 477                             ap->in_lbn);
 478
 479                 ooff = ((ufs_daddr_t *) bp->b_data)[ap->in_off];
 480                 if (ooff == UNWRITTEN)
 481                         ip->i_ffs1_blocks += bb;
 482                 ((ufs_daddr_t *) bp->b_data)[ap->in_off] = ndaddr;
 483                 (void) VOP_BWRITE(bp);
 484         }
 485
 486         /*
 487          * Update segment usage information, based on old size
 488          * and location.
 489          */
 490         if (daddr > 0) {
 491                 u_int32_t oldsn = dtosn(fs, daddr);
 492                 if (lbn >= 0 && lbn < NDADDR)
 493                         osize = ip->i_lfs_fragsize[lbn];
 494                 else
 495                         osize = fs->lfs_bsize;
 496                 LFS_SEGENTRY(sup, fs, oldsn, bp);
 497                 sup->su_nbytes -= osize;
 498                 if (!(bp->b_flags & B_GATHERED))
 499                         fs->lfs_flags |= LFS_IFDIRTY;
 500                 LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
 501         }
 502         /*
 503          * Now that this block has a new address, and its old
 504          * segment no longer owns it, we can forget about its
 505          * old size.
 506          */
 507         if (lbn >= 0 && lbn < NDADDR)
 508                 ip->i_lfs_fragsize[lbn] = size;
 509 }
 510
 511 /*
 512  * Update the metadata that points to the blocks listed in the FINFO
 513  * array.
 514  */
 515 void
 516 lfs_updatemeta(struct segment * sp)
 517 {
 518         struct ubuf *sbp;
 519         struct lfs *fs;
 520         struct uvnode *vp;
 521         daddr_t lbn;
 522         int i, nblocks, num;
 523         int bb;
 524         int bytesleft, size;
 525
 526         vp = sp->vp;
 527         nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
 528
 529         if (vp == NULL || nblocks == 0)
 530                 return;
 531
 532         /*
 533          * This count may be high due to oversize blocks from lfs_gop_write.
 534          * Correct for this. (XXX we should be able to keep track of these.)
 535          */
 536         fs = sp->fs;
 537         for (i = 0; i < nblocks; i++) {
 538                 if (sp->start_bpp[i] == NULL) {
 539                         printf("nblocks = %d, not %d\n", i, nblocks);
 540                         nblocks = i;
 541                         break;
 542                 }
 543                 num = howmany(sp->start_bpp[i]->b_bcount, fs->lfs_bsize);
 544                 nblocks -= num - 1;
 545         }
 546
 547         /*
 548          * Sort the blocks.
 549          */
 550         lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks, fs->lfs_bsize);
 551
 552         /*
 553          * Record the length of the last block in case it's a fragment.
 554          * If there are indirect blocks present, they sort last.  An
 555          * indirect block will be lfs_bsize and its presence indicates
 556          * that you cannot have fragments.
 557          */
 558         sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) &
 559             fs->lfs_bmask) + 1;
 560
 561         /*
 562          * Assign disk addresses, and update references to the logical
 563          * block and the segment usage information.
 564          */
 565         for (i = nblocks; i--; ++sp->start_bpp) {
 566                 sbp = *sp->start_bpp;
 567                 lbn = *sp->start_lbp;
 568
 569                 sbp->b_blkno = fsbtodb(fs, fs->lfs_offset);
 570
 571                 /*
 572                  * If we write a frag in the wrong place, the cleaner won't
 573                  * be able to correctly identify its size later, and the
 574                  * segment will be uncleanable.  (Even worse, it will assume
 575                  * that the indirect block that actually ends the list
 576                  * is of a smaller size!)
 577                  */
 578                 if ((sbp->b_bcount & fs->lfs_bmask) && i != 0)
 579                         errx(1, "lfs_updatemeta: fragment is not last block");
 580
 581                 /*
 582                  * For each subblock in this possibly oversized block,
 583                  * update its address on disk.
 584                  */
 585                 for (bytesleft = sbp->b_bcount; bytesleft > 0;
 586                     bytesleft -= fs->lfs_bsize) {
 587                         size = MIN(bytesleft, fs->lfs_bsize);
 588                         bb = fragstofsb(fs, numfrags(fs, size));
 589                         lbn = *sp->start_lbp++;
 590                         lfs_update_single(fs, sp, lbn, fs->lfs_offset, size);
 591                         fs->lfs_offset += bb;
 592                 }
 593
 594         }
 595 }
 596
 597 /*
 598  * Start a new segment.
 599  */
 600 int
 601 lfs_initseg(struct lfs * fs)
 602 {
 603         struct segment *sp;
 604         SEGUSE *sup;
 605         SEGSUM *ssp;
 606         struct ubuf *bp, *sbp;
 607         int repeat;
 608
 609         sp = fs->lfs_sp;
 610
 611         repeat = 0;
 612
 613         /* Advance to the next segment. */
 614         if (!LFS_PARTIAL_FITS(fs)) {
 615                 /* lfs_avail eats the remaining space */
 616                 fs->lfs_avail -= fs->lfs_fsbpseg - (fs->lfs_offset -
 617                     fs->lfs_curseg);
 618                 lfs_newseg(fs);
 619                 repeat = 1;
 620                 fs->lfs_offset = fs->lfs_curseg;
 621
 622                 sp->seg_number = dtosn(fs, fs->lfs_curseg);
 623                 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg);
 624
 625                 /*
 626                  * If the segment contains a superblock, update the offset
 627                  * and summary address to skip over it.
 628                  */
 629                 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
 630                 if (sup->su_flags & SEGUSE_SUPERBLOCK) {
 631                         fs->lfs_offset += btofsb(fs, LFS_SBPAD);
 632                         sp->seg_bytes_left -= LFS_SBPAD;
 633                 }
 634                 brelse(bp, 0);
 635                 /* Segment zero could also contain the labelpad */
 636                 if (fs->lfs_version > 1 && sp->seg_number == 0 &&
 637                     fs->lfs_start < btofsb(fs, LFS_LABELPAD)) {
 638                         fs->lfs_offset += btofsb(fs, LFS_LABELPAD) - fs->lfs_start;
 639                         sp->seg_bytes_left -= LFS_LABELPAD - fsbtob(fs, fs->lfs_start);
 640                 }
 641         } else {
 642                 sp->seg_number = dtosn(fs, fs->lfs_curseg);
 643                 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg -
 644                     (fs->lfs_offset - fs->lfs_curseg));
 645         }
 646         fs->lfs_lastpseg = fs->lfs_offset;
 647
 648         sp->fs = fs;
 649         sp->ibp = NULL;
 650         sp->idp = NULL;
 651         sp->ninodes = 0;
 652         sp->ndupino = 0;
 653
 654         /* Get a new buffer for SEGSUM and enter it into the buffer list. */
 655         sp->cbpp = sp->bpp;
 656         sbp = *sp->cbpp = getblk(fs->lfs_devvp,
 657             fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize);
 658         sp->segsum = sbp->b_data;
 659         memset(sp->segsum, 0, fs->lfs_sumsize);
 660         sp->start_bpp = ++sp->cbpp;
 661         fs->lfs_offset += btofsb(fs, fs->lfs_sumsize);
 662
 663         /* Set point to SEGSUM, initialize it. */
 664         ssp = sp->segsum;
 665         ssp->ss_next = fs->lfs_nextseg;
 666         ssp->ss_nfinfo = ssp->ss_ninos = 0;
 667         ssp->ss_magic = SS_MAGIC;
 668
 669         /* Set pointer to first FINFO, initialize it. */
 670         sp->fip = (struct finfo *) ((caddr_t) sp->segsum + SEGSUM_SIZE(fs));
 671         sp->fip->fi_nblocks = 0;
 672         sp->start_lbp = &sp->fip->fi_blocks[0];
 673         sp->fip->fi_lastlength = 0;
 674
 675         sp->seg_bytes_left -= fs->lfs_sumsize;
 676         sp->sum_bytes_left = fs->lfs_sumsize - SEGSUM_SIZE(fs);
 677
 678         LFS_LOCK_BUF(sbp);
 679         brelse(sbp, 0);
 680         return repeat;
 681 }
 682
 683 /*
 684  * Return the next segment to write.
 685  */
 686 void
 687 lfs_newseg(struct lfs * fs)
 688 {
 689         CLEANERINFO *cip;
 690         SEGUSE *sup;
 691         struct ubuf *bp;
 692         int curseg, isdirty, sn;
 693
 694         LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp);
 695         sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
 696         sup->su_nbytes = 0;
 697         sup->su_nsums = 0;
 698         sup->su_ninos = 0;
 699         LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp);
 700
 701         LFS_CLEANERINFO(cip, fs, bp);
 702         --cip->clean;
 703         ++cip->dirty;
 704         fs->lfs_nclean = cip->clean;
 705         LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
 706
 707         fs->lfs_lastseg = fs->lfs_curseg;
 708         fs->lfs_curseg = fs->lfs_nextseg;
 709         for (sn = curseg = dtosn(fs, fs->lfs_curseg) + fs->lfs_interleave;;) {
 710                 sn = (sn + 1) % fs->lfs_nseg;
 711                 if (sn == curseg)
 712                         errx(1, "lfs_nextseg: no clean segments");
 713                 LFS_SEGENTRY(sup, fs, sn, bp);
 714                 isdirty = sup->su_flags & SEGUSE_DIRTY;
 715                 brelse(bp, 0);
 716
 717                 if (!isdirty)
 718                         break;
 719         }
 720
 721         ++fs->lfs_nactive;
 722         fs->lfs_nextseg = sntod(fs, sn);
 723 }
 724
 725
 726 int
 727 lfs_writeseg(struct lfs * fs, struct segment * sp)
 728 {
 729         struct ubuf **bpp, *bp;
 730         SEGUSE *sup;
 731         SEGSUM *ssp;
 732         char *datap, *dp;
 733         int i;
 734         int do_again, nblocks, byteoffset;
 735         size_t el_size;
 736         u_short ninos;
 737         struct uvnode *devvp;
 738
 739         /*
 740          * If there are no buffers other than the segment summary to write
 741          * and it is not a checkpoint, don't do anything.  On a checkpoint,
 742          * even if there aren't any buffers, you need to write the superblock.
 743          */
 744         nblocks = sp->cbpp - sp->bpp;
 745 #if 0
 746         printf("write %d blocks at 0x%x\n",
 747                 nblocks, (int)dbtofsb(fs, (*sp->bpp)->b_blkno));
 748 #endif
 749         if (nblocks == 1)
 750                 return 0;
 751
 752         devvp = fs->lfs_devvp;
 753
 754         /* Update the segment usage information. */
 755         LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
 756         sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
 757
 758         /* Loop through all blocks, except the segment summary. */
 759         for (bpp = sp->bpp; ++bpp < sp->cbpp;) {
 760                 if ((*bpp)->b_vp != devvp) {
 761                         sup->su_nbytes += (*bpp)->b_bcount;
 762                 }
 763                 assert(dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno)) == sp->seg_number);
 764         }
 765
 766         ssp = (SEGSUM *) sp->segsum;
 767         ssp->ss_flags |= SS_RFW;
 768
 769         ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
 770         sup->su_nbytes += ssp->ss_ninos * DINODE1_SIZE;
 771
 772         if (fs->lfs_version == 1)
 773                 sup->su_olastmod = write_time;
 774         else
 775                 sup->su_lastmod = write_time;
 776         sup->su_ninos += ninos;
 777         ++sup->su_nsums;
 778         fs->lfs_dmeta += (btofsb(fs, fs->lfs_sumsize) + btofsb(fs, ninos *
 779                 fs->lfs_ibsize));
 780         fs->lfs_avail -= btofsb(fs, fs->lfs_sumsize);
 781
 782         do_again = !(bp->b_flags & B_GATHERED);
 783         LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */
 784
 785         /*
 786          * Compute checksum across data and then across summary; the first
 787          * block (the summary block) is skipped.  Set the create time here
 788          * so that it's guaranteed to be later than the inode mod times.
 789          */
 790         if (fs->lfs_version == 1)
 791                 el_size = sizeof(u_long);
 792         else
 793                 el_size = sizeof(u_int32_t);
 794         datap = dp = emalloc(nblocks * el_size);
 795         for (bpp = sp->bpp, i = nblocks - 1; i--;) {
 796                 ++bpp;
 797                 /* Loop through gop_write cluster blocks */
 798                 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount;
 799                     byteoffset += fs->lfs_bsize) {
 800                         memcpy(dp, (*bpp)->b_data + byteoffset, el_size);
 801                         dp += el_size;
 802                 }
 803                 bremfree(*bpp);
 804                 (*bpp)->b_flags |= B_BUSY;
 805         }
 806         if (fs->lfs_version == 1)
 807                 ssp->ss_ocreate = write_time;
 808         else {
 809                 ssp->ss_create = write_time;
 810                 ssp->ss_serial = ++fs->lfs_serial;
 811                 ssp->ss_ident = fs->lfs_ident;
 812         }
 813         /* Set the summary block busy too */
 814         bremfree(*(sp->bpp));
 815         (*(sp->bpp))->b_flags |= B_BUSY;
 816
 817         ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size);
 818         ssp->ss_sumsum =
 819             cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
 820         free(datap);
 821         datap = dp = NULL;
 822         fs->lfs_bfree -= (btofsb(fs, ninos * fs->lfs_ibsize) +
 823             btofsb(fs, fs->lfs_sumsize));
 824
 825         if (devvp == NULL)
 826                 errx(1, "devvp is NULL");
 827         for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) {
 828                 bp = *bpp;
 829 #if 0
 830                 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n",
 831                        nblocks - i, bp, bp->b_flags, bp->b_blkno);
 832                 printf("  vp = %p\n", bp->b_vp);
 833                 if (bp->b_vp != fs->lfs_devvp)
 834                         printf("  ino = %d lbn = %" PRId64 "\n",
 835                                VTOI(bp->b_vp)->i_number, bp->b_lblkno);
 836 #endif
 837                 if (bp->b_vp == fs->lfs_devvp)
 838                         written_dev += bp->b_bcount;
 839                 else {
 840                         if (bp->b_lblkno >= 0)
 841                                 written_data += bp->b_bcount;
 842                         else
 843                                 written_indir += bp->b_bcount;
 844                 }
 845                 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR |
 846                                  B_LOCKED);
 847                 bwrite(bp);
 848                 written_bytes += bp->b_bcount;
 849         }
 850         written_inodes += ninos;
 851
 852         return (lfs_initseg(fs) || do_again);
 853 }
 854
 855 /*
 856  * Our own copy of shellsort.  XXX use qsort or heapsort.
 857  */
 858 void
 859 lfs_shellsort(struct ubuf ** bp_array, ufs_daddr_t * lb_array, int nmemb, int size)
 860 {
 861         static int __rsshell_increments[] = {4, 1, 0};
 862         int incr, *incrp, t1, t2;
 863         struct ubuf *bp_temp;
 864
 865         for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
 866                 for (t1 = incr; t1 < nmemb; ++t1)
 867                         for (t2 = t1 - incr; t2 >= 0;)
 868                                 if ((u_int32_t) bp_array[t2]->b_lblkno >
 869                                     (u_int32_t) bp_array[t2 + incr]->b_lblkno) {
 870                                         bp_temp = bp_array[t2];
 871                                         bp_array[t2] = bp_array[t2 + incr];
 872                                         bp_array[t2 + incr] = bp_temp;
 873                                         t2 -= incr;
 874                                 } else
 875                                         break;
 876
 877         /* Reform the list of logical blocks */
 878         incr = 0;
 879         for (t1 = 0; t1 < nmemb; t1++) {
 880                 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) {
 881                         lb_array[incr++] = bp_array[t1]->b_lblkno + t2;
 882                 }
 883         }
 884 }
 885
 886
 887 /*
 888  * lfs_seglock --
 889  *      Single thread the segment writer.
 890  */
 891 int
 892 lfs_seglock(struct lfs * fs, unsigned long flags)
 893 {
 894         struct segment *sp;
 895
 896         if (fs->lfs_seglock) {
 897                 ++fs->lfs_seglock;
 898                 fs->lfs_sp->seg_flags |= flags;
 899                 return 0;
 900         }
 901         fs->lfs_seglock = 1;
 902
 903         sp = fs->lfs_sp = emalloc(sizeof(*sp));
 904         sp->bpp = emalloc(fs->lfs_ssize * sizeof(struct ubuf *));
 905         if (!sp->bpp)
 906                 errx(!preen, "Could not allocate %zu bytes: %s",
 907                         (size_t)(fs->lfs_ssize * sizeof(struct ubuf *)),
 908                         strerror(errno));
 909         sp->seg_flags = flags;
 910         sp->vp = NULL;
 911         sp->seg_iocount = 0;
 912         (void) lfs_initseg(fs);
 913
 914         return 0;
 915 }
 916
 917 /*
 918  * lfs_segunlock --
 919  *      Single thread the segment writer.
 920  */
 921 void
 922 lfs_segunlock(struct lfs * fs)
 923 {
 924         struct segment *sp;
 925         struct ubuf *bp;
 926
 927         sp = fs->lfs_sp;
 928
 929         if (fs->lfs_seglock == 1) {
 930                 if (sp->bpp != sp->cbpp) {
 931                         /* Free allocated segment summary */
 932                         fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
 933                         bp = *sp->bpp;
 934                         bremfree(bp);
 935                         bp->b_flags |= B_DONE | B_INVAL;
 936                         bp->b_flags &= ~B_DELWRI;
 937                         reassignbuf(bp, bp->b_vp);
 938                         bp->b_flags |= B_BUSY; /* XXX */
 939                         brelse(bp, 0);
 940                 } else
 941                         printf("unlock to 0 with no summary");
 942
 943                 free(sp->bpp);
 944                 sp->bpp = NULL;
 945                 free(sp);
 946                 fs->lfs_sp = NULL;
 947
 948                 fs->lfs_nactive = 0;
 949
 950                 /* Since we *know* everything's on disk, write both sbs */
 951                 lfs_writesuper(fs, fs->lfs_sboffs[0]);
 952                 lfs_writesuper(fs, fs->lfs_sboffs[1]);
 953
 954                 --fs->lfs_seglock;
 955                 fs->lfs_lockpid = 0;
 956         } else if (fs->lfs_seglock == 0) {
 957                 errx(1, "Seglock not held");
 958         } else {
 959                 --fs->lfs_seglock;
 960         }
 961 }
 962
 963 int
 964 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op)
 965 {
 966         struct inode *ip;
 967         struct uvnode *vp;
 968         int inodes_written = 0;
 969
 970         LIST_FOREACH(vp, &vnodelist, v_mntvnodes) {
 971                 if (vp->v_bmap_op != lfs_vop_bmap)
 972                         continue;
 973
 974                 ip = VTOI(vp);
 975
 976                 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) ||
 977                     (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) {
 978                         continue;
 979                 }
 980                 /*
 981                  * Write the inode/file if dirty and it's not the IFILE.
 982                  */
 983                 if (ip->i_flag & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) {
 984                         if (ip->i_number != LFS_IFILE_INUM)
 985                                 lfs_writefile(fs, sp, vp);
 986                         (void) lfs_writeinode(fs, sp, ip);
 987                         inodes_written++;
 988                 }
 989         }
 990         return inodes_written;
 991 }
 992
 993 void
 994 lfs_writesuper(struct lfs *fs, ufs_daddr_t daddr)
 995 {
 996         struct ubuf *bp;
 997
 998         /* Set timestamp of this version of the superblock */
 999         if (fs->lfs_version == 1)
1000                 fs->lfs_otstamp = write_time;
1001         fs->lfs_tstamp = write_time;
1002
1003         /* Checksum the superblock and copy it into a buffer. */
1004         fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs));
1005         assert(daddr > 0);
1006         bp = getblk(fs->lfs_devvp, fsbtodb(fs, daddr), LFS_SBPAD);
1007         memset(bp->b_data + sizeof(struct dlfs), 0,
1008             LFS_SBPAD - sizeof(struct dlfs));
1009         *(struct dlfs *) bp->b_data = fs->lfs_dlfs;
1010
1011         bwrite(bp);
1012 }