usr/src/cmd/fs.d/ufs/fsck/inode.c

   1 /*
   2  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
   3  */
   4
   5 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
   6 /*        All Rights Reserved   */
   7
   8 /*
   9  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
  10  * All rights reserved.
  11  *
  12  * Redistribution and use in source and binary forms are permitted
  13  * provided that: (1) source distributions retain this entire copyright
  14  * notice and comment, and (2) distributions including binaries display
  15  * the following acknowledgement:  ``This product includes software
  16  * developed by the University of California, Berkeley and its contributors''
  17  * in the documentation or other materials provided with the distribution
  18  * and in all advertising materials mentioning features or use of this
  19  * software. Neither the name of the University nor the names of its
  20  * contributors may be used to endorse or promote products derived
  21  * from this software without specific prior written permission.
  22  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  23  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  25  */
  26
  27
  28 #include <stdio.h>
  29 #include <string.h>
  30 #include <stdlib.h>
  31 #include <unistd.h>
  32 #include <time.h>
  33 #include <limits.h>
  34 #include <sys/param.h>
  35 #include <sys/types.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/mntent.h>
  38 #include <sys/vnode.h>
  39 #include <sys/fs/ufs_inode.h>
  40 #include <sys/fs/ufs_fs.h>
  41 #define _KERNEL
  42 #include <sys/fs/ufs_fsdir.h>
  43 #undef _KERNEL
  44 #include <pwd.h>
  45 #include "fsck.h"
  46
  47 static int get_indir_offsets(int, daddr_t, int *, int *);
  48 static int clearanentry(struct inodesc *);
  49 static void pdinode(struct dinode *);
  50 static void inoflush(void);
  51 static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
  52 static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
  53 static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
  54 static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
  55 static int lookup_dotdot_ino(fsck_ino_t);
  56
  57 /*
  58  * ckinode() essentially traverses the blocklist of the provided
  59  * inode.  For each block either the caller-supplied callback (id_func
  60  * in the provided struct inodesc) or dirscan() is invoked.  Which is
  61  * chosen is controlled by what type of traversal was requested
  62  * (id_type) - if it was for an ADDR or ACL, use the callback,
  63  * otherwise it is assumed to be DATA (i.e., a directory) whose
  64  * contents need to be scanned.
  65  *
  66  * Note that a directory inode can get passed in with a type of ADDR;
  67  * the type field is orthogonal to the IFMT value.  This is so that
  68  * the file aspects (no duplicate blocks, etc) of a directory can be
  69  * verified just like is done for any other file, or the actual
  70  * contents can be scanned so that connectivity and such can be
  71  * investigated.
  72  *
  73  * The traversal is controlled by flags in the return value of
  74  * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
  75  * KEEPON, ALTERED, and FOUND.  Their semantics are:
  76  *
  77  *     STOP -    no further processing of this inode is desired/possible/
  78  *               feasible/etc.  This can mean that whatever the scan
  79  *               was searching for was found, or a serious
  80  *               inconsistency was encountered, or anything else
  81  *               appropriate.
  82  *
  83  *     SKIP -    something that made it impossible to continue was
  84  *               encountered, and the caller should go on to the next
  85  *               inode.  This is more for i/o failures than for
  86  *               logical inconsistencies.  Nothing actually looks for
  87  *               this.
  88  *
  89  *     KEEPON -  no more blocks of this inode need to be scanned, but
  90  *               nothing's wrong, so keep on going with the next
  91  *               inode.  It is similar to STOP, except that
  92  *               ckinode()'s caller will typically advance to the next
  93  *               inode for KEEPON, whereas it ceases scanning through
  94  *               the inodes completely for STOP.
  95  *
  96  *     ALTERED - a change was made to the inode.  If the caller sees
  97  *               this set, it should make sure to flush out the
  98  *               changes.  Note that any data blocks read in by the
  99  *               function need to be marked dirty by it directly;
 100  *               flushing of those will happen automatically later.
 101  *
 102  *     FOUND -   whatever was being searched for was located.
 103  *               Typically combined with STOP to avoid wasting time
 104  *               doing additional looking.
 105  *
 106  * During a traversal, some state needs to be carried around.  At the
 107  * least, the callback functions need to know what inode they're
 108  * working on, which logical block, and whether or not fixing problems
 109  * when they're encountered is desired.  Rather than try to guess what
 110  * else might be needed (and thus end up passing way more arguments
 111  * than is reasonable), all the possibilities have been bundled in
 112  * struct inodesc.  About half of the fields are specific to directory
 113  * traversals, and the rest are pretty much generic to any traversal.
 114  *
 115  * The general fields are:
 116  *
 117  *     id_fix        What to do when an error is found.  Generally, this
 118  *                   is set to DONTKNOW before a traversal.  If a
 119  *                   problem is encountered, it is changed to either FIX
 120  *                   or NOFIX by the dofix() query function.  If id_fix
 121  *                   has already been set to FIX when dofix() is called, then
 122  *                   it includes the ALTERED flag (see above) in its return
 123  *                   value; the net effect is that the inode's buffer
 124  *                   will get marked dirty and written to disk at some
 125  *                   point.  If id_fix is DONTKNOW, then dofix() will
 126  *                   query the user.  If it is NOFIX, then dofix()
 127  *                   essentially does nothing.  A few routines set NOFIX
 128  *                   as the initial value, as they are performing a best-
 129  *                   effort informational task, rather than an actual
 130  *                   repair operation.
 131  *
 132  *     id_func       This is the function that will be called for every
 133  *                   logical block in the file (assuming id_type is not
 134  *                   DATA).  The logical block may represent a hole, so
 135  *                   the callback needs to be prepared to handle that
 136  *                   case.  Its return value is a combination of the flags
 137  *                   described above (SKIP, ALTERED, etc).
 138  *
 139  *     id_number     The inode number whose block list or data is being
 140  *                   scanned.
 141  *
 142  *     id_parent     When id_type is DATA, this is the inode number for
 143  *                   the parent of id_number.  Otherwise, it is
 144  *                   available for use as an extra parameter or return
 145  *                   value between the callback and ckinode()'s caller.
  146  *                   Which of those applies, if either, is left completely up to
 147  *                   the two routines involved, so nothing can generally
 148  *                   be assumed about the id_parent value for non-DATA
 149  *                   traversals.
 150  *
 151  *     id_lbn        This is the current logical block (not fragment)
 152  *                   number being visited by the traversal.
 153  *
 154  *     id_blkno      This is the physical block corresponding to id_lbn.
 155  *
 156  *     id_numfrags   This defines how large a block is being processed in
 157  *                   this particular invocation of the callback.
 158  *                   Usually, it will be the same as sblock.fs_frag.
 159  *                   However, if a direct block is being processed and
 160  *                   it is less than a full filesystem block,
 161  *                   id_numfrags will indicate just how many fragments
 162  *                   (starting from id_lbn) are actually part of the
 163  *                   file.
 164  *
 165  *     id_truncto    The pass 4 callback is used in several places to
 166  *                   free the blocks of a file (the `FILE HAS PROBLEM
 167  *                   FOO; CLEAR?' scenario).  This has been generalized
 168  *                   to allow truncating a file to a particular length
 169  *                   rather than always completely discarding it.  If
 170  *                   id_truncto is -1, then the entire file is released,
  171  *                   otherwise it is the logical block number to truncate
 172  *                   to.  This generalized interface was motivated by a
 173  *                   desire to be able to discard everything after a
 174  *                   hole in a directory, rather than the entire
 175  *                   directory.
 176  *
 177  *     id_type       Selects the type of traversal.  DATA for dirscan(),
 178  *                   ADDR or ACL for using the provided callback.
 179  *
 180  * There are several more fields used just for dirscan() traversals:
 181  *
 182  *     id_filesize   The number of bytes in the overall directory left to
 183  *                   process.
 184  *
 185  *     id_loc        Byte position within the directory block.  Should always
 186  *                   point to the start of a directory entry.
 187  *
 188  *     id_entryno    Which logical directory entry is being processed (0
 189  *                   is `.', 1 is `..', 2 and on are normal entries).
 190  *                   This field is primarily used to enable special
 191  *                   checks when looking at the first two entries.
 192  *
 193  *                   The exception (there's always an exception in fsck)
 194  *                   is that in pass 1, it tracks how many fragments are
 195  *                   being used by a particular inode.
 196  *
 197  *     id_firsthole  The first logical block number that was found to
 198  *                   be zero.  As directories are not supposed to have
 199  *                   holes, this marks where a directory should be
 200  *                   truncated down to.  A value of -1 indicates that
 201  *                   no holes were found.
 202  *
 203  *     id_dirp       A pointer to the in-memory copy of the current
 204  *                   directory entry (as identified by id_loc).
 205  *
 206  *     id_name       This is a directory entry name to either create
 207  *                   (callback is mkentry) or locate (callback is
 208  *                   chgino, findino, or findname).
 209  */
 210 int
 211 ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
 212 {
 213         struct inodesc cleardesc;
 214         mode_t  mode;
 215
 216         if (idesc->id_filesize == 0)
 217                 idesc->id_filesize = (offset_t)dp->di_size;
 218
 219         /*
 220          * Our caller should be filtering out completely-free inodes
 221          * (mode == zero), so we'll work on the assumption that what
 222          * we're given has some basic validity.
 223          *
 224          * The kernel is inconsistent about MAXPATHLEN including the
 225          * trailing \0, so allow the more-generous length for symlinks.
 226          */
 227         mode = dp->di_mode & IFMT;
 228         if (mode == IFBLK || mode == IFCHR)
 229                 return (KEEPON);
 230         if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
 231                 pwarn("I=%d  Symlink longer than supported maximum\n",
 232                     idesc->id_number);
 233                 init_inodesc(&cleardesc);
 234                 cleardesc.id_type = ADDR;
 235                 cleardesc.id_number = idesc->id_number;
 236                 cleardesc.id_fix = DONTKNOW;
 237                 clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
 238                 return (STOP);
 239         }
 240         return (ckinode_common(dp, idesc, action));
 241 }
 242
 243 /*
 244  * This was split out from ckinode() to allow it to be used
 245  * without having to pass in kludge flags to suppress the
 246  * wrong-for-deletion initialization and irrelevant checks.
 247  * This feature is no longer needed, but is being kept in case
 248  * the need comes back.
 249  */
 250 static int
 251 ckinode_common(struct dinode *dp, struct inodesc *idesc,
 252         enum cki_action action)
 253 {
 254         offset_t offset;
 255         struct dinode dino;
 256         daddr_t ndb;
 257         int indir_data_blks, last_indir_blk;
 258         int ret, i, frags;
 259
 260         (void) memmove(&dino, dp, sizeof (struct dinode));
 261         ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
 262
 263         for (i = 0; i < NDADDR; i++) {
 264                 idesc->id_lbn++;
 265                 offset = blkoff(&sblock, dino.di_size);
 266                 if ((--ndb == 0) && (offset != 0)) {
 267                         idesc->id_numfrags =
 268                             numfrags(&sblock, fragroundup(&sblock, offset));
 269                 } else {
 270                         idesc->id_numfrags = sblock.fs_frag;
 271                 }
 272                 if (dino.di_db[i] == 0) {
 273                         if ((ndb > 0) && (idesc->id_firsthole < 0)) {
 274                                 idesc->id_firsthole = i;
 275                         }
 276                         continue;
 277                 }
 278                 idesc->id_blkno = dino.di_db[i];
 279                 if (idesc->id_type == ADDR || idesc->id_type == ACL)
 280                         ret = (*idesc->id_func)(idesc);
 281                 else
 282                         ret = dirscan(idesc);
 283
 284                 /*
 285                  * Need to clear the entry, now that we're done with
 286                  * it.  We depend on freeblk() ignoring a request to
 287                  * free already-free fragments to handle the problem of
 288                  * a partial block.
 289                  */
 290                 if ((action == CKI_TRUNCATE) &&
 291                     (idesc->id_truncto >= 0) &&
 292                     (idesc->id_lbn >= idesc->id_truncto)) {
 293                         dp = ginode(idesc->id_number);
 294                         /*
 295                          * The (int) cast is safe, in that if di_size won't
 296                          * fit, it'll be a multiple of any legal fs_frag,
 297                          * thus giving a zero result.  That value, in turn
 298                          * means we're doing an entire block.
 299                          */
 300                         frags = howmany((int)dp->di_size, sblock.fs_fsize) %
 301                             sblock.fs_frag;
 302                         if (frags == 0)
 303                                 frags = sblock.fs_frag;
 304                         freeblk(idesc->id_number, dp->di_db[i],
 305                             frags);
 306                         dp = ginode(idesc->id_number);
 307                         dp->di_db[i] = 0;
 308                         inodirty();
 309                         ret |= ALTERED;
 310                 }
 311
 312                 if (ret & STOP)
 313                         return (ret);
 314         }
 315
 316 #ifdef lint
 317         /*
 318          * Cure a lint complaint of ``possible use before set''.
 319          * Apparently it can't quite figure out the switch statement.
 320          */
 321         indir_data_blks = 0;
 322 #endif
 323         /*
 324          * indir_data_blks contains the number of data blocks in all
 325          * the previous levels for this iteration.  E.g., for the
 326          * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
 327          * of blocks have already been covered by the direct blocks
 328          * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
 329          * it is all of the number of data blocks that were covered
 330          * by the second indirect, single indirect, and direct block
 331          * levels.
 332          */
 333         idesc->id_numfrags = sblock.fs_frag;
 334         ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
 335         for (i = 0; i < NIADDR; i++) {
 336                 (void) get_indir_offsets(i, ndb, &indir_data_blks,
 337                     &last_indir_blk);
 338                 if (dino.di_ib[i] != 0) {
 339                         /*
 340                          * We'll only clear di_ib[i] if the first entry (and
 341                          * therefore all of them) is to be cleared, since we
 342                          * only go through this code on the first entry of
 343                          * each level of indirection.  The +1 is to account
 344                          * for the fact that we don't modify id_lbn until
 345                          * we actually start processing on a data block.
 346                          */
 347                         idesc->id_blkno = dino.di_ib[i];
 348                         ret = iblock(idesc, i + 1,
 349                             (u_offset_t)howmany(dino.di_size,
 350                             (u_offset_t)sblock.fs_bsize) - indir_data_blks,
 351                             action);
 352                         if ((action == CKI_TRUNCATE) &&
 353                             (idesc->id_truncto <= indir_data_blks) &&
 354                             ((idesc->id_lbn + 1) >= indir_data_blks) &&
 355                             ((idesc->id_lbn + 1) <= last_indir_blk)) {
 356                                 dp = ginode(idesc->id_number);
 357                                 if (dp->di_ib[i] != 0) {
 358                                         freeblk(idesc->id_number, dp->di_ib[i],
 359                                             sblock.fs_frag);
 360                                 }
 361                         }
 362                         if (ret & STOP)
 363                                 return (ret);
 364                 } else {
 365                         /*
 366                          * Need to know which of the file's logical blocks
 367                          * reside in the missing indirect block.  However, the
 368                          * precise location is only needed for truncating
 369                          * directories, and level-of-indirection precision is
 370                          * sufficient for that.
 371                          */
 372                         if ((indir_data_blks < ndb) &&
 373                             (idesc->id_firsthole < 0)) {
 374                                 idesc->id_firsthole = indir_data_blks;
 375                         }
 376                 }
 377         }
 378         return (KEEPON);
 379 }
 380
 381 static int
 382 get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
 383         int *last_blk)
 384 {
 385         int ndb_ilevel = -1;
 386         int ilevel;
 387         int dblks, lblk;
 388
 389         for (ilevel = 0; ilevel < NIADDR; ilevel++) {
 390                 switch (ilevel) {
 391                 case 0: /* SINGLE */
 392                         dblks = NDADDR;
 393                         lblk = dblks + NINDIR(&sblock) - 1;
 394                         break;
 395                 case 1: /* DOUBLE */
 396                         dblks = NDADDR + NINDIR(&sblock);
 397                         lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
 398                         break;
 399                 case 2: /* TRIPLE */
 400                         dblks = NDADDR + NINDIR(&sblock) +
 401                             (NINDIR(&sblock) * NINDIR(&sblock));
 402                         lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
 403                             NINDIR(&sblock)) - 1;
 404                         break;
 405                 default:
 406                         exitstat = EXERRFATAL;
 407                         /*
 408                          * Translate from zero-based array to
 409                          * one-based human-style counting.
 410                          */
 411                         errexit("panic: indirection level %d not 1, 2, or 3",
 412                             ilevel + 1);
 413                         /* NOTREACHED */
 414                 }
 415
 416                 if (dblks < ndb && ndb <= lblk)
 417                         ndb_ilevel = ilevel;
 418
 419                 if (ilevel == ilevel_wanted) {
 420                         if (data_blks != NULL)
 421                                 *data_blks = dblks;
 422                         if (last_blk != NULL)
 423                                 *last_blk = lblk;
 424                 }
 425         }
 426
 427         return (ndb_ilevel);
 428 }
 429
 430 static int
 431 iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
 432         enum cki_action action)
 433 {
 434         struct bufarea *bp;
 435         int i, n;
 436         int (*func)(struct inodesc *) = NULL;
 437         u_offset_t fsbperindirb;
 438         daddr32_t last_lbn;
 439         int nif;
 440         char buf[BUFSIZ];
 441
 442         n = KEEPON;
 443
 444         switch (idesc->id_type) {
 445         case ADDR:
 446                 func = idesc->id_func;
 447                 if (((n = (*func)(idesc)) & KEEPON) == 0)
 448                                 return (n);
 449                 break;
 450         case ACL:
 451                 func = idesc->id_func;
 452                 break;
 453         case DATA:
 454                 func = dirscan;
 455                 break;
 456         default:
 457                 errexit("unknown inodesc type %d in iblock()", idesc->id_type);
 458                 /* NOTREACHED */
 459         }
 460         if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
 461                 return ((idesc->id_type == ACL) ? STOP : SKIP);
 462         }
 463
 464         bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
 465         if (bp->b_errs != 0) {
 466                 brelse(bp);
 467                 return (SKIP);
 468         }
 469
 470         ilevel--;
 471         /*
 472          * Trivia note: the BSD fsck has the number of bytes remaining
 473          * as the third argument to iblock(), so the equivalent of
 474          * fsbperindirb starts at fs_bsize instead of one.  We're
 475          * working in units of filesystem blocks here, not bytes or
 476          * fragments.
 477          */
 478         for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
 479                 fsbperindirb *= (u_offset_t)NINDIR(&sblock);
 480         }
 481         /*
 482          * nif indicates the next "free" pointer (as an array index) in this
 483          * indirect block, based on counting the blocks remaining in the
 484          * file after subtracting all previously processed blocks.
 485          * This figure is based on the size field of the inode.
 486          *
 487          * Note that in normal operation, nif may initially be calculated
 488          * as larger than the number of pointers in this block (as when
 489          * there are more indirect blocks following); if that is
 490          * the case, nif is limited to the max number of pointers per
 491          * indirect block.
 492          *
 493          * Also note that if an inode is inconsistent (has more blocks
 494          * allocated to it than the size field would indicate), the sweep
 495          * through any indirect blocks directly pointed at by the inode
 496          * continues. Since the block offset of any data blocks referenced
 497          * by these indirect blocks is greater than the size of the file,
 498          * the index nif may be computed as a negative value.
 499          * In this case, we reset nif to indicate that all pointers in
 500          * this retrieval block should be zeroed and the resulting
 501          * unreferenced data and/or retrieval blocks will be recovered
 502          * through garbage collection later.
 503          */
 504         nif = (offset_t)howmany(iblks, fsbperindirb);
 505         if (nif > NINDIR(&sblock))
 506                 nif = NINDIR(&sblock);
 507         else if (nif < 0)
 508                 nif = 0;
 509         /*
 510          * first pass: all "free" retrieval pointers (from [nif] thru
 511          *      the end of the indirect block) should be zero. (This
 512          *      assertion does not hold for directories, which may be
 513          *      truncated without releasing their allocated space)
 514          */
 515         if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
 516             idesc->id_func == pass3bcheck)) {
 517                 for (i = nif; i < NINDIR(&sblock); i++) {
 518                         if (bp->b_un.b_indir[i] == 0)
 519                                 continue;
 520                         (void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
 521                             (ulong_t)idesc->id_number);
 522                         if (preen) {
 523                                 pfatal(buf);
 524                         } else if (dofix(idesc, buf)) {
 525                                 freeblk(idesc->id_number,
 526                                     bp->b_un.b_indir[i],
 527                                     sblock.fs_frag);
 528                                 bp->b_un.b_indir[i] = 0;
 529                                 dirty(bp);
 530                         }
 531                 }
 532                 flush(fswritefd, bp);
 533         }
 534         /*
 535          * second pass: all retrieval pointers referring to blocks within
 536          *      a valid range [0..filesize] (both indirect and data blocks)
 537          *      are examined in the same manner as ckinode() checks the
 538          *      direct blocks in the inode.  Sweep through from
 539          *      the first pointer in this retrieval block to [nif-1].
 540          */
 541         last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
 542         for (i = 0; i < nif; i++) {
 543                 if (ilevel == 0)
 544                         idesc->id_lbn++;
 545                 if (bp->b_un.b_indir[i] != 0) {
 546                         idesc->id_blkno = bp->b_un.b_indir[i];
 547                         if (ilevel > 0) {
 548                                 n = iblock(idesc, ilevel, iblks, action);
 549                                 /*
 550                                  * Each iteration decreases "remaining block
 551                                  * count" by the number of blocks accessible
 552                                  * by a pointer at this indirect block level.
 553                                  */
 554                                 iblks -= fsbperindirb;
 555                         } else {
 556                                 /*
 557                                  * If we're truncating, func will discard
 558                                  * the data block for us.
 559                                  */
 560                                 n = (*func)(idesc);
 561                         }
 562
 563                         if ((action == CKI_TRUNCATE) &&
 564                             (idesc->id_truncto >= 0) &&
 565                             (idesc->id_lbn >= idesc->id_truncto)) {
 566                                 freeblk(idesc->id_number,  bp->b_un.b_indir[i],
 567                                     sblock.fs_frag);
 568                         }
 569
 570                         /*
 571                          * Note that truncation never gets STOP back
 572                          * under normal circumstances.  Abnormal would
 573                          * be a bad acl short-circuit in iblock() or
 574                          * an out-of-range failure in pass4check().
 575                          * We still want to keep going when truncating
 576                          * under those circumstances, since the whole
 577                          * point of truncating is to get rid of all
 578                          * that.
 579                          */
 580                         if ((n & STOP) && (action != CKI_TRUNCATE)) {
 581                                 brelse(bp);
 582                                 return (n);
 583                         }
 584                 } else {
 585                         if ((idesc->id_lbn < last_lbn) &&
 586                             (idesc->id_firsthole < 0)) {
 587                                 idesc->id_firsthole = idesc->id_lbn;
 588                         }
 589                         if (idesc->id_type == DATA) {
 590                                 /*
 591                                  * No point in continuing in the indirect
 592                                  * blocks of a directory, since they'll just
 593                                  * get freed anyway.
 594                                  */
 595                                 brelse(bp);
 596                                 return ((n & ~KEEPON) | STOP);
 597                         }
 598                 }
 599         }
 600
 601         brelse(bp);
 602         return (KEEPON);
 603 }
 604
 605 /*
 606  * Check that a block is a legal block number.
 607  * Return 0 if in range, 1 if out of range.
 608  */
 609 int
 610 chkrange(daddr32_t blk, int cnt)
 611 {
 612         int c;
 613
 614         if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
 615             ((cnt - 1) > (maxfsblock - blk))) {
 616                 if (debug)
 617                         (void) printf(
 618                             "Bad fragment range: should be 1 <= %d..%d < %d\n",
 619                             blk, blk + cnt, maxfsblock);
 620                 return (1);
 621         }
 622         if ((cnt > sblock.fs_frag) ||
 623             ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
 624                 if (debug)
 625                         (void) printf("Bad fragment size: size %d\n", cnt);
 626                 return (1);
 627         }
 628         c = dtog(&sblock, blk);
 629         if (blk < cgdmin(&sblock, c)) {
 630                 if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
 631                         if (debug)
 632                                 (void) printf(
 633             "Bad fragment position: %d..%d spans start of cg metadata\n",
 634                                     blk, blk + cnt);
 635                         return (1);
 636                 }
 637         } else {
 638                 if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
 639                         if (debug)
 640                                 (void) printf(
 641                                     "Bad frag pos: %d..%d crosses end of cg\n",
 642                                     blk, blk + cnt);
 643                         return (1);
 644                 }
 645         }
 646         return (0);
 647 }
 648
 649 /*
 650  * General purpose interface for reading inodes.
 651  */
 652
 653 /*
 654  * Note that any call to ginode() can potentially invalidate any
 655  * dinode pointers previously acquired from it.  To avoid pain,
 656  * make sure to always call inodirty() immediately after modifying
 657  * an inode, if there's any chance of ginode() being called after
 658  * that.  Also, always call ginode() right before you need to access
 659  * an inode, so that there won't be any surprises from functions
 660  * called between the previous ginode() invocation and the dinode
 661  * use.
 662  *
 663  * Despite all that, we aren't doing the amount of i/o that's implied,
 664  * as we use the buffer cache that getdatablk() and friends maintain.
 665  */
 666 static fsck_ino_t startinum = -1;
 667
 668 struct dinode *
 669 ginode(fsck_ino_t inum)
 670 {
 671         daddr32_t iblk;
 672         struct dinode *dp;
 673
 674         if (inum < UFSROOTINO || inum > maxino) {
 675                 errexit("bad inode number %d to ginode\n", inum);
 676         }
 677         if (startinum == -1 ||
 678             pbp == NULL ||
 679             inum < startinum ||
 680             inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
 681                 iblk = itod(&sblock, inum);
 682                 if (pbp != NULL) {
 683                         brelse(pbp);
 684                 }
 685                 /*
 686                  * We don't check for errors here, because we can't
 687                  * tell our caller about it, and the zeros that will
 688                  * be in the buffer are just as good as anything we
 689                  * could fake.
 690                  */
 691                 pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
 692                 startinum =
 693                     (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
 694         }
 695         dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
 696         if (dp->di_suid != UID_LONG)
 697                 dp->di_uid = dp->di_suid;
 698         if (dp->di_sgid != GID_LONG)
 699                 dp->di_gid = dp->di_sgid;
 700         return (dp);
 701 }
 702
 703 /*
 704  * Special purpose version of ginode used to optimize first pass
 705  * over all the inodes in numerical order.  It bypasses the buffer
 706  * system used by ginode(), etc in favour of reading the bulk of a
 707  * cg's inodes at one time.
 708  */
 709 static fsck_ino_t nextino, lastinum;
 710 static int64_t readcnt, readpercg, fullcnt, inobufsize;
 711 static int64_t partialcnt, partialsize;
 712 static size_t lastsize;
 713 static struct dinode *inodebuf;
 714 static diskaddr_t currentdblk;
 715 static struct dinode *currentinode;
 716
 717 struct dinode *
 718 getnextinode(fsck_ino_t inum)
 719 {
 720         size_t size;
 721         diskaddr_t dblk;
 722         static struct dinode *dp;
 723
 724         if (inum != nextino++ || inum > maxino)
 725                 errexit("bad inode number %d to nextinode\n", inum);
 726
 727         /*
 728          * Will always go into the if() the first time we're called,
 729          * so dp will always be valid.
 730          */
 731         if (inum >= lastinum) {
 732                 readcnt++;
 733                 dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
 734                 currentdblk = dblk;
 735                 if (readcnt % readpercg == 0) {
 736                         if (partialsize > SIZE_MAX)
 737                                 errexit(
 738                                     "Internal error: partialsize overflow");
 739                         size = (size_t)partialsize;
 740                         lastinum += partialcnt;
 741                 } else {
 742                         if (inobufsize > SIZE_MAX)
 743                                 errexit("Internal error: inobufsize overflow");
 744                         size = (size_t)inobufsize;
 745                         lastinum += fullcnt;
 746                 }
 747                 /*
 748                  * If fsck_bread() returns an error, it will already have
 749                  * zeroed out the buffer, so we do not need to do so here.
 750                  */
 751                 (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
 752                 lastsize = size;
 753                 dp = inodebuf;
 754         }
 755         currentinode = dp;
 756         return (dp++);
 757 }
 758
 759 /*
 760  * Reread the current getnext() buffer.  This allows for changing inodes
 761  * other than the current one via ginode()/inodirty()/inoflush().
 762  *
 763  * Just reuses all the interesting variables that getnextinode() set up
 764  * last time it was called.  This shouldn't get called often, so we don't
 765  * try to figure out if the caller's actually touched an inode in the
 766  * range we have cached.  There could have been an arbitrary number of
 767  * them, after all.
 768  */
 769 struct dinode *
 770 getnextrefresh(void)
 771 {
 772         if (inodebuf == NULL) {
 773                 return (NULL);
 774         }
 775
 776         inoflush();
 777         (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
 778         return (currentinode);
 779 }
 780
 781 void
 782 resetinodebuf(void)
 783 {
 784         startinum = 0;
 785         nextino = 0;
 786         lastinum = 0;
 787         readcnt = 0;
 788         inobufsize = blkroundup(&sblock, INOBUFSIZE);
 789         fullcnt = inobufsize / sizeof (struct dinode);
 790         readpercg = sblock.fs_ipg / fullcnt;
 791         partialcnt = sblock.fs_ipg % fullcnt;
 792         partialsize = partialcnt * sizeof (struct dinode);
 793         if (partialcnt != 0) {
 794                 readpercg++;
 795         } else {
 796                 partialcnt = fullcnt;
 797                 partialsize = inobufsize;
 798         }
 799         if (inodebuf == NULL &&
 800             (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
 801                 errexit("Cannot allocate space for inode buffer\n");
 802         while (nextino < UFSROOTINO)
 803                 (void) getnextinode(nextino);
 804 }
 805
 806 void
 807 freeinodebuf(void)
 808 {
 809         if (inodebuf != NULL) {
 810                 free((void *)inodebuf);
 811         }
 812         inodebuf = NULL;
 813 }
 814
 815 /*
 816  * Routines to maintain information about directory inodes.
 817  * This is built during the first pass and used during the
 818  * second and third passes.
 819  *
 820  * Enter inodes into the cache.
 821  */
 822 void
 823 cacheino(struct dinode *dp, fsck_ino_t inum)
 824 {
 825         struct inoinfo *inp;
 826         struct inoinfo **inpp;
 827         uint_t blks;
 828
 829         blks = NDADDR + NIADDR;
 830         inp = (struct inoinfo *)
 831             malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
 832         if (inp == NULL)
 833                 errexit("Cannot increase directory list\n");
 834         init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
 835         inpp = &inphead[inum % numdirs];
 836         inp->i_nextlist = *inpp;
 837         *inpp = inp;
 838         inp->i_number = inum;
 839         if (inplast == listmax) {
 840                 listmax += 100;
 841                 inpsort = (struct inoinfo **)realloc((void *)inpsort,
 842                     (unsigned)listmax * sizeof (struct inoinfo *));
 843                 if (inpsort == NULL)
 844                         errexit("cannot increase directory list");
 845         }
 846         inpsort[inplast++] = inp;
 847 }
 848
 849 /*
 850  * Look up an inode cache structure.
 851  */
 852 struct inoinfo *
 853 getinoinfo(fsck_ino_t inum)
 854 {
 855         struct inoinfo *inp;
 856
 857         inp = search_cache(inphead[inum % numdirs], inum);
 858         return (inp);
 859 }
 860
 861 /*
 862  * Determine whether inode is in cache.
 863  */
 864 int
 865 inocached(fsck_ino_t inum)
 866 {
 867         return (search_cache(inphead[inum % numdirs], inum) != NULL);
 868 }
 869
 870 /*
 871  * Clean up all the inode cache structure.
 872  */
 873 void
 874 inocleanup(void)
 875 {
 876         struct inoinfo **inpp;
 877
 878         if (inphead == NULL)
 879                 return;
 880         for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
 881                 free((void *)(*inpp));
 882         }
 883         free((void *)inphead);
 884         free((void *)inpsort);
 885         inphead = inpsort = NULL;
 886 }
 887
 888 /*
 889  * Routines to maintain information about acl inodes.
 890  * This is built during the first pass and used during the
 891  * second and third passes.
 892  *
 893  * Enter acl inodes into the cache.
 894  */
 895 void
 896 cacheacl(struct dinode *dp, fsck_ino_t inum)
 897 {
 898         struct inoinfo *aclp;
 899         struct inoinfo **aclpp;
 900         uint_t blks;
 901
 902         blks = NDADDR + NIADDR;
 903         aclp = (struct inoinfo *)
 904             malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
 905         if (aclp == NULL)
 906                 return;
 907         aclpp = &aclphead[inum % numacls];
 908         aclp->i_nextlist = *aclpp;
 909         *aclpp = aclp;
 910         aclp->i_number = inum;
 911         aclp->i_isize = (offset_t)dp->di_size;
 912         aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
 913         (void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
 914         if (aclplast == aclmax) {
 915                 aclmax += 100;
 916                 aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
 917                     (unsigned)aclmax * sizeof (struct inoinfo *));
 918                 if (aclpsort == NULL)
 919                         errexit("cannot increase acl list");
 920         }
 921         aclpsort[aclplast++] = aclp;
 922 }
 923
 924
 925 /*
 926  * Generic cache search function.
 927  * ROOT is the first entry in a hash chain (the caller is expected
 928  * to have done the initial bucket lookup).  KEY is what's being
 929  * searched for.
 930  *
 931  * Returns a pointer to the entry if it is found, NULL otherwise.
 932  */
 933 static struct inoinfo *
 934 search_cache(struct inoinfo *element, fsck_ino_t key)
 935 {
 936         while (element != NULL) {
 937                 if (element->i_number == key)
 938                         break;
 939                 element = element->i_nextlist;
 940         }
 941
 942         return (element);
 943 }
 944
 945 void
 946 inodirty(void)
 947 {
 948         dirty(pbp);
 949 }
 950
 951 static void
 952 inoflush(void)
 953 {
 954         if (pbp != NULL)
 955                 flush(fswritefd, pbp);
 956 }
 957
 958 /*
 959  * Interactive wrapper for freeino(), for those times when we're
 960  * not sure if we should throw something away.
 961  */
 962 void
 963 clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
 964 {
 965         int need_parent;
 966         struct dinode *dp;
 967
 968         if (statemap[idesc->id_number] == USTATE)
 969                 return;
 970
 971         dp = ginode(idesc->id_number);
 972         if (verbose == CLRI_VERBOSE) {
 973                 pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
 974                 pinode(idesc->id_number);
 975         }
 976         if (preen || (reply("CLEAR") == 1)) {
 977                 need_parent = (corrupting == CLRI_NOP_OK) ?
 978                     TI_NOPARENT : TI_PARENT;
 979                 freeino(idesc->id_number, need_parent);
 980                 if (preen)
 981                         (void) printf(" (CLEARED)\n");
 982                 remove_orphan_dir(idesc->id_number);
 983         } else if (corrupting == CLRI_NOP_CORRUPT) {
 984                 iscorrupt = 1;
 985         }
 986         (void) printf("\n");
 987 }
 988
 989 /*
 990  * Find the directory entry for the inode noted in id_parent (which is
 991  * not necessarily the parent of anything, we're just using a convenient
 992  * field.
 993  */
 994 int
 995 findname(struct inodesc *idesc)
 996 {
 997         struct direct *dirp = idesc->id_dirp;
 998
 999         if (dirp->d_ino != idesc->id_parent)
1000                 return (KEEPON);
1001         (void) memmove(idesc->id_name, dirp->d_name,
1002             MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1003         return (STOP|FOUND);
1004 }
1005
1006 /*
1007  * Find the inode number associated with the given name.
1008  */
1009 int
1010 findino(struct inodesc *idesc)
1011 {
1012         struct direct *dirp = idesc->id_dirp;
1013
1014         if (dirp->d_ino == 0)
1015                 return (KEEPON);
1016         if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1017             dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1018                 idesc->id_parent = dirp->d_ino;
1019                 return (STOP|FOUND);
1020         }
1021         return (KEEPON);
1022 }
1023
1024 int
1025 cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1026 {
1027         struct inodesc idesc;
1028         struct dinode *dp;
1029
1030         dp = ginode(parentdir);
1031         init_inodesc(&idesc);
1032         idesc.id_func = clearanentry;
1033         idesc.id_parent = target;
1034         idesc.id_type = DATA;
1035         idesc.id_fix = NOFIX;
1036         return (ckinode(dp, &idesc, CKI_TRAVERSE));
1037 }
1038
1039 static int
1040 clearanentry(struct inodesc *idesc)
1041 {
1042         struct direct *dirp = idesc->id_dirp;
1043
1044         if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1045                 idesc->id_entryno++;
1046                 return (KEEPON);
1047         }
1048         dirp->d_ino = 0;
1049         return (STOP|FOUND|ALTERED);
1050 }
1051
1052 void
1053 pinode(fsck_ino_t ino)
1054 {
1055         struct dinode *dp;
1056
1057         (void) printf(" I=%lu ", (ulong_t)ino);
1058         if (ino < UFSROOTINO || ino > maxino)
1059                 return;
1060         dp = ginode(ino);
1061         pdinode(dp);
1062 }
1063
1064 static void
1065 pdinode(struct dinode *dp)
1066 {
1067         char *p;
1068         struct passwd *pw;
1069         time_t t;
1070
1071         (void) printf(" OWNER=");
1072         if ((pw = getpwuid((int)dp->di_uid)) != 0)
1073                 (void) printf("%s ", pw->pw_name);
1074         else
1075                 (void) printf("%lu ", (ulong_t)dp->di_uid);
1076         (void) printf("MODE=%o\n", dp->di_mode);
1077         if (preen)
1078                 (void) printf("%s: ", devname);
1079         (void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1080
1081         /* ctime() ignores LOCALE, so this is safe */
1082         t = (time_t)dp->di_mtime;
1083         p = ctime(&t);
1084         (void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1085 }
1086
1087 void
1088 blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1089 {
1090         pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1091         (void) printf("\n");
1092
1093         switch (statemap[ino] & ~INDELAYD) {
1094
1095         case FSTATE:
1096         case FZLINK:
1097                 statemap[ino] = FCLEAR;
1098                 return;
1099
1100         case DFOUND:
1101         case DSTATE:
1102         case DZLINK:
1103                 statemap[ino] = DCLEAR;
1104                 add_orphan_dir(ino);
1105                 return;
1106
1107         case SSTATE:
1108                 statemap[ino] = SCLEAR;
1109                 return;
1110
1111         case FCLEAR:
1112         case DCLEAR:
1113         case SCLEAR:
1114                 return;
1115
1116         default:
1117                 errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1118                 /* NOTREACHED */
1119         }
1120 }
1121
1122 /*
1123  * allocate an unused inode
1124  */
1125 fsck_ino_t
1126 allocino(fsck_ino_t request, int type)
1127 {
1128         fsck_ino_t ino;
1129         struct dinode *dp;
1130         struct cg *cgp = &cgrp;
1131         int cg;
1132         time_t t;
1133         caddr_t err;
1134
1135         if (debug && (request != 0) && (request != UFSROOTINO))
1136                 errexit("assertion failed: allocino() asked for "
1137                     "inode %d instead of 0 or %d",
1138                     (int)request, (int)UFSROOTINO);
1139
1140         /*
1141          * We know that we're only going to get requests for UFSROOTINO
1142          * or 0.  If UFSROOTINO is wanted, then it better be available
1143          * because our caller is trying to recreate the root directory.
1144          * If we're asked for 0, then which one we return doesn't matter.
1145          * We know that inodes 0 and 1 are never valid to return, so we
1146          * the start at the lowest-legal inode number.
1147          *
1148          * If we got a request for UFSROOTINO, then request != 0, and
1149          * this pair of conditionals is the only place that treats
1150          * UFSROOTINO specially.
1151          */
1152         if (request == 0)
1153                 request = UFSROOTINO;
1154         else if (statemap[request] != USTATE)
1155                 return (0);
1156
1157         /*
1158          * Doesn't do wrapping, since we know we started at
1159          * the smallest inode.
1160          */
1161         for (ino = request; ino < maxino; ino++)
1162                 if (statemap[ino] == USTATE)
1163                         break;
1164         if (ino == maxino)
1165                 return (0);
1166
1167         /*
1168          * In pass5, we'll calculate the bitmaps and counts all again from
1169          * scratch and do a comparison, but for that to work the cg has
1170          * to know what in-memory changes we've made to it.  If we have
1171          * trouble reading the cg, cg_sanity() should kick it out so
1172          * we can skip explicit i/o error checking here.
1173          */
1174         cg = itog(&sblock, ino);
1175         (void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1176         err = cg_sanity(cgp, cg);
1177         if (err != NULL) {
1178                 pfatal("CG %d: %s\n", cg, err);
1179                 free((void *)err);
1180                 if (reply("REPAIR") == 0)
1181                         errexit("Program terminated.");
1182                 fix_cg(cgp, cg);
1183         }
1184         setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1185         cgp->cg_cs.cs_nifree--;
1186         cgdirty();
1187
1188         if (lastino < ino)
1189                 lastino = ino;
1190
1191         /*
1192          * Don't currently support IFATTRDIR or any of the other
1193          * types, as they aren't needed.
1194          */
1195         switch (type & IFMT) {
1196         case IFDIR:
1197                 statemap[ino] = DSTATE;
1198                 cgp->cg_cs.cs_ndir++;
1199                 break;
1200         case IFREG:
1201         case IFLNK:
1202                 statemap[ino] = FSTATE;
1203                 break;
1204         default:
1205                 /*
1206                  * Pretend nothing ever happened.  This clears the
1207                  * dirty flag, among other things.
1208                  */
1209                 initbarea(&cgblk);
1210                 if (debug)
1211                         (void) printf("allocino: unknown type 0%o\n",
1212                             type & IFMT);
1213                 return (0);
1214         }
1215
1216         /*
1217          * We're allocating what should be a completely-unused inode,
1218          * so make sure we don't inherit anything from any previous
1219          * incarnations.
1220          */
1221         dp = ginode(ino);
1222         (void) memset((void *)dp, 0, sizeof (struct dinode));
1223         dp->di_db[0] = allocblk(1);
1224         if (dp->di_db[0] == 0) {
1225                 statemap[ino] = USTATE;
1226                 return (0);
1227         }
1228         dp->di_mode = (mode_t)type;
1229         (void) time(&t);
1230         dp->di_atime = (time32_t)t;
1231         dp->di_ctime = dp->di_atime;
1232         dp->di_mtime = dp->di_ctime;
1233         dp->di_size = (u_offset_t)sblock.fs_fsize;
1234         dp->di_blocks = btodb(sblock.fs_fsize);
1235         n_files++;
1236         inodirty();
1237         return (ino);
1238 }
1239
1240 /*
1241  * Release some or all of the blocks of an inode.
1242  * Only truncates down.  Assumes new_length is appropriately aligned
1243  * to a block boundary (or a directory block boundary, if it's a
1244  * directory).
1245  *
1246  * If this is a directory, discard all of its contents first, so
1247  * we don't create a bunch of orphans that would need another fsck
1248  * run to clean up.
1249  *
1250  * Even if truncating to zero length, the inode remains allocated.
1251  */
1252 void
1253 truncino(fsck_ino_t ino, offset_t new_length, int update)
1254 {
1255         struct inodesc idesc;
1256         struct inoinfo *iip;
1257         struct dinode *dp;
1258         fsck_ino_t parent;
1259         mode_t mode;
1260         caddr_t message;
1261         int isdir, islink;
1262         int ilevel, dblk;
1263
1264         dp = ginode(ino);
1265         mode = (dp->di_mode & IFMT);
1266         isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1267         islink = (mode == IFLNK);
1268
1269         if (isdir) {
1270                 /*
1271                  * Go with the parent we found by chasing references,
1272                  * if we've gotten that far.  Otherwise, use what the
1273                  * directory itself claims.  If there's no ``..'' entry
1274                  * in it, give up trying to get the link counts right.
1275                  */
1276                 if (update == TI_NOPARENT) {
1277                         parent = -1;
1278                 } else {
1279                         iip = getinoinfo(ino);
1280                         if (iip != NULL) {
1281                                 parent = iip->i_parent;
1282                         } else {
1283                                 parent = lookup_dotdot_ino(ino);
1284                                 if (parent != 0) {
1285                                         /*
1286                                          * Make sure that the claimed
1287                                          * parent actually has a
1288                                          * reference to us.
1289                                          */
1290                                         dp = ginode(parent);
1291                                         idesc.id_name = lfname;
1292                                         idesc.id_type = DATA;
1293                                         idesc.id_func = findino;
1294                                         idesc.id_number = ino;
1295                                         idesc.id_fix = DONTKNOW;
1296                                         if ((ckinode(dp, &idesc,
1297                                             CKI_TRAVERSE) & FOUND) == 0)
1298                                                 parent = 0;
1299                                 }
1300                         }
1301                 }
1302
1303                 mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1304                 if (parent > 0) {
1305                         dp = ginode(parent);
1306                         LINK_RANGE(message, dp->di_nlink, -1);
1307                         if (message != NULL) {
1308                                 LINK_CLEAR(message, parent, dp->di_mode,
1309                                     &idesc);
1310                                 if (statemap[parent] == USTATE)
1311                                         goto no_parent_update;
1312                         }
1313                         TRACK_LNCNTP(parent, lncntp[parent]--);
1314                 } else if ((mode == IFDIR) && (parent == 0)) {
1315                         /*
1316                          * Currently don't have a good way to
1317                          * handle this, so throw up our hands.
1318                          * However, we know that we can still
1319                          * do some good if we continue, so
1320                          * don't actually exit yet.
1321                          *
1322                          * We don't do it for attrdirs,
1323                          * because there aren't link counts
1324                          * between them and their parents.
1325                          */
1326                         pwarn("Could not determine former parent of "
1327                             "inode %d, link counts are possibly\n"
1328                             "incorrect.  Please rerun fsck(1M) to "
1329                             "correct this.\n",
1330                             ino);
1331                         iscorrupt = 1;
1332                 }
1333                 /*
1334                  * ...else if it's a directory with parent == -1, then
1335                  * we've not gotten far enough to know connectivity,
1336                  * and it'll get handled automatically later.
1337                  */
1338         }
1339
1340 no_parent_update:
1341         init_inodesc(&idesc);
1342         idesc.id_type = ADDR;
1343         idesc.id_func = pass4check;
1344         idesc.id_number = ino;
1345         idesc.id_fix = DONTKNOW;
1346         idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1347         dp = ginode(ino);
1348         if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1349                 inodirty();
1350
1351         /*
1352          * This has to be done after ckinode(), so that all of
1353          * the fragments get visited.  Note that we assume we're
1354          * always truncating to a block boundary, rather than a
1355          * fragment boundary.
1356          */
1357         dp = ginode(ino);
1358         dp->di_size = new_length;
1359
1360         /*
1361          * Clear now-obsolete pointers.
1362          */
1363         for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1364                 dp->di_db[dblk] = 0;
1365         }
1366
1367         ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1368         for (ilevel++; ilevel < NIADDR; ilevel++) {
1369                 dp->di_ib[ilevel] = 0;
1370         }
1371
1372         inodirty();
1373 }
1374
1375 /*
1376  * Release an inode's resources, then release the inode itself.
1377  */
1378 void
1379 freeino(fsck_ino_t ino, int update_parent)
1380 {
1381         int cg;
1382         struct dinode *dp;
1383         struct cg *cgp;
1384
1385         n_files--;
1386         dp = ginode(ino);
1387         /*
1388          * We need to make sure that the file is really a large file.
1389          * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1390          * negative size, which shall be cleared. (see verify_inode() in
1391          * pass1.c)
1392          */
1393         if (dp->di_size > (u_offset_t)MAXOFF_T &&
1394             dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1395             ftypeok(dp) &&
1396             (dp->di_mode & IFMT) != IFBLK &&
1397             (dp->di_mode & IFMT) != IFCHR) {
1398                 largefile_count--;
1399         }
1400         truncino(ino, 0, update_parent);
1401
1402         dp = ginode(ino);
1403         if ((dp->di_mode & IFMT) == IFATTRDIR) {
1404                 clearshadow(ino, &attrclientinfo);
1405                 dp = ginode(ino);
1406         }
1407
1408         clearinode(dp);
1409         inodirty();
1410         statemap[ino] = USTATE;
1411
1412         /*
1413          * Keep the disk in sync with us so that pass5 doesn't get
1414          * upset about spurious inconsistencies.
1415          */
1416         cg = itog(&sblock, ino);
1417         (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1418             (size_t)sblock.fs_cgsize);
1419         cgp = cgblk.b_un.b_cg;
1420         clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1421         cgp->cg_cs.cs_nifree += 1;
1422         cgdirty();
1423         sblock.fs_cstotal.cs_nifree += 1;
1424         sbdirty();
1425 }
1426
1427 void
1428 init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1429 {
1430         inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1431         inp->i_dotdot = (fsck_ino_t)0;
1432         inp->i_isize = (offset_t)dp->di_size;
1433         inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1434         inp->i_extattr = dp->di_oeftflag;
1435         (void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1436             inp->i_blkssize);
1437 }
1438
1439 /*
1440  * Return the inode number in the ".." entry of the provided
1441  * directory inode.
1442  */
1443 static int
1444 lookup_dotdot_ino(fsck_ino_t ino)
1445 {
1446         struct inodesc idesc;
1447
1448         init_inodesc(&idesc);
1449         idesc.id_type = DATA;
1450         idesc.id_func = findino;
1451         idesc.id_name = "..";
1452         idesc.id_number = ino;
1453         idesc.id_fix = NOFIX;
1454
1455         if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1456                 return (idesc.id_parent);
1457         }
1458
1459         return (0);
1460 }
1461
1462 /*
1463  * Convenience wrapper around ckinode(findino()).
1464  */
1465 int
1466 lookup_named_ino(fsck_ino_t dir, caddr_t name)
1467 {
1468         struct inodesc idesc;
1469
1470         init_inodesc(&idesc);
1471         idesc.id_type = DATA;
1472         idesc.id_func = findino;
1473         idesc.id_name = name;
1474         idesc.id_number = dir;
1475         idesc.id_fix = NOFIX;
1476
1477         if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1478                 return (idesc.id_parent);
1479         }
1480
1481         return (0);
1482 }
1483
1484 /*
1485  * Marks inodes that are being orphaned and might need to be reconnected
1486  * by pass4().  The inode we're traversing is the directory whose
1487  * contents will be reconnected later.  id_parent is the lfn at which
1488  * to start looking at said contents.
1489  */
1490 static int
1491 mark_a_delayed_inode(struct inodesc *idesc)
1492 {
1493         struct direct *dirp = idesc->id_dirp;
1494
1495         if (idesc->id_lbn < idesc->id_parent) {
1496                 return (KEEPON);
1497         }
1498
1499         if (dirp->d_ino != 0 &&
1500             strcmp(dirp->d_name, ".") != 0 &&
1501             strcmp(dirp->d_name, "..") != 0) {
1502                 statemap[dirp->d_ino] &= ~INFOUND;
1503                 statemap[dirp->d_ino] |= INDELAYD;
1504         }
1505
1506         return (KEEPON);
1507 }
1508
1509 static void
1510 mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1511 {
1512         struct dinode *dp;
1513         struct inodesc idelayed;
1514
1515         init_inodesc(&idelayed);
1516         idelayed.id_number = ino;
1517         idelayed.id_type = DATA;
1518         idelayed.id_fix = NOFIX;
1519         idelayed.id_func = mark_a_delayed_inode;
1520         idelayed.id_parent = first_lfn;
1521         idelayed.id_entryno = 2;
1522
1523         dp = ginode(ino);
1524         (void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1525 }
1526
1527 /*
1528  * Clear the i_oeftflag/extended attribute pointer from INO.
1529  */
1530 void
1531 clearattrref(fsck_ino_t ino)
1532 {
1533         struct dinode *dp;
1534
1535         dp = ginode(ino);
1536         if (debug) {
1537                 if (dp->di_oeftflag == 0)
1538                         (void) printf("clearattref: no attr to clear on %d\n",
1539                             ino);
1540         }
1541
1542         dp->di_oeftflag = 0;
1543         inodirty();
1544 }