usr/src/cmd/fs.d/ufs/fsck/utilities.c

   1 /*
   2  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016 by Delphix. All rights reserved.
   4  */
   5
   6 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
   7 /*        All Rights Reserved   */
   8
   9 /*
  10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
  11  * All rights reserved.
  12  *
  13  * Redistribution and use in source and binary forms are permitted
  14  * provided that: (1) source distributions retain this entire copyright
  15  * notice and comment, and (2) distributions including binaries display
  16  * the following acknowledgement:  ``This product includes software
  17  * developed by the University of California, Berkeley and its contributors''
  18  * in the documentation or other materials provided with the distribution
  19  * and in all advertising materials mentioning features or use of this
  20  * software. Neither the name of the University nor the names of its
  21  * contributors may be used to endorse or promote products derived
  22  * from this software without specific prior written permission.
  23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  26  */
  27
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <unistd.h>
  31 #include <stdarg.h>
  32 #include <libadm.h>
  33 #include <note.h>
  34 #include <sys/param.h>
  35 #include <sys/types.h>
  36 #include <sys/mntent.h>
  37 #include <sys/filio.h>
  38 #include <sys/fs/ufs_fs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/fs/ufs_acl.h>
  41 #include <sys/fs/ufs_inode.h>
  42 #include <sys/fs/ufs_log.h>
  43 #define _KERNEL
  44 #include <sys/fs/ufs_fsdir.h>
  45 #undef _KERNEL
  46 #include <sys/mnttab.h>
  47 #include <sys/types.h>
  48 #include <sys/stat.h>
  49 #include <fcntl.h>
  50 #include <signal.h>
  51 #include <string.h>
  52 #include <ctype.h>
  53 #include <sys/vfstab.h>
  54 #include <sys/lockfs.h>
  55 #include <errno.h>
  56 #include <sys/cmn_err.h>
  57 #include <sys/dkio.h>
  58 #include <sys/vtoc.h>
  59 #include <sys/efi_partition.h>
  60 #include <fslib.h>
  61 #include <inttypes.h>
  62 #include "fsck.h"
  63
/*
 * Starts out NULL.  Presumably the mount point of the file system being
 * checked, filled in elsewhere -- TODO confirm against the callers.
 */
caddr_t mount_point = NULL;

/*
 * Disk cache statistics.  getdatablk() bumps totalreads on every call
 * and diskreads only when the block was not already cached; ckfini()
 * prints both when debugging is enabled.
 */
static int64_t diskreads, totalreads;

/*
 * Forward declarations for helpers private to this file.
 */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getaline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
            daddr32_t *, daddr32_t *, daddr32_t *);
  85
  86 int
  87 ftypeok(struct dinode *dp)
  88 {
  89         switch (dp->di_mode & IFMT) {
  90
  91         case IFDIR:
  92         case IFREG:
  93         case IFBLK:
  94         case IFCHR:
  95         case IFLNK:
  96         case IFSOCK:
  97         case IFIFO:
  98         case IFSHAD:
  99         case IFATTRDIR:
 100                 return (1);
 101
 102         default:
 103                 if (debug)
 104                         (void) printf("bad file type 0%o\n", dp->di_mode);
 105                 return (0);
 106         }
 107 }
 108
 109 int
 110 acltypeok(struct dinode *dp)
 111 {
 112         if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
 113                 return (1);
 114
 115         if (debug)
 116                 (void) printf("bad file type for acl I=%d: 0%o\n",
 117                     dp->di_shadow, dp->di_mode);
 118         return (0);
 119 }
 120
 121 NOTE(PRINTFLIKE(1))
 122 int
 123 reply(caddr_t fmt, ...)
 124 {
 125         va_list ap;
 126         char line[80];
 127
 128         if (preen)
 129                 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
 130
 131         if (mflag) {
 132                 /*
 133                  * We don't know what's going on, so don't potentially
 134                  * make things worse by having errexit() write stuff
 135                  * out to disk.
 136                  */
 137                 (void) printf(
 138                     "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
 139                     devname);
 140                 exit(EXERRFATAL);
 141         }
 142
 143         va_start(ap, fmt);
 144         (void) putchar('\n');
 145         (void) vprintf(fmt, ap);
 146         (void) putchar('?');
 147         (void) putchar(' ');
 148         va_end(ap);
 149
 150         if (nflag || fswritefd < 0) {
 151                 (void) printf(" no\n\n");
 152                 return (0);
 153         }
 154         if (yflag) {
 155                 (void) printf(" yes\n\n");
 156                 return (1);
 157         }
 158         (void) fflush(stdout);
 159         if (getaline(stdin, line, sizeof (line)) == EOF)
 160                 errexit("\n");
 161         (void) printf("\n");
 162         if (line[0] == 'y' || line[0] == 'Y') {
 163                 return (1);
 164         } else {
 165                 return (0);
 166         }
 167 }
 168
 169 int
 170 getaline(FILE *fp, caddr_t loc, int maxlen)
 171 {
 172         int n;
 173         caddr_t p, lastloc;
 174
 175         p = loc;
 176         lastloc = &p[maxlen-1];
 177         while ((n = getc(fp)) != '\n') {
 178                 if (n == EOF)
 179                         return (EOF);
 180                 if (!isspace(n) && p < lastloc)
 181                         *p++ = (char)n;
 182         }
 183         *p = '\0';
 184         /* LINTED pointer difference won't overflow */
 185         return (p - loc);
 186 }
 187
 188 /*
 189  * Malloc buffers and set up cache.
 190  */
 191 void
 192 bufinit(void)
 193 {
 194         struct bufarea *bp;
 195         int bufcnt, i;
 196         caddr_t bufp;
 197
 198         bufp = malloc((size_t)sblock.fs_bsize);
 199         if (bufp == NULL)
 200                 goto nomem;
 201         initbarea(&cgblk);
 202         cgblk.b_un.b_buf = bufp;
 203         bufhead.b_next = bufhead.b_prev = &bufhead;
 204         bufcnt = MAXBUFSPACE / sblock.fs_bsize;
 205         if (bufcnt < MINBUFS)
 206                 bufcnt = MINBUFS;
 207         for (i = 0; i < bufcnt; i++) {
 208                 bp = (struct bufarea *)malloc(sizeof (struct bufarea));
 209                 if (bp == NULL) {
 210                         if (i >= MINBUFS)
 211                                 goto noalloc;
 212                         goto nomem;
 213                 }
 214
 215                 bufp = malloc((size_t)sblock.fs_bsize);
 216                 if (bufp == NULL) {
 217                         free((void *)bp);
 218                         if (i >= MINBUFS)
 219                                 goto noalloc;
 220                         goto nomem;
 221                 }
 222                 initbarea(bp);
 223                 bp->b_un.b_buf = bufp;
 224                 bp->b_prev = &bufhead;
 225                 bp->b_next = bufhead.b_next;
 226                 bufhead.b_next->b_prev = bp;
 227                 bufhead.b_next = bp;
 228         }
 229 noalloc:
 230         bufhead.b_size = i;     /* save number of buffers */
 231         pbp = pdirbp = NULL;
 232         return;
 233
 234 nomem:
 235         errexit("cannot allocate buffer pool\n");
 236         /* NOTREACHED */
 237 }
 238
 239 /*
 240  * Undo a bufinit().
 241  */
 242 void
 243 unbufinit(void)
 244 {
 245         int cnt;
 246         struct bufarea *bp, *nbp;
 247
 248         cnt = 0;
 249         for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
 250                 cnt++;
 251                 flush(fswritefd, bp);
 252                 nbp = bp->b_prev;
 253                 /*
 254                  * We're discarding the entire chain, so this isn't
 255                  * technically necessary.  However, it doesn't hurt
 256                  * and lint's data flow analysis is much happier
 257                  * (this prevents it from thinking there's a chance
 258                  * of our using memory elsewhere after it's been released).
 259                  */
 260                 nbp->b_next = bp->b_next;
 261                 bp->b_next->b_prev = nbp;
 262                 free((void *)bp->b_un.b_buf);
 263                 free((void *)bp);
 264         }
 265
 266         if (bufhead.b_size != cnt)
 267                 errexit("Panic: cache lost %d buffers\n",
 268                     bufhead.b_size - cnt);
 269 }
 270
 271 /*
 272  * Manage a cache of directory blocks.
 273  */
 274 struct bufarea *
 275 getdatablk(daddr32_t blkno, size_t size)
 276 {
 277         struct bufarea *bp;
 278
 279         for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
 280                 if (bp->b_bno == fsbtodb(&sblock, blkno)) {
 281                         goto foundit;
 282                 }
 283         for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
 284                 if ((bp->b_flags & B_INUSE) == 0)
 285                         break;
 286         if (bp == &bufhead) {
 287                 bp = alloc_bufarea();
 288                 if (bp == NULL) {
 289                         errexit("deadlocked buffer pool\n");
 290                         /* NOTREACHED */
 291                 }
 292         }
 293         /*
 294          * We're at the same logical level as getblk(), so if there
 295          * are any errors, we'll let our caller handle them.
 296          */
 297         diskreads++;
 298         (void) getblk(bp, blkno, size);
 299
 300 foundit:
 301         totalreads++;
 302         bp->b_cnt++;
 303         /*
 304          * Move the buffer to head of linked list if it isn't
 305          * already there.
 306          */
 307         if (bufhead.b_next != bp) {
 308                 bp->b_prev->b_next = bp->b_next;
 309                 bp->b_next->b_prev = bp->b_prev;
 310                 bp->b_prev = &bufhead;
 311                 bp->b_next = bufhead.b_next;
 312                 bufhead.b_next->b_prev = bp;
 313                 bufhead.b_next = bp;
 314         }
 315         bp->b_flags |= B_INUSE;
 316         return (bp);
 317 }
 318
 319 void
 320 brelse(struct bufarea *bp)
 321 {
 322         bp->b_cnt--;
 323         if (bp->b_cnt == 0) {
 324                 bp->b_flags &= ~B_INUSE;
 325         }
 326 }
 327
 328 struct bufarea *
 329 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
 330 {
 331         diskaddr_t dblk;
 332
 333         dblk = fsbtodb(&sblock, blk);
 334         if (bp->b_bno == dblk)
 335                 return (bp);
 336         flush(fswritefd, bp);
 337         bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
 338         bp->b_bno = dblk;
 339         bp->b_size = size;
 340         return (bp);
 341 }
 342
 343 void
 344 flush(int fd, struct bufarea *bp)
 345 {
 346         int i, j;
 347         caddr_t sip;
 348         long size;
 349
 350         if (!bp->b_dirty)
 351                 return;
 352
 353         /*
 354          * It's not our buf, so if there are errors, let whoever
 355          * acquired it deal with the actual problem.
 356          */
 357         if (bp->b_errs != 0)
 358                 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
 359         bp->b_dirty = 0;
 360         bp->b_errs = 0;
 361         bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
 362         if (bp != &sblk) {
 363                 return;
 364         }
 365
 366         /*
 367          * We're flushing the superblock, so make sure all the
 368          * ancillary bits go out as well.
 369          */
 370         sip = (caddr_t)sblock.fs_u.fs_csp;
 371         for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
 372                 size = sblock.fs_cssize - i < sblock.fs_bsize ?
 373                     sblock.fs_cssize - i : sblock.fs_bsize;
 374                 bwrite(fswritefd, sip,
 375                     fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
 376                     size);
 377                 sip += size;
 378         }
 379 }
 380
 381 static void
 382 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
 383 {
 384         int olderr = errno;
 385
 386         if (!preen)
 387                 (void) printf("\n");
 388
 389         if (rval == -1)
 390                 pfatal("CANNOT %s: DISK BLOCK %lld: %s",
 391                     mesg, blk, strerror(olderr));
 392         else
 393                 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
 394
 395         if (reply("CONTINUE") == 0) {
 396                 exitstat = EXERRFATAL;
 397                 errexit("Program terminated\n");
 398         }
 399 }
 400
 401 void
 402 ckfini(void)
 403 {
 404         int64_t percentage;
 405
 406         if (fswritefd < 0)
 407                 return;
 408
 409         flush(fswritefd, &sblk);
 410         /*
 411          * Were we using a backup superblock?
 412          */
 413         if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
 414                 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
 415                         sblk.b_bno = SBOFF / dev_bsize;
 416                         sbdirty();
 417                         flush(fswritefd, &sblk);
 418                 }
 419         }
 420         flush(fswritefd, &cgblk);
 421         if (cgblk.b_un.b_buf != NULL) {
 422                 free((void *)cgblk.b_un.b_buf);
 423                 cgblk.b_un.b_buf = NULL;
 424         }
 425         unbufinit();
 426         pbp = NULL;
 427         pdirbp = NULL;
 428         if (debug) {
 429                 /*
 430                  * Note that we only count cache-related reads.
 431                  * Anything that called fsck_bread() or getblk()
 432                  * directly are explicitly not cached, so they're not
 433                  * included here.
 434                  */
 435                 if (totalreads != 0)
 436                         percentage = diskreads * 100 / totalreads;
 437                 else
 438                         percentage = 0;
 439
 440                 (void) printf("cache missed %lld of %lld reads (%lld%%)\n",
 441                     (longlong_t)diskreads, (longlong_t)totalreads,
 442                     (longlong_t)percentage);
 443         }
 444
 445         (void) close(fsreadfd);
 446         (void) close(fswritefd);
 447         fsreadfd = -1;
 448         fswritefd = -1;
 449 }
 450
 451 int
 452 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
 453 {
 454         caddr_t cp;
 455         int i;
 456         int errs;
 457         offset_t offset = ldbtob(blk);
 458         offset_t addr;
 459
 460         /*
 461          * In our universe, nothing exists before the superblock, so
 462          * just pretend it's always zeros.  This is the complement of
 463          * bwrite()'s ignoring write requests into that space.
 464          */
 465         if (blk < SBLOCK) {
 466                 if (debug)
 467                         (void) printf(
 468                             "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
 469                             SBLOCK, (longlong_t)blk);
 470                 (void) memset(buf, 0, (size_t)size);
 471                 return (1);
 472         }
 473
 474         if (llseek(fd, offset, SEEK_SET) < 0) {
 475                 rwerror("SEEK", blk, -1);
 476         }
 477
 478         if ((i = read(fd, buf, size)) == size) {
 479                 return (0);
 480         }
 481         rwerror("READ", blk, i);
 482         if (llseek(fd, offset, SEEK_SET) < 0) {
 483                 rwerror("SEEK", blk, -1);
 484         }
 485         errs = 0;
 486         (void) memset(buf, 0, (size_t)size);
 487         pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
 488         for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
 489                 addr = ldbtob(blk + i);
 490                 if (llseek(fd, addr, SEEK_SET) < 0 ||
 491                     read(fd, cp, (int)secsize) < 0) {
 492                         iscorrupt = 1;
 493                         (void) printf(" %llu", blk + (u_longlong_t)i);
 494                         errs++;
 495                 }
 496         }
 497         (void) printf("\n");
 498         return (errs);
 499 }
 500
 501 void
 502 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
 503 {
 504         int i;
 505         int n;
 506         caddr_t cp;
 507         offset_t offset = ldbtob(blk);
 508         offset_t addr;
 509
 510         if (fd < 0)
 511                 return;
 512         if (blk < SBLOCK) {
 513                 if (debug)
 514                         (void) printf(
 515                     "WARNING: Attempt to write illegal blkno %lld on %s\n",
 516                             (longlong_t)blk, devname);
 517                 return;
 518         }
 519         if (llseek(fd, offset, SEEK_SET) < 0) {
 520                 rwerror("SEEK", blk, -1);
 521         }
 522         if ((i = write(fd, buf, (int)size)) == size) {
 523                 fsmodified = 1;
 524                 return;
 525         }
 526         rwerror("WRITE", blk, i);
 527         if (llseek(fd, offset, SEEK_SET) < 0) {
 528                 rwerror("SEEK", blk, -1);
 529         }
 530         pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
 531         for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
 532                 n = 0;
 533                 addr = ldbtob(blk + i);
 534                 if (llseek(fd, addr, SEEK_SET) < 0 ||
 535                     (n = write(fd, cp, DEV_BSIZE)) < 0) {
 536                         iscorrupt = 1;
 537                         (void) printf(" %llu", blk + (u_longlong_t)i);
 538                 } else if (n > 0) {
 539                         fsmodified = 1;
 540                 }
 541
 542         }
 543         (void) printf("\n");
 544 }
 545
 546 /*
 547  * Allocates the specified number of contiguous fragments.
 548  */
 549 daddr32_t
 550 allocblk(int wantedfrags)
 551 {
 552         int block, leadfrag, tailfrag;
 553         daddr32_t selected;
 554         size_t size;
 555         struct bufarea *bp;
 556
 557         /*
 558          * It's arguable whether we should just fail, or instead
 559          * error out here.  Since we should only ever be asked for
 560          * a single fragment or an entire block (i.e., sblock.fs_frag),
 561          * we'll fail out because anything else means somebody
 562          * changed code without considering all of the ramifications.
 563          */
 564         if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
 565                 exitstat = EXERRFATAL;
 566                 errexit("allocblk() asked for %d frags.  "
 567                     "Legal range is 1 to %d",
 568                     wantedfrags, sblock.fs_frag);
 569         }
 570
 571         /*
 572          * For each filesystem block, look at every possible starting
 573          * offset within the block such that we can get the number of
 574          * contiguous fragments that we need.  This is a drastically
 575          * simplified version of the kernel's mapsearch() and alloc*().
 576          * It's also correspondingly slower.
 577          */
 578         for (block = 0; block < maxfsblock - sblock.fs_frag;
 579             block += sblock.fs_frag) {
 580                 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
 581                     leadfrag++) {
 582                         /*
 583                          * Is first fragment of candidate run available?
 584                          */
 585                         if (testbmap(block + leadfrag))
 586                                 continue;
 587                         /*
 588                          * Are the rest of them available?
 589                          */
 590                         for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
 591                                 if (testbmap(block + leadfrag + tailfrag))
 592                                         break;
 593                         if (tailfrag < wantedfrags) {
 594                                 /*
 595                                  * No, skip the known-unusable run.
 596                                  */
 597                                 leadfrag += tailfrag;
 598                                 continue;
 599                         }
 600                         /*
 601                          * Found what we need, so claim them.
 602                          */
 603                         for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
 604                                 setbmap(block + leadfrag + tailfrag);
 605                         n_blks += wantedfrags;
 606                         size = wantedfrags * sblock.fs_fsize;
 607                         selected = block + leadfrag;
 608                         bp = getdatablk(selected, size);
 609                         (void) memset(bp->b_un.b_buf, 0, size);
 610                         dirty(bp);
 611                         brelse(bp);
 612                         if (debug)
 613                                 (void) printf(
 614                     "allocblk: selected %d (in block %d), frags %d, size %d\n",
 615                                     selected, selected % sblock.fs_bsize,
 616                                     wantedfrags, (int)size);
 617                         return (selected);
 618                 }
 619         }
 620         return (0);
 621 }
 622
 623 /*
 624  * Free a previously allocated block
 625  */
 626 void
 627 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
 628 {
 629         struct inodesc idesc;
 630
 631         if (debug)
 632                 (void) printf("debug: freeing %d fragments starting at %d\n",
 633                     frags, blkno);
 634
 635         init_inodesc(&idesc);
 636
 637         idesc.id_number = ino;
 638         idesc.id_blkno = blkno;
 639         idesc.id_numfrags = frags;
 640         idesc.id_truncto = -1;
 641
 642         /*
 643          * Nothing in the return status has any relevance to how
 644          * we're using pass4check(), so just ignore it.
 645          */
 646         (void) pass4check(&idesc);
 647 }
 648
 649 /*
 650  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
 651  * that the given buffer is at least MAXPATHLEN + 1 characters.
 652  */
 653 void
 654 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
 655 {
 656         int len;
 657         caddr_t cp;
 658         struct dinode *dp;
 659         struct inodesc idesc;
 660         struct inoinfo *inp;
 661
 662         if (debug)
 663                 (void) printf("debug: getpathname(curdir %d, ino %d)\n",
 664                     curdir, ino);
 665
 666         if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
 667                 (void) strcpy(namebuf, "?");
 668                 return;
 669         }
 670
 671         if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
 672                 (void) strcpy(namebuf, "/");
 673                 return;
 674         }
 675
 676         init_inodesc(&idesc);
 677         idesc.id_type = DATA;
 678         cp = &namebuf[MAXPATHLEN - 1];
 679         *cp = '\0';
 680
 681         /*
 682          * In the case of extended attributes, our
 683          * parent won't necessarily be a directory, so just
 684          * return what we've found with a prefix indicating
 685          * that it's an XATTR.  Presumably our caller will
 686          * know what's going on and do something useful, like
 687          * work out the path of the parent and then combine
 688          * the two names.
 689          *
 690          * Can't use strcpy(), etc, because we've probably
 691          * already got some name information in the buffer and
 692          * the usual trailing \0 would lose it.
 693          */
 694         dp = ginode(curdir);
 695         if ((dp->di_mode & IFMT) == IFATTRDIR) {
 696                 idesc.id_number = curdir;
 697                 idesc.id_parent = ino;
 698                 idesc.id_func = findname;
 699                 idesc.id_name = namebuf;
 700                 idesc.id_fix = NOFIX;
 701                 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
 702                         *cp-- = '?';
 703                 }
 704
 705                 len = sizeof (XATTR_DIR_NAME) - 1;
 706                 cp -= len;
 707                 (void) memmove(cp, XATTR_DIR_NAME, len);
 708                 goto attrname;
 709         }
 710
 711         /*
 712          * If curdir == ino, need to get a handle on .. so we
 713          * can search it for ino's name.  Otherwise, just search
 714          * the given directory for ino.  Repeat until out of space
 715          * or a full path has been built.
 716          */
 717         if (curdir != ino) {
 718                 idesc.id_parent = curdir;
 719                 goto namelookup;
 720         }
 721         while (ino != UFSROOTINO && ino != 0) {
 722                 idesc.id_number = ino;
 723                 idesc.id_func = findino;
 724                 idesc.id_name = "..";
 725                 idesc.id_fix = NOFIX;
 726                 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
 727                         inp = getinoinfo(ino);
 728                         if ((inp == NULL) || (inp->i_parent == 0)) {
 729                                 break;
 730                         }
 731                         idesc.id_parent = inp->i_parent;
 732                 }
 733
 734                 /*
 735                  * To get this far, id_parent must have the inode
 736                  * number for `..' in it.  By definition, that's got
 737                  * to be a directory, so search it for the inode of
 738                  * interest.
 739                  */
 740 namelookup:
 741                 idesc.id_number = idesc.id_parent;
 742                 idesc.id_parent = ino;
 743                 idesc.id_func = findname;
 744                 idesc.id_name = namebuf;
 745                 idesc.id_fix = NOFIX;
 746                 if ((ckinode(ginode(idesc.id_number),
 747                     &idesc, CKI_TRAVERSE) & FOUND) == 0) {
 748                         break;
 749                 }
 750                 /*
 751                  * Prepend to what we've accumulated so far.  If
 752                  * there's not enough room for even one more path element
 753                  * (of the worst-case length), then bail out.
 754                  */
 755                 len = strlen(namebuf);
 756                 cp -= len;
 757                 if (cp < &namebuf[MAXNAMLEN])
 758                         break;
 759                 (void) memmove(cp, namebuf, len);
 760                 *--cp = '/';
 761
 762                 /*
 763                  * Corner case for a looped-to-itself directory.
 764                  */
 765                 if (ino == idesc.id_number)
 766                         break;
 767
 768                 /*
 769                  * Climb one level of the hierarchy.  In other words,
 770                  * the current .. becomes the inode to search for and
 771                  * its parent becomes the directory to search in.
 772                  */
 773                 ino = idesc.id_number;
 774         }
 775
 776         /*
 777          * If we hit a discontinuity in the hierarchy, indicate it by
 778          * prefixing the path so far with `?'.  Otherwise, the first
 779          * character will be `/' as a side-effect of the *--cp above.
 780          *
 781          * The special case is to handle the situation where we're
 782          * trying to look something up in UFSROOTINO, but didn't find
 783          * it.
 784          */
 785         if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
 786                 if (cp > namebuf)
 787                         cp--;
 788                 *cp = '?';
 789         }
 790
 791         /*
 792          * The invariants being used for buffer integrity are:
 793          * - namebuf[] is terminated with \0 before anything else
 794          * - cp is always <= the last element of namebuf[]
 795          * - the new path element is always stored at the
 796          *   beginning of namebuf[], and is no more than MAXNAMLEN-1
 797          *   characters
 798          * - cp is is decremented by the number of characters in
 799          *   the new path element
 800          * - if, after the above accounting for the new element's
 801          *   size, there is no longer enough room at the beginning of
 802          *   namebuf[] for a full-sized path element and a slash,
 803          *   terminate the loop.  cp is in the range
 804          *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
 805          */
 806 attrname:
 807         /* LINTED per the above discussion */
 808         (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
 809 }
 810
/*
 * Clean up through ckfini() and terminate with exit status EXSIGNAL.
 * The argument is ignored.  Judging by its signature this is meant to
 * be installed as a signal handler; the registration is not in this
 * part of the file.
 */
/* ARGSUSED */
void
catch(int dummy)
{
        ckfini();
        exit(EXSIGNAL);
}
 818
/*
 * When preening, allow a single quit to signal
 * a special exit after filesystem checks complete
 * so that reboot sequence may be interrupted.
 *
 * Announces the request, sets the global `interrupted' flag, and
 * restores the default SIGQUIT disposition so that this handler runs
 * at most once.  The argument is ignored.
 */
/* ARGSUSED */
void
catchquit(int dummy)
{
        (void) printf("returning to single-user after filesystem check\n");
        interrupted = 1;
        /* From here on SIGQUIT gets its default action. */
        (void) signal(SIGQUIT, SIG_DFL);
}
 832
 833
 834 /*
 835  * determine whether an inode should be fixed.
 836  */
 837 NOTE(PRINTFLIKE(2))
 838 int
 839 dofix(struct inodesc *idesc, caddr_t msg, ...)
 840 {
 841         int rval = 0;
 842         va_list ap;
 843
 844         va_start(ap, msg);
 845
 846         switch (idesc->id_fix) {
 847
 848         case DONTKNOW:
 849                 if (idesc->id_type == DATA)
 850                         vdirerror(idesc->id_number, msg, ap);
 851                 else
 852                         vpwarn(msg, ap);
 853                 if (preen) {
 854                         idesc->id_fix = FIX;
 855                         rval = ALTERED;
 856                         break;
 857                 }
 858                 if (reply("SALVAGE") == 0) {
 859                         idesc->id_fix = NOFIX;
 860                         break;
 861                 }
 862                 idesc->id_fix = FIX;
 863                 rval = ALTERED;
 864                 break;
 865
 866         case FIX:
 867                 rval = ALTERED;
 868                 break;
 869
 870         case NOFIX:
 871                 break;
 872
 873         default:
 874                 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
 875         }
 876
 877         va_end(ap);
 878         return (rval);
 879 }
 880
/*
 * Print a printf-style message and terminate the program.  This is
 * the varargs front end for verrexit(), which does the cleanup, the
 * printing and the exit().  Does not return.
 */
NOTE(PRINTFLIKE(1))
void
errexit(caddr_t fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        /* verrexit() always ends in exit(), so no va_end() follows. */
        verrexit(fmt, ap);
        /* NOTREACHED */
}
 891
 892 NOTE(PRINTFLIKE(1))
 893 static void
 894 verrexit(caddr_t fmt, va_list ap)
 895 {
 896         static int recursing = 0;
 897
 898         if (!recursing) {
 899                 recursing = 1;
 900                 if (errorlocked || iscorrupt) {
 901                         if (havesb && fswritefd >= 0) {
 902                                 sblock.fs_clean = FSBAD;
 903                                 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
 904                                 sblock.fs_state = -sblock.fs_state;
 905                                 sbdirty();
 906                                 write_altsb(fswritefd);
 907                                 flush(fswritefd, &sblk);
 908                         }
 909                 }
 910                 ckfini();
 911                 recursing = 0;
 912         }
 913         (void) vprintf(fmt, ap);
 914         if (fmt[strlen(fmt) - 1] != '\n')
 915                 (void) putchar('\n');
 916         exit((exitstat != 0) ? exitstat : EXERRFATAL);
 917 }
 918
 919 /*
 920  * An unexpected inconsistency occured.
 921  * Die if preening, otherwise just print message and continue.
 922  */
 923 NOTE(PRINTFLIKE(1))
 924 void
 925 pfatal(caddr_t fmt, ...)
 926 {
 927         va_list ap;
 928
 929         va_start(ap, fmt);
 930         vpfatal(fmt, ap);
 931         va_end(ap);
 932 }
 933
 934 NOTE(PRINTFLIKE(1))
 935 static void
 936 vpfatal(caddr_t fmt, va_list ap)
 937 {
 938         if (preen) {
 939                 if (*fmt != '\0') {
 940                         (void) printf("%s: ", devname);
 941                         (void) vprintf(fmt, ap);
 942                         (void) printf("\n");
 943                 }
 944                 (void) printf(
 945                     "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
 946                     devname);
 947                 if (havesb && fswritefd >= 0) {
 948                         sblock.fs_clean = FSBAD;
 949                         sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
 950                         sbdirty();
 951                         flush(fswritefd, &sblk);
 952                 }
 953                 /*
 954                  * We're exiting, it doesn't really matter that our
 955                  * caller doesn't get to call va_end().
 956                  */
 957                 if (exitstat == 0)
 958                         exitstat = EXFNDERRS;
 959                 exit(exitstat);
 960         }
 961         if (*fmt != '\0') {
 962                 (void) vprintf(fmt, ap);
 963         }
 964 }
 965
 966 /*
 967  * Pwarn just prints a message when not preening,
 968  * or a warning (preceded by filename) when preening.
 969  */
 970 NOTE(PRINTFLIKE(1))
 971 void
 972 pwarn(caddr_t fmt, ...)
 973 {
 974         va_list ap;
 975
 976         va_start(ap, fmt);
 977         vpwarn(fmt, ap);
 978         va_end(ap);
 979 }
 980
 981 NOTE(PRINTFLIKE(1))
 982 static void
 983 vpwarn(caddr_t fmt, va_list ap)
 984 {
 985         if (*fmt != '\0') {
 986                 if (preen)
 987                         (void) printf("%s: ", devname);
 988                 (void) vprintf(fmt, ap);
 989         }
 990 }
 991
 992 /*
 993  * Like sprintf(), except the buffer is dynamically allocated
 994  * and returned, instead of being passed in.  A pointer to the
 995  * buffer is stored in *RET, and FMT is the usual format string.
 996  * The number of characters in *RET (excluding the trailing \0,
 997  * to be consistent with the other *printf() routines) is returned.
 998  *
 999  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1000  */
1001 NOTE(PRINTFLIKE(2))
1002 int
1003 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1004 {
1005         int len;
1006         caddr_t buffer;
1007         va_list ap;
1008
1009         va_start(ap, fmt);
1010         len = vsnprintf(NULL, 0, fmt, ap);
1011         va_end(ap);
1012
1013         buffer = malloc((len + 1) * sizeof (char));
1014         if (buffer == NULL) {
1015                 errexit("Out of memory in asprintf\n");
1016                 /* NOTREACHED */
1017         }
1018
1019         va_start(ap, fmt);
1020         (void) vsnprintf(buffer, len + 1, fmt, ap);
1021         va_end(ap);
1022
1023         *ret = buffer;
1024         return (len);
1025 }
1026
1027 /*
1028  * So we can take advantage of kernel routines in ufs_subr.c.
1029  */
1030 /* PRINTFLIKE2 */
1031 void
1032 cmn_err(int level, caddr_t fmt, ...)
1033 {
1034         va_list ap;
1035
1036         va_start(ap, fmt);
1037         if (level == CE_PANIC) {
1038                 (void) printf("INTERNAL INCONSISTENCY:");
1039                 verrexit(fmt, ap);
1040         } else {
1041                 (void) vprintf(fmt, ap);
1042         }
1043         va_end(ap);
1044 }
1045
1046 /*
1047  * Check to see if unraw version of name is already mounted.
1048  * Updates devstr with the device name if devstr is not NULL
1049  * and str_size is positive.
1050  */
1051 int
1052 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1053 {
1054         int found;
1055         struct mnttab *mntent;
1056
1057         mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1058         if (mntent == NULL)
1059                 return (M_NOMNT);
1060
1061         /*
1062          * It's mounted.  With or without write access?
1063          */
1064         if (hasmntopt(mntent, MNTOPT_RO) != 0)
1065                 found = M_RO;   /* mounted as RO */
1066         else
1067                 found = M_RW;   /* mounted as R/W */
1068
1069         if (mount_point == NULL) {
1070                 mount_point = strdup(mntent->mnt_mountp);
1071                 if (mount_point == NULL) {
1072                         errexit("fsck: memory allocation failure: %s",
1073                             strerror(errno));
1074                         /* NOTREACHED */
1075                 }
1076
1077                 if (devstr != NULL && str_size > 0)
1078                         (void) strlcpy(devstr, mntent->mnt_special, str_size);
1079         }
1080
1081         return (found);
1082 }
1083
1084 /*
1085  * Check to see if name corresponds to an entry in vfstab, and that the entry
1086  * does not have option ro.
1087  */
1088 int
1089 writable(caddr_t name)
1090 {
1091         int rw = 1;
1092         struct vfstab vfsbuf, vfskey;
1093         FILE *vfstab;
1094
1095         vfstab = fopen(VFSTAB, "r");
1096         if (vfstab == NULL) {
1097                 (void) printf("can't open %s\n", VFSTAB);
1098                 return (1);
1099         }
1100         (void) memset(&vfskey, 0, sizeof (vfskey));
1101         vfsnull(&vfskey);
1102         vfskey.vfs_special = unrawname(name);
1103         vfskey.vfs_fstype = MNTTYPE_UFS;
1104         if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1105             (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1106                 rw = 0;
1107         }
1108         (void) fclose(vfstab);
1109         return (rw);
1110 }
1111
1112 /*
1113  * debugclean
1114  */
1115 static void
1116 debugclean(void)
1117 {
1118         if (!debug)
1119                 return;
1120
1121         if ((iscorrupt == 0) && (isdirty == 0))
1122                 return;
1123
1124         if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1125             (sblock.fs_clean == FSLOG && islog && islogok) ||
1126             ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1127                 return;
1128
1129         (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1130             sblock.fs_clean == FSSTABLE ? "stable" :
1131             sblock.fs_clean == FSLOG ? "logging" :
1132             sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1133             devname);
1134 }
1135
1136 /*
1137  * updateclean
1138  *      Carefully and transparently update the clean flag.
1139  *
1140  * `iscorrupt' has to be in its final state before this is called.
1141  */
1142 int
1143 updateclean(void)
1144 {
1145         int freedlog = 0;
1146         struct bufarea cleanbuf;
1147         size_t size;
1148         ssize_t io_res;
1149         diskaddr_t bno;
1150         char fsclean;
1151         int fsreclaim;
1152         char fsflags;
1153         int flags_ok = 1;
1154         daddr32_t fslogbno;
1155         offset_t sblkoff;
1156         time_t t;
1157
1158         /*
1159          * debug stuff
1160          */
1161         debugclean();
1162
1163         /*
1164          * set fsclean to its appropriate value
1165          */
1166         fslogbno = sblock.fs_logbno;
1167         fsclean = sblock.fs_clean;
1168         fsreclaim = sblock.fs_reclaim;
1169         fsflags = sblock.fs_flags;
1170         if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1171                 fsclean = FSACTIVE;
1172         }
1173         /*
1174          * If ufs log is not okay, note that we need to clear it.
1175          */
1176         examinelog(NULL);
1177         if (fslogbno && !(islog && islogok)) {
1178                 fsclean = FSACTIVE;
1179                 fslogbno = 0;
1180         }
1181
1182         /*
1183          * if necessary, update fs_clean and fs_state
1184          */
1185         switch (fsclean) {
1186
1187         case FSACTIVE:
1188                 if (!iscorrupt) {
1189                         fsclean = FSSTABLE;
1190                         fsreclaim = 0;
1191                 }
1192                 break;
1193
1194         case FSCLEAN:
1195         case FSSTABLE:
1196                 if (iscorrupt) {
1197                         fsclean = FSACTIVE;
1198                 } else {
1199                         fsreclaim = 0;
1200                 }
1201                 break;
1202
1203         case FSLOG:
1204                 if (iscorrupt) {
1205                         fsclean = FSACTIVE;
1206                 } else if (!islog || fslogbno == 0) {
1207                         fsclean = FSSTABLE;
1208                         fsreclaim = 0;
1209                 } else if (fflag) {
1210                         fsreclaim = 0;
1211                 }
1212                 break;
1213
1214         case FSFIX:
1215                 fsclean = FSBAD;
1216                 if (errorlocked && !iscorrupt) {
1217                         fsclean = islog ? FSLOG : FSCLEAN;
1218                 }
1219                 break;
1220
1221         default:
1222                 if (iscorrupt) {
1223                         fsclean = FSACTIVE;
1224                 } else {
1225                         fsclean = FSSTABLE;
1226                         fsreclaim = 0;
1227                 }
1228         }
1229
1230         if (largefile_count > 0)
1231                 fsflags |= FSLARGEFILES;
1232         else
1233                 fsflags &= ~FSLARGEFILES;
1234
1235         /*
1236          * There can be two discrepencies here.  A) The superblock
1237          * shows no largefiles but we found some while scanning.
1238          * B) The superblock indicates the presence of largefiles,
1239          * but none are present.  Note that if preening, the superblock
1240          * is silently corrected.
1241          */
1242         if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1243             (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1244                 flags_ok = 0;
1245
1246         if (debug)
1247                 (void) printf(
1248                     "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1249                     largefile_count, sblock.fs_flags, flags_ok);
1250
1251         /*
1252          * If fs is unchanged, do nothing.
1253          */
1254         if ((!isdirty) && (flags_ok) &&
1255             (fslogbno == sblock.fs_logbno) &&
1256             (sblock.fs_clean == fsclean) &&
1257             (sblock.fs_reclaim == fsreclaim) &&
1258             (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1259                 if (errorlocked) {
1260                         if (!do_errorlock(LOCKFS_ULOCK))
1261                                 pwarn(
1262                     "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1263                 }
1264                 return (freedlog);
1265         }
1266
1267         /*
1268          * if user allows, update superblock state
1269          */
1270         if (debug) {
1271                 (void) printf(
1272             "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1273                     sblock.fs_flags, sblock.fs_logbno,
1274                     sblock.fs_clean, sblock.fs_reclaim,
1275                     sblock.fs_state + sblock.fs_time);
1276                 (void) printf(
1277             "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1278                     fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1279         }
1280         if (!isdirty && !preen && !rerun &&
1281             (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1282                 return (freedlog);
1283
1284         (void) time(&t);
1285         sblock.fs_time = (time32_t)t;
1286         if (debug)
1287                 printclean();
1288
1289         if (sblock.fs_logbno != fslogbno) {
1290                 examinelog(&freelogblk);
1291                 freedlog++;
1292         }
1293
1294         sblock.fs_logbno = fslogbno;
1295         sblock.fs_clean = fsclean;
1296         sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1297         sblock.fs_reclaim = fsreclaim;
1298         sblock.fs_flags = fsflags;
1299
1300         /*
1301          * if superblock can't be written, return
1302          */
1303         if (fswritefd < 0)
1304                 return (freedlog);
1305
1306         /*
1307          * Read private copy of superblock, update clean flag, and write it.
1308          */
1309         bno  = sblk.b_bno;
1310         size = sblk.b_size;
1311
1312         sblkoff = ldbtob(bno);
1313
1314         if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1315                 errexit("out of memory");
1316         if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1317                 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1318                     (longlong_t)bno, strerror(errno));
1319                 goto out;
1320         }
1321
1322         if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1323                 report_io_prob("READ FROM", bno, size, io_res);
1324                 goto out;
1325         }
1326
1327         cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
1328         cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
1329         cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
1330         cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
1331         cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1332         cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;
1333
1334         if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1335                 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1336                     (longlong_t)bno, strerror(errno));
1337                 goto out;
1338         }
1339
1340         if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1341                 report_io_prob("WRITE TO", bno, size, io_res);
1342                 goto out;
1343         }
1344
1345         /*
1346          * 1208040
1347          * If we had to use -b to grab an alternate superblock, then we
1348          * likely had to do so because of unacceptable differences between
1349          * the main and alternate superblocks.  So, we had better update
1350          * the alternate superblock as well, or we'll just fail again
1351          * the next time we attempt to run fsck!
1352          */
1353         if (bflag != 0) {
1354                 write_altsb(fswritefd);
1355         }
1356
1357         if (errorlocked) {
1358                 if (!do_errorlock(LOCKFS_ULOCK))
1359                         pwarn(
1360                     "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1361         }
1362
1363 out:
1364         if (cleanbuf.b_un.b_buf != NULL) {
1365                 free((void *)cleanbuf.b_un.b_buf);
1366         }
1367
1368         return (freedlog);
1369 }
1370
1371 static void
1372 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1373 {
1374         if (failure < 0)
1375                 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1376                     what, (int)bno, strerror(errno));
1377         else if (failure == 0)
1378                 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1379                     what, (int)bno);
1380         else
1381                 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1382                     what, (int)bno, (unsigned)failure, (unsigned)expected);
1383 }
1384
1385 /*
1386  * print out clean info
1387  */
1388 void
1389 printclean(void)
1390 {
1391         caddr_t s;
1392
1393         if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1394                 s = "unknown";
1395         else
1396                 switch (sblock.fs_clean) {
1397
1398                 case FSACTIVE:
1399                         s = "active";
1400                         break;
1401
1402                 case FSCLEAN:
1403                         s = "clean";
1404                         break;
1405
1406                 case FSSTABLE:
1407                         s = "stable";
1408                         break;
1409
1410                 case FSLOG:
1411                         s = "logging";
1412                         break;
1413
1414                 case FSBAD:
1415                         s = "is bad";
1416                         break;
1417
1418                 case FSFIX:
1419                         s = "being fixed";
1420                         break;
1421
1422                 default:
1423                         s = "unknown";
1424                 }
1425
1426         if (preen)
1427                 pwarn("is %s.\n", s);
1428         else
1429                 (void) printf("** %s is %s.\n", devname, s);
1430 }
1431
1432 int
1433 is_errorlocked(caddr_t fs)
1434 {
1435         int             retval;
1436         struct stat64   statb;
1437         caddr_t         mountp;
1438         struct mnttab   *mntent;
1439
1440         retval = 0;
1441
1442         if (!fs)
1443                 return (0);
1444
1445         if (stat64(fs, &statb) < 0)
1446                 return (0);
1447
1448         if (S_ISDIR(statb.st_mode)) {
1449                 mountp = fs;
1450         } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1451                 mntent = search_mnttab(NULL, fs, NULL, 0);
1452                 if (mntent == NULL)
1453                         return (0);
1454                 mountp = mntent->mnt_mountp;
1455                 if (mountp == NULL) /* theoretically a can't-happen */
1456                         return (0);
1457         } else {
1458                 return (0);
1459         }
1460
1461         /*
1462          * From here on, must `goto out' to avoid memory leakage.
1463          */
1464
1465         if (elock_combuf == NULL)
1466                 elock_combuf =
1467                     (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1468         else
1469                 elock_combuf =
1470                     (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1471
1472         if (elock_combuf == NULL)
1473                 goto out;
1474
1475         (void) memset(elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1476
1477         if (elock_mountp != NULL) {
1478                 free(elock_mountp);
1479         }
1480
1481         elock_mountp = strdup(mountp);
1482         if (elock_mountp == NULL)
1483                 goto out;
1484
1485         if (mountfd < 0) {
1486                 if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1487                         goto out;
1488         }
1489
1490         if (lfp == NULL) {
1491                 lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1492                 if (lfp == NULL)
1493                         goto out;
1494                 (void) memset(lfp, 0, sizeof (struct lockfs));
1495         }
1496
1497         lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1498         lfp->lf_comment = elock_combuf;
1499
1500         if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1501                 goto out;
1502
1503         /*
1504          * lint believes that the ioctl() (or any other function
1505          * taking lfp as an arg) could free lfp.  This is not the
1506          * case, however.
1507          */
1508         retval = LOCKFS_IS_ELOCK(lfp);
1509
1510 out:
1511         return (retval);
1512 }
1513
1514 /*
1515  * Given a name which is known to be a directory, see if it appears
1516  * in the vfstab.  If so, return the entry's block (special) device
1517  * field via devstr.
1518  */
1519 int
1520 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1521 {
1522         return (NULL != search_vfstab(name, NULL, devstr, str_size));
1523 }
1524
1525 /*
1526  * Given a name which is known to be a directory, see if it appears
1527  * in the mnttab.  If so, return the entry's block (special) device
1528  * field via devstr.
1529  */
1530 int
1531 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1532 {
1533         return (NULL != search_mnttab(name, NULL, devstr, str_size));
1534 }
1535
1536 /*
1537  * Search for mount point and/or special device in the given file.
1538  * The first matching entry is returned.
1539  *
1540  * If an entry is found and str_size is greater than zero, then
1541  * up to size_str bytes of the special device name from the entry
1542  * are copied to devstr.
1543  */
1544
1545 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1546                         st_nuller, st_init, st_searcher) \
1547         { \
1548                 FILE *fp; \
1549                 struct st_type *retval = NULL; \
1550                 struct st_type key; \
1551                 static struct st_type buffer; \
1552                 \
1553                 /* LINTED ``assigned value never used'' */ \
1554                 st_nuller(&key); \
1555                 key.st_mount = mountp; \
1556                 key.st_special = special; \
1557                 st_init; \
1558                 \
1559                 if ((fp = fopen(st_file, "r")) == NULL) \
1560                         return (NULL); \
1561                 \
1562                 if (st_searcher(fp, &buffer, &key) == 0) { \
1563                         retval = &buffer; \
1564                         if (devstr != NULL && str_size > 0 && \
1565                             buffer.st_special != NULL) { \
1566                                 (void) strlcpy(devstr, buffer.st_special, \
1567                                     str_size); \
1568                         } \
1569                 } \
1570                 (void) fclose(fp); \
1571                 return (retval); \
1572         }
1573
1574 static struct vfstab *
1575 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1576 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1577                 (retval = retval), getvfsany)
1578
1579 static struct mnttab *
1580 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1581 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1582                 (key.mnt_fstype = MNTTYPE_UFS), getmntany)
1583
1584 int
1585 do_errorlock(int lock_type)
1586 {
1587         caddr_t    buf;
1588         time_t     now;
1589         struct tm *local;
1590         int        rc;
1591
1592         if (elock_combuf == NULL)
1593                 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1594                     elock_mountp ? elock_mountp : "<null>",
1595                     lock_type);
1596
1597         if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1598             NULL) {
1599                 errexit("Couldn't alloc memory for temp. lock status buffer\n");
1600         }
1601         if (lfp == NULL) {
1602                 errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1603                     elock_mountp, lock_type);
1604         }
1605
1606         (void) memmove((void *)buf, (void *)elock_combuf,
1607             LOCKFS_MAXCOMMENTLEN-1);
1608
1609         switch (lock_type) {
1610         case LOCKFS_ELOCK:
1611                 /*
1612                  * Note that if it is error-locked, we won't get an
1613                  * error back if we try to error-lock it again.
1614                  */
1615                 if (time(&now) != (time_t)-1) {
1616                         if ((local = localtime(&now)) != NULL)
1617                                 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1618                     "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1619                                     elock_combuf, (int)pid,
1620                                     local->tm_mon + 1, local->tm_mday,
1621                                     (local->tm_year % 100), local->tm_hour,
1622                                     local->tm_min, local->tm_sec);
1623                         else
1624                                 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1625                                     "%s [fsck pid %d", elock_combuf, pid);
1626
1627                 } else {
1628                         (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1629                             "%s [fsck pid %d", elock_combuf, pid);
1630                 }
1631                 break;
1632
1633         case LOCKFS_ULOCK:
1634                 if (time(&now) != (time_t)-1) {
1635                         if ((local = localtime(&now)) != NULL) {
1636                                 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1637                                     "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1638                                     elock_combuf,
1639                                     local->tm_mon + 1, local->tm_mday,
1640                                     (local->tm_year % 100), local->tm_hour,
1641                                     local->tm_min, local->tm_sec);
1642                         } else {
1643                                 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1644                                     "%s]", elock_combuf);
1645                         }
1646                 } else {
1647                         (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1648                             "%s]", elock_combuf);
1649                 }
1650                 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1651                         pwarn("do_errorlock: unlock failed: %s\n",
1652                             strerror(errno));
1653                         goto out;
1654                 }
1655                 break;
1656
1657         default:
1658                 break;
1659         }
1660
1661         (void) memmove((void *)elock_combuf, (void *)buf,
1662             LOCKFS_MAXCOMMENTLEN - 1);
1663
1664         lfp->lf_lock = lock_type;
1665         lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1666         lfp->lf_comment = elock_combuf;
1667         lfp->lf_flags = 0;
1668         errno = 0;
1669
1670         if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1671                 if (errno == EINVAL) {
1672                         pwarn("Another fsck active?\n");
1673                         iscorrupt = 0;  /* don't go away mad, just go away */
1674                 } else {
1675                         pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1676                             lock_type, elock_combuf, strerror(errno));
1677                 }
1678         }
1679 out:
1680         if (buf != NULL) {
1681                 free((void *)buf);
1682         }
1683
1684         return (rc != -1);
1685 }
1686
1687 /*
1688  * Shadow inode support.  To register a shadow with a client is to note
1689  * that an inode (the client) refers to the shadow.
1690  */
1691
1692 static struct shadowclients *
1693 newshadowclient(struct shadowclients *prev)
1694 {
1695         struct shadowclients *rc;
1696
1697         rc = (struct shadowclients *)malloc(sizeof (*rc));
1698         if (rc == NULL)
1699                 errexit("newshadowclient: cannot malloc shadow client");
1700         rc->next = prev;
1701         rc->nclients = 0;
1702
1703         rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1704             maxshadowclients);
1705         if (rc->client == NULL)
1706                 errexit("newshadowclient: cannot malloc client array");
1707         return (rc);
1708 }
1709
1710 void
1711 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1712         struct shadowclientinfo **info)
1713 {
1714         struct shadowclientinfo *sci;
1715         struct shadowclients *scc;
1716
1717         /*
1718          * Already have a record for this shadow?
1719          */
1720         for (sci = *info; sci != NULL; sci = sci->next)
1721                 if (sci->shadow == shadow)
1722                         break;
1723         if (sci == NULL) {
1724                 /*
1725                  * It's a new shadow, add it to the list
1726                  */
1727                 sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1728                 if (sci == NULL)
1729                         errexit("registershadowclient: cannot malloc");
1730                 sci->next = *info;
1731                 *info = sci;
1732                 sci->shadow = shadow;
1733                 sci->totalClients = 0;
1734                 sci->clients = newshadowclient(NULL);
1735         }
1736
1737         sci->totalClients++;
1738         scc = sci->clients;
1739         if (scc->nclients >= maxshadowclients) {
1740                 scc = newshadowclient(sci->clients);
1741                 sci->clients = scc;
1742         }
1743
1744         scc->client[scc->nclients++] = client;
1745 }
1746
1747 /*
1748  * Locate and discard a shadow.
1749  */
1750 void
1751 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1752 {
1753         struct shadowclientinfo *sci, *prev;
1754
1755         /*
1756          * Do we have a record for this shadow?
1757          */
1758         prev = NULL;
1759         for (sci = *info; sci != NULL; sci = sci->next) {
1760                 if (sci->shadow == shadow)
1761                         break;
1762                 prev = sci;
1763         }
1764
1765         if (sci != NULL) {
1766                 /*
1767                  * First, pull it off the list, since we know there
1768                  * shouldn't be any future references to this one.
1769                  */
1770                 if (prev == NULL)
1771                         *info = sci->next;
1772                 else
1773                         prev->next = sci->next;
1774                 deshadow(sci, clearattrref);
1775         }
1776 }
1777
1778 /*
1779  * Discard all memory used to track clients of a shadow.
1780  */
1781 void
1782 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1783 {
1784         struct shadowclients *clients, *discard;
1785         int idx;
1786
1787         clients = sci->clients;
1788         while (clients != NULL) {
1789                 discard = clients;
1790                 clients = clients->next;
1791                 if (discard->client != NULL) {
1792                         if (cb != NULL) {
1793                                 for (idx = 0; idx < discard->nclients; idx++)
1794                                         (*cb)(discard->client[idx]);
1795                         }
1796                         free((void *)discard->client);
1797                 }
1798                 free((void *)discard);
1799         }
1800
1801         free((void *)sci);
1802 }
1803
1804 /*
1805  * Allocate more buffer as need arises but allocate one at a time.
1806  * This is done to make sure that fsck does not exit with error if it
1807  * needs more buffer to complete its task.
1808  */
1809 static struct bufarea *
1810 alloc_bufarea(void)
1811 {
1812         struct bufarea *newbp;
1813         caddr_t bufp;
1814
1815         bufp = malloc((unsigned int)sblock.fs_bsize);
1816         if (bufp == NULL)
1817                 return (NULL);
1818
1819         newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1820         if (newbp == NULL) {
1821                 free((void *)bufp);
1822                 return (NULL);
1823         }
1824
1825         initbarea(newbp);
1826         newbp->b_un.b_buf = bufp;
1827         newbp->b_prev = &bufhead;
1828         newbp->b_next = bufhead.b_next;
1829         bufhead.b_next->b_prev = newbp;
1830         bufhead.b_next = newbp;
1831         bufhead.b_size++;
1832         return (newbp);
1833 }
1834
1835 /*
1836  * We length-limit in both unrawname() and rawname() to avoid
1837  * overflowing our arrays or those of our naive, trusting callers.
1838  */
1839
1840 caddr_t
1841 unrawname(caddr_t name)
1842 {
1843         caddr_t dp;
1844         static char fullname[MAXPATHLEN + 1];
1845
1846         if ((dp = getfullblkname(name)) == NULL)
1847                 return ("");
1848
1849         (void) strlcpy(fullname, dp, sizeof (fullname));
1850         /*
1851          * Not reporting under debug, as the allocation isn't
1852          * reported by getfullblkname.  The idea is that we
1853          * produce balanced alloc/free instances.
1854          */
1855         free(dp);
1856
1857         return (fullname);
1858 }
1859
1860 caddr_t
1861 rawname(caddr_t name)
1862 {
1863         caddr_t dp;
1864         static char fullname[MAXPATHLEN + 1];
1865
1866         if ((dp = getfullrawname(name)) == NULL)
1867                 return ("");
1868
1869         (void) strlcpy(fullname, dp, sizeof (fullname));
1870         /*
1871          * Not reporting under debug, as the allocation isn't
1872          * reported by getfullblkname.  The idea is that we
1873          * produce balanced alloc/free instances.
1874          */
1875         free(dp);
1876
1877         return (fullname);
1878 }
1879
1880 /*
1881  * Make sure that a cg header looks at least moderately reasonable.
1882  * We want to be able to trust the contents enough to be able to use
1883  * the standard accessor macros.  So, besides looking at the obvious
1884  * such as the magic number, we verify that the offset field values
1885  * are properly aligned and not too big or small.
1886  *
1887  * Returns a NULL pointer if the cg is sane enough for our needs, else
1888  * a dynamically-allocated string describing all of its faults.
1889  */
1890 #define Append_Error(full, full_len, addition, addition_len) \
1891         if (full == NULL) { \
1892                 full = addition; \
1893                 full_len = addition_len; \
1894         } else { \
1895                 /* lint doesn't think realloc() understands NULLs */ \
1896                 full = realloc(full, full_len + addition_len + 1); \
1897                 if (full == NULL) { \
1898                         errexit("Out of memory in cg_sanity"); \
1899                         /* NOTREACHED */ \
1900                 } \
1901                 (void) strcpy(full + full_len, addition); \
1902                 full_len += addition_len; \
1903                 free(addition); \
1904         }
1905
1906 caddr_t
1907 cg_sanity(struct cg *cgp, int cgno)
1908 {
1909         caddr_t full_err;
1910         caddr_t this_err = NULL;
1911         int full_len, this_len;
1912         daddr32_t ndblk;
1913         daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1914         daddr32_t exp_freeoff, exp_nextfreeoff;
1915
1916         cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1917             &exp_freeoff, &exp_nextfreeoff, &ndblk);
1918
1919         full_err = NULL;
1920         full_len = 0;
1921
1922         if (!cg_chkmagic(cgp)) {
1923                 this_len = fsck_asprintf(&this_err,
1924                     "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1925                     cgp->cg_magic, CG_MAGIC);
1926                 Append_Error(full_err, full_len, this_err, this_len);
1927         }
1928
1929         if (cgp->cg_cgx != cgno) {
1930                 this_len = fsck_asprintf(&this_err,
1931                     "WRONG CG NUMBER (%d should be %d)\n",
1932                     cgp->cg_cgx, cgno);
1933                 Append_Error(full_err, full_len, this_err, this_len);
1934         }
1935
1936         if ((cgp->cg_btotoff & 3) != 0) {
1937                 this_len = fsck_asprintf(&this_err,
1938                     "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1939                     cgp->cg_btotoff);
1940                 Append_Error(full_err, full_len, this_err, this_len);
1941         }
1942
1943         if ((cgp->cg_boff & 1) != 0) {
1944                 this_len = fsck_asprintf(&this_err,
1945             "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1946                     cgp->cg_boff);
1947                 Append_Error(full_err, full_len, this_err, this_len);
1948         }
1949
1950         if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1951                 if (cgp->cg_ncyl < 1) {
1952                         this_len = fsck_asprintf(&this_err,
1953             "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1954                             cgp->cg_ncyl);
1955                 } else {
1956                         this_len = fsck_asprintf(&this_err,
1957             "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1958                             cgp->cg_ncyl, sblock.fs_cpg);
1959                 }
1960                 Append_Error(full_err, full_len, this_err, this_len);
1961         }
1962
1963         if (cgp->cg_niblk != sblock.fs_ipg) {
1964                 this_len = fsck_asprintf(&this_err,
1965                     "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1966                     cgp->cg_niblk, sblock.fs_ipg);
1967                 Append_Error(full_err, full_len, this_err, this_len);
1968         }
1969
1970         if (cgp->cg_ndblk != ndblk) {
1971                 this_len = fsck_asprintf(&this_err,
1972             "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1973                     cgp->cg_ndblk, ndblk);
1974                 Append_Error(full_err, full_len, this_err, this_len);
1975         }
1976
1977         if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1978                 this_len = fsck_asprintf(&this_err,
1979                     "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1980                     "(%d should be at least 0 and less than %d)\n",
1981                     cgp->cg_rotor, ndblk);
1982                 Append_Error(full_err, full_len, this_err, this_len);
1983         }
1984
1985         if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1986                 this_len = fsck_asprintf(&this_err,
1987                     "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1988                     "(%d should be at least 0 and less than %d)\n",
1989                     cgp->cg_frotor, ndblk);
1990                 Append_Error(full_err, full_len, this_err, this_len);
1991         }
1992
1993         if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1994                 this_len = fsck_asprintf(&this_err,
1995                     "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1996                     "(%d should be at least 0 and less than %d)\n",
1997                     cgp->cg_irotor, sblock.fs_ipg);
1998                 Append_Error(full_err, full_len, this_err, this_len);
1999         }
2000
2001         if (cgp->cg_btotoff != exp_btotoff) {
2002                 this_len = fsck_asprintf(&this_err,
2003                     "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2004                     cgp->cg_btotoff, exp_btotoff);
2005                 Append_Error(full_err, full_len, this_err, this_len);
2006         }
2007
2008         if (cgp->cg_boff != exp_boff) {
2009                 this_len = fsck_asprintf(&this_err,
2010                     "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2011                     cgp->cg_boff, exp_boff);
2012                 Append_Error(full_err, full_len, this_err, this_len);
2013         }
2014
2015         if (cgp->cg_iusedoff != exp_iusedoff) {
2016                 this_len = fsck_asprintf(&this_err,
2017                     "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2018                     cgp->cg_iusedoff, exp_iusedoff);
2019                 Append_Error(full_err, full_len, this_err, this_len);
2020         }
2021
2022         if (cgp->cg_freeoff != exp_freeoff) {
2023                 this_len = fsck_asprintf(&this_err,
2024                     "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2025                     cgp->cg_freeoff, exp_freeoff);
2026                 Append_Error(full_err, full_len, this_err, this_len);
2027         }
2028
2029         if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2030                 this_len = fsck_asprintf(&this_err,
2031                     "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2032                     cgp->cg_nextfreeoff, exp_nextfreeoff);
2033                 Append_Error(full_err, full_len, this_err, this_len);
2034         }
2035
2036         return (full_err);
2037 }
2038
2039 #undef  Append_Error
2040
2041 /*
2042  * This is taken from mkfs, and is what is used to come up with the
2043  * original values for a struct cg.  This implies that, since these
2044  * are all constants, recalculating them now should give us the same
2045  * thing as what's on disk.
2046  */
2047 static void
2048 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2049         daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2050         daddr32_t *ndblk)
2051 {
2052         daddr32_t cbase, dmax;
2053         struct cg *cgp;
2054
2055         (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2056             (size_t)sblock.fs_cgsize);
2057         cgp = cgblk.b_un.b_cg;
2058
2059         cbase = cgbase(&sblock, cgno);
2060         dmax = cbase + sblock.fs_fpg;
2061         if (dmax > sblock.fs_size)
2062                 dmax = sblock.fs_size;
2063
2064         /* LINTED pointer difference won't overflow */
2065         *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2066         *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2067         *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2068         *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2069         *nextfreeoff = *freeoff +
2070             howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2071         *ndblk = dmax - cbase;
2072 }
2073
2074 /*
2075  * Corrects all fields in the cg that can be done with the available
2076  * redundant data.
2077  */
2078 void
2079 fix_cg(struct cg *cgp, int cgno)
2080 {
2081         daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2082         daddr32_t exp_freeoff, exp_nextfreeoff;
2083         daddr32_t ndblk;
2084
2085         cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2086             &exp_freeoff, &exp_nextfreeoff, &ndblk);
2087
2088         if (cgp->cg_cgx != cgno) {
2089                 cgp->cg_cgx = cgno;
2090         }
2091
2092         if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2093                 if (cgno == (sblock.fs_ncg - 1)) {
2094                         cgp->cg_ncyl = sblock.fs_ncyl -
2095                             (sblock.fs_cpg * cgno);
2096                 } else {
2097                         cgp->cg_ncyl = sblock.fs_cpg;
2098                 }
2099         }
2100
2101         if (cgp->cg_niblk != sblock.fs_ipg) {
2102                 /*
2103                  * This is not used by the kernel, so it's pretty
2104                  * harmless if it's wrong.
2105                  */
2106                 cgp->cg_niblk = sblock.fs_ipg;
2107         }
2108
2109         if (cgp->cg_ndblk != ndblk) {
2110                 cgp->cg_ndblk = ndblk;
2111         }
2112
2113         /*
2114          * For the rotors, any position's valid, so pick the one we know
2115          * will always exist.
2116          */
2117         if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2118                 cgp->cg_rotor = 0;
2119         }
2120
2121         if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2122                 cgp->cg_frotor = 0;
2123         }
2124
2125         if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2126                 cgp->cg_irotor = 0;
2127         }
2128
2129         /*
2130          * For btotoff and boff, if they're misaligned they won't
2131          * match the expected values, so we're catching both cases
2132          * here.  Of course, if any of these are off, it seems likely
2133          * that the tables really won't be where we calculate they
2134          * should be anyway.
2135          */
2136         if (cgp->cg_btotoff != exp_btotoff) {
2137                 cgp->cg_btotoff = exp_btotoff;
2138         }
2139
2140         if (cgp->cg_boff != exp_boff) {
2141                 cgp->cg_boff = exp_boff;
2142         }
2143
2144         if (cgp->cg_iusedoff != exp_iusedoff) {
2145                 cgp->cg_iusedoff = exp_iusedoff;
2146         }
2147
2148         if (cgp->cg_freeoff != exp_freeoff) {
2149                 cgp->cg_freeoff = exp_freeoff;
2150         }
2151
2152         if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2153                 cgp->cg_nextfreeoff = exp_nextfreeoff;
2154         }
2155
2156         /*
2157          * Reset the magic, as we've recreated this cg, also
2158          * update the cg_time, as we're writing out the cg
2159          */
2160         cgp->cg_magic = CG_MAGIC;
2161         cgp->cg_time = time(NULL);
2162
2163         /*
2164          * We know there was at least one correctable problem,
2165          * or else we wouldn't have been called.  So instead of
2166          * marking the buffer dirty N times above, just do it
2167          * once here.
2168          */
2169         cgdirty();
2170 }
2171
2172 void
2173 examinelog(void (*cb)(daddr32_t))
2174 {
2175         struct bufarea *bp;
2176         extent_block_t *ebp;
2177         extent_t *ep;
2178         daddr32_t nfno, fno;
2179         int i;
2180         int j;
2181
2182         /*
2183          * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2184          * we need to translate accordingly using logbtodb()
2185          */
2186
2187         if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2188                 if (debug) {
2189                         (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2190                             "Aborting log examination\n", \
2191                             logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2192                 }
2193                 return;
2194         }
2195
2196         /*
2197          * Read errors will return zeros, which will cause us
2198          * to do nothing harmful, so don't need to handle it.
2199          */
2200         bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2201             (size_t)sblock.fs_bsize);
2202         ebp = (void *)bp->b_un.b_buf;
2203
2204         /*
2205          * Does it look like a log allocation table?
2206          */
2207         /* LINTED pointer cast is aligned */
2208         if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2209             sblock.fs_bsize))
2210                 return;
2211         if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2212                 return;
2213
2214         ep = &ebp->extents[0];
2215         for (i = 0; i < ebp->nextents; ++i, ++ep) {
2216                 fno = logbtofrag(&sblock, ep->pbno);
2217                 nfno = dbtofsb(&sblock, ep->nbno);
2218                 for (j = 0; j < nfno; ++j, ++fno) {
2219                         /*
2220                          * Invoke the callback first, so that pass1 can
2221                          * mark the log blocks in-use.  Then, if any
2222                          * subsequent pass over the log shows us that a
2223                          * block got freed (say, it was also claimed by
2224                          * an inode that we cleared), we can safely declare
2225                          * the log bad.
2226                          */
2227                         if (cb != NULL)
2228                                 (*cb)(fno);
2229                         if (!testbmap(fno))
2230                                 islogok = 0;
2231                 }
2232         }
2233         brelse(bp);
2234
2235         if (cb != NULL) {
2236                 fno = logbtofrag(&sblock, sblock.fs_logbno);
2237                 for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2238                         (*cb)(fno);
2239         }
2240 }
2241
2242 static void
2243 freelogblk(daddr32_t frag)
2244 {
2245         freeblk(sblock.fs_logbno, frag, 1);
2246 }
2247
2248 caddr_t
2249 file_id(fsck_ino_t inum, mode_t mode)
2250 {
2251         static char name[MAXPATHLEN + 1];
2252
2253         if (lfdir == inum) {
2254                 return (lfname);
2255         }
2256
2257         if ((mode & IFMT) == IFDIR) {
2258                 (void) strcpy(name, "DIR");
2259         } else if ((mode & IFMT) == IFATTRDIR) {
2260                 (void) strcpy(name, "ATTR DIR");
2261         } else if ((mode & IFMT) == IFSHAD) {
2262                 (void) strcpy(name, "ACL");
2263         } else {
2264                 (void) strcpy(name, "FILE");
2265         }
2266
2267         return (name);
2268 }
2269
2270 /*
2271  * Simple initializer for inodesc structures, so users of only a few
2272  * fields don't have to worry about getting the right defaults for
2273  * everything out.
2274  */
2275 void
2276 init_inodesc(struct inodesc *idesc)
2277 {
2278         /*
2279          * Most fields should be zero, just hit the special cases.
2280          */
2281         (void) memset(idesc, 0, sizeof (struct inodesc));
2282         idesc->id_fix = DONTKNOW;
2283         idesc->id_lbn = -1;
2284         idesc->id_truncto = -1;
2285         idesc->id_firsthole = -1;
2286 }
2287
2288 /*
2289  * Compare routine for tsearch(C) to use on ino_t instances.
2290  */
2291 int
2292 ino_t_cmp(const void *left, const void *right)
2293 {
2294         const fsck_ino_t lino = (const fsck_ino_t)left;
2295         const fsck_ino_t rino = (const fsck_ino_t)right;
2296
2297         return (lino - rino);
2298 }
2299
2300 int
2301 cgisdirty(void)
2302 {
2303         return (cgblk.b_dirty);
2304 }
2305
2306 void
2307 cgflush(void)
2308 {
2309         flush(fswritefd, &cgblk);
2310 }
2311
2312 void
2313 dirty(struct bufarea *bp)
2314 {
2315         if (fswritefd < 0) {
2316                 /*
2317                  * No one should call dirty() in read only mode.
2318                  * But if one does, it's not fatal issue. Just warn them.
2319                  */
2320                 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2321         } else {
2322                 (bp)->b_dirty = 1;
2323                 isdirty = 1;
2324         }
2325 }
2326
2327 void
2328 initbarea(struct bufarea *bp)
2329 {
2330         (bp)->b_dirty = 0;
2331         (bp)->b_bno = (diskaddr_t)-1LL;
2332         (bp)->b_flags = 0;
2333         (bp)->b_cnt = 0;
2334         (bp)->b_errs = 0;
2335 }
2336
/*
 * Partition-sizing routines, adapted from ../newfs/newfs.c.
 * They are needed because calcsb() has to use mkfs to work out what
 * the superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */

/* How get_device_size() last managed to size the device. */
static int label_type;

enum {
        LABEL_TYPE_VTOC = 1,    /* read_extvtoc() succeeded */
        LABEL_TYPE_EFI = 2,     /* efi_alloc_and_read() succeeded */
        LABEL_TYPE_OTHER = 3    /* no label read; sized by trial reads */
};

#define MB                      (1024 * 1024)
#define SECTORS_PER_TERABYTE    (1LL << 31)
#define FS_SIZE_UPPER_LIMIT     0x100000000000LL
2360
2361 diskaddr_t
2362 getdisksize(caddr_t disk, int fd)
2363 {
2364         int rpm;
2365         struct dk_geom g;
2366         struct dk_cinfo ci;
2367         diskaddr_t actual_size;
2368
2369         /*
2370          * get_device_size() determines the actual size of the
2371          * device, and also the disk's attributes, such as geometry.
2372          */
2373         actual_size = get_device_size(fd, disk);
2374
2375         if (label_type == LABEL_TYPE_VTOC) {
2376                 if (ioctl(fd, DKIOCGGEOM, &g)) {
2377                         pwarn("%s: Unable to read Disk geometry", disk);
2378                         return (0);
2379                 }
2380                 if (sblock.fs_nsect == 0)
2381                         sblock.fs_nsect = g.dkg_nsect;
2382                 if (sblock.fs_ntrak == 0)
2383                         sblock.fs_ntrak = g.dkg_nhead;
2384                 if (sblock.fs_rps == 0) {
2385                         rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2386                         sblock.fs_rps = rpm / 60;
2387                 }
2388         }
2389
2390         if (sblock.fs_bsize == 0)
2391                 sblock.fs_bsize = MAXBSIZE;
2392
2393         /*
2394          * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2395          * information is not available, default to the min of a MB and
2396          * maxphys.
2397          */
2398         if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2399                 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2400                 if (sblock.fs_maxcontig < 0) {
2401                         int gotit, maxphys;
2402
2403                         gotit = fsgetmaxphys(&maxphys, NULL);
2404
2405                         /*
2406                          * If we cannot get the maxphys value, default
2407                          * to ufs_maxmaxphys (MB).
2408                          */
2409                         if (gotit) {
2410                                 sblock.fs_maxcontig = MIN(maxphys, MB);
2411                         } else {
2412                                 sblock.fs_maxcontig = MB;
2413                         }
2414                 }
2415                 sblock.fs_maxcontig /= sblock.fs_bsize;
2416         }
2417
2418         return (actual_size);
2419 }
2420
2421 /*
2422  * Figure out how big the partition we're dealing with is.
2423  */
2424 static diskaddr_t
2425 get_device_size(int fd, caddr_t name)
2426 {
2427         struct extvtoc vtoc;
2428         struct dk_gpt *efi_vtoc;
2429         diskaddr_t slicesize = 0;
2430
2431         int index = read_extvtoc(fd, &vtoc);
2432
2433         if (index >= 0) {
2434                 label_type = LABEL_TYPE_VTOC;
2435         } else {
2436                 if (index == VT_ENOTSUP || index == VT_ERROR) {
2437                         /* it might be an EFI label */
2438                         index = efi_alloc_and_read(fd, &efi_vtoc);
2439                         if (index >= 0)
2440                                 label_type = LABEL_TYPE_EFI;
2441                 }
2442         }
2443
2444         if (index < 0) {
2445                 /*
2446                  * Since both attempts to read the label failed, we're
2447                  * going to fall back to a brute force approach to
2448                  * determining the device's size:  see how far out we can
2449                  * perform reads on the device.
2450                  */
2451
2452                 slicesize = brute_force_get_device_size(fd);
2453                 if (slicesize == 0) {
2454                         switch (index) {
2455                         case VT_ERROR:
2456                                 pwarn("%s: %s\n", name, strerror(errno));
2457                                 break;
2458                         case VT_EIO:
2459                                 pwarn("%s: I/O error accessing VTOC", name);
2460                                 break;
2461                         case VT_EINVAL:
2462                                 pwarn("%s: Invalid field in VTOC", name);
2463                                 break;
2464                         default:
2465                                 pwarn("%s: unknown error %d accessing VTOC",
2466                                     name, index);
2467                                 break;
2468                         }
2469                         return (0);
2470                 } else {
2471                         label_type = LABEL_TYPE_OTHER;
2472                 }
2473         }
2474
2475         if (label_type == LABEL_TYPE_EFI) {
2476                 slicesize = efi_vtoc->efi_parts[index].p_size;
2477                 efi_free(efi_vtoc);
2478         } else if (label_type == LABEL_TYPE_VTOC) {
2479                 slicesize = vtoc.v_part[index].p_size;
2480         }
2481
2482         return (slicesize);
2483 }
2484
2485 /*
2486  * brute_force_get_device_size
2487  *
2488  * Determine the size of the device by seeing how far we can
2489  * read.  Doing an llseek( , , SEEK_END) would probably work
2490  * in most cases, but we've seen at least one third-party driver
2491  * which doesn't correctly support the SEEK_END option when the
2492  * the device is greater than a terabyte.
2493  */
2494
2495 static diskaddr_t
2496 brute_force_get_device_size(int fd)
2497 {
2498         diskaddr_t      min_fail = 0;
2499         diskaddr_t      max_succeed = 0;
2500         diskaddr_t      cur_db_off;
2501         char            buf[DEV_BSIZE];
2502
2503         /*
2504          * First, see if we can read the device at all, just to
2505          * eliminate errors that have nothing to do with the
2506          * device's size.
2507          */
2508
2509         if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2510             ((read(fd, buf, DEV_BSIZE)) == -1))
2511                 return (0);  /* can't determine size */
2512
2513         /*
2514          * Now, go sequentially through the multiples of 4TB
2515          * to find the first read that fails (this isn't strictly
2516          * the most efficient way to find the actual size if the
2517          * size really could be anything between 0 and 2**64 bytes.
2518          * We expect the sizes to be less than 16 TB for some time,
2519          * so why do a bunch of reads that are larger than that?
2520          * However, this algorithm *will* work for sizes of greater
2521          * than 16 TB.  We're just not optimizing for those sizes.)
2522          */
2523
2524         /*
2525          * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526          * We're using > 32-bit constants here.  Therefore, its flow
2527          * analysis is wrong.  For the time being, ignore complaints
2528          * from it about the body of the for() being unreached.
2529          */
2530         for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2531             (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2532             cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2533                 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2534                     SEEK_SET) == -1) ||
2535                     (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2536                         min_fail = cur_db_off;
2537                 else
2538                         max_succeed = cur_db_off;
2539         }
2540
2541         /*
2542          * XXX Same lint flow analysis problem as above.
2543          */
2544         if (min_fail == 0)
2545                 return (0);
2546
2547         /*
2548          * We now know that the size of the device is less than
2549          * min_fail and greater than or equal to max_succeed.  Now
2550          * keep splitting the difference until the actual size in
2551          * sectors in known.  We also know that the difference
2552          * between max_succeed and min_fail at this time is
2553          * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554          * simplifies the math below.
2555          */
2556
2557         while (min_fail - max_succeed > 1) {
2558                 cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2559                 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2560                     SEEK_SET)) == -1) ||
2561                     ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2562                         min_fail = cur_db_off;
2563                 else
2564                         max_succeed = cur_db_off;
2565         }
2566
2567         /* the size is the last successfully read sector offset plus one */
2568         return (max_succeed + 1);
2569 }
2570
2571 static void
2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2573 {
2574         struct dinode *dp;
2575         char pathbuf[MAXPATHLEN + 1];
2576
2577         vpwarn(fmt, ap);
2578         (void) putchar(' ');
2579         pinode(ino);
2580         (void) printf("\n");
2581         getpathname(pathbuf, cwd, ino);
2582         if (ino < UFSROOTINO || ino > maxino) {
2583                 pfatal("NAME=%s\n", pathbuf);
2584                 return;
2585         }
2586         dp = ginode(ino);
2587         if (ftypeok(dp))
2588                 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2589         else
2590                 pfatal("NAME=%s\n", pathbuf);
2591 }
2592
2593 void
2594 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2595 {
2596         va_list ap;
2597
2598         va_start(ap, fmt);
2599         vfileerror(ino, ino, fmt, ap);
2600         va_end(ap);
2601 }
2602
2603 static void
2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2605 {
2606         vfileerror(ino, ino, fmt, ap);
2607 }
2608
2609 void
2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2611 {
2612         va_list ap;
2613
2614         va_start(ap, fmt);
2615         vfileerror(cwd, ino, fmt, ap);
2616         va_end(ap);
2617 }
2618
2619 /*
2620  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2622  * entry.
2623  *
2624  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625  * meaning it's effectively an orphan.  It needs to be noted now, so
2626  * it will be remembered in pass 4.
2627  */
2628
2629 void
2630 add_orphan_dir(fsck_ino_t ino)
2631 {
2632         if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2633                 errexit("add_orphan_dir: out of memory");
2634 }
2635
2636 /*
2637  * Remove an inode from the orphaned-directories list, presumably
2638  * because it's been cleared.
2639  */
2640 void
2641 remove_orphan_dir(fsck_ino_t ino)
2642 {
2643         (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2644 }
2645
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 */

/*
 * Store in *sp the sum of the 32-bit words in the nb bytes at lp.
 * *sp is zeroed before summing, so when sp points into the summed
 * area the old checksum does not contribute to the new one.
 *
 * The sum is accumulated as unsigned so that it wraps modulo 2^32
 * instead of overflowing a signed integer.  A byte count of zero or
 * less sums no words and leaves *sp at zero.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
        uint32_t csum = 0;
        size_t nwords = 0;

        if (nb > 0)
                nwords = (size_t)nb / sizeof (int32_t);

        *sp = 0;
        while (nwords-- != 0)
                csum += (uint32_t)*lp++;
        *sp = (int32_t)csum;
}
2661
/*
 * Verify the checksum stored at *sp against the nb bytes at lp.
 * Returns 1 when the stored value equals what log_setsum() computes,
 * 0 otherwise.  In both cases *sp holds its original value on return.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
        const int32_t stored = *sp;
        int matches;

        log_setsum(sp, lp, nb);
        matches = (*sp == stored);

        /* Put the stored value back; a no-op when the sums agreed. */
        *sp = stored;

        return (matches);
}