dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / cmd / fs.d / ufs / fsck / utilities.c
blob6092018b0e4ee855ab95dd19dab430a920078079
1 /*
2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016 by Delphix. All rights reserved.
4 */
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
9 /*
10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11 * All rights reserved.
13 * Redistribution and use in source and binary forms are permitted
14 * provided that: (1) source distributions retain this entire copyright
15 * notice and comment, and (2) distributions including binaries display
16 * the following acknowledgement: ``This product includes software
17 * developed by the University of California, Berkeley and its contributors''
18 * in the documentation or other materials provided with the distribution
19 * and in all advertising materials mentioning features or use of this
20 * software. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <stdarg.h>
32 #include <libadm.h>
33 #include <note.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/filio.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_acl.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/fs/ufs_log.h>
43 #define _KERNEL
44 #include <sys/fs/ufs_fsdir.h>
45 #undef _KERNEL
46 #include <sys/mnttab.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <signal.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <sys/vfstab.h>
54 #include <sys/lockfs.h>
55 #include <errno.h>
56 #include <sys/cmn_err.h>
57 #include <sys/dkio.h>
58 #include <sys/vtoc.h>
59 #include <sys/efi_partition.h>
60 #include <fslib.h>
61 #include <inttypes.h>
62 #include "fsck.h"
64 caddr_t mount_point = NULL;
66 static int64_t diskreads, totalreads; /* Disk cache statistics */
68 static int log_checksum(int32_t *, int32_t *, int);
69 static void vdirerror(fsck_ino_t, caddr_t, va_list);
70 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
71 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
72 static void vpwarn(caddr_t, va_list);
73 static int getaline(FILE *, caddr_t, int);
74 static struct bufarea *alloc_bufarea(void);
75 static void rwerror(caddr_t, diskaddr_t, int rval);
76 static void debugclean(void);
77 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
78 static void freelogblk(daddr32_t);
79 static void verrexit(caddr_t, va_list);
80 static void vpfatal(caddr_t, va_list);
81 static diskaddr_t get_device_size(int, caddr_t);
82 static diskaddr_t brute_force_get_device_size(int);
83 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
84 daddr32_t *, daddr32_t *, daddr32_t *);
86 int
87 ftypeok(struct dinode *dp)
89 switch (dp->di_mode & IFMT) {
91 case IFDIR:
92 case IFREG:
93 case IFBLK:
94 case IFCHR:
95 case IFLNK:
96 case IFSOCK:
97 case IFIFO:
98 case IFSHAD:
99 case IFATTRDIR:
100 return (1);
102 default:
103 if (debug)
104 (void) printf("bad file type 0%o\n", dp->di_mode);
105 return (0);
110 acltypeok(struct dinode *dp)
112 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
113 return (1);
115 if (debug)
116 (void) printf("bad file type for acl I=%d: 0%o\n",
117 dp->di_shadow, dp->di_mode);
118 return (0);
121 NOTE(PRINTFLIKE(1))
123 reply(caddr_t fmt, ...)
125 va_list ap;
126 char line[80];
128 if (preen)
129 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
131 if (mflag) {
133 * We don't know what's going on, so don't potentially
134 * make things worse by having errexit() write stuff
135 * out to disk.
137 (void) printf(
138 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
139 devname);
140 exit(EXERRFATAL);
143 va_start(ap, fmt);
144 (void) putchar('\n');
145 (void) vprintf(fmt, ap);
146 (void) putchar('?');
147 (void) putchar(' ');
148 va_end(ap);
150 if (nflag || fswritefd < 0) {
151 (void) printf(" no\n\n");
152 return (0);
154 if (yflag) {
155 (void) printf(" yes\n\n");
156 return (1);
158 (void) fflush(stdout);
159 if (getaline(stdin, line, sizeof (line)) == EOF)
160 errexit("\n");
161 (void) printf("\n");
162 if (line[0] == 'y' || line[0] == 'Y') {
163 return (1);
164 } else {
165 return (0);
/*
 * Read one line from fp into loc, discarding every whitespace character
 * and silently dropping whatever does not fit in maxlen - 1 bytes.
 * The result is always NUL terminated.
 *
 * Returns the number of characters stored, or EOF if end-of-file is hit
 * before a newline (loc is then left unterminated by this call).
 */
static int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	caddr_t out = loc;
	caddr_t limit = &loc[maxlen - 1];
	int ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		/* Whitespace and overflow are consumed but not stored. */
		if (isspace(ch) || out >= limit)
			continue;
		*out++ = (char)ch;
	}
	*out = '\0';

	/* The difference is bounded by maxlen, so it fits in an int. */
	return ((int)(out - loc));
}
189 * Malloc buffers and set up cache.
191 void
192 bufinit(void)
194 struct bufarea *bp;
195 int bufcnt, i;
196 caddr_t bufp;
198 bufp = malloc((size_t)sblock.fs_bsize);
199 if (bufp == NULL)
200 goto nomem;
201 initbarea(&cgblk);
202 cgblk.b_un.b_buf = bufp;
203 bufhead.b_next = bufhead.b_prev = &bufhead;
204 bufcnt = MAXBUFSPACE / sblock.fs_bsize;
205 if (bufcnt < MINBUFS)
206 bufcnt = MINBUFS;
207 for (i = 0; i < bufcnt; i++) {
208 bp = (struct bufarea *)malloc(sizeof (struct bufarea));
209 if (bp == NULL) {
210 if (i >= MINBUFS)
211 goto noalloc;
212 goto nomem;
215 bufp = malloc((size_t)sblock.fs_bsize);
216 if (bufp == NULL) {
217 free((void *)bp);
218 if (i >= MINBUFS)
219 goto noalloc;
220 goto nomem;
222 initbarea(bp);
223 bp->b_un.b_buf = bufp;
224 bp->b_prev = &bufhead;
225 bp->b_next = bufhead.b_next;
226 bufhead.b_next->b_prev = bp;
227 bufhead.b_next = bp;
229 noalloc:
230 bufhead.b_size = i; /* save number of buffers */
231 pbp = pdirbp = NULL;
232 return;
234 nomem:
235 errexit("cannot allocate buffer pool\n");
236 /* NOTREACHED */
240 * Undo a bufinit().
242 void
243 unbufinit(void)
245 int cnt;
246 struct bufarea *bp, *nbp;
248 cnt = 0;
249 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
250 cnt++;
251 flush(fswritefd, bp);
252 nbp = bp->b_prev;
254 * We're discarding the entire chain, so this isn't
255 * technically necessary. However, it doesn't hurt
256 * and lint's data flow analysis is much happier
257 * (this prevents it from thinking there's a chance
258 * of our using memory elsewhere after it's been released).
260 nbp->b_next = bp->b_next;
261 bp->b_next->b_prev = nbp;
262 free((void *)bp->b_un.b_buf);
263 free((void *)bp);
266 if (bufhead.b_size != cnt)
267 errexit("Panic: cache lost %d buffers\n",
268 bufhead.b_size - cnt);
272 * Manage a cache of directory blocks.
274 struct bufarea *
275 getdatablk(daddr32_t blkno, size_t size)
277 struct bufarea *bp;
279 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
280 if (bp->b_bno == fsbtodb(&sblock, blkno)) {
281 goto foundit;
283 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
284 if ((bp->b_flags & B_INUSE) == 0)
285 break;
286 if (bp == &bufhead) {
287 bp = alloc_bufarea();
288 if (bp == NULL) {
289 errexit("deadlocked buffer pool\n");
290 /* NOTREACHED */
294 * We're at the same logical level as getblk(), so if there
295 * are any errors, we'll let our caller handle them.
297 diskreads++;
298 (void) getblk(bp, blkno, size);
300 foundit:
301 totalreads++;
302 bp->b_cnt++;
304 * Move the buffer to head of linked list if it isn't
305 * already there.
307 if (bufhead.b_next != bp) {
308 bp->b_prev->b_next = bp->b_next;
309 bp->b_next->b_prev = bp->b_prev;
310 bp->b_prev = &bufhead;
311 bp->b_next = bufhead.b_next;
312 bufhead.b_next->b_prev = bp;
313 bufhead.b_next = bp;
315 bp->b_flags |= B_INUSE;
316 return (bp);
319 void
320 brelse(struct bufarea *bp)
322 bp->b_cnt--;
323 if (bp->b_cnt == 0) {
324 bp->b_flags &= ~B_INUSE;
328 struct bufarea *
329 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
331 diskaddr_t dblk;
333 dblk = fsbtodb(&sblock, blk);
334 if (bp->b_bno == dblk)
335 return (bp);
336 flush(fswritefd, bp);
337 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
338 bp->b_bno = dblk;
339 bp->b_size = size;
340 return (bp);
343 void
344 flush(int fd, struct bufarea *bp)
346 int i, j;
347 caddr_t sip;
348 long size;
350 if (!bp->b_dirty)
351 return;
354 * It's not our buf, so if there are errors, let whoever
355 * acquired it deal with the actual problem.
357 if (bp->b_errs != 0)
358 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
359 bp->b_dirty = 0;
360 bp->b_errs = 0;
361 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
362 if (bp != &sblk) {
363 return;
367 * We're flushing the superblock, so make sure all the
368 * ancillary bits go out as well.
370 sip = (caddr_t)sblock.fs_u.fs_csp;
371 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
372 size = sblock.fs_cssize - i < sblock.fs_bsize ?
373 sblock.fs_cssize - i : sblock.fs_bsize;
374 bwrite(fswritefd, sip,
375 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
376 size);
377 sip += size;
381 static void
382 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
384 int olderr = errno;
386 if (!preen)
387 (void) printf("\n");
389 if (rval == -1)
390 pfatal("CANNOT %s: DISK BLOCK %lld: %s",
391 mesg, blk, strerror(olderr));
392 else
393 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
395 if (reply("CONTINUE") == 0) {
396 exitstat = EXERRFATAL;
397 errexit("Program terminated\n");
401 void
402 ckfini(void)
404 int64_t percentage;
406 if (fswritefd < 0)
407 return;
409 flush(fswritefd, &sblk);
411 * Were we using a backup superblock?
413 if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
414 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
415 sblk.b_bno = SBOFF / dev_bsize;
416 sbdirty();
417 flush(fswritefd, &sblk);
420 flush(fswritefd, &cgblk);
421 if (cgblk.b_un.b_buf != NULL) {
422 free((void *)cgblk.b_un.b_buf);
423 cgblk.b_un.b_buf = NULL;
425 unbufinit();
426 pbp = NULL;
427 pdirbp = NULL;
428 if (debug) {
430 * Note that we only count cache-related reads.
431 * Anything that called fsck_bread() or getblk()
432 * directly are explicitly not cached, so they're not
433 * included here.
435 if (totalreads != 0)
436 percentage = diskreads * 100 / totalreads;
437 else
438 percentage = 0;
440 (void) printf("cache missed %lld of %lld reads (%lld%%)\n",
441 (longlong_t)diskreads, (longlong_t)totalreads,
442 (longlong_t)percentage);
445 (void) close(fsreadfd);
446 (void) close(fswritefd);
447 fsreadfd = -1;
448 fswritefd = -1;
452 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
454 caddr_t cp;
455 int i;
456 int errs;
457 offset_t offset = ldbtob(blk);
458 offset_t addr;
461 * In our universe, nothing exists before the superblock, so
462 * just pretend it's always zeros. This is the complement of
463 * bwrite()'s ignoring write requests into that space.
465 if (blk < SBLOCK) {
466 if (debug)
467 (void) printf(
468 "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
469 SBLOCK, (longlong_t)blk);
470 (void) memset(buf, 0, (size_t)size);
471 return (1);
474 if (llseek(fd, offset, SEEK_SET) < 0) {
475 rwerror("SEEK", blk, -1);
478 if ((i = read(fd, buf, size)) == size) {
479 return (0);
481 rwerror("READ", blk, i);
482 if (llseek(fd, offset, SEEK_SET) < 0) {
483 rwerror("SEEK", blk, -1);
485 errs = 0;
486 (void) memset(buf, 0, (size_t)size);
487 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
488 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
489 addr = ldbtob(blk + i);
490 if (llseek(fd, addr, SEEK_SET) < 0 ||
491 read(fd, cp, (int)secsize) < 0) {
492 iscorrupt = 1;
493 (void) printf(" %llu", blk + (u_longlong_t)i);
494 errs++;
497 (void) printf("\n");
498 return (errs);
501 void
502 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
504 int i;
505 int n;
506 caddr_t cp;
507 offset_t offset = ldbtob(blk);
508 offset_t addr;
510 if (fd < 0)
511 return;
512 if (blk < SBLOCK) {
513 if (debug)
514 (void) printf(
515 "WARNING: Attempt to write illegal blkno %lld on %s\n",
516 (longlong_t)blk, devname);
517 return;
519 if (llseek(fd, offset, SEEK_SET) < 0) {
520 rwerror("SEEK", blk, -1);
522 if ((i = write(fd, buf, (int)size)) == size) {
523 fsmodified = 1;
524 return;
526 rwerror("WRITE", blk, i);
527 if (llseek(fd, offset, SEEK_SET) < 0) {
528 rwerror("SEEK", blk, -1);
530 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
531 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
532 n = 0;
533 addr = ldbtob(blk + i);
534 if (llseek(fd, addr, SEEK_SET) < 0 ||
535 (n = write(fd, cp, DEV_BSIZE)) < 0) {
536 iscorrupt = 1;
537 (void) printf(" %llu", blk + (u_longlong_t)i);
538 } else if (n > 0) {
539 fsmodified = 1;
543 (void) printf("\n");
547 * Allocates the specified number of contiguous fragments.
549 daddr32_t
550 allocblk(int wantedfrags)
552 int block, leadfrag, tailfrag;
553 daddr32_t selected;
554 size_t size;
555 struct bufarea *bp;
558 * It's arguable whether we should just fail, or instead
559 * error out here. Since we should only ever be asked for
560 * a single fragment or an entire block (i.e., sblock.fs_frag),
561 * we'll fail out because anything else means somebody
562 * changed code without considering all of the ramifications.
564 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
565 exitstat = EXERRFATAL;
566 errexit("allocblk() asked for %d frags. "
567 "Legal range is 1 to %d",
568 wantedfrags, sblock.fs_frag);
572 * For each filesystem block, look at every possible starting
573 * offset within the block such that we can get the number of
574 * contiguous fragments that we need. This is a drastically
575 * simplified version of the kernel's mapsearch() and alloc*().
576 * It's also correspondingly slower.
578 for (block = 0; block < maxfsblock - sblock.fs_frag;
579 block += sblock.fs_frag) {
580 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
581 leadfrag++) {
583 * Is first fragment of candidate run available?
585 if (testbmap(block + leadfrag))
586 continue;
588 * Are the rest of them available?
590 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
591 if (testbmap(block + leadfrag + tailfrag))
592 break;
593 if (tailfrag < wantedfrags) {
595 * No, skip the known-unusable run.
597 leadfrag += tailfrag;
598 continue;
601 * Found what we need, so claim them.
603 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
604 setbmap(block + leadfrag + tailfrag);
605 n_blks += wantedfrags;
606 size = wantedfrags * sblock.fs_fsize;
607 selected = block + leadfrag;
608 bp = getdatablk(selected, size);
609 (void) memset(bp->b_un.b_buf, 0, size);
610 dirty(bp);
611 brelse(bp);
612 if (debug)
613 (void) printf(
614 "allocblk: selected %d (in block %d), frags %d, size %d\n",
615 selected, selected % sblock.fs_bsize,
616 wantedfrags, (int)size);
617 return (selected);
620 return (0);
624 * Free a previously allocated block
626 void
627 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
629 struct inodesc idesc;
631 if (debug)
632 (void) printf("debug: freeing %d fragments starting at %d\n",
633 frags, blkno);
635 init_inodesc(&idesc);
637 idesc.id_number = ino;
638 idesc.id_blkno = blkno;
639 idesc.id_numfrags = frags;
640 idesc.id_truncto = -1;
643 * Nothing in the return status has any relevance to how
644 * we're using pass4check(), so just ignore it.
646 (void) pass4check(&idesc);
650 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes
651 * that the given buffer is at least MAXPATHLEN + 1 characters.
653 void
654 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
656 int len;
657 caddr_t cp;
658 struct dinode *dp;
659 struct inodesc idesc;
660 struct inoinfo *inp;
662 if (debug)
663 (void) printf("debug: getpathname(curdir %d, ino %d)\n",
664 curdir, ino);
666 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
667 (void) strcpy(namebuf, "?");
668 return;
671 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
672 (void) strcpy(namebuf, "/");
673 return;
676 init_inodesc(&idesc);
677 idesc.id_type = DATA;
678 cp = &namebuf[MAXPATHLEN - 1];
679 *cp = '\0';
682 * In the case of extended attributes, our
683 * parent won't necessarily be a directory, so just
684 * return what we've found with a prefix indicating
685 * that it's an XATTR. Presumably our caller will
686 * know what's going on and do something useful, like
687 * work out the path of the parent and then combine
688 * the two names.
690 * Can't use strcpy(), etc, because we've probably
691 * already got some name information in the buffer and
692 * the usual trailing \0 would lose it.
694 dp = ginode(curdir);
695 if ((dp->di_mode & IFMT) == IFATTRDIR) {
696 idesc.id_number = curdir;
697 idesc.id_parent = ino;
698 idesc.id_func = findname;
699 idesc.id_name = namebuf;
700 idesc.id_fix = NOFIX;
701 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
702 *cp-- = '?';
705 len = sizeof (XATTR_DIR_NAME) - 1;
706 cp -= len;
707 (void) memmove(cp, XATTR_DIR_NAME, len);
708 goto attrname;
712 * If curdir == ino, need to get a handle on .. so we
713 * can search it for ino's name. Otherwise, just search
714 * the given directory for ino. Repeat until out of space
715 * or a full path has been built.
717 if (curdir != ino) {
718 idesc.id_parent = curdir;
719 goto namelookup;
721 while (ino != UFSROOTINO && ino != 0) {
722 idesc.id_number = ino;
723 idesc.id_func = findino;
724 idesc.id_name = "..";
725 idesc.id_fix = NOFIX;
726 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
727 inp = getinoinfo(ino);
728 if ((inp == NULL) || (inp->i_parent == 0)) {
729 break;
731 idesc.id_parent = inp->i_parent;
735 * To get this far, id_parent must have the inode
736 * number for `..' in it. By definition, that's got
737 * to be a directory, so search it for the inode of
738 * interest.
740 namelookup:
741 idesc.id_number = idesc.id_parent;
742 idesc.id_parent = ino;
743 idesc.id_func = findname;
744 idesc.id_name = namebuf;
745 idesc.id_fix = NOFIX;
746 if ((ckinode(ginode(idesc.id_number),
747 &idesc, CKI_TRAVERSE) & FOUND) == 0) {
748 break;
751 * Prepend to what we've accumulated so far. If
752 * there's not enough room for even one more path element
753 * (of the worst-case length), then bail out.
755 len = strlen(namebuf);
756 cp -= len;
757 if (cp < &namebuf[MAXNAMLEN])
758 break;
759 (void) memmove(cp, namebuf, len);
760 *--cp = '/';
763 * Corner case for a looped-to-itself directory.
765 if (ino == idesc.id_number)
766 break;
769 * Climb one level of the hierarchy. In other words,
770 * the current .. becomes the inode to search for and
771 * its parent becomes the directory to search in.
773 ino = idesc.id_number;
777 * If we hit a discontinuity in the hierarchy, indicate it by
778 * prefixing the path so far with `?'. Otherwise, the first
779 * character will be `/' as a side-effect of the *--cp above.
781 * The special case is to handle the situation where we're
782 * trying to look something up in UFSROOTINO, but didn't find
783 * it.
785 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
786 if (cp > namebuf)
787 cp--;
788 *cp = '?';
792 * The invariants being used for buffer integrity are:
793 * - namebuf[] is terminated with \0 before anything else
794 * - cp is always <= the last element of namebuf[]
795 * - the new path element is always stored at the
796 * beginning of namebuf[], and is no more than MAXNAMLEN-1
797 * characters
798 * - cp is is decremented by the number of characters in
799 * the new path element
800 * - if, after the above accounting for the new element's
801 * size, there is no longer enough room at the beginning of
802 * namebuf[] for a full-sized path element and a slash,
803 * terminate the loop. cp is in the range
804 * &namebuf[0]..&namebuf[MAXNAMLEN - 1]
806 attrname:
807 /* LINTED per the above discussion */
808 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
811 /* ARGSUSED */
812 void
813 catch(int dummy)
815 ckfini();
816 exit(EXSIGNAL);
820 * When preening, allow a single quit to signal
821 * a special exit after filesystem checks complete
822 * so that reboot sequence may be interrupted.
824 /* ARGSUSED */
825 void
826 catchquit(int dummy)
828 (void) printf("returning to single-user after filesystem check\n");
829 interrupted = 1;
830 (void) signal(SIGQUIT, SIG_DFL);
835 * determine whether an inode should be fixed.
837 NOTE(PRINTFLIKE(2))
839 dofix(struct inodesc *idesc, caddr_t msg, ...)
841 int rval = 0;
842 va_list ap;
844 va_start(ap, msg);
846 switch (idesc->id_fix) {
848 case DONTKNOW:
849 if (idesc->id_type == DATA)
850 vdirerror(idesc->id_number, msg, ap);
851 else
852 vpwarn(msg, ap);
853 if (preen) {
854 idesc->id_fix = FIX;
855 rval = ALTERED;
856 break;
858 if (reply("SALVAGE") == 0) {
859 idesc->id_fix = NOFIX;
860 break;
862 idesc->id_fix = FIX;
863 rval = ALTERED;
864 break;
866 case FIX:
867 rval = ALTERED;
868 break;
870 case NOFIX:
871 break;
873 default:
874 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
877 va_end(ap);
878 return (rval);
881 NOTE(PRINTFLIKE(1))
882 void
883 errexit(caddr_t fmt, ...)
885 va_list ap;
887 va_start(ap, fmt);
888 verrexit(fmt, ap);
889 /* NOTREACHED */
892 NOTE(PRINTFLIKE(1))
893 static void
894 verrexit(caddr_t fmt, va_list ap)
896 static int recursing = 0;
898 if (!recursing) {
899 recursing = 1;
900 if (errorlocked || iscorrupt) {
901 if (havesb && fswritefd >= 0) {
902 sblock.fs_clean = FSBAD;
903 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
904 sblock.fs_state = -sblock.fs_state;
905 sbdirty();
906 write_altsb(fswritefd);
907 flush(fswritefd, &sblk);
910 ckfini();
911 recursing = 0;
913 (void) vprintf(fmt, ap);
914 if (fmt[strlen(fmt) - 1] != '\n')
915 (void) putchar('\n');
916 exit((exitstat != 0) ? exitstat : EXERRFATAL);
920 * An unexpected inconsistency occured.
921 * Die if preening, otherwise just print message and continue.
923 NOTE(PRINTFLIKE(1))
924 void
925 pfatal(caddr_t fmt, ...)
927 va_list ap;
929 va_start(ap, fmt);
930 vpfatal(fmt, ap);
931 va_end(ap);
934 NOTE(PRINTFLIKE(1))
935 static void
936 vpfatal(caddr_t fmt, va_list ap)
938 if (preen) {
939 if (*fmt != '\0') {
940 (void) printf("%s: ", devname);
941 (void) vprintf(fmt, ap);
942 (void) printf("\n");
944 (void) printf(
945 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
946 devname);
947 if (havesb && fswritefd >= 0) {
948 sblock.fs_clean = FSBAD;
949 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
950 sbdirty();
951 flush(fswritefd, &sblk);
954 * We're exiting, it doesn't really matter that our
955 * caller doesn't get to call va_end().
957 if (exitstat == 0)
958 exitstat = EXFNDERRS;
959 exit(exitstat);
961 if (*fmt != '\0') {
962 (void) vprintf(fmt, ap);
967 * Pwarn just prints a message when not preening,
968 * or a warning (preceded by filename) when preening.
970 NOTE(PRINTFLIKE(1))
971 void
972 pwarn(caddr_t fmt, ...)
974 va_list ap;
976 va_start(ap, fmt);
977 vpwarn(fmt, ap);
978 va_end(ap);
981 NOTE(PRINTFLIKE(1))
982 static void
983 vpwarn(caddr_t fmt, va_list ap)
985 if (*fmt != '\0') {
986 if (preen)
987 (void) printf("%s: ", devname);
988 (void) vprintf(fmt, ap);
993 * Like sprintf(), except the buffer is dynamically allocated
994 * and returned, instead of being passed in. A pointer to the
995 * buffer is stored in *RET, and FMT is the usual format string.
996 * The number of characters in *RET (excluding the trailing \0,
997 * to be consistent with the other *printf() routines) is returned.
999 * Solaris doesn't have asprintf(3C) yet, unfortunately.
1001 NOTE(PRINTFLIKE(2))
1003 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1005 int len;
1006 caddr_t buffer;
1007 va_list ap;
1009 va_start(ap, fmt);
1010 len = vsnprintf(NULL, 0, fmt, ap);
1011 va_end(ap);
1013 buffer = malloc((len + 1) * sizeof (char));
1014 if (buffer == NULL) {
1015 errexit("Out of memory in asprintf\n");
1016 /* NOTREACHED */
1019 va_start(ap, fmt);
1020 (void) vsnprintf(buffer, len + 1, fmt, ap);
1021 va_end(ap);
1023 *ret = buffer;
1024 return (len);
1028 * So we can take advantage of kernel routines in ufs_subr.c.
1030 /* PRINTFLIKE2 */
1031 void
1032 cmn_err(int level, caddr_t fmt, ...)
1034 va_list ap;
1036 va_start(ap, fmt);
1037 if (level == CE_PANIC) {
1038 (void) printf("INTERNAL INCONSISTENCY:");
1039 verrexit(fmt, ap);
1040 } else {
1041 (void) vprintf(fmt, ap);
1043 va_end(ap);
1047 * Check to see if unraw version of name is already mounted.
1048 * Updates devstr with the device name if devstr is not NULL
1049 * and str_size is positive.
1052 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1054 int found;
1055 struct mnttab *mntent;
1057 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1058 if (mntent == NULL)
1059 return (M_NOMNT);
1062 * It's mounted. With or without write access?
1064 if (hasmntopt(mntent, MNTOPT_RO) != 0)
1065 found = M_RO; /* mounted as RO */
1066 else
1067 found = M_RW; /* mounted as R/W */
1069 if (mount_point == NULL) {
1070 mount_point = strdup(mntent->mnt_mountp);
1071 if (mount_point == NULL) {
1072 errexit("fsck: memory allocation failure: %s",
1073 strerror(errno));
1074 /* NOTREACHED */
1077 if (devstr != NULL && str_size > 0)
1078 (void) strlcpy(devstr, mntent->mnt_special, str_size);
1081 return (found);
1085 * Check to see if name corresponds to an entry in vfstab, and that the entry
1086 * does not have option ro.
1089 writable(caddr_t name)
1091 int rw = 1;
1092 struct vfstab vfsbuf, vfskey;
1093 FILE *vfstab;
1095 vfstab = fopen(VFSTAB, "r");
1096 if (vfstab == NULL) {
1097 (void) printf("can't open %s\n", VFSTAB);
1098 return (1);
1100 (void) memset(&vfskey, 0, sizeof (vfskey));
1101 vfsnull(&vfskey);
1102 vfskey.vfs_special = unrawname(name);
1103 vfskey.vfs_fstype = MNTTYPE_UFS;
1104 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1105 (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1106 rw = 0;
1108 (void) fclose(vfstab);
1109 return (rw);
1113 * debugclean
1115 static void
1116 debugclean(void)
1118 if (!debug)
1119 return;
1121 if ((iscorrupt == 0) && (isdirty == 0))
1122 return;
1124 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1125 (sblock.fs_clean == FSLOG && islog && islogok) ||
1126 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1127 return;
1129 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1130 sblock.fs_clean == FSSTABLE ? "stable" :
1131 sblock.fs_clean == FSLOG ? "logging" :
1132 sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1133 devname);
1137 * updateclean
1138 * Carefully and transparently update the clean flag.
1140 * `iscorrupt' has to be in its final state before this is called.
1143 updateclean(void)
1145 int freedlog = 0;
1146 struct bufarea cleanbuf;
1147 size_t size;
1148 ssize_t io_res;
1149 diskaddr_t bno;
1150 char fsclean;
1151 int fsreclaim;
1152 char fsflags;
1153 int flags_ok = 1;
1154 daddr32_t fslogbno;
1155 offset_t sblkoff;
1156 time_t t;
1159 * debug stuff
1161 debugclean();
1164 * set fsclean to its appropriate value
1166 fslogbno = sblock.fs_logbno;
1167 fsclean = sblock.fs_clean;
1168 fsreclaim = sblock.fs_reclaim;
1169 fsflags = sblock.fs_flags;
1170 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1171 fsclean = FSACTIVE;
1174 * If ufs log is not okay, note that we need to clear it.
1176 examinelog(NULL);
1177 if (fslogbno && !(islog && islogok)) {
1178 fsclean = FSACTIVE;
1179 fslogbno = 0;
1183 * if necessary, update fs_clean and fs_state
1185 switch (fsclean) {
1187 case FSACTIVE:
1188 if (!iscorrupt) {
1189 fsclean = FSSTABLE;
1190 fsreclaim = 0;
1192 break;
1194 case FSCLEAN:
1195 case FSSTABLE:
1196 if (iscorrupt) {
1197 fsclean = FSACTIVE;
1198 } else {
1199 fsreclaim = 0;
1201 break;
1203 case FSLOG:
1204 if (iscorrupt) {
1205 fsclean = FSACTIVE;
1206 } else if (!islog || fslogbno == 0) {
1207 fsclean = FSSTABLE;
1208 fsreclaim = 0;
1209 } else if (fflag) {
1210 fsreclaim = 0;
1212 break;
1214 case FSFIX:
1215 fsclean = FSBAD;
1216 if (errorlocked && !iscorrupt) {
1217 fsclean = islog ? FSLOG : FSCLEAN;
1219 break;
1221 default:
1222 if (iscorrupt) {
1223 fsclean = FSACTIVE;
1224 } else {
1225 fsclean = FSSTABLE;
1226 fsreclaim = 0;
1230 if (largefile_count > 0)
1231 fsflags |= FSLARGEFILES;
1232 else
1233 fsflags &= ~FSLARGEFILES;
1236 * There can be two discrepencies here. A) The superblock
1237 * shows no largefiles but we found some while scanning.
1238 * B) The superblock indicates the presence of largefiles,
1239 * but none are present. Note that if preening, the superblock
1240 * is silently corrected.
1242 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1243 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1244 flags_ok = 0;
1246 if (debug)
1247 (void) printf(
1248 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1249 largefile_count, sblock.fs_flags, flags_ok);
1252 * If fs is unchanged, do nothing.
1254 if ((!isdirty) && (flags_ok) &&
1255 (fslogbno == sblock.fs_logbno) &&
1256 (sblock.fs_clean == fsclean) &&
1257 (sblock.fs_reclaim == fsreclaim) &&
1258 (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1259 if (errorlocked) {
1260 if (!do_errorlock(LOCKFS_ULOCK))
1261 pwarn(
1262 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1264 return (freedlog);
1268 * if user allows, update superblock state
1270 if (debug) {
1271 (void) printf(
1272 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1273 sblock.fs_flags, sblock.fs_logbno,
1274 sblock.fs_clean, sblock.fs_reclaim,
1275 sblock.fs_state + sblock.fs_time);
1276 (void) printf(
1277 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1278 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1280 if (!isdirty && !preen && !rerun &&
1281 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1282 return (freedlog);
1284 (void) time(&t);
1285 sblock.fs_time = (time32_t)t;
1286 if (debug)
1287 printclean();
1289 if (sblock.fs_logbno != fslogbno) {
1290 examinelog(&freelogblk);
1291 freedlog++;
1294 sblock.fs_logbno = fslogbno;
1295 sblock.fs_clean = fsclean;
1296 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1297 sblock.fs_reclaim = fsreclaim;
1298 sblock.fs_flags = fsflags;
1301 * if superblock can't be written, return
1303 if (fswritefd < 0)
1304 return (freedlog);
1307 * Read private copy of superblock, update clean flag, and write it.
1309 bno = sblk.b_bno;
1310 size = sblk.b_size;
1312 sblkoff = ldbtob(bno);
1314 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1315 errexit("out of memory");
1316 if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1317 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1318 (longlong_t)bno, strerror(errno));
1319 goto out;
1322 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1323 report_io_prob("READ FROM", bno, size, io_res);
1324 goto out;
1327 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno;
1328 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean;
1329 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state;
1330 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time;
1331 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1332 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags;
1334 if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1335 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1336 (longlong_t)bno, strerror(errno));
1337 goto out;
1340 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1341 report_io_prob("WRITE TO", bno, size, io_res);
1342 goto out;
1346 * 1208040
1347 * If we had to use -b to grab an alternate superblock, then we
1348 * likely had to do so because of unacceptable differences between
1349 * the main and alternate superblocks. So, we had better update
1350 * the alternate superblock as well, or we'll just fail again
1351 * the next time we attempt to run fsck!
1353 if (bflag != 0) {
1354 write_altsb(fswritefd);
1357 if (errorlocked) {
1358 if (!do_errorlock(LOCKFS_ULOCK))
1359 pwarn(
1360 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1363 out:
1364 if (cleanbuf.b_un.b_buf != NULL) {
1365 free((void *)cleanbuf.b_un.b_buf);
1368 return (freedlog);
1371 static void
1372 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1374 if (failure < 0)
1375 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1376 what, (int)bno, strerror(errno));
1377 else if (failure == 0)
1378 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1379 what, (int)bno);
1380 else
1381 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1382 what, (int)bno, (unsigned)failure, (unsigned)expected);
1386 * print out clean info
1388 void
1389 printclean(void)
1391 caddr_t s;
1393 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1394 s = "unknown";
1395 else
1396 switch (sblock.fs_clean) {
1398 case FSACTIVE:
1399 s = "active";
1400 break;
1402 case FSCLEAN:
1403 s = "clean";
1404 break;
1406 case FSSTABLE:
1407 s = "stable";
1408 break;
1410 case FSLOG:
1411 s = "logging";
1412 break;
1414 case FSBAD:
1415 s = "is bad";
1416 break;
1418 case FSFIX:
1419 s = "being fixed";
1420 break;
1422 default:
1423 s = "unknown";
1426 if (preen)
1427 pwarn("is %s.\n", s);
1428 else
1429 (void) printf("** %s is %s.\n", devname, s);
1433 is_errorlocked(caddr_t fs)
1435 int retval;
1436 struct stat64 statb;
1437 caddr_t mountp;
1438 struct mnttab *mntent;
1440 retval = 0;
1442 if (!fs)
1443 return (0);
1445 if (stat64(fs, &statb) < 0)
1446 return (0);
1448 if (S_ISDIR(statb.st_mode)) {
1449 mountp = fs;
1450 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1451 mntent = search_mnttab(NULL, fs, NULL, 0);
1452 if (mntent == NULL)
1453 return (0);
1454 mountp = mntent->mnt_mountp;
1455 if (mountp == NULL) /* theoretically a can't-happen */
1456 return (0);
1457 } else {
1458 return (0);
1462 * From here on, must `goto out' to avoid memory leakage.
1465 if (elock_combuf == NULL)
1466 elock_combuf =
1467 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1468 else
1469 elock_combuf =
1470 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1472 if (elock_combuf == NULL)
1473 goto out;
1475 (void) memset(elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1477 if (elock_mountp != NULL) {
1478 free(elock_mountp);
1481 elock_mountp = strdup(mountp);
1482 if (elock_mountp == NULL)
1483 goto out;
1485 if (mountfd < 0) {
1486 if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1487 goto out;
1490 if (lfp == NULL) {
1491 lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1492 if (lfp == NULL)
1493 goto out;
1494 (void) memset(lfp, 0, sizeof (struct lockfs));
1497 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1498 lfp->lf_comment = elock_combuf;
1500 if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1501 goto out;
1504 * lint believes that the ioctl() (or any other function
1505 * taking lfp as an arg) could free lfp. This is not the
1506 * case, however.
1508 retval = LOCKFS_IS_ELOCK(lfp);
1510 out:
1511 return (retval);
1515 * Given a name which is known to be a directory, see if it appears
1516 * in the vfstab. If so, return the entry's block (special) device
1517 * field via devstr.
1520 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1522 return (NULL != search_vfstab(name, NULL, devstr, str_size));
1526 * Given a name which is known to be a directory, see if it appears
1527 * in the mnttab. If so, return the entry's block (special) device
1528 * field via devstr.
1531 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1533 return (NULL != search_mnttab(name, NULL, devstr, str_size));
1537 * Search for mount point and/or special device in the given file.
1538 * The first matching entry is returned.
1540 * If an entry is found and str_size is greater than zero, then
1541 * up to size_str bytes of the special device name from the entry
1542 * are copied to devstr.
1545 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1546 st_nuller, st_init, st_searcher) \
1548 FILE *fp; \
1549 struct st_type *retval = NULL; \
1550 struct st_type key; \
1551 static struct st_type buffer; \
1553 /* LINTED ``assigned value never used'' */ \
1554 st_nuller(&key); \
1555 key.st_mount = mountp; \
1556 key.st_special = special; \
1557 st_init; \
1559 if ((fp = fopen(st_file, "r")) == NULL) \
1560 return (NULL); \
1562 if (st_searcher(fp, &buffer, &key) == 0) { \
1563 retval = &buffer; \
1564 if (devstr != NULL && str_size > 0 && \
1565 buffer.st_special != NULL) { \
1566 (void) strlcpy(devstr, buffer.st_special, \
1567 str_size); \
1570 (void) fclose(fp); \
1571 return (retval); \
1574 static struct vfstab *
1575 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1576 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1577 (retval = retval), getvfsany)
1579 static struct mnttab *
1580 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1581 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1582 (key.mnt_fstype = MNTTYPE_UFS), getmntany)
1585 do_errorlock(int lock_type)
1587 caddr_t buf;
1588 time_t now;
1589 struct tm *local;
1590 int rc;
1592 if (elock_combuf == NULL)
1593 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1594 elock_mountp ? elock_mountp : "<null>",
1595 lock_type);
1597 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1598 NULL) {
1599 errexit("Couldn't alloc memory for temp. lock status buffer\n");
1601 if (lfp == NULL) {
1602 errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1603 elock_mountp, lock_type);
1606 (void) memmove((void *)buf, (void *)elock_combuf,
1607 LOCKFS_MAXCOMMENTLEN-1);
1609 switch (lock_type) {
1610 case LOCKFS_ELOCK:
1612 * Note that if it is error-locked, we won't get an
1613 * error back if we try to error-lock it again.
1615 if (time(&now) != (time_t)-1) {
1616 if ((local = localtime(&now)) != NULL)
1617 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1618 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1619 elock_combuf, (int)pid,
1620 local->tm_mon + 1, local->tm_mday,
1621 (local->tm_year % 100), local->tm_hour,
1622 local->tm_min, local->tm_sec);
1623 else
1624 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1625 "%s [fsck pid %d", elock_combuf, pid);
1627 } else {
1628 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1629 "%s [fsck pid %d", elock_combuf, pid);
1631 break;
1633 case LOCKFS_ULOCK:
1634 if (time(&now) != (time_t)-1) {
1635 if ((local = localtime(&now)) != NULL) {
1636 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1637 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1638 elock_combuf,
1639 local->tm_mon + 1, local->tm_mday,
1640 (local->tm_year % 100), local->tm_hour,
1641 local->tm_min, local->tm_sec);
1642 } else {
1643 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1644 "%s]", elock_combuf);
1646 } else {
1647 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1648 "%s]", elock_combuf);
1650 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1651 pwarn("do_errorlock: unlock failed: %s\n",
1652 strerror(errno));
1653 goto out;
1655 break;
1657 default:
1658 break;
1661 (void) memmove((void *)elock_combuf, (void *)buf,
1662 LOCKFS_MAXCOMMENTLEN - 1);
1664 lfp->lf_lock = lock_type;
1665 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1666 lfp->lf_comment = elock_combuf;
1667 lfp->lf_flags = 0;
1668 errno = 0;
1670 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1671 if (errno == EINVAL) {
1672 pwarn("Another fsck active?\n");
1673 iscorrupt = 0; /* don't go away mad, just go away */
1674 } else {
1675 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1676 lock_type, elock_combuf, strerror(errno));
1679 out:
1680 if (buf != NULL) {
1681 free((void *)buf);
1684 return (rc != -1);
1688 * Shadow inode support. To register a shadow with a client is to note
1689 * that an inode (the client) refers to the shadow.
1692 static struct shadowclients *
1693 newshadowclient(struct shadowclients *prev)
1695 struct shadowclients *rc;
1697 rc = (struct shadowclients *)malloc(sizeof (*rc));
1698 if (rc == NULL)
1699 errexit("newshadowclient: cannot malloc shadow client");
1700 rc->next = prev;
1701 rc->nclients = 0;
1703 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1704 maxshadowclients);
1705 if (rc->client == NULL)
1706 errexit("newshadowclient: cannot malloc client array");
1707 return (rc);
1710 void
1711 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1712 struct shadowclientinfo **info)
1714 struct shadowclientinfo *sci;
1715 struct shadowclients *scc;
1718 * Already have a record for this shadow?
1720 for (sci = *info; sci != NULL; sci = sci->next)
1721 if (sci->shadow == shadow)
1722 break;
1723 if (sci == NULL) {
1725 * It's a new shadow, add it to the list
1727 sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1728 if (sci == NULL)
1729 errexit("registershadowclient: cannot malloc");
1730 sci->next = *info;
1731 *info = sci;
1732 sci->shadow = shadow;
1733 sci->totalClients = 0;
1734 sci->clients = newshadowclient(NULL);
1737 sci->totalClients++;
1738 scc = sci->clients;
1739 if (scc->nclients >= maxshadowclients) {
1740 scc = newshadowclient(sci->clients);
1741 sci->clients = scc;
1744 scc->client[scc->nclients++] = client;
1748 * Locate and discard a shadow.
1750 void
1751 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1753 struct shadowclientinfo *sci, *prev;
1756 * Do we have a record for this shadow?
1758 prev = NULL;
1759 for (sci = *info; sci != NULL; sci = sci->next) {
1760 if (sci->shadow == shadow)
1761 break;
1762 prev = sci;
1765 if (sci != NULL) {
1767 * First, pull it off the list, since we know there
1768 * shouldn't be any future references to this one.
1770 if (prev == NULL)
1771 *info = sci->next;
1772 else
1773 prev->next = sci->next;
1774 deshadow(sci, clearattrref);
1779 * Discard all memory used to track clients of a shadow.
1781 void
1782 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1784 struct shadowclients *clients, *discard;
1785 int idx;
1787 clients = sci->clients;
1788 while (clients != NULL) {
1789 discard = clients;
1790 clients = clients->next;
1791 if (discard->client != NULL) {
1792 if (cb != NULL) {
1793 for (idx = 0; idx < discard->nclients; idx++)
1794 (*cb)(discard->client[idx]);
1796 free((void *)discard->client);
1798 free((void *)discard);
1801 free((void *)sci);
1805 * Allocate more buffer as need arises but allocate one at a time.
1806 * This is done to make sure that fsck does not exit with error if it
1807 * needs more buffer to complete its task.
1809 static struct bufarea *
1810 alloc_bufarea(void)
1812 struct bufarea *newbp;
1813 caddr_t bufp;
1815 bufp = malloc((unsigned int)sblock.fs_bsize);
1816 if (bufp == NULL)
1817 return (NULL);
1819 newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1820 if (newbp == NULL) {
1821 free((void *)bufp);
1822 return (NULL);
1825 initbarea(newbp);
1826 newbp->b_un.b_buf = bufp;
1827 newbp->b_prev = &bufhead;
1828 newbp->b_next = bufhead.b_next;
1829 bufhead.b_next->b_prev = newbp;
1830 bufhead.b_next = newbp;
1831 bufhead.b_size++;
1832 return (newbp);
1836 * We length-limit in both unrawname() and rawname() to avoid
1837 * overflowing our arrays or those of our naive, trusting callers.
1840 caddr_t
1841 unrawname(caddr_t name)
1843 caddr_t dp;
1844 static char fullname[MAXPATHLEN + 1];
1846 if ((dp = getfullblkname(name)) == NULL)
1847 return ("");
1849 (void) strlcpy(fullname, dp, sizeof (fullname));
1851 * Not reporting under debug, as the allocation isn't
1852 * reported by getfullblkname. The idea is that we
1853 * produce balanced alloc/free instances.
1855 free(dp);
1857 return (fullname);
1860 caddr_t
1861 rawname(caddr_t name)
1863 caddr_t dp;
1864 static char fullname[MAXPATHLEN + 1];
1866 if ((dp = getfullrawname(name)) == NULL)
1867 return ("");
1869 (void) strlcpy(fullname, dp, sizeof (fullname));
1871 * Not reporting under debug, as the allocation isn't
1872 * reported by getfullblkname. The idea is that we
1873 * produce balanced alloc/free instances.
1875 free(dp);
1877 return (fullname);
1881 * Make sure that a cg header looks at least moderately reasonable.
1882 * We want to be able to trust the contents enough to be able to use
1883 * the standard accessor macros. So, besides looking at the obvious
1884 * such as the magic number, we verify that the offset field values
1885 * are properly aligned and not too big or small.
1887 * Returns a NULL pointer if the cg is sane enough for our needs, else
1888 * a dynamically-allocated string describing all of its faults.
1890 #define Append_Error(full, full_len, addition, addition_len) \
1891 if (full == NULL) { \
1892 full = addition; \
1893 full_len = addition_len; \
1894 } else { \
1895 /* lint doesn't think realloc() understands NULLs */ \
1896 full = realloc(full, full_len + addition_len + 1); \
1897 if (full == NULL) { \
1898 errexit("Out of memory in cg_sanity"); \
1899 /* NOTREACHED */ \
1901 (void) strcpy(full + full_len, addition); \
1902 full_len += addition_len; \
1903 free(addition); \
1906 caddr_t
1907 cg_sanity(struct cg *cgp, int cgno)
1909 caddr_t full_err;
1910 caddr_t this_err = NULL;
1911 int full_len, this_len;
1912 daddr32_t ndblk;
1913 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1914 daddr32_t exp_freeoff, exp_nextfreeoff;
1916 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1917 &exp_freeoff, &exp_nextfreeoff, &ndblk);
1919 full_err = NULL;
1920 full_len = 0;
1922 if (!cg_chkmagic(cgp)) {
1923 this_len = fsck_asprintf(&this_err,
1924 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1925 cgp->cg_magic, CG_MAGIC);
1926 Append_Error(full_err, full_len, this_err, this_len);
1929 if (cgp->cg_cgx != cgno) {
1930 this_len = fsck_asprintf(&this_err,
1931 "WRONG CG NUMBER (%d should be %d)\n",
1932 cgp->cg_cgx, cgno);
1933 Append_Error(full_err, full_len, this_err, this_len);
1936 if ((cgp->cg_btotoff & 3) != 0) {
1937 this_len = fsck_asprintf(&this_err,
1938 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1939 cgp->cg_btotoff);
1940 Append_Error(full_err, full_len, this_err, this_len);
1943 if ((cgp->cg_boff & 1) != 0) {
1944 this_len = fsck_asprintf(&this_err,
1945 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1946 cgp->cg_boff);
1947 Append_Error(full_err, full_len, this_err, this_len);
1950 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1951 if (cgp->cg_ncyl < 1) {
1952 this_len = fsck_asprintf(&this_err,
1953 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1954 cgp->cg_ncyl);
1955 } else {
1956 this_len = fsck_asprintf(&this_err,
1957 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1958 cgp->cg_ncyl, sblock.fs_cpg);
1960 Append_Error(full_err, full_len, this_err, this_len);
1963 if (cgp->cg_niblk != sblock.fs_ipg) {
1964 this_len = fsck_asprintf(&this_err,
1965 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1966 cgp->cg_niblk, sblock.fs_ipg);
1967 Append_Error(full_err, full_len, this_err, this_len);
1970 if (cgp->cg_ndblk != ndblk) {
1971 this_len = fsck_asprintf(&this_err,
1972 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1973 cgp->cg_ndblk, ndblk);
1974 Append_Error(full_err, full_len, this_err, this_len);
1977 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1978 this_len = fsck_asprintf(&this_err,
1979 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1980 "(%d should be at least 0 and less than %d)\n",
1981 cgp->cg_rotor, ndblk);
1982 Append_Error(full_err, full_len, this_err, this_len);
1985 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1986 this_len = fsck_asprintf(&this_err,
1987 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1988 "(%d should be at least 0 and less than %d)\n",
1989 cgp->cg_frotor, ndblk);
1990 Append_Error(full_err, full_len, this_err, this_len);
1993 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1994 this_len = fsck_asprintf(&this_err,
1995 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1996 "(%d should be at least 0 and less than %d)\n",
1997 cgp->cg_irotor, sblock.fs_ipg);
1998 Append_Error(full_err, full_len, this_err, this_len);
2001 if (cgp->cg_btotoff != exp_btotoff) {
2002 this_len = fsck_asprintf(&this_err,
2003 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2004 cgp->cg_btotoff, exp_btotoff);
2005 Append_Error(full_err, full_len, this_err, this_len);
2008 if (cgp->cg_boff != exp_boff) {
2009 this_len = fsck_asprintf(&this_err,
2010 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2011 cgp->cg_boff, exp_boff);
2012 Append_Error(full_err, full_len, this_err, this_len);
2015 if (cgp->cg_iusedoff != exp_iusedoff) {
2016 this_len = fsck_asprintf(&this_err,
2017 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2018 cgp->cg_iusedoff, exp_iusedoff);
2019 Append_Error(full_err, full_len, this_err, this_len);
2022 if (cgp->cg_freeoff != exp_freeoff) {
2023 this_len = fsck_asprintf(&this_err,
2024 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2025 cgp->cg_freeoff, exp_freeoff);
2026 Append_Error(full_err, full_len, this_err, this_len);
2029 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2030 this_len = fsck_asprintf(&this_err,
2031 "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2032 cgp->cg_nextfreeoff, exp_nextfreeoff);
2033 Append_Error(full_err, full_len, this_err, this_len);
2036 return (full_err);
2039 #undef Append_Error
2042 * This is taken from mkfs, and is what is used to come up with the
2043 * original values for a struct cg. This implies that, since these
2044 * are all constants, recalculating them now should give us the same
2045 * thing as what's on disk.
2047 static void
2048 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2049 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2050 daddr32_t *ndblk)
2052 daddr32_t cbase, dmax;
2053 struct cg *cgp;
2055 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2056 (size_t)sblock.fs_cgsize);
2057 cgp = cgblk.b_un.b_cg;
2059 cbase = cgbase(&sblock, cgno);
2060 dmax = cbase + sblock.fs_fpg;
2061 if (dmax > sblock.fs_size)
2062 dmax = sblock.fs_size;
2064 /* LINTED pointer difference won't overflow */
2065 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2066 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2067 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2068 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2069 *nextfreeoff = *freeoff +
2070 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2071 *ndblk = dmax - cbase;
2075 * Corrects all fields in the cg that can be done with the available
2076 * redundant data.
2078 void
2079 fix_cg(struct cg *cgp, int cgno)
2081 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2082 daddr32_t exp_freeoff, exp_nextfreeoff;
2083 daddr32_t ndblk;
2085 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2086 &exp_freeoff, &exp_nextfreeoff, &ndblk);
2088 if (cgp->cg_cgx != cgno) {
2089 cgp->cg_cgx = cgno;
2092 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2093 if (cgno == (sblock.fs_ncg - 1)) {
2094 cgp->cg_ncyl = sblock.fs_ncyl -
2095 (sblock.fs_cpg * cgno);
2096 } else {
2097 cgp->cg_ncyl = sblock.fs_cpg;
2101 if (cgp->cg_niblk != sblock.fs_ipg) {
2103 * This is not used by the kernel, so it's pretty
2104 * harmless if it's wrong.
2106 cgp->cg_niblk = sblock.fs_ipg;
2109 if (cgp->cg_ndblk != ndblk) {
2110 cgp->cg_ndblk = ndblk;
2114 * For the rotors, any position's valid, so pick the one we know
2115 * will always exist.
2117 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2118 cgp->cg_rotor = 0;
2121 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2122 cgp->cg_frotor = 0;
2125 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2126 cgp->cg_irotor = 0;
2130 * For btotoff and boff, if they're misaligned they won't
2131 * match the expected values, so we're catching both cases
2132 * here. Of course, if any of these are off, it seems likely
2133 * that the tables really won't be where we calculate they
2134 * should be anyway.
2136 if (cgp->cg_btotoff != exp_btotoff) {
2137 cgp->cg_btotoff = exp_btotoff;
2140 if (cgp->cg_boff != exp_boff) {
2141 cgp->cg_boff = exp_boff;
2144 if (cgp->cg_iusedoff != exp_iusedoff) {
2145 cgp->cg_iusedoff = exp_iusedoff;
2148 if (cgp->cg_freeoff != exp_freeoff) {
2149 cgp->cg_freeoff = exp_freeoff;
2152 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2153 cgp->cg_nextfreeoff = exp_nextfreeoff;
2157 * Reset the magic, as we've recreated this cg, also
2158 * update the cg_time, as we're writing out the cg
2160 cgp->cg_magic = CG_MAGIC;
2161 cgp->cg_time = time(NULL);
2164 * We know there was at least one correctable problem,
2165 * or else we wouldn't have been called. So instead of
2166 * marking the buffer dirty N times above, just do it
2167 * once here.
2169 cgdirty();
2172 void
2173 examinelog(void (*cb)(daddr32_t))
2175 struct bufarea *bp;
2176 extent_block_t *ebp;
2177 extent_t *ep;
2178 daddr32_t nfno, fno;
2179 int i;
2180 int j;
2183 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2184 * we need to translate accordingly using logbtodb()
2187 if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2188 if (debug) {
2189 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2190 "Aborting log examination\n", \
2191 logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2193 return;
2197 * Read errors will return zeros, which will cause us
2198 * to do nothing harmful, so don't need to handle it.
2200 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2201 (size_t)sblock.fs_bsize);
2202 ebp = (void *)bp->b_un.b_buf;
2205 * Does it look like a log allocation table?
2207 /* LINTED pointer cast is aligned */
2208 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2209 sblock.fs_bsize))
2210 return;
2211 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2212 return;
2214 ep = &ebp->extents[0];
2215 for (i = 0; i < ebp->nextents; ++i, ++ep) {
2216 fno = logbtofrag(&sblock, ep->pbno);
2217 nfno = dbtofsb(&sblock, ep->nbno);
2218 for (j = 0; j < nfno; ++j, ++fno) {
2220 * Invoke the callback first, so that pass1 can
2221 * mark the log blocks in-use. Then, if any
2222 * subsequent pass over the log shows us that a
2223 * block got freed (say, it was also claimed by
2224 * an inode that we cleared), we can safely declare
2225 * the log bad.
2227 if (cb != NULL)
2228 (*cb)(fno);
2229 if (!testbmap(fno))
2230 islogok = 0;
2233 brelse(bp);
2235 if (cb != NULL) {
2236 fno = logbtofrag(&sblock, sblock.fs_logbno);
2237 for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2238 (*cb)(fno);
2242 static void
2243 freelogblk(daddr32_t frag)
2245 freeblk(sblock.fs_logbno, frag, 1);
2248 caddr_t
2249 file_id(fsck_ino_t inum, mode_t mode)
2251 static char name[MAXPATHLEN + 1];
2253 if (lfdir == inum) {
2254 return (lfname);
2257 if ((mode & IFMT) == IFDIR) {
2258 (void) strcpy(name, "DIR");
2259 } else if ((mode & IFMT) == IFATTRDIR) {
2260 (void) strcpy(name, "ATTR DIR");
2261 } else if ((mode & IFMT) == IFSHAD) {
2262 (void) strcpy(name, "ACL");
2263 } else {
2264 (void) strcpy(name, "FILE");
2267 return (name);
2271 * Simple initializer for inodesc structures, so users of only a few
2272 * fields don't have to worry about getting the right defaults for
2273 * everything out.
2275 void
2276 init_inodesc(struct inodesc *idesc)
2279 * Most fields should be zero, just hit the special cases.
2281 (void) memset(idesc, 0, sizeof (struct inodesc));
2282 idesc->id_fix = DONTKNOW;
2283 idesc->id_lbn = -1;
2284 idesc->id_truncto = -1;
2285 idesc->id_firsthole = -1;
2289 * Compare routine for tsearch(C) to use on ino_t instances.
2292 ino_t_cmp(const void *left, const void *right)
2294 const fsck_ino_t lino = (const fsck_ino_t)left;
2295 const fsck_ino_t rino = (const fsck_ino_t)right;
2297 return (lino - rino);
2301 cgisdirty(void)
2303 return (cgblk.b_dirty);
2306 void
2307 cgflush(void)
2309 flush(fswritefd, &cgblk);
2312 void
2313 dirty(struct bufarea *bp)
2315 if (fswritefd < 0) {
2317 * No one should call dirty() in read only mode.
2318 * But if one does, it's not fatal issue. Just warn them.
2320 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2321 } else {
2322 (bp)->b_dirty = 1;
2323 isdirty = 1;
2327 void
2328 initbarea(struct bufarea *bp)
2330 (bp)->b_dirty = 0;
2331 (bp)->b_bno = (diskaddr_t)-1LL;
2332 (bp)->b_flags = 0;
2333 (bp)->b_cnt = 0;
2334 (bp)->b_errs = 0;
/*
 * Partition-sizing routines adapted from ../newfs/newfs.c.
 * Needed because calcsb() needs to use mkfs to work out what the
 * superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit to ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */

/* Kind of disk label found by the most recent get_device_size() call. */
static int	label_type;

#define	LABEL_TYPE_VTOC		1
#define	LABEL_TYPE_EFI		2
#define	LABEL_TYPE_OTHER	3

/*
 * MB is a byte count.  The other two are sector counts: they are used
 * as sector offsets by brute_force_get_device_size().
 */
#define	MB			(1024 * 1024)
#define	SECTORS_PER_TERABYTE	(1LL << 31)
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2361 diskaddr_t
2362 getdisksize(caddr_t disk, int fd)
2364 int rpm;
2365 struct dk_geom g;
2366 struct dk_cinfo ci;
2367 diskaddr_t actual_size;
2370 * get_device_size() determines the actual size of the
2371 * device, and also the disk's attributes, such as geometry.
2373 actual_size = get_device_size(fd, disk);
2375 if (label_type == LABEL_TYPE_VTOC) {
2376 if (ioctl(fd, DKIOCGGEOM, &g)) {
2377 pwarn("%s: Unable to read Disk geometry", disk);
2378 return (0);
2380 if (sblock.fs_nsect == 0)
2381 sblock.fs_nsect = g.dkg_nsect;
2382 if (sblock.fs_ntrak == 0)
2383 sblock.fs_ntrak = g.dkg_nhead;
2384 if (sblock.fs_rps == 0) {
2385 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2386 sblock.fs_rps = rpm / 60;
2390 if (sblock.fs_bsize == 0)
2391 sblock.fs_bsize = MAXBSIZE;
2394 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2395 * information is not available, default to the min of a MB and
2396 * maxphys.
2398 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2399 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2400 if (sblock.fs_maxcontig < 0) {
2401 int gotit, maxphys;
2403 gotit = fsgetmaxphys(&maxphys, NULL);
2406 * If we cannot get the maxphys value, default
2407 * to ufs_maxmaxphys (MB).
2409 if (gotit) {
2410 sblock.fs_maxcontig = MIN(maxphys, MB);
2411 } else {
2412 sblock.fs_maxcontig = MB;
2415 sblock.fs_maxcontig /= sblock.fs_bsize;
2418 return (actual_size);
2422 * Figure out how big the partition we're dealing with is.
2424 static diskaddr_t
2425 get_device_size(int fd, caddr_t name)
2427 struct extvtoc vtoc;
2428 struct dk_gpt *efi_vtoc;
2429 diskaddr_t slicesize = 0;
2431 int index = read_extvtoc(fd, &vtoc);
2433 if (index >= 0) {
2434 label_type = LABEL_TYPE_VTOC;
2435 } else {
2436 if (index == VT_ENOTSUP || index == VT_ERROR) {
2437 /* it might be an EFI label */
2438 index = efi_alloc_and_read(fd, &efi_vtoc);
2439 if (index >= 0)
2440 label_type = LABEL_TYPE_EFI;
2444 if (index < 0) {
2446 * Since both attempts to read the label failed, we're
2447 * going to fall back to a brute force approach to
2448 * determining the device's size: see how far out we can
2449 * perform reads on the device.
2452 slicesize = brute_force_get_device_size(fd);
2453 if (slicesize == 0) {
2454 switch (index) {
2455 case VT_ERROR:
2456 pwarn("%s: %s\n", name, strerror(errno));
2457 break;
2458 case VT_EIO:
2459 pwarn("%s: I/O error accessing VTOC", name);
2460 break;
2461 case VT_EINVAL:
2462 pwarn("%s: Invalid field in VTOC", name);
2463 break;
2464 default:
2465 pwarn("%s: unknown error %d accessing VTOC",
2466 name, index);
2467 break;
2469 return (0);
2470 } else {
2471 label_type = LABEL_TYPE_OTHER;
2475 if (label_type == LABEL_TYPE_EFI) {
2476 slicesize = efi_vtoc->efi_parts[index].p_size;
2477 efi_free(efi_vtoc);
2478 } else if (label_type == LABEL_TYPE_VTOC) {
2479 slicesize = vtoc.v_part[index].p_size;
2482 return (slicesize);
2486 * brute_force_get_device_size
2488 * Determine the size of the device by seeing how far we can
2489 * read. Doing an llseek( , , SEEK_END) would probably work
2490 * in most cases, but we've seen at least one third-party driver
2491 * which doesn't correctly support the SEEK_END option when the
2492 * the device is greater than a terabyte.
2495 static diskaddr_t
2496 brute_force_get_device_size(int fd)
2498 diskaddr_t min_fail = 0;
2499 diskaddr_t max_succeed = 0;
2500 diskaddr_t cur_db_off;
2501 char buf[DEV_BSIZE];
2504 * First, see if we can read the device at all, just to
2505 * eliminate errors that have nothing to do with the
2506 * device's size.
2509 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2510 ((read(fd, buf, DEV_BSIZE)) == -1))
2511 return (0); /* can't determine size */
2514 * Now, go sequentially through the multiples of 4TB
2515 * to find the first read that fails (this isn't strictly
2516 * the most efficient way to find the actual size if the
2517 * size really could be anything between 0 and 2**64 bytes.
2518 * We expect the sizes to be less than 16 TB for some time,
2519 * so why do a bunch of reads that are larger than that?
2520 * However, this algorithm *will* work for sizes of greater
2521 * than 16 TB. We're just not optimizing for those sizes.)
2525 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526 * We're using > 32-bit constants here. Therefore, its flow
2527 * analysis is wrong. For the time being, ignore complaints
2528 * from it about the body of the for() being unreached.
2530 for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2531 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2532 cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2533 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2534 SEEK_SET) == -1) ||
2535 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2536 min_fail = cur_db_off;
2537 else
2538 max_succeed = cur_db_off;
2542 * XXX Same lint flow analysis problem as above.
2544 if (min_fail == 0)
2545 return (0);
2548 * We now know that the size of the device is less than
2549 * min_fail and greater than or equal to max_succeed. Now
2550 * keep splitting the difference until the actual size in
2551 * sectors in known. We also know that the difference
2552 * between max_succeed and min_fail at this time is
2553 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554 * simplifies the math below.
2557 while (min_fail - max_succeed > 1) {
2558 cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2559 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2560 SEEK_SET)) == -1) ||
2561 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2562 min_fail = cur_db_off;
2563 else
2564 max_succeed = cur_db_off;
2567 /* the size is the last successfully read sector offset plus one */
2568 return (max_succeed + 1);
2571 static void
2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2574 struct dinode *dp;
2575 char pathbuf[MAXPATHLEN + 1];
2577 vpwarn(fmt, ap);
2578 (void) putchar(' ');
2579 pinode(ino);
2580 (void) printf("\n");
2581 getpathname(pathbuf, cwd, ino);
2582 if (ino < UFSROOTINO || ino > maxino) {
2583 pfatal("NAME=%s\n", pathbuf);
2584 return;
2586 dp = ginode(ino);
2587 if (ftypeok(dp))
2588 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2589 else
2590 pfatal("NAME=%s\n", pathbuf);
2593 void
2594 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2596 va_list ap;
2598 va_start(ap, fmt);
2599 vfileerror(ino, ino, fmt, ap);
2600 va_end(ap);
2603 static void
2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2606 vfileerror(ino, ino, fmt, ap);
2609 void
2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2612 va_list ap;
2614 va_start(ap, fmt);
2615 vfileerror(cwd, ino, fmt, ap);
2616 va_end(ap);
2620 * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621 * Assumes that the caller has set INCLEAR in the inode's statemap[]
2622 * entry.
2624 * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625 * meaning it's effectively an orphan. It needs to be noted now, so
2626 * it will be remembered in pass 4.
2629 void
2630 add_orphan_dir(fsck_ino_t ino)
2632 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2633 errexit("add_orphan_dir: out of memory");
2637 * Remove an inode from the orphaned-directories list, presumably
2638 * because it's been cleared.
2640 void
2641 remove_orphan_dir(fsck_ino_t ino)
2643 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 */

/*
 * Compute the checksum of a buffer and store it.
 *
 * sp - where the checksum is stored; it is zeroed first, so it may
 *      point at a word inside the buffer being summed
 * lp - start of the buffer, treated as an array of 32-bit words
 * nb - size of the buffer in bytes; a trailing partial word is
 *      ignored and a non-positive size sums nothing
 *
 * The words are added with unsigned arithmetic so that the sum wraps
 * modulo 2^32 instead of overflowing a signed integer.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	for (nwords = nb / (int)sizeof (int32_t); nwords > 0; nwords--)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}

/*
 * Verify the checksum stored at *sp against the buffer at lp (nb
 * bytes).  Returns 1 when it matches and 0 when it does not.  In the
 * mismatch case the stored value is put back, so the buffer is left as
 * it was found.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	int32_t ssum = *sp;

	log_setsum(sp, lp, nb);
	if (ssum != *sp) {
		*sp = ssum;
		return (0);
	}
	return (1);
}