2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016 by Delphix. All rights reserved.
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11 * All rights reserved.
13 * Redistribution and use in source and binary forms are permitted
14 * provided that: (1) source distributions retain this entire copyright
15 * notice and comment, and (2) distributions including binaries display
16 * the following acknowledgement: ``This product includes software
17 * developed by the University of California, Berkeley and its contributors''
18 * in the documentation or other materials provided with the distribution
19 * and in all advertising materials mentioning features or use of this
20 * software. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/filio.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_acl.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/fs/ufs_log.h>
44 #include <sys/fs/ufs_fsdir.h>
46 #include <sys/mnttab.h>
47 #include <sys/types.h>
53 #include <sys/vfstab.h>
54 #include <sys/lockfs.h>
56 #include <sys/cmn_err.h>
59 #include <sys/efi_partition.h>
64 caddr_t mount_point
= NULL
;
66 static int64_t diskreads
, totalreads
; /* Disk cache statistics */
68 static int log_checksum(int32_t *, int32_t *, int);
69 static void vdirerror(fsck_ino_t
, caddr_t
, va_list);
70 static struct mnttab
*search_mnttab(caddr_t
, caddr_t
, caddr_t
, size_t);
71 static struct vfstab
*search_vfstab(caddr_t
, caddr_t
, caddr_t
, size_t);
72 static void vpwarn(caddr_t
, va_list);
73 static int getaline(FILE *, caddr_t
, int);
74 static struct bufarea
*alloc_bufarea(void);
75 static void rwerror(caddr_t
, diskaddr_t
, int rval
);
76 static void debugclean(void);
77 static void report_io_prob(caddr_t
, diskaddr_t
, size_t, ssize_t
);
78 static void freelogblk(daddr32_t
);
79 static void verrexit(caddr_t
, va_list);
80 static void vpfatal(caddr_t
, va_list);
81 static diskaddr_t
get_device_size(int, caddr_t
);
82 static diskaddr_t
brute_force_get_device_size(int);
83 static void cg_constants(int, daddr32_t
*, daddr32_t
*, daddr32_t
*,
84 daddr32_t
*, daddr32_t
*, daddr32_t
*);
87 ftypeok(struct dinode
*dp
)
89 switch (dp
->di_mode
& IFMT
) {
104 (void) printf("bad file type 0%o\n", dp
->di_mode
);
110 acltypeok(struct dinode
*dp
)
112 if (CHECK_ACL_ALLOWED(dp
->di_mode
& IFMT
))
116 (void) printf("bad file type for acl I=%d: 0%o\n",
117 dp
->di_shadow
, dp
->di_mode
);
123 reply(caddr_t fmt
, ...)
129 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
133 * We don't know what's going on, so don't potentially
134 * make things worse by having errexit() write stuff
138 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
144 (void) putchar('\n');
145 (void) vprintf(fmt
, ap
);
150 if (nflag
|| fswritefd
< 0) {
151 (void) printf(" no\n\n");
155 (void) printf(" yes\n\n");
158 (void) fflush(stdout
);
159 if (getaline(stdin
, line
, sizeof (line
)) == EOF
)
162 if (line
[0] == 'y' || line
[0] == 'Y') {
170 getaline(FILE *fp
, caddr_t loc
, int maxlen
)
176 lastloc
= &p
[maxlen
-1];
177 while ((n
= getc(fp
)) != '\n') {
180 if (!isspace(n
) && p
< lastloc
)
184 /* LINTED pointer difference won't overflow */
189 * Malloc buffers and set up cache.
198 bufp
= malloc((size_t)sblock
.fs_bsize
);
202 cgblk
.b_un
.b_buf
= bufp
;
203 bufhead
.b_next
= bufhead
.b_prev
= &bufhead
;
204 bufcnt
= MAXBUFSPACE
/ sblock
.fs_bsize
;
205 if (bufcnt
< MINBUFS
)
207 for (i
= 0; i
< bufcnt
; i
++) {
208 bp
= (struct bufarea
*)malloc(sizeof (struct bufarea
));
215 bufp
= malloc((size_t)sblock
.fs_bsize
);
223 bp
->b_un
.b_buf
= bufp
;
224 bp
->b_prev
= &bufhead
;
225 bp
->b_next
= bufhead
.b_next
;
226 bufhead
.b_next
->b_prev
= bp
;
230 bufhead
.b_size
= i
; /* save number of buffers */
235 errexit("cannot allocate buffer pool\n");
246 struct bufarea
*bp
, *nbp
;
249 for (bp
= bufhead
.b_prev
; bp
!= NULL
&& bp
!= &bufhead
; bp
= nbp
) {
251 flush(fswritefd
, bp
);
254 * We're discarding the entire chain, so this isn't
255 * technically necessary. However, it doesn't hurt
256 * and lint's data flow analysis is much happier
257 * (this prevents it from thinking there's a chance
258 * of our using memory elsewhere after it's been released).
260 nbp
->b_next
= bp
->b_next
;
261 bp
->b_next
->b_prev
= nbp
;
262 free((void *)bp
->b_un
.b_buf
);
266 if (bufhead
.b_size
!= cnt
)
267 errexit("Panic: cache lost %d buffers\n",
268 bufhead
.b_size
- cnt
);
272 * Manage a cache of directory blocks.
275 getdatablk(daddr32_t blkno
, size_t size
)
279 for (bp
= bufhead
.b_next
; bp
!= &bufhead
; bp
= bp
->b_next
)
280 if (bp
->b_bno
== fsbtodb(&sblock
, blkno
)) {
283 for (bp
= bufhead
.b_prev
; bp
!= &bufhead
; bp
= bp
->b_prev
)
284 if ((bp
->b_flags
& B_INUSE
) == 0)
286 if (bp
== &bufhead
) {
287 bp
= alloc_bufarea();
289 errexit("deadlocked buffer pool\n");
294 * We're at the same logical level as getblk(), so if there
295 * are any errors, we'll let our caller handle them.
298 (void) getblk(bp
, blkno
, size
);
304 * Move the buffer to head of linked list if it isn't
307 if (bufhead
.b_next
!= bp
) {
308 bp
->b_prev
->b_next
= bp
->b_next
;
309 bp
->b_next
->b_prev
= bp
->b_prev
;
310 bp
->b_prev
= &bufhead
;
311 bp
->b_next
= bufhead
.b_next
;
312 bufhead
.b_next
->b_prev
= bp
;
315 bp
->b_flags
|= B_INUSE
;
320 brelse(struct bufarea
*bp
)
323 if (bp
->b_cnt
== 0) {
324 bp
->b_flags
&= ~B_INUSE
;
329 getblk(struct bufarea
*bp
, daddr32_t blk
, size_t size
)
333 dblk
= fsbtodb(&sblock
, blk
);
334 if (bp
->b_bno
== dblk
)
336 flush(fswritefd
, bp
);
337 bp
->b_errs
= fsck_bread(fsreadfd
, bp
->b_un
.b_buf
, dblk
, size
);
344 flush(int fd
, struct bufarea
*bp
)
354 * It's not our buf, so if there are errors, let whoever
355 * acquired it deal with the actual problem.
358 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp
->b_bno
);
361 bwrite(fd
, bp
->b_un
.b_buf
, bp
->b_bno
, (long)bp
->b_size
);
367 * We're flushing the superblock, so make sure all the
368 * ancillary bits go out as well.
370 sip
= (caddr_t
)sblock
.fs_u
.fs_csp
;
371 for (i
= 0, j
= 0; i
< sblock
.fs_cssize
; i
+= sblock
.fs_bsize
, j
++) {
372 size
= sblock
.fs_cssize
- i
< sblock
.fs_bsize
?
373 sblock
.fs_cssize
- i
: sblock
.fs_bsize
;
374 bwrite(fswritefd
, sip
,
375 fsbtodb(&sblock
, sblock
.fs_csaddr
+ j
* sblock
.fs_frag
),
382 rwerror(caddr_t mesg
, diskaddr_t blk
, int rval
)
390 pfatal("CANNOT %s: DISK BLOCK %lld: %s",
391 mesg
, blk
, strerror(olderr
));
393 pfatal("CANNOT %s: DISK BLOCK %lld", mesg
, blk
);
395 if (reply("CONTINUE") == 0) {
396 exitstat
= EXERRFATAL
;
397 errexit("Program terminated\n");
409 flush(fswritefd
, &sblk
);
411 * Were we using a backup superblock?
413 if (havesb
&& sblk
.b_bno
!= SBOFF
/ dev_bsize
) {
414 if (preen
|| reply("UPDATE STANDARD SUPERBLOCK") == 1) {
415 sblk
.b_bno
= SBOFF
/ dev_bsize
;
417 flush(fswritefd
, &sblk
);
420 flush(fswritefd
, &cgblk
);
421 if (cgblk
.b_un
.b_buf
!= NULL
) {
422 free((void *)cgblk
.b_un
.b_buf
);
423 cgblk
.b_un
.b_buf
= NULL
;
430 * Note that we only count cache-related reads.
431 * Anything that calls fsck_bread() or getblk()
432 * directly is explicitly not cached, so such reads are not
436 percentage
= diskreads
* 100 / totalreads
;
440 (void) printf("cache missed %lld of %lld reads (%lld%%)\n",
441 (longlong_t
)diskreads
, (longlong_t
)totalreads
,
442 (longlong_t
)percentage
);
445 (void) close(fsreadfd
);
446 (void) close(fswritefd
);
452 fsck_bread(int fd
, caddr_t buf
, diskaddr_t blk
, size_t size
)
457 offset_t offset
= ldbtob(blk
);
461 * In our universe, nothing exists before the superblock, so
462 * just pretend it's always zeros. This is the complement of
463 * bwrite()'s ignoring write requests into that space.
468 "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
469 SBLOCK
, (longlong_t
)blk
);
470 (void) memset(buf
, 0, (size_t)size
);
474 if (llseek(fd
, offset
, SEEK_SET
) < 0) {
475 rwerror("SEEK", blk
, -1);
478 if ((i
= read(fd
, buf
, size
)) == size
) {
481 rwerror("READ", blk
, i
);
482 if (llseek(fd
, offset
, SEEK_SET
) < 0) {
483 rwerror("SEEK", blk
, -1);
486 (void) memset(buf
, 0, (size_t)size
);
487 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
488 for (cp
= buf
, i
= 0; i
< btodb(size
); i
++, cp
+= DEV_BSIZE
) {
489 addr
= ldbtob(blk
+ i
);
490 if (llseek(fd
, addr
, SEEK_SET
) < 0 ||
491 read(fd
, cp
, (int)secsize
) < 0) {
493 (void) printf(" %llu", blk
+ (u_longlong_t
)i
);
502 bwrite(int fd
, caddr_t buf
, diskaddr_t blk
, int64_t size
)
507 offset_t offset
= ldbtob(blk
);
515 "WARNING: Attempt to write illegal blkno %lld on %s\n",
516 (longlong_t
)blk
, devname
);
519 if (llseek(fd
, offset
, SEEK_SET
) < 0) {
520 rwerror("SEEK", blk
, -1);
522 if ((i
= write(fd
, buf
, (int)size
)) == size
) {
526 rwerror("WRITE", blk
, i
);
527 if (llseek(fd
, offset
, SEEK_SET
) < 0) {
528 rwerror("SEEK", blk
, -1);
530 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
531 for (cp
= buf
, i
= 0; i
< btodb(size
); i
++, cp
+= DEV_BSIZE
) {
533 addr
= ldbtob(blk
+ i
);
534 if (llseek(fd
, addr
, SEEK_SET
) < 0 ||
535 (n
= write(fd
, cp
, DEV_BSIZE
)) < 0) {
537 (void) printf(" %llu", blk
+ (u_longlong_t
)i
);
547 * Allocates the specified number of contiguous fragments.
550 allocblk(int wantedfrags
)
552 int block
, leadfrag
, tailfrag
;
558 * It's arguable whether we should just fail, or instead
559 * error out here. Since we should only ever be asked for
560 * a single fragment or an entire block (i.e., sblock.fs_frag),
561 * we'll fail out because anything else means somebody
562 * changed code without considering all of the ramifications.
564 if (wantedfrags
<= 0 || wantedfrags
> sblock
.fs_frag
) {
565 exitstat
= EXERRFATAL
;
566 errexit("allocblk() asked for %d frags. "
567 "Legal range is 1 to %d",
568 wantedfrags
, sblock
.fs_frag
);
572 * For each filesystem block, look at every possible starting
573 * offset within the block such that we can get the number of
574 * contiguous fragments that we need. This is a drastically
575 * simplified version of the kernel's mapsearch() and alloc*().
576 * It's also correspondingly slower.
578 for (block
= 0; block
< maxfsblock
- sblock
.fs_frag
;
579 block
+= sblock
.fs_frag
) {
580 for (leadfrag
= 0; leadfrag
<= sblock
.fs_frag
- wantedfrags
;
583 * Is first fragment of candidate run available?
585 if (testbmap(block
+ leadfrag
))
588 * Are the rest of them available?
590 for (tailfrag
= 1; tailfrag
< wantedfrags
; tailfrag
++)
591 if (testbmap(block
+ leadfrag
+ tailfrag
))
593 if (tailfrag
< wantedfrags
) {
595 * No, skip the known-unusable run.
597 leadfrag
+= tailfrag
;
601 * Found what we need, so claim them.
603 for (tailfrag
= 0; tailfrag
< wantedfrags
; tailfrag
++)
604 setbmap(block
+ leadfrag
+ tailfrag
);
605 n_blks
+= wantedfrags
;
606 size
= wantedfrags
* sblock
.fs_fsize
;
607 selected
= block
+ leadfrag
;
608 bp
= getdatablk(selected
, size
);
609 (void) memset((void *)bp
->b_un
.b_buf
, 0, size
);
614 "allocblk: selected %d (in block %d), frags %d, size %d\n",
615 selected
, selected
% sblock
.fs_bsize
,
616 wantedfrags
, (int)size
);
624 * Free a previously allocated block
627 freeblk(fsck_ino_t ino
, daddr32_t blkno
, int frags
)
629 struct inodesc idesc
;
632 (void) printf("debug: freeing %d fragments starting at %d\n",
635 init_inodesc(&idesc
);
637 idesc
.id_number
= ino
;
638 idesc
.id_blkno
= blkno
;
639 idesc
.id_numfrags
= frags
;
640 idesc
.id_truncto
= -1;
643 * Nothing in the return status has any relevance to how
644 * we're using pass4check(), so just ignore it.
646 (void) pass4check(&idesc
);
650 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes
651 * that the given buffer is at least MAXPATHLEN + 1 characters.
654 getpathname(caddr_t namebuf
, fsck_ino_t curdir
, fsck_ino_t ino
)
659 struct inodesc idesc
;
663 (void) printf("debug: getpathname(curdir %d, ino %d)\n",
666 if ((curdir
== 0) || (!INO_IS_DVALID(curdir
))) {
667 (void) strcpy(namebuf
, "?");
671 if ((curdir
== UFSROOTINO
) && (ino
== UFSROOTINO
)) {
672 (void) strcpy(namebuf
, "/");
676 init_inodesc(&idesc
);
677 idesc
.id_type
= DATA
;
678 cp
= &namebuf
[MAXPATHLEN
- 1];
682 * In the case of extended attributes, our
683 * parent won't necessarily be a directory, so just
684 * return what we've found with a prefix indicating
685 * that it's an XATTR. Presumably our caller will
686 * know what's going on and do something useful, like
687 * work out the path of the parent and then combine
690 * Can't use strcpy(), etc, because we've probably
691 * already got some name information in the buffer and
692 * the usual trailing \0 would lose it.
695 if ((dp
->di_mode
& IFMT
) == IFATTRDIR
) {
696 idesc
.id_number
= curdir
;
697 idesc
.id_parent
= ino
;
698 idesc
.id_func
= findname
;
699 idesc
.id_name
= namebuf
;
700 idesc
.id_fix
= NOFIX
;
701 if ((ckinode(dp
, &idesc
, CKI_TRAVERSE
) & FOUND
) == 0) {
705 len
= sizeof (XATTR_DIR_NAME
) - 1;
707 (void) memmove(cp
, XATTR_DIR_NAME
, len
);
712 * If curdir == ino, need to get a handle on .. so we
713 * can search it for ino's name. Otherwise, just search
714 * the given directory for ino. Repeat until out of space
715 * or a full path has been built.
718 idesc
.id_parent
= curdir
;
721 while (ino
!= UFSROOTINO
&& ino
!= 0) {
722 idesc
.id_number
= ino
;
723 idesc
.id_func
= findino
;
724 idesc
.id_name
= "..";
725 idesc
.id_fix
= NOFIX
;
726 if ((ckinode(ginode(ino
), &idesc
, CKI_TRAVERSE
) & FOUND
) == 0) {
727 inp
= getinoinfo(ino
);
728 if ((inp
== NULL
) || (inp
->i_parent
== 0)) {
731 idesc
.id_parent
= inp
->i_parent
;
735 * To get this far, id_parent must have the inode
736 * number for `..' in it. By definition, that's got
737 * to be a directory, so search it for the inode of
741 idesc
.id_number
= idesc
.id_parent
;
742 idesc
.id_parent
= ino
;
743 idesc
.id_func
= findname
;
744 idesc
.id_name
= namebuf
;
745 idesc
.id_fix
= NOFIX
;
746 if ((ckinode(ginode(idesc
.id_number
),
747 &idesc
, CKI_TRAVERSE
) & FOUND
) == 0) {
751 * Prepend to what we've accumulated so far. If
752 * there's not enough room for even one more path element
753 * (of the worst-case length), then bail out.
755 len
= strlen(namebuf
);
757 if (cp
< &namebuf
[MAXNAMLEN
])
759 (void) memmove(cp
, namebuf
, len
);
763 * Corner case for a looped-to-itself directory.
765 if (ino
== idesc
.id_number
)
769 * Climb one level of the hierarchy. In other words,
770 * the current .. becomes the inode to search for and
771 * its parent becomes the directory to search in.
773 ino
= idesc
.id_number
;
777 * If we hit a discontinuity in the hierarchy, indicate it by
778 * prefixing the path so far with `?'. Otherwise, the first
779 * character will be `/' as a side-effect of the *--cp above.
781 * The special case is to handle the situation where we're
782 * trying to look something up in UFSROOTINO, but didn't find
785 if (ino
!= UFSROOTINO
|| cp
== &namebuf
[MAXPATHLEN
- 1]) {
792 * The invariants being used for buffer integrity are:
793 * - namebuf[] is terminated with \0 before anything else
794 * - cp is always <= the last element of namebuf[]
795 * - the new path element is always stored at the
796 * beginning of namebuf[], and is no more than MAXNAMLEN-1
798 * - cp is decremented by the number of characters in
799 * the new path element
800 * - if, after the above accounting for the new element's
801 * size, there is no longer enough room at the beginning of
802 * namebuf[] for a full-sized path element and a slash,
803 * terminate the loop. cp is in the range
804 * &namebuf[0]..&namebuf[MAXNAMLEN - 1]
807 /* LINTED per the above discussion */
808 (void) memmove(namebuf
, cp
, &namebuf
[MAXPATHLEN
] - cp
);
820 * When preening, allow a single quit to signal
821 * a special exit after filesystem checks complete
822 * so that reboot sequence may be interrupted.
828 (void) printf("returning to single-user after filesystem check\n");
830 (void) signal(SIGQUIT
, SIG_DFL
);
835 * determine whether an inode should be fixed.
839 dofix(struct inodesc
*idesc
, caddr_t msg
, ...)
846 switch (idesc
->id_fix
) {
849 if (idesc
->id_type
== DATA
)
850 vdirerror(idesc
->id_number
, msg
, ap
);
858 if (reply("SALVAGE") == 0) {
859 idesc
->id_fix
= NOFIX
;
874 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc
->id_fix
);
883 errexit(caddr_t fmt
, ...)
894 verrexit(caddr_t fmt
, va_list ap
)
896 static int recursing
= 0;
900 if (errorlocked
|| iscorrupt
) {
901 if (havesb
&& fswritefd
>= 0) {
902 sblock
.fs_clean
= FSBAD
;
903 sblock
.fs_state
= FSOKAY
- (long)sblock
.fs_time
;
904 sblock
.fs_state
= -sblock
.fs_state
;
906 write_altsb(fswritefd
);
907 flush(fswritefd
, &sblk
);
913 (void) vprintf(fmt
, ap
);
914 if (fmt
[strlen(fmt
) - 1] != '\n')
915 (void) putchar('\n');
916 exit((exitstat
!= 0) ? exitstat
: EXERRFATAL
);
920 * An unexpected inconsistency occurred.
921 * Die if preening, otherwise just print message and continue.
925 pfatal(caddr_t fmt
, ...)
936 vpfatal(caddr_t fmt
, va_list ap
)
940 (void) printf("%s: ", devname
);
941 (void) vprintf(fmt
, ap
);
945 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
947 if (havesb
&& fswritefd
>= 0) {
948 sblock
.fs_clean
= FSBAD
;
949 sblock
.fs_state
= -(FSOKAY
- (long)sblock
.fs_time
);
951 flush(fswritefd
, &sblk
);
954 * We're exiting, it doesn't really matter that our
955 * caller doesn't get to call va_end().
958 exitstat
= EXFNDERRS
;
962 (void) vprintf(fmt
, ap
);
967 * Pwarn just prints a message when not preening,
968 * or a warning (preceded by the device name) when preening.
972 pwarn(caddr_t fmt
, ...)
983 vpwarn(caddr_t fmt
, va_list ap
)
987 (void) printf("%s: ", devname
);
988 (void) vprintf(fmt
, ap
);
993 * Like sprintf(), except the buffer is dynamically allocated
994 * and returned, instead of being passed in. A pointer to the
995 * buffer is stored in *RET, and FMT is the usual format string.
996 * The number of characters in *RET (excluding the trailing \0,
997 * to be consistent with the other *printf() routines) is returned.
999 * Solaris doesn't have asprintf(3C) yet, unfortunately.
1003 fsck_asprintf(caddr_t
*ret
, caddr_t fmt
, ...)
1010 len
= vsnprintf(NULL
, 0, fmt
, ap
);
1013 buffer
= malloc((len
+ 1) * sizeof (char));
1014 if (buffer
== NULL
) {
1015 errexit("Out of memory in asprintf\n");
1020 (void) vsnprintf(buffer
, len
+ 1, fmt
, ap
);
1028 * So we can take advantage of kernel routines in ufs_subr.c.
1032 cmn_err(int level
, caddr_t fmt
, ...)
1037 if (level
== CE_PANIC
) {
1038 (void) printf("INTERNAL INCONSISTENCY:");
1041 (void) vprintf(fmt
, ap
);
1047 * Check to see if unraw version of name is already mounted.
1048 * Updates devstr with the device name if devstr is not NULL
1049 * and str_size is positive.
1052 mounted(caddr_t name
, caddr_t devstr
, size_t str_size
)
1055 struct mnttab
*mntent
;
1057 mntent
= search_mnttab(NULL
, unrawname(name
), devstr
, str_size
);
1062 * It's mounted. With or without write access?
1064 if (hasmntopt(mntent
, MNTOPT_RO
) != 0)
1065 found
= M_RO
; /* mounted as RO */
1067 found
= M_RW
; /* mounted as R/W */
1069 if (mount_point
== NULL
) {
1070 mount_point
= strdup(mntent
->mnt_mountp
);
1071 if (mount_point
== NULL
) {
1072 errexit("fsck: memory allocation failure: %s",
1077 if (devstr
!= NULL
&& str_size
> 0)
1078 (void) strlcpy(devstr
, mntent
->mnt_special
, str_size
);
1085 * Check to see if name corresponds to an entry in vfstab, and that the entry
1086 * does not have option ro.
1089 writable(caddr_t name
)
1092 struct vfstab vfsbuf
, vfskey
;
1095 vfstab
= fopen(VFSTAB
, "r");
1096 if (vfstab
== NULL
) {
1097 (void) printf("can't open %s\n", VFSTAB
);
1100 (void) memset((void *)&vfskey
, 0, sizeof (vfskey
));
1102 vfskey
.vfs_special
= unrawname(name
);
1103 vfskey
.vfs_fstype
= MNTTYPE_UFS
;
1104 if ((getvfsany(vfstab
, &vfsbuf
, &vfskey
) == 0) &&
1105 (hasvfsopt(&vfsbuf
, MNTOPT_RO
))) {
1108 (void) fclose(vfstab
);
1121 if ((iscorrupt
== 0) && (isdirty
== 0))
1124 if ((sblock
.fs_clean
== FSSTABLE
) || (sblock
.fs_clean
== FSCLEAN
) ||
1125 (sblock
.fs_clean
== FSLOG
&& islog
&& islogok
) ||
1126 ((FSOKAY
== (sblock
.fs_state
+ sblock
.fs_time
)) && !errorlocked
))
1129 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1130 sblock
.fs_clean
== FSSTABLE
? "stable" :
1131 sblock
.fs_clean
== FSLOG
? "logging" :
1132 sblock
.fs_clean
== FSFIX
? "being fixed" : "clean",
1138 * Carefully and transparently update the clean flag.
1140 * `iscorrupt' has to be in its final state before this is called.
1146 struct bufarea cleanbuf
;
1164 * set fsclean to its appropriate value
1166 fslogbno
= sblock
.fs_logbno
;
1167 fsclean
= sblock
.fs_clean
;
1168 fsreclaim
= sblock
.fs_reclaim
;
1169 fsflags
= sblock
.fs_flags
;
1170 if (FSOKAY
!= (sblock
.fs_state
+ sblock
.fs_time
) && !errorlocked
) {
1174 * If ufs log is not okay, note that we need to clear it.
1177 if (fslogbno
&& !(islog
&& islogok
)) {
1183 * if necessary, update fs_clean and fs_state
1206 } else if (!islog
|| fslogbno
== 0) {
1216 if (errorlocked
&& !iscorrupt
) {
1217 fsclean
= islog
? FSLOG
: FSCLEAN
;
1230 if (largefile_count
> 0)
1231 fsflags
|= FSLARGEFILES
;
1233 fsflags
&= ~FSLARGEFILES
;
1236 * There can be two discrepancies here. A) The superblock
1237 * shows no largefiles but we found some while scanning.
1238 * B) The superblock indicates the presence of largefiles,
1239 * but none are present. Note that if preening, the superblock
1240 * is silently corrected.
1242 if ((fsflags
== FSLARGEFILES
&& sblock
.fs_flags
!= FSLARGEFILES
) ||
1243 (fsflags
!= FSLARGEFILES
&& sblock
.fs_flags
== FSLARGEFILES
))
1248 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1249 largefile_count
, sblock
.fs_flags
, flags_ok
);
1252 * If fs is unchanged, do nothing.
1254 if ((!isdirty
) && (flags_ok
) &&
1255 (fslogbno
== sblock
.fs_logbno
) &&
1256 (sblock
.fs_clean
== fsclean
) &&
1257 (sblock
.fs_reclaim
== fsreclaim
) &&
1258 (FSOKAY
== (sblock
.fs_state
+ sblock
.fs_time
))) {
1260 if (!do_errorlock(LOCKFS_ULOCK
))
1262 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1268 * if user allows, update superblock state
1272 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1273 sblock
.fs_flags
, sblock
.fs_logbno
,
1274 sblock
.fs_clean
, sblock
.fs_reclaim
,
1275 sblock
.fs_state
+ sblock
.fs_time
);
1277 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1278 fsflags
, fslogbno
, fsclean
, fsreclaim
, FSOKAY
);
1280 if (!isdirty
&& !preen
&& !rerun
&&
1281 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1285 sblock
.fs_time
= (time32_t
)t
;
1289 if (sblock
.fs_logbno
!= fslogbno
) {
1290 examinelog(&freelogblk
);
1294 sblock
.fs_logbno
= fslogbno
;
1295 sblock
.fs_clean
= fsclean
;
1296 sblock
.fs_state
= FSOKAY
- (long)sblock
.fs_time
;
1297 sblock
.fs_reclaim
= fsreclaim
;
1298 sblock
.fs_flags
= fsflags
;
1301 * if superblock can't be written, return
1307 * Read private copy of superblock, update clean flag, and write it.
1312 sblkoff
= ldbtob(bno
);
1314 if ((cleanbuf
.b_un
.b_buf
= malloc(size
)) == NULL
)
1315 errexit("out of memory");
1316 if (llseek(fsreadfd
, sblkoff
, SEEK_SET
) == -1) {
1317 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1318 (longlong_t
)bno
, strerror(errno
));
1322 if ((io_res
= read(fsreadfd
, cleanbuf
.b_un
.b_buf
, size
)) != size
) {
1323 report_io_prob("READ FROM", bno
, size
, io_res
);
1327 cleanbuf
.b_un
.b_fs
->fs_logbno
= sblock
.fs_logbno
;
1328 cleanbuf
.b_un
.b_fs
->fs_clean
= sblock
.fs_clean
;
1329 cleanbuf
.b_un
.b_fs
->fs_state
= sblock
.fs_state
;
1330 cleanbuf
.b_un
.b_fs
->fs_time
= sblock
.fs_time
;
1331 cleanbuf
.b_un
.b_fs
->fs_reclaim
= sblock
.fs_reclaim
;
1332 cleanbuf
.b_un
.b_fs
->fs_flags
= sblock
.fs_flags
;
1334 if (llseek(fswritefd
, sblkoff
, SEEK_SET
) == -1) {
1335 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1336 (longlong_t
)bno
, strerror(errno
));
1340 if ((io_res
= write(fswritefd
, cleanbuf
.b_un
.b_buf
, size
)) != size
) {
1341 report_io_prob("WRITE TO", bno
, size
, io_res
);
1347 * If we had to use -b to grab an alternate superblock, then we
1348 * likely had to do so because of unacceptable differences between
1349 * the main and alternate superblocks. So, we had better update
1350 * the alternate superblock as well, or we'll just fail again
1351 * the next time we attempt to run fsck!
1354 write_altsb(fswritefd
);
1358 if (!do_errorlock(LOCKFS_ULOCK
))
1360 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1364 if (cleanbuf
.b_un
.b_buf
!= NULL
) {
1365 free((void *)cleanbuf
.b_un
.b_buf
);
1372 report_io_prob(caddr_t what
, diskaddr_t bno
, size_t expected
, ssize_t failure
)
1375 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1376 what
, (int)bno
, strerror(errno
));
1377 else if (failure
== 0)
1378 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1381 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1382 what
, (int)bno
, (unsigned)failure
, (unsigned)expected
);
1386 * print out clean info
1393 if (FSOKAY
!= (sblock
.fs_state
+ sblock
.fs_time
) && !errorlocked
)
1396 switch (sblock
.fs_clean
) {
1427 pwarn("is %s.\n", s
);
1429 (void) printf("** %s is %s.\n", devname
, s
);
1433 is_errorlocked(caddr_t fs
)
1436 struct stat64 statb
;
1438 struct mnttab
*mntent
;
1445 if (stat64(fs
, &statb
) < 0)
1448 if (S_ISDIR(statb
.st_mode
)) {
1450 } else if (S_ISBLK(statb
.st_mode
) || S_ISCHR(statb
.st_mode
)) {
1451 mntent
= search_mnttab(NULL
, fs
, NULL
, 0);
1454 mountp
= mntent
->mnt_mountp
;
1455 if (mountp
== NULL
) /* theoretically a can't-happen */
1462 * From here on, must `goto out' to avoid memory leakage.
1465 if (elock_combuf
== NULL
)
1467 (caddr_t
)calloc(LOCKFS_MAXCOMMENTLEN
, sizeof (char));
1470 (caddr_t
)realloc(elock_combuf
, LOCKFS_MAXCOMMENTLEN
);
1472 if (elock_combuf
== NULL
)
1475 (void) memset((void *)elock_combuf
, 0, LOCKFS_MAXCOMMENTLEN
);
1477 if (elock_mountp
!= NULL
) {
1481 elock_mountp
= strdup(mountp
);
1482 if (elock_mountp
== NULL
)
1486 if ((mountfd
= open64(mountp
, O_RDONLY
)) == -1)
1491 lfp
= (struct lockfs
*)malloc(sizeof (struct lockfs
));
1494 (void) memset((void *)lfp
, 0, sizeof (struct lockfs
));
1497 lfp
->lf_comlen
= LOCKFS_MAXCOMMENTLEN
;
1498 lfp
->lf_comment
= elock_combuf
;
1500 if (ioctl(mountfd
, _FIOLFSS
, lfp
) == -1)
1504 * lint believes that the ioctl() (or any other function
1505 * taking lfp as an arg) could free lfp. This is not the
1508 retval
= LOCKFS_IS_ELOCK(lfp
);
1515 * Given a name which is known to be a directory, see if it appears
1516 * in the vfstab. If so, return the entry's block (special) device
1520 check_vfstab(caddr_t name
, caddr_t devstr
, size_t str_size
)
1522 return (NULL
!= search_vfstab(name
, NULL
, devstr
, str_size
));
1526 * Given a name which is known to be a directory, see if it appears
1527 * in the mnttab. If so, return the entry's block (special) device
1531 check_mnttab(caddr_t name
, caddr_t devstr
, size_t str_size
)
1533 return (NULL
!= search_mnttab(name
, NULL
, devstr
, str_size
));
1537 * Search for mount point and/or special device in the given file.
1538 * The first matching entry is returned.
1540 * If an entry is found and str_size is greater than zero, then
1541 * up to str_size bytes of the special device name from the entry
1542 * are copied to devstr.
1545 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1546 st_nuller, st_init, st_searcher) \
1549 struct st_type *retval = NULL; \
1550 struct st_type key; \
1551 static struct st_type buffer; \
1553 /* LINTED ``assigned value never used'' */ \
1555 key.st_mount = mountp; \
1556 key.st_special = special; \
1559 if ((fp = fopen(st_file, "r")) == NULL) \
1562 if (st_searcher(fp, &buffer, &key) == 0) { \
1564 if (devstr != NULL && str_size > 0 && \
1565 buffer.st_special != NULL) { \
1566 (void) strlcpy(devstr, buffer.st_special, \
1570 (void) fclose(fp); \
1574 static struct vfstab
*
1575 search_vfstab(caddr_t mountp
, caddr_t special
, caddr_t devstr
, size_t str_size
)
1576 SEARCH_TAB_BODY(vfstab
, VFSTAB
, vfs_mountp
, vfs_special
, vfsnull
,
1577 (retval
= retval
), getvfsany
)
1579 static struct mnttab
*
1580 search_mnttab(caddr_t mountp
, caddr_t special
, caddr_t devstr
, size_t str_size
)
1581 SEARCH_TAB_BODY(mnttab
, MNTTAB
, mnt_mountp
, mnt_special
, mntnull
,
1582 (key
.mnt_fstype
= MNTTYPE_UFS
), getmntany
)
1585 do_errorlock(int lock_type
)
1592 if (elock_combuf
== NULL
)
1593 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1594 elock_mountp
? elock_mountp
: "<null>",
1597 if ((buf
= (caddr_t
)calloc(LOCKFS_MAXCOMMENTLEN
, sizeof (char))) ==
1599 errexit("Couldn't alloc memory for temp. lock status buffer\n");
1602 errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1603 elock_mountp
, lock_type
);
1606 (void) memmove((void *)buf
, (void *)elock_combuf
,
1607 LOCKFS_MAXCOMMENTLEN
-1);
1609 switch (lock_type
) {
1612 * Note that if it is error-locked, we won't get an
1613 * error back if we try to error-lock it again.
1615 if (time(&now
) != (time_t)-1) {
1616 if ((local
= localtime(&now
)) != NULL
)
1617 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1618 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1619 elock_combuf
, (int)pid
,
1620 local
->tm_mon
+ 1, local
->tm_mday
,
1621 (local
->tm_year
% 100), local
->tm_hour
,
1622 local
->tm_min
, local
->tm_sec
);
1624 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1625 "%s [fsck pid %d", elock_combuf
, pid
);
1628 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1629 "%s [fsck pid %d", elock_combuf
, pid
);
1634 if (time(&now
) != (time_t)-1) {
1635 if ((local
= localtime(&now
)) != NULL
) {
1636 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1637 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1639 local
->tm_mon
+ 1, local
->tm_mday
,
1640 (local
->tm_year
% 100), local
->tm_hour
,
1641 local
->tm_min
, local
->tm_sec
);
1643 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1644 "%s]", elock_combuf
);
1647 (void) snprintf(buf
, LOCKFS_MAXCOMMENTLEN
,
1648 "%s]", elock_combuf
);
1650 if ((rc
= ioctl(mountfd
, _FIOLFSS
, lfp
)) == -1) {
1651 pwarn("do_errorlock: unlock failed: %s\n",
1661 (void) memmove((void *)elock_combuf
, (void *)buf
,
1662 LOCKFS_MAXCOMMENTLEN
- 1);
1664 lfp
->lf_lock
= lock_type
;
1665 lfp
->lf_comlen
= LOCKFS_MAXCOMMENTLEN
;
1666 lfp
->lf_comment
= elock_combuf
;
1670 if ((rc
= ioctl(mountfd
, _FIOLFS
, lfp
)) == -1) {
1671 if (errno
== EINVAL
) {
1672 pwarn("Another fsck active?\n");
1673 iscorrupt
= 0; /* don't go away mad, just go away */
1675 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1676 lock_type
, elock_combuf
, strerror(errno
));
1688 * Shadow inode support. To register a shadow with a client is to note
1689 * that an inode (the client) refers to the shadow.
1692 static struct shadowclients
*
1693 newshadowclient(struct shadowclients
*prev
)
1695 struct shadowclients
*rc
;
1697 rc
= (struct shadowclients
*)malloc(sizeof (*rc
));
1699 errexit("newshadowclient: cannot malloc shadow client");
1703 rc
->client
= (fsck_ino_t
*)malloc(sizeof (fsck_ino_t
) *
1705 if (rc
->client
== NULL
)
1706 errexit("newshadowclient: cannot malloc client array");
1711 registershadowclient(fsck_ino_t shadow
, fsck_ino_t client
,
1712 struct shadowclientinfo
**info
)
1714 struct shadowclientinfo
*sci
;
1715 struct shadowclients
*scc
;
1718 * Already have a record for this shadow?
1720 for (sci
= *info
; sci
!= NULL
; sci
= sci
->next
)
1721 if (sci
->shadow
== shadow
)
1725 * It's a new shadow, add it to the list
1727 sci
= (struct shadowclientinfo
*)malloc(sizeof (*sci
));
1729 errexit("registershadowclient: cannot malloc");
1732 sci
->shadow
= shadow
;
1733 sci
->totalClients
= 0;
1734 sci
->clients
= newshadowclient(NULL
);
1737 sci
->totalClients
++;
1739 if (scc
->nclients
>= maxshadowclients
) {
1740 scc
= newshadowclient(sci
->clients
);
1744 scc
->client
[scc
->nclients
++] = client
;
1748 * Locate and discard a shadow.
1751 clearshadow(fsck_ino_t shadow
, struct shadowclientinfo
**info
)
1753 struct shadowclientinfo
*sci
, *prev
;
1756 * Do we have a record for this shadow?
1759 for (sci
= *info
; sci
!= NULL
; sci
= sci
->next
) {
1760 if (sci
->shadow
== shadow
)
1767 * First, pull it off the list, since we know there
1768 * shouldn't be any future references to this one.
1773 prev
->next
= sci
->next
;
1774 deshadow(sci
, clearattrref
);
1779 * Discard all memory used to track clients of a shadow.
1782 deshadow(struct shadowclientinfo
*sci
, void (*cb
)(fsck_ino_t
))
1784 struct shadowclients
*clients
, *discard
;
1787 clients
= sci
->clients
;
1788 while (clients
!= NULL
) {
1790 clients
= clients
->next
;
1791 if (discard
->client
!= NULL
) {
1793 for (idx
= 0; idx
< discard
->nclients
; idx
++)
1794 (*cb
)(discard
->client
[idx
]);
1796 free((void *)discard
->client
);
1798 free((void *)discard
);
1805 * Allocate more buffers as the need arises, but only one at a time.
1806 * This is done to make sure that fsck does not exit with an error if it
1807 * needs more buffers to complete its task.
1809 static struct bufarea
*
1812 struct bufarea
*newbp
;
1815 bufp
= malloc((unsigned int)sblock
.fs_bsize
);
1819 newbp
= (struct bufarea
*)malloc(sizeof (struct bufarea
));
1820 if (newbp
== NULL
) {
1826 newbp
->b_un
.b_buf
= bufp
;
1827 newbp
->b_prev
= &bufhead
;
1828 newbp
->b_next
= bufhead
.b_next
;
1829 bufhead
.b_next
->b_prev
= newbp
;
1830 bufhead
.b_next
= newbp
;
1836 * We length-limit in both unrawname() and rawname() to avoid
1837 * overflowing our arrays or those of our naive, trusting callers.
1841 unrawname(caddr_t name
)
1844 static char fullname
[MAXPATHLEN
+ 1];
1846 if ((dp
= getfullblkname(name
)) == NULL
)
1849 (void) strlcpy(fullname
, dp
, sizeof (fullname
));
1851 * Not reporting under debug, as the allocation isn't
1852 * reported by getfullblkname. The idea is that we
1853 * produce balanced alloc/free instances.
1861 rawname(caddr_t name
)
1864 static char fullname
[MAXPATHLEN
+ 1];
1866 if ((dp
= getfullrawname(name
)) == NULL
)
1869 (void) strlcpy(fullname
, dp
, sizeof (fullname
));
1871 * Not reporting under debug, as the allocation isn't
1872 * reported by getfullblkname. The idea is that we
1873 * produce balanced alloc/free instances.
1881 * Make sure that a cg header looks at least moderately reasonable.
1882 * We want to be able to trust the contents enough to be able to use
1883 * the standard accessor macros. So, besides looking at the obvious
1884 * such as the magic number, we verify that the offset field values
1885 * are properly aligned and not too big or small.
1887 * Returns a NULL pointer if the cg is sane enough for our needs, else
1888 * a dynamically-allocated string describing all of its faults.
1890 #define Append_Error(full, full_len, addition, addition_len) \
1891 if (full == NULL) { \
1893 full_len = addition_len; \
1895 /* lint doesn't think realloc() understands NULLs */ \
1896 full = realloc(full, full_len + addition_len + 1); \
1897 if (full == NULL) { \
1898 errexit("Out of memory in cg_sanity"); \
1901 (void) strcpy(full + full_len, addition); \
1902 full_len += addition_len; \
1907 cg_sanity(struct cg
*cgp
, int cgno
)
1910 caddr_t this_err
= NULL
;
1911 int full_len
, this_len
;
1913 daddr32_t exp_btotoff
, exp_boff
, exp_iusedoff
;
1914 daddr32_t exp_freeoff
, exp_nextfreeoff
;
1916 cg_constants(cgno
, &exp_btotoff
, &exp_boff
, &exp_iusedoff
,
1917 &exp_freeoff
, &exp_nextfreeoff
, &ndblk
);
1922 if (!cg_chkmagic(cgp
)) {
1923 this_len
= fsck_asprintf(&this_err
,
1924 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1925 cgp
->cg_magic
, CG_MAGIC
);
1926 Append_Error(full_err
, full_len
, this_err
, this_len
);
1929 if (cgp
->cg_cgx
!= cgno
) {
1930 this_len
= fsck_asprintf(&this_err
,
1931 "WRONG CG NUMBER (%d should be %d)\n",
1933 Append_Error(full_err
, full_len
, this_err
, this_len
);
1936 if ((cgp
->cg_btotoff
& 3) != 0) {
1937 this_len
= fsck_asprintf(&this_err
,
1938 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1940 Append_Error(full_err
, full_len
, this_err
, this_len
);
1943 if ((cgp
->cg_boff
& 1) != 0) {
1944 this_len
= fsck_asprintf(&this_err
,
1945 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1947 Append_Error(full_err
, full_len
, this_err
, this_len
);
1950 if ((cgp
->cg_ncyl
< 1) || (cgp
->cg_ncyl
> sblock
.fs_cpg
)) {
1951 if (cgp
->cg_ncyl
< 1) {
1952 this_len
= fsck_asprintf(&this_err
,
1953 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1956 this_len
= fsck_asprintf(&this_err
,
1957 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1958 cgp
->cg_ncyl
, sblock
.fs_cpg
);
1960 Append_Error(full_err
, full_len
, this_err
, this_len
);
1963 if (cgp
->cg_niblk
!= sblock
.fs_ipg
) {
1964 this_len
= fsck_asprintf(&this_err
,
1965 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1966 cgp
->cg_niblk
, sblock
.fs_ipg
);
1967 Append_Error(full_err
, full_len
, this_err
, this_len
);
1970 if (cgp
->cg_ndblk
!= ndblk
) {
1971 this_len
= fsck_asprintf(&this_err
,
1972 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1973 cgp
->cg_ndblk
, ndblk
);
1974 Append_Error(full_err
, full_len
, this_err
, this_len
);
1977 if ((cgp
->cg_rotor
< 0) || (cgp
->cg_rotor
>= ndblk
)) {
1978 this_len
= fsck_asprintf(&this_err
,
1979 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1980 "(%d should be at least 0 and less than %d)\n",
1981 cgp
->cg_rotor
, ndblk
);
1982 Append_Error(full_err
, full_len
, this_err
, this_len
);
1985 if ((cgp
->cg_frotor
< 0) || (cgp
->cg_frotor
>= ndblk
)) {
1986 this_len
= fsck_asprintf(&this_err
,
1987 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1988 "(%d should be at least 0 and less than %d)\n",
1989 cgp
->cg_frotor
, ndblk
);
1990 Append_Error(full_err
, full_len
, this_err
, this_len
);
1993 if ((cgp
->cg_irotor
< 0) || (cgp
->cg_irotor
>= sblock
.fs_ipg
)) {
1994 this_len
= fsck_asprintf(&this_err
,
1995 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1996 "(%d should be at least 0 and less than %d)\n",
1997 cgp
->cg_irotor
, sblock
.fs_ipg
);
1998 Append_Error(full_err
, full_len
, this_err
, this_len
);
2001 if (cgp
->cg_btotoff
!= exp_btotoff
) {
2002 this_len
= fsck_asprintf(&this_err
,
2003 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2004 cgp
->cg_btotoff
, exp_btotoff
);
2005 Append_Error(full_err
, full_len
, this_err
, this_len
);
2008 if (cgp
->cg_boff
!= exp_boff
) {
2009 this_len
= fsck_asprintf(&this_err
,
2010 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2011 cgp
->cg_boff
, exp_boff
);
2012 Append_Error(full_err
, full_len
, this_err
, this_len
);
2015 if (cgp
->cg_iusedoff
!= exp_iusedoff
) {
2016 this_len
= fsck_asprintf(&this_err
,
2017 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2018 cgp
->cg_iusedoff
, exp_iusedoff
);
2019 Append_Error(full_err
, full_len
, this_err
, this_len
);
2022 if (cgp
->cg_freeoff
!= exp_freeoff
) {
2023 this_len
= fsck_asprintf(&this_err
,
2024 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2025 cgp
->cg_freeoff
, exp_freeoff
);
2026 Append_Error(full_err
, full_len
, this_err
, this_len
);
2029 if (cgp
->cg_nextfreeoff
!= exp_nextfreeoff
) {
2030 this_len
= fsck_asprintf(&this_err
,
2031 "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2032 cgp
->cg_nextfreeoff
, exp_nextfreeoff
);
2033 Append_Error(full_err
, full_len
, this_err
, this_len
);
2042 * This is taken from mkfs, and is what is used to come up with the
2043 * original values for a struct cg. This implies that, since these
2044 * are all constants, recalculating them now should give us the same
2045 * thing as what's on disk.
2048 cg_constants(int cgno
, daddr32_t
*btotoff
, daddr32_t
*boff
,
2049 daddr32_t
*iusedoff
, daddr32_t
*freeoff
, daddr32_t
*nextfreeoff
,
2052 daddr32_t cbase
, dmax
;
2055 (void) getblk(&cgblk
, (diskaddr_t
)cgtod(&sblock
, cgno
),
2056 (size_t)sblock
.fs_cgsize
);
2057 cgp
= cgblk
.b_un
.b_cg
;
2059 cbase
= cgbase(&sblock
, cgno
);
2060 dmax
= cbase
+ sblock
.fs_fpg
;
2061 if (dmax
> sblock
.fs_size
)
2062 dmax
= sblock
.fs_size
;
2064 /* LINTED pointer difference won't overflow */
2065 *btotoff
= &cgp
->cg_space
[0] - (uchar_t
*)(&cgp
->cg_link
);
2066 *boff
= *btotoff
+ sblock
.fs_cpg
* sizeof (daddr32_t
);
2067 *iusedoff
= *boff
+ sblock
.fs_cpg
* sblock
.fs_nrpos
* sizeof (int16_t);
2068 *freeoff
= *iusedoff
+ howmany(sblock
.fs_ipg
, NBBY
);
2069 *nextfreeoff
= *freeoff
+
2070 howmany(sblock
.fs_cpg
* sblock
.fs_spc
/ NSPF(&sblock
), NBBY
);
2071 *ndblk
= dmax
- cbase
;
2075 * Corrects all fields in the cg that can be done with the available
2079 fix_cg(struct cg
*cgp
, int cgno
)
2081 daddr32_t exp_btotoff
, exp_boff
, exp_iusedoff
;
2082 daddr32_t exp_freeoff
, exp_nextfreeoff
;
2085 cg_constants(cgno
, &exp_btotoff
, &exp_boff
, &exp_iusedoff
,
2086 &exp_freeoff
, &exp_nextfreeoff
, &ndblk
);
2088 if (cgp
->cg_cgx
!= cgno
) {
2092 if ((cgp
->cg_ncyl
< 1) || (cgp
->cg_ncyl
> sblock
.fs_cpg
)) {
2093 if (cgno
== (sblock
.fs_ncg
- 1)) {
2094 cgp
->cg_ncyl
= sblock
.fs_ncyl
-
2095 (sblock
.fs_cpg
* cgno
);
2097 cgp
->cg_ncyl
= sblock
.fs_cpg
;
2101 if (cgp
->cg_niblk
!= sblock
.fs_ipg
) {
2103 * This is not used by the kernel, so it's pretty
2104 * harmless if it's wrong.
2106 cgp
->cg_niblk
= sblock
.fs_ipg
;
2109 if (cgp
->cg_ndblk
!= ndblk
) {
2110 cgp
->cg_ndblk
= ndblk
;
2114 * For the rotors, any position's valid, so pick the one we know
2115 * will always exist.
2117 if ((cgp
->cg_rotor
< 0) || (cgp
->cg_rotor
>= cgp
->cg_ndblk
)) {
2121 if ((cgp
->cg_frotor
< 0) || (cgp
->cg_frotor
>= cgp
->cg_ndblk
)) {
2125 if ((cgp
->cg_irotor
< 0) || (cgp
->cg_irotor
>= sblock
.fs_ipg
)) {
2130 * For btotoff and boff, if they're misaligned they won't
2131 * match the expected values, so we're catching both cases
2132 * here. Of course, if any of these are off, it seems likely
2133 * that the tables really won't be where we calculate they
2136 if (cgp
->cg_btotoff
!= exp_btotoff
) {
2137 cgp
->cg_btotoff
= exp_btotoff
;
2140 if (cgp
->cg_boff
!= exp_boff
) {
2141 cgp
->cg_boff
= exp_boff
;
2144 if (cgp
->cg_iusedoff
!= exp_iusedoff
) {
2145 cgp
->cg_iusedoff
= exp_iusedoff
;
2148 if (cgp
->cg_freeoff
!= exp_freeoff
) {
2149 cgp
->cg_freeoff
= exp_freeoff
;
2152 if (cgp
->cg_nextfreeoff
!= exp_nextfreeoff
) {
2153 cgp
->cg_nextfreeoff
= exp_nextfreeoff
;
2157 * Reset the magic, as we've recreated this cg, also
2158 * update the cg_time, as we're writing out the cg
2160 cgp
->cg_magic
= CG_MAGIC
;
2161 cgp
->cg_time
= time(NULL
);
2164 * We know there was at least one correctable problem,
2165 * or else we wouldn't have been called. So instead of
2166 * marking the buffer dirty N times above, just do it
2173 examinelog(void (*cb
)(daddr32_t
))
2176 extent_block_t
*ebp
;
2178 daddr32_t nfno
, fno
;
2183 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2184 * we need to translate accordingly using logbtodb()
2187 if (logbtodb(&sblock
, sblock
.fs_logbno
) < SBLOCK
) {
2189 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2190 "Aborting log examination\n", \
2191 logbtodb(&sblock
, sblock
.fs_logbno
), SBLOCK
);
2197 * Read errors will return zeros, which will cause us
2198 * to do nothing harmful, so don't need to handle it.
2200 bp
= getdatablk(logbtofrag(&sblock
, sblock
.fs_logbno
),
2201 (size_t)sblock
.fs_bsize
);
2202 ebp
= (void *)bp
->b_un
.b_buf
;
2205 * Does it look like a log allocation table?
2207 /* LINTED pointer cast is aligned */
2208 if (!log_checksum(&ebp
->chksum
, (int32_t *)bp
->b_un
.b_buf
,
2211 if (ebp
->type
!= LUFS_EXTENTS
|| ebp
->nextents
== 0)
2214 ep
= &ebp
->extents
[0];
2215 for (i
= 0; i
< ebp
->nextents
; ++i
, ++ep
) {
2216 fno
= logbtofrag(&sblock
, ep
->pbno
);
2217 nfno
= dbtofsb(&sblock
, ep
->nbno
);
2218 for (j
= 0; j
< nfno
; ++j
, ++fno
) {
2220 * Invoke the callback first, so that pass1 can
2221 * mark the log blocks in-use. Then, if any
2222 * subsequent pass over the log shows us that a
2223 * block got freed (say, it was also claimed by
2224 * an inode that we cleared), we can safely declare
2236 fno
= logbtofrag(&sblock
, sblock
.fs_logbno
);
2237 for (j
= 0; j
< sblock
.fs_frag
; ++j
, ++fno
)
2243 freelogblk(daddr32_t frag
)
2245 freeblk(sblock
.fs_logbno
, frag
, 1);
2249 file_id(fsck_ino_t inum
, mode_t mode
)
2251 static char name
[MAXPATHLEN
+ 1];
2253 if (lfdir
== inum
) {
2257 if ((mode
& IFMT
) == IFDIR
) {
2258 (void) strcpy(name
, "DIR");
2259 } else if ((mode
& IFMT
) == IFATTRDIR
) {
2260 (void) strcpy(name
, "ATTR DIR");
2261 } else if ((mode
& IFMT
) == IFSHAD
) {
2262 (void) strcpy(name
, "ACL");
2264 (void) strcpy(name
, "FILE");
2271 * Simple initializer for inodesc structures, so users of only a few
2272 * fields don't have to worry about getting the right defaults for
2276 init_inodesc(struct inodesc
*idesc
)
2279 * Most fields should be zero, just hit the special cases.
2281 (void) memset((void *)idesc
, 0, sizeof (struct inodesc
));
2282 idesc
->id_fix
= DONTKNOW
;
2284 idesc
->id_truncto
= -1;
2285 idesc
->id_firsthole
= -1;
2289 * Compare routine for tsearch(C) to use on ino_t instances.
2292 ino_t_cmp(const void *left
, const void *right
)
2294 const fsck_ino_t lino
= (const fsck_ino_t
)left
;
2295 const fsck_ino_t rino
= (const fsck_ino_t
)right
;
2297 return (lino
- rino
);
2303 return (cgblk
.b_dirty
);
2309 flush(fswritefd
, &cgblk
);
2313 dirty(struct bufarea
*bp
)
2315 if (fswritefd
< 0) {
2317 * No one should call dirty() in read only mode.
2318 * But if one does, it's not a fatal issue. Just warn them.
2320 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2328 initbarea(struct bufarea
*bp
)
2331 (bp
)->b_bno
= (diskaddr_t
)-1LL;
2338 * Partition-sizing routines adapted from ../newfs/newfs.c.
2339 * Needed because calcsb() needs to use mkfs to work out what the
2340 * superblock should be, and mkfs insists on being told how many
2343 * Error handling assumes we're never called while preening.
2345 * XXX This should be extracted into a ../ufslib.{c,h},
2346 * in the same spirit to ../../fslib.{c,h}. Once that is
2347 * done, both fsck and newfs should be modified to link
2351 static int label_type
;
2353 #define LABEL_TYPE_VTOC 1
2354 #define LABEL_TYPE_EFI 2
2355 #define LABEL_TYPE_OTHER 3
2357 #define MB (1024 * 1024)
2358 #define SECTORS_PER_TERABYTE (1LL << 31)
2359 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL
2362 getdisksize(caddr_t disk
, int fd
)
2367 diskaddr_t actual_size
;
2370 * get_device_size() determines the actual size of the
2371 * device, and also the disk's attributes, such as geometry.
2373 actual_size
= get_device_size(fd
, disk
);
2375 if (label_type
== LABEL_TYPE_VTOC
) {
2376 if (ioctl(fd
, DKIOCGGEOM
, &g
)) {
2377 pwarn("%s: Unable to read Disk geometry", disk
);
2380 if (sblock
.fs_nsect
== 0)
2381 sblock
.fs_nsect
= g
.dkg_nsect
;
2382 if (sblock
.fs_ntrak
== 0)
2383 sblock
.fs_ntrak
= g
.dkg_nhead
;
2384 if (sblock
.fs_rps
== 0) {
2385 rpm
= ((int)g
.dkg_rpm
<= 0) ? 3600: g
.dkg_rpm
;
2386 sblock
.fs_rps
= rpm
/ 60;
2390 if (sblock
.fs_bsize
== 0)
2391 sblock
.fs_bsize
= MAXBSIZE
;
2394 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2395 * information is not available, default to the min of a MB and
2398 if (sblock
.fs_maxcontig
== -1 && ioctl(fd
, DKIOCINFO
, &ci
) == 0) {
2399 sblock
.fs_maxcontig
= ci
.dki_maxtransfer
* DEV_BSIZE
;
2400 if (sblock
.fs_maxcontig
< 0) {
2403 gotit
= fsgetmaxphys(&maxphys
, NULL
);
2406 * If we cannot get the maxphys value, default
2407 * to ufs_maxmaxphys (MB).
2410 sblock
.fs_maxcontig
= MIN(maxphys
, MB
);
2412 sblock
.fs_maxcontig
= MB
;
2415 sblock
.fs_maxcontig
/= sblock
.fs_bsize
;
2418 return (actual_size
);
2422 * Figure out how big the partition we're dealing with is.
2425 get_device_size(int fd
, caddr_t name
)
2427 struct extvtoc vtoc
;
2428 struct dk_gpt
*efi_vtoc
;
2429 diskaddr_t slicesize
= 0;
2431 int index
= read_extvtoc(fd
, &vtoc
);
2434 label_type
= LABEL_TYPE_VTOC
;
2436 if (index
== VT_ENOTSUP
|| index
== VT_ERROR
) {
2437 /* it might be an EFI label */
2438 index
= efi_alloc_and_read(fd
, &efi_vtoc
);
2440 label_type
= LABEL_TYPE_EFI
;
2446 * Since both attempts to read the label failed, we're
2447 * going to fall back to a brute force approach to
2448 * determining the device's size: see how far out we can
2449 * perform reads on the device.
2452 slicesize
= brute_force_get_device_size(fd
);
2453 if (slicesize
== 0) {
2456 pwarn("%s: %s\n", name
, strerror(errno
));
2459 pwarn("%s: I/O error accessing VTOC", name
);
2462 pwarn("%s: Invalid field in VTOC", name
);
2465 pwarn("%s: unknown error %d accessing VTOC",
2471 label_type
= LABEL_TYPE_OTHER
;
2475 if (label_type
== LABEL_TYPE_EFI
) {
2476 slicesize
= efi_vtoc
->efi_parts
[index
].p_size
;
2478 } else if (label_type
== LABEL_TYPE_VTOC
) {
2479 slicesize
= vtoc
.v_part
[index
].p_size
;
2486 * brute_force_get_device_size
2488 * Determine the size of the device by seeing how far we can
2489 * read. Doing an llseek( , , SEEK_END) would probably work
2490 * in most cases, but we've seen at least one third-party driver
2491 * which doesn't correctly support the SEEK_END option when the
2492 * device is greater than a terabyte.
2496 brute_force_get_device_size(int fd
)
2498 diskaddr_t min_fail
= 0;
2499 diskaddr_t max_succeed
= 0;
2500 diskaddr_t cur_db_off
;
2501 char buf
[DEV_BSIZE
];
2504 * First, see if we can read the device at all, just to
2505 * eliminate errors that have nothing to do with the
2509 if (((llseek(fd
, (offset_t
)0, SEEK_SET
)) == -1) ||
2510 ((read(fd
, buf
, DEV_BSIZE
)) == -1))
2511 return (0); /* can't determine size */
2514 * Now, go sequentially through the multiples of 4TB
2515 * to find the first read that fails (this isn't strictly
2516 * the most efficient way to find the actual size if the
2517 * size really could be anything between 0 and 2**64 bytes.
2518 * We expect the sizes to be less than 16 TB for some time,
2519 * so why do a bunch of reads that are larger than that?
2520 * However, this algorithm *will* work for sizes of greater
2521 * than 16 TB. We're just not optimizing for those sizes.)
2525 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526 * We're using > 32-bit constants here. Therefore, its flow
2527 * analysis is wrong. For the time being, ignore complaints
2528 * from it about the body of the for() being unreached.
2530 for (cur_db_off
= SECTORS_PER_TERABYTE
* 4;
2531 (min_fail
== 0) && (cur_db_off
< FS_SIZE_UPPER_LIMIT
);
2532 cur_db_off
+= 4 * SECTORS_PER_TERABYTE
) {
2533 if ((llseek(fd
, (offset_t
)(cur_db_off
* DEV_BSIZE
),
2535 (read(fd
, buf
, DEV_BSIZE
) != DEV_BSIZE
))
2536 min_fail
= cur_db_off
;
2538 max_succeed
= cur_db_off
;
2542 * XXX Same lint flow analysis problem as above.
2548 * We now know that the size of the device is less than
2549 * min_fail and greater than or equal to max_succeed. Now
2550 * keep splitting the difference until the actual size in
2551 * sectors is known. We also know that the difference
2552 * between max_succeed and min_fail at this time is
2553 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554 * simplifies the math below.
2557 while (min_fail
- max_succeed
> 1) {
2558 cur_db_off
= max_succeed
+ (min_fail
- max_succeed
)/2;
2559 if (((llseek(fd
, (offset_t
)(cur_db_off
* DEV_BSIZE
),
2560 SEEK_SET
)) == -1) ||
2561 ((read(fd
, buf
, DEV_BSIZE
)) != DEV_BSIZE
))
2562 min_fail
= cur_db_off
;
2564 max_succeed
= cur_db_off
;
2567 /* the size is the last successfully read sector offset plus one */
2568 return (max_succeed
+ 1);
2572 vfileerror(fsck_ino_t cwd
, fsck_ino_t ino
, caddr_t fmt
, va_list ap
)
2575 char pathbuf
[MAXPATHLEN
+ 1];
2578 (void) putchar(' ');
2580 (void) printf("\n");
2581 getpathname(pathbuf
, cwd
, ino
);
2582 if (ino
< UFSROOTINO
|| ino
> maxino
) {
2583 pfatal("NAME=%s\n", pathbuf
);
2588 pfatal("%s=%s\n", file_id(ino
, dp
->di_mode
), pathbuf
);
2590 pfatal("NAME=%s\n", pathbuf
);
2594 direrror(fsck_ino_t ino
, caddr_t fmt
, ...)
2599 vfileerror(ino
, ino
, fmt
, ap
);
2604 vdirerror(fsck_ino_t ino
, caddr_t fmt
, va_list ap
)
2606 vfileerror(ino
, ino
, fmt
, ap
);
2610 fileerror(fsck_ino_t cwd
, fsck_ino_t ino
, caddr_t fmt
, ...)
2615 vfileerror(cwd
, ino
, fmt
, ap
);
2620 * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621 * Assumes that the caller has set INCLEAR in the inode's statemap[]
2624 * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625 * meaning it's effectively an orphan. It needs to be noted now, so
2626 * it will be remembered in pass 4.
2630 add_orphan_dir(fsck_ino_t ino
)
2632 if (tsearch((void *)ino
, &limbo_dirs
, ino_t_cmp
) == NULL
)
2633 errexit("add_orphan_dir: out of memory");
2637 * Remove an inode from the orphaned-directories list, presumably
2638 * because it's been cleared.
2641 remove_orphan_dir(fsck_ino_t ino
)
2643 (void) tdelete((void *)ino
, &limbo_dirs
, ino_t_cmp
);
2647 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
2648 * and lufs.c:checksum().
2651 log_setsum(int32_t *sp
, int32_t *lp
, int nb
)
2656 nb
/= sizeof (int32_t);
2663 log_checksum(int32_t *sp
, int32_t *lp
, int nb
)
2667 log_setsum(sp
, lp
, nb
);