2 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
11 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
12 * All rights reserved.
14 * Redistribution and use in source and binary forms are permitted
15 * provided that: (1) source distributions retain this entire copyright
16 * notice and comment, and (2) distributions including binaries display
17 * the following acknowledgement: ``This product includes software
18 * developed by the University of California, Berkeley and its contributors''
19 * in the documentation or other materials provided with the distribution
20 * and in all advertising materials mentioning features or use of this
21 * software. Neither the name of the University nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
26 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
32 * A bitmap of block usage very similar to what's on disk, but
33 * for the entire filesystem rather than just a cylinder group.
34 * Zero indicates free, one indicates allocated. Note that this
35 * is opposite the interpretation of a cylinder group's free block
39 * Tracks what is known about each inode in the filesystem.
40 * The fundamental state value is one of USTATE, FSTATE, DSTATE,
41 * or SSTATE (unallocated, file, directory, shadow/acl).
43 * There are optional modifying attributes as well: INZLINK,
44 * INFOUND, INCLEAR, INORPHAN, and INDELAYD. The IN prefix
45 * stands for inode. INZLINK declares that no links (di_nlink ==
46 * 0) to the inode have been found. It is used instead of
47 * examining di_nlink because we've always got the statemap[] in
48 * memory, and on average the odds are against having any given
49 * inode in the cache. INFOUND flags that an inode was
50 * encountered during the descent of the filesystem. In other
51 * words, it's reachable, either by name or by being an acl or
52 * attribute. INCLEAR declares an intent to call clri() on an
53 * inode. The INCLEAR and INZLINK attributes are treated in a
54 * mutually exclusive manner with INCLEAR taking higher precedence
55 * as the intent is to clear the inode.
57 * INORPHAN indicates that the inode has already been seen once
58 * in pass3 and determined to be an orphan, so any additional
59 * encounters don't need to waste cycles redetermining that status.
60 * It also means we don't ask the user about doing something to the
63 * INDELAYD marks inodes that pass1 determined needed to be truncated.
64 * They can't be truncated during that pass, because it depends on
65 * having a stable world for building the block and inode tables from.
67 * The IN flags are rarely used directly, but instead are
68 * pre-combined through the {D,F,S}ZLINK, DFOUND, and
69 * {D,F,S}CLEAR convenience macros. This mainly matters when
70 * trying to use grep on the source.
72 * Three state-test macros are provided: S_IS_DUNFOUND(),
73 * S_IS_DVALID(), and S_IS_ZLINK(). The first is true when an
74 * inode's state indicates that it is either a simple directory
75 * (DSTATE without the INFOUND or INCLEAR modifiers) or a
76 * directory with the INZLINK modifier set. By definition, if a
77 * directory has zero links, then it can't be found. As for
78 * S_IS_DVALID(), it decides if a directory inode is alive.
79 * Effectively, this translates to whether or not it's been
80 * flagged for clearing. If not, then it's valid for current
81 * purposes. This is true even if INZLINK is set, as we may find
82 * a reference to it later. Finally, S_IS_ZLINK() just picks out
83 * the INZLINK flag from the state.
85 * The S_*() macros all work on a state value. To simplify a
86 * bit, the INO_IS_{DUNFOUND,DVALID}() macros take an inode
87 * number argument. The inode is looked up in the statemap[] and
88 * the result handed off to the corresponding S_*() macro. This
89 * is partly a holdover from working with different data
90 * structures (with the same net intent) in the BSD fsck.
93 * Each entry is initialized to the di_nlink from the on-disk
94 * inode. Each time we find one of those links, we decrement it.
95 * Once all the traversing is done, we should have a zero. If we
96 * have a positive value, then some reference disappeared
97 * (probably from a directory that got nuked); deal with it by
98 * fixing the count. If we have a negative value, then we found
99 * an extra reference. This is a can't-happen, except in the
100 * special case of when we reconnect a directory to its parent or
101 * to lost+found. An exact match between lncntp[] and the on-disk
102 * inode means it's completely unreferenced.
105 * This is a hash table of the acl inodes in the filesystem.
108 * The same acls as in aclphead, but as a simple linear array.
109 * It is used to hold the acl pointers for sorting and scanning
116 #include <sys/types.h>
117 #include <sys/param.h>
118 #include <sys/int_types.h>
119 #include <sys/mntent.h>
120 #include <sys/fs/ufs_fs.h>
121 #include <sys/vnode.h>
122 #include <sys/fs/ufs_inode.h>
123 #include <sys/stat.h>
125 #include <sys/wait.h>
126 #include <sys/mnttab.h>
129 #include <sys/vfstab.h>
130 #include <sys/statvfs.h>
131 #include <sys/filio.h>
136 static void usage(void);
137 static long argtol(int, char *, char *, int);
138 static void checkfilesys(char *);
139 static void check_sanity(char *);
140 static void report_limbo(const void *, VISIT
, int);
142 #define QUICK_CHECK 'm' /* are things ok according to superblock? */
143 #define ALL_no 'n' /* auto-answer interactive questions `no' */
144 #define ALL_NO 'N' /* auto-answer interactive questions `no' */
145 #define UFS_OPTS 'o' /* ufs-specific options, see subopts[] */
146 #define ECHO_CMD 'V' /* echo the command line */
147 #define ALL_yes 'y' /* auto-answer interactive questions `yes' */
148 #define ALL_YES 'Y' /* auto-answer interactive questions `yes' */
149 #define VERBOSE 'v' /* be chatty */
151 static char *subopts
[] = {
152 #define PREEN 0 /* non-interactive mode (parent is parallel) */
154 #define BLOCK 1 /* alternate superblock */
156 #define DEBUG 2 /* yammer */
158 #define ONLY_WRITES 3 /* check all writable filesystems */
160 #define FORCE 4 /* force checking, even if clean */
166 * Filesystems that are `magical' - if they exist in vfstab,
167 * then they have to be mounted for the system to have gotten
168 * far enough to be able to run fsck. Thus, don't get all
169 * bent out of shape if we're asked to check one and it is mounted.
172 "", /* MAGIC_NONE, for normal filesystems */
173 "/", /* MAGIC_ROOT */
174 "/usr", /* MAGIC_USR */
175 NULL
/* MAGIC_LIMIT */
179 main(int argc
, char *argv
[])
183 char *suboptions
, *value
;
184 struct rlimit rlimit
;
188 while ((c
= getopt(argc
, argv
, "mnNo:VvyY")) != EOF
) {
207 * ufs specific options.
209 if (optarg
== NULL
) {
212 * lint does not believe this, nor does it
213 * believe #pragma does_not_return(usage)
218 while (*suboptions
!= '\0') {
219 switch (getsubopt(&suboptions
, subopts
,
227 bflag
= argtol(BLOCK
, "block",
229 (void) printf("Alternate super block "
240 /* check only writable filesystems */
259 (void) printf("fsck -F ufs ");
260 for (opt_count
= 1; opt_count
< argc
;
262 opt_text
= argv
[opt_count
];
264 (void) printf("%s ", opt_text
);
286 rflag
++; /* check raw devices where we can */
287 if (signal(SIGINT
, SIG_IGN
) != SIG_IGN
)
288 (void) signal(SIGINT
, catch);
290 (void) signal(SIGQUIT
, catchquit
);
293 * Push up our allowed memory limit so we can cope
294 * with huge file systems.
296 if (getrlimit(RLIMIT_DATA
, &rlimit
) == 0) {
297 rlimit
.rlim_cur
= rlimit
.rlim_max
;
298 (void) setrlimit(RLIMIT_DATA
, &rlimit
);
302 * There are a lot of places where we just exit if a problem is
303 * found. This means that we won't necessarily check everything
304 * we were asked to. It would be nice to do everything, and
305 * then provide a summary when we're done. However, the
306 * interface doesn't really allow us to do that in any useful
307 * way. So, we'll just bail on the first unrecoverable
308 * problem encountered. If we've been run by the generic
309 * wrapper, we were only given one filesystem to check, so the
310 * multi-fs case implies being run manually; that means the
311 * user can rerun us on the remaining filesystems when it's
312 * convenient for them.
315 if (wflag
&& !writable(*argv
)) {
316 (void) fprintf(stderr
, "not writeable '%s'\n", *argv
);
319 exitstat
= EXBADPARM
;
321 checkfilesys(*argv
++);
330 * A relatively intelligent strtol(). Note that if str is NULL, we'll
331 * exit, so ret does not actually need to be pre-initialized. Lint
332 * doesn't believe this, and it's harmless enough to make lint happy here.
335 argtol(int flag
, char *req
, char *str
, int base
)
342 ret
= strtol(str
, &cp
, base
);
343 if (cp
== str
|| *cp
) {
344 (void) fprintf(stderr
, "-%c flag requires a %s\n", flag
, req
);
348 (void) fprintf(stderr
, "-%c %s value out of range\n",
356 * Check the specified file system.
359 checkfilesys(char *filesys
)
361 daddr32_t n_ffree
, n_bfree
;
367 fsck_ino_t limbo_victim
;
368 double dbl_nffree
, dbl_dsize
;
376 iscorrupt
= 1; /* assume failure in setup() */
380 errorlocked
= is_errorlocked(filesys
);
383 if ((devstr
= setup(filesys
)) == NULL
) {
389 pfatal("CAN'T CHECK FILE SYSTEM.");
391 exitstat
= mflag
? EXUMNTCHK
: EXERRFATAL
;
398 check_sanity(filesys
);
405 iscorrupt
= 0; /* setup() succeeded, assume good filesystem */
408 * 1: scan inodes tallying blocks used
411 /* hotroot is reported as such in setup() if debug is on */
412 if (mountedfs
!= M_NOMNT
)
413 (void) printf("** Currently Mounted on %s\n",
416 (void) printf("** Last Mounted on %s\n",
418 (void) printf("** Phase 1 - Check Blocks and Sizes\n");
423 * 1b: locate first references to duplicates, if any
427 pfatal("INTERNAL ERROR: dups with -o p");
428 (void) printf("** Phase 1b - Rescan For More DUPS\n");
433 * 2: traverse directories from root to mark all connected directories
436 (void) printf("** Phase 2 - Check Pathnames\n");
440 * 3a: scan inodes looking for disconnected directories.
443 (void) printf("** Phase 3a - Check Connectivity\n");
450 (void) printf("** Phase 3b - Verify Shadows/ACLs\n");
454 * 4: scan inodes looking for disconnected files; check reference counts
457 (void) printf("** Phase 4 - Check Reference Counts\n");
461 * 5: check and repair resource counts in cylinder groups
464 (void) printf("** Phase 5 - Check Cylinder Groups\n");
472 if (!nflag
&& mountedfs
== M_RW
) {
473 (void) printf("FILESYSTEM MAY STILL BE INCONSISTENT.\n");
478 quiet_dups
= (reply("LIST REMAINING DUPS") == 0);
479 if (report_dups(quiet_dups
) > 0)
482 (void) printf("WARNING: DATA LOSS MAY HAVE OCCURRED DUE TO "
483 "DUP BLOCKS.\nVERIFY FILE CONTENTS BEFORE USING.\n");
486 if (limbo_dirs
!= NULL
) {
488 * Don't force iscorrupt, as this is sufficiently
489 * harmless that the filesystem can be mounted and
490 * used. We just leak some inodes and/or blocks.
492 pwarn("Orphan directories not cleared or reconnected:\n");
494 twalk(limbo_dirs
, report_limbo
);
496 while (limbo_dirs
!= NULL
) {
497 limbo_victim
= *(fsck_ino_t
*)limbo_dirs
;
498 if (limbo_victim
!= 0) {
499 (void) tdelete((void *)limbo_victim
,
509 if (mountedfs
== M_RW
)
510 (void) printf("FS IS MOUNTED R/W AND"
511 " FSCK DID ITS BEST TO FIX"
512 " INCONSISTENCIES.\n");
514 (void) printf("FILESYSTEM MAY STILL BE"
520 * iscorrupt must be stable at this point.
521 * updateclean() returns true when it had to discard the log.
522 * This can only happen once, since sblock.fs_logbno gets
523 * cleared as part of that operation.
528 "Log was discarded, updating cyl groups\n");
538 * print out summary statistics
540 n_ffree
= sblock
.fs_cstotal
.cs_nffree
;
541 n_bfree
= sblock
.fs_cstotal
.cs_nbfree
;
542 files
= maxino
- UFSROOTINO
- sblock
.fs_cstotal
.cs_nifree
- n_files
;
544 sblock
.fs_ncg
* (cgdmin(&sblock
, 0) - cgsblock(&sblock
, 0));
545 blks
+= cgsblock(&sblock
, 0) - cgbase(&sblock
, 0);
546 blks
+= howmany(sblock
.fs_cssize
, sblock
.fs_fsize
);
547 blks
= maxfsblock
- (n_ffree
+ sblock
.fs_frag
* n_bfree
) - blks
;
548 if (debug
&& (files
> 0 || blks
> 0)) {
549 countdirs
= sblock
.fs_cstotal
.cs_ndir
- countdirs
;
550 pwarn("Reclaimed: %d directories, %d files, %lld fragments\n",
551 countdirs
, files
- countdirs
,
555 dbl_nffree
= (double)n_ffree
;
556 dbl_dsize
= (double)sblock
.fs_dsize
;
560 * Done as one big string to try for a single write,
561 * so the output doesn't get interleaved with other
564 pwarn("%ld files, %lld used, %lld free "
565 "(%lld frags, %lld blocks, %.1f%% fragmentation)\n",
566 (long)n_files
, (longlong_t
)n_blks
,
567 (longlong_t
)n_ffree
+ sblock
.fs_frag
* n_bfree
,
568 (longlong_t
)n_ffree
, (longlong_t
)n_bfree
,
569 (dbl_nffree
* 100.0) / dbl_dsize
);
571 pwarn("\nFilesystem summary:\n");
572 pwarn("Inodes in use: %ld\n", (long)n_files
);
573 pwarn("Blocks in use: %lld\n", (longlong_t
)n_blks
);
574 pwarn("Total free fragments: %lld\n",
575 (longlong_t
)n_ffree
+ sblock
.fs_frag
* n_bfree
);
576 pwarn("Free fragments not in blocks: %lld\n",
577 (longlong_t
)n_ffree
);
578 pwarn("Total free blocks: %lld\n", (longlong_t
)n_bfree
);
579 pwarn("Fragment/block fragmentation: %.1f%%\n",
580 (dbl_nffree
* 100.0) / dbl_dsize
);
584 pwarn("%d inodes missing\n", -files
);
586 pwarn("%lld blocks missing\n", -(longlong_t
)blks
);
589 for (inumber
= UFSROOTINO
; inumber
< maxino
; inumber
++) {
590 if (S_IS_ZLINK(statemap
[inumber
])) {
591 if (zlinks_printed
== 0) {
592 pwarn("The following zero "
593 "link count inodes remain:");
595 if (zlinks_printed
) {
596 if ((zlinks_printed
% 9) == 0)
601 (void) printf("%u", inumber
);
605 if ((zlinks_printed
!= 0) && ((zlinks_printed
% 9) != 0))
606 (void) putchar('\n');
610 * Clean up after ourselves, so we can do the next filesystem.
616 free((void *)lncntp
);
620 if (iscorrupt
&& exitstat
== 0)
621 exitstat
= EXFNDERRS
;
623 (void) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
625 (void) printf("\n***** %s FULL, MUST REMOVE ENTRIES *****\n",
627 if (reattached_dir
) {
628 (void) printf("ORPHANED DIRECTORIES REATTACHED; DIR LINK "
629 "COUNTS MAY NOT BE CORRECT.\n");
632 if (broke_dir_link
) {
634 "DIRECTORY HARDLINK BROKEN; LOOPS MAY STILL EXIST.\n");
638 (void) printf("***** FILE SYSTEM IS BAD *****\n");
641 if (mountedfs
== M_RW
)
642 (void) printf("\n***** PLEASE RERUN FSCK ON UNMOUNTED"
643 " FILE SYSTEM *****\n");
645 (void) printf("\n***** PLEASE RERUN FSCK *****\n");
648 if ((exitstat
== 0) &&
649 (((mountedfs
!= M_NOMNT
) && !errorlocked
) || hotroot
)) {
650 exitstat
= EXROOTOKAY
;
653 if ((exitstat
== 0) && rerun
)
654 exitstat
= EXFNDERRS
;
656 if (mountedfs
!= M_NOMNT
) {
660 * _FIOFFS is much more effective than a simple sync().
661 * Note that the original fswritefd was discarded in
664 fswritefd
= open(devstr
, O_RDWR
, 0);
665 if (fswritefd
!= -1) {
666 (void) ioctl(fswritefd
, _FIOFFS
, NULL
);
667 (void) close(fswritefd
);
671 (void) printf("\n***** REBOOT NOW *****\n");
673 exitstat
= EXREBOOTNOW
;
678 * fsck -m: does the filesystem pass cursory examination
680 * XXX This is very redundant with setup(). The right thing would be
681 * for setup() to modify its behaviour when mflag is set (less
682 * chatty, exit instead of return, etc).
685 check_sanity(char *filename
)
687 struct stat64 stbd
, stbr
;
690 char vfsfilename
[MAXPATHLEN
];
691 struct vfstab vfsbuf
;
693 struct statvfs vfs_stat
;
694 int found_magic
[MAGIC_LIMIT
];
700 (void) memset(found_magic
, 0, sizeof (found_magic
));
702 if (stat64(filename
, &stbd
) < 0) {
703 (void) fprintf(stderr
,
704 "ufs fsck: sanity check failed : cannot stat %s\n", filename
);
708 if (S_ISBLK(stbd
.st_mode
)) {
710 } else if (S_ISCHR(stbd
.st_mode
)) {
712 } else if (S_ISREG(stbd
.st_mode
)) {
717 * Determine if this is the root file system via vfstab. Give up
718 * silently on failures. The whole point of this is to be tolerant
719 * of the magic file systems being already mounted.
721 if (!is_file
&& (vfstab
= fopen(VFSTAB
, "r")) != NULL
) {
722 for (magic_cnt
= 0; magic_cnt
< MAGIC_LIMIT
; magic_cnt
++) {
723 if (magic_cnt
== MAGIC_NONE
)
725 if (getvfsfile(vfstab
, &vfsbuf
,
726 magic_fs
[magic_cnt
]) == 0) {
728 devname
= vfsbuf
.vfs_special
;
730 devname
= vfsbuf
.vfs_fsckdev
;
731 if (stat64(devname
, &stbr
) == 0) {
732 if (stbr
.st_rdev
== stbd
.st_rdev
) {
733 found_magic
[magic_cnt
] = 1;
734 is_magic
= magic_cnt
;
743 * Only works if filename is a block device or if
744 * character and block device have the same dev_t value.
745 * This is currently true, but nothing really forces it.
747 if (!is_magic
&& (ustat(stbd
.st_rdev
, &usb
) == 0)) {
748 (void) fprintf(stderr
,
749 "ufs fsck: sanity check: %s already mounted\n", filename
);
754 (void) strcpy(vfsfilename
, magic_fs
[is_magic
]);
755 if (statvfs(vfsfilename
, &vfs_stat
) != 0) {
756 (void) fprintf(stderr
, "ufs fsck: Cannot stat %s\n",
761 if (!(vfs_stat
.f_flag
& ST_RDONLY
)) {
763 * The file system is mounted read/write.
764 * We need to exit saying this. If it's only
765 * mounted readonly, we can continue.
768 (void) fprintf(stderr
,
769 "ufs fsck: sanity check:"
770 "%s already mounted read/write\n", filename
);
776 * We know that at boot, the ufs root file system is mounted
777 * read-only first. After fsck runs, it is remounted as
778 * read-write. Therefore, we do not need to check for different
779 * values for fs_state between the root file system and the
780 * rest of the file systems.
782 if (islog
&& !islogok
) {
783 (void) fprintf(stderr
,
784 "ufs fsck: sanity check: %s needs checking\n", filename
);
787 if ((sblock
.fs_state
+ (long)sblock
.fs_time
== FSOKAY
) &&
788 (sblock
.fs_clean
== FSCLEAN
|| sblock
.fs_clean
== FSSTABLE
||
789 (sblock
.fs_clean
== FSLOG
&& islog
))) {
790 (void) fprintf(stderr
,
791 "ufs fsck: sanity check: %s okay\n", filename
);
793 (void) fprintf(stderr
,
794 "ufs fsck: sanity check: %s needs checking\n", filename
);
801 hasvfsopt(struct vfstab
*vfs
, char *opt
)
805 if (vfs
->vfs_mntopts
== NULL
)
807 mtab
.mnt_mntopts
= vfs
->vfs_mntopts
;
808 return (hasmntopt(&mtab
, opt
));
814 (void) fprintf(stderr
,
815 "ufs usage: fsck [-F ufs] [-m] [-n] [-V] [-v] [-y] "
816 "[-o p,b=#,w,f] [special ....]\n");
823 report_limbo(const void *node
, VISIT order
, int level
)
825 fsck_ino_t ino
= *(fsck_ino_t
*)node
;
827 if ((order
== postorder
) || (order
== leaf
)) {
828 (void) printf(" Inode %d\n", ino
);