1 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2 /* All Rights Reserved */
5 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
8 * Redistribution and use in source and binary forms are permitted
9 * provided that: (1) source distributions retain this entire copyright
10 * notice and comment, and (2) distributions including binaries display
11 * the following acknowledgement: ``This product includes software
12 * developed by the University of California, Berkeley and its contributors''
13 * in the documentation or other materials provided with the distribution
14 * and in all advertising materials mentioning features or use of this
15 * software. Neither the name of the University nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
24 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
31 #pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.3 */
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/mnttab.h>
43 #include <sys/vfstab.h>
44 #include <sys/fs/ufs_fs.h>
45 #include <sys/fs/ufs_inode.h>
/*
 * Per-inode reporting limits and buffer sizing.
 *
 * The size expressions are fully parenthesized so that they expand
 * correctly inside larger expressions such as `x / MAXBUFSPACE' or
 * `x % INOBUFSIZE'; without the parentheses those would be parsed as
 * `(x / 40) * 1024' and `(x % 56) * 1024'.
 */
#define	MAXDUP		10		/* limit on dup blks (per inode) */
#define	MAXBAD		10		/* limit on bad blks (per inode) */
#define	MAXBUFSPACE	(40 * 1024)	/* initial space to allocate to buffers */
#define	INOBUFSIZE	(56 * 1024)	/* size of buffer to read inodes in pass1 */
53 #define BUFSIZ MAXPATHLEN
57 * Inode states in statemap[].
/* Basic state/type of an inode; exactly one bit of STMASK per state. */
#define	USTATE		0x0001	/* inode not allocated */
#define	FSTATE		0x0002	/* inode is file */
#define	DSTATE		0x0004	/* inode is directory */
#define	SSTATE		0x0008	/* inode is a shadow/acl */
#define	STMASK		0x000f	/* pick off the basic state/type */

/* Modifier flags that are OR'd into one of the basic states above. */
#define	INZLINK		0x0010	/* inode has zero links */
#define	INFOUND		0x0020	/* inode was found during descent */
#define	INCLEAR		0x0040	/* inode is to be cleared */
#define	INORPHAN	0x0080	/* inode is a known orphan (pass3 only) */
#define	INDELAYD	0x0200	/* link count update delayed */
#define	INMASK		0xfff0	/* pick off the modifiers */

/* Zero-link variants of the file, directory and shadow states. */
#define	FZLINK		(INZLINK | FSTATE)
#define	DZLINK		(INZLINK | DSTATE)
#define	SZLINK		(INZLINK | SSTATE)

/* Directory that was reached during descent. */
#define	DFOUND		(INFOUND | DSTATE)

/* To-be-cleared variants of the directory, file and shadow states. */
#define	DCLEAR		(INCLEAR | DSTATE)
#define	FCLEAR		(INCLEAR | FSTATE)
#define	SCLEAR		(INCLEAR | SSTATE)
84 * These tests depend on the state/type defines above not overlapping bits.
86 * DUNFOUND === (state == DSTATE || state == DZLINK)
87 * INCLEAR is irrelevant to the determination of
88 * connectedness, so it's not included in this test.
90 * DVALID === (state == DSTATE || state == DZLINK || state == DFOUND)
92 #define S_IS_DUNFOUND(state) (((state) & (DSTATE | INZLINK)) \
/*
 * Directory-state predicates.
 *
 * S_IS_DVALID(state) is true when state has no bits set other than
 * DSTATE, INZLINK, INFOUND and INORPHAN.  It is written as a test for
 * stray bits so that `state' is expanded, and therefore evaluated,
 * exactly once; the equivalent `((state) & mask) == (state)' form
 * evaluates its argument twice, which misbehaves when the argument
 * has side effects.
 *
 * S_IS_ZLINK(state) is true when the INZLINK modifier is set.
 *
 * The INO_IS_*() forms apply the corresponding test to statemap[ino].
 */
#define	S_IS_DVALID(state) \
	(((state) & ~(DSTATE | INZLINK | INFOUND | INORPHAN)) == 0)
#define	S_IS_ZLINK(state)	(((state) & INZLINK) != 0)
#define	INO_IS_DUNFOUND(ino)	S_IS_DUNFOUND(statemap[(ino)])
#define	INO_IS_DVALID(ino)	S_IS_DVALID(statemap[(ino)])
101 * buffer cache structure.
104 struct bufarea
*b_next
; /* free list queue */
105 struct bufarea
*b_prev
; /* free list queue */
106 diskaddr_t b_bno
; /* physical sector number */
110 int b_cnt
; /* reference cnt */
112 char *b_buf
; /* buffer space */
113 daddr32_t
*b_indir
; /* indirect block */
114 struct fs
*b_fs
; /* super block */
115 struct cg
*b_cg
; /* cylinder group */
116 struct dinode
*b_dinode
; /* inode block */
123 #define MINBUFS 5 /* minimum number of buffers required */
124 struct bufarea bufhead
; /* head of list of other blks in filesys */
125 struct bufarea sblk
; /* file system superblock */
126 struct bufarea asblk
; /* alternate superblock */
127 struct bufarea cgblk
; /* cylinder group blocks */
128 struct bufarea
*pbp
; /* pointer to inode data in buffer pool */
129 struct bufarea
*pdirbp
; /* pointer to directory data in buffer pool */
131 #define sbdirty() dirty(&sblk)
132 #define cgdirty() dirty(&cgblk)
133 #define sblock (*sblk.b_un.b_fs)
134 #define cgrp (*cgblk.b_un.b_cg)
137 * inodesc.id_fix values. See inode.c for a description of their usage.
140 DONTKNOW
, NOFIX
, FIX
, IGNORE
144 * Tells truncino() whether or not to attempt to update the parent
145 * directory's link count. Also, TI_NODUP flags when we're discarding
146 * fragments that are beyond the original end of the file, and so
147 * should not be considered duplicate-claim candidates.
/* Flag bits accepted by truncino(); each occupies its own bit. */
#define	TI_NOPARENT	(1 << 0)	/* leave parent's di_nlink alone */
#define	TI_PARENT	(1 << 1)	/* update parent's di_nlink */
#define	TI_NODUP	(1 << 2)	/* not a dup candidate */
154 * Modes for ckinode() and ckinode_common().
156 * CKI_TRAVERSE is the common case, and requests a traditional
157 * traversal of blocks or directory entries.
159 * CKI_TRUNCATE indicates that we're truncating the file, and that any
160 * block indices beyond the end of the target length should be cleared
161 * after the callback has returned (i.e., this is a superset of
162 * CKI_TRAVERSE). idesc->id_truncto is the first logical block number
163 * to clear. If it is less than zero, then the traversal will be
164 * equivalent to a simple CKI_TRAVERSE.
/* Traversal mode passed to ckinode() and ckinode_common(). */
enum cki_action {
	CKI_TRAVERSE,	/* plain traversal of blocks or directory entries */
	CKI_TRUNCATE	/* traverse, then clear indices from id_truncto on */
};
169 * The general definition of an ino_t is an unsigned quantity.
170 * However, the on-disk version is an int32_t, which is signed.
171 * Since we really want to be able to detect wrapped-around
172 * inode numbers and such, we'll use something that's compatible
173 * with what's on disk since that's the only context that really
174 * matters. If an int32_t is found not to be sufficiently large,
175 * this will make it much easier to change later.
177 * Note that there is one unsigned inode field in the on-disk
178 * inode, ic_oeftflag. Since all other inode fields are signed,
179 * no legitimate inode number can be put into ic_oeftflag that
180 * would overflow into the high bit. Essentially, it should
181 * actually be declared as int32_t just like all the others, and
182 * we're going to pretend that it was.
184 * None of the routines that we use in ufs_subr.c do anything with
185 * inode numbers. If that changes, then great care will be needed
186 * to deal with the differences in definition of ino_t and fsck_ino_t.
187 * Lint is your friend.
/*
 * fsck's own inode-number type: signed and 32 bits wide, matching the
 * on-disk representation, so wrapped-around inode numbers show up as
 * negative values.
 */
typedef int32_t fsck_ino_t;
192 * See the full discussion of the interactions between struct inodesc
193 * and ckinode() in inode.c
196 enum fixstate id_fix
; /* policy on fixing errors */
197 int (*id_func
)(struct inodesc
*);
198 /* function to be applied to blocks of inode */
199 fsck_ino_t id_number
; /* inode number described */
200 fsck_ino_t id_parent
; /* for DATA nodes, their parent */
201 /* also used for extra (*id_func) parameter */
202 /* and return values */
203 daddr32_t id_lbn
; /* logical fragment number of current block */
204 daddr32_t id_blkno
; /* physical fragment number being examined */
205 int id_numfrags
; /* number of frags contained in block */
206 daddr32_t id_truncto
; /* # blocks to truncate to, -1 for no trunc. */
207 offset_t id_filesize
; /* for DATA nodes, the size of the directory */
208 uint_t id_loc
; /* for DATA nodes, current location in dir */
209 daddr32_t id_entryno
; /* for DATA nodes, current dir entry number */
210 daddr32_t id_firsthole
; /* for DATA inode, logical block that is */
211 /* zero but shouldn't be, -1 for no holes */
212 struct direct
*id_dirp
; /* for DATA nodes, ptr to current entry */
213 caddr_t id_name
; /* for DATA nodes, name to find or enter */
214 char id_type
; /* type of descriptor, DATA or ADDR */
/*
 * Descriptor (file) types.  Zero is deliberately left unassigned so
 * that an uninitialized type is caught as a bug.
 */
#define	DATA	1	/* a directory */
#define	ACL	2	/* an acl/shadow */
#define	ADDR	3	/* anything but a directory or an acl/shadow */
223 * OR'd flags for find_dup_ref()'s mode argument
/* Mode bits for find_dup_ref(); each occupies its own bit. */
#define	DB_CREATE	(1 << 0)	/* if dup record found, make one */
#define	DB_INCR		(1 << 1)	/* increment block's reference count */
#define	DB_DECR		(1 << 2)	/* decrement block's reference count */
230 * Cache data structures
233 struct inoinfo
*i_nextlist
; /* next inode/acl cache entry */
234 fsck_ino_t i_number
; /* inode number of this entry */
235 fsck_ino_t i_parent
; /* inode number of parent */
236 fsck_ino_t i_dotdot
; /* inode number of .. */
237 fsck_ino_t i_extattr
; /* inode of hidden attr dir */
238 offset_t i_isize
; /* size of inode */
239 size_t i_blkssize
; /* size of block array in bytes */
240 daddr32_t i_blks
[1]; /* actually longer */
/*
 * Bookkeeping for the inode cache.
 * NOTE(review): the roles of the individual variables are not visible
 * from this header; confirm against the code that maintains the cache.
 */
struct inoinfo **inphead;
struct inoinfo **inpsort;
int64_t numdirs;
int64_t listmax;
int64_t inplast;

/* The same bookkeeping, kept separately for the acl cache. */
struct inoinfo **aclphead;
struct inoinfo **aclpsort;
int64_t numacls;
int64_t aclmax;
int64_t aclplast;
256 * Tree of directories we haven't reconnected or cleared. Any
257 * dir inode that linkup() fails on gets added, any that clri()
258 * succeeds on gets removed. If there are any left at the end of
259 * pass four, then we have a user-forced corrupt filesystem, and
260 * need to set iscorrupt.
262 * Elements are fsck_ino_t instances (not pointers).
267 * Number of directories we actually found in the filesystem,
268 * as opposed to how many the superblock claims there are.
270 fsck_ino_t countdirs
;
273 * shadowclients and shadowclientinfo are structures for keeping track of
274 * shadow inodes that exist, and which regular inodes use them (i.e. are
278 struct shadowclients
{
279 fsck_ino_t
*client
; /* an array of inode numbers */
280 int nclients
; /* how many inodes in the array are in use (valid) */
281 struct shadowclients
*next
; /* link to more client inode numbers */
283 struct shadowclientinfo
{
284 fsck_ino_t shadow
; /* the shadow inode that this info is for */
285 int totalClients
; /* how many inodes total refer to this */
286 struct shadowclients
*clients
; /* a linked list of wads of clients */
287 struct shadowclientinfo
*next
; /* link to the next shadow inode */
289 /* global pointer to this shadow/client information */
290 struct shadowclientinfo
*shadowclientinfo
;
291 struct shadowclientinfo
*attrclientinfo
;
294 * In ufs_inode.h ifdef _KERNEL, this is defined as `/@/'. However,
295 * to avoid all sorts of potential confusion (you can't actually use
296 * `foo/@/bar' to get to an attribute), we use something that doesn't
297 * look quite so much like a simple pathname.
299 #define XATTR_DIR_NAME " <xattr> "
302 * granularity -- how many client inodes do we make space for at a time
303 * initialized in setup.c;
305 extern int maxshadowclients
;
308 * Initialized global variables.
310 extern caddr_t lfname
;
 * Uninitialized globals.
315 char *devname
; /* name of device being checked */
316 size_t dev_bsize
; /* computed value of DEV_BSIZE */
317 int secsize
; /* actual disk sector size */
318 char nflag
; /* assume a no response */
319 char yflag
; /* assume a yes response */
320 daddr32_t bflag
; /* location of alternate super block */
321 int debug
; /* output debugging info */
322 int rflag
; /* check raw file systems */
323 int roflag
; /* do normal checks but don't update disk */
324 int fflag
; /* check regardless of clean flag (force) */
325 int mflag
; /* sanity check only */
326 int verbose
; /* be chatty */
327 char preen
; /* just fix normal inconsistencies */
328 char mountedfs
; /* checking mounted device */
329 int exitstat
; /* exit status (see EX* defines below) */
330 char hotroot
; /* checking root device */
331 char rerun
; /* rerun fsck. Only used in non-preen mode */
332 int interrupted
; /* 1 => exit EXSIGNAL on exit */
333 char havesb
; /* superblock has been read */
334 int fsmodified
; /* 1 => write done to file system */
335 int fsreadfd
; /* file descriptor for reading file system */
336 int fswritefd
; /* file descriptor for writing file system */
337 int iscorrupt
; /* known to be corrupt/inconsistent */
338 /* -1 means mark clean so user can mount+fix */
339 int isdirty
; /* 1 => write pending to file system */
341 int islog
; /* logging file system */
342 int islogok
; /* log is okay */
344 int errorlocked
; /* set => mounted fs has been error-locked */
345 /* implies fflag "force check flag" */
346 char *elock_combuf
; /* error lock comment buffer */
347 char *elock_mountp
; /* mount point; used to unlock error-lock */
348 int pid
; /* fsck's process id (put in lockfs comment) */
349 int mountfd
; /* fd of mount point */
350 struct lockfs
*lfp
; /* current lockfs status */
352 daddr32_t maxfsblock
; /* number of blocks in the file system */
353 uint_t largefile_count
; /* global largefile counter */
354 char *mount_point
; /* if mounted, this is where */
355 char *blockmap
; /* ptr to primary blk allocation map */
356 fsck_ino_t maxino
; /* number of inodes in file system */
357 fsck_ino_t lastino
; /* last inode in use */
358 ushort_t
*statemap
; /* ptr to inode state table */
359 short *lncntp
; /* ptr to link count table */
361 fsck_ino_t lfdir
; /* lost & found directory inode number */
362 int overflowed_lf
; /* tried to wrap lost & found's link count */
363 int reattached_dir
; /* reconnected at least one directory */
364 int broke_dir_link
; /* broke at least one directory hardlink */
366 daddr32_t n_blks
; /* number of blocks in use */
367 fsck_ino_t n_files
; /* number of files in use */
369 #define clearinode(dp) { \
/* Test, set and clear the bit for block `blkno' in blockmap. */
#define	testbmap(blkno)	isset(blockmap, (blkno))
#define	setbmap(blkno)	setbit(blockmap, (blkno))
#define	clrbmap(blkno)	clrbit(blockmap, (blkno))
385 * Support relatively easy debugging of lncntp[] updates. This can't
386 * be a function, because of the (_op) step. Normally, we just do that.
388 #define TRACK_LNCNTP(_ino, _op) (_op)
391 * See if the net link count for an inode has gone outside
392 * what can be represented on disk. Returning text as NULL
395 * Remember that link counts are effectively inverted, so
396 * underflow and overflow are reversed as well.
398 * This check should be done before modifying the actual link
401 #define LINK_RANGE(text, current, offset) { \
402 int net = ((int)(current)) + ((int)(offset)); \
404 if (net > (MAXLINK)) \
405 text = "UNDERFLOW"; \
406 else if (net < -(MAXLINK)) \
411 * If LINK_RANGE() indicated a problem, this is the boiler-plate
412 * for dealing with it. Usage is:
414 * LINK_RANGE(text, current, offset);
415 * if (text != NULL) {
416 * LINK_CLEAR(text, ino, mode, idp);
417 * if (statemap[ino] == USTATE)
418 * ...inode was cleared...
421 * Note that clri() will set iscorrupt if the user elects not to
422 * clear the problem inode, so the filesystem won't get reported
423 * as clean when it shouldn't be.
425 #define LINK_CLEAR(text, ino, mode, idp) { \
426 pwarn("%s LINK COUNT %s", file_id((ino), (mode)), (text)); \
429 init_inodesc((idp)); \
430 (idp)->id_type = ADDR; \
431 (idp)->id_func = pass4check; \
432 (idp)->id_number = ino; \
433 (idp)->id_fix = DONTKNOW; \
434 clri((idp), (text), CLRI_QUIET, CLRI_NOP_CORRUPT); \
438 * Used for checking link count under/overflow specifically on
439 * the lost+found directory. If the user decides not to do the
440 * clri(), then flag that we've hit this problem and refuse to do
443 #define LFDIR_LINK_RANGE_RVAL(text, current, offset, idp, rval) { \
444 LINK_RANGE(text, current, offset); \
445 if (text != NULL) { \
446 LINK_CLEAR(text, lfdir, IFDIR, idp); \
447 if (statemap[lfdir] == USTATE) { \
456 #define LFDIR_LINK_RANGE_NORVAL(text, current, offset, idp) { \
457 LINK_RANGE(text, current, offset); \
458 if (text != NULL) { \
459 LINK_CLEAR(text, lfdir, IFDIR, idp); \
460 if (statemap[lfdir] == USTATE) { \
470 * Values for mounted() and mountedfs.
/* Mount states. */
#define	M_NOMNT		0	/* filesystem is not mounted */
#define	M_RO		1	/* filesystem is mounted read-only */
#define	M_RW		2	/* filesystem is mounted read-write */

/* Exit status values (stored in exitstat). */
#define	EXOKAY		0	/* file system is unmounted and ok */
#define	EXBADPARM	1	/* bad parameter(s) given */
#define	EXUMNTCHK	32	/* fsck -m: unmounted, needs checking */
#define	EXMOUNTED	33	/* file system already mounted, not magic, */
				/* or it is magic and mounted read/write */
#define	EXNOSTAT	34	/* cannot stat device */
#define	EXREBOOTNOW	35	/* modified root or something equally scary */
#define	EXFNDERRS	36	/* uncorrectable errors, terminate normally */
#define	EXSIGNAL	37	/* a signal was caught during processing */
#define	EXERRFATAL	39	/* uncorrectable errors, exit immediately */
#define	EXROOTOKAY	40	/* for root, same as 0 */
489 * Values for clri()'s `verbose' and `corrupting' arguments (third
490 * and fourth, respectively).
/* Value for clri()'s `verbose' (third) argument. */
#define	CLRI_VERBOSE		2

/* Values for clri()'s `corrupting' (fourth) argument. */
#define	CLRI_NOP_OK		1
#define	CLRI_NOP_CORRUPT	2
499 * Filesystems that are `magical' - if they exist in vfstab,
500 * then they have to be mounted for the system to have gotten
501 * far enough to be able to run fsck. Thus, don't get all
502 * bent out of shape if we're asked to check it and it is mounted.
503 * Actual initialization of the array is in main.c
/* Table of `magical' filesystems; defined and initialized in main.c. */
extern char *magic_fs[];
514 * Paths needed by calcsb().
/* Absolute paths of the UFS mkfs and newfs programs, for calcsb(). */
#define	MKFS_PATH	"/usr/lib/fs/ufs/mkfs"
#define	NEWFS_PATH	"/usr/lib/fs/ufs/newfs"
519 int acltypeok(struct dinode
*);
520 void add_orphan_dir(fsck_ino_t
);
521 void adjust(struct inodesc
*, int);
522 daddr32_t
allocblk(int);
523 fsck_ino_t
allocdir(fsck_ino_t
, fsck_ino_t
, int, int);
524 fsck_ino_t
allocino(fsck_ino_t
, int);
525 void blkerror(fsck_ino_t
, caddr_t
, daddr32_t
, daddr32_t
);
526 void brelse(struct bufarea
*);
528 void bwrite(int, caddr_t
, diskaddr_t
, int64_t);
529 void cacheacl(struct dinode
*, fsck_ino_t
);
530 void cacheino(struct dinode
*, fsck_ino_t
);
533 caddr_t
cg_sanity(struct cg
*, int);
536 int changeino(fsck_ino_t
, caddr_t
, fsck_ino_t
);
537 int check_mnttab(caddr_t
, caddr_t
, size_t);
538 int check_vfstab(caddr_t
, caddr_t
, size_t);
539 int chkrange(daddr32_t
, int);
541 int ckinode(struct dinode
*, struct inodesc
*, enum cki_action
);
542 void clearattrref(fsck_ino_t
);
543 int cleardirentry(fsck_ino_t
, fsck_ino_t
);
544 void clearshadow(fsck_ino_t
, struct shadowclientinfo
**);
545 void clri(struct inodesc
*, caddr_t
, int, int);
546 void deshadow(struct shadowclientinfo
*, void (*)(fsck_ino_t
));
547 void direrror(fsck_ino_t
, caddr_t
, ...);
548 int dirscan(struct inodesc
*);
549 void dirty(struct bufarea
*);
550 int do_errorlock(int);
551 int dofix(struct inodesc
*, caddr_t
, ...);
552 void examinelog(void (*)(daddr32_t
));
553 void errexit(caddr_t
, ...);
554 void fileerror(fsck_ino_t
, fsck_ino_t
, caddr_t
, ...);
555 caddr_t
file_id(fsck_ino_t
, mode_t
);
556 int find_dup_ref(daddr32_t
, fsck_ino_t
, daddr32_t
, int);
557 int findino(struct inodesc
*);
558 int findname(struct inodesc
*);
559 void fix_cg(struct cg
*, int);
560 void flush(int, struct bufarea
*);
561 void free_dup_state(void);
562 void freeblk(fsck_ino_t
, daddr32_t
, int);
563 void freeino(fsck_ino_t
, int);
564 void freeinodebuf(void);
565 int fsck_asprintf(caddr_t
*, caddr_t
, ...);
566 int fsck_bread(int, caddr_t
, diskaddr_t
, size_t);
567 int ftypeok(struct dinode
*);
568 struct bufarea
*getblk(struct bufarea
*, daddr32_t
, size_t);
569 struct bufarea
*getdatablk(daddr32_t
, size_t size
);
570 diskaddr_t
getdisksize(caddr_t
, int);
571 struct inoinfo
*getinoinfo(fsck_ino_t
);
572 struct dinode
*getnextinode(fsck_ino_t
);
573 struct dinode
*getnextrefresh(void);
574 void getpathname(caddr_t
, fsck_ino_t
, fsck_ino_t
);
575 struct dinode
*ginode(fsck_ino_t
);
576 caddr_t
hasvfsopt(struct vfstab
*, caddr_t
);
578 void init_inodesc(struct inodesc
*);
579 void init_inoinfo(struct inoinfo
*, struct dinode
*, fsck_ino_t
);
580 void initbarea(struct bufarea
*);
581 int ino_t_cmp(const void *, const void *);
582 int inocached(fsck_ino_t
);
583 void inocleanup(void);
585 int is_errorlocked(caddr_t
);
586 int linkup(fsck_ino_t
, fsck_ino_t
, caddr_t
);
587 int lookup_named_ino(fsck_ino_t
, caddr_t
);
588 int makeentry(fsck_ino_t
, fsck_ino_t
, caddr_t
);
589 void maybe_convert_attrdir_to_dir(fsck_ino_t
);
590 int mounted(caddr_t
, caddr_t
, size_t);
593 int pass1check(struct inodesc
*);
597 int pass3bcheck(struct inodesc
*);
599 int pass4check(struct inodesc
*);
601 void pfatal(caddr_t
, ...);
602 void pinode(fsck_ino_t
);
603 void printclean(void);
604 void propagate(void);
605 void pwarn(caddr_t
, ...);
606 caddr_t
rawname(caddr_t
);
607 void registershadowclient(fsck_ino_t
, fsck_ino_t
,
608 struct shadowclientinfo
**);
609 void remove_orphan_dir(fsck_ino_t
);
610 int reply(caddr_t
, ...);
611 int report_dups(int);
612 void resetinodebuf(void);
613 char *setup(caddr_t
);
614 void truncino(fsck_ino_t
, offset_t
, int);
615 void unbufinit(void);
616 caddr_t
unrawname(caddr_t
);
617 void unregistershadow(fsck_ino_t
, struct shadowclientinfo
**);
618 int updateclean(void);
619 int writable(caddr_t
);
620 void write_altsb(int);
623 * Functions from the kernel sources (ufs_subr.c, etc).
/* Declared here; implemented with the kernel-derived sources. */
extern void fragacct(struct fs *, int, int32_t *, int);
631 #endif /* _FSCK_FSCK_H */