8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / fs.d / ufs / fsck / pass1.c
blobe4bbc0e1e25427ea8d291245496c0cf2f7e51313
1 /*
2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
9 /*
10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11 * All rights reserved.
13 * Redistribution and use in source and binary forms are permitted
14 * provided that: (1) source distributions retain this entire copyright
15 * notice and comment, and (2) distributions including binaries display
16 * the following acknowledgement: ``This product includes software
17 * developed by the University of California, Berkeley and its contributors''
18 * in the documentation or other materials provided with the distribution
19 * and in all advertising materials mentioning features or use of this
20 * software. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 #pragma ident "%Z%%M% %I% %E% SMI"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <string.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/fs/ufs_fs.h>
38 #include <sys/vnode.h>
39 #define _KERNEL
40 #include <sys/fs/ufs_fsdir.h>
41 #undef _KERNEL
42 #include <sys/fs/ufs_inode.h>
43 #include "fsck.h"
/*
 * For each large file (size > MAXOFF_T), the global largefile_count
 * gets incremented during this pass.
 */
50 static uint32_t badblk; /* number seen for the current inode */
51 static uint32_t dupblk; /* number seen for the current inode */
53 static void clear_attr_acl(fsck_ino_t, fsck_ino_t, char *);
54 static void verify_inode(fsck_ino_t, struct inodesc *, fsck_ino_t);
55 static void check_dirholes(fsck_ino_t, struct inodesc *);
56 static void collapse_dirhole(fsck_ino_t, struct inodesc *);
57 static void note_used(daddr32_t);
59 void
60 pass1(void)
62 uint_t c, i;
63 daddr32_t cgd;
64 struct inodesc idesc;
65 fsck_ino_t inumber;
66 fsck_ino_t maxinumber;
69 * Set file system reserved blocks in used block map.
71 for (c = 0; c < sblock.fs_ncg; c++) {
72 cgd = cgdmin(&sblock, c);
73 if (c == 0) {
75 * Doing the first cylinder group, account for
76 * the cg summaries as well.
78 i = cgbase(&sblock, c);
79 cgd += howmany(sblock.fs_cssize, sblock.fs_fsize);
80 } else {
81 i = cgsblock(&sblock, c);
83 for (; i < cgd; i++) {
84 note_used(i);
88 * Note blocks being used by the log, so we don't declare
89 * them as available and some time in the future we get a
90 * freeing free block panic.
92 if (islog && islogok && sblock.fs_logbno)
93 examinelog(&note_used);
96 * Find all allocated blocks. This must be completed before
97 * we read the contents of any directories, as dirscan() et al
98 * don't want to know about block allocation holes. So, part
99 * of this pass is to truncate any directories with holes to
100 * just before those holes, so dirscan() can remain blissfully
101 * ignorant.
103 inumber = 0;
104 n_files = n_blks = 0;
105 resetinodebuf();
106 maxinumber = sblock.fs_ncg * sblock.fs_ipg;
107 for (c = 0; c < sblock.fs_ncg; c++) {
108 for (i = 0; i < sblock.fs_ipg; i++, inumber++) {
109 if (inumber < UFSROOTINO)
110 continue;
111 init_inodesc(&idesc);
112 idesc.id_type = ADDR;
113 idesc.id_func = pass1check;
114 verify_inode(inumber, &idesc, maxinumber);
117 freeinodebuf();
121 * Perform checks on an inode and setup/track the state of the inode
122 * in maps (statemap[], lncntp[]) for future reference and validation.
123 * Initiate the calls to ckinode and in turn pass1check() to handle
124 * further validation.
126 static void
127 verify_inode(fsck_ino_t inumber, struct inodesc *idesc, fsck_ino_t maxinumber)
129 int j, clear, flags;
130 int isdir;
131 char *err;
132 fsck_ino_t shadow, attrinode;
133 daddr32_t ndb;
134 struct dinode *dp;
135 struct inoinfo *iip;
137 dp = getnextinode(inumber);
138 if ((dp->di_mode & IFMT) == 0) {
139 /* mode and type of file is not set */
140 if ((memcmp((void *)dp->di_db, (void *)zino.di_db,
141 NDADDR * sizeof (daddr32_t)) != 0) ||
142 (memcmp((void *)dp->di_ib, (void *)zino.di_ib,
143 NIADDR * sizeof (daddr32_t)) != 0) ||
144 (dp->di_mode != 0) || (dp->di_size != 0)) {
145 pfatal("PARTIALLY ALLOCATED INODE I=%u", inumber);
146 if (reply("CLEAR") == 1) {
147 dp = ginode(inumber);
148 clearinode(dp);
149 inodirty();
150 } else {
151 iscorrupt = 1;
154 statemap[inumber] = USTATE;
155 return;
158 isdir = ((dp->di_mode & IFMT) == IFDIR) ||
159 ((dp->di_mode & IFMT) == IFATTRDIR);
161 lastino = inumber;
162 if (dp->di_size > (u_offset_t)UFS_MAXOFFSET_T) {
163 pfatal("NEGATIVE SIZE %lld I=%d",
164 (longlong_t)dp->di_size, inumber);
165 goto bogus;
169 * A more precise test of the type is done later on. Just get
170 * rid of the blatantly-wrong ones before we do any
171 * significant work.
173 if ((dp->di_mode & IFMT) == IFMT) {
174 pfatal("BAD MODE 0%o I=%d",
175 dp->di_mode & IFMT, inumber);
176 if (reply("BAD MODE: MAKE IT A FILE") == 1) {
177 statemap[inumber] = FSTATE;
178 dp = ginode(inumber);
179 dp->di_mode = IFREG | 0600;
180 inodirty();
181 truncino(inumber, sblock.fs_fsize, TI_NOPARENT);
182 dp = getnextrefresh();
183 } else {
184 iscorrupt = 1;
188 ndb = howmany(dp->di_size, (u_offset_t)sblock.fs_bsize);
189 if (ndb < 0) {
190 /* extra space to distinguish from previous pfatal() */
191 pfatal("NEGATIVE SIZE %lld I=%d",
192 (longlong_t)dp->di_size, inumber);
193 goto bogus;
196 if ((dp->di_mode & IFMT) == IFBLK ||
197 (dp->di_mode & IFMT) == IFCHR) {
198 if (dp->di_size != 0) {
199 pfatal("SPECIAL FILE WITH NON-ZERO LENGTH %lld I=%d",
200 (longlong_t)dp->di_size, inumber);
201 goto bogus;
204 for (j = 0; j < NDADDR; j++) {
206 * It's a device, so all the block pointers
207 * should be zero except for di_ordev.
208 * di_ordev is overlayed on the block array,
209 * but where varies between big and little
210 * endian, so make sure that the only non-zero
211 * element is the correct one. There can be
212 * a device whose ordev is zero, so we can't
213 * check for the reverse.
215 if (dp->di_db[j] != 0 &&
216 &dp->di_db[j] != &dp->di_ordev) {
217 if (debug) {
218 (void) printf(
219 "spec file di_db[%d] has %d\n",
220 j, dp->di_db[j]);
222 pfatal(
223 "SPECIAL FILE WITH NON-ZERO FRAGMENT LIST I=%d",
224 inumber);
225 goto bogus;
229 for (j = 0; j < NIADDR; j++) {
230 if (dp->di_ib[j] != 0) {
231 if (debug)
232 (void) printf(
233 "special has %d at ib[%d]\n",
234 dp->di_ib[j], j);
235 pfatal(
236 "SPECIAL FILE WITH NON-ZERO FRAGMENT LIST I=%d",
237 inumber);
238 goto bogus;
241 } else {
243 * This assignment is mostly here to appease lint, but
244 * doesn't hurt.
246 err = "Internal error: unexpected variant of having "
247 "blocks past end of file I=%d";
249 clear = 0;
252 * If it's not a device, it has to follow the
253 * rules for files. In particular, no blocks after
254 * the last one that di_size says is in use.
256 for (j = ndb; j < NDADDR; j++) {
257 if (dp->di_db[j] != 0) {
258 if (debug) {
259 (void) printf("bad file direct "
260 "addr[%d]: block 0x%x "
261 "format: 0%o\n",
262 j, dp->di_db[j],
263 dp->di_mode & IFMT);
265 err = "FILE WITH FRAGMENTS PAST END I=%d";
266 clear = 1;
267 break;
272 * Find last indirect pointer that should be in use,
273 * and make sure any after it are clear.
275 if (!clear) {
276 for (j = 0, ndb -= NDADDR; ndb > 0; j++) {
277 ndb /= NINDIR(&sblock);
279 for (; j < NIADDR; j++) {
280 if (dp->di_ib[j] != 0) {
281 if (debug) {
282 (void) printf("bad file "
283 "indirect addr: block %d\n",
284 dp->di_ib[j]);
286 err =
287 "FILE WITH FRAGMENTS PAST END I=%d";
288 clear = 2;
289 break;
294 if (clear) {
296 * The discarded blocks will be garbage-
297 * collected in pass5. If we're told not to
298 * discard them, it's just lost blocks, which
299 * isn't worth setting iscorrupt for.
301 pwarn(err, inumber);
302 if (preen || reply("DISCARD EXCESS FRAGMENTS") == 1) {
303 dp = ginode(inumber);
304 if (clear == 1) {
305 for (; j < NDADDR; j++)
306 dp->di_db[j] = 0;
307 j = 0;
309 for (; j < NIADDR; j++)
310 dp->di_ib[j] = 0;
311 inodirty();
312 dp = getnextrefresh();
313 if (preen)
314 (void) printf(" (TRUNCATED)");
319 if (ftypeok(dp) == 0) {
320 pfatal("UNKNOWN FILE TYPE 0%o I=%d", dp->di_mode, inumber);
321 goto bogus;
323 n_files++;
324 TRACK_LNCNTP(inumber, lncntp[inumber] = dp->di_nlink);
327 * We can't do anything about it right now, so note that its
328 * processing is being delayed. Otherwise, we'd be changing
329 * the block allocations out from under ourselves, which causes
330 * no end of confusion.
332 flags = statemap[inumber] & INDELAYD;
335 * if errorlocked or logging, then open deleted files will
336 * manifest as di_nlink <= 0 and di_mode != 0
337 * so skip them; they're ok.
338 * Also skip anything already marked to be cleared.
340 if (dp->di_nlink <= 0 &&
341 !((errorlocked || islog) && dp->di_mode == 0) &&
342 !(flags & INCLEAR)) {
343 flags |= INZLINK;
344 if (debug)
345 (void) printf(
346 "marking i=%d INZLINK; nlink %d, mode 0%o, islog %d\n",
347 inumber, dp->di_nlink, dp->di_mode, islog);
350 switch (dp->di_mode & IFMT) {
351 case IFDIR:
352 case IFATTRDIR:
353 if (dp->di_size == 0) {
355 * INCLEAR means it will be ignored by passes 2 & 3.
357 if ((dp->di_mode & IFMT) == IFDIR)
358 (void) printf("ZERO-LENGTH DIR I=%d\n",
359 inumber);
360 else
361 (void) printf("ZERO-LENGTH ATTRDIR I=%d\n",
362 inumber);
363 add_orphan_dir(inumber);
364 flags |= INCLEAR;
365 flags &= ~INZLINK; /* It will be cleared anyway */
367 statemap[inumber] = DSTATE | flags;
368 cacheino(dp, inumber);
369 countdirs++;
370 break;
372 case IFSHAD:
373 if (dp->di_size == 0) {
374 (void) printf("ZERO-LENGTH SHADOW I=%d\n", inumber);
375 flags |= INCLEAR;
376 flags &= ~INZLINK; /* It will be cleared anyway */
378 statemap[inumber] = SSTATE | flags;
379 cacheacl(dp, inumber);
380 break;
382 default:
383 statemap[inumber] = FSTATE | flags;
386 badblk = 0;
387 dupblk = 0;
388 idesc->id_number = inumber;
389 idesc->id_fix = DONTKNOW;
390 if (dp->di_size > (u_offset_t)MAXOFF_T) {
391 largefile_count++;
394 (void) ckinode(dp, idesc, CKI_TRAVERSE);
395 if (isdir && (idesc->id_firsthole >= 0))
396 check_dirholes(inumber, idesc);
398 if (dp->di_blocks != idesc->id_entryno) {
400 * The kernel releases any blocks it finds in the lists,
401 * ignoring the block count itself. So, a bad count is
402 * not grounds for setting iscorrupt.
404 pwarn("INCORRECT DISK BLOCK COUNT I=%u (%d should be %d)",
405 inumber, (uint32_t)dp->di_blocks, idesc->id_entryno);
406 if (!preen && (reply("CORRECT") == 0))
407 return;
408 dp = ginode(inumber);
409 dp->di_blocks = idesc->id_entryno;
410 iip = getinoinfo(inumber);
411 if (iip != NULL)
412 iip->i_isize = dp->di_size;
413 inodirty();
414 if (preen)
415 (void) printf(" (CORRECTED)\n");
417 if (isdir && (dp->di_blocks == 0)) {
419 * INCLEAR will cause passes 2 and 3 to skip it.
421 (void) printf("DIR WITH ZERO BLOCKS I=%d\n", inumber);
422 statemap[inumber] = DCLEAR;
423 add_orphan_dir(inumber);
427 * Check that the ACL is on a valid file type
429 shadow = dp->di_shadow;
430 if (shadow != 0) {
431 if (acltypeok(dp) == 0) {
432 clear_attr_acl(inumber, -1,
433 "NON-ZERO ACL REFERENCE, I=%d\n");
434 } else if ((shadow <= UFSROOTINO) ||
435 (shadow > maxinumber)) {
436 clear_attr_acl(inumber, -1,
437 "BAD ACL REFERENCE I=%d\n");
438 } else {
439 registershadowclient(shadow,
440 inumber, &shadowclientinfo);
444 attrinode = dp->di_oeftflag;
445 if (attrinode != 0) {
446 if ((attrinode <= UFSROOTINO) ||
447 (attrinode > maxinumber)) {
448 clear_attr_acl(attrinode, inumber,
449 "BAD ATTRIBUTE REFERENCE TO I=%d FROM I=%d\n");
450 } else {
451 dp = ginode(attrinode);
452 if ((dp->di_mode & IFMT) != IFATTRDIR) {
453 clear_attr_acl(attrinode, inumber,
454 "BAD ATTRIBUTE DIR REF TO I=%d FROM I=%d\n");
455 } else if (dp->di_size == 0) {
456 clear_attr_acl(attrinode, inumber,
457 "REFERENCE TO ZERO-LENGTH ATTRIBUTE DIR I=%d from I=%d\n");
458 } else {
459 registershadowclient(attrinode, inumber,
460 &attrclientinfo);
464 return;
467 * If we got here, we've not had the chance to see if a
468 * directory has holes, but we know the directory's bad,
469 * so it's safe to always return false (no holes found).
471 * Also, a pfatal() is always done before jumping here, so
472 * we know we're not in preen mode.
474 bogus:
475 if (isdir) {
477 * INCLEAR makes passes 2 & 3 skip it.
479 statemap[inumber] = DCLEAR;
480 add_orphan_dir(inumber);
481 cacheino(dp, inumber);
482 } else {
483 statemap[inumber] = FCLEAR;
485 if (reply("CLEAR") == 1) {
486 (void) tdelete((void *)inumber, &limbo_dirs, ino_t_cmp);
487 freeino(inumber, TI_PARENT);
488 inodirty();
489 } else {
490 iscorrupt = 1;
495 * Do fixup for bad acl/attr references. If PARENT is -1, then
496 * we assume we're working on a shadow, otherwise an extended attribute.
497 * FMT must be a printf format string, with one %d directive for
498 * the inode number.
500 static void
501 clear_attr_acl(fsck_ino_t inumber, fsck_ino_t parent, char *fmt)
503 fsck_ino_t victim = inumber;
504 struct dinode *dp;
506 if (parent != -1)
507 victim = parent;
509 if (fmt != NULL) {
510 if (parent == -1)
511 pwarn(fmt, (int)inumber);
512 else
513 pwarn(fmt, (int)inumber, (int)parent);
516 if (debug)
517 (void) printf("parent file/dir I=%d\nvictim I=%d",
518 (int)parent, (int)victim);
520 if (!preen && (reply("REMOVE REFERENCE") == 0)) {
521 iscorrupt = 1;
522 return;
525 dp = ginode(victim);
526 if (parent == -1) {
528 * The file had a bad shadow/acl, so lock it down
529 * until someone can protect it the way they need it
530 * to be (i.e., be conservatively paranoid).
532 dp->di_shadow = 0;
533 dp->di_mode &= IFMT;
534 } else {
535 dp->di_oeftflag = 0;
538 inodirty();
539 if (preen)
540 (void) printf(" (CORRECTED)\n");
544 * Check if we have holes in the directory's indirect
545 * blocks. If there are, get rid of everything after
546 * the first hole.
548 static void
549 check_dirholes(fsck_ino_t inumber, struct inodesc *idesc)
551 char pathbuf[MAXPATHLEN + 1];
553 getpathname(pathbuf, idesc->id_number, idesc->id_number);
554 pfatal("I=%d DIRECTORY %s: CONTAINS EMPTY BLOCKS",
555 idesc->id_number, pathbuf);
556 if (reply("TRUNCATE AT FIRST EMPTY BLOCK") == 1) {
558 * We found a hole, so get rid of it.
560 collapse_dirhole(inumber, idesc);
562 if (preen)
563 (void) printf(" (TRUNCATED)\n");
564 } else {
565 iscorrupt = 1;
570 * Truncate a directory to its first hole. If there are non-holes
571 * in the direct blocks after the problem block, move them down so
572 * that there's somewhat less lossage. Doing this for indirect blocks
573 * is left as an exercise for the reader.
575 static void
576 collapse_dirhole(fsck_ino_t inumber, struct inodesc *idesc)
578 offset_t new_size;
579 int blocks;
581 if (idesc->id_firsthole < 0) {
582 return;
586 * Since truncino() adjusts the size, we don't need to do that here,
587 * but we have to tell it what final size we want.
589 * We need to count from block zero up through the last block
590 * before the hole. If the hole is in the indirect blocks, chop at
591 * the start of the nearest level of indirection. Orphans will
592 * get reconnected, so we're not actually losing anything by doing
593 * it this way, and we're simplifying truncation significantly.
595 new_size = idesc->id_firsthole * (offset_t)sblock.fs_bsize;
596 blocks = howmany(new_size, sblock.fs_bsize);
597 if (blocks > NDADDR) {
598 if (blocks < (NDADDR + NINDIR(&sblock)))
599 blocks = NDADDR;
600 else if (blocks < (NDADDR + NINDIR(&sblock) +
601 (NINDIR(&sblock) * NINDIR(&sblock))))
602 blocks = NDADDR + NINDIR(&sblock);
603 else
604 blocks = NDADDR + NINDIR(&sblock) +
605 (NINDIR(&sblock) * NINDIR(&sblock));
606 new_size = blocks * sblock.fs_bsize;
607 if (debug)
608 (void) printf("to %lld (blocks %d)\n",
609 (longlong_t)new_size, blocks);
611 truncino(inumber, new_size, TI_NOPARENT);
614 * Technically, there are still the original number of fragments
615 * associated with the object. However, that number is not used
616 * to control anything, so we can do the in-memory truncation of
617 * it without bad things happening.
619 idesc->id_entryno = btodb(new_size);
623 pass1check(struct inodesc *idesc)
625 int res = KEEPON;
626 int anyout;
627 int nfrags;
628 daddr32_t lbn;
629 daddr32_t fragno = idesc->id_blkno;
630 struct dinode *dp;
633 * If this is a fallocate'd file, block numbers may be stored
634 * as negative. In that case negate the negative numbers.
636 dp = ginode(idesc->id_number);
637 if (dp->di_cflags & IFALLOCATE && fragno < 0)
638 fragno = -fragno;
640 if ((anyout = chkrange(fragno, idesc->id_numfrags)) != 0) {
642 * Note that blkerror() exits when preening.
644 blkerror(idesc->id_number, "OUT OF RANGE",
645 fragno, idesc->id_lbn * sblock.fs_frag);
647 dp = ginode(idesc->id_number);
648 if ((((dp->di_mode & IFMT) == IFDIR) ||
649 ((dp->di_mode & IFMT) == IFATTRDIR)) &&
650 (idesc->id_firsthole < 0)) {
651 idesc->id_firsthole = idesc->id_lbn;
654 if (++badblk >= MAXBAD) {
655 pwarn("EXCESSIVE BAD FRAGMENTS I=%u",
656 idesc->id_number);
657 if (reply("CONTINUE") == 0)
658 errexit("Program terminated.");
660 * See discussion below as to why we don't
661 * want to short-circuit the processing of
662 * this inode. However, we know that this
663 * particular block is bad, so we don't need
664 * to go through the dup check loop.
666 return (SKIP | STOP);
671 * For each fragment, verify that it is a legal one (either
672 * by having already found the entire run to be legal, or by
673 * individual inspection), and if it is legal, see if we've
674 * seen it before or not. If we haven't, note that we've seen
675 * it and continue on. If we have (our in-core bitmap shows
676 * it as already being busy), then this must be a duplicate
677 * allocation. Whine and moan accordingly.
679 * Note that for full-block allocations, this will produce
680 * a complaint for each fragment making up the block (i.e.,
681 * fs_frags' worth). Among other things, this could be
682 * considered artificially inflating the dup-block count.
683 * However, since it is possible that one file has a full
684 * fs block allocated, but another is only claiming a frag
685 * or two out of the middle, we'll just live it.
687 for (nfrags = 0; nfrags < idesc->id_numfrags; fragno++, nfrags++) {
688 if (anyout && chkrange(fragno, 1)) {
689 /* bad fragment number */
690 res = SKIP;
691 } else if (!testbmap(fragno)) {
692 /* no other claims seen as yet */
693 note_used(fragno);
694 } else {
696 * We have a duplicate claim for the same fragment.
698 * blkerror() exits when preening.
700 * We want to report all the dups up until
701 * hitting MAXDUP. Fortunately, blkerror()'s
702 * side-effects on statemap[] are idempotent,
703 * so the ``extra'' calls are harmless.
705 lbn = idesc->id_lbn * sblock.fs_frag + nfrags;
706 if (dupblk < MAXDUP)
707 blkerror(idesc->id_number, "DUP", fragno, lbn);
710 * Use ==, so we only complain once, no matter
711 * how far over the limit we end up going.
713 if (++dupblk == MAXDUP) {
714 pwarn("EXCESSIVE DUPLICATE FRAGMENTS I=%u",
715 idesc->id_number);
716 if (reply("CONTINUE") == 0)
717 errexit("Program terminated.");
720 * If we stop the traversal here, then
721 * there may be more dups in the
722 * inode's block list that don't get
723 * flagged. Later, if we're told to
724 * clear one of the files claiming
725 * these blocks, but not the other, we
726 * will release blocks that are
727 * actually still in use. An additional
728 * fsck run would be necessary to undo
729 * the damage. So, instead of the
730 * traditional return (STOP) when told
731 * to continue, we really do just continue.
734 (void) find_dup_ref(fragno, idesc->id_number, lbn,
735 DB_CREATE | DB_INCR);
738 * id_entryno counts the number of disk blocks found.
740 idesc->id_entryno += btodb(sblock.fs_fsize);
742 return (res);
745 static void
746 note_used(daddr32_t frag)
748 n_blks++;
749 setbmap(frag);