/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"
/*
 * Keep track of duplicate fragment references (elsewhere called
 * blocks for ancient historical reasons).
 *
 * The duplicates are kept in a binary tree to minimize search times
 * when checking the block lists of all active inodes for multiple
 * uses.  This is in contrast to the simple linear list used by the
 * traditional fsck, which is traversed for every block and becomes
 * very expensive once there are more than a few duplicates; in
 * practice there tend to be either none or lots.
 *
 * For each multiply-claimed fragment, we note all of the claiming
 * inodes and their corresponding logical block numbers.  This allows
 * reporting exactly which parts of which files were damaged, which
 * provides at least a chance of recovering the bulk of the data on
 * a seriously-corrupted filesystem.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/avl.h>
#include <sys/fs/ufs_fsdir.h>	/* for struct direct */
#include <sys/debug.h>
#include "fsck.h"

#define	OFFSETOF(type, elt)	((size_t)(&((type *)NULL)->elt))
/*
 * For each physical fragment with multiple claimants, the specifics
 * of each claim are recorded.  This means there are N+1 AVL trees in
 * use: one for each fragment's claimant table, plus one that orders
 * the fragments themselves.
 *
 * The table of fragments holds the physical fragment number (pfn)
 * and the root of the tree of the associated claimants.  It is keyed
 * by the pfn and called dup_frags.
 *
 * The subsidiary trees list inodes and logical fragment numbers (lfn)
 * for each claimant.  They are keyed first by inode number and then
 * by lfn.  Both are needed, as it is possible for one inode to have
 * multiple claims on the same fragment.  An illustrative walk of
 * these structures follows the declarations below.
 */
typedef struct claimant {
	fsck_ino_t	cl_inode;
	daddr32_t	cl_lfn;
	avl_node_t	cl_avl;
} claimant_t;

typedef struct fragment {
	daddr32_t	fr_pfn;
	avl_tree_t	fr_claimants;
	avl_node_t	fr_avl;
} fragment_t;

typedef struct reference {
	daddr32_t	ref_lfn;
	daddr32_t	ref_pfn;
	avl_node_t	ref_avl;
} reference_t;

typedef struct inode_dup {
	fsck_ino_t	id_ino;
	avl_tree_t	id_fragments;
	avl_node_t	id_avl;
} inode_dup_t;

static avl_tree_t dup_frags;
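
/*
 * Illustrative sketch only, not part of the original fsck code: one
 * way to walk the N+1 trees described above.  The outer loop visits
 * each multiply-claimed physical fragment in dup_frags; the inner
 * loop visits that fragment's claimant tree.  The function name and
 * the running total are assumptions made purely for the example.
 */
static int
example_count_claims(void)
{
	fragment_t *frag;
	claimant_t *claim;
	int claims = 0;

	/* Outer tree: one node per duplicated physical fragment. */
	for (frag = avl_first(&dup_frags); frag != NULL;
	    frag = AVL_NEXT(&dup_frags, frag)) {
		/* Inner tree: one node per (inode, lfn) claim. */
		for (claim = avl_first(&frag->fr_claimants); claim != NULL;
		    claim = AVL_NEXT(&frag->fr_claimants, claim)) {
			claims++;
		}
	}

	return (claims);
}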
static void free_invert_frags(avl_tree_t *);
static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t);
static inode_dup_t *new_inode_dup(fsck_ino_t);
static void invert_frags(avl_tree_t *, avl_tree_t *);
static void report_inode_dups(inode_dup_t *);
static int by_ino_cmp(const void *, const void *);
static int by_lfn_cmp(const void *, const void *);
static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t);
static fragment_t *alloc_dup(daddr32_t);
static int claimant_cmp(const void *, const void *);
static int fragment_cmp(const void *, const void *);
static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t);
static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t);
/*
 * Simple accessor function for the outside world so only we need to
 * see and interpret our data structures.
 */
int
have_dups(void)
{
	return (avl_numnodes(&dup_frags) > 0);
}
/*
 * Locates, creates, and deletes a record of a duplicate reference.
 *
 * For DB_INCR, returns true if the dup was added to the tree.
 * For DB_DECR, returns true if the dup was in the tree.
 */
int
find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags)
{
	fragment_t key;
	fragment_t *dup;
	avl_index_t where;
	int added = 0;
	int removed = 0;

	if (avl_first(&dup_frags) == NULL) {
		if (flags & DB_CREATE)
			avl_create(&dup_frags, fragment_cmp,
			    sizeof (fragment_t),
			    OFFSETOF(fragment_t, fr_avl));
		else
			return (0);
	}

	key.fr_pfn = fragno;
	dup = avl_find(&dup_frags, (void *)&key, &where);
	if ((dup == NULL) && (flags & DB_CREATE)) {
		dup = alloc_dup(fragno);
		avl_insert(&dup_frags, (void *)dup, where);
	}

	if (flags & DB_INCR) {
		if (debug)
			(void) printf(
			    "adding claim by ino %d as lfn %d\n", ino, lfn);
		added = increment_claimant(dup, ino, lfn);
	} else if (flags & DB_DECR) {
		/*
		 * Note that dup may be invalidated by this call.
		 */
		removed = decrement_claimant(dup, ino, lfn);
		if (debug)
			(void) printf(
			    "check for claimant ino %d lfn %d returned %d\n",
			    ino, lfn, removed);
	}

	return (added || removed || (dup != NULL));
}
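
/*
 * Illustrative sketch only, not part of the original fsck code: how
 * a block-scanning pass might use find_dup_ref().  The helper name
 * is an assumption made for the example, as is the combination of
 * the DB_* flags; the independent flag tests above suggest they are
 * separate bits defined in the fsck headers.
 */
static void
example_track_claim(daddr32_t pfn, fsck_ino_t ino, daddr32_t lfn)
{
	/*
	 * Record a claim on pfn by (ino, lfn), creating the dup
	 * tables on first use.
	 */
	(void) find_dup_ref(pfn, ino, lfn, DB_CREATE | DB_INCR);

	/*
	 * Later, when the claim is resolved, it can be retired;
	 * the return value is true if the claim was known.
	 */
	if (find_dup_ref(pfn, ino, lfn, DB_DECR))
		(void) printf("retired duplicate claim on fragment %d\n",
		    pfn);
}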
/*
 * Dump the duplicates table in a relatively user-friendly form.
 * The idea is that the output can be useful when trying to manually
 * work out which block belongs to which of the claiming inodes.
 *
 * What we have is a tree of duplicates indexed by physical
 * fragment number.  What we want to report is:
 *
 * Inode %d:
 *	Logical Offset 0x%08llx, Physical Fragment %d
 *	Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
 * Inode %d:
 *	Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
 */
void
report_dups(int quiet)
{
	int overlaps = 0;
	fragment_t *dup;
	inode_dup_t *inode;
	avl_tree_t inode_frags;

	/*
	 * Figure out how many actual dups are still around.
	 * This tells us whether or not we can mark the
	 * filesystem clean.
	 */
	dup = avl_first(&dup_frags);
	while (dup != NULL) {
		if (avl_numnodes(&dup->fr_claimants) > 1) {
			overlaps++;
		}
		dup = AVL_NEXT(&dup_frags, dup);
	}

	/*
	 * Now report on every object that still exists that
	 * had *any* dups associated with it.
	 */
	if (!quiet) {
		(void) puts("\nSome blocks that were found to be in "
		    "multiple files are still\nassigned to "
		    "file(s).\nFragments sorted by inode and "
		    "logical offsets:");

		invert_frags(&dup_frags, &inode_frags);
		inode = avl_first(&inode_frags);
		while (inode != NULL) {
			report_inode_dups(inode);
			inode = AVL_NEXT(&inode_frags, inode);
		}

		free_invert_frags(&inode_frags);
	}

	if (overlaps > 0)
		iscorrupt = 1;
}
static void
report_inode_dups(inode_dup_t *inode)
{
	reference_t *dup;
	daddr32_t first_lfn, last_lfn, first_pfn, last_pfn;

	(void) printf("Inode %d:\n", inode->id_ino);
	dup = avl_first(&inode->id_fragments);
	first_lfn = last_lfn = dup->ref_lfn;
	first_pfn = last_pfn = dup->ref_pfn;
	while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) {
		if (((last_lfn + 1) != dup->ref_lfn) ||
		    ((last_pfn + 1) != dup->ref_pfn)) {
			report_dup_lfn_pfn(first_lfn, last_lfn,
			    first_pfn, last_pfn);
			first_lfn = last_lfn = dup->ref_lfn;
			first_pfn = last_pfn = dup->ref_pfn;
		} else {
			last_lfn = dup->ref_lfn;
			last_pfn = dup->ref_pfn;
		}
	}
	report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn);
}
static void
report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn,
    daddr32_t first_pfn, daddr32_t last_pfn)
{
	if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) {
		(void) printf(
		    " Logical Offset 0x%08llx Physical Fragment %d\n",
		    (longlong_t)first_lfn * sblock.fs_fsize, first_pfn);
	} else {
		(void) printf(
		    " Logical Offsets 0x%08llx - 0x%08llx, "
		    "Physical Fragments %d - %d\n",
		    (longlong_t)first_lfn * sblock.fs_fsize,
		    (longlong_t)last_lfn * sblock.fs_fsize,
		    first_pfn, last_pfn);
	}
}
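
/*
 * Illustrative sketch only, not part of the original fsck code: the
 * byte offsets printed above are simply the logical fragment number
 * scaled by the fragment size from the superblock.  The helper name
 * is an assumption made for the example.
 */
static longlong_t
example_lfn_to_offset(daddr32_t lfn)
{
	/* e.g. lfn 3 with a 1024-byte fragment size gives 0x00000c00 */
	return ((longlong_t)lfn * sblock.fs_fsize);
}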
/*
 * Given a tree of fragment_ts, each element of which has an integral
 * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element
 * of which has an integral sub-tree of reference_ts.
 */
static void
invert_frags(avl_tree_t *source, avl_tree_t *target)
{
	fragment_t *src_frag;
	claimant_t *src_claim;
	inode_dup_t *tgt_inode;
	inode_dup_t tgt_inode_key;
	reference_t *tgt_ref;
	reference_t tgt_ref_key;
	avl_index_t where;

	avl_create(target, by_ino_cmp, sizeof (inode_dup_t),
	    OFFSETOF(inode_dup_t, id_avl));

	src_frag = avl_first(source);
	while (src_frag != NULL) {
		src_claim = avl_first(&src_frag->fr_claimants);
		while (src_claim != NULL) {
			/*
			 * Have we seen this inode before?
			 */
			tgt_inode_key.id_ino = src_claim->cl_inode;
			tgt_inode = avl_find(target, (void *)&tgt_inode_key,
			    &where);
			if (tgt_inode == NULL) {
				/*
				 * No, so set up a record for it.
				 */
				tgt_inode = new_inode_dup(src_claim->cl_inode);
				avl_insert(target, (void *)tgt_inode, where);
			}
			/*
			 * Now, how about this logical fragment?  In
			 * theory, we should never see a duplicate, since
			 * a given lfn only exists once for a given inode.
			 * As such, we ignore duplicate hits.
			 */
			tgt_ref_key.ref_lfn = src_claim->cl_lfn;
			tgt_ref = avl_find(&tgt_inode->id_fragments,
			    (void *)&tgt_ref_key, &where);
			if (tgt_ref == NULL) {
				/*
				 * Haven't seen it, add it.
				 */
				tgt_ref = (reference_t *)malloc(
				    sizeof (reference_t));
				if (tgt_ref == NULL)
					errexit("Out of memory in "
					    "invert_frags()\n");
				tgt_ref->ref_lfn = src_claim->cl_lfn;
				tgt_ref->ref_pfn = src_frag->fr_pfn;
				avl_insert(&tgt_inode->id_fragments,
				    (void *)tgt_ref, where);
			}
			src_claim = AVL_NEXT(&src_frag->fr_claimants,
			    src_claim);
		}
		src_frag = AVL_NEXT(source, src_frag);
	}
}
/*
 * Discard memory associated with the inverted fragments tree created
 * by report_dups() via invert_frags().
 */
static void
free_invert_frags(avl_tree_t *tree)
{
	void *outer = NULL;	/* traversal cookie */
	void *inner;		/* traversal cookie */
	inode_dup_t *inode_dup;
	reference_t *ref_dup;

	while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) {
		inner = NULL;
		while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments,
		    &inner)) != NULL) {
			free((void *)ref_dup);
		}
		avl_destroy(&inode_dup->id_fragments);
		free((void *)inode_dup);
	}
	avl_destroy(tree);
}
/*
 * Discard all memory allocations associated with the current duplicates
 * table.
 */
void
free_dup_state(void)
{
	void *dup_cookie = NULL;
	void *claim_cookie;
	fragment_t *fragv;
	claimant_t *claimv;

	while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) {
		claim_cookie = NULL;
		while ((claimv = avl_destroy_nodes(&fragv->fr_claimants,
		    &claim_cookie)) != NULL) {
			free((void *)claimv);
		}
		avl_destroy(&fragv->fr_claimants);
		free((void *)fragv);
	}
	avl_destroy(&dup_frags);
}
/*
 * If the given claimant has not been seen before, add it to DUP's
 * list of them.  It's not fatal for the same PFN/INODE/LFN to get
 * added twice, because pass1b() will add the same dups that pass1()
 * did.
 */
static int
increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
{
	claimant_t key;
	claimant_t *claimant;
	avl_index_t where;

	key.cl_inode = ino;
	key.cl_lfn = lfn;
	claimant = avl_find(&dup->fr_claimants, &key, &where);
	if (claimant == NULL) {
		if (debug)
			(void) printf("inserting claimant\n");
		claimant = alloc_claimant(ino, lfn);
		avl_insert(&dup->fr_claimants, (void *)claimant, where);
		statemap[ino] |= INCLEAR;
		/*
		 * If the inode is to be cleared and has zero links then
		 * remove the zero link bit as it will be cleared anyway.
		 * If INZLINK is being removed and it's a directory inode
		 * then add the inode to the orphan directory list.
		 */
		if (statemap[ino] & INZLINK) {
			statemap[ino] &= ~INZLINK;
			if (statemap[ino] & DSTATE) {
				/* directory inode: goes on the orphan list */
			}
		}
		return (1);
	}

	return (0);
}
/*
 * If the given claimant is on DUP's list, remove it.  It is not
 * an error for the claimant to not be on the list.
 */
static int
decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
{
	claimant_t key;
	claimant_t *claimant;
	avl_index_t where;

	key.cl_inode = ino;
	key.cl_lfn = lfn;
	claimant = avl_find(&dup->fr_claimants, &key, &where);
	if (claimant != NULL) {
		avl_remove(&dup->fr_claimants, claimant);
		if (avl_numnodes(&dup->fr_claimants) == 0) {
			avl_destroy(&dup->fr_claimants);
			avl_remove(&dup_frags, (void *)dup);
			free((void *)dup);
		}
		free((void *)claimant);
		return (1);
	}

	return (0);
}
static claimant_t *
alloc_claimant(fsck_ino_t inode, daddr32_t lfn)
{
	claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t));

	if (new == NULL)
		errexit("Out of memory in alloc_claimant()\n");

	new->cl_inode = inode;
	new->cl_lfn = lfn;

	return (new);
}
static fragment_t *
alloc_dup(daddr32_t pfn)
{
	fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t));

	if (new == NULL)
		errexit("Out of memory in alloc_dup()\n");

	new->fr_pfn = pfn;
	avl_create(&new->fr_claimants, claimant_cmp, sizeof (claimant_t),
	    OFFSETOF(claimant_t, cl_avl));

	return (new);
}
/*
 * Compare two fragment_t instances for avl_find().  It requires a
 * return value of -1/0/1, so we can't just hand back left - right.
 */
static int
fragment_cmp(const void *vlp, const void *vrp)
{
	const fragment_t *lp = (const fragment_t *)vlp;
	const fragment_t *rp = (const fragment_t *)vrp;
	int cmp = lp->fr_pfn - rp->fr_pfn;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}
/*
 * Compare two claimant_t instances for avl_find().  It requires a
 * return value of -1/0/1, so we can't just hand back left - right.
 */
static int
claimant_cmp(const void *vlp, const void *vrp)
{
	const claimant_t *lp = (const claimant_t *)vlp;
	const claimant_t *rp = (const claimant_t *)vrp;
	int cmp;

	cmp = lp->cl_inode - rp->cl_inode;
	if (cmp == 0) {
		/*
		 * lfn < 0 is a wildcard lfn match.
		 */
		if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0))
			cmp = lp->cl_lfn - rp->cl_lfn;
	}

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}
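
/*
 * Illustrative sketch only, not part of the original fsck code:
 * because claimant_cmp() treats a negative lfn as a wildcard, a
 * caller can probe a fragment's claimant tree for "any claim by
 * this inode" without knowing the logical fragment number.  The
 * helper name is an assumption made for the example.
 */
static int
example_inode_claims_fragment(fragment_t *frag, fsck_ino_t ino)
{
	claimant_t key;

	key.cl_inode = ino;
	key.cl_lfn = -1;	/* wildcard: match any lfn for this inode */

	return (avl_find(&frag->fr_claimants, &key, NULL) != NULL);
}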
static int
by_ino_cmp(const void *vlp, const void *vrp)
{
	const inode_dup_t *lp = (const inode_dup_t *)vlp;
	const inode_dup_t *rp = (const inode_dup_t *)vrp;
	int cmp;

	cmp = lp->id_ino - rp->id_ino;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}
static int
by_lfn_cmp(const void *vlp, const void *vrp)
{
	const reference_t *lp = (const reference_t *)vlp;
	const reference_t *rp = (const reference_t *)vrp;
	int cmp;

	cmp = lp->ref_lfn - rp->ref_lfn;

	if (cmp < 0)
		cmp = -1;
	else if (cmp > 0)
		cmp = 1;

	return (cmp);
}
static inode_dup_t *
new_inode_dup(fsck_ino_t inode)
{
	inode_dup_t *new;

	new = (inode_dup_t *)malloc(sizeof (inode_dup_t));
	if (new == NULL)
		errexit("Out of memory in new_inode_dup\n");

	new->id_ino = inode;
	avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t),
	    OFFSETOF(reference_t, ref_avl));

	return (new);
}