1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_health.h"
20 #include "xfs_parent.h"
21 #include "scrub/scrub.h"
22 #include "scrub/common.h"
23 #include "scrub/dabtree.h"
24 #include "scrub/readdir.h"
25 #include "scrub/health.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/xfile.h"
29 #include "scrub/xfarray.h"
30 #include "scrub/xfblob.h"
32 /* Set us up to scrub directories. */
39 if (xchk_could_repair(sc
)) {
40 error
= xrep_setup_directory(sc
);
45 return xchk_setup_inode_contents(sc
, 0);
50 /* Deferred directory entry that we saved for later. */
52 /* Cookie for retrieval of the dirent name. */
53 xfblob_cookie name_cookie
;
55 /* Child inode number. */
58 /* Length of the pptr name. */
65 /* information for parent pointer validation. */
66 struct xfs_parent_rec pptr_rec
;
67 struct xfs_da_args pptr_args
;
69 /* Fixed-size array of xchk_dirent structures. */
70 struct xfarray
*dir_entries
;
72 /* Blobs containing dirent names. */
73 struct xfblob
*dir_names
;
75 /* If we've cycled the ILOCK, we must revalidate deferred dirents. */
78 /* Name buffer for dirent revalidation. */
79 struct xfs_name xname
;
80 uint8_t namebuf
[MAXNAMELEN
];
83 /* Scrub a directory entry. */
85 /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */
93 struct xfs_mount
*mp
= sc
->mp
;
95 if (!xfs_has_ftype(mp
)) {
96 if (ftype
!= XFS_DIR3_FT_UNKNOWN
&& ftype
!= XFS_DIR3_FT_DIR
)
97 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
101 if (xfs_mode_to_ftype(VFS_I(ip
)->i_mode
) != ftype
)
102 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
105 * Metadata and regular inodes cannot cross trees. This property
106 * cannot change without a full inode free and realloc cycle, so it's
107 * safe to check this without holding locks.
109 if (xfs_is_metadir_inode(ip
) != xfs_is_metadir_inode(sc
->ip
))
110 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
114 * Try to lock a child file for checking parent pointers. Returns the inode
115 * flags for the locks we now hold, or zero if we failed.
119 struct xfs_scrub
*sc
,
120 struct xfs_inode
*ip
)
122 if (!xfs_ilock_nowait(ip
, XFS_IOLOCK_SHARED
))
125 if (!xfs_ilock_nowait(ip
, XFS_ILOCK_SHARED
)) {
126 xfs_iunlock(ip
, XFS_IOLOCK_SHARED
);
130 if (!xfs_inode_has_attr_fork(ip
) || !xfs_need_iread_extents(&ip
->i_af
))
131 return XFS_IOLOCK_SHARED
| XFS_ILOCK_SHARED
;
133 xfs_iunlock(ip
, XFS_ILOCK_SHARED
);
135 if (!xfs_ilock_nowait(ip
, XFS_ILOCK_EXCL
)) {
136 xfs_iunlock(ip
, XFS_IOLOCK_SHARED
);
140 return XFS_IOLOCK_SHARED
| XFS_ILOCK_EXCL
;
143 /* Check the backwards link (parent pointer) associated with this dirent. */
145 xchk_dir_parent_pointer(
147 const struct xfs_name
*name
,
148 struct xfs_inode
*ip
)
150 struct xfs_scrub
*sc
= sd
->sc
;
153 xfs_inode_to_parent_rec(&sd
->pptr_rec
, sc
->ip
);
154 error
= xfs_parent_lookup(sc
->tp
, ip
, name
, &sd
->pptr_rec
,
156 if (error
== -ENOATTR
)
157 xchk_fblock_xref_set_corrupt(sc
, XFS_DATA_FORK
, 0);
162 /* Look for a parent pointer matching this dirent, if the child isn't busy. */
164 xchk_dir_check_pptr_fast(
166 xfs_dir2_dataptr_t dapos
,
167 const struct xfs_name
*name
,
168 struct xfs_inode
*ip
)
170 struct xfs_scrub
*sc
= sd
->sc
;
171 unsigned int lockmode
;
174 /* dot and dotdot entries do not have parent pointers */
175 if (xfs_dir2_samename(name
, &xfs_name_dot
) ||
176 xfs_dir2_samename(name
, &xfs_name_dotdot
))
179 /* No self-referential non-dot or dotdot dirents. */
181 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, 0);
185 /* Try to lock the inode. */
186 lockmode
= xchk_dir_lock_child(sc
, ip
);
188 struct xchk_dirent save_de
= {
189 .namelen
= name
->len
,
193 /* Couldn't lock the inode, so save the dirent for later. */
194 trace_xchk_dir_defer(sc
->ip
, name
, ip
->i_ino
);
196 error
= xfblob_storename(sd
->dir_names
, &save_de
.name_cookie
,
198 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, 0,
202 error
= xfarray_append(sd
->dir_entries
, &save_de
);
203 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, 0,
210 error
= xchk_dir_parent_pointer(sd
, name
, ip
);
211 xfs_iunlock(ip
, lockmode
);
216 * Scrub a single directory entry.
218 * Check the inode number to make sure it's sane, then we check that we can
219 * look up this filename. Finally, we check the ftype.
223 struct xfs_scrub
*sc
,
224 struct xfs_inode
*dp
,
225 xfs_dir2_dataptr_t dapos
,
226 const struct xfs_name
*name
,
230 struct xfs_mount
*mp
= dp
->i_mount
;
231 struct xfs_inode
*ip
;
232 struct xchk_dir
*sd
= priv
;
233 xfs_ino_t lookup_ino
;
237 offset
= xfs_dir2_db_to_da(mp
->m_dir_geo
,
238 xfs_dir2_dataptr_to_db(mp
->m_dir_geo
, dapos
));
240 if (xchk_should_terminate(sc
, &error
))
243 /* Does this inode number make sense? */
244 if (!xfs_verify_dir_ino(mp
, ino
)) {
245 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
249 /* Does this name make sense? */
250 if (!xfs_dir2_namecheck(name
->name
, name
->len
)) {
251 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
255 if (xfs_dir2_samename(name
, &xfs_name_dot
)) {
256 /* If this is "." then check that the inum matches the dir. */
257 if (ino
!= dp
->i_ino
)
258 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
259 } else if (xfs_dir2_samename(name
, &xfs_name_dotdot
)) {
261 * If this is ".." in the root inode, check that the inum
264 if (xchk_inode_is_dirtree_root(dp
) && ino
!= dp
->i_ino
)
265 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
268 /* Verify that we can look up this name by hash. */
269 error
= xchk_dir_lookup(sc
, dp
, name
, &lookup_ino
);
270 /* ENOENT means the hash lookup failed and the dir is corrupt */
271 if (error
== -ENOENT
)
272 error
= -EFSCORRUPTED
;
273 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, offset
, &error
))
275 if (lookup_ino
!= ino
) {
276 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, offset
);
281 * Grab the inode pointed to by the dirent. We release the inode
282 * before we cancel the scrub transaction.
284 * If _iget returns -EINVAL or -ENOENT then the child inode number is
285 * garbage and the directory is corrupt. If the _iget returns
286 * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
287 * cross referencing error. Any other error is an operational error.
289 error
= xchk_iget(sc
, ino
, &ip
);
290 if (error
== -EINVAL
|| error
== -ENOENT
) {
291 error
= -EFSCORRUPTED
;
292 xchk_fblock_process_error(sc
, XFS_DATA_FORK
, 0, &error
);
295 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, offset
, &error
))
298 xchk_dir_check_ftype(sc
, offset
, ip
, name
->type
);
300 if (xfs_has_parent(mp
)) {
301 error
= xchk_dir_check_pptr_fast(sd
, dapos
, name
, ip
);
309 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
314 /* Scrub a directory btree record. */
317 struct xchk_da_btree
*ds
,
320 struct xfs_name dname
= { };
321 struct xfs_da_state_blk
*blk
= &ds
->state
->path
.blk
[level
];
322 struct xfs_mount
*mp
= ds
->state
->mp
;
323 struct xfs_inode
*dp
= ds
->dargs
.dp
;
324 struct xfs_da_geometry
*geo
= mp
->m_dir_geo
;
325 struct xfs_dir2_data_entry
*dent
;
327 struct xfs_dir2_leaf_entry
*ent
;
329 unsigned int iter_off
;
333 xfs_dir2_data_aoff_t off
;
334 xfs_dir2_dataptr_t ptr
;
335 xfs_dahash_t calc_hash
;
337 struct xfs_dir3_icleaf_hdr hdr
;
341 ASSERT(blk
->magic
== XFS_DIR2_LEAF1_MAGIC
||
342 blk
->magic
== XFS_DIR2_LEAFN_MAGIC
);
344 xfs_dir2_leaf_hdr_from_disk(mp
, &hdr
, blk
->bp
->b_addr
);
345 ent
= hdr
.ents
+ blk
->index
;
347 /* Check the hash of the entry. */
348 error
= xchk_da_btree_hash(ds
, level
, &ent
->hashval
);
352 /* Valid hash pointer? */
353 ptr
= be32_to_cpu(ent
->address
);
357 /* Find the directory entry's location. */
358 db
= xfs_dir2_dataptr_to_db(geo
, ptr
);
359 off
= xfs_dir2_dataptr_to_off(geo
, ptr
);
360 rec_bno
= xfs_dir2_db_to_da(geo
, db
);
362 if (rec_bno
>= geo
->leafblk
) {
363 xchk_da_set_corrupt(ds
, level
);
366 error
= xfs_dir3_data_read(ds
->dargs
.trans
, dp
, ds
->dargs
.owner
,
367 rec_bno
, XFS_DABUF_MAP_HOLE_OK
, &bp
);
368 if (!xchk_fblock_process_error(ds
->sc
, XFS_DATA_FORK
, rec_bno
,
372 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
375 xchk_buffer_recheck(ds
->sc
, bp
);
377 if (ds
->sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
380 dent
= bp
->b_addr
+ off
;
382 /* Make sure we got a real directory entry. */
383 iter_off
= geo
->data_entry_offset
;
384 end
= xfs_dir3_data_end_offset(geo
, bp
->b_addr
);
386 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
390 struct xfs_dir2_data_entry
*dep
= bp
->b_addr
+ iter_off
;
391 struct xfs_dir2_data_unused
*dup
= bp
->b_addr
+ iter_off
;
393 if (iter_off
>= end
) {
394 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
398 if (be16_to_cpu(dup
->freetag
) == XFS_DIR2_DATA_FREE_TAG
) {
399 iter_off
+= be16_to_cpu(dup
->length
);
404 iter_off
+= xfs_dir2_data_entsize(mp
, dep
->namelen
);
407 /* Retrieve the entry, sanity check it, and compare hashes. */
408 ino
= be64_to_cpu(dent
->inumber
);
409 hash
= be32_to_cpu(ent
->hashval
);
410 tag
= be16_to_cpup(xfs_dir2_data_entry_tag_p(mp
, dent
));
411 if (!xfs_verify_dir_ino(mp
, ino
) || tag
!= off
)
412 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
413 if (dent
->namelen
== 0) {
414 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
418 /* Does the directory hash match? */
419 dname
.name
= dent
->name
;
420 dname
.len
= dent
->namelen
;
421 calc_hash
= xfs_dir2_hashname(mp
, &dname
);
422 if (calc_hash
!= hash
)
423 xchk_fblock_set_corrupt(ds
->sc
, XFS_DATA_FORK
, rec_bno
);
426 xfs_trans_brelse(ds
->dargs
.trans
, bp
);
432 * Is this unused entry either in the bestfree or smaller than all of
433 * them? We've already checked that the bestfrees are sorted longest to
434 * shortest, and that there aren't any bogus entries.
437 xchk_directory_check_free_entry(
438 struct xfs_scrub
*sc
,
440 struct xfs_dir2_data_free
*bf
,
441 struct xfs_dir2_data_unused
*dup
)
443 struct xfs_dir2_data_free
*dfp
;
444 unsigned int dup_length
;
446 dup_length
= be16_to_cpu(dup
->length
);
448 /* Unused entry is shorter than any of the bestfrees */
449 if (dup_length
< be16_to_cpu(bf
[XFS_DIR2_DATA_FD_COUNT
- 1].length
))
452 for (dfp
= &bf
[XFS_DIR2_DATA_FD_COUNT
- 1]; dfp
>= bf
; dfp
--)
453 if (dup_length
== be16_to_cpu(dfp
->length
))
456 /* Unused entry should be in the bestfrees but wasn't found. */
457 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
460 /* Check free space info in a directory data block. */
462 xchk_directory_data_bestfree(
463 struct xfs_scrub
*sc
,
467 struct xfs_dir2_data_unused
*dup
;
468 struct xfs_dir2_data_free
*dfp
;
470 struct xfs_dir2_data_free
*bf
;
471 struct xfs_mount
*mp
= sc
->mp
;
473 unsigned int nr_bestfrees
= 0;
474 unsigned int nr_frees
= 0;
475 unsigned int smallest_bestfree
;
482 /* dir block format */
483 if (lblk
!= XFS_B_TO_FSBT(mp
, XFS_DIR2_DATA_OFFSET
))
484 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
485 error
= xfs_dir3_block_read(sc
->tp
, sc
->ip
, sc
->ip
->i_ino
, &bp
);
487 /* dir data format */
488 error
= xfs_dir3_data_read(sc
->tp
, sc
->ip
, sc
->ip
->i_ino
, lblk
,
491 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
, &error
))
493 xchk_buffer_recheck(sc
, bp
);
495 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
497 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
500 /* Do the bestfrees correspond to actual free space? */
501 bf
= xfs_dir2_data_bestfree_p(mp
, bp
->b_addr
);
502 smallest_bestfree
= UINT_MAX
;
503 for (dfp
= &bf
[0]; dfp
< &bf
[XFS_DIR2_DATA_FD_COUNT
]; dfp
++) {
504 offset
= be16_to_cpu(dfp
->offset
);
507 if (offset
>= mp
->m_dir_geo
->blksize
) {
508 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
511 dup
= bp
->b_addr
+ offset
;
512 tag
= be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup
));
514 /* bestfree doesn't match the entry it points at? */
515 if (dup
->freetag
!= cpu_to_be16(XFS_DIR2_DATA_FREE_TAG
) ||
516 be16_to_cpu(dup
->length
) != be16_to_cpu(dfp
->length
) ||
518 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
522 /* bestfree records should be ordered largest to smallest */
523 if (smallest_bestfree
< be16_to_cpu(dfp
->length
)) {
524 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
528 smallest_bestfree
= be16_to_cpu(dfp
->length
);
532 /* Make sure the bestfrees are actually the best free spaces. */
533 offset
= mp
->m_dir_geo
->data_entry_offset
;
534 end
= xfs_dir3_data_end_offset(mp
->m_dir_geo
, bp
->b_addr
);
536 /* Iterate the entries, stopping when we hit or go past the end. */
537 while (offset
< end
) {
538 dup
= bp
->b_addr
+ offset
;
540 /* Skip real entries */
541 if (dup
->freetag
!= cpu_to_be16(XFS_DIR2_DATA_FREE_TAG
)) {
542 struct xfs_dir2_data_entry
*dep
= bp
->b_addr
+ offset
;
544 newlen
= xfs_dir2_data_entsize(mp
, dep
->namelen
);
546 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
,
554 /* Spot check this free entry */
555 tag
= be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup
));
557 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
562 * Either this entry is a bestfree or it's smaller than
563 * any of the bestfrees.
565 xchk_directory_check_free_entry(sc
, lblk
, bf
, dup
);
566 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
570 newlen
= be16_to_cpu(dup
->length
);
572 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
580 /* We're required to fill all the space. */
582 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
584 /* Did we see at least as many free slots as there are bestfrees? */
585 if (nr_frees
< nr_bestfrees
)
586 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
588 xfs_trans_brelse(sc
->tp
, bp
);
594 * Does the free space length in the free space index block ($len) match
595 * the longest length in the directory data block's bestfree array?
596 * Assume that we've already checked that the data block's bestfree
600 xchk_directory_check_freesp(
601 struct xfs_scrub
*sc
,
606 struct xfs_dir2_data_free
*dfp
;
608 dfp
= xfs_dir2_data_bestfree_p(sc
->mp
, dbp
->b_addr
);
610 if (len
!= be16_to_cpu(dfp
->length
))
611 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
613 if (len
> 0 && be16_to_cpu(dfp
->offset
) == 0)
614 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
617 /* Check free space info in a directory leaf1 block. */
619 xchk_directory_leaf1_bestfree(
620 struct xfs_scrub
*sc
,
621 struct xfs_da_args
*args
,
622 xfs_dir2_db_t last_data_db
,
625 struct xfs_dir3_icleaf_hdr leafhdr
;
626 struct xfs_dir2_leaf_tail
*ltp
;
627 struct xfs_dir2_leaf
*leaf
;
630 struct xfs_da_geometry
*geo
= sc
->mp
->m_dir_geo
;
636 unsigned int stale
= 0;
640 /* Read the free space block. */
641 error
= xfs_dir3_leaf_read(sc
->tp
, sc
->ip
, sc
->ip
->i_ino
, lblk
, &bp
);
642 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
, &error
))
644 xchk_buffer_recheck(sc
, bp
);
647 xfs_dir2_leaf_hdr_from_disk(sc
->ip
->i_mount
, &leafhdr
, leaf
);
648 ltp
= xfs_dir2_leaf_tail_p(geo
, leaf
);
649 bestcount
= be32_to_cpu(ltp
->bestcount
);
650 bestp
= xfs_dir2_leaf_bests_p(ltp
);
652 if (xfs_has_crc(sc
->mp
)) {
653 struct xfs_dir3_leaf_hdr
*hdr3
= bp
->b_addr
;
655 if (hdr3
->pad
!= cpu_to_be32(0))
656 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
660 * There must be enough bestfree slots to cover all the directory data
661 * blocks that we scanned. It is possible for there to be a hole
662 * between the last data block and i_disk_size. This seems like an
663 * oversight to the scrub author, but as we have been writing out
664 * directories like this (and xfs_repair doesn't mind them) for years,
665 * that's what we have to check.
667 if (bestcount
!= last_data_db
+ 1) {
668 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
672 /* Is the leaf count even remotely sane? */
673 if (leafhdr
.count
> geo
->leaf_max_ents
) {
674 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
678 /* Leaves and bests don't overlap in leaf format. */
679 if ((char *)&leafhdr
.ents
[leafhdr
.count
] > (char *)bestp
) {
680 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
684 /* Check hash value order, count stale entries. */
685 for (i
= 0; i
< leafhdr
.count
; i
++) {
686 hash
= be32_to_cpu(leafhdr
.ents
[i
].hashval
);
687 if (i
> 0 && lasthash
> hash
)
688 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
690 if (leafhdr
.ents
[i
].address
==
691 cpu_to_be32(XFS_DIR2_NULL_DATAPTR
))
694 if (leafhdr
.stale
!= stale
)
695 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
696 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
699 /* Check all the bestfree entries. */
700 for (i
= 0; i
< bestcount
; i
++, bestp
++) {
701 best
= be16_to_cpu(*bestp
);
702 error
= xfs_dir3_data_read(sc
->tp
, sc
->ip
, args
->owner
,
703 xfs_dir2_db_to_da(args
->geo
, i
),
704 XFS_DABUF_MAP_HOLE_OK
, &dbp
);
705 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
,
710 if (best
!= NULLDATAOFF
) {
711 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
,
718 if (best
== NULLDATAOFF
)
719 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
721 xchk_directory_check_freesp(sc
, lblk
, dbp
, best
);
722 xfs_trans_brelse(sc
->tp
, dbp
);
723 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
727 xfs_trans_brelse(sc
->tp
, bp
);
731 /* Check free space info in a directory freespace block. */
733 xchk_directory_free_bestfree(
734 struct xfs_scrub
*sc
,
735 struct xfs_da_args
*args
,
738 struct xfs_dir3_icfree_hdr freehdr
;
742 unsigned int stale
= 0;
746 /* Read the free space block */
747 error
= xfs_dir2_free_read(sc
->tp
, sc
->ip
, sc
->ip
->i_ino
, lblk
, &bp
);
748 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
, &error
))
750 xchk_buffer_recheck(sc
, bp
);
752 if (xfs_has_crc(sc
->mp
)) {
753 struct xfs_dir3_free_hdr
*hdr3
= bp
->b_addr
;
755 if (hdr3
->pad
!= cpu_to_be32(0))
756 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
759 /* Check all the entries. */
760 xfs_dir2_free_hdr_from_disk(sc
->ip
->i_mount
, &freehdr
, bp
->b_addr
);
761 for (i
= 0; i
< freehdr
.nvalid
; i
++) {
762 best
= be16_to_cpu(freehdr
.bests
[i
]);
763 if (best
== NULLDATAOFF
) {
767 error
= xfs_dir3_data_read(sc
->tp
, sc
->ip
, args
->owner
,
768 (freehdr
.firstdb
+ i
) * args
->geo
->fsbcount
,
770 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
,
773 xchk_directory_check_freesp(sc
, lblk
, dbp
, best
);
774 xfs_trans_brelse(sc
->tp
, dbp
);
777 if (freehdr
.nused
+ stale
!= freehdr
.nvalid
)
778 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
780 xfs_trans_brelse(sc
->tp
, bp
);
784 /* Check free space information in directories. */
786 xchk_directory_blocks(
787 struct xfs_scrub
*sc
)
789 struct xfs_bmbt_irec got
;
790 struct xfs_da_args args
= {
792 .whichfork
= XFS_DATA_FORK
,
793 .geo
= sc
->mp
->m_dir_geo
,
795 .owner
= sc
->ip
->i_ino
,
797 struct xfs_ifork
*ifp
= xfs_ifork_ptr(sc
->ip
, XFS_DATA_FORK
);
798 struct xfs_mount
*mp
= sc
->mp
;
799 xfs_fileoff_t leaf_lblk
;
800 xfs_fileoff_t free_lblk
;
802 struct xfs_iext_cursor icur
;
804 xfs_dir2_db_t last_data_db
= 0;
806 bool is_block
= false;
809 /* Ignore local format directories. */
810 if (ifp
->if_format
!= XFS_DINODE_FMT_EXTENTS
&&
811 ifp
->if_format
!= XFS_DINODE_FMT_BTREE
)
814 lblk
= XFS_B_TO_FSB(mp
, XFS_DIR2_DATA_OFFSET
);
815 leaf_lblk
= XFS_B_TO_FSB(mp
, XFS_DIR2_LEAF_OFFSET
);
816 free_lblk
= XFS_B_TO_FSB(mp
, XFS_DIR2_FREE_OFFSET
);
818 /* Is this a block dir? */
819 if (xfs_dir2_format(&args
, &error
) == XFS_DIR2_FMT_BLOCK
)
821 if (!xchk_fblock_process_error(sc
, XFS_DATA_FORK
, lblk
, &error
))
824 /* Iterate all the data extents in the directory... */
825 found
= xfs_iext_lookup_extent(sc
->ip
, ifp
, lblk
, &icur
, &got
);
826 while (found
&& !(sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)) {
827 /* No more data blocks... */
828 if (got
.br_startoff
>= leaf_lblk
)
832 * Check each data block's bestfree data.
834 * Iterate all the fsbcount-aligned block offsets in
835 * this directory. The directory block reading code is
836 * smart enough to do its own bmap lookups to handle
837 * discontiguous directory blocks. When we're done
838 * with the extent record, re-query the bmap at the
839 * next fsbcount-aligned offset to avoid redundant
842 for (lblk
= roundup((xfs_dablk_t
)got
.br_startoff
,
844 lblk
< got
.br_startoff
+ got
.br_blockcount
;
845 lblk
+= args
.geo
->fsbcount
) {
846 last_data_db
= xfs_dir2_da_to_db(args
.geo
, lblk
);
847 error
= xchk_directory_data_bestfree(sc
, lblk
,
852 dabno
= got
.br_startoff
+ got
.br_blockcount
;
853 lblk
= roundup(dabno
, args
.geo
->fsbcount
);
854 found
= xfs_iext_lookup_extent(sc
->ip
, ifp
, lblk
, &icur
, &got
);
857 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
860 /* Look for a leaf1 block, which has free info. */
861 if (xfs_iext_lookup_extent(sc
->ip
, ifp
, leaf_lblk
, &icur
, &got
) &&
862 got
.br_startoff
== leaf_lblk
&&
863 got
.br_blockcount
== args
.geo
->fsbcount
&&
864 !xfs_iext_next_extent(ifp
, &icur
, &got
)) {
866 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
869 error
= xchk_directory_leaf1_bestfree(sc
, &args
, last_data_db
,
875 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
878 /* Scan for free blocks */
880 found
= xfs_iext_lookup_extent(sc
->ip
, ifp
, lblk
, &icur
, &got
);
881 while (found
&& !(sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)) {
883 * Dirs can't have blocks mapped above 2^32.
884 * Single-block dirs shouldn't even be here.
886 lblk
= got
.br_startoff
;
887 if (lblk
& ~0xFFFFFFFFULL
) {
888 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
892 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, lblk
);
897 * Check each dir free block's bestfree data.
899 * Iterate all the fsbcount-aligned block offsets in
900 * this directory. The directory block reading code is
901 * smart enough to do its own bmap lookups to handle
902 * discontiguous directory blocks. When we're done
903 * with the extent record, re-query the bmap at the
904 * next fsbcount-aligned offset to avoid redundant
907 for (lblk
= roundup((xfs_dablk_t
)got
.br_startoff
,
909 lblk
< got
.br_startoff
+ got
.br_blockcount
;
910 lblk
+= args
.geo
->fsbcount
) {
911 error
= xchk_directory_free_bestfree(sc
, &args
,
916 dabno
= got
.br_startoff
+ got
.br_blockcount
;
917 lblk
= roundup(dabno
, args
.geo
->fsbcount
);
918 found
= xfs_iext_lookup_extent(sc
->ip
, ifp
, lblk
, &icur
, &got
);
925 * Revalidate a dirent that we collected in the past but couldn't check because
926 * of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it
927 * has gone away on us, or a negative errno.
930 xchk_dir_revalidate_dirent(
932 const struct xfs_name
*xname
,
935 struct xfs_scrub
*sc
= sd
->sc
;
940 * Look up the directory entry. If we get -ENOENT, the directory entry
941 * went away and there's nothing to revalidate. Return any other
944 error
= xchk_dir_lookup(sc
, sc
->ip
, xname
, &child_ino
);
948 /* The inode number changed, nothing to revalidate. */
949 if (ino
!= child_ino
)
956 * Check a directory entry's parent pointers the slow way, which means we cycle
957 * locks a bunch and put up with revalidation until we get it done.
960 xchk_dir_slow_dirent(
962 struct xchk_dirent
*dirent
,
963 const struct xfs_name
*xname
)
965 struct xfs_scrub
*sc
= sd
->sc
;
966 struct xfs_inode
*ip
;
967 unsigned int lockmode
;
970 /* Check that the deferred dirent still exists. */
971 if (sd
->need_revalidate
) {
972 error
= xchk_dir_revalidate_dirent(sd
, xname
, dirent
->ino
);
973 if (error
== -ENOENT
)
975 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, 0,
980 error
= xchk_iget(sc
, dirent
->ino
, &ip
);
981 if (error
== -EINVAL
|| error
== -ENOENT
) {
982 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, 0);
985 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, 0, &error
))
989 * If we can grab both IOLOCK and ILOCK of the alleged child, we can
990 * proceed with the validation.
992 lockmode
= xchk_dir_lock_child(sc
, ip
);
994 trace_xchk_dir_slowpath(sc
->ip
, xname
, ip
->i_ino
);
999 * We couldn't lock the child file. Drop all the locks and try to
1000 * get them again, one at a time.
1002 xchk_iunlock(sc
, sc
->ilock_flags
);
1003 sd
->need_revalidate
= true;
1005 trace_xchk_dir_ultraslowpath(sc
->ip
, xname
, ip
->i_ino
);
1007 error
= xchk_dir_trylock_for_pptrs(sc
, ip
, &lockmode
);
1011 /* Revalidate, since we just cycled the locks. */
1012 error
= xchk_dir_revalidate_dirent(sd
, xname
, dirent
->ino
);
1013 if (error
== -ENOENT
) {
1017 if (!xchk_fblock_xref_process_error(sc
, XFS_DATA_FORK
, 0, &error
))
1021 error
= xchk_dir_parent_pointer(sd
, xname
, ip
);
1023 xfs_iunlock(ip
, lockmode
);
1029 /* Check all the dirents that we deferred the first time around. */
1031 xchk_dir_finish_slow_dirents(
1032 struct xchk_dir
*sd
)
1034 xfarray_idx_t array_cur
;
1037 foreach_xfarray_idx(sd
->dir_entries
, array_cur
) {
1038 struct xchk_dirent dirent
;
1040 if (sd
->sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
1043 error
= xfarray_load(sd
->dir_entries
, array_cur
, &dirent
);
1047 error
= xfblob_loadname(sd
->dir_names
, dirent
.name_cookie
,
1048 &sd
->xname
, dirent
.namelen
);
1052 error
= xchk_dir_slow_dirent(sd
, &dirent
, &sd
->xname
);
1060 /* Scrub a whole directory. */
1063 struct xfs_scrub
*sc
)
1065 struct xchk_dir
*sd
;
1068 if (!S_ISDIR(VFS_I(sc
->ip
)->i_mode
))
1071 if (xchk_file_looks_zapped(sc
, XFS_SICK_INO_DIR_ZAPPED
)) {
1072 xchk_fblock_set_corrupt(sc
, XFS_DATA_FORK
, 0);
1076 /* Plausible size? */
1077 if (sc
->ip
->i_disk_size
< xfs_dir2_sf_hdr_size(0)) {
1078 xchk_ino_set_corrupt(sc
, sc
->ip
->i_ino
);
1082 /* Check directory tree structure */
1083 error
= xchk_da_btree(sc
, XFS_DATA_FORK
, xchk_dir_rec
, NULL
);
1087 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
1090 /* Check the freespace. */
1091 error
= xchk_directory_blocks(sc
);
1095 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
1098 sd
= kvzalloc(sizeof(struct xchk_dir
), XCHK_GFP_FLAGS
);
1102 sd
->xname
.name
= sd
->namebuf
;
1104 if (xfs_has_parent(sc
->mp
)) {
1108 * Set up some staging memory for dirents that we can't check
1109 * due to locking contention.
1111 descr
= xchk_xfile_ino_descr(sc
, "slow directory entries");
1112 error
= xfarray_create(descr
, 0, sizeof(struct xchk_dirent
),
1118 descr
= xchk_xfile_ino_descr(sc
, "slow directory entry names");
1119 error
= xfblob_create(descr
, &sd
->dir_names
);
1125 /* Look up every name in this directory by hash. */
1126 error
= xchk_dir_walk(sc
, sc
->ip
, xchk_dir_actor
, sd
);
1127 if (error
== -ECANCELED
)
1132 if (xfs_has_parent(sc
->mp
)) {
1133 error
= xchk_dir_finish_slow_dirents(sd
);
1134 if (error
== -ETIMEDOUT
) {
1135 /* Couldn't grab a lock, scrub was marked incomplete */
1145 xfblob_destroy(sd
->dir_names
);
1147 if (sd
->dir_entries
)
1148 xfarray_destroy(sd
->dir_entries
);
1154 /* If the dir is clean, it is clearly not zapped. */
1155 xchk_mark_healthy_if_clean(sc
, XFS_SICK_INO_DIR_ZAPPED
);
1160 * Decide if this directory has been zapped to satisfy the inode and ifork
1161 * verifiers. Checking and repairing should be postponed until the directory
1165 xchk_dir_looks_zapped(
1166 struct xfs_inode
*dp
)
1168 /* Repair zapped this dir's data fork a short time ago */
1169 if (xfs_ifork_zapped(dp
, XFS_DATA_FORK
))
1173 * If the dinode repair found a bad data fork, it will reset the fork
1174 * to extents format with zero records and wait for the bmapbtd
1175 * scrubber to reconstruct the block mappings. Directories always
1176 * contain some content, so this is a clear sign of a zapped directory.
1177 * The state checked by xfs_ifork_zapped is not persisted, so this is
1178 * the secondary strategy if repairs are interrupted by a crash or an
1181 return dp
->i_df
.if_format
== XFS_DINODE_FMT_EXTENTS
&&
1182 dp
->i_df
.if_nextents
== 0;