accel/qaic: Add AIC200 support
[drm/drm-misc.git] / fs / xfs / scrub / dir.c
blobc877bde71e62808036a7262e042a0937d40b190d
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_dir2.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_health.h"
19 #include "xfs_attr.h"
20 #include "xfs_parent.h"
21 #include "scrub/scrub.h"
22 #include "scrub/common.h"
23 #include "scrub/dabtree.h"
24 #include "scrub/readdir.h"
25 #include "scrub/health.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/xfile.h"
29 #include "scrub/xfarray.h"
30 #include "scrub/xfblob.h"
/*
 * Prepare to scrub a directory.  If a repair could follow this scrub, run
 * the directory repair setup first; then set up the inode contents.
 * Returns 0 or the error from whichever setup step failed.
 */
int
xchk_setup_directory(
	struct xfs_scrub	*sc)
{
	if (xchk_could_repair(sc)) {
		int		ret = xrep_setup_directory(sc);

		if (ret)
			return ret;
	}

	return xchk_setup_inode_contents(sc, 0);
}
48 /* Directories */
50 /* Deferred directory entry that we saved for later. */
51 struct xchk_dirent {
52 /* Cookie for retrieval of the dirent name. */
53 xfblob_cookie name_cookie;
55 /* Child inode number. */
56 xfs_ino_t ino;
58 /* Length of the pptr name. */
59 uint8_t namelen;
62 struct xchk_dir {
63 struct xfs_scrub *sc;
65 /* information for parent pointer validation. */
66 struct xfs_parent_rec pptr_rec;
67 struct xfs_da_args pptr_args;
69 /* Fixed-size array of xchk_dirent structures. */
70 struct xfarray *dir_entries;
72 /* Blobs containing dirent names. */
73 struct xfblob *dir_names;
75 /* If we've cycled the ILOCK, we must revalidate deferred dirents. */
76 bool need_revalidate;
78 /* Name buffer for dirent revalidation. */
79 struct xfs_name xname;
80 uint8_t namebuf[MAXNAMELEN];
83 /* Scrub a directory entry. */
85 /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */
86 STATIC void
87 xchk_dir_check_ftype(
88 struct xfs_scrub *sc,
89 xfs_fileoff_t offset,
90 struct xfs_inode *ip,
91 int ftype)
93 struct xfs_mount *mp = sc->mp;
95 if (!xfs_has_ftype(mp)) {
96 if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR)
97 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
98 return;
101 if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
102 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
105 * Metadata and regular inodes cannot cross trees. This property
106 * cannot change without a full inode free and realloc cycle, so it's
107 * safe to check this without holding locks.
109 if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip))
110 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
114 * Try to lock a child file for checking parent pointers. Returns the inode
115 * flags for the locks we now hold, or zero if we failed.
117 STATIC unsigned int
118 xchk_dir_lock_child(
119 struct xfs_scrub *sc,
120 struct xfs_inode *ip)
122 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
123 return 0;
125 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
126 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
127 return 0;
130 if (!xfs_inode_has_attr_fork(ip) || !xfs_need_iread_extents(&ip->i_af))
131 return XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED;
133 xfs_iunlock(ip, XFS_ILOCK_SHARED);
135 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
136 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
137 return 0;
140 return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
143 /* Check the backwards link (parent pointer) associated with this dirent. */
144 STATIC int
145 xchk_dir_parent_pointer(
146 struct xchk_dir *sd,
147 const struct xfs_name *name,
148 struct xfs_inode *ip)
150 struct xfs_scrub *sc = sd->sc;
151 int error;
153 xfs_inode_to_parent_rec(&sd->pptr_rec, sc->ip);
154 error = xfs_parent_lookup(sc->tp, ip, name, &sd->pptr_rec,
155 &sd->pptr_args);
156 if (error == -ENOATTR)
157 xchk_fblock_xref_set_corrupt(sc, XFS_DATA_FORK, 0);
159 return 0;
162 /* Look for a parent pointer matching this dirent, if the child isn't busy. */
163 STATIC int
164 xchk_dir_check_pptr_fast(
165 struct xchk_dir *sd,
166 xfs_dir2_dataptr_t dapos,
167 const struct xfs_name *name,
168 struct xfs_inode *ip)
170 struct xfs_scrub *sc = sd->sc;
171 unsigned int lockmode;
172 int error;
174 /* dot and dotdot entries do not have parent pointers */
175 if (xfs_dir2_samename(name, &xfs_name_dot) ||
176 xfs_dir2_samename(name, &xfs_name_dotdot))
177 return 0;
179 /* No self-referential non-dot or dotdot dirents. */
180 if (ip == sc->ip) {
181 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
182 return -ECANCELED;
185 /* Try to lock the inode. */
186 lockmode = xchk_dir_lock_child(sc, ip);
187 if (!lockmode) {
188 struct xchk_dirent save_de = {
189 .namelen = name->len,
190 .ino = ip->i_ino,
193 /* Couldn't lock the inode, so save the dirent for later. */
194 trace_xchk_dir_defer(sc->ip, name, ip->i_ino);
196 error = xfblob_storename(sd->dir_names, &save_de.name_cookie,
197 name);
198 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
199 &error))
200 return error;
202 error = xfarray_append(sd->dir_entries, &save_de);
203 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
204 &error))
205 return error;
207 return 0;
210 error = xchk_dir_parent_pointer(sd, name, ip);
211 xfs_iunlock(ip, lockmode);
212 return error;
216 * Scrub a single directory entry.
218 * Check the inode number to make sure it's sane, then we check that we can
219 * look up this filename. Finally, we check the ftype.
221 STATIC int
222 xchk_dir_actor(
223 struct xfs_scrub *sc,
224 struct xfs_inode *dp,
225 xfs_dir2_dataptr_t dapos,
226 const struct xfs_name *name,
227 xfs_ino_t ino,
228 void *priv)
230 struct xfs_mount *mp = dp->i_mount;
231 struct xfs_inode *ip;
232 struct xchk_dir *sd = priv;
233 xfs_ino_t lookup_ino;
234 xfs_dablk_t offset;
235 int error = 0;
237 offset = xfs_dir2_db_to_da(mp->m_dir_geo,
238 xfs_dir2_dataptr_to_db(mp->m_dir_geo, dapos));
240 if (xchk_should_terminate(sc, &error))
241 return error;
243 /* Does this inode number make sense? */
244 if (!xfs_verify_dir_ino(mp, ino)) {
245 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
246 return -ECANCELED;
249 /* Does this name make sense? */
250 if (!xfs_dir2_namecheck(name->name, name->len)) {
251 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
252 return -ECANCELED;
255 if (xfs_dir2_samename(name, &xfs_name_dot)) {
256 /* If this is "." then check that the inum matches the dir. */
257 if (ino != dp->i_ino)
258 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
259 } else if (xfs_dir2_samename(name, &xfs_name_dotdot)) {
261 * If this is ".." in the root inode, check that the inum
262 * matches this dir.
264 if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino)
265 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
268 /* Verify that we can look up this name by hash. */
269 error = xchk_dir_lookup(sc, dp, name, &lookup_ino);
270 /* ENOENT means the hash lookup failed and the dir is corrupt */
271 if (error == -ENOENT)
272 error = -EFSCORRUPTED;
273 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
274 goto out;
275 if (lookup_ino != ino) {
276 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
277 return -ECANCELED;
281 * Grab the inode pointed to by the dirent. We release the inode
282 * before we cancel the scrub transaction.
284 * If _iget returns -EINVAL or -ENOENT then the child inode number is
285 * garbage and the directory is corrupt. If the _iget returns
286 * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
287 * cross referencing error. Any other error is an operational error.
289 error = xchk_iget(sc, ino, &ip);
290 if (error == -EINVAL || error == -ENOENT) {
291 error = -EFSCORRUPTED;
292 xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
293 goto out;
295 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error))
296 goto out;
298 xchk_dir_check_ftype(sc, offset, ip, name->type);
300 if (xfs_has_parent(mp)) {
301 error = xchk_dir_check_pptr_fast(sd, dapos, name, ip);
302 if (error)
303 goto out_rele;
306 out_rele:
307 xchk_irele(sc, ip);
308 out:
309 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
310 return -ECANCELED;
311 return error;
314 /* Scrub a directory btree record. */
315 STATIC int
316 xchk_dir_rec(
317 struct xchk_da_btree *ds,
318 int level)
320 struct xfs_name dname = { };
321 struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
322 struct xfs_mount *mp = ds->state->mp;
323 struct xfs_inode *dp = ds->dargs.dp;
324 struct xfs_da_geometry *geo = mp->m_dir_geo;
325 struct xfs_dir2_data_entry *dent;
326 struct xfs_buf *bp;
327 struct xfs_dir2_leaf_entry *ent;
328 unsigned int end;
329 unsigned int iter_off;
330 xfs_ino_t ino;
331 xfs_dablk_t rec_bno;
332 xfs_dir2_db_t db;
333 xfs_dir2_data_aoff_t off;
334 xfs_dir2_dataptr_t ptr;
335 xfs_dahash_t calc_hash;
336 xfs_dahash_t hash;
337 struct xfs_dir3_icleaf_hdr hdr;
338 unsigned int tag;
339 int error;
341 ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC ||
342 blk->magic == XFS_DIR2_LEAFN_MAGIC);
344 xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr);
345 ent = hdr.ents + blk->index;
347 /* Check the hash of the entry. */
348 error = xchk_da_btree_hash(ds, level, &ent->hashval);
349 if (error)
350 goto out;
352 /* Valid hash pointer? */
353 ptr = be32_to_cpu(ent->address);
354 if (ptr == 0)
355 return 0;
357 /* Find the directory entry's location. */
358 db = xfs_dir2_dataptr_to_db(geo, ptr);
359 off = xfs_dir2_dataptr_to_off(geo, ptr);
360 rec_bno = xfs_dir2_db_to_da(geo, db);
362 if (rec_bno >= geo->leafblk) {
363 xchk_da_set_corrupt(ds, level);
364 goto out;
366 error = xfs_dir3_data_read(ds->dargs.trans, dp, ds->dargs.owner,
367 rec_bno, XFS_DABUF_MAP_HOLE_OK, &bp);
368 if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
369 &error))
370 goto out;
371 if (!bp) {
372 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
373 goto out;
375 xchk_buffer_recheck(ds->sc, bp);
377 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
378 goto out_relse;
380 dent = bp->b_addr + off;
382 /* Make sure we got a real directory entry. */
383 iter_off = geo->data_entry_offset;
384 end = xfs_dir3_data_end_offset(geo, bp->b_addr);
385 if (!end) {
386 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
387 goto out_relse;
389 for (;;) {
390 struct xfs_dir2_data_entry *dep = bp->b_addr + iter_off;
391 struct xfs_dir2_data_unused *dup = bp->b_addr + iter_off;
393 if (iter_off >= end) {
394 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
395 goto out_relse;
398 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
399 iter_off += be16_to_cpu(dup->length);
400 continue;
402 if (dep == dent)
403 break;
404 iter_off += xfs_dir2_data_entsize(mp, dep->namelen);
407 /* Retrieve the entry, sanity check it, and compare hashes. */
408 ino = be64_to_cpu(dent->inumber);
409 hash = be32_to_cpu(ent->hashval);
410 tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent));
411 if (!xfs_verify_dir_ino(mp, ino) || tag != off)
412 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
413 if (dent->namelen == 0) {
414 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
415 goto out_relse;
418 /* Does the directory hash match? */
419 dname.name = dent->name;
420 dname.len = dent->namelen;
421 calc_hash = xfs_dir2_hashname(mp, &dname);
422 if (calc_hash != hash)
423 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
425 out_relse:
426 xfs_trans_brelse(ds->dargs.trans, bp);
427 out:
428 return error;
432 * Is this unused entry either in the bestfree or smaller than all of
433 * them? We've already checked that the bestfrees are sorted longest to
434 * shortest, and that there aren't any bogus entries.
436 STATIC void
437 xchk_directory_check_free_entry(
438 struct xfs_scrub *sc,
439 xfs_dablk_t lblk,
440 struct xfs_dir2_data_free *bf,
441 struct xfs_dir2_data_unused *dup)
443 struct xfs_dir2_data_free *dfp;
444 unsigned int dup_length;
446 dup_length = be16_to_cpu(dup->length);
448 /* Unused entry is shorter than any of the bestfrees */
449 if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
450 return;
452 for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
453 if (dup_length == be16_to_cpu(dfp->length))
454 return;
456 /* Unused entry should be in the bestfrees but wasn't found. */
457 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
460 /* Check free space info in a directory data block. */
461 STATIC int
462 xchk_directory_data_bestfree(
463 struct xfs_scrub *sc,
464 xfs_dablk_t lblk,
465 bool is_block)
467 struct xfs_dir2_data_unused *dup;
468 struct xfs_dir2_data_free *dfp;
469 struct xfs_buf *bp;
470 struct xfs_dir2_data_free *bf;
471 struct xfs_mount *mp = sc->mp;
472 u16 tag;
473 unsigned int nr_bestfrees = 0;
474 unsigned int nr_frees = 0;
475 unsigned int smallest_bestfree;
476 int newlen;
477 unsigned int offset;
478 unsigned int end;
479 int error;
481 if (is_block) {
482 /* dir block format */
483 if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
484 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
485 error = xfs_dir3_block_read(sc->tp, sc->ip, sc->ip->i_ino, &bp);
486 } else {
487 /* dir data format */
488 error = xfs_dir3_data_read(sc->tp, sc->ip, sc->ip->i_ino, lblk,
489 0, &bp);
491 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
492 goto out;
493 xchk_buffer_recheck(sc, bp);
495 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
497 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
498 goto out_buf;
500 /* Do the bestfrees correspond to actual free space? */
501 bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
502 smallest_bestfree = UINT_MAX;
503 for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
504 offset = be16_to_cpu(dfp->offset);
505 if (offset == 0)
506 continue;
507 if (offset >= mp->m_dir_geo->blksize) {
508 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
509 goto out_buf;
511 dup = bp->b_addr + offset;
512 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
514 /* bestfree doesn't match the entry it points at? */
515 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
516 be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
517 tag != offset) {
518 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
519 goto out_buf;
522 /* bestfree records should be ordered largest to smallest */
523 if (smallest_bestfree < be16_to_cpu(dfp->length)) {
524 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
525 goto out_buf;
528 smallest_bestfree = be16_to_cpu(dfp->length);
529 nr_bestfrees++;
532 /* Make sure the bestfrees are actually the best free spaces. */
533 offset = mp->m_dir_geo->data_entry_offset;
534 end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr);
536 /* Iterate the entries, stopping when we hit or go past the end. */
537 while (offset < end) {
538 dup = bp->b_addr + offset;
540 /* Skip real entries */
541 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
542 struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
544 newlen = xfs_dir2_data_entsize(mp, dep->namelen);
545 if (newlen <= 0) {
546 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
547 lblk);
548 goto out_buf;
550 offset += newlen;
551 continue;
554 /* Spot check this free entry */
555 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
556 if (tag != offset) {
557 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
558 goto out_buf;
562 * Either this entry is a bestfree or it's smaller than
563 * any of the bestfrees.
565 xchk_directory_check_free_entry(sc, lblk, bf, dup);
566 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
567 goto out_buf;
569 /* Move on. */
570 newlen = be16_to_cpu(dup->length);
571 if (newlen <= 0) {
572 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
573 goto out_buf;
575 offset += newlen;
576 if (offset <= end)
577 nr_frees++;
580 /* We're required to fill all the space. */
581 if (offset != end)
582 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
584 /* Did we see at least as many free slots as there are bestfrees? */
585 if (nr_frees < nr_bestfrees)
586 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
587 out_buf:
588 xfs_trans_brelse(sc->tp, bp);
589 out:
590 return error;
594 * Does the free space length in the free space index block ($len) match
595 * the longest length in the directory data block's bestfree array?
596 * Assume that we've already checked that the data block's bestfree
597 * array is in order.
599 STATIC void
600 xchk_directory_check_freesp(
601 struct xfs_scrub *sc,
602 xfs_dablk_t lblk,
603 struct xfs_buf *dbp,
604 unsigned int len)
606 struct xfs_dir2_data_free *dfp;
608 dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr);
610 if (len != be16_to_cpu(dfp->length))
611 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
613 if (len > 0 && be16_to_cpu(dfp->offset) == 0)
614 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
617 /* Check free space info in a directory leaf1 block. */
618 STATIC int
619 xchk_directory_leaf1_bestfree(
620 struct xfs_scrub *sc,
621 struct xfs_da_args *args,
622 xfs_dir2_db_t last_data_db,
623 xfs_dablk_t lblk)
625 struct xfs_dir3_icleaf_hdr leafhdr;
626 struct xfs_dir2_leaf_tail *ltp;
627 struct xfs_dir2_leaf *leaf;
628 struct xfs_buf *dbp;
629 struct xfs_buf *bp;
630 struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
631 __be16 *bestp;
632 __u16 best;
633 __u32 hash;
634 __u32 lasthash = 0;
635 __u32 bestcount;
636 unsigned int stale = 0;
637 int i;
638 int error;
640 /* Read the free space block. */
641 error = xfs_dir3_leaf_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
642 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
643 return error;
644 xchk_buffer_recheck(sc, bp);
646 leaf = bp->b_addr;
647 xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf);
648 ltp = xfs_dir2_leaf_tail_p(geo, leaf);
649 bestcount = be32_to_cpu(ltp->bestcount);
650 bestp = xfs_dir2_leaf_bests_p(ltp);
652 if (xfs_has_crc(sc->mp)) {
653 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
655 if (hdr3->pad != cpu_to_be32(0))
656 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
660 * There must be enough bestfree slots to cover all the directory data
661 * blocks that we scanned. It is possible for there to be a hole
662 * between the last data block and i_disk_size. This seems like an
663 * oversight to the scrub author, but as we have been writing out
664 * directories like this (and xfs_repair doesn't mind them) for years,
665 * that's what we have to check.
667 if (bestcount != last_data_db + 1) {
668 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
669 goto out;
672 /* Is the leaf count even remotely sane? */
673 if (leafhdr.count > geo->leaf_max_ents) {
674 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
675 goto out;
678 /* Leaves and bests don't overlap in leaf format. */
679 if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) {
680 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
681 goto out;
684 /* Check hash value order, count stale entries. */
685 for (i = 0; i < leafhdr.count; i++) {
686 hash = be32_to_cpu(leafhdr.ents[i].hashval);
687 if (i > 0 && lasthash > hash)
688 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
689 lasthash = hash;
690 if (leafhdr.ents[i].address ==
691 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
692 stale++;
694 if (leafhdr.stale != stale)
695 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
696 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
697 goto out;
699 /* Check all the bestfree entries. */
700 for (i = 0; i < bestcount; i++, bestp++) {
701 best = be16_to_cpu(*bestp);
702 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
703 xfs_dir2_db_to_da(args->geo, i),
704 XFS_DABUF_MAP_HOLE_OK, &dbp);
705 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
706 &error))
707 break;
709 if (!dbp) {
710 if (best != NULLDATAOFF) {
711 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
712 lblk);
713 break;
715 continue;
718 if (best == NULLDATAOFF)
719 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
720 else
721 xchk_directory_check_freesp(sc, lblk, dbp, best);
722 xfs_trans_brelse(sc->tp, dbp);
723 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
724 break;
726 out:
727 xfs_trans_brelse(sc->tp, bp);
728 return error;
731 /* Check free space info in a directory freespace block. */
732 STATIC int
733 xchk_directory_free_bestfree(
734 struct xfs_scrub *sc,
735 struct xfs_da_args *args,
736 xfs_dablk_t lblk)
738 struct xfs_dir3_icfree_hdr freehdr;
739 struct xfs_buf *dbp;
740 struct xfs_buf *bp;
741 __u16 best;
742 unsigned int stale = 0;
743 int i;
744 int error;
746 /* Read the free space block */
747 error = xfs_dir2_free_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
748 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
749 return error;
750 xchk_buffer_recheck(sc, bp);
752 if (xfs_has_crc(sc->mp)) {
753 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
755 if (hdr3->pad != cpu_to_be32(0))
756 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
759 /* Check all the entries. */
760 xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr);
761 for (i = 0; i < freehdr.nvalid; i++) {
762 best = be16_to_cpu(freehdr.bests[i]);
763 if (best == NULLDATAOFF) {
764 stale++;
765 continue;
767 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
768 (freehdr.firstdb + i) * args->geo->fsbcount,
769 0, &dbp);
770 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
771 &error))
772 goto out;
773 xchk_directory_check_freesp(sc, lblk, dbp, best);
774 xfs_trans_brelse(sc->tp, dbp);
777 if (freehdr.nused + stale != freehdr.nvalid)
778 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
779 out:
780 xfs_trans_brelse(sc->tp, bp);
781 return error;
784 /* Check free space information in directories. */
785 STATIC int
786 xchk_directory_blocks(
787 struct xfs_scrub *sc)
789 struct xfs_bmbt_irec got;
790 struct xfs_da_args args = {
791 .dp = sc->ip,
792 .whichfork = XFS_DATA_FORK,
793 .geo = sc->mp->m_dir_geo,
794 .trans = sc->tp,
795 .owner = sc->ip->i_ino,
797 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
798 struct xfs_mount *mp = sc->mp;
799 xfs_fileoff_t leaf_lblk;
800 xfs_fileoff_t free_lblk;
801 xfs_fileoff_t lblk;
802 struct xfs_iext_cursor icur;
803 xfs_dablk_t dabno;
804 xfs_dir2_db_t last_data_db = 0;
805 bool found;
806 bool is_block = false;
807 int error;
809 /* Ignore local format directories. */
810 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS &&
811 ifp->if_format != XFS_DINODE_FMT_BTREE)
812 return 0;
814 lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
815 leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
816 free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
818 /* Is this a block dir? */
819 if (xfs_dir2_format(&args, &error) == XFS_DIR2_FMT_BLOCK)
820 is_block = true;
821 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
822 goto out;
824 /* Iterate all the data extents in the directory... */
825 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
826 while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
827 /* No more data blocks... */
828 if (got.br_startoff >= leaf_lblk)
829 break;
832 * Check each data block's bestfree data.
834 * Iterate all the fsbcount-aligned block offsets in
835 * this directory. The directory block reading code is
836 * smart enough to do its own bmap lookups to handle
837 * discontiguous directory blocks. When we're done
838 * with the extent record, re-query the bmap at the
839 * next fsbcount-aligned offset to avoid redundant
840 * block checks.
842 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
843 args.geo->fsbcount);
844 lblk < got.br_startoff + got.br_blockcount;
845 lblk += args.geo->fsbcount) {
846 last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
847 error = xchk_directory_data_bestfree(sc, lblk,
848 is_block);
849 if (error)
850 goto out;
852 dabno = got.br_startoff + got.br_blockcount;
853 lblk = roundup(dabno, args.geo->fsbcount);
854 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
857 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
858 goto out;
860 /* Look for a leaf1 block, which has free info. */
861 if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
862 got.br_startoff == leaf_lblk &&
863 got.br_blockcount == args.geo->fsbcount &&
864 !xfs_iext_next_extent(ifp, &icur, &got)) {
865 if (is_block) {
866 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
867 goto out;
869 error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
870 leaf_lblk);
871 if (error)
872 goto out;
875 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
876 goto out;
878 /* Scan for free blocks */
879 lblk = free_lblk;
880 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
881 while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
883 * Dirs can't have blocks mapped above 2^32.
884 * Single-block dirs shouldn't even be here.
886 lblk = got.br_startoff;
887 if (lblk & ~0xFFFFFFFFULL) {
888 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
889 goto out;
891 if (is_block) {
892 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
893 goto out;
897 * Check each dir free block's bestfree data.
899 * Iterate all the fsbcount-aligned block offsets in
900 * this directory. The directory block reading code is
901 * smart enough to do its own bmap lookups to handle
902 * discontiguous directory blocks. When we're done
903 * with the extent record, re-query the bmap at the
904 * next fsbcount-aligned offset to avoid redundant
905 * block checks.
907 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
908 args.geo->fsbcount);
909 lblk < got.br_startoff + got.br_blockcount;
910 lblk += args.geo->fsbcount) {
911 error = xchk_directory_free_bestfree(sc, &args,
912 lblk);
913 if (error)
914 goto out;
916 dabno = got.br_startoff + got.br_blockcount;
917 lblk = roundup(dabno, args.geo->fsbcount);
918 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
920 out:
921 return error;
925 * Revalidate a dirent that we collected in the past but couldn't check because
926 * of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it
927 * has gone away on us, or a negative errno.
929 STATIC int
930 xchk_dir_revalidate_dirent(
931 struct xchk_dir *sd,
932 const struct xfs_name *xname,
933 xfs_ino_t ino)
935 struct xfs_scrub *sc = sd->sc;
936 xfs_ino_t child_ino;
937 int error;
940 * Look up the directory entry. If we get -ENOENT, the directory entry
941 * went away and there's nothing to revalidate. Return any other
942 * error.
944 error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino);
945 if (error)
946 return error;
948 /* The inode number changed, nothing to revalidate. */
949 if (ino != child_ino)
950 return -ENOENT;
952 return 0;
956 * Check a directory entry's parent pointers the slow way, which means we cycle
957 * locks a bunch and put up with revalidation until we get it done.
959 STATIC int
960 xchk_dir_slow_dirent(
961 struct xchk_dir *sd,
962 struct xchk_dirent *dirent,
963 const struct xfs_name *xname)
965 struct xfs_scrub *sc = sd->sc;
966 struct xfs_inode *ip;
967 unsigned int lockmode;
968 int error;
970 /* Check that the deferred dirent still exists. */
971 if (sd->need_revalidate) {
972 error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
973 if (error == -ENOENT)
974 return 0;
975 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
976 &error))
977 return error;
980 error = xchk_iget(sc, dirent->ino, &ip);
981 if (error == -EINVAL || error == -ENOENT) {
982 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
983 return 0;
985 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
986 return error;
989 * If we can grab both IOLOCK and ILOCK of the alleged child, we can
990 * proceed with the validation.
992 lockmode = xchk_dir_lock_child(sc, ip);
993 if (lockmode) {
994 trace_xchk_dir_slowpath(sc->ip, xname, ip->i_ino);
995 goto check_pptr;
999 * We couldn't lock the child file. Drop all the locks and try to
1000 * get them again, one at a time.
1002 xchk_iunlock(sc, sc->ilock_flags);
1003 sd->need_revalidate = true;
1005 trace_xchk_dir_ultraslowpath(sc->ip, xname, ip->i_ino);
1007 error = xchk_dir_trylock_for_pptrs(sc, ip, &lockmode);
1008 if (error)
1009 goto out_rele;
1011 /* Revalidate, since we just cycled the locks. */
1012 error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
1013 if (error == -ENOENT) {
1014 error = 0;
1015 goto out_unlock;
1017 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
1018 goto out_unlock;
1020 check_pptr:
1021 error = xchk_dir_parent_pointer(sd, xname, ip);
1022 out_unlock:
1023 xfs_iunlock(ip, lockmode);
1024 out_rele:
1025 xchk_irele(sc, ip);
1026 return error;
1029 /* Check all the dirents that we deferred the first time around. */
1030 STATIC int
1031 xchk_dir_finish_slow_dirents(
1032 struct xchk_dir *sd)
1034 xfarray_idx_t array_cur;
1035 int error;
1037 foreach_xfarray_idx(sd->dir_entries, array_cur) {
1038 struct xchk_dirent dirent;
1040 if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1041 return 0;
1043 error = xfarray_load(sd->dir_entries, array_cur, &dirent);
1044 if (error)
1045 return error;
1047 error = xfblob_loadname(sd->dir_names, dirent.name_cookie,
1048 &sd->xname, dirent.namelen);
1049 if (error)
1050 return error;
1052 error = xchk_dir_slow_dirent(sd, &dirent, &sd->xname);
1053 if (error)
1054 return error;
1057 return 0;
1060 /* Scrub a whole directory. */
1062 xchk_directory(
1063 struct xfs_scrub *sc)
1065 struct xchk_dir *sd;
1066 int error;
1068 if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
1069 return -ENOENT;
1071 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_DIR_ZAPPED)) {
1072 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
1073 return 0;
1076 /* Plausible size? */
1077 if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
1078 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1079 return 0;
1082 /* Check directory tree structure */
1083 error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL);
1084 if (error)
1085 return error;
1087 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1088 return 0;
1090 /* Check the freespace. */
1091 error = xchk_directory_blocks(sc);
1092 if (error)
1093 return error;
1095 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1096 return 0;
1098 sd = kvzalloc(sizeof(struct xchk_dir), XCHK_GFP_FLAGS);
1099 if (!sd)
1100 return -ENOMEM;
1101 sd->sc = sc;
1102 sd->xname.name = sd->namebuf;
1104 if (xfs_has_parent(sc->mp)) {
1105 char *descr;
1108 * Set up some staging memory for dirents that we can't check
1109 * due to locking contention.
1111 descr = xchk_xfile_ino_descr(sc, "slow directory entries");
1112 error = xfarray_create(descr, 0, sizeof(struct xchk_dirent),
1113 &sd->dir_entries);
1114 kfree(descr);
1115 if (error)
1116 goto out_sd;
1118 descr = xchk_xfile_ino_descr(sc, "slow directory entry names");
1119 error = xfblob_create(descr, &sd->dir_names);
1120 kfree(descr);
1121 if (error)
1122 goto out_entries;
1125 /* Look up every name in this directory by hash. */
1126 error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
1127 if (error == -ECANCELED)
1128 error = 0;
1129 if (error)
1130 goto out_names;
1132 if (xfs_has_parent(sc->mp)) {
1133 error = xchk_dir_finish_slow_dirents(sd);
1134 if (error == -ETIMEDOUT) {
1135 /* Couldn't grab a lock, scrub was marked incomplete */
1136 error = 0;
1137 goto out_names;
1139 if (error)
1140 goto out_names;
1143 out_names:
1144 if (sd->dir_names)
1145 xfblob_destroy(sd->dir_names);
1146 out_entries:
1147 if (sd->dir_entries)
1148 xfarray_destroy(sd->dir_entries);
1149 out_sd:
1150 kvfree(sd);
1151 if (error)
1152 return error;
1154 /* If the dir is clean, it is clearly not zapped. */
1155 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_DIR_ZAPPED);
1156 return 0;
1160 * Decide if this directory has been zapped to satisfy the inode and ifork
1161 * verifiers. Checking and repairing should be postponed until the directory
1162 * is fixed.
1164 bool
1165 xchk_dir_looks_zapped(
1166 struct xfs_inode *dp)
1168 /* Repair zapped this dir's data fork a short time ago */
1169 if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
1170 return true;
1173 * If the dinode repair found a bad data fork, it will reset the fork
1174 * to extents format with zero records and wait for the bmapbtd
1175 * scrubber to reconstruct the block mappings. Directories always
1176 * contain some content, so this is a clear sign of a zapped directory.
1177 * The state checked by xfs_ifork_zapped is not persisted, so this is
1178 * the secondary strategy if repairs are interrupted by a crash or an
1179 * unmount.
1181 return dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS &&
1182 dp->i_df.if_nextents == 0;