1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
23 /* Directory/Attribute Btree */
/*
 * Classify an error code produced by a da btree operation.
 *
 * Visible inputs: ds is the scrub da btree context (ds->sc is the owning
 * scrub context), level indexes ds->state->path.blk[], and error is
 * dereferenced to obtain the code under test.  Other code in this file
 * calls this as !xchk_da_process_error(ds, level, &error), so the result
 * is consumed as a truth value.
 *
 * NOTE(review): the embedded source line numbers skip (31 -> 35, 35 -> 43,
 * 44 -> 48, 49 -> 53), so the rest of the parameter list, the conditions
 * that select each branch and the return statements are not visible in
 * this listing.  Confirm against the complete file.
 */
26 * Check for da btree operation errors. See the section about handling
27 * operational errors in common.c.
30 xchk_da_process_error(
31 struct xchk_da_btree
*ds
,
35 struct xfs_scrub
*sc
= ds
->sc
;
43 /* Used to restart an op with deadlock avoidance. */
44 trace_xchk_deadlock_retry(sc
->ip
, sc
->sm
, *error
);
48 /* Note the badness but don't abort. */
49 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
/*
 * Trace the failed operation.  The path block number at this level is
 * passed through xfs_dir2_da_to_db() before being handed to the
 * tracepoint, together with the error code and the caller address.
 */
53 trace_xchk_file_op_error(sc
, ds
->dargs
.whichfork
,
54 xfs_dir2_da_to_db(ds
->dargs
.geo
,
55 ds
->state
->path
.blk
[level
].blkno
),
56 *error
, __return_address
);
/*
 * Record that corruption was found in the da btree: sets
 * XFS_SCRUB_OFLAG_CORRUPT in sc->sm->sm_flags and emits the
 * xchk_fblock_error tracepoint for the path block at the given level.
 *
 * NOTE(review): the line naming this function is missing from this
 * listing.  It is presumably xchk_da_set_corrupt(), which other code in
 * this file calls as (ds, level) -- confirm.  The tail of the tracepoint
 * argument list is also not visible.
 */
63 * Check for da btree corruption. See the section about handling
64 * operational errors in common.c.
68 struct xchk_da_btree
*ds
,
71 struct xfs_scrub
*sc
= ds
->sc
;
73 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
75 trace_xchk_fblock_error(sc
, ds
->dargs
.whichfork
,
76 xfs_dir2_da_to_db(ds
->dargs
.geo
,
77 ds
->state
->path
.blk
[level
].blkno
),
/*
 * Record that the da btree block at the given level could be optimized:
 * sets XFS_SCRUB_OFLAG_PREEN in sc->sm->sm_flags and emits the
 * xchk_fblock_preen tracepoint for the path block at that level.  Unlike
 * the corruption helper in this file, only the PREEN flag is set here.
 *
 * NOTE(review): the line naming this function is missing from this
 * listing (presumably xchk_da_set_preen() -- confirm), as are the
 * remaining parameters and the tail of the tracepoint argument list.
 */
81 /* Flag a da btree node in need of optimization. */
84 struct xchk_da_btree
*ds
,
87 struct xfs_scrub
*sc
= ds
->sc
;
89 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_PREEN
;
90 trace_xchk_fblock_preen(sc
, ds
->dargs
.whichfork
,
91 xfs_dir2_da_to_db(ds
->dargs
.geo
,
92 ds
->state
->path
.blk
[level
].blkno
),
/*
 * Return a pointer to the node entry that the path cursor currently
 * selects at the given level, i.e. element blk->index of the entry array
 * found by decoding the node header of the buffer held at that level.
 *
 * The block at that level must be a da node; this is only enforced by
 * the ASSERT below, so callers are expected to have checked blk->magic.
 *
 * NOTE(review): the parameter line declaring level is missing from this
 * listing; level is used as an index into ds->state->path.blk[].
 */
96 /* Find an entry at a certain level in a da btree. */
97 static struct xfs_da_node_entry
*
98 xchk_da_btree_node_entry(
99 struct xchk_da_btree
*ds
,
102 struct xfs_da_state_blk
*blk
= &ds
->state
->path
.blk
[level
];
103 struct xfs_da3_icnode_hdr hdr
;
105 ASSERT(blk
->magic
== XFS_DA_NODE_MAGIC
);
/* Decode the on-disk header into hdr so that hdr.btree can be used. */
107 xfs_da3_node_hdr_from_disk(ds
->sc
->mp
, &hdr
, blk
->bp
->b_addr
);
108 return hdr
.btree
+ blk
->index
;
/*
 * Check one hash value against the ordering rules visible below:
 *  - it must not be smaller than the last hash recorded for this level
 *    in ds->hashes[level] (which is then updated to the new value), and
 *  - it must not be larger than the hash of the parent entry, looked up
 *    at level - 1.
 * A violation of either rule is reported through xchk_da_set_corrupt().
 *
 * hashp points at a big-endian 32-bit hash; it is converted with
 * be32_to_cpu() before comparison.
 *
 * NOTE(review): the line naming this function is missing; another
 * function in this file calls xchk_da_btree_hash(ds, level,
 * &key->hashval), which matches this body -- confirm.  Source lines
 * 127-130 are also missing; the parent lookup at level - 1 needs a guard
 * for level 0, presumably located there -- confirm.  The return
 * statement is not visible either.
 */
111 /* Scrub a da btree hash (key). */
114 struct xchk_da_btree
*ds
,
118 struct xfs_da_node_entry
*entry
;
120 xfs_dahash_t parent_hash
;
122 /* Is this hash in order? */
123 hash
= be32_to_cpu(*hashp
);
124 if (hash
< ds
->hashes
[level
])
125 xchk_da_set_corrupt(ds
, level
);
126 ds
->hashes
[level
] = hash
;
131 /* Is this hash no larger than the parent hash? */
132 entry
= xchk_da_btree_node_entry(ds
, level
- 1);
133 parent_hash
= be32_to_cpu(entry
->hashval
);
134 if (parent_hash
< hash
)
135 xchk_da_set_corrupt(ds
, level
);
141 * Check a da btree pointer. Returns true if it's ok to use this
/*
 * Range-check a da btree block number before it is followed.
 *
 * blkno is rejected, and the tree flagged corrupt at this level, when it
 * is below ds->lowest, or when ds->highest is nonzero and blkno is at or
 * above it.  A ds->highest of zero therefore disables the upper bound.
 *
 * NOTE(review): the level and blkno parameter lines and both return
 * statements are missing from this listing.  Another function in this
 * file uses the result as !xchk_da_btree_ptr_ok(ds, level, blkno), so it
 * is consumed as a truth value.
 */
145 xchk_da_btree_ptr_ok(
146 struct xchk_da_btree
*ds
,
150 if (blkno
< ds
->lowest
|| (ds
->highest
!= 0 && blkno
>= ds
->highest
)) {
151 xchk_da_set_corrupt(ds
, level
);
159 * The da btree scrubber can handle leaf1 blocks as a degenerate
160 * form of leafn blocks. Since the regular da code doesn't handle
161 * leaf1, we must multiplex the verifiers.
/*
 * Read verifier used by the scrub buffer ops in this file.
 *
 * The magic number at the start of the buffer selects which real buffer
 * ops are used.  bp->b_ops is repointed at the selected ops before their
 * verify_read hook is called, so after this runs the buffer carries the
 * real ops rather than the scrub multiplexer.
 *
 * NOTE(review): the bp parameter line, the case label of the second
 * branch (presumably the default case) and the statements ending each
 * branch are missing from this listing -- confirm.
 */
164 xchk_da_btree_read_verify(
167 struct xfs_da_blkinfo
*info
= bp
->b_addr
;
169 switch (be16_to_cpu(info
->magic
)) {
/* Both leaf1 magic values are handled by the dir3 leaf1 ops. */
170 case XFS_DIR2_LEAF1_MAGIC
:
171 case XFS_DIR3_LEAF1_MAGIC
:
172 bp
->b_ops
= &xfs_dir3_leaf1_buf_ops
;
173 bp
->b_ops
->verify_read(bp
);
177 * xfs_da3_node_buf_ops already know how to handle
178 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
180 bp
->b_ops
= &xfs_da3_node_buf_ops
;
181 bp
->b_ops
->verify_read(bp
);
/*
 * Write verifier used by the scrub buffer ops in this file.
 *
 * Same dispatch as the read verifier: the magic number selects the real
 * buffer ops, bp->b_ops is repointed at them, and their verify_write
 * hook is called.
 *
 * NOTE(review): the bp parameter line, the case label of the second
 * branch (presumably the default case) and the statements ending each
 * branch are missing from this listing -- confirm.
 */
186 xchk_da_btree_write_verify(
189 struct xfs_da_blkinfo
*info
= bp
->b_addr
;
191 switch (be16_to_cpu(info
->magic
)) {
/* Both leaf1 magic values are handled by the dir3 leaf1 ops. */
192 case XFS_DIR2_LEAF1_MAGIC
:
193 case XFS_DIR3_LEAF1_MAGIC
:
194 bp
->b_ops
= &xfs_dir3_leaf1_buf_ops
;
195 bp
->b_ops
->verify_write(bp
);
199 * xfs_da3_node_buf_ops already know how to handle
200 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
202 bp
->b_ops
= &xfs_da3_node_buf_ops
;
203 bp
->b_ops
->verify_write(bp
);
/*
 * Structure verifier used by the scrub buffer ops in this file.
 *
 * Same dispatch as the read and write verifiers, but the result of the
 * selected verify_struct hook is returned to the caller.  bp->b_ops is
 * repointed at the selected ops as a side effect.
 *
 * NOTE(review): the return type, the bp parameter line and the case
 * label of the second branch (presumably the default case) are missing
 * from this listing -- confirm.
 */
208 xchk_da_btree_verify(
211 struct xfs_da_blkinfo
*info
= bp
->b_addr
;
213 switch (be16_to_cpu(info
->magic
)) {
/* Both leaf1 magic values are handled by the dir3 leaf1 ops. */
214 case XFS_DIR2_LEAF1_MAGIC
:
215 case XFS_DIR3_LEAF1_MAGIC
:
216 bp
->b_ops
= &xfs_dir3_leaf1_buf_ops
;
217 return bp
->b_ops
->verify_struct(bp
);
/* Every other magic value goes to the da3 node ops. */
219 bp
->b_ops
= &xfs_da3_node_buf_ops
;
220 return bp
->b_ops
->verify_struct(bp
);
/*
 * Buffer ops table that routes the read, write and structure verifier
 * hooks to the three multiplexing verifiers defined in this file.  It is
 * the ops argument given to xfs_da_read_buf() when the scrubber loads a
 * da btree block.
 *
 * NOTE(review): the closing brace of this initializer is on a line that
 * is missing from this listing.
 */
224 static const struct xfs_buf_ops xchk_da_btree_buf_ops
= {
225 .name
= "xchk_da_btree",
226 .verify_read
= xchk_da_btree_read_verify
,
227 .verify_write
= xchk_da_btree_write_verify
,
228 .verify_struct
= xchk_da_btree_verify
,
/*
 * Check one sibling pointer of the block at the given path level.
 *
 * direction and sibling are supplied by
 * xchk_da_btree_block_check_siblings() as (0, back) and (1, forw).  The
 * check is made on ds->state->altpath, which starts as a copy of the
 * current path and is shifted with xfs_da3_path_shift(); the block number
 * that the shifted path reaches at this level is compared with sibling.
 * Mismatches are reported through xchk_da_set_corrupt().
 *
 * Before returning, buffers held only by altpath are released and their
 * slots cleared.
 *
 * NOTE(review): the level, direction and sibling parameter lines, the
 * local declarations (retval, plevel, error), the condition selecting the
 * null-pointer branch, several goto or return statements and the trailing
 * arguments of both xfs_da3_path_shift() calls are missing from this
 * listing -- confirm against the complete file.
 */
231 /* Check a block's sibling. */
233 xchk_da_btree_block_check_sibling(
234 struct xchk_da_btree
*ds
,
239 struct xfs_da_state_path
*path
= &ds
->state
->path
;
240 struct xfs_da_state_path
*altpath
= &ds
->state
->altpath
;
/*
 * Start the alternate path as a byte copy of the current path; the
 * shifts below are applied to the copy.
 */
245 memcpy(altpath
, path
, sizeof(ds
->state
->altpath
));
248 * If the pointer is null, we shouldn't be able to move the upper
249 * level pointer anywhere.
252 error
= xfs_da3_path_shift(ds
->state
, altpath
, direction
,
254 if (error
== 0 && retval
== 0)
255 xchk_da_set_corrupt(ds
, level
);
260 /* Move the alternate cursor one block in the direction given. */
261 error
= xfs_da3_path_shift(ds
->state
, altpath
, direction
, false,
263 if (!xchk_da_process_error(ds
, level
, &error
))
266 xchk_da_set_corrupt(ds
, level
);
269 if (altpath
->blk
[level
].bp
)
270 xchk_buffer_recheck(ds
->sc
, altpath
->blk
[level
].bp
);
272 /* Compare upper level pointer to sibling pointer. */
273 if (altpath
->blk
[level
].blkno
!= sibling
)
274 xchk_da_set_corrupt(ds
, level
);
277 /* Free all buffers in the altpath that aren't referenced from path. */
/*
 * Slots that are empty, or whose buffer is also held by path at the same
 * level, match the condition below.  NOTE(review): the statement that
 * follows the condition is on a missing line, presumably a continue --
 * confirm.  All other slots are released and set to NULL.
 */
278 for (plevel
= 0; plevel
< altpath
->active
; plevel
++) {
279 if (altpath
->blk
[plevel
].bp
== NULL
||
280 (plevel
< path
->active
&&
281 altpath
->blk
[plevel
].bp
== path
->blk
[plevel
].bp
))
284 xfs_trans_brelse(ds
->dargs
.trans
, altpath
->blk
[plevel
].bp
);
285 altpath
->blk
[plevel
].bp
= NULL
;
/*
 * Check both sibling pointers recorded in a block header.
 *
 * forw and back are the CPU-endian values of hdr->forw and hdr->back.
 * In the branch commented as handling top level blocks, a nonzero value
 * in either field is flagged as corruption.  In the other visible path,
 * back is checked with direction 0 and forw with direction 1 through
 * xchk_da_btree_block_check_sibling(), and ds->state->altpath is zeroed
 * afterwards so that no stale alternate path state remains.
 *
 * NOTE(review): the level parameter line, the local declarations, the
 * condition that selects the top-level branch, the error checks after
 * each sibling call and the return statement are missing from this
 * listing -- confirm against the complete file.
 */
291 /* Check a block's sibling pointers. */
293 xchk_da_btree_block_check_siblings(
294 struct xchk_da_btree
*ds
,
296 struct xfs_da_blkinfo
*hdr
)
302 forw
= be32_to_cpu(hdr
->forw
);
303 back
= be32_to_cpu(hdr
->back
);
305 /* Top level blocks should not have sibling pointers. */
307 if (forw
!= 0 || back
!= 0)
308 xchk_da_set_corrupt(ds
, level
);
313 * Check back (left) and forw (right) pointers. These functions
314 * absorb error codes for us.
316 error
= xchk_da_btree_block_check_sibling(ds
, level
, 0, back
);
319 error
= xchk_da_btree_block_check_sibling(ds
, level
, 1, forw
);
322 memset(&ds
->state
->altpath
, 0, sizeof(ds
->state
->altpath
));
/*
 * Load the da btree block at blkno into path slot level and check it.
 *
 * Steps visible in this listing, in order:
 *  - make level the deepest active path slot and release any buffer
 *    previously held there;
 *  - range-check blkno with xchk_da_btree_ptr_ok();
 *  - read the buffer with the scrub buffer ops, allowing holes
 *    (XFS_DABUF_MAP_HOLE_OK), and recheck it;
 *  - check the header pad and, on CRC-enabled filesystems, the owner;
 *  - check the sibling pointers;
 *  - interpret the block according to its magic number, recording the
 *    buffer type, the last hash value and the record count;
 *  - run xfs_da3_header_check() against dargs->owner;
 *  - for a block below the root, compare its last hash with the hash of
 *    the parent entry at level - 1.
 * Problems found are reported through xchk_da_set_corrupt().
 *
 * NOTE(review): the line naming this function is missing; another
 * function in this file calls xchk_da_btree_block(ds, level, blkno),
 * which matches this body -- confirm.  Many lines are missing (parameter
 * lines, several local declarations, conditions, break and goto
 * statements, labels and returns), so the exact control flow between the
 * steps above cannot be confirmed from this listing.
 */
326 /* Load a dir/attribute block from a btree. */
329 struct xchk_da_btree
*ds
,
333 struct xfs_da_state_blk
*blk
;
334 struct xfs_da_intnode
*node
;
335 struct xfs_da_node_entry
*btree
;
336 struct xfs_da3_blkinfo
*hdr3
;
337 struct xfs_da_args
*dargs
= &ds
->dargs
;
338 struct xfs_inode
*ip
= ds
->dargs
.dp
;
342 struct xfs_da3_icnode_hdr nodehdr
;
345 blk
= &ds
->state
->path
.blk
[level
];
346 ds
->state
->path
.active
= level
+ 1;
348 /* Release old block. */
350 xfs_trans_brelse(dargs
->trans
, blk
->bp
);
354 /* Check the pointer. */
356 if (!xchk_da_btree_ptr_ok(ds
, level
, blkno
))
359 /* Read the buffer. */
360 error
= xfs_da_read_buf(dargs
->trans
, dargs
->dp
, blk
->blkno
,
361 XFS_DABUF_MAP_HOLE_OK
, &blk
->bp
, dargs
->whichfork
,
362 &xchk_da_btree_buf_ops
);
363 if (!xchk_da_process_error(ds
, level
, &error
))
366 xchk_buffer_recheck(ds
->sc
, blk
->bp
);
369 * We didn't find a dir btree root block, which means that
370 * there's no LEAF1/LEAFN tree (at least not where it's supposed
371 * to be), so jump out now.
373 if (ds
->dargs
.whichfork
== XFS_DATA_FORK
&& level
== 0 &&
377 /* It's /not/ ok for attr trees not to have a da btree. */
378 if (blk
->bp
== NULL
) {
379 xchk_da_set_corrupt(ds
, level
);
383 hdr3
= blk
->bp
->b_addr
;
384 blk
->magic
= be16_to_cpu(hdr3
->hdr
.magic
);
/*
 * pmaxrecs points at the ds->maxrecs[] slot for this level.  The node
 * case below stores the entry count through it; the leaf cases pass it
 * to the lasthash helpers, which presumably store the count as well --
 * confirm.
 */
385 pmaxrecs
= &ds
->maxrecs
[level
];
387 /* We only started zeroing the header on v5 filesystems. */
388 if (xfs_has_crc(ds
->sc
->mp
) && hdr3
->hdr
.pad
)
389 xchk_da_set_corrupt(ds
, level
);
391 /* Check the owner. */
392 if (xfs_has_crc(ip
->i_mount
)) {
393 owner
= be64_to_cpu(hdr3
->owner
);
394 if (owner
!= ip
->i_ino
)
395 xchk_da_set_corrupt(ds
, level
);
398 /* Check the siblings. */
399 error
= xchk_da_btree_block_check_siblings(ds
, level
, &hdr3
->hdr
);
403 /* Interpret the buffer. */
/*
 * Each case accepts a pair of magic values, overwrites blk->magic with
 * the first value of the pair, and records the buffer type with
 * xfs_trans_buf_set_type().  The three leaf cases flag corruption when
 * ds->tree_level is nonzero at the point a leaf is reached.
 */
404 switch (blk
->magic
) {
405 case XFS_ATTR_LEAF_MAGIC
:
406 case XFS_ATTR3_LEAF_MAGIC
:
407 xfs_trans_buf_set_type(dargs
->trans
, blk
->bp
,
408 XFS_BLFT_ATTR_LEAF_BUF
);
409 blk
->magic
= XFS_ATTR_LEAF_MAGIC
;
410 blk
->hashval
= xfs_attr_leaf_lasthash(blk
->bp
, pmaxrecs
);
411 if (ds
->tree_level
!= 0)
412 xchk_da_set_corrupt(ds
, level
);
414 case XFS_DIR2_LEAFN_MAGIC
:
415 case XFS_DIR3_LEAFN_MAGIC
:
416 xfs_trans_buf_set_type(dargs
->trans
, blk
->bp
,
417 XFS_BLFT_DIR_LEAFN_BUF
);
418 blk
->magic
= XFS_DIR2_LEAFN_MAGIC
;
419 blk
->hashval
= xfs_dir2_leaf_lasthash(ip
, blk
->bp
, pmaxrecs
);
420 if (ds
->tree_level
!= 0)
421 xchk_da_set_corrupt(ds
, level
);
423 case XFS_DIR2_LEAF1_MAGIC
:
424 case XFS_DIR3_LEAF1_MAGIC
:
425 xfs_trans_buf_set_type(dargs
->trans
, blk
->bp
,
426 XFS_BLFT_DIR_LEAF1_BUF
);
427 blk
->magic
= XFS_DIR2_LEAF1_MAGIC
;
428 blk
->hashval
= xfs_dir2_leaf_lasthash(ip
, blk
->bp
, pmaxrecs
);
429 if (ds
->tree_level
!= 0)
430 xchk_da_set_corrupt(ds
, level
);
432 case XFS_DA_NODE_MAGIC
:
433 case XFS_DA3_NODE_MAGIC
:
434 xfs_trans_buf_set_type(dargs
->trans
, blk
->bp
,
435 XFS_BLFT_DA_NODE_BUF
);
436 blk
->magic
= XFS_DA_NODE_MAGIC
;
437 node
= blk
->bp
->b_addr
;
438 xfs_da3_node_hdr_from_disk(ip
->i_mount
, &nodehdr
, node
);
439 btree
= nodehdr
.btree
;
440 *pmaxrecs
= nodehdr
.count
;
/*
 * NOTE(review): the index below is count - 1, so this read is only in
 * bounds when nodehdr.count is nonzero.  Presumably the node read
 * verifier rejects empty nodes before this point -- confirm.
 */
441 blk
->hashval
= be32_to_cpu(btree
[*pmaxrecs
- 1].hashval
);
443 if (nodehdr
.level
>= XFS_DA_NODE_MAXDEPTH
) {
444 xchk_da_set_corrupt(ds
, level
);
/*
 * ds->tree_level is seeded from the node header on one path and compared
 * with the node header on another.  NOTE(review): the conditions that
 * choose between the two are on missing lines -- confirm.
 */
447 ds
->tree_level
= nodehdr
.level
;
449 if (ds
->tree_level
!= nodehdr
.level
) {
450 xchk_da_set_corrupt(ds
, level
);
455 /* XXX: Check hdr3.pad32 once we know how to fix it. */
458 xchk_da_set_corrupt(ds
, level
);
462 fa
= xfs_da3_header_check(blk
->bp
, dargs
->owner
);
464 xchk_da_set_corrupt(ds
, level
);
469 * If we've been handed a block that is below the dabtree root, does
470 * its hashval match what the parent block expected to see?
473 struct xfs_da_node_entry
*key
;
475 key
= xchk_da_btree_node_entry(ds
, level
- 1);
476 if (be32_to_cpu(key
->hashval
) != blk
->hashval
) {
477 xchk_da_set_corrupt(ds
, level
);
485 xfs_trans_brelse(dargs
->trans
, blk
->bp
);
/*
 * Walk every block of the da btree in one fork of sc->ip and call
 * scrub_fn(ds, level) on leaf records.
 *
 * Visible inputs: sc is the scrub context, scrub_fn is the per-record
 * callback, whichfork selects the fork (and with it the attr or dir
 * geometry), and private is stored in ds->private for the callback.
 *
 * Nothing is scanned when the fork is not in extents format according to
 * xfs_ifork_has_extents().  Otherwise a zeroed scrub context and a da
 * state are allocated, the root block is loaded with
 * xchk_da_btree_block(), and the tree is walked with an explicit
 * per-level cursor in blks[level].index rather than by recursion.  All
 * tracked buffers are released and the da state is freed at the end.
 *
 * NOTE(review): the line naming this function is missing (presumably
 * xchk_da_btree() -- confirm).  Also missing are the whichfork and
 * private parameter lines, the local declarations (error, level, blkno),
 * the allocation failure checks, the statements that raise and lower
 * level, the break, continue and goto statements, and the final free of
 * ds and return.
 */
492 /* Visit all nodes and leaves of a da btree. */
495 struct xfs_scrub
*sc
,
497 xchk_da_btree_rec_fn scrub_fn
,
500 struct xchk_da_btree
*ds
;
501 struct xfs_mount
*mp
= sc
->mp
;
502 struct xfs_da_state_blk
*blks
;
503 struct xfs_da_node_entry
*key
;
508 /* Skip short format data structures; no btree to scan. */
509 if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc
->ip
, whichfork
)))
512 /* Set up initial da state. */
513 ds
= kzalloc(sizeof(struct xchk_da_btree
), XCHK_GFP_FLAGS
);
516 ds
->dargs
.dp
= sc
->ip
;
517 ds
->dargs
.whichfork
= whichfork
;
518 ds
->dargs
.trans
= sc
->tp
;
519 ds
->dargs
.op_flags
= XFS_DA_OP_OKNOENT
;
520 ds
->dargs
.owner
= sc
->ip
->i_ino
;
521 ds
->state
= xfs_da_state_alloc(&ds
->dargs
);
523 ds
->private = private;
/*
 * ds->lowest and ds->highest bound the block numbers accepted by
 * xchk_da_btree_ptr_ok().  In the visible lines they are taken from the
 * directory geometry (leafblk, freeblk), apparently in the non-attr
 * branch; what the attr branch assigns is on missing lines -- confirm.
 */
524 if (whichfork
== XFS_ATTR_FORK
) {
525 ds
->dargs
.geo
= mp
->m_attr_geo
;
529 ds
->dargs
.geo
= mp
->m_dir_geo
;
530 ds
->lowest
= ds
->dargs
.geo
->leafblk
;
531 ds
->highest
= ds
->dargs
.geo
->freeblk
;
536 /* Find the root of the da tree, if present. */
537 blks
= ds
->state
->path
.blk
;
538 error
= xchk_da_btree_block(ds
, level
, blkno
);
542 * We didn't find a block at ds->lowest, which means that there's
543 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
546 if (blks
[level
].bp
== NULL
)
549 blks
[level
].index
= 0;
/*
 * Main walk.  A block whose magic is not XFS_DA_NODE_MAGIC is treated as
 * a leaf and its records go to scrub_fn; a node block has its current
 * key checked and its child loaded.  When the cursor of a block reaches
 * ds->maxrecs[level], the cursor of the parent level is advanced.
 */
550 while (level
>= 0 && level
< XFS_DA_NODE_MAXDEPTH
) {
551 /* Handle leaf block. */
552 if (blks
[level
].magic
!= XFS_DA_NODE_MAGIC
) {
553 /* End of leaf, pop back towards the root. */
554 if (blks
[level
].index
>= ds
->maxrecs
[level
]) {
556 blks
[level
- 1].index
++;
562 /* Dispatch record scrubbing. */
563 error
= scrub_fn(ds
, level
);
/*
 * Checks for a termination request and for the corrupt flag after each
 * record.  NOTE(review): the action taken when this is true is on a
 * missing line, presumably leaving the loop -- confirm.
 */
566 if (xchk_should_terminate(sc
, &error
) ||
567 (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
))
575 /* End of node, pop back towards the root. */
576 if (blks
[level
].index
>= ds
->maxrecs
[level
]) {
578 blks
[level
- 1].index
++;
584 /* Hashes in order for scrub? */
585 key
= xchk_da_btree_node_entry(ds
, level
);
586 error
= xchk_da_btree_hash(ds
, level
, &key
->hashval
);
590 /* Drill another level deeper. */
591 blkno
= be32_to_cpu(key
->before
);
593 if (level
>= XFS_DA_NODE_MAXDEPTH
) {
595 xchk_da_set_corrupt(ds
, level
- 1);
599 error
= xchk_da_btree_block(ds
, level
, blkno
);
602 if (blks
[level
].bp
== NULL
)
605 blks
[level
].index
= 0;
609 /* Release all the buffers we're tracking. */
610 for (level
= 0; level
< XFS_DA_NODE_MAXDEPTH
; level
++) {
611 if (blks
[level
].bp
== NULL
)
613 xfs_trans_brelse(sc
->tp
, blks
[level
].bp
);
614 blks
[level
].bp
= NULL
;
618 xfs_da_state_free(ds
->state
);