2 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
32 #include "xfs_inode.h"
33 #include "xfs_icache.h"
34 #include "xfs_itable.h"
35 #include "xfs_alloc.h"
36 #include "xfs_alloc_btree.h"
38 #include "xfs_bmap_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_ialloc_btree.h"
41 #include "xfs_refcount.h"
42 #include "xfs_refcount_btree.h"
44 #include "xfs_rmap_btree.h"
46 #include "xfs_trans_priv.h"
47 #include "scrub/xfs_scrub.h"
48 #include "scrub/scrub.h"
49 #include "scrub/common.h"
50 #include "scrub/trace.h"
51 #include "scrub/btree.h"
53 /* Common code for the metadata scrubbers. */
56 * Handling operational errors.
58 * The *_process_error() family of functions are used to process error return
59 * codes from functions called as part of a scrub operation.
61 * If there's no error, we return true to tell the caller that it's ok
62 * to move on to the next check in its list.
64 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
65 * caller that something bad happened, and we preserve *error so that
66 * the caller can return the *error up the stack to userspace.
68 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
69 * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
70 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
71 * not via return codes. We return false to tell the caller that
72 * something bad happened. Since the error has been cleared, the caller
73 * will (presumably) return that zero and scrubbing will move on to the next check.
76 * ftrace can be used to record the precise metadata location and the
77 * approximate code location of the failed operation.
80 /* Check for operational errors. */
82 __xfs_scrub_process_error(
83 struct xfs_scrub_context
*sc
,
94 /* Used to restart an op with deadlock avoidance. */
95 trace_xfs_scrub_deadlock_retry(sc
->ip
, sc
->sm
, *error
);
99 /* Note the badness but don't abort. */
100 sc
->sm
->sm_flags
|= errflag
;
104 trace_xfs_scrub_op_error(sc
, agno
, bno
, *error
,
112 xfs_scrub_process_error(
113 struct xfs_scrub_context
*sc
,
118 return __xfs_scrub_process_error(sc
, agno
, bno
, error
,
119 XFS_SCRUB_OFLAG_CORRUPT
, __return_address
);
123 xfs_scrub_xref_process_error(
124 struct xfs_scrub_context
*sc
,
129 return __xfs_scrub_process_error(sc
, agno
, bno
, error
,
130 XFS_SCRUB_OFLAG_XFAIL
, __return_address
);
133 /* Check for operational errors for a file offset. */
135 __xfs_scrub_fblock_process_error(
136 struct xfs_scrub_context
*sc
,
138 xfs_fileoff_t offset
,
147 /* Used to restart an op with deadlock avoidance. */
148 trace_xfs_scrub_deadlock_retry(sc
->ip
, sc
->sm
, *error
);
152 /* Note the badness but don't abort. */
153 sc
->sm
->sm_flags
|= errflag
;
157 trace_xfs_scrub_file_op_error(sc
, whichfork
, offset
, *error
,
165 xfs_scrub_fblock_process_error(
166 struct xfs_scrub_context
*sc
,
168 xfs_fileoff_t offset
,
171 return __xfs_scrub_fblock_process_error(sc
, whichfork
, offset
, error
,
172 XFS_SCRUB_OFLAG_CORRUPT
, __return_address
);
176 xfs_scrub_fblock_xref_process_error(
177 struct xfs_scrub_context
*sc
,
179 xfs_fileoff_t offset
,
182 return __xfs_scrub_fblock_process_error(sc
, whichfork
, offset
, error
,
183 XFS_SCRUB_OFLAG_XFAIL
, __return_address
);
187 * Handling scrub corruption/optimization/warning checks.
189 * The *_set_{corrupt,preen,warning}() family of functions are used to
190 * record the presence of metadata that is incorrect (corrupt), could be
191 * optimized somehow (preen), or should be flagged for administrative
192 * review but is not incorrect (warn).
194 * ftrace can be used to record the precise metadata location and
195 * approximate code location of the failed check.
198 /* Record a block which could be optimized. */
200 xfs_scrub_block_set_preen(
201 struct xfs_scrub_context
*sc
,
204 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_PREEN
;
205 trace_xfs_scrub_block_preen(sc
, bp
->b_bn
, __return_address
);
209 * Record an inode which could be optimized. The trace data will
210 * include the block given by bp if bp is given; otherwise it will use
211 * the block location of the inode record itself.
214 xfs_scrub_ino_set_preen(
215 struct xfs_scrub_context
*sc
,
219 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_PREEN
;
220 trace_xfs_scrub_ino_preen(sc
, ino
, bp
? bp
->b_bn
: 0,
224 /* Record a corrupt block. */
226 xfs_scrub_block_set_corrupt(
227 struct xfs_scrub_context
*sc
,
230 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
231 trace_xfs_scrub_block_error(sc
, bp
->b_bn
, __return_address
);
234 /* Record a corruption while cross-referencing. */
236 xfs_scrub_block_xref_set_corrupt(
237 struct xfs_scrub_context
*sc
,
240 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_XCORRUPT
;
241 trace_xfs_scrub_block_error(sc
, bp
->b_bn
, __return_address
);
245 * Record a corrupt inode. The trace data will include the block given
246 * by bp if bp is given; otherwise it will use the block location of the
247 * inode record itself.
250 xfs_scrub_ino_set_corrupt(
251 struct xfs_scrub_context
*sc
,
255 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
256 trace_xfs_scrub_ino_error(sc
, ino
, bp
? bp
->b_bn
: 0, __return_address
);
259 /* Record a corruption while cross-referencing with an inode. */
261 xfs_scrub_ino_xref_set_corrupt(
262 struct xfs_scrub_context
*sc
,
266 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_XCORRUPT
;
267 trace_xfs_scrub_ino_error(sc
, ino
, bp
? bp
->b_bn
: 0, __return_address
);
270 /* Record corruption in a block indexed by a file fork. */
272 xfs_scrub_fblock_set_corrupt(
273 struct xfs_scrub_context
*sc
,
275 xfs_fileoff_t offset
)
277 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
278 trace_xfs_scrub_fblock_error(sc
, whichfork
, offset
, __return_address
);
281 /* Record a corruption while cross-referencing a fork block. */
283 xfs_scrub_fblock_xref_set_corrupt(
284 struct xfs_scrub_context
*sc
,
286 xfs_fileoff_t offset
)
288 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_XCORRUPT
;
289 trace_xfs_scrub_fblock_error(sc
, whichfork
, offset
, __return_address
);
293 * Warn about inodes that need administrative review but are not incorrect.
297 xfs_scrub_ino_set_warning(
298 struct xfs_scrub_context
*sc
,
302 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_WARNING
;
303 trace_xfs_scrub_ino_warning(sc
, ino
, bp
? bp
->b_bn
: 0,
307 /* Warn about a block indexed by a file fork that needs review. */
309 xfs_scrub_fblock_set_warning(
310 struct xfs_scrub_context
*sc
,
312 xfs_fileoff_t offset
)
314 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_WARNING
;
315 trace_xfs_scrub_fblock_warning(sc
, whichfork
, offset
, __return_address
);
318 /* Signal an incomplete scrub. */
320 xfs_scrub_set_incomplete(
321 struct xfs_scrub_context
*sc
)
323 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_INCOMPLETE
;
324 trace_xfs_scrub_incomplete(sc
, __return_address
);
328 * rmap scrubbing -- compute the number of blocks with a given owner,
329 * at least according to the reverse mapping data.
332 struct xfs_scrub_rmap_ownedby_info
{
333 struct xfs_owner_info
*oinfo
;
334 xfs_filblks_t
*blocks
;
338 xfs_scrub_count_rmap_ownedby_irec(
339 struct xfs_btree_cur
*cur
,
340 struct xfs_rmap_irec
*rec
,
343 struct xfs_scrub_rmap_ownedby_info
*sroi
= priv
;
347 irec_attr
= rec
->rm_flags
& XFS_RMAP_ATTR_FORK
;
348 oinfo_attr
= sroi
->oinfo
->oi_flags
& XFS_OWNER_INFO_ATTR_FORK
;
350 if (rec
->rm_owner
!= sroi
->oinfo
->oi_owner
)
353 if (XFS_RMAP_NON_INODE_OWNER(rec
->rm_owner
) || irec_attr
== oinfo_attr
)
354 (*sroi
->blocks
) += rec
->rm_blockcount
;
360 * Calculate the number of blocks the rmap thinks are owned by something.
361 * The caller should pass us an rmapbt cursor.
364 xfs_scrub_count_rmap_ownedby_ag(
365 struct xfs_scrub_context
*sc
,
366 struct xfs_btree_cur
*cur
,
367 struct xfs_owner_info
*oinfo
,
368 xfs_filblks_t
*blocks
)
370 struct xfs_scrub_rmap_ownedby_info sroi
;
374 sroi
.blocks
= blocks
;
376 return xfs_rmap_query_all(cur
, xfs_scrub_count_rmap_ownedby_irec
,
383 * These helpers facilitate locking an allocation group's header
384 * buffers, setting up cursors for all btrees that are present, and
385 * cleaning everything up once we're through.
388 /* Decide if we want to return an AG header read failure. */
390 want_ag_read_header_failure(
391 struct xfs_scrub_context
*sc
,
394 /* Return all AG header read failures when scanning btrees. */
395 if (sc
->sm
->sm_type
!= XFS_SCRUB_TYPE_AGF
&&
396 sc
->sm
->sm_type
!= XFS_SCRUB_TYPE_AGFL
&&
397 sc
->sm
->sm_type
!= XFS_SCRUB_TYPE_AGI
)
400 * If we're scanning a given type of AG header, we only want to
401 * see read failures from that specific header. We'd like the
402 * other headers to cross-check them, but this isn't required.
404 if (sc
->sm
->sm_type
== type
)
410 * Grab all the headers for an AG.
412 * The headers should be released by xfs_scrub_ag_free, but as a fail
413 * safe we attach all the buffers we grab to the scrub transaction so
414 * they'll all be freed when we cancel it.
417 xfs_scrub_ag_read_headers(
418 struct xfs_scrub_context
*sc
,
420 struct xfs_buf
**agi
,
421 struct xfs_buf
**agf
,
422 struct xfs_buf
**agfl
)
424 struct xfs_mount
*mp
= sc
->mp
;
427 error
= xfs_ialloc_read_agi(mp
, sc
->tp
, agno
, agi
);
428 if (error
&& want_ag_read_header_failure(sc
, XFS_SCRUB_TYPE_AGI
))
431 error
= xfs_alloc_read_agf(mp
, sc
->tp
, agno
, 0, agf
);
432 if (error
&& want_ag_read_header_failure(sc
, XFS_SCRUB_TYPE_AGF
))
435 error
= xfs_alloc_read_agfl(mp
, sc
->tp
, agno
, agfl
);
436 if (error
&& want_ag_read_header_failure(sc
, XFS_SCRUB_TYPE_AGFL
))
443 /* Release all the AG btree cursors. */
445 xfs_scrub_ag_btcur_free(
446 struct xfs_scrub_ag
*sa
)
449 xfs_btree_del_cursor(sa
->refc_cur
, XFS_BTREE_ERROR
);
451 xfs_btree_del_cursor(sa
->rmap_cur
, XFS_BTREE_ERROR
);
453 xfs_btree_del_cursor(sa
->fino_cur
, XFS_BTREE_ERROR
);
455 xfs_btree_del_cursor(sa
->ino_cur
, XFS_BTREE_ERROR
);
457 xfs_btree_del_cursor(sa
->cnt_cur
, XFS_BTREE_ERROR
);
459 xfs_btree_del_cursor(sa
->bno_cur
, XFS_BTREE_ERROR
);
469 /* Initialize all the btree cursors for an AG. */
471 xfs_scrub_ag_btcur_init(
472 struct xfs_scrub_context
*sc
,
473 struct xfs_scrub_ag
*sa
)
475 struct xfs_mount
*mp
= sc
->mp
;
476 xfs_agnumber_t agno
= sa
->agno
;
479 /* Set up a bnobt cursor for cross-referencing. */
480 sa
->bno_cur
= xfs_allocbt_init_cursor(mp
, sc
->tp
, sa
->agf_bp
,
481 agno
, XFS_BTNUM_BNO
);
485 /* Set up a cntbt cursor for cross-referencing. */
486 sa
->cnt_cur
= xfs_allocbt_init_cursor(mp
, sc
->tp
, sa
->agf_bp
,
487 agno
, XFS_BTNUM_CNT
);
492 /* Set up an inobt cursor for cross-referencing. */
494 sa
->ino_cur
= xfs_inobt_init_cursor(mp
, sc
->tp
, sa
->agi_bp
,
495 agno
, XFS_BTNUM_INO
);
500 /* Set up a finobt cursor for cross-referencing. */
501 if (sa
->agi_bp
&& xfs_sb_version_hasfinobt(&mp
->m_sb
)) {
502 sa
->fino_cur
= xfs_inobt_init_cursor(mp
, sc
->tp
, sa
->agi_bp
,
503 agno
, XFS_BTNUM_FINO
);
508 /* Set up an rmapbt cursor for cross-referencing. */
509 if (sa
->agf_bp
&& xfs_sb_version_hasrmapbt(&mp
->m_sb
)) {
510 sa
->rmap_cur
= xfs_rmapbt_init_cursor(mp
, sc
->tp
, sa
->agf_bp
,
516 /* Set up a refcountbt cursor for cross-referencing. */
517 if (sa
->agf_bp
&& xfs_sb_version_hasreflink(&mp
->m_sb
)) {
518 sa
->refc_cur
= xfs_refcountbt_init_cursor(mp
, sc
->tp
,
519 sa
->agf_bp
, agno
, NULL
);
529 /* Release the AG header context and btree cursors. */
532 struct xfs_scrub_context
*sc
,
533 struct xfs_scrub_ag
*sa
)
535 xfs_scrub_ag_btcur_free(sa
);
537 xfs_trans_brelse(sc
->tp
, sa
->agfl_bp
);
541 xfs_trans_brelse(sc
->tp
, sa
->agf_bp
);
545 xfs_trans_brelse(sc
->tp
, sa
->agi_bp
);
548 sa
->agno
= NULLAGNUMBER
;
552 * For scrub, grab the AGI and the AGF headers, in that order. Locking
553 * order requires us to get the AGI before the AGF. We use the
554 * transaction to avoid deadlocking on crosslinked metadata buffers;
555 * either the caller passes one in (bmap scrub) or we have to create a
556 * transaction ourselves.
560 struct xfs_scrub_context
*sc
,
562 struct xfs_scrub_ag
*sa
)
567 error
= xfs_scrub_ag_read_headers(sc
, agno
, &sa
->agi_bp
,
568 &sa
->agf_bp
, &sa
->agfl_bp
);
572 return xfs_scrub_ag_btcur_init(sc
, sa
);
575 /* Per-scrubber setup functions */
577 /* Set us up with a transaction and an empty context. */
580 struct xfs_scrub_context
*sc
,
581 struct xfs_inode
*ip
)
583 return xfs_scrub_trans_alloc(sc
->sm
, sc
->mp
, &sc
->tp
);
586 /* Set us up with AG headers and btree cursors. */
588 xfs_scrub_setup_ag_btree(
589 struct xfs_scrub_context
*sc
,
590 struct xfs_inode
*ip
,
593 struct xfs_mount
*mp
= sc
->mp
;
597 * If the caller asks us to checkpoint the log, do so. This
598 * expensive operation should be performed infrequently and only
599 * as a last resort. Any caller that sets force_log should
600 * document why they need to do so.
603 error
= xfs_scrub_checkpoint_log(mp
);
608 error
= xfs_scrub_setup_fs(sc
, ip
);
612 return xfs_scrub_ag_init(sc
, sc
->sm
->sm_agno
, &sc
->sa
);
615 /* Push everything out of the log onto disk. */
617 xfs_scrub_checkpoint_log(
618 struct xfs_mount
*mp
)
622 error
= _xfs_log_force(mp
, XFS_LOG_SYNC
, NULL
);
625 xfs_ail_push_all_sync(mp
->m_ail
);
630 * Given an inode and the scrub control structure, grab either the
631 * inode referenced in the control structure or the inode passed in.
632 * The inode is not locked.
636 struct xfs_scrub_context
*sc
,
637 struct xfs_inode
*ip_in
)
639 struct xfs_imap imap
;
640 struct xfs_mount
*mp
= sc
->mp
;
641 struct xfs_inode
*ip
= NULL
;
644 /* We want to scan the inode we already had opened. */
645 if (sc
->sm
->sm_ino
== 0 || sc
->sm
->sm_ino
== ip_in
->i_ino
) {
650 /* Look up the inode, see if the generation number matches. */
651 if (xfs_internal_inum(mp
, sc
->sm
->sm_ino
))
653 error
= xfs_iget(mp
, NULL
, sc
->sm
->sm_ino
,
654 XFS_IGET_UNTRUSTED
| XFS_IGET_DONTCACHE
, 0, &ip
);
657 /* Inode doesn't exist, just bail out. */
660 /* Got an inode, continue. */
664 * -EINVAL with IGET_UNTRUSTED could mean one of several
665 * things: userspace gave us an inode number that doesn't
666 * correspond to fs space, or doesn't have an inobt entry;
667 * or it could simply mean that the inode buffer failed its verifier.
670 * Try just the inode mapping lookup -- if it succeeds, then
671 * the inode buffer verifier failed and something needs fixing.
672 * Otherwise, we really couldn't find it so tell userspace
673 * that it no longer exists.
675 error
= xfs_imap(sc
->mp
, sc
->tp
, sc
->sm
->sm_ino
, &imap
,
676 XFS_IGET_UNTRUSTED
| XFS_IGET_DONTCACHE
);
679 error
= -EFSCORRUPTED
;
682 trace_xfs_scrub_op_error(sc
,
683 XFS_INO_TO_AGNO(mp
, sc
->sm
->sm_ino
),
684 XFS_INO_TO_AGBNO(mp
, sc
->sm
->sm_ino
),
685 error
, __return_address
);
688 if (VFS_I(ip
)->i_generation
!= sc
->sm
->sm_gen
) {
697 /* Set us up to scrub a file's contents. */
699 xfs_scrub_setup_inode_contents(
700 struct xfs_scrub_context
*sc
,
701 struct xfs_inode
*ip
,
702 unsigned int resblks
)
704 struct xfs_mount
*mp
= sc
->mp
;
707 error
= xfs_scrub_get_inode(sc
, ip
);
711 /* Got the inode, lock it and we're ready to go. */
712 sc
->ilock_flags
= XFS_IOLOCK_EXCL
| XFS_MMAPLOCK_EXCL
;
713 xfs_ilock(sc
->ip
, sc
->ilock_flags
);
714 error
= xfs_scrub_trans_alloc(sc
->sm
, mp
, &sc
->tp
);
717 sc
->ilock_flags
|= XFS_ILOCK_EXCL
;
718 xfs_ilock(sc
->ip
, XFS_ILOCK_EXCL
);
721 /* scrub teardown will unlock and release the inode for us */
726 * Predicate that decides if we need to evaluate the cross-reference check.
727 * If there was an error accessing the cross-reference btree, just delete
728 * the cursor and skip the check.
731 xfs_scrub_should_check_xref(
732 struct xfs_scrub_context
*sc
,
734 struct xfs_btree_cur
**curpp
)
740 /* If we've already given up on xref, just bail out. */
744 /* xref error, delete cursor and bail out. */
745 xfs_btree_del_cursor(*curpp
, XFS_BTREE_ERROR
);
749 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_XFAIL
;
750 trace_xfs_scrub_xref_error(sc
, *error
, __return_address
);
753 * Errors encountered during cross-referencing with another
754 * data structure should not cause this scrubber to abort.
760 /* Run the structure verifiers on in-memory buffers to detect bad memory. */
762 xfs_scrub_buffer_recheck(
763 struct xfs_scrub_context
*sc
,
768 if (bp
->b_ops
== NULL
) {
769 xfs_scrub_block_set_corrupt(sc
, bp
);
772 if (bp
->b_ops
->verify_struct
== NULL
) {
773 xfs_scrub_set_incomplete(sc
);
776 fa
= bp
->b_ops
->verify_struct(bp
);
779 sc
->sm
->sm_flags
|= XFS_SCRUB_OFLAG_CORRUPT
;
780 trace_xfs_scrub_block_error(sc
, bp
->b_bn
, fa
);