// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"
/*
 * Reverse Mapping Btree Repair
 * ============================
 *
 * This is the most involved of all the AG space btree rebuilds.  Everywhere
 * else in XFS we lock inodes and then AG data structures, but generating the
 * list of rmap records requires that we be able to scan both block mapping
 * btrees of every inode in the filesystem to see if it owns any extents in
 * this AG.  We can't tolerate any inode updates while we do this, so we
 * freeze the filesystem to lock everyone else out, and grant ourselves
 * special privileges to run transactions with regular background reclamation
 * turned off.
 *
 * We also have to be very careful not to allow inode reclaim to start a
 * transaction because all transactions (other than our own) will block.
 * Deferred inode inactivation helps us out there.
 *
 * I) Reverse mappings for all non-space metadata and file data are collected
 * according to the following algorithm:
 *
 * 1. For each fork of each inode:
 * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary.
 * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate
 *      bmaps into rmap records (see 1.4).  Set bits in BMBIT for each btree
 *      block.
 * 1.3. If the incore extent map is loaded but the fork is in btree format,
 *      just visit the bmbt blocks to set the corresponding BMBIT areas.
 * 1.4. From the incore extent map, accumulate each bmap that falls into our
 *      target AG.  Remember, multiple bmap records can map to a single rmap
 *      record, so we cannot simply emit rmap records 1:1.
 * 1.5. Emit rmap records for each extent in BMBIT and free it.
 * 2. Create bitmaps INOBIT and ICHUNKBIT.
 * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT,
 *    and set bits in INOBIT for each btree block.  If the inobt has no records
 *    at all, we must be careful to record its root in INOBIT.
 * 4. For each block in the finobt, set the corresponding INOBIT area.
 * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them.
 * 6. Create bitmaps REFCBIT and COWBIT.
 * 7. For each CoW staging extent in the refcountbt, set the corresponding
 *    areas in COWBIT.
 * 8. For each block in the refcountbt, set the corresponding REFCBIT area.
 * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them.
 * A. Emit rmap for the AG headers.
 * B. Emit rmap for the log, if there is one.
 *
 * II) The rmapbt shape and space metadata rmaps are computed as follows:
 *
 * 1. Count the rmaps collected in the previous step. (= NR)
 * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB)
 * 3. Reserve RMB blocks through the newbt using the allocator in normap mode.
 * 4. Create bitmap AGBIT.
 * 5. For each reservation in the newbt, set the corresponding areas in AGBIT.
 * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT.
 * 7. Count the extents in AGBIT. (= AGNR)
 * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB')
 * 9. If RMB' >= RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB',
 *    and clear AGBIT.  Go to step 5.
 * A. Emit rmaps for each extent in AGBIT.
 *
 * III) The rmapbt is constructed and set in place as follows:
 *
 * 1. Sort the rmap records.
 * 2. Bulk load the rmaps.
 *
 * IV) Reap the old btree blocks.
 *
 * 1. Create a bitmap OLDRMBIT.
 * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT.
 * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT.
 * 4. Reap the extents corresponding to the set areas in OLDRMBIT.  These are
 *    the parts of the AG that the rmap didn't find during its scan of the
 *    primary metadata and aren't known to be in the free space, which implies
 *    that they were the old rmapbt blocks.
 *
 * We use the 'xrep_rmap' prefix for all the rmap functions.
 */
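/*
 * The bitmap names above map onto this file roughly as follows: BMBIT is the
 * bmbt_blocks bitmap in struct xrep_rmap_ifork; INOBIT and ICHUNKBIT are the
 * inobt_blocks and ichunk_blocks bitmaps in struct xrep_rmap_inodes; REFCBIT
 * and COWBIT are the refcountbt_blocks and cow_blocks bitmaps in
 * xrep_rmap_find_refcount_rmaps(); AGBIT is the freesp_blocks bitmap in
 * xrep_rmap_reserve_space(); and OLDRMBIT is the rmap_gaps bitmap in
 * xrep_rmap_remove_old_tree().
 */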
/* Context for collecting rmaps */
struct xrep_rmap {
	/* new rmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rmap_btree;

	struct xfs_scrub	*sc;

	/* in-memory btree cursor for the xfs_btree_bload iteration */
	struct xfs_btree_cur	*mcur;

	/* Hooks into rmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* Number of non-freespace records found. */
	unsigned long long	nr_records;

	/* bnobt/cntbt contribution to btreeblks */
	xfs_agblock_t		freesp_btblocks;

	/* old agf_rmap_blocks counter */
	unsigned int		old_rmapbt_fsbcount;
};
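/*
 * xrep_rmap_stash() (called from the inode scan) and the
 * xrep_rmapbt_live_update() hook both add records to rmap_btree while the
 * AGF is unlocked, so every access to the in-memory btree is made under the
 * lock above.
 */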
/* Set us up to repair reverse mapping btrees. */
int
xrep_setup_ag_rmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rmap	*rr;
	char			*descr;
	int			error;

	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);

	descr = xchk_xfile_ag_descr(sc, "reverse mapping records");
	error = xrep_setup_xfbtree(sc, descr);
	kfree(descr);
	if (error)
		return error;

	rr = kzalloc(sizeof(struct xrep_rmap), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;

	rr->sc = sc;
	sc->buf = rr;
	return 0;
}
/* Make sure there's nothing funny about this mapping. */
STATIC int
xrep_rmap_check_mapping(
	struct xfs_scrub		*sc,
	const struct xfs_rmap_irec	*rec)
{
	enum xbtree_recpacking		outcome;
	int				error;

	if (xfs_rmap_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
/* Store a reverse-mapping record. */
STATIC int
xrep_rmap_stash(
	struct xrep_rmap	*rr,
	xfs_agblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{
	struct xfs_rmap_irec	rmap = {
		.rm_startblock	= startblock,
		.rm_blockcount	= blockcount,
		.rm_owner	= owner,
		.rm_offset	= offset,
		.rm_flags	= flags,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*mcur;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	trace_xrep_rmap_found(sc->sa.pag, &rmap);

	mutex_lock(&rr->lock);
	mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, &rr->rmap_btree);
	error = xfs_rmap_map_raw(mcur, &rmap);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rmap_btree, sc->tp);
	if (error)
		goto out_abort;

	mutex_unlock(&rr->lock);
	return 0;

out_cancel:
	xfbtree_trans_cancel(&rr->rmap_btree, sc->tp);
out_abort:
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
	return error;
}
struct xrep_rmap_stash_run {
	struct xrep_rmap	*rr;
	uint64_t		owner;
	unsigned int		rmap_flags;
};

static int
xrep_rmap_stash_run(
	uint32_t			start,
	uint32_t			len,
	void				*priv)
{
	struct xrep_rmap_stash_run	*rsr = priv;
	struct xrep_rmap		*rr = rsr->rr;

	return xrep_rmap_stash(rr, start, len, rsr->owner, 0, rsr->rmap_flags);
}
/*
 * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
 * that the ranges are in units of FS blocks.
 */
STATIC int
xrep_rmap_stash_bitmap(
	struct xrep_rmap		*rr,
	struct xagb_bitmap		*bitmap,
	const struct xfs_owner_info	*oinfo)
{
	struct xrep_rmap_stash_run	rsr = {
		.rr			= rr,
		.owner			= oinfo->oi_owner,
		.rmap_flags		= 0,
	};

	if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
		rsr.rmap_flags |= XFS_RMAP_ATTR_FORK;
	if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
		rsr.rmap_flags |= XFS_RMAP_BMBT_BLOCK;

	return xagb_bitmap_walk(bitmap, xrep_rmap_stash_run, &rsr);
}
/* Section (I): Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap record.
	 */
	struct xfs_rmap_irec	accum;

	/* Bitmap of bmbt blocks in this AG. */
	struct xagb_bitmap	bmbt_blocks;

	struct xrep_rmap	*rr;

	/* Which inode fork? */
	int			whichfork;
};
/* Stash an rmap that we accumulated while walking an inode fork. */
STATIC int
xrep_rmap_stash_accumulated(
	struct xrep_rmap_ifork	*rf)
{
	if (rf->accum.rm_blockcount == 0)
		return 0;

	return xrep_rmap_stash(rf->rr, rf->accum.rm_startblock,
			rf->accum.rm_blockcount, rf->accum.rm_owner,
			rf->accum.rm_offset, rf->accum.rm_flags);
}
/* Accumulate a bmbt record. */
STATIC int
xrep_rmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{
	struct xrep_rmap_ifork	*rf = priv;
	struct xfs_mount	*mp = rf->rr->sc->mp;
	struct xfs_rmap_irec	*accum = &rf->accum;
	xfs_agblock_t		agbno;
	unsigned int		rmap_flags = 0;
	int			error;

	if (XFS_FSB_TO_AGNO(mp, rec->br_startblock) !=
			pag_agno(rf->rr->sc->sa.pag))
		return 0;

	agbno = XFS_FSB_TO_AGBNO(mp, rec->br_startblock);
	if (rf->whichfork == XFS_ATTR_FORK)
		rmap_flags |= XFS_RMAP_ATTR_FORK;
	if (rec->br_state == XFS_EXT_UNWRITTEN)
		rmap_flags |= XFS_RMAP_UNWRITTEN;

	/* If this bmap is adjacent to the previous one, just add it. */
	if (accum->rm_blockcount > 0 &&
	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
	    agbno == accum->rm_startblock + accum->rm_blockcount &&
	    rmap_flags == accum->rm_flags) {
		accum->rm_blockcount += rec->br_blockcount;
		return 0;
	}

	/* Otherwise stash the old rmap and start accumulating a new one. */
	error = xrep_rmap_stash_accumulated(rf);
	if (error)
		return error;

	accum->rm_startblock = agbno;
	accum->rm_blockcount = rec->br_blockcount;
	accum->rm_offset = rec->br_startoff;
	accum->rm_flags = rmap_flags;
	return 0;
}
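/*
 * For example, two contiguous written mappings from the same fork, say
 * (startoff 0, agbno 100, len 5) followed by (startoff 5, agbno 105, len 3),
 * satisfy the adjacency test above and are accumulated into a single rmap
 * record (agbno 100, len 8, offset 0) before being stashed.
 */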
/* Add a btree block to the bitmap. */
STATIC int
xrep_rmap_visit_iroot_btree_block(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*priv)
{
	struct xrep_rmap_ifork	*rf = priv;
	struct xfs_buf		*bp;
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;

	xfs_btree_get_block(cur, level, &bp);
	if (!bp)
		return 0;

	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
	if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != pag_agno(rf->rr->sc->sa.pag))
		return 0;

	agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
	return xagb_bitmap_set(&rf->bmbt_blocks, agbno, 1);
}
/*
 * Iterate a metadata btree rooted in an inode to collect rmap records for
 * anything in this fork that matches the AG.
 */
STATIC int
xrep_rmap_scan_iroot_btree(
	struct xrep_rmap_ifork	*rf,
	struct xfs_btree_cur	*cur)
{
	struct xfs_owner_info	oinfo;
	struct xrep_rmap	*rr = rf->rr;
	int			error;

	xagb_bitmap_init(&rf->bmbt_blocks);

	/* Record all the blocks in the btree itself. */
	error = xfs_btree_visit_blocks(cur, xrep_rmap_visit_iroot_btree_block,
			XFS_BTREE_VISIT_ALL, rf);
	if (error)
		goto out;

	/* Emit rmaps for the btree blocks. */
	xfs_rmap_ino_bmbt_owner(&oinfo, rf->accum.rm_owner, rf->whichfork);
	error = xrep_rmap_stash_bitmap(rr, &rf->bmbt_blocks, &oinfo);
	if (error)
		goto out;

	/* Stash any remaining accumulated rmaps. */
	error = xrep_rmap_stash_accumulated(rf);
out:
	xagb_bitmap_destroy(&rf->bmbt_blocks);
	return error;
}
/*
 * Iterate the block mapping btree to collect rmap records for anything in
 * this fork that matches the AG.  Sets @mappings_done to true if we've
 * scanned the block mappings in this fork.
 */
STATIC int
xrep_rmap_scan_bmbt(
	struct xrep_rmap_ifork	*rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{
	struct xrep_rmap	*rr = rf->rr;
	struct xfs_btree_cur	*cur;
	struct xfs_ifork	*ifp;
	int			error;

	*mappings_done = false;
	ifp = xfs_ifork_ptr(ip, rf->whichfork);
	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, rf->whichfork);

	if (!xfs_ifork_is_realtime(ip, rf->whichfork) &&
	    xfs_need_iread_extents(ifp)) {
		/*
		 * If the incore extent cache isn't loaded, scan the bmbt for
		 * mapping records.  This avoids loading the incore extent
		 * tree, which will increase memory pressure at a time when
		 * we're trying to run as quickly as we possibly can.  Ignore
		 * realtime extents.
		 */
		error = xfs_bmap_query_all(cur, xrep_rmap_visit_bmbt, rf);
		if (error)
			goto out_cur;

		*mappings_done = true;
		goto out_cur;
	}

	/* Scan for the bmbt blocks, which always live on the data device. */
	error = xrep_rmap_scan_iroot_btree(rf, cur);
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that matches the AG.
 */
STATIC int
xrep_rmap_scan_iext(
	struct xrep_rmap_ifork	*rf,
	struct xfs_ifork	*ifp)
{
	struct xfs_bmbt_irec	rec;
	struct xfs_iext_cursor	icur;
	int			error;

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		error = xrep_rmap_visit_bmbt(NULL, &rec, rf);
		if (error)
			return error;
	}

	return xrep_rmap_stash_accumulated(rf);
}
/* Find all the extents from a given AG in an inode fork. */
STATIC int
xrep_rmap_scan_ifork(
	struct xrep_rmap	*rr,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xrep_rmap_ifork	rf = {
		.accum		= { .rm_owner = ip->i_ino, },
		.rr		= rr,
		.whichfork	= whichfork,
	};
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	int			error = 0;

	if (!ifp)
		return 0;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		bool		mappings_done;

		/*
		 * Scan the bmap btree for data device mappings.  This includes
		 * the btree blocks themselves, even if this is a realtime
		 * file.
		 */
		error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
		if (error || mappings_done)
			return error;
	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		return 0;
	}

	/* Scan incore extent cache if this isn't a realtime file. */
	if (xfs_ifork_is_realtime(ip, whichfork))
		return 0;

	return xrep_rmap_scan_iext(&rf, ifp);
}
/*
 * Take ILOCK on a file that we want to scan.
 *
 * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
 * attr bmbt.  Otherwise, take ILOCK_SHARED.
 */
static inline unsigned int
xrep_rmap_scan_ilock(
	struct xfs_inode	*ip)
{
	uint			lock_mode = XFS_ILOCK_SHARED;

	if (xfs_need_iread_extents(&ip->i_df)) {
		lock_mode = XFS_ILOCK_EXCL;
		goto lock;
	}

	if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
		lock_mode = XFS_ILOCK_EXCL;

lock:
	xfs_ilock(ip, lock_mode);
	return lock_mode;
}
/* Record reverse mappings for a file. */
STATIC int
xrep_rmap_scan_inode(
	struct xrep_rmap	*rr,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode = xrep_rmap_scan_ilock(ip);
	int			error;

	/* Check the data fork. */
	error = xrep_rmap_scan_ifork(rr, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	/* Check the attr fork. */
	error = xrep_rmap_scan_ifork(rr, ip, XFS_ATTR_FORK);
	if (error)
		goto out_unlock;

	/* COW fork extents are "owned" by the refcount btree. */

	xchk_iscan_mark_visited(&rr->iscan, ip);
out_unlock:
	xfs_iunlock(ip, lock_mode);
	return error;
}
/* Section (I): Find all AG metadata extents except for free space metadata. */

struct xrep_rmap_inodes {
	struct xrep_rmap	*rr;
	struct xagb_bitmap	inobt_blocks;	/* INOBIT */
	struct xagb_bitmap	ichunk_blocks;	/* ICHUNKBIT */
};
/* Record inode btree rmaps. */
STATIC int
xrep_rmap_walk_inobt(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_inobt_rec_incore	irec;
	struct xrep_rmap_inodes		*ri = priv;
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_agino_t			agino;
	xfs_agblock_t			agbno;
	xfs_extlen_t			aglen;
	xfs_agino_t			iperhole;
	unsigned int			i;
	int				error;

	/* Record the inobt blocks. */
	error = xagb_bitmap_set_btcur_path(&ri->inobt_blocks, cur);
	if (error)
		return error;

	xfs_inobt_btrec_to_irec(mp, rec, &irec);
	if (xfs_inobt_check_irec(to_perag(cur->bc_group), &irec) != NULL)
		return -EFSCORRUPTED;

	agino = irec.ir_startino;

	/* Record a non-sparse inode chunk. */
	if (!xfs_inobt_issparse(irec.ir_holemask)) {
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		aglen = max_t(xfs_extlen_t, 1,
				XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock);

		return xagb_bitmap_set(&ri->ichunk_blocks, agbno, aglen);
	}

	/* Iterate each chunk. */
	iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
			XFS_INODES_PER_HOLEMASK_BIT);
	aglen = iperhole / mp->m_sb.sb_inopblock;
	for (i = 0, agino = irec.ir_startino;
	     i < XFS_INOBT_HOLEMASK_BITS;
	     i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
		if (irec.ir_holemask & (1 << i))
			continue;

		/* Record the inode chunk otherwise. */
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
		error = xagb_bitmap_set(&ri->ichunk_blocks, agbno, aglen);
		if (error)
			return error;
	}

	return 0;
}
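/*
 * As a worked example of the sparse walk above, assume 4k blocks and
 * 512-byte inodes, so sb_inopblock is 8: iperhole = max(8, 4) = 8 inodes,
 * aglen = 1 block, and each loop iteration advances by two holemask bits
 * (iperhole / XFS_INODES_PER_HOLEMASK_BIT) and one block's worth of inodes.
 */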
/* Collect rmaps for the blocks containing inode btrees and the inode chunks. */
STATIC int
xrep_rmap_find_inode_rmaps(
	struct xrep_rmap	*rr)
{
	struct xrep_rmap_inodes	ri = {
		.rr		= rr,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	xagb_bitmap_init(&ri.inobt_blocks);
	xagb_bitmap_init(&ri.ichunk_blocks);

	/*
	 * Iterate every record in the inobt so we can capture all the inode
	 * chunks and the blocks in the inobt itself.
	 */
	error = xfs_btree_query_all(sc->sa.ino_cur, xrep_rmap_walk_inobt, &ri);
	if (error)
		goto out_bitmap;

	/*
	 * Note that if there are zero records in the inobt then query_all does
	 * nothing and we have to account the empty inobt root manually.
	 */
	if (xagb_bitmap_empty(&ri.ichunk_blocks)) {
		struct xfs_agi	*agi = sc->sa.agi_bp->b_addr;

		error = xagb_bitmap_set(&ri.inobt_blocks,
				be32_to_cpu(agi->agi_root), 1);
		if (error)
			goto out_bitmap;
	}

	/* Scan the finobt too. */
	if (xfs_has_finobt(sc->mp)) {
		error = xagb_bitmap_set_btblocks(&ri.inobt_blocks,
				sc->sa.fino_cur);
		if (error)
			goto out_bitmap;
	}

	/* Generate rmaps for everything. */
	error = xrep_rmap_stash_bitmap(rr, &ri.inobt_blocks,
			&XFS_RMAP_OINFO_INOBT);
	if (error)
		goto out_bitmap;
	error = xrep_rmap_stash_bitmap(rr, &ri.ichunk_blocks,
			&XFS_RMAP_OINFO_INODES);

out_bitmap:
	xagb_bitmap_destroy(&ri.inobt_blocks);
	xagb_bitmap_destroy(&ri.ichunk_blocks);
	return error;
}
/* Record a CoW staging extent. */
STATIC int
xrep_rmap_walk_cowblocks(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec,
	void				*priv)
{
	struct xagb_bitmap		*bitmap = priv;

	if (!xfs_refcount_check_domain(irec) ||
	    irec->rc_domain != XFS_REFC_DOMAIN_COW)
		return -EFSCORRUPTED;

	return xagb_bitmap_set(bitmap, irec->rc_startblock, irec->rc_blockcount);
}
/*
 * Collect rmaps for the blocks containing the refcount btree, and all CoW
 * staging extents.
 */
STATIC int
xrep_rmap_find_refcount_rmaps(
	struct xrep_rmap	*rr)
{
	struct xagb_bitmap	refcountbt_blocks;	/* REFCBIT */
	struct xagb_bitmap	cow_blocks;		/* COWBIT */
	struct xfs_refcount_irec low = {
		.rc_startblock	= 0,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_refcount_irec high = {
		.rc_startblock	= -1U,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	if (!xfs_has_reflink(sc->mp))
		return 0;

	xagb_bitmap_init(&refcountbt_blocks);
	xagb_bitmap_init(&cow_blocks);

	error = xagb_bitmap_set_btblocks(&refcountbt_blocks, sc->sa.refc_cur);
	if (error)
		goto out_bitmap;

	/* Collect rmaps for CoW staging extents. */
	error = xfs_refcount_query_range(sc->sa.refc_cur, &low, &high,
			xrep_rmap_walk_cowblocks, &cow_blocks);
	if (error)
		goto out_bitmap;

	/* Generate rmaps for everything. */
	error = xrep_rmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
	if (error)
		goto out_bitmap;
	error = xrep_rmap_stash_bitmap(rr, &refcountbt_blocks,
			&XFS_RMAP_OINFO_REFC);

out_bitmap:
	xagb_bitmap_destroy(&cow_blocks);
	xagb_bitmap_destroy(&refcountbt_blocks);
	return error;
}
/* Generate rmaps for the AG headers (AGI/AGF/AGFL) */
STATIC int
xrep_rmap_find_agheader_rmaps(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	/* Create a record for the AG sb->agfl. */
	return xrep_rmap_stash(rr, XFS_SB_BLOCK(sc->mp),
			XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
			XFS_RMAP_OWN_FS, 0, 0);
}
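/*
 * The superblock, AGF, AGI, and AGFL all sit in consecutive blocks at the
 * start of the AG, which is why the single OWN_FS record above starting at
 * XFS_SB_BLOCK and running through XFS_AGFL_BLOCK covers all of them.
 */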
/* Generate rmaps for the log, if it's in this AG. */
STATIC int
xrep_rmap_find_log_rmaps(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	if (!xfs_ag_contains_log(sc->mp, pag_agno(sc->sa.pag)))
		return 0;

	return xrep_rmap_stash(rr,
			XFS_FSB_TO_AGBNO(sc->mp, sc->mp->m_sb.sb_logstart),
			sc->mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
}
/* Check and count all the records that we gathered. */
STATIC int
xrep_rmap_check_record(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_rmap		*rr = priv;
	int				error;

	error = xrep_rmap_check_mapping(rr->sc, rec);
	if (error)
		return error;

	rr->nr_records++;
	return 0;
}
/*
 * Generate all the reverse-mappings for this AG, a list of the old rmapbt
 * blocks, and the new btreeblks count.  Figure out if we have enough free
 * space to reconstruct the rmap btree.  The caller must clean up the lists
 * if anything goes wrong.  This implements section (I) above.
 */
STATIC int
xrep_rmap_find_rmaps(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xchk_ag		*sa = &sc->sa;
	struct xfs_inode	*ip;
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Find all the per-AG metadata. */
	xrep_ag_btcur_init(sc, &sc->sa);

	error = xrep_rmap_find_inode_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_refcount_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_agheader_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_log_rmaps(rr);
end_agscan:
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration.  Specifically:
	 *
	 * Unlock the AG header buffers and cancel the transaction to release
	 * the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions.  This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	sa->agf_bp = NULL;
	sa->agi_bp = NULL;
	xchk_trans_cancel(sc);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/* Scan every file in the filesystem for rmaps. */
	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the AG headers in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_fs(sc);
	if (error)
		return error;
	error = xchk_perag_drain_and_lock(sc);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree.  This should reflect
	 * all actively-owned space in the filesystem.  At the same time, check
	 * all our records before we start building a new btree, which requires
	 * a bnobt cursor.
	 */
	mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, &rr->rmap_btree);
	sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);

	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rmap_check_record, rr);

	xfs_btree_del_cursor(sc->sa.bno_cur, error);
	sc->sa.bno_cur = NULL;
	xfs_btree_del_cursor(mcur, error);

	return error;
}
/* Section (II): Reserving space for new rmapbt and setting free space bitmap */

struct xrep_rmap_agfl {
	struct xagb_bitmap	*bitmap;
	xfs_agnumber_t		agno;
};

/* Add an AGFL block to the rmap list. */
STATIC int
xrep_rmap_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	struct xrep_rmap_agfl	*ra = priv;

	return xagb_bitmap_set(ra->bitmap, agbno, 1);
}
/*
 * Run one round of reserving space for the new rmapbt and recomputing the
 * number of blocks needed to store the previously observed rmapbt records and
 * the ones we'll create for the free space metadata.  When we don't need more
 * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to
 * true.
 */
STATIC int
xrep_rmap_try_reserve(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur,
	struct xagb_bitmap	*freesp_blocks,
	uint64_t		*blocks_reserved,
	bool			*done)
{
	struct xrep_rmap_agfl	ra = {
		.bitmap		= freesp_blocks,
		.agno		= pag_agno(rr->sc->sa.pag),
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xrep_newbt_resv	*resv, *n;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	uint64_t		nr_blocks;	/* RMB */
	uint64_t		freesp_records;
	int			error;

	/*
	 * We're going to recompute new_btree.bload.nr_blocks at the end of
	 * this function to reflect however many btree blocks we need to store
	 * all the rmap records (including the ones that reflect the changes we
	 * made to support the new rmapbt blocks), so we save the old value
	 * here so we can decide if we've reserved enough blocks.
	 */
	nr_blocks = rr->new_btree.bload.nr_blocks;

	/*
	 * Make sure we've reserved enough space for the new btree.  This can
	 * change the shape of the free space btrees, which can cause secondary
	 * interactions with the rmap records because all three space btrees
	 * have the same rmap owner.  We'll account for all that below.
	 */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			nr_blocks - *blocks_reserved);
	if (error)
		return error;

	*blocks_reserved = rr->new_btree.bload.nr_blocks;

	/* Clear everything in the bitmap. */
	xagb_bitmap_destroy(freesp_blocks);

	/* Set all the bnobt blocks in the bitmap. */
	sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);
	error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.bno_cur);
	xfs_btree_del_cursor(sc->sa.bno_cur, error);
	sc->sa.bno_cur = NULL;
	if (error)
		return error;

	/* Set all the cntbt blocks in the bitmap. */
	sc->sa.cnt_cur = xfs_cntbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);
	error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.cnt_cur);
	xfs_btree_del_cursor(sc->sa.cnt_cur, error);
	sc->sa.cnt_cur = NULL;
	if (error)
		return error;

	/* Record our new btreeblks value. */
	rr->freesp_btblocks = xagb_bitmap_hweight(freesp_blocks) - 2;

	/* Set all the new rmapbt blocks in the bitmap. */
	list_for_each_entry_safe(resv, n, &rr->new_btree.resv_list, list) {
		error = xagb_bitmap_set(freesp_blocks, resv->agbno, resv->len);
		if (error)
			return error;
	}

	/* Set all the AGFL blocks in the bitmap. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		return error;

	error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xrep_rmap_walk_agfl, &ra);
	if (error)
		return error;

	/* Count the extents in the bitmap. */
	freesp_records = xagb_bitmap_count_set_regions(freesp_blocks);

	/* Compute how many blocks we'll need for all the rmaps. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records + freesp_records);
	if (error)
		return error;

	/* We're done when we don't need more blocks. */
	*done = nr_blocks >= rr->new_btree.bload.nr_blocks;
	return 0;
}
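/*
 * The caller retries xrep_rmap_try_reserve until @done is set, i.e. until a
 * reservation round no longer increases the estimated size of the new btree.
 */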
/*
 * Iteratively reserve space for rmap btree while recording OWN_AG rmaps for
 * the free space metadata.  This implements section (II) above.
 */
STATIC int
xrep_rmap_reserve_space(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur)
{
	struct xagb_bitmap	freesp_blocks;	/* AGBIT */
	uint64_t		blocks_reserved = 0;
	bool			done = false;
	int			error;

	/* Compute how many blocks we'll need for the rmaps collected so far. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(rr->sc, &error))
		return error;

	xagb_bitmap_init(&freesp_blocks);

	/*
	 * Iteratively reserve space for the new rmapbt and recompute the
	 * number of blocks needed to store the previously observed rmapbt
	 * records and the ones we'll create for the free space metadata.
	 * Finish when we don't need more blocks.
	 */
	do {
		error = xrep_rmap_try_reserve(rr, rmap_cur, &freesp_blocks,
				&blocks_reserved, &done);
		if (error)
			goto out_bitmap;
	} while (!done);

	/* Emit rmaps for everything in the free space bitmap. */
	xrep_ag_btcur_init(rr->sc, &rr->sc->sa);
	error = xrep_rmap_stash_bitmap(rr, &freesp_blocks, &XFS_RMAP_OINFO_AG);
	xchk_ag_btcur_free(&rr->sc->sa);

out_bitmap:
	xagb_bitmap_destroy(&freesp_blocks);
	return error;
}
/* Section (III): Building the new rmap btree. */

/* Update the AGF counters. */
STATIC int
xrep_rmap_reset_counters(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	xfs_agblock_t		rmap_btblocks;

	/*
	 * The AGF header contains extra information related to the reverse
	 * mapping btree, so we must update those fields here.
	 */
	rmap_btblocks = rr->new_btree.afake.af_blocks - 1;
	agf->agf_btreeblks = cpu_to_be32(rr->freesp_btblocks + rmap_btblocks);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS);
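	/*
	 * agf_btreeblks counts the blocks in the bnobt, cntbt, and rmapbt
	 * minus the three btree roots, which is why two blocks were
	 * subtracted from the free space btree bitmap weight earlier and one
	 * more is subtracted from the new rmapbt block count here.
	 */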
	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the rmapbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_rmap_level = pag->pagf_rmap_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}
/* Retrieve rmapbt data for bulk load. */
STATIC int
xrep_rmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xrep_rmap	*rr = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		int		stat = 0;

		error = xfs_btree_increment(rr->mcur, 0, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
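/*
 * Note that rr->mcur starts one record to the left of the first record to be
 * loaded (see xrep_rmap_build_new_tree), so each pass through the loop above
 * increments the in-memory cursor before copying out a record.
 */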
/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_rmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_rmap	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}
/* Custom allocation function for new rmap btrees. */
STATIC int
xrep_rmap_alloc_vextent(
	struct xfs_scrub	*sc,
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		alloc_hint)
{
	int			error;

	/*
	 * We don't want an rmap update on the allocation, since we iteratively
	 * compute the OWN_AG records /after/ allocating blocks for the records
	 * that we already know we need to store.  Therefore, fix the freelist
	 * with the NORMAP flag set so that we don't also try to create an rmap
	 * for new AGFL blocks.
	 */
	error = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NORMAP);
	if (error)
		return error;

	/*
	 * If xrep_fix_freelist fixed the freelist by moving blocks from the
	 * free space btrees or by removing blocks from the AGFL and queueing
	 * an EFI to free the block, the transaction will be dirty.  This
	 * second case is of interest to us.
	 *
	 * Later on, we will need to compare gaps in the new recordset against
	 * the block usage of all OWN_AG owners in order to free the old
	 * btree's blocks, which means that we can't have EFIs for former AGFL
	 * blocks attached to the repair transaction when we commit the new
	 * btree.
	 *
	 * xrep_newbt_alloc_blocks guarantees this for us by calling
	 * xrep_defer_finish to commit anything that fix_freelist may have
	 * added to the transaction.
	 */
	return xfs_alloc_vextent_near_bno(args, alloc_hint);
}
/* Count the records in this btree. */
STATIC int
xrep_rmap_count_records(
	struct xfs_btree_cur	*cur,
	unsigned long long	*nr)
{
	int			running = 1;
	int			error;

	*nr = 0;

	error = xfs_btree_goto_left_edge(cur);
	if (error)
		return error;

	while (running && !(error = xfs_btree_increment(cur, 0, &running))) {
		if (running)
			(*nr)++;
	}

	return error;
}
/*
 * Use the collected rmap information to stage a new rmap btree.  If this is
 * successful we'll return with the new btree root information logged to the
 * repair transaction but not yet committed.  This implements section (III)
 * above.
 */
STATIC int
xrep_rmap_build_new_tree(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_btree_cur	*rmap_cur;
	int			error;

	/*
	 * Preserve the old rmapbt block count so that we can adjust the
	 * per-AG rmapbt reservation after we commit the new btree root and
	 * want to dispose of the old btree blocks.
	 */
	rr->old_rmapbt_fsbcount = be32_to_cpu(agf->agf_rmap_blocks);

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.  The new blocks are accounted to the
	 * rmapbt per-AG reservation, which we will adjust further after
	 * committing the new btree.
	 */
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
			xfs_agbno_to_fsb(pag, XFS_RMAP_BLOCK(sc->mp)),
			XFS_AG_RESV_RMAPBT);
	rr->new_btree.bload.get_records = xrep_rmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
	rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
	rmap_cur = xfs_rmapbt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(rmap_cur, &rr->new_btree.afake);

	/*
	 * Initialize @rr->new_btree, reserve space for the new rmapbt,
	 * and compute OWN_AG rmaps.
	 */
	error = xrep_rmap_reserve_space(rr, rmap_cur);
	if (error)
		goto err_cur;

	/*
	 * Count the rmapbt records again, because the space reservation
	 * for the rmapbt itself probably added more records to the btree.
	 */
	rr->mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL,
			&rr->rmap_btree);

	error = xrep_rmap_count_records(rr->mcur, &rr->nr_records);
	if (error)
		goto err_mcur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_repair_rmap_level = rr->new_btree.bload.btree_height;

	/*
	 * Move the cursor to the left edge of the tree so that the first
	 * increment in ->get_records positions us at the first record.
	 */
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_level;

	/* Add all observed rmap records. */
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header.  After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_rmapbt_commit_staged_btree(rmap_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree.  Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/*
	 * The newly committed rmap recordset includes mappings for the blocks
	 * that we reserved to build the new btree.  If there is excess space
	 * reservation to be freed, the corresponding rmap records must also
	 * be removed.
	 */
	rr->new_btree.oinfo = XFS_RMAP_OINFO_AG;

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_rmap_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_level:
	pag->pagf_repair_rmap_level = 0;
err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}
/* Section (IV): Reaping the old btree. */

struct xrep_rmap_find_gaps {
	struct xagb_bitmap	rmap_gaps;
	xfs_agblock_t		next_agbno;
};

/* Subtract each free extent in the bnobt from the rmap gaps. */
STATIC int
xrep_rmap_find_freesp(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{
	struct xrep_rmap_find_gaps	*rfg = priv;

	return xagb_bitmap_clear(&rfg->rmap_gaps, rec->ar_startblock,
			rec->ar_blockcount);
}
/* Record the free space we find, as part of cleaning out the btree. */
STATIC int
xrep_rmap_find_gaps(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_rmap_find_gaps	*rfg = priv;
	int				error;

	if (rec->rm_startblock > rfg->next_agbno) {
		error = xagb_bitmap_set(&rfg->rmap_gaps, rfg->next_agbno,
				rec->rm_startblock - rfg->next_agbno);
		if (error)
			return error;
	}

	rfg->next_agbno = max_t(xfs_agblock_t, rfg->next_agbno,
			rec->rm_startblock + rec->rm_blockcount);
	return 0;
}
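/*
 * For example, if the new rmapbt contains records covering agbnos [0, 10)
 * and [15, 20), the callback above marks [10, 15) as a gap, and the caller
 * adds the tail from the last record to the end of the AG.
 */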
/*
 * Reap the old rmapbt blocks.  Now that the rmapbt is fully rebuilt, we make
 * a list of gaps in the rmap records and a list of the extents mentioned in
 * the bnobt.  Any block that's in the new rmapbt gap list but not mentioned
 * in the bnobt is a block from the old rmapbt and can be removed.
 */
STATIC int
xrep_rmap_remove_old_tree(
	struct xrep_rmap	*rr)
{
	struct xrep_rmap_find_gaps rfg = {
		.next_agbno	= 0,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_btree_cur	*mcur;
	xfs_agblock_t		agend;
	int			error;

	xagb_bitmap_init(&rfg.rmap_gaps);

	/* Compute free space from the new rmapbt. */
	mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, &rr->rmap_btree);
	error = xfs_rmap_query_all(mcur, xrep_rmap_find_gaps, &rfg);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_bitmap;

	/* Insert a record for space between the last rmap and EOAG. */
	agend = be32_to_cpu(agf->agf_length);
	if (rfg.next_agbno < agend) {
		error = xagb_bitmap_set(&rfg.rmap_gaps, rfg.next_agbno,
				agend - rfg.next_agbno);
		if (error)
			goto out_bitmap;
	}

	/* Compute free space from the existing bnobt. */
	sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);
	error = xfs_alloc_query_all(sc->sa.bno_cur, xrep_rmap_find_freesp,
			&rfg);
	xfs_btree_del_cursor(sc->sa.bno_cur, error);
	sc->sa.bno_cur = NULL;
	if (error)
		goto out_bitmap;

	/*
	 * Free the "free" blocks that the new rmapbt knows about but the bnobt
	 * doesn't--these are the old rmapbt blocks.  Credit the old rmapbt
	 * block usage count back to the per-AG rmapbt reservation (and not
	 * fdblocks, since the rmap btree lives in free space) to keep the
	 * reservation and free space accounting correct.
	 */
	error = xrep_reap_agblocks(sc, &rfg.rmap_gaps,
			&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT);
	if (error)
		goto out_bitmap;

	/*
	 * Now that we've zapped all the old rmapbt blocks we can turn off
	 * the alternate height mechanism and reset the per-AG space
	 * reservation.
	 */
	pag->pagf_repair_rmap_level = 0;
	sc->flags |= XREP_RESET_PERAG_RESV;
out_bitmap:
	xagb_bitmap_destroy(&rfg.rmap_gaps);
	return error;
}
static inline bool
xrep_rmapbt_want_live_update(
	struct xchk_iscan		*iscan,
	const struct xfs_owner_info	*oi)
{
	if (xchk_iscan_aborted(iscan))
		return false;

	/*
	 * Before unlocking the AG header to perform the inode scan, we
	 * recorded reverse mappings for all AG metadata except for the OWN_AG
	 * metadata.  IOWs, the in-memory btree knows about the AG headers, the
	 * two inode btrees, the CoW staging extents, and the refcount btrees.
	 * For these types of metadata, we need to record the live updates in
	 * the in-memory rmap btree.
	 *
	 * However, we do not scan the free space btrees or the AGFL until we
	 * have re-locked the AGF and are ready to reserve space for the new
	 * rmap btree, so we do not want live updates for OWN_AG metadata.
	 */
	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
		return oi->oi_owner != XFS_RMAP_OWN_AG;

	/* Ignore updates to files that the scanner hasn't visited yet. */
	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}
/*
 * Apply a rmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the AGF buffer and is generating
 * the update, so we must be careful about which parts of the struct
 * xrep_rmap we change.
 */
static int
xrep_rmapbt_live_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{
	struct xfs_rmap_update_params	*p = data;
	struct xrep_rmap		*rr;
	struct xfs_mount		*mp;
	struct xfs_btree_cur		*mcur;
	struct xfs_trans		*tp;
	void				*txcookie;
	int				error;

	rr = container_of(nb, struct xrep_rmap, rhook.rmap_hook.nb);
	mp = rr->sc->mp;

	if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
		goto out_unlock;

	trace_xrep_rmap_live_update(rr->sc->sa.pag, action, p);

	error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
	if (error)
		goto out_abort;

	mutex_lock(&rr->lock);
	mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, &rr->rmap_btree);
	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
			p->blockcount, &p->oinfo, p->unwritten);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rmap_btree, tp);
	if (error)
		goto out_cancel;

	xrep_trans_cancel_hook_dummy(&txcookie, tp);
	mutex_unlock(&rr->lock);
	return NOTIFY_DONE;

out_cancel:
	xfbtree_trans_cancel(&rr->rmap_btree, tp);
	xrep_trans_cancel_hook_dummy(&txcookie, tp);
	mutex_unlock(&rr->lock);
out_abort:
	xchk_iscan_abort(&rr->iscan);
out_unlock:
	return NOTIFY_DONE;
}
/* Set up the filesystem scan components. */
STATIC int
xrep_rmap_setup_scan(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);

	/* Set up in-memory rmap btree */
	error = xfs_rmapbt_mem_init(sc->mp, &rr->rmap_btree, sc->xmbtp,
			pag_agno(sc->sa.pag));
	if (error)
		goto out_mutex;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rmap operations so that we can update our in-memory
	 * btree to reflect live changes on the filesystem.  Since we drop the
	 * AGF buffer to scan all the inodes, we need this piece to avoid
	 * installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_rmap_hook_setup(&rr->rhook, xrep_rmapbt_live_update);
	error = xfs_rmap_hook_add(pag_group(sc->sa.pag), &rr->rhook);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rmap_btree);
out_mutex:
	mutex_destroy(&rr->lock);
	return error;
}
/* Tear down scan components. */
STATIC void
xrep_rmap_teardown(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	xchk_iscan_abort(&rr->iscan);
	xfs_rmap_hook_del(pag_group(sc->sa.pag), &rr->rhook);
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rmap_btree);
	mutex_destroy(&rr->lock);
}
/* Repair the rmap btree for some AG. */
int
xrep_rmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rmap	*rr = sc->buf;
	int			error;

	error = xrep_rmap_setup_scan(rr);
	if (error)
		return error;

	/*
	 * Collect rmaps for everything in this AG that isn't space metadata.
	 * These rmaps won't change even as we try to allocate blocks.
	 */
	error = xrep_rmap_find_rmaps(rr);
	if (error)
		goto out_records;

	/* Rebuild the rmap information. */
	error = xrep_rmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/* Kill the old tree. */
	error = xrep_rmap_remove_old_tree(rr);

out_records:
	xrep_rmap_teardown(rr);
	return error;
}