// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"
/*
 * Inode Btree Repair
 * ==================
 *
 * A quick refresher of inode btrees on a v5 filesystem:
 *
 * - Inode records are read into memory in units of 'inode clusters'.  However
 *   many inodes fit in a cluster buffer is the smallest number of inodes that
 *   can be allocated or freed.  Clusters are never smaller than one fs block
 *   though they can span multiple blocks.  The size (in fs blocks) is
 *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
 *   cluster is computed with xfs_ialloc_cluster_alignment().
 *
 * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 *   inobt record must be aligned to the chunk size; if not, every record must
 *   be aligned to the start of a cluster.  It is possible to construct an XFS
 *   geometry where one inobt record maps to multiple inode clusters; it is
 *   also possible to construct a geometry where multiple inobt records map to
 *   different parts of one inode cluster.
 *
 * - If sparse inodes are not enabled, the smallest unit of allocation for
 *   inode records is enough to contain one inode chunk's worth of inodes.
 *
 * - If sparse inodes are enabled, the holemask field will be active.  Each
 *   bit of the holemask represents 4 potential inodes; if set, the
 *   corresponding space does *not* contain inodes and must be left alone.
 *   Clusters cannot be smaller than 4 inodes.  The smallest unit of allocation
 *   of inode records is one inode cluster.
 *
 * So what's the rebuild algorithm?
 *
 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 * records.  The OWN_INOBT records are the old inode btree blocks and will be
 * cleared out after we've rebuilt the tree.  Each possible inode cluster
 * within an OWN_INODES record will be read in; for each possible inobt record
 * associated with that cluster, compute the freemask from the i_mode data in
 * the inode chunk.  For sparse inodes the holemask will be calculated by
 * creating the properly aligned inobt record and punching out any chunk
 * that's missing.  Inode allocations and frees grab the AGI first, so repair
 * protects itself from concurrent access by locking the AGI.
 *
 * Once we've reconstructed all the inode records, we can create new inode
 * btree roots and reload the btrees.  We rebuild both inode trees at the same
 * time because they have the same rmap owner and it would be more complex to
 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
 * blocks it owns.  We have all the data we need to build both, so dump
 * everything and start over.
 *
 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 */
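
/*
 * Worked example (illustrative only; the numbers assume the standard 64-inode
 * chunk with 16 holemask bits, i.e. 4 inodes per bit): if only the first 32
 * inodes of a chunk were ever given disk space, the record's holemask has its
 * high 8 bits set (0xFF00) to mark inodes 32-63 as holes, ir_count is 32, and
 * ir_free tracks which of inodes 0-31 are not in use.  The rebuild below
 * starts every record as "all holes, all free" and clears hole and free bits
 * as clusters and in-use inodes are discovered.
 */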
struct xrep_ibt {
	/* Record under construction. */
	struct xfs_inobt_rec_incore	rie;

	/* new inobt information */
	struct xrep_newbt	new_inobt;

	/* new finobt information */
	struct xrep_newbt	new_finobt;

	/* Old inode btree blocks we found in the rmap. */
	struct xagb_bitmap	old_iallocbt_blocks;

	/* Reconstructed inode records. */
	struct xfarray		*inode_records;

	struct xfs_scrub	*sc;

	/* Number of inodes assigned disk space. */
	unsigned int		icount;

	/* Number of inodes in use. */
	unsigned int		iused;

	/* Number of finobt records needed. */
	unsigned int		finobt_recs;

	/* get_records()'s position in the inode record array. */
	xfarray_idx_t		array_cur;
};
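
/*
 * Note: the reconstructed records above are staged in an xfarray, an
 * xfile-backed array (see scrub/xfarray.h), so an AG packed with inodes does
 * not have to keep every xfs_inobt_rec_incore pinned in kernel memory while
 * the new btrees are being assembled.
 */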
/*
 * Is this inode in use?  If the inode is in memory we can tell from i_mode,
 * otherwise we have to check di_mode in the on-disk buffer.  We only care
 * that the high (i.e. non-permission) bits of _mode are zero.  This should be
 * safe because repair keeps all AG headers locked until the end, and any
 * process trying to perform an inode allocation/free must lock the AGI.
 *
 * @cluster_ag_base is the inode offset of the cluster within the AG.
 * @cluster_bp is the cluster buffer.
 * @cluster_index is the inode offset within the inode cluster.
 */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_dinode	*dip;
	xfs_agino_t		agino;
	unsigned int		cluster_buf_base;
	unsigned int		offset;
	int			error;

	agino = cluster_ag_base + cluster_index;

	/* Inode uncached or half assembled, read disk buffer */
	cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
	offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
	if (offset >= BBTOB(cluster_bp->b_length))
		return -EFSCORRUPTED;
	dip = xfs_buf_offset(cluster_bp, offset);
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
		return -EFSCORRUPTED;

	if (dip->di_version >= 3 &&
	    be64_to_cpu(dip->di_ino) != xfs_agino_to_ino(ri->sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Will the in-core inode tell us if it's in use? */
	error = xchk_inode_is_allocated(sc, agino, inuse);
	if (!error)
		return 0;

	/* Otherwise fall back to the mode in the on-disk buffer. */
	*inuse = dip->di_mode != 0;
	return 0;
}
/* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
	if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL)
		return -EFSCORRUPTED;

	if (ri->rie.ir_freecount > 0)
		ri->finobt_recs++;

	trace_xrep_ibt_found(ri->sc->sa.pag, &ri->rie);

	error = xfarray_append(ri->inode_records, &ri->rie);
	if (error)
		return error;

	ri->rie.ir_startino = NULLAGINO;
	return 0;
}
/*
 * Given an extent of inodes and an inode cluster buffer, calculate the
 * location of the corresponding inobt record (creating it if necessary),
 * then update the parts of the holemask and freemask of that record that
 * correspond to the inode extent we were given.
 *
 * @cluster_ir_startino is the AG inode number of an inobt record that we're
 * proposing to create for this inode cluster.  If sparse inodes are enabled,
 * we must round down to a chunk boundary to find the actual sparse record.
 * @cluster_bp is the buffer of the inode cluster.
 * @nr_inodes is the number of inodes to check from the cluster.
 */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agino_t		ir_startino;
	unsigned int		cluster_base;
	unsigned int		cluster_index;
	int			error = 0;

	ir_startino = cluster_ir_startino;
	if (xfs_has_sparseinodes(mp))
		ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
	cluster_base = cluster_ir_startino - ir_startino;

	/*
	 * If the accumulated inobt record doesn't map this cluster, add it to
	 * the list and reset it.
	 */
	if (ri->rie.ir_startino != NULLAGINO &&
	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
		error = xrep_ibt_stash(ri);
		if (error)
			return error;
	}

	if (ri->rie.ir_startino == NULLAGINO) {
		ri->rie.ir_startino = ir_startino;
		ri->rie.ir_free = XFS_INOBT_ALL_FREE;
		ri->rie.ir_holemask = 0xFFFF;
		ri->rie.ir_count = 0;
	}

	/* Record the whole cluster. */
	ri->icount += nr_inodes;
	ri->rie.ir_count += nr_inodes;
	ri->rie.ir_holemask &= ~xfs_inobt_maskn(
				cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
				nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);

	/* Which inodes within this cluster are free? */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		bool		inuse = false;

		error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
				cluster_bp, cluster_index, &inuse);
		if (error)
			return error;
		if (!inuse)
			continue;
		ri->iused++;
		ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
						   cluster_index);
	}

	return 0;
}
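
/*
 * Example geometry (illustrative; assumes 4k blocks and 512-byte inodes,
 * where an inode cluster buffer holds 32 inodes): each 64-inode chunk then
 * spans two clusters, so two calls to xrep_ibt_cluster_record() above
 * accumulate into the same in-progress record before it is stashed.  In the
 * opposite case (clusters bigger than a chunk), the irec_index loop in
 * xrep_ibt_process_cluster() below emits one record per XFS_INODES_PER_CHUNK
 * step within the cluster.
 */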
/*
 * For each inode cluster covering the physical extent recorded by the rmapbt,
 * we must calculate the properly aligned startino of that cluster, then
 * iterate each cluster to fill in used and filled masks appropriately.  We
 * then use the (startino, used, filled) information to construct the
 * appropriate inode records.
 */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	struct xfs_imap		imap;
	struct xfs_buf		*cluster_bp;
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		cluster_ag_base;
	xfs_agino_t		irec_index;
	unsigned int		nr_inodes;
	int			error;

	nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
			XFS_INODES_PER_CHUNK);

	/*
	 * Grab the inode cluster buffer.  This is safe to do with a broken
	 * inobt because imap_to_bp directly maps the buffer without touching
	 * either inode btree.
	 */
	imap.im_blkno = xfs_agbno_to_daddr(sc->sa.pag, cluster_bno);
	imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
	imap.im_boffset = 0;
	error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
	if (error)
		return error;

	/*
	 * Record the contents of each possible inobt record mapping this
	 * cluster.
	 */
	cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
	for (irec_index = 0;
	     irec_index < igeo->inodes_per_cluster;
	     irec_index += XFS_INODES_PER_CHUNK) {
		error = xrep_ibt_cluster_record(ri,
				cluster_ag_base + irec_index, cluster_bp,
				nr_inodes);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, cluster_bp);
	return error;
}
/* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		agino;
	enum xbtree_recpacking	outcome;
	int			error;

	/* Inode records must be within the AG. */
	if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
		return -EFSCORRUPTED;

	/* The entire record must align to the inode cluster size. */
	if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
	    !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
		return -EFSCORRUPTED;

	/*
	 * The entire record must also adhere to the inode cluster alignment
	 * size if sparse inodes are not enabled.
	 */
	if (!xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, igeo->cluster_align) ||
	     !IS_ALIGNED(agbno + len, igeo->cluster_align)))
		return -EFSCORRUPTED;

	/*
	 * On a sparse inode fs, this cluster could be part of a sparse chunk.
	 * Sparse clusters must be aligned to sparse chunk alignment.
	 */
	if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align &&
	    (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
	     !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
		return -EFSCORRUPTED;

	/* Make sure the entire range of blocks are valid AG inodes. */
	agino = XFS_AGB_TO_AGINO(mp, agbno);
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
/* Found a fragment of the old inode btrees; dispose of them later. */
STATIC int
xrep_ibt_record_old_btree_blocks(
	struct xrep_ibt		*ri,
	const struct xfs_rmap_irec *rec)
{
	if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
			rec->rm_blockcount);
}
/* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt		*ri,
	const struct xfs_rmap_irec *rec)
{
	struct xfs_mount	*mp = ri->sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agblock_t		cluster_base;
	int			error;

	error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
			rec->rm_blockcount);
	if (error)
		return error;

	trace_xrep_ibt_walk_rmap(ri->sc->sa.pag, rec);

	/*
	 * Record the free/hole masks for each inode cluster that could be
	 * mapped by this rmap record.
	 */
	for (cluster_base = 0;
	     cluster_base < rec->rm_blockcount;
	     cluster_base += igeo->blocks_per_cluster) {
		error = xrep_ibt_process_cluster(ri,
				rec->rm_startblock + cluster_base);
		if (error)
			return error;
	}

	return 0;
}
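
/*
 * Dispatch a reverse mapping record: OWN_INOBT blocks belonged to the old
 * inode btrees and are queued for reaping, OWN_INODES blocks contain inode
 * clusters and are scanned to rebuild the records, and all other owners are
 * ignored.
 */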
STATIC int
xrep_ibt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	switch (rec->rm_owner) {
	case XFS_RMAP_OWN_INOBT:
		return xrep_ibt_record_old_btree_blocks(ri, rec);
	case XFS_RMAP_OWN_INODES:
		return xrep_ibt_record_inode_blocks(ri, rec);
	}

	return 0;
}
/*
 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
 * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
 * the inode btrees.  The caller must clean up the lists if anything goes
 * wrong.
 */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	ri->rie.ir_startino = NULLAGINO;

	/* Collect all reverse mappings for inode blocks. */
	xrep_ag_btcur_init(sc, &sc->sa);
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/* If we have a record ready to go, add it to the array. */
	if (ri->rie.ir_startino != NULLAGINO)
		return xrep_ibt_stash(ri);

	return 0;
}
/* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_agi		*agi = sc->sa.agi_bp->b_addr;
	unsigned int		freecount = ri->icount - ri->iused;

	/* Trigger inode count recalculation */
	xfs_force_summary_recalc(sc->mp);

	/*
	 * The AGI header contains extra information related to the inode
	 * btrees, so we must update those fields here.
	 */
	agi->agi_count = cpu_to_be32(ri->icount);
	agi->agi_freecount = cpu_to_be32(freecount);
	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
			XFS_AGI_COUNT | XFS_AGI_FREECOUNT);

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagi(sc);
}
/* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/*
		 * The finobt only indexes records with at least one free
		 * inode, so skip fully allocated chunks.
		 */
		do {
			error = xfarray_load(ri->inode_records,
					ri->array_cur++, irec);
		} while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
/* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
/* Feed one of the new inobt blocks to the bulk loader. */
STATIC int
xrep_ibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
}
/* Feed one of the new finobt blocks to the bulk loader. */
STATIC int
xrep_fibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
}
/* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt		*ri)
{
	struct xfs_inobt_rec_incore irec;
	xfarray_idx_t		cur;
	xfs_agino_t		next_agino = 0;
	int			error = 0;

	foreach_xfarray_idx(ri->inode_records, cur) {
		if (xchk_should_terminate(ri->sc, &error))
			return error;

		error = xfarray_load(ri->inode_records, cur, &irec);
		if (error)
			return error;

		if (irec.ir_startino < next_agino)
			return -EFSCORRUPTED;

		next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
	}

	return error;
}
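
/*
 * The rebuild proper, in outline: verify that the staged records do not
 * overlap, set up staging cursors with fake roots for the inobt (and the
 * finobt if enabled), compute the new tree geometry, reserve the blocks,
 * bulk load both trees from the record array, commit the staged roots into
 * the AGI, and finally update the AGI inode counters.  The live AGI is not
 * touched until the commit step.
 */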
/* Build new inode btrees and dispose of the old one. */
STATIC int
xrep_ibt_build_new_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur = NULL;
	bool			need_finobt;
	int			error;

	need_finobt = xfs_has_finobt(sc->mp);

	/*
	 * Create new btrees for staging all the inobt records we collected
	 * earlier.  The records were collected in order of increasing agino,
	 * so we do not have to sort them.  Ensure there are no overlapping
	 * records.
	 */
	error = xrep_ibt_check_overlap(ri);
	if (error)
		return error;

	/*
	 * The new inode btrees will not be rooted in the AGI until we've
	 * successfully rebuilt the tree.
	 *
	 * Start by setting up the inobt staging cursor.
	 */
	xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT,
			xfs_agbno_to_fsb(sc->sa.pag, XFS_IBT_BLOCK(sc->mp)),
			XFS_AG_RESV_NONE);
	ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
	ri->new_inobt.bload.get_records = xrep_ibt_get_records;

	ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL);
	xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake);
	error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
			xfarray_length(ri->inode_records));
	if (error)
		goto err_inocur;

	/* Set up finobt staging cursor. */
	if (need_finobt) {
		enum xfs_ag_resv_type	resv = XFS_AG_RESV_METADATA;

		if (sc->mp->m_finobt_nores)
			resv = XFS_AG_RESV_NONE;

		xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
				xfs_agbno_to_fsb(sc->sa.pag, XFS_FIBT_BLOCK(sc->mp)),
				resv);
		ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
		ri->new_finobt.bload.get_records = xrep_fibt_get_records;

		fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL);
		xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake);
		error = xfs_btree_bload_compute_geometry(fino_cur,
				&ri->new_finobt.bload, ri->finobt_recs);
		if (error)
			goto err_finocur;
	}

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_finocur;

	/* Reserve all the space we need to build the new btrees. */
	error = xrep_newbt_alloc_blocks(&ri->new_inobt,
			ri->new_inobt.bload.nr_blocks);
	if (error)
		goto err_finocur;

	if (need_finobt) {
		error = xrep_newbt_alloc_blocks(&ri->new_finobt,
				ri->new_finobt.bload.nr_blocks);
		if (error)
			goto err_finocur;
	}

	/* Add all inobt records. */
	ri->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
	if (error)
		goto err_finocur;

	/* Add all finobt records. */
	if (need_finobt) {
		ri->array_cur = XFARRAY_CURSOR_INIT;
		error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
		if (error)
			goto err_finocur;
	}

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
	xfs_btree_del_cursor(ino_cur, 0);

	if (fino_cur) {
		xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
		xfs_btree_del_cursor(fino_cur, 0);
	}

	/* Reset the AGI counters now that we've changed the inode roots. */
	error = xrep_ibt_reset_counters(ri);
	if (error)
		goto err_finobt;

	/* Free unused blocks and bitmap. */
	if (need_finobt) {
		error = xrep_newbt_commit(&ri->new_finobt);
		if (error)
			goto err_inobt;
	}
	error = xrep_newbt_commit(&ri->new_inobt);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_finocur:
	if (need_finobt)
		xfs_btree_del_cursor(fino_cur, error);
err_inocur:
	xfs_btree_del_cursor(ino_cur, error);
err_finobt:
	if (need_finobt)
		xrep_newbt_cancel(&ri->new_finobt);
err_inobt:
	xrep_newbt_cancel(&ri->new_inobt);
	return error;
}
/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_ibt_remove_old_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	/*
	 * Free the old inode btree blocks if they're not in use.  It's ok to
	 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
	 * reservation because we reset the reservation before releasing the
	 * AGI and AGF header buffer locks.
	 */
	error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
			&XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
	if (error)
		return error;

	/*
	 * If the finobt is enabled and has a per-AG reservation, make sure we
	 * reinitialize the per-AG reservations.
	 */
	if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
		sc->flags |= XREP_RESET_PERAG_RESV;

	return 0;
}
/* Repair both inode btrees. */
int
xrep_iallocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_ibt		*ri;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	xfs_agino_t		first_agino, last_agino;
	int			error = 0;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
	if (!ri)
		return -ENOMEM;
	ri->sc = sc;

	/* We rebuild both inode btrees. */
	sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;

	/* Set up enough storage to handle an AG with nothing but inodes. */
	xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino);
	last_agino /= XFS_INODES_PER_CHUNK;
	descr = xchk_xfile_ag_descr(sc, "inode index records");
	error = xfarray_create(descr, last_agino,
			sizeof(struct xfs_inobt_rec_incore),
			&ri->inode_records);
	kfree(descr);
	if (error)
		goto out_ri;

	/* Collect the inode data and find the old btree blocks. */
	xagb_bitmap_init(&ri->old_iallocbt_blocks);
	error = xrep_ibt_find_inodes(ri);
	if (error)
		goto out_bitmap;

	/* Rebuild the inode indexes. */
	error = xrep_ibt_build_new_trees(ri);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_ibt_remove_old_trees(ri);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
	xfarray_destroy(ri->inode_records);
out_ri:
	kfree(ri);
	return error;
}
/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_iallocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
	error = xchk_iallocbt(sc);
	if (error)
		goto out;

	if (xfs_has_finobt(sc->mp)) {
		sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
		error = xchk_iallocbt(sc);
	}

out:
	sc->sm->sm_type = old_type;
	return error;
}