drm/atomic-helper: document drm_atomic_helper_check() restrictions
[drm/drm-misc.git] / fs / xfs / scrub / cow_repair.c
blob5b6194cef3e5e304d392d6e11cf0d5d26419964a
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_alloc.h"
19 #include "xfs_bmap.h"
20 #include "xfs_rmap.h"
21 #include "xfs_refcount.h"
22 #include "xfs_quota.h"
23 #include "xfs_ialloc.h"
24 #include "xfs_ag.h"
25 #include "xfs_error.h"
26 #include "xfs_errortag.h"
27 #include "xfs_icache.h"
28 #include "xfs_refcount_btree.h"
29 #include "scrub/xfs_scrub.h"
30 #include "scrub/scrub.h"
31 #include "scrub/common.h"
32 #include "scrub/trace.h"
33 #include "scrub/repair.h"
34 #include "scrub/bitmap.h"
35 #include "scrub/off_bitmap.h"
36 #include "scrub/fsb_bitmap.h"
37 #include "scrub/reap.h"
40 * CoW Fork Mapping Repair
41 * =======================
43 * Although CoW staging extents are owned by incore CoW inode forks, on disk
44 * they are owned by the refcount btree. The ondisk metadata does not record
45 * any ownership information, which limits what we can do to repair the
46 * mappings in the CoW fork. At most, we can replace ifork mappings that lack
47 * an entry in the refcount btree or are described by a reverse mapping record
48 * whose owner is not OWN_COW.
50 * Replacing extents is also tricky -- we can't touch written CoW fork extents
51 * since they are undergoing writeback, and delalloc extents do not require
52 * repair since they only exist incore. Hence the most we can do is find the
53 * bad parts of unwritten mappings, allocate a replacement set of blocks, and
54 * replace the incore mapping. We use the regular reaping process to unmap
55 * or free the discarded blocks, as appropriate.
57 struct xrep_cow {
58 struct xfs_scrub *sc;
60 /* Bitmap of file offset ranges that need replacing. */
61 struct xoff_bitmap bad_fileoffs;
63 /* Bitmap of fsblocks that were removed from the CoW fork. */
64 struct xfsb_bitmap old_cowfork_fsblocks;
66 /* CoW fork mappings used to scan for bad CoW staging extents. */
67 struct xfs_bmbt_irec irec;
69 /* refcount btree block number of irec.br_startblock */
70 unsigned int irec_startbno;
72 /* refcount btree block number of the next refcount record we expect */
73 unsigned int next_bno;
76 /* CoW staging extent. */
77 struct xrep_cow_extent {
78 xfs_fsblock_t fsbno;
79 xfs_extlen_t len;
83 * Mark the part of the file range that corresponds to the given physical
84 * space. Caller must ensure that the physical range is within xc->irec.
86 STATIC int
87 xrep_cow_mark_file_range(
88 struct xrep_cow *xc,
89 xfs_fsblock_t startblock,
90 xfs_filblks_t blockcount)
92 xfs_fileoff_t startoff;
94 startoff = xc->irec.br_startoff +
95 (startblock - xc->irec.br_startblock);
97 trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
98 blockcount);
100 return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
104 * Trim @src to fit within the CoW fork mapping being examined, and put the
105 * result in @dst.
107 static inline void
108 xrep_cow_trim_refcount(
109 struct xrep_cow *xc,
110 struct xfs_refcount_irec *dst,
111 const struct xfs_refcount_irec *src)
113 unsigned int adj;
115 memcpy(dst, src, sizeof(*dst));
117 if (dst->rc_startblock < xc->irec_startbno) {
118 adj = xc->irec_startbno - dst->rc_startblock;
119 dst->rc_blockcount -= adj;
120 dst->rc_startblock += adj;
123 if (dst->rc_startblock + dst->rc_blockcount >
124 xc->irec_startbno + xc->irec.br_blockcount) {
125 adj = (dst->rc_startblock + dst->rc_blockcount) -
126 (xc->irec_startbno + xc->irec.br_blockcount);
127 dst->rc_blockcount -= adj;
131 /* Mark any shared CoW staging extents. */
132 STATIC int
133 xrep_cow_mark_shared_staging(
134 struct xfs_btree_cur *cur,
135 const struct xfs_refcount_irec *rec,
136 void *priv)
138 struct xrep_cow *xc = priv;
139 struct xfs_refcount_irec rrec;
141 if (!xfs_refcount_check_domain(rec) ||
142 rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
143 return -EFSCORRUPTED;
145 xrep_cow_trim_refcount(xc, &rrec, rec);
147 return xrep_cow_mark_file_range(xc,
148 xfs_agbno_to_fsb(to_perag(cur->bc_group),
149 rrec.rc_startblock),
150 rrec.rc_blockcount);
154 * Mark any portion of the CoW fork file offset range where there is not a CoW
155 * staging extent record in the refcountbt, and keep a record of where we did
156 * find correct refcountbt records. Staging records are always cleaned out at
157 * mount time, so any two inodes trying to map the same staging area would have
158 * already taken the fs down due to refcount btree verifier errors. Hence this
159 * inode should be the sole creator of the staging extent records ondisk.
161 STATIC int
162 xrep_cow_mark_missing_staging(
163 struct xfs_btree_cur *cur,
164 const struct xfs_refcount_irec *rec,
165 void *priv)
167 struct xrep_cow *xc = priv;
168 struct xfs_refcount_irec rrec;
169 int error;
171 if (!xfs_refcount_check_domain(rec) ||
172 rec->rc_domain != XFS_REFC_DOMAIN_COW)
173 return -EFSCORRUPTED;
175 xrep_cow_trim_refcount(xc, &rrec, rec);
177 if (xc->next_bno >= rrec.rc_startblock)
178 goto next;
181 error = xrep_cow_mark_file_range(xc,
182 xfs_agbno_to_fsb(to_perag(cur->bc_group), xc->next_bno),
183 rrec.rc_startblock - xc->next_bno);
184 if (error)
185 return error;
187 next:
188 xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
189 return 0;
193 * Mark any area that does not correspond to a CoW staging rmap. These are
194 * cross-linked areas that must be avoided.
196 STATIC int
197 xrep_cow_mark_missing_staging_rmap(
198 struct xfs_btree_cur *cur,
199 const struct xfs_rmap_irec *rec,
200 void *priv)
202 struct xrep_cow *xc = priv;
203 xfs_agblock_t rec_bno;
204 xfs_extlen_t rec_len;
205 unsigned int adj;
207 if (rec->rm_owner == XFS_RMAP_OWN_COW)
208 return 0;
210 rec_bno = rec->rm_startblock;
211 rec_len = rec->rm_blockcount;
212 if (rec_bno < xc->irec_startbno) {
213 adj = xc->irec_startbno - rec_bno;
214 rec_len -= adj;
215 rec_bno += adj;
218 if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
219 adj = (rec_bno + rec_len) -
220 (xc->irec_startbno + xc->irec.br_blockcount);
221 rec_len -= adj;
224 return xrep_cow_mark_file_range(xc,
225 xfs_agbno_to_fsb(to_perag(cur->bc_group), rec_bno),
226 rec_len);
230 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
231 * extent and mark the corresponding part of the file range in the bitmap.
233 STATIC int
234 xrep_cow_find_bad(
235 struct xrep_cow *xc)
237 struct xfs_refcount_irec rc_low = { 0 };
238 struct xfs_refcount_irec rc_high = { 0 };
239 struct xfs_rmap_irec rm_low = { 0 };
240 struct xfs_rmap_irec rm_high = { 0 };
241 struct xfs_perag *pag;
242 struct xfs_scrub *sc = xc->sc;
243 xfs_agnumber_t agno;
244 int error;
246 agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
247 xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
249 pag = xfs_perag_get(sc->mp, agno);
250 if (!pag)
251 return -EFSCORRUPTED;
253 error = xrep_ag_init(sc, pag, &sc->sa);
254 if (error)
255 goto out_pag;
257 /* Mark any CoW fork extents that are shared. */
258 rc_low.rc_startblock = xc->irec_startbno;
259 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
260 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
261 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
262 xrep_cow_mark_shared_staging, xc);
263 if (error)
264 goto out_sa;
266 /* Make sure there are CoW staging extents for the whole mapping. */
267 rc_low.rc_startblock = xc->irec_startbno;
268 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
269 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
270 xc->next_bno = xc->irec_startbno;
271 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
272 xrep_cow_mark_missing_staging, xc);
273 if (error)
274 goto out_sa;
276 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
277 error = xrep_cow_mark_file_range(xc,
278 xfs_agbno_to_fsb(pag, xc->next_bno),
279 xc->irec_startbno + xc->irec.br_blockcount -
280 xc->next_bno);
281 if (error)
282 goto out_sa;
285 /* Mark any area has an rmap that isn't a COW staging extent. */
286 rm_low.rm_startblock = xc->irec_startbno;
287 memset(&rm_high, 0xFF, sizeof(rm_high));
288 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
289 error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
290 xrep_cow_mark_missing_staging_rmap, xc);
291 if (error)
292 goto out_sa;
295 * If userspace is forcing us to rebuild the CoW fork or someone turned
296 * on the debugging knob, replace everything in the CoW fork.
298 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
299 XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
300 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
301 xc->irec.br_blockcount);
302 if (error)
303 return error;
306 out_sa:
307 xchk_ag_free(sc, &sc->sa);
308 out_pag:
309 xfs_perag_put(pag);
310 return 0;
314 * Allocate a replacement CoW staging extent of up to the given number of
315 * blocks, and fill out the mapping.
317 STATIC int
318 xrep_cow_alloc(
319 struct xfs_scrub *sc,
320 xfs_extlen_t maxlen,
321 struct xrep_cow_extent *repl)
323 struct xfs_alloc_arg args = {
324 .tp = sc->tp,
325 .mp = sc->mp,
326 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
327 .minlen = 1,
328 .maxlen = maxlen,
329 .prod = 1,
330 .resv = XFS_AG_RESV_NONE,
331 .datatype = XFS_ALLOC_USERDATA,
333 int error;
335 error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
336 if (error)
337 return error;
339 error = xfs_alloc_vextent_start_ag(&args,
340 XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
341 if (error)
342 return error;
343 if (args.fsbno == NULLFSBLOCK)
344 return -ENOSPC;
346 xfs_refcount_alloc_cow_extent(sc->tp, args.fsbno, args.len);
348 repl->fsbno = args.fsbno;
349 repl->len = args.len;
350 return 0;
354 * Look up the current CoW fork mapping so that we only allocate enough to
355 * replace a single mapping. If we don't find a mapping that covers the start
356 * of the file range, or we find a delalloc or written extent, something is
357 * seriously wrong, since we didn't drop the ILOCK.
359 static inline int
360 xrep_cow_find_mapping(
361 struct xrep_cow *xc,
362 struct xfs_iext_cursor *icur,
363 xfs_fileoff_t startoff,
364 struct xfs_bmbt_irec *got)
366 struct xfs_inode *ip = xc->sc->ip;
367 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
369 if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
370 goto bad;
372 if (got->br_startoff > startoff)
373 goto bad;
375 if (got->br_blockcount == 0)
376 goto bad;
378 if (isnullstartblock(got->br_startblock))
379 goto bad;
381 if (xfs_bmap_is_written_extent(got))
382 goto bad;
384 return 0;
385 bad:
386 ASSERT(0);
387 return -EFSCORRUPTED;
/*
 * Bit flags naming a side of a mapping.
 * NOTE(review): nothing in the visible part of this file references these;
 * confirm there are no other users before removing them.
 */
#define REPLACE_LEFT_SIDE	(1U << 0)
#define REPLACE_RIGHT_SIDE	(1U << 1)
394 * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
395 * beginning of @got with the space described by @rep.
397 static inline void
398 xrep_cow_replace_mapping(
399 struct xfs_inode *ip,
400 struct xfs_iext_cursor *icur,
401 const struct xfs_bmbt_irec *got,
402 const struct xrep_cow_extent *repl)
404 struct xfs_bmbt_irec new = *got; /* struct copy */
406 ASSERT(repl->len > 0);
407 ASSERT(!isnullstartblock(got->br_startblock));
409 trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
411 if (got->br_blockcount == repl->len) {
413 * The new extent is a complete replacement for the existing
414 * extent. Update the COW fork record.
416 new.br_startblock = repl->fsbno;
417 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
418 return;
422 * The new extent can replace the beginning of the COW fork record.
423 * Move the left side of @got upwards, then insert the new record.
425 new.br_startoff += repl->len;
426 new.br_startblock += repl->len;
427 new.br_blockcount -= repl->len;
428 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
430 new.br_startoff = got->br_startoff;
431 new.br_startblock = repl->fsbno;
432 new.br_blockcount = repl->len;
433 xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
437 * Replace the unwritten CoW staging extent backing the given file range with a
438 * new space extent that isn't as problematic.
440 STATIC int
441 xrep_cow_replace_range(
442 struct xrep_cow *xc,
443 xfs_fileoff_t startoff,
444 xfs_extlen_t *blockcount)
446 struct xfs_iext_cursor icur;
447 struct xrep_cow_extent repl;
448 struct xfs_bmbt_irec got;
449 struct xfs_scrub *sc = xc->sc;
450 xfs_fileoff_t nextoff;
451 xfs_extlen_t alloc_len;
452 int error;
455 * Put the existing CoW fork mapping in @got. If @got ends before
456 * @rep, truncate @rep so we only replace one extent mapping at a time.
458 error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
459 if (error)
460 return error;
461 nextoff = min(startoff + *blockcount,
462 got.br_startoff + got.br_blockcount);
465 * Allocate a replacement extent. If we don't fill all the blocks,
466 * shorten the quantity that will be deleted in this step.
468 alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
469 nextoff - startoff);
470 error = xrep_cow_alloc(sc, alloc_len, &repl);
471 if (error)
472 return error;
475 * Replace the old mapping with the new one, and commit the metadata
476 * changes made so far.
478 xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
480 xfs_inode_set_cowblocks_tag(sc->ip);
481 error = xfs_defer_finish(&sc->tp);
482 if (error)
483 return error;
485 /* Note the old CoW staging extents; we'll reap them all later. */
486 error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, got.br_startblock,
487 repl.len);
488 if (error)
489 return error;
491 *blockcount = repl.len;
492 return 0;
496 * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
497 * reservation.
499 STATIC int
500 xrep_cow_replace(
501 uint64_t startoff,
502 uint64_t blockcount,
503 void *priv)
505 struct xrep_cow *xc = priv;
506 int error = 0;
508 while (blockcount > 0) {
509 xfs_extlen_t len = min_t(xfs_filblks_t, blockcount,
510 XFS_MAX_BMBT_EXTLEN);
512 error = xrep_cow_replace_range(xc, startoff, &len);
513 if (error)
514 break;
516 blockcount -= len;
517 startoff += len;
520 return error;
524 * Repair an inode's CoW fork. The CoW fork is an in-core structure, so
525 * there's no btree to rebuid. Instead, we replace any mappings that are
526 * cross-linked or lack ondisk CoW fork records in the refcount btree.
529 xrep_bmap_cow(
530 struct xfs_scrub *sc)
532 struct xrep_cow *xc;
533 struct xfs_iext_cursor icur;
534 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
535 int error;
537 if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
538 return -EOPNOTSUPP;
540 if (!ifp)
541 return 0;
543 /* realtime files aren't supported yet */
544 if (XFS_IS_REALTIME_INODE(sc->ip))
545 return -EOPNOTSUPP;
548 * If we're somehow not in extents format, then reinitialize it to
549 * an empty extent mapping fork and exit.
551 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
552 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
553 ifp->if_nextents = 0;
554 return 0;
557 xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
558 if (!xc)
559 return -ENOMEM;
561 xfs_trans_ijoin(sc->tp, sc->ip, 0);
563 xc->sc = sc;
564 xoff_bitmap_init(&xc->bad_fileoffs);
565 xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
567 for_each_xfs_iext(ifp, &icur, &xc->irec) {
568 if (xchk_should_terminate(sc, &error))
569 goto out_bitmap;
572 * delalloc reservations only exist incore, so there is no
573 * ondisk metadata that we can examine. Hence we leave them
574 * alone.
576 if (isnullstartblock(xc->irec.br_startblock))
577 continue;
580 * COW fork extents are only in the written state if writeback
581 * is actively writing to disk. We cannot restart the write
582 * at a different disk address since we've already issued the
583 * IO, so we leave these alone and hope for the best.
585 if (xfs_bmap_is_written_extent(&xc->irec))
586 continue;
588 error = xrep_cow_find_bad(xc);
589 if (error)
590 goto out_bitmap;
593 /* Replace any bad unwritten mappings with fresh reservations. */
594 error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
595 if (error)
596 goto out_bitmap;
599 * Reap as many of the old CoW blocks as we can. They are owned ondisk
600 * by the refcount btree, not the inode, so it is correct to treat them
601 * like inode metadata.
603 error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
604 &XFS_RMAP_OINFO_COW);
605 if (error)
606 goto out_bitmap;
608 out_bitmap:
609 xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
610 xoff_bitmap_destroy(&xc->bad_fileoffs);
611 kfree(xc);
612 return error;