1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
27 #include "xfs_rmap_btree.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "xfs_symlink_remote.h"
41 #include "scrub/xfs_scrub.h"
42 #include "scrub/scrub.h"
43 #include "scrub/common.h"
44 #include "scrub/btree.h"
45 #include "scrub/trace.h"
46 #include "scrub/repair.h"
47 #include "scrub/iscan.h"
48 #include "scrub/readdir.h"
49 #include "scrub/tempfile.h"
55 * Roughly speaking, inode problems can be classified based on whether or not
56 * they trip the dinode verifiers. If those trip, then we won't be able to
57 * xfs_iget ourselves the inode.
59 * Therefore, the xrep_dinode_* functions fix anything that will cause the
60 * inode buffer verifier or the dinode verifier to fail. The xrep_inode_* functions
61 * fix things on live incore inodes. The inode repair functions make decisions
62 * with security and usability implications when reviving a file:
64 * - Files with zero di_mode or a garbage di_mode are converted to a regular file
65 * that only root can read. This file may not actually contain user data,
66 * if the file was not previously a regular file. Setuid and setgid bits
69 * - Zero-size directories can be truncated to look empty. It is necessary to
70 * run the bmapbtd and directory repair functions to fully rebuild the
73 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary
74 * to run the bmapbtd and symlink repair functions to salvage the symlink.
76 * - Invalid extent size hints will be removed.
78 * - Quotacheck will be scheduled if we repaired an inode that was so badly
79 * damaged that the ondisk inode had to be rebuilt.
81 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
82 * Setuid and setgid bits are cleared.
84 * - Data and attr forks are reset to extents format with zero extents if the
85 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta
86 * repair functions to recover the space mapping.
88 * - ACLs will not be recovered if the attr fork is zapped or the extended
89 * attribute structure itself requires salvaging.
91 * - If the attr fork is zapped, the user and group ids are reset to root and
92 * the setuid and setgid bits are removed.
96 * All the information we need to repair the ondisk inode if we can't iget the
97 * incore inode. We don't allocate this buffer unless we're going to perform
98 * a repair to the ondisk inode cluster buffer.
101 /* Inode mapping that we saved from the initial lookup attempt. */
102 struct xfs_imap imap
;
104 struct xfs_scrub
*sc
;
106 /* Blocks in use on the data device by data extents or bmbt blocks. */
107 xfs_rfsblock_t data_blocks
;
109 /* Blocks in use on the rt device. */
110 xfs_rfsblock_t rt_blocks
;
112 /* Blocks in use by the attr fork. */
113 xfs_rfsblock_t attr_blocks
;
115 /* Number of data device extents for the data fork. */
116 xfs_extnum_t data_extents
;
119 * Number of realtime device extents for the data fork. If
120 * data_extents and rt_extents indicate that the data fork has extents
121 * on both devices, we'll just back away slowly.
123 xfs_extnum_t rt_extents
;
125 /* Number of (data device) extents for the attr fork. */
126 xfs_aextnum_t attr_extents
;
128 /* Sick state to set after zapping parts of the inode. */
129 unsigned int ino_sick_mask
;
131 /* Must we remove all access from this file? */
134 /* Inode scanner to see if we can find the ftype from dirents */
135 struct xchk_iscan ftype_iscan
;
136 uint8_t alleged_ftype
;
140 * Setup function for inode repair. @imap contains the ondisk inode mapping
141 * information so that we can correct the ondisk inode cluster buffer if
142 * necessary to make iget work.
146 struct xfs_scrub
*sc
,
147 const struct xfs_imap
*imap
)
149 struct xrep_inode
*ri
;
151 sc
->buf
= kzalloc(sizeof(struct xrep_inode
), XCHK_GFP_FLAGS
);
156 memcpy(&ri
->imap
, imap
, sizeof(struct xfs_imap
));
162 * Make sure this ondisk inode can pass the inode buffer verifier. This is
163 * not the same as the dinode verifier.
166 xrep_dinode_buf_core(
167 struct xfs_scrub
*sc
,
169 unsigned int ioffset
)
171 struct xfs_dinode
*dip
= xfs_buf_offset(bp
, ioffset
);
172 struct xfs_trans
*tp
= sc
->tp
;
173 struct xfs_mount
*mp
= sc
->mp
;
176 bool magic_ok
= false;
177 bool unlinked_ok
= false;
179 agino
= be32_to_cpu(dip
->di_next_unlinked
);
181 if (xfs_verify_agino_or_null(bp
->b_pag
, agino
))
184 if (dip
->di_magic
== cpu_to_be16(XFS_DINODE_MAGIC
) &&
185 xfs_dinode_good_version(mp
, dip
->di_version
))
188 if (xfs_verify_cksum((char *)dip
, mp
->m_sb
.sb_inodesize
,
192 if (magic_ok
&& unlinked_ok
&& crc_ok
)
196 dip
->di_magic
= cpu_to_be16(XFS_DINODE_MAGIC
);
200 dip
->di_next_unlinked
= cpu_to_be32(NULLAGINO
);
201 xfs_dinode_calc_crc(mp
, dip
);
202 xfs_trans_buf_set_type(tp
, bp
, XFS_BLFT_DINO_BUF
);
203 xfs_trans_log_buf(tp
, bp
, ioffset
,
204 ioffset
+ sizeof(struct xfs_dinode
) - 1);
207 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
210 struct xfs_scrub
*sc
,
213 struct xfs_mount
*mp
= sc
->mp
;
217 ni
= XFS_BB_TO_FSB(mp
, bp
->b_length
) * mp
->m_sb
.sb_inopblock
;
218 for (i
= 0; i
< ni
; i
++)
219 xrep_dinode_buf_core(sc
, bp
, i
<< mp
->m_sb
.sb_inodelog
);
222 /* Reinitialize things that never change in an inode. */
225 struct xfs_scrub
*sc
,
226 struct xfs_dinode
*dip
)
228 trace_xrep_dinode_header(sc
, dip
);
230 dip
->di_magic
= cpu_to_be16(XFS_DINODE_MAGIC
);
231 if (!xfs_dinode_good_version(sc
->mp
, dip
->di_version
))
233 dip
->di_ino
= cpu_to_be64(sc
->sm
->sm_ino
);
234 uuid_copy(&dip
->di_uuid
, &sc
->mp
->m_sb
.sb_meta_uuid
);
235 dip
->di_gen
= cpu_to_be32(sc
->sm
->sm_gen
);
239 * If this directory entry points to the scrub target inode, then the directory
240 * we're scanning is the parent of the scrub target inode.
243 xrep_dinode_findmode_dirent(
244 struct xfs_scrub
*sc
,
245 struct xfs_inode
*dp
,
246 xfs_dir2_dataptr_t dapos
,
247 const struct xfs_name
*name
,
251 struct xrep_inode
*ri
= priv
;
254 if (xchk_should_terminate(ri
->sc
, &error
))
257 if (ino
!= sc
->sm
->sm_ino
)
260 /* Ignore garbage directory entry names. */
261 if (name
->len
== 0 || !xfs_dir2_namecheck(name
->name
, name
->len
))
262 return -EFSCORRUPTED
;
264 /* Don't pick up dot or dotdot entries; we only want child dirents. */
265 if (xfs_dir2_samename(name
, &xfs_name_dotdot
) ||
266 xfs_dir2_samename(name
, &xfs_name_dot
))
270 * Uhoh, more than one parent for this inode and they don't agree on
273 if (ri
->alleged_ftype
!= XFS_DIR3_FT_UNKNOWN
&&
274 ri
->alleged_ftype
!= name
->type
) {
275 trace_xrep_dinode_findmode_dirent_inval(ri
->sc
, dp
, name
->type
,
277 return -EFSCORRUPTED
;
280 /* We found a potential parent; remember the ftype. */
281 trace_xrep_dinode_findmode_dirent(ri
->sc
, dp
, name
->type
);
282 ri
->alleged_ftype
= name
->type
;
286 /* Try to lock a directory, or wait a jiffy. */
288 xrep_dinode_ilock_nowait(
289 struct xfs_inode
*dp
,
290 unsigned int lock_mode
)
292 if (xfs_ilock_nowait(dp
, lock_mode
))
295 schedule_timeout_killable(1);
300 * Try to lock a directory to look for ftype hints. Since we already hold the
301 * AGI buffer, we cannot block waiting for the ILOCK because rename can take
302 * the ILOCK and then try to lock AGIs.
305 xrep_dinode_trylock_directory(
306 struct xrep_inode
*ri
,
307 struct xfs_inode
*dp
,
308 unsigned int *lock_modep
)
310 unsigned long deadline
= jiffies
+ msecs_to_jiffies(30000);
311 unsigned int lock_mode
;
315 if (xchk_should_terminate(ri
->sc
, &error
))
318 if (xfs_need_iread_extents(&dp
->i_df
))
319 lock_mode
= XFS_ILOCK_EXCL
;
321 lock_mode
= XFS_ILOCK_SHARED
;
323 if (xrep_dinode_ilock_nowait(dp
, lock_mode
)) {
324 *lock_modep
= lock_mode
;
327 } while (!time_is_before_jiffies(deadline
));
332 * If this is a directory, walk the dirents looking for any that point to the
333 * scrub target inode.
336 xrep_dinode_findmode_walk_directory(
337 struct xrep_inode
*ri
,
338 struct xfs_inode
*dp
)
340 struct xfs_scrub
*sc
= ri
->sc
;
341 unsigned int lock_mode
;
344 /* Ignore temporary repair directories. */
345 if (xrep_is_tempfile(dp
))
349 * Scan the directory to see if it contains an entry pointing to
350 * the inode that we are repairing.
352 error
= xrep_dinode_trylock_directory(ri
, dp
, &lock_mode
);
357 * If this directory is known to be sick, we cannot scan it reliably
360 if (xfs_inode_has_sickness(dp
, XFS_SICK_INO_CORE
|
363 error
= -EFSCORRUPTED
;
368 * We cannot complete our parent pointer scan if a directory looks as
369 * though it has been zapped by the inode record repair code.
371 if (xchk_dir_looks_zapped(dp
)) {
376 error
= xchk_dir_walk(sc
, dp
, xrep_dinode_findmode_dirent
, ri
);
381 xfs_iunlock(dp
, lock_mode
);
386 * Try to find the mode of the inode being repaired by looking for directories
387 * that point down to this file.
390 xrep_dinode_find_mode(
391 struct xrep_inode
*ri
,
394 struct xfs_scrub
*sc
= ri
->sc
;
395 struct xfs_inode
*dp
;
398 /* No ftype means we have no other metadata to consult. */
399 if (!xfs_has_ftype(sc
->mp
)) {
405 * Scan all directories for parents that might point down to this
406 * inode. Skip the inode being repaired during the scan since it
407 * cannot be its own parent. Note that we still hold the AGI locked
408 * so there's a real possibility that _iscan_iter can return EBUSY.
410 xchk_iscan_start(sc
, 5000, 100, &ri
->ftype_iscan
);
411 xchk_iscan_set_agi_trylock(&ri
->ftype_iscan
);
412 ri
->ftype_iscan
.skip_ino
= sc
->sm
->sm_ino
;
413 ri
->alleged_ftype
= XFS_DIR3_FT_UNKNOWN
;
414 while ((error
= xchk_iscan_iter(&ri
->ftype_iscan
, &dp
)) == 1) {
415 if (S_ISDIR(VFS_I(dp
)->i_mode
))
416 error
= xrep_dinode_findmode_walk_directory(ri
, dp
);
417 xchk_iscan_mark_visited(&ri
->ftype_iscan
, dp
);
421 if (xchk_should_terminate(sc
, &error
))
424 xchk_iscan_iter_finish(&ri
->ftype_iscan
);
425 xchk_iscan_teardown(&ri
->ftype_iscan
);
427 if (error
== -EBUSY
) {
428 if (ri
->alleged_ftype
!= XFS_DIR3_FT_UNKNOWN
) {
430 * If we got an EBUSY after finding at least one
431 * dirent, that means the scan found an inode on the
432 * inactivation list and could not open it. Accept the
433 * alleged ftype and install a new mode below.
436 } else if (!(sc
->flags
& XCHK_TRY_HARDER
)) {
438 * Otherwise, retry the operation one time to see if
439 * the reason for the delay is an inode from the same
440 * cluster buffer waiting on the inactivation list.
449 * Convert the discovered ftype into the file mode. If all else fails,
452 switch (ri
->alleged_ftype
) {
453 case XFS_DIR3_FT_DIR
:
456 case XFS_DIR3_FT_WHT
:
457 case XFS_DIR3_FT_CHRDEV
:
460 case XFS_DIR3_FT_BLKDEV
:
463 case XFS_DIR3_FT_FIFO
:
466 case XFS_DIR3_FT_SOCK
:
469 case XFS_DIR3_FT_SYMLINK
:
479 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. */
482 struct xrep_inode
*ri
,
483 struct xfs_dinode
*dip
)
485 struct xfs_scrub
*sc
= ri
->sc
;
486 uint16_t mode
= be16_to_cpu(dip
->di_mode
);
489 trace_xrep_dinode_mode(sc
, dip
);
491 if (mode
== 0 || xfs_mode_to_ftype(mode
) != XFS_DIR3_FT_UNKNOWN
)
494 /* Try to fix the mode. If we cannot, then leave everything alone. */
495 error
= xrep_dinode_find_mode(ri
, &mode
);
500 /* temporary failure or fatal signal */
506 /* some other error, assume S_IFREG */
511 /* bad mode, so we set it to a file that only root can read */
512 dip
->di_mode
= cpu_to_be16(mode
);
519 /* Fix unused link count fields having nonzero values. */
522 struct xfs_dinode
*dip
)
524 if (dip
->di_version
< 2) {
529 if (xfs_dinode_is_metadir(dip
)) {
530 if (be16_to_cpu(dip
->di_metatype
) >= XFS_METAFILE_MAX
)
531 dip
->di_metatype
= cpu_to_be16(XFS_METAFILE_UNKNOWN
);
533 dip
->di_metatype
= 0;
537 /* Fix any conflicting flags that the verifiers complain about. */
540 struct xfs_scrub
*sc
,
541 struct xfs_dinode
*dip
,
544 struct xfs_mount
*mp
= sc
->mp
;
545 uint64_t flags2
= be64_to_cpu(dip
->di_flags2
);
546 uint16_t flags
= be16_to_cpu(dip
->di_flags
);
547 uint16_t mode
= be16_to_cpu(dip
->di_mode
);
549 trace_xrep_dinode_flags(sc
, dip
);
552 flags
|= XFS_DIFLAG_REALTIME
;
554 flags
&= ~XFS_DIFLAG_REALTIME
;
557 * For regular files on a reflink filesystem, set the REFLINK flag to
558 * protect shared extents. A later stage will actually check those
559 * extents and clear the flag if possible.
561 if (xfs_has_reflink(mp
) && S_ISREG(mode
))
562 flags2
|= XFS_DIFLAG2_REFLINK
;
564 flags2
&= ~(XFS_DIFLAG2_REFLINK
| XFS_DIFLAG2_COWEXTSIZE
);
565 if (flags
& XFS_DIFLAG_REALTIME
)
566 flags2
&= ~XFS_DIFLAG2_REFLINK
;
567 if (!xfs_has_bigtime(mp
))
568 flags2
&= ~XFS_DIFLAG2_BIGTIME
;
569 if (!xfs_has_large_extent_counts(mp
))
570 flags2
&= ~XFS_DIFLAG2_NREXT64
;
571 if (flags2
& XFS_DIFLAG2_NREXT64
)
572 dip
->di_nrext64_pad
= 0;
573 else if (dip
->di_version
>= 3)
576 if (flags2
& XFS_DIFLAG2_METADATA
) {
579 fa
= xfs_dinode_verify_metadir(sc
->mp
, dip
, mode
, flags
,
582 flags2
&= ~XFS_DIFLAG2_METADATA
;
585 dip
->di_flags
= cpu_to_be16(flags
);
586 dip
->di_flags2
= cpu_to_be64(flags2
);
590 * Blow out symlink; now it points nowhere. We don't have to worry about
591 * incore state because this inode is failing the verifiers.
594 xrep_dinode_zap_symlink(
595 struct xrep_inode
*ri
,
596 struct xfs_dinode
*dip
)
598 struct xfs_scrub
*sc
= ri
->sc
;
601 trace_xrep_dinode_zap_symlink(sc
, dip
);
603 dip
->di_format
= XFS_DINODE_FMT_LOCAL
;
604 dip
->di_size
= cpu_to_be64(1);
605 p
= XFS_DFORK_PTR(dip
, XFS_DATA_FORK
);
607 ri
->ino_sick_mask
|= XFS_SICK_INO_SYMLINK_ZAPPED
;
611 * Blow out dir, make the parent point to the root. In the future repair will
612 * reconstruct this directory for us. Note that there's no in-core directory
613 * inode because the sf verifier tripped, so we don't have to worry about the
618 struct xrep_inode
*ri
,
619 struct xfs_dinode
*dip
)
621 struct xfs_scrub
*sc
= ri
->sc
;
622 struct xfs_mount
*mp
= sc
->mp
;
623 struct xfs_dir2_sf_hdr
*sfp
;
626 trace_xrep_dinode_zap_dir(sc
, dip
);
628 dip
->di_format
= XFS_DINODE_FMT_LOCAL
;
629 i8count
= mp
->m_sb
.sb_rootino
> XFS_DIR2_MAX_SHORT_INUM
;
630 sfp
= XFS_DFORK_PTR(dip
, XFS_DATA_FORK
);
632 sfp
->i8count
= i8count
;
633 xfs_dir2_sf_put_parent_ino(sfp
, mp
->m_sb
.sb_rootino
);
634 dip
->di_size
= cpu_to_be64(xfs_dir2_sf_hdr_size(i8count
));
635 ri
->ino_sick_mask
|= XFS_SICK_INO_DIR_ZAPPED
;
638 /* Make sure we don't have a garbage file size. */
641 struct xrep_inode
*ri
,
642 struct xfs_dinode
*dip
)
644 struct xfs_scrub
*sc
= ri
->sc
;
645 uint64_t size
= be64_to_cpu(dip
->di_size
);
646 uint16_t mode
= be16_to_cpu(dip
->di_mode
);
648 trace_xrep_dinode_size(sc
, dip
);
650 switch (mode
& S_IFMT
) {
655 /* di_size can't be nonzero for special files */
659 /* Regular files can't be larger than 2^63-1 bytes. */
660 dip
->di_size
= cpu_to_be64(size
& ~(1ULL << 63));
664 * Truncate ridiculously oversized symlinks. If the size is
665 * zero, zap the symlink to a one-byte placeholder target. Both of
666 * these conditions trigger dinode verifier errors, so there
667 * is no in-core state to reset.
669 if (size
> XFS_SYMLINK_MAXLEN
)
670 dip
->di_size
= cpu_to_be64(XFS_SYMLINK_MAXLEN
);
672 xrep_dinode_zap_symlink(ri
, dip
);
676 * Directories can't have a size larger than 32G. If the size
677 * is zero, reset it to an empty directory. Both of these
678 * conditions trigger dinode verifier errors, so there is no
679 * in-core state to reset.
681 if (size
> XFS_DIR2_SPACE_SIZE
)
682 dip
->di_size
= cpu_to_be64(XFS_DIR2_SPACE_SIZE
);
684 xrep_dinode_zap_dir(ri
, dip
);
689 /* Fix extent size hints. */
691 xrep_dinode_extsize_hints(
692 struct xfs_scrub
*sc
,
693 struct xfs_dinode
*dip
)
695 struct xfs_mount
*mp
= sc
->mp
;
696 uint64_t flags2
= be64_to_cpu(dip
->di_flags2
);
697 uint16_t flags
= be16_to_cpu(dip
->di_flags
);
698 uint16_t mode
= be16_to_cpu(dip
->di_mode
);
702 trace_xrep_dinode_extsize_hints(sc
, dip
);
704 fa
= xfs_inode_validate_extsize(mp
, be32_to_cpu(dip
->di_extsize
),
708 dip
->di_flags
&= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE
|
709 XFS_DIFLAG_EXTSZINHERIT
);
712 if (dip
->di_version
< 3)
715 fa
= xfs_inode_validate_cowextsize(mp
, be32_to_cpu(dip
->di_cowextsize
),
716 mode
, flags
, flags2
);
718 dip
->di_cowextsize
= 0;
719 dip
->di_flags2
&= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE
);
723 /* Count extents and blocks for an inode given an rmap. */
725 xrep_dinode_walk_rmap(
726 struct xfs_btree_cur
*cur
,
727 const struct xfs_rmap_irec
*rec
,
730 struct xrep_inode
*ri
= priv
;
733 if (xchk_should_terminate(ri
->sc
, &error
))
736 /* We only care about this inode. */
737 if (rec
->rm_owner
!= ri
->sc
->sm
->sm_ino
)
740 if (rec
->rm_flags
& XFS_RMAP_ATTR_FORK
) {
741 ri
->attr_blocks
+= rec
->rm_blockcount
;
742 if (!(rec
->rm_flags
& XFS_RMAP_BMBT_BLOCK
))
748 ri
->data_blocks
+= rec
->rm_blockcount
;
749 if (!(rec
->rm_flags
& XFS_RMAP_BMBT_BLOCK
))
755 /* Count extents and blocks for an inode from all AG rmap data. */
757 xrep_dinode_count_ag_rmaps(
758 struct xrep_inode
*ri
,
759 struct xfs_perag
*pag
)
761 struct xfs_btree_cur
*cur
;
765 error
= xfs_alloc_read_agf(pag
, ri
->sc
->tp
, 0, &agf
);
769 cur
= xfs_rmapbt_init_cursor(ri
->sc
->mp
, ri
->sc
->tp
, agf
, pag
);
770 error
= xfs_rmap_query_all(cur
, xrep_dinode_walk_rmap
, ri
);
771 xfs_btree_del_cursor(cur
, error
);
772 xfs_trans_brelse(ri
->sc
->tp
, agf
);
776 /* Count extents and blocks for a given inode from all rmap data. */
778 xrep_dinode_count_rmaps(
779 struct xrep_inode
*ri
)
781 struct xfs_perag
*pag
= NULL
;
784 if (!xfs_has_rmapbt(ri
->sc
->mp
) || xfs_has_realtime(ri
->sc
->mp
))
787 while ((pag
= xfs_perag_next(ri
->sc
->mp
, pag
))) {
788 error
= xrep_dinode_count_ag_rmaps(ri
, pag
);
795 /* Can't have extents on both the rt and the data device. */
796 if (ri
->data_extents
&& ri
->rt_extents
)
797 return -EFSCORRUPTED
;
799 trace_xrep_dinode_count_rmaps(ri
->sc
,
800 ri
->data_blocks
, ri
->rt_blocks
, ri
->attr_blocks
,
801 ri
->data_extents
, ri
->rt_extents
, ri
->attr_extents
);
805 /* Return true if this extents-format ifork looks like garbage. */
807 xrep_dinode_bad_extents_fork(
808 struct xfs_scrub
*sc
,
809 struct xfs_dinode
*dip
,
810 unsigned int dfork_size
,
813 struct xfs_bmbt_irec
new;
814 struct xfs_bmbt_rec
*dp
;
819 nex
= xfs_dfork_nextents(dip
, whichfork
);
820 if (nex
> dfork_size
/ sizeof(struct xfs_bmbt_rec
))
823 dp
= XFS_DFORK_PTR(dip
, whichfork
);
825 isrt
= dip
->di_flags
& cpu_to_be16(XFS_DIFLAG_REALTIME
);
826 for (i
= 0; i
< nex
; i
++, dp
++) {
829 xfs_bmbt_disk_get_all(dp
, &new);
830 fa
= xfs_bmap_validate_extent_raw(sc
->mp
, isrt
, whichfork
,
839 /* Return true if this btree-format ifork looks like garbage. */
841 xrep_dinode_bad_bmbt_fork(
842 struct xfs_scrub
*sc
,
843 struct xfs_dinode
*dip
,
844 unsigned int dfork_size
,
847 struct xfs_bmdr_block
*dfp
;
854 nex
= xfs_dfork_nextents(dip
, whichfork
);
855 if (nex
<= dfork_size
/ sizeof(struct xfs_bmbt_rec
))
858 if (dfork_size
< sizeof(struct xfs_bmdr_block
))
861 dfp
= XFS_DFORK_PTR(dip
, whichfork
);
862 nrecs
= be16_to_cpu(dfp
->bb_numrecs
);
863 level
= be16_to_cpu(dfp
->bb_level
);
865 if (nrecs
== 0 || xfs_bmdr_space_calc(nrecs
) > dfork_size
)
867 if (level
== 0 || level
>= XFS_BM_MAXLEVELS(sc
->mp
, whichfork
))
870 dmxr
= xfs_bmdr_maxrecs(dfork_size
, 0);
871 for (i
= 1; i
<= nrecs
; i
++) {
872 struct xfs_bmbt_key
*fkp
;
874 xfs_fileoff_t fileoff
;
877 fkp
= xfs_bmdr_key_addr(dfp
, i
);
878 fileoff
= be64_to_cpu(fkp
->br_startoff
);
879 if (!xfs_verify_fileoff(sc
->mp
, fileoff
))
882 fpp
= xfs_bmdr_ptr_addr(dfp
, i
, dmxr
);
883 fsbno
= be64_to_cpu(*fpp
);
884 if (!xfs_verify_fsbno(sc
->mp
, fsbno
))
892 * Check the data fork for things that will fail the ifork verifiers or the
896 xrep_dinode_check_dfork(
897 struct xfs_scrub
*sc
,
898 struct xfs_dinode
*dip
,
904 unsigned int dfork_size
;
907 * Verifier functions take signed int64_t, so check for bogus negative
910 data_size
= be64_to_cpu(dip
->di_size
);
914 fmt
= XFS_DFORK_FORMAT(dip
, XFS_DATA_FORK
);
915 switch (mode
& S_IFMT
) {
920 if (fmt
!= XFS_DINODE_FMT_DEV
)
924 if (fmt
== XFS_DINODE_FMT_LOCAL
)
930 case XFS_DINODE_FMT_LOCAL
:
931 case XFS_DINODE_FMT_EXTENTS
:
932 case XFS_DINODE_FMT_BTREE
:
942 dfork_size
= XFS_DFORK_SIZE(dip
, sc
->mp
, XFS_DATA_FORK
);
943 dfork_ptr
= XFS_DFORK_PTR(dip
, XFS_DATA_FORK
);
946 case XFS_DINODE_FMT_DEV
:
948 case XFS_DINODE_FMT_LOCAL
:
949 /* dir/symlink structure cannot be larger than the fork */
950 if (data_size
> dfork_size
)
952 /* directory structure must pass verification. */
954 xfs_dir2_sf_verify(sc
->mp
, dfork_ptr
, data_size
) != NULL
)
956 /* symlink structure must pass verification. */
958 xfs_symlink_shortform_verify(dfork_ptr
, data_size
) != NULL
)
961 case XFS_DINODE_FMT_EXTENTS
:
962 if (xrep_dinode_bad_extents_fork(sc
, dip
, dfork_size
,
966 case XFS_DINODE_FMT_BTREE
:
967 if (xrep_dinode_bad_bmbt_fork(sc
, dip
, dfork_size
,
979 xrep_dinode_set_data_nextents(
980 struct xfs_dinode
*dip
,
981 xfs_extnum_t nextents
)
983 if (xfs_dinode_has_large_extent_counts(dip
))
984 dip
->di_big_nextents
= cpu_to_be64(nextents
);
986 dip
->di_nextents
= cpu_to_be32(nextents
);
990 xrep_dinode_set_attr_nextents(
991 struct xfs_dinode
*dip
,
992 xfs_extnum_t nextents
)
994 if (xfs_dinode_has_large_extent_counts(dip
))
995 dip
->di_big_anextents
= cpu_to_be32(nextents
);
997 dip
->di_anextents
= cpu_to_be16(nextents
);
1000 /* Reset the data fork to something sane. */
1002 xrep_dinode_zap_dfork(
1003 struct xrep_inode
*ri
,
1004 struct xfs_dinode
*dip
,
1007 struct xfs_scrub
*sc
= ri
->sc
;
1009 trace_xrep_dinode_zap_dfork(sc
, dip
);
1011 ri
->ino_sick_mask
|= XFS_SICK_INO_BMBTD_ZAPPED
;
1013 xrep_dinode_set_data_nextents(dip
, 0);
1014 ri
->data_blocks
= 0;
1017 /* Special files always get reset to DEV */
1018 switch (mode
& S_IFMT
) {
1023 dip
->di_format
= XFS_DINODE_FMT_DEV
;
1029 * If we have data extents, reset to an empty map and hope the user
1030 * will run the bmapbtd checker next.
1032 if (ri
->data_extents
|| ri
->rt_extents
|| S_ISREG(mode
)) {
1033 dip
->di_format
= XFS_DINODE_FMT_EXTENTS
;
1037 /* Otherwise, reset the local format to the minimum. */
1038 switch (mode
& S_IFMT
) {
1040 xrep_dinode_zap_symlink(ri
, dip
);
1043 xrep_dinode_zap_dir(ri
, dip
);
1049 * Check the attr fork for things that will fail the ifork verifiers or the
1053 xrep_dinode_check_afork(
1054 struct xfs_scrub
*sc
,
1055 struct xfs_dinode
*dip
)
1057 struct xfs_attr_sf_hdr
*afork_ptr
;
1059 unsigned int afork_size
;
1061 if (XFS_DFORK_BOFF(dip
) == 0)
1062 return dip
->di_aformat
!= XFS_DINODE_FMT_EXTENTS
||
1063 xfs_dfork_attr_extents(dip
) != 0;
1065 afork_size
= XFS_DFORK_SIZE(dip
, sc
->mp
, XFS_ATTR_FORK
);
1066 afork_ptr
= XFS_DFORK_PTR(dip
, XFS_ATTR_FORK
);
1068 switch (XFS_DFORK_FORMAT(dip
, XFS_ATTR_FORK
)) {
1069 case XFS_DINODE_FMT_LOCAL
:
1070 /* Fork has to be large enough to extract the xattr size. */
1071 if (afork_size
< sizeof(struct xfs_attr_sf_hdr
))
1074 /* xattr structure cannot be larger than the fork */
1075 attr_size
= be16_to_cpu(afork_ptr
->totsize
);
1076 if (attr_size
> afork_size
)
1079 /* xattr structure must pass verification. */
1080 return xfs_attr_shortform_verify(afork_ptr
, attr_size
) != NULL
;
1081 case XFS_DINODE_FMT_EXTENTS
:
1082 if (xrep_dinode_bad_extents_fork(sc
, dip
, afork_size
,
1086 case XFS_DINODE_FMT_BTREE
:
1087 if (xrep_dinode_bad_bmbt_fork(sc
, dip
, afork_size
,
1099 * Reset the attr fork to empty. Since the attr fork could have contained
1100 * ACLs, make the file readable only by root.
1103 xrep_dinode_zap_afork(
1104 struct xrep_inode
*ri
,
1105 struct xfs_dinode
*dip
,
1108 struct xfs_scrub
*sc
= ri
->sc
;
1110 trace_xrep_dinode_zap_afork(sc
, dip
);
1112 ri
->ino_sick_mask
|= XFS_SICK_INO_BMBTA_ZAPPED
;
1114 dip
->di_aformat
= XFS_DINODE_FMT_EXTENTS
;
1115 xrep_dinode_set_attr_nextents(dip
, 0);
1116 ri
->attr_blocks
= 0;
1119 * If the data fork is in btree format, removing the attr fork entirely
1120 * might cause verifier failures if the next level down in the bmbt
1121 * could now fit in the data fork area.
1123 if (dip
->di_format
!= XFS_DINODE_FMT_BTREE
)
1124 dip
->di_forkoff
= 0;
1125 dip
->di_mode
= cpu_to_be16(mode
& ~0777);
1130 /* Make sure the fork offset is a sensible value. */
1132 xrep_dinode_ensure_forkoff(
1133 struct xrep_inode
*ri
,
1134 struct xfs_dinode
*dip
,
1137 struct xfs_bmdr_block
*bmdr
;
1138 struct xfs_scrub
*sc
= ri
->sc
;
1139 xfs_extnum_t attr_extents
, data_extents
;
1140 size_t bmdr_minsz
= xfs_bmdr_space_calc(1);
1141 unsigned int lit_sz
= XFS_LITINO(sc
->mp
);
1142 unsigned int afork_min
, dfork_min
;
1144 trace_xrep_dinode_ensure_forkoff(sc
, dip
);
1147 * Before calling this function, xrep_dinode_core ensured that both
1148 * forks actually fit inside their respective literal areas. If this
1149 * was not the case, the fork was reset to FMT_EXTENTS with zero
1150 * records. If the rmapbt scan found attr or data fork blocks, this
1151 * will be noted in the dinode_stats, and we must leave enough room
1152 * for the bmap repair code to reconstruct the mapping structure.
1154 * First, compute the minimum space required for the attr fork.
1156 switch (dip
->di_aformat
) {
1157 case XFS_DINODE_FMT_LOCAL
:
1159 * If we still have a shortform xattr structure at all, that
1160 * means the attr fork area was exactly large enough to fit
1163 afork_min
= XFS_DFORK_SIZE(dip
, sc
->mp
, XFS_ATTR_FORK
);
1165 case XFS_DINODE_FMT_EXTENTS
:
1166 attr_extents
= xfs_dfork_attr_extents(dip
);
1169 * We must maintain sufficient space to hold the entire
1170 * extent map array in the attr fork. Note that we
1171 * previously zapped the fork if it had no chance of
1172 * fitting in the inode.
1174 afork_min
= sizeof(struct xfs_bmbt_rec
) * attr_extents
;
1175 } else if (ri
->attr_extents
> 0) {
1177 * The attr fork thinks it has zero extents, but we
1178 * found some xattr extents. We need to leave enough
1179 * empty space here so that the incore attr fork will
1180 * get created (and hence trigger the attr fork bmap
1183 afork_min
= bmdr_minsz
;
1185 /* No extents on disk or found in rmapbt. */
1189 case XFS_DINODE_FMT_BTREE
:
1190 /* Must have space for btree header and key/pointers. */
1191 bmdr
= XFS_DFORK_PTR(dip
, XFS_ATTR_FORK
);
1192 afork_min
= xfs_bmap_broot_space(sc
->mp
, bmdr
);
1195 /* We should never see any other formats. */
1200 /* Compute the minimum space required for the data fork. */
1201 switch (dip
->di_format
) {
1202 case XFS_DINODE_FMT_DEV
:
1203 dfork_min
= sizeof(__be32
);
1205 case XFS_DINODE_FMT_UUID
:
1206 dfork_min
= sizeof(uuid_t
);
1208 case XFS_DINODE_FMT_LOCAL
:
1210 * If we still have a shortform data fork at all, that means
1211 * the data fork area was large enough to fit whatever was in
1214 dfork_min
= be64_to_cpu(dip
->di_size
);
1216 case XFS_DINODE_FMT_EXTENTS
:
1217 data_extents
= xfs_dfork_data_extents(dip
);
1220 * We must maintain sufficient space to hold the entire
1221 * extent map array in the data fork. Note that we
1222 * previously zapped the fork if it had no chance of
1223 * fitting in the inode.
1225 dfork_min
= sizeof(struct xfs_bmbt_rec
) * data_extents
;
1226 } else if (ri
->data_extents
> 0 || ri
->rt_extents
> 0) {
1228 * The data fork thinks it has zero extents, but we
1229 * found some data extents. We need to leave enough
1230 * empty space here so that the data fork bmap repair
1231 * will recover the mappings.
1233 dfork_min
= bmdr_minsz
;
1235 /* No extents on disk or found in rmapbt. */
1239 case XFS_DINODE_FMT_BTREE
:
1240 /* Must have space for btree header and key/pointers. */
1241 bmdr
= XFS_DFORK_PTR(dip
, XFS_DATA_FORK
);
1242 dfork_min
= xfs_bmap_broot_space(sc
->mp
, bmdr
);
1250 * Round all values up to the nearest 8 bytes, because that is the
1251 * precision of di_forkoff.
1253 afork_min
= roundup(afork_min
, 8);
1254 dfork_min
= roundup(dfork_min
, 8);
1255 bmdr_minsz
= roundup(bmdr_minsz
, 8);
1257 ASSERT(dfork_min
<= lit_sz
);
1258 ASSERT(afork_min
<= lit_sz
);
1261 * If the data fork was zapped and we don't have enough space for the
1262 * recovery fork, move the attr fork up.
1264 if (dip
->di_format
== XFS_DINODE_FMT_EXTENTS
&&
1265 xfs_dfork_data_extents(dip
) == 0 &&
1266 (ri
->data_extents
> 0 || ri
->rt_extents
> 0) &&
1267 bmdr_minsz
> XFS_DFORK_DSIZE(dip
, sc
->mp
)) {
1268 if (bmdr_minsz
+ afork_min
> lit_sz
) {
1270 * The attr fork and the stub fork we need to recover
1271 * the data fork won't both fit. Zap the attr fork.
1273 xrep_dinode_zap_afork(ri
, dip
, mode
);
1274 afork_min
= bmdr_minsz
;
1276 void *before
, *after
;
1278 /* Otherwise, just slide the attr fork up. */
1279 before
= XFS_DFORK_APTR(dip
);
1280 dip
->di_forkoff
= bmdr_minsz
>> 3;
1281 after
= XFS_DFORK_APTR(dip
);
1282 memmove(after
, before
, XFS_DFORK_ASIZE(dip
, sc
->mp
));
1287 * If the attr fork was zapped and we don't have enough space for the
1288 * recovery fork, move the attr fork down.
1290 if (dip
->di_aformat
== XFS_DINODE_FMT_EXTENTS
&&
1291 xfs_dfork_attr_extents(dip
) == 0 &&
1292 ri
->attr_extents
> 0 &&
1293 bmdr_minsz
> XFS_DFORK_ASIZE(dip
, sc
->mp
)) {
1294 if (dip
->di_format
== XFS_DINODE_FMT_BTREE
) {
1296 * If the data fork is in btree format then we can't
1297 * adjust forkoff because that runs the risk of
1298 * violating the extents/btree format transition rules.
1300 } else if (bmdr_minsz
+ dfork_min
> lit_sz
) {
1302 * If we can't move the attr fork, too bad, we lose the
1303 * attr fork and leak its blocks.
1305 xrep_dinode_zap_afork(ri
, dip
, mode
);
1308 * Otherwise, just slide the attr fork down. The attr
1309 * fork is empty, so we don't have any old contents to
1312 dip
->di_forkoff
= (lit_sz
- bmdr_minsz
) >> 3;
1318 * Zap the data/attr forks if we spot anything that isn't going to pass the
1319 * ifork verifiers or the ifork formatters, because we need to get the inode
1320 * into good enough shape that the higher level repair functions can run.
1323 xrep_dinode_zap_forks(
1324 struct xrep_inode
*ri
,
1325 struct xfs_dinode
*dip
)
1327 struct xfs_scrub
*sc
= ri
->sc
;
1328 xfs_extnum_t data_extents
;
1329 xfs_extnum_t attr_extents
;
1330 xfs_filblks_t nblocks
;
1332 bool zap_datafork
= false;
1333 bool zap_attrfork
= ri
->zap_acls
;
1335 trace_xrep_dinode_zap_forks(sc
, dip
);
1337 mode
= be16_to_cpu(dip
->di_mode
);
1339 data_extents
= xfs_dfork_data_extents(dip
);
1340 attr_extents
= xfs_dfork_attr_extents(dip
);
1341 nblocks
= be64_to_cpu(dip
->di_nblocks
);
1343 /* Inode counters don't make sense? */
1344 if (data_extents
> nblocks
)
1345 zap_datafork
= true;
1346 if (attr_extents
> nblocks
)
1347 zap_attrfork
= true;
1348 if (data_extents
+ attr_extents
> nblocks
)
1349 zap_datafork
= zap_attrfork
= true;
1352 zap_datafork
= xrep_dinode_check_dfork(sc
, dip
, mode
);
1354 zap_attrfork
= xrep_dinode_check_afork(sc
, dip
);
1356 /* Zap whatever's bad. */
1358 xrep_dinode_zap_afork(ri
, dip
, mode
);
1360 xrep_dinode_zap_dfork(ri
, dip
, mode
);
1361 xrep_dinode_ensure_forkoff(ri
, dip
, mode
);
1364 * Zero di_nblocks if we don't have any extents at all to satisfy the
1367 data_extents
= xfs_dfork_data_extents(dip
);
1368 attr_extents
= xfs_dfork_attr_extents(dip
);
1369 if (data_extents
+ attr_extents
== 0)
1370 dip
->di_nblocks
= 0;
1373 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1376 struct xrep_inode
*ri
)
1378 struct xfs_scrub
*sc
= ri
->sc
;
1380 struct xfs_dinode
*dip
;
1381 xfs_ino_t ino
= sc
->sm
->sm_ino
;
1385 /* Figure out what this inode had mapped in both forks. */
1386 error
= xrep_dinode_count_rmaps(ri
);
1390 /* Read the inode cluster buffer. */
1391 error
= xfs_trans_read_buf(sc
->mp
, sc
->tp
, sc
->mp
->m_ddev_targp
,
1392 ri
->imap
.im_blkno
, ri
->imap
.im_len
, XBF_UNMAPPED
, &bp
,
1397 /* Make sure we can pass the inode buffer verifier. */
1398 xrep_dinode_buf(sc
, bp
);
1399 bp
->b_ops
= &xfs_inode_buf_ops
;
1401 /* Fix everything the verifier will complain about. */
1402 dip
= xfs_buf_offset(bp
, ri
->imap
.im_boffset
);
1403 xrep_dinode_header(sc
, dip
);
1404 iget_error
= xrep_dinode_mode(ri
, dip
);
1407 xrep_dinode_nlinks(dip
);
1408 xrep_dinode_flags(sc
, dip
, ri
->rt_extents
> 0);
1409 xrep_dinode_size(ri
, dip
);
1410 xrep_dinode_extsize_hints(sc
, dip
);
1411 xrep_dinode_zap_forks(ri
, dip
);
1414 /* Write out the inode. */
1415 trace_xrep_dinode_fixed(sc
, dip
);
1416 xfs_dinode_calc_crc(sc
->mp
, dip
);
1417 xfs_trans_buf_set_type(sc
->tp
, bp
, XFS_BLFT_DINO_BUF
);
1418 xfs_trans_log_buf(sc
->tp
, bp
, ri
->imap
.im_boffset
,
1419 ri
->imap
.im_boffset
+ sc
->mp
->m_sb
.sb_inodesize
- 1);
1422 * In theory, we've fixed the ondisk inode record enough that we should
1423 * be able to load the inode into the cache. Try to iget that inode
1424 * now while we hold the AGI and the inode cluster buffer and take the
1425 * IOLOCK so that we can continue with repairs without anyone else
1426 * accessing the inode. If iget fails, we still need to commit the
1430 iget_error
= xchk_iget(sc
, ino
, &sc
->ip
);
1432 xchk_ilock(sc
, XFS_IOLOCK_EXCL
);
1435 * Commit the inode cluster buffer updates and drop the AGI buffer that
1436 * we've been holding since scrub setup. From here on out, repairs
1437 * deal only with the cached inode.
1439 error
= xrep_trans_commit(sc
);
1446 error
= xchk_trans_alloc(sc
, 0);
1450 error
= xrep_ino_dqattach(sc
);
1454 xchk_ilock(sc
, XFS_ILOCK_EXCL
);
1455 if (ri
->ino_sick_mask
)
1456 xfs_inode_mark_sick(sc
->ip
, ri
->ino_sick_mask
);
1460 /* Fix everything xfs_dinode_verify cares about. */
1462 xrep_dinode_problems(
1463 struct xrep_inode
*ri
)
1465 struct xfs_scrub
*sc
= ri
->sc
;
1468 error
= xrep_dinode_core(ri
);
1472 /* We had to fix a totally busted inode, schedule quotacheck. */
1473 if (XFS_IS_UQUOTA_ON(sc
->mp
))
1474 xrep_force_quotacheck(sc
, XFS_DQTYPE_USER
);
1475 if (XFS_IS_GQUOTA_ON(sc
->mp
))
1476 xrep_force_quotacheck(sc
, XFS_DQTYPE_GROUP
);
1477 if (XFS_IS_PQUOTA_ON(sc
->mp
))
1478 xrep_force_quotacheck(sc
, XFS_DQTYPE_PROJ
);
/*
 * Fix problems that the verifiers don't care about.  In general these are
 * errors that don't cause problems elsewhere in the kernel that we can easily
 * detect, so we don't check them all that rigorously.
 */
1489 /* Make sure block and extent counts are ok. */
1491 xrep_inode_blockcounts(
1492 struct xfs_scrub
*sc
)
1494 struct xfs_ifork
*ifp
;
1495 xfs_filblks_t count
;
1496 xfs_filblks_t acount
;
1497 xfs_extnum_t nextents
;
1500 trace_xrep_inode_blockcounts(sc
);
1502 /* Set data fork counters from the data fork mappings. */
1503 error
= xfs_bmap_count_blocks(sc
->tp
, sc
->ip
, XFS_DATA_FORK
,
1507 if (xfs_is_reflink_inode(sc
->ip
)) {
1509 * data fork blockcount can exceed physical storage if a user
1510 * reflinks the same block over and over again.
1513 } else if (XFS_IS_REALTIME_INODE(sc
->ip
)) {
1514 if (count
>= sc
->mp
->m_sb
.sb_rblocks
)
1515 return -EFSCORRUPTED
;
1517 if (count
>= sc
->mp
->m_sb
.sb_dblocks
)
1518 return -EFSCORRUPTED
;
1520 error
= xrep_ino_ensure_extent_count(sc
, XFS_DATA_FORK
, nextents
);
1523 sc
->ip
->i_df
.if_nextents
= nextents
;
1525 /* Set attr fork counters from the attr fork mappings. */
1526 ifp
= xfs_ifork_ptr(sc
->ip
, XFS_ATTR_FORK
);
1528 error
= xfs_bmap_count_blocks(sc
->tp
, sc
->ip
, XFS_ATTR_FORK
,
1529 &nextents
, &acount
);
1532 if (count
>= sc
->mp
->m_sb
.sb_dblocks
)
1533 return -EFSCORRUPTED
;
1534 error
= xrep_ino_ensure_extent_count(sc
, XFS_ATTR_FORK
,
1538 ifp
->if_nextents
= nextents
;
1543 sc
->ip
->i_nblocks
= count
+ acount
;
1547 /* Check for invalid uid/gid/prid. */
1550 struct xfs_scrub
*sc
)
1554 trace_xrep_inode_ids(sc
);
1556 if (!uid_valid(VFS_I(sc
->ip
)->i_uid
)) {
1557 i_uid_write(VFS_I(sc
->ip
), 0);
1559 if (XFS_IS_UQUOTA_ON(sc
->mp
))
1560 xrep_force_quotacheck(sc
, XFS_DQTYPE_USER
);
1563 if (!gid_valid(VFS_I(sc
->ip
)->i_gid
)) {
1564 i_gid_write(VFS_I(sc
->ip
), 0);
1566 if (XFS_IS_GQUOTA_ON(sc
->mp
))
1567 xrep_force_quotacheck(sc
, XFS_DQTYPE_GROUP
);
1570 if (sc
->ip
->i_projid
== -1U) {
1571 sc
->ip
->i_projid
= 0;
1573 if (XFS_IS_PQUOTA_ON(sc
->mp
))
1574 xrep_force_quotacheck(sc
, XFS_DQTYPE_PROJ
);
1577 /* strip setuid/setgid if we touched any of the ids */
1579 VFS_I(sc
->ip
)->i_mode
&= ~(S_ISUID
| S_ISGID
);
1583 xrep_clamp_timestamp(
1584 struct xfs_inode
*ip
,
1585 struct timespec64
*ts
)
1587 ts
->tv_nsec
= clamp_t(long, ts
->tv_nsec
, 0, NSEC_PER_SEC
);
1588 *ts
= timestamp_truncate(*ts
, VFS_I(ip
));
1591 /* Nanosecond counters can't have more than 1 billion. */
1593 xrep_inode_timestamps(
1594 struct xfs_inode
*ip
)
1596 struct timespec64 tstamp
;
1597 struct inode
*inode
= VFS_I(ip
);
1599 tstamp
= inode_get_atime(inode
);
1600 xrep_clamp_timestamp(ip
, &tstamp
);
1601 inode_set_atime_to_ts(inode
, tstamp
);
1603 tstamp
= inode_get_mtime(inode
);
1604 xrep_clamp_timestamp(ip
, &tstamp
);
1605 inode_set_mtime_to_ts(inode
, tstamp
);
1607 tstamp
= inode_get_ctime(inode
);
1608 xrep_clamp_timestamp(ip
, &tstamp
);
1609 inode_set_ctime_to_ts(inode
, tstamp
);
1611 xrep_clamp_timestamp(ip
, &ip
->i_crtime
);
1614 /* Fix inode flags that don't make sense together. */
1617 struct xfs_scrub
*sc
)
1621 trace_xrep_inode_flags(sc
);
1623 mode
= VFS_I(sc
->ip
)->i_mode
;
1625 /* Clear junk flags */
1626 if (sc
->ip
->i_diflags
& ~XFS_DIFLAG_ANY
)
1627 sc
->ip
->i_diflags
&= ~XFS_DIFLAG_ANY
;
1629 /* NEWRTBM only applies to realtime bitmaps */
1630 if (sc
->ip
->i_ino
== sc
->mp
->m_sb
.sb_rbmino
)
1631 sc
->ip
->i_diflags
|= XFS_DIFLAG_NEWRTBM
;
1633 sc
->ip
->i_diflags
&= ~XFS_DIFLAG_NEWRTBM
;
1635 /* These only make sense for directories. */
1637 sc
->ip
->i_diflags
&= ~(XFS_DIFLAG_RTINHERIT
|
1638 XFS_DIFLAG_EXTSZINHERIT
|
1639 XFS_DIFLAG_PROJINHERIT
|
1640 XFS_DIFLAG_NOSYMLINKS
);
1642 /* These only make sense for files. */
1644 sc
->ip
->i_diflags
&= ~(XFS_DIFLAG_REALTIME
|
1645 XFS_DIFLAG_EXTSIZE
);
1647 /* These only make sense for non-rt files. */
1648 if (sc
->ip
->i_diflags
& XFS_DIFLAG_REALTIME
)
1649 sc
->ip
->i_diflags
&= ~XFS_DIFLAG_FILESTREAM
;
1651 /* Immutable and append only? Drop the append. */
1652 if ((sc
->ip
->i_diflags
& XFS_DIFLAG_IMMUTABLE
) &&
1653 (sc
->ip
->i_diflags
& XFS_DIFLAG_APPEND
))
1654 sc
->ip
->i_diflags
&= ~XFS_DIFLAG_APPEND
;
1656 /* Clear junk flags. */
1657 if (sc
->ip
->i_diflags2
& ~XFS_DIFLAG2_ANY
)
1658 sc
->ip
->i_diflags2
&= ~XFS_DIFLAG2_ANY
;
1660 /* No reflink flag unless we support it and it's a file. */
1661 if (!xfs_has_reflink(sc
->mp
) || !S_ISREG(mode
))
1662 sc
->ip
->i_diflags2
&= ~XFS_DIFLAG2_REFLINK
;
1664 /* DAX only applies to files and dirs. */
1665 if (!(S_ISREG(mode
) || S_ISDIR(mode
)))
1666 sc
->ip
->i_diflags2
&= ~XFS_DIFLAG2_DAX
;
1668 /* No reflink files on the realtime device. */
1669 if (sc
->ip
->i_diflags
& XFS_DIFLAG_REALTIME
)
1670 sc
->ip
->i_diflags2
&= ~XFS_DIFLAG2_REFLINK
;
1674 * Fix size problems with block/node format directories. If we fail to find
1675 * the extent list, just bail out and let the bmapbtd repair functions clean
1679 xrep_inode_blockdir_size(
1680 struct xfs_scrub
*sc
)
1682 struct xfs_iext_cursor icur
;
1683 struct xfs_bmbt_irec got
;
1684 struct xfs_ifork
*ifp
;
1688 trace_xrep_inode_blockdir_size(sc
);
1690 error
= xfs_iread_extents(sc
->tp
, sc
->ip
, XFS_DATA_FORK
);
1694 /* Find the last block before 32G; this is the dir size. */
1695 ifp
= xfs_ifork_ptr(sc
->ip
, XFS_DATA_FORK
);
1696 off
= XFS_B_TO_FSB(sc
->mp
, XFS_DIR2_SPACE_SIZE
);
1697 if (!xfs_iext_lookup_extent_before(sc
->ip
, ifp
, &off
, &icur
, &got
)) {
1698 /* zero-extents directory? */
1702 off
= got
.br_startoff
+ got
.br_blockcount
;
1703 sc
->ip
->i_disk_size
= min_t(loff_t
, XFS_DIR2_SPACE_SIZE
,
1704 XFS_FSB_TO_B(sc
->mp
, off
));
1707 /* Fix size problems with short format directories. */
1709 xrep_inode_sfdir_size(
1710 struct xfs_scrub
*sc
)
1712 struct xfs_ifork
*ifp
;
1714 trace_xrep_inode_sfdir_size(sc
);
1716 ifp
= xfs_ifork_ptr(sc
->ip
, XFS_DATA_FORK
);
1717 sc
->ip
->i_disk_size
= ifp
->if_bytes
;
1721 * Fix any irregularities in a directory inode's size now that we can iterate
1722 * extent maps and access other regular inode data.
1725 xrep_inode_dir_size(
1726 struct xfs_scrub
*sc
)
1728 trace_xrep_inode_dir_size(sc
);
1730 switch (sc
->ip
->i_df
.if_format
) {
1731 case XFS_DINODE_FMT_EXTENTS
:
1732 case XFS_DINODE_FMT_BTREE
:
1733 xrep_inode_blockdir_size(sc
);
1735 case XFS_DINODE_FMT_LOCAL
:
1736 xrep_inode_sfdir_size(sc
);
1741 /* Fix extent size hint problems. */
1744 struct xfs_scrub
*sc
)
1746 /* Fix misaligned extent size hints on a directory. */
1747 if ((sc
->ip
->i_diflags
& XFS_DIFLAG_RTINHERIT
) &&
1748 (sc
->ip
->i_diflags
& XFS_DIFLAG_EXTSZINHERIT
) &&
1749 xfs_extlen_to_rtxmod(sc
->mp
, sc
->ip
->i_extsize
) > 0) {
1750 sc
->ip
->i_extsize
= 0;
1751 sc
->ip
->i_diflags
&= ~XFS_DIFLAG_EXTSZINHERIT
;
1755 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1758 struct xfs_scrub
*sc
)
1760 struct xfs_mount
*mp
= sc
->mp
;
1761 struct xfs_inode
*ip
= sc
->ip
;
1762 struct inode
*inode
= VFS_I(ip
);
1764 if (!xfs_has_parent(mp
))
1768 * Unlinked inodes that cannot be added to the directory tree will not
1769 * have a parent pointer.
1771 if (inode
->i_nlink
== 0 && !(inode
->i_state
& I_LINKABLE
))
1774 /* Children of the superblock do not have parent pointers. */
1775 if (xchk_inode_is_sb_rooted(ip
))
1778 /* Inode already has an attr fork; no further work possible here. */
1779 if (xfs_inode_has_attr_fork(ip
))
1782 return xfs_bmap_add_attrfork(sc
->tp
, ip
,
1783 sizeof(struct xfs_attr_sf_hdr
), true);
1786 /* Fix any irregularities in an inode that the verifiers don't catch. */
1788 xrep_inode_problems(
1789 struct xfs_scrub
*sc
)
1793 error
= xrep_inode_blockcounts(sc
);
1796 error
= xrep_inode_pptr(sc
);
1799 xrep_inode_timestamps(sc
->ip
);
1800 xrep_inode_flags(sc
);
1803 * We can now do a better job fixing the size of a directory now that
1804 * we can scan the data fork extents than we could in xrep_dinode_size.
1806 if (S_ISDIR(VFS_I(sc
->ip
)->i_mode
))
1807 xrep_inode_dir_size(sc
);
1808 xrep_inode_extsize(sc
);
1810 trace_xrep_inode_fixed(sc
);
1811 xfs_trans_log_inode(sc
->tp
, sc
->ip
, XFS_ILOG_CORE
);
1812 return xrep_roll_trans(sc
);
1816 * Make sure this inode's unlinked list pointers are consistent with its
1820 xrep_inode_unlinked(
1821 struct xfs_scrub
*sc
)
1823 unsigned int nlink
= VFS_I(sc
->ip
)->i_nlink
;
1827 * If this inode is linked from the directory tree and on the unlinked
1828 * list, remove it from the unlinked list.
1830 if (nlink
> 0 && xfs_inode_on_unlinked_list(sc
->ip
)) {
1831 struct xfs_perag
*pag
;
1834 pag
= xfs_perag_get(sc
->mp
,
1835 XFS_INO_TO_AGNO(sc
->mp
, sc
->ip
->i_ino
));
1836 error
= xfs_iunlink_remove(sc
->tp
, pag
, sc
->ip
);
1843 * If this inode is not linked from the directory tree yet not on the
1844 * unlinked list, put it on the unlinked list.
1846 if (nlink
== 0 && !xfs_inode_on_unlinked_list(sc
->ip
)) {
1847 error
= xfs_iunlink(sc
->tp
, sc
->ip
);
1855 /* Repair an inode's fields. */
1858 struct xfs_scrub
*sc
)
1863 * No inode? That means we failed the _iget verifiers. Repair all
1864 * the things that the inode verifiers care about, then retry _iget.
1867 struct xrep_inode
*ri
= sc
->buf
;
1871 error
= xrep_dinode_problems(ri
);
1872 if (error
== -EBUSY
) {
1874 * Directory scan to recover inode mode encountered a
1875 * busy inode, so we did not continue repairing things.
1882 /* By this point we had better have a working incore inode. */
1884 return -EFSCORRUPTED
;
1887 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
1889 /* If we found corruption of any kind, try to fix it. */
1890 if ((sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
) ||
1891 (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_XCORRUPT
)) {
1892 error
= xrep_inode_problems(sc
);
1897 /* See if we can clear the reflink flag. */
1898 if (xfs_is_reflink_inode(sc
->ip
)) {
1899 error
= xfs_reflink_clear_inode_flag(sc
->ip
, &sc
->tp
);
1904 /* Reconnect incore unlinked list */
1905 error
= xrep_inode_unlinked(sc
);
1909 return xrep_defer_finish(sc
);