1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_log_format.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_da_format.h"
17 #include "xfs_reflink.h"
19 #include "xfs_bmap_util.h"
20 #include "scrub/scrub.h"
21 #include "scrub/common.h"
22 #include "scrub/btree.h"
25 * Grab total control of the inode metadata. It doesn't matter here if
26 * the file data is still changing; exclusive access to the metadata is
37 * Try to get the inode. If the verifiers fail, we try again
40 error
= xchk_get_inode(sc
, ip
);
46 return xchk_trans_alloc(sc
, 0);
51 /* Got the inode, lock it and we're ready to go. */
52 sc
->ilock_flags
= XFS_IOLOCK_EXCL
| XFS_MMAPLOCK_EXCL
;
53 xfs_ilock(sc
->ip
, sc
->ilock_flags
);
54 error
= xchk_trans_alloc(sc
, 0);
57 sc
->ilock_flags
|= XFS_ILOCK_EXCL
;
58 xfs_ilock(sc
->ip
, XFS_ILOCK_EXCL
);
61 /* scrub teardown will unlock and release the inode for us */
67 /* Validate di_extsize hint. */
71 struct xfs_dinode
*dip
,
78 fa
= xfs_inode_validate_extsize(sc
->mp
, be32_to_cpu(dip
->di_extsize
),
81 xchk_ino_set_corrupt(sc
, ino
);
85 * Validate di_cowextsize hint.
87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
88 * These functions must be kept in sync with each other.
91 xchk_inode_cowextsize(
93 struct xfs_dinode
*dip
,
101 fa
= xfs_inode_validate_cowextsize(sc
->mp
,
102 be32_to_cpu(dip
->di_cowextsize
), mode
, flags
,
105 xchk_ino_set_corrupt(sc
, ino
);
108 /* Make sure the di_flags make sense for the inode. */
111 struct xfs_scrub
*sc
,
112 struct xfs_dinode
*dip
,
117 struct xfs_mount
*mp
= sc
->mp
;
119 /* di_flags are all taken, last bit cannot be used */
120 if (flags
& ~XFS_DIFLAG_ANY
)
123 /* rt flags require rt device */
124 if ((flags
& (XFS_DIFLAG_REALTIME
| XFS_DIFLAG_RTINHERIT
)) &&
128 /* new rt bitmap flag only valid for rbmino */
129 if ((flags
& XFS_DIFLAG_NEWRTBM
) && ino
!= mp
->m_sb
.sb_rbmino
)
132 /* directory-only flags */
133 if ((flags
& (XFS_DIFLAG_RTINHERIT
|
134 XFS_DIFLAG_EXTSZINHERIT
|
135 XFS_DIFLAG_PROJINHERIT
|
136 XFS_DIFLAG_NOSYMLINKS
)) &&
140 /* file-only flags */
141 if ((flags
& (XFS_DIFLAG_REALTIME
| FS_XFLAG_EXTSIZE
)) &&
145 /* filestreams and rt make no sense */
146 if ((flags
& XFS_DIFLAG_FILESTREAM
) && (flags
& XFS_DIFLAG_REALTIME
))
151 xchk_ino_set_corrupt(sc
, ino
);
154 /* Make sure the di_flags2 make sense for the inode. */
157 struct xfs_scrub
*sc
,
158 struct xfs_dinode
*dip
,
164 struct xfs_mount
*mp
= sc
->mp
;
166 /* Unknown di_flags2 could be from a future kernel */
167 if (flags2
& ~XFS_DIFLAG2_ANY
)
168 xchk_ino_set_warning(sc
, ino
);
170 /* reflink flag requires reflink feature */
171 if ((flags2
& XFS_DIFLAG2_REFLINK
) &&
172 !xfs_sb_version_hasreflink(&mp
->m_sb
))
175 /* cowextsize flag is checked w.r.t. mode separately */
177 /* file/dir-only flags */
178 if ((flags2
& XFS_DIFLAG2_DAX
) && !(S_ISREG(mode
) || S_ISDIR(mode
)))
181 /* file-only flags */
182 if ((flags2
& XFS_DIFLAG2_REFLINK
) && !S_ISREG(mode
))
185 /* realtime and reflink make no sense, currently */
186 if ((flags
& XFS_DIFLAG_REALTIME
) && (flags2
& XFS_DIFLAG2_REFLINK
))
189 /* dax and reflink make no sense, currently */
190 if ((flags2
& XFS_DIFLAG2_DAX
) && (flags2
& XFS_DIFLAG2_REFLINK
))
195 xchk_ino_set_corrupt(sc
, ino
);
198 /* Scrub all the ondisk inode fields. */
201 struct xfs_scrub
*sc
,
202 struct xfs_dinode
*dip
,
205 struct xfs_mount
*mp
= sc
->mp
;
207 unsigned long long isize
;
213 flags
= be16_to_cpu(dip
->di_flags
);
214 if (dip
->di_version
>= 3)
215 flags2
= be64_to_cpu(dip
->di_flags2
);
220 mode
= be16_to_cpu(dip
->di_mode
);
221 switch (mode
& S_IFMT
) {
229 /* mode is recognized */
232 xchk_ino_set_corrupt(sc
, ino
);
237 switch (dip
->di_version
) {
240 * We autoconvert v1 inodes into v2 inodes on writeout,
241 * so just mark this inode for preening.
243 xchk_ino_set_preen(sc
, ino
);
247 if (dip
->di_onlink
!= 0)
248 xchk_ino_set_corrupt(sc
, ino
);
250 if (dip
->di_mode
== 0 && sc
->ip
)
251 xchk_ino_set_corrupt(sc
, ino
);
253 if (dip
->di_projid_hi
!= 0 &&
254 !xfs_sb_version_hasprojid32bit(&mp
->m_sb
))
255 xchk_ino_set_corrupt(sc
, ino
);
258 xchk_ino_set_corrupt(sc
, ino
);
263 * di_uid/di_gid -- -1 isn't invalid, but there's no way that
264 * userspace could have created that.
266 if (dip
->di_uid
== cpu_to_be32(-1U) ||
267 dip
->di_gid
== cpu_to_be32(-1U))
268 xchk_ino_set_warning(sc
, ino
);
271 switch (dip
->di_format
) {
272 case XFS_DINODE_FMT_DEV
:
273 if (!S_ISCHR(mode
) && !S_ISBLK(mode
) &&
274 !S_ISFIFO(mode
) && !S_ISSOCK(mode
))
275 xchk_ino_set_corrupt(sc
, ino
);
277 case XFS_DINODE_FMT_LOCAL
:
278 if (!S_ISDIR(mode
) && !S_ISLNK(mode
))
279 xchk_ino_set_corrupt(sc
, ino
);
281 case XFS_DINODE_FMT_EXTENTS
:
282 if (!S_ISREG(mode
) && !S_ISDIR(mode
) && !S_ISLNK(mode
))
283 xchk_ino_set_corrupt(sc
, ino
);
285 case XFS_DINODE_FMT_BTREE
:
286 if (!S_ISREG(mode
) && !S_ISDIR(mode
))
287 xchk_ino_set_corrupt(sc
, ino
);
289 case XFS_DINODE_FMT_UUID
:
291 xchk_ino_set_corrupt(sc
, ino
);
295 /* di_[amc]time.nsec */
296 if (be32_to_cpu(dip
->di_atime
.t_nsec
) >= NSEC_PER_SEC
)
297 xchk_ino_set_corrupt(sc
, ino
);
298 if (be32_to_cpu(dip
->di_mtime
.t_nsec
) >= NSEC_PER_SEC
)
299 xchk_ino_set_corrupt(sc
, ino
);
300 if (be32_to_cpu(dip
->di_ctime
.t_nsec
) >= NSEC_PER_SEC
)
301 xchk_ino_set_corrupt(sc
, ino
);
304 * di_size. xfs_dinode_verify checks for things that screw up
305 * the VFS such as the upper bit being set and zero-length
306 * symlinks/directories, but we can do more here.
308 isize
= be64_to_cpu(dip
->di_size
);
309 if (isize
& (1ULL << 63))
310 xchk_ino_set_corrupt(sc
, ino
);
312 /* Devices, fifos, and sockets must have zero size */
313 if (!S_ISDIR(mode
) && !S_ISREG(mode
) && !S_ISLNK(mode
) && isize
!= 0)
314 xchk_ino_set_corrupt(sc
, ino
);
316 /* Directories can't be larger than the data section size (32G) */
317 if (S_ISDIR(mode
) && (isize
== 0 || isize
>= XFS_DIR2_SPACE_SIZE
))
318 xchk_ino_set_corrupt(sc
, ino
);
320 /* Symlinks can't be larger than SYMLINK_MAXLEN */
321 if (S_ISLNK(mode
) && (isize
== 0 || isize
>= XFS_SYMLINK_MAXLEN
))
322 xchk_ino_set_corrupt(sc
, ino
);
325 * Warn if the running kernel can't handle the kinds of offsets
326 * needed to deal with the file size. In other words, if the
327 * pagecache can't cache all the blocks in this file due to
328 * overly large offsets, flag the inode for admin review.
330 if (isize
>= mp
->m_super
->s_maxbytes
)
331 xchk_ino_set_warning(sc
, ino
);
334 if (flags2
& XFS_DIFLAG2_REFLINK
) {
335 ; /* nblocks can exceed dblocks */
336 } else if (flags
& XFS_DIFLAG_REALTIME
) {
338 * nblocks is the sum of data extents (in the rtdev),
339 * attr extents (in the datadev), and both forks' bmbt
340 * blocks (in the datadev). This clumsy check is the
341 * best we can do without cross-referencing with the
344 if (be64_to_cpu(dip
->di_nblocks
) >=
345 mp
->m_sb
.sb_dblocks
+ mp
->m_sb
.sb_rblocks
)
346 xchk_ino_set_corrupt(sc
, ino
);
348 if (be64_to_cpu(dip
->di_nblocks
) >= mp
->m_sb
.sb_dblocks
)
349 xchk_ino_set_corrupt(sc
, ino
);
352 xchk_inode_flags(sc
, dip
, ino
, mode
, flags
);
354 xchk_inode_extsize(sc
, dip
, ino
, mode
, flags
);
357 nextents
= be32_to_cpu(dip
->di_nextents
);
358 fork_recs
= XFS_DFORK_DSIZE(dip
, mp
) / sizeof(struct xfs_bmbt_rec
);
359 switch (dip
->di_format
) {
360 case XFS_DINODE_FMT_EXTENTS
:
361 if (nextents
> fork_recs
)
362 xchk_ino_set_corrupt(sc
, ino
);
364 case XFS_DINODE_FMT_BTREE
:
365 if (nextents
<= fork_recs
)
366 xchk_ino_set_corrupt(sc
, ino
);
370 xchk_ino_set_corrupt(sc
, ino
);
375 if (XFS_DFORK_APTR(dip
) >= (char *)dip
+ mp
->m_sb
.sb_inodesize
)
376 xchk_ino_set_corrupt(sc
, ino
);
377 if (dip
->di_anextents
!= 0 && dip
->di_forkoff
== 0)
378 xchk_ino_set_corrupt(sc
, ino
);
379 if (dip
->di_forkoff
== 0 && dip
->di_aformat
!= XFS_DINODE_FMT_EXTENTS
)
380 xchk_ino_set_corrupt(sc
, ino
);
383 if (dip
->di_aformat
!= XFS_DINODE_FMT_LOCAL
&&
384 dip
->di_aformat
!= XFS_DINODE_FMT_EXTENTS
&&
385 dip
->di_aformat
!= XFS_DINODE_FMT_BTREE
)
386 xchk_ino_set_corrupt(sc
, ino
);
389 nextents
= be16_to_cpu(dip
->di_anextents
);
390 fork_recs
= XFS_DFORK_ASIZE(dip
, mp
) / sizeof(struct xfs_bmbt_rec
);
391 switch (dip
->di_aformat
) {
392 case XFS_DINODE_FMT_EXTENTS
:
393 if (nextents
> fork_recs
)
394 xchk_ino_set_corrupt(sc
, ino
);
396 case XFS_DINODE_FMT_BTREE
:
397 if (nextents
<= fork_recs
)
398 xchk_ino_set_corrupt(sc
, ino
);
402 xchk_ino_set_corrupt(sc
, ino
);
405 if (dip
->di_version
>= 3) {
406 if (be32_to_cpu(dip
->di_crtime
.t_nsec
) >= NSEC_PER_SEC
)
407 xchk_ino_set_corrupt(sc
, ino
);
408 xchk_inode_flags2(sc
, dip
, ino
, mode
, flags
, flags2
);
409 xchk_inode_cowextsize(sc
, dip
, ino
, mode
, flags
,
415 * Make sure the finobt doesn't think this inode is free.
416 * We don't have to check the inobt ourselves because we got the inode via
417 * IGET_UNTRUSTED, which checks the inobt for us.
420 xchk_inode_xref_finobt(
421 struct xfs_scrub
*sc
,
424 struct xfs_inobt_rec_incore rec
;
429 if (!sc
->sa
.fino_cur
|| xchk_skip_xref(sc
->sm
))
432 agino
= XFS_INO_TO_AGINO(sc
->mp
, ino
);
435 * Try to get the finobt record. If we can't get it, then we're
438 error
= xfs_inobt_lookup(sc
->sa
.fino_cur
, agino
, XFS_LOOKUP_LE
,
440 if (!xchk_should_check_xref(sc
, &error
, &sc
->sa
.fino_cur
) ||
444 error
= xfs_inobt_get_rec(sc
->sa
.fino_cur
, &rec
, &has_record
);
445 if (!xchk_should_check_xref(sc
, &error
, &sc
->sa
.fino_cur
) ||
450 * Otherwise, make sure this record either doesn't cover this inode,
451 * or that it does but it's marked present.
453 if (rec
.ir_startino
> agino
||
454 rec
.ir_startino
+ XFS_INODES_PER_CHUNK
<= agino
)
457 if (rec
.ir_free
& XFS_INOBT_MASK(agino
- rec
.ir_startino
))
458 xchk_btree_xref_set_corrupt(sc
, sc
->sa
.fino_cur
, 0);
461 /* Cross reference the inode fields with the forks. */
463 xchk_inode_xref_bmap(
464 struct xfs_scrub
*sc
,
465 struct xfs_dinode
*dip
)
467 xfs_extnum_t nextents
;
469 xfs_filblks_t acount
;
472 if (xchk_skip_xref(sc
->sm
))
475 /* Walk all the extents to check nextents/naextents/nblocks. */
476 error
= xfs_bmap_count_blocks(sc
->tp
, sc
->ip
, XFS_DATA_FORK
,
478 if (!xchk_should_check_xref(sc
, &error
, NULL
))
480 if (nextents
< be32_to_cpu(dip
->di_nextents
))
481 xchk_ino_xref_set_corrupt(sc
, sc
->ip
->i_ino
);
483 error
= xfs_bmap_count_blocks(sc
->tp
, sc
->ip
, XFS_ATTR_FORK
,
485 if (!xchk_should_check_xref(sc
, &error
, NULL
))
487 if (nextents
!= be16_to_cpu(dip
->di_anextents
))
488 xchk_ino_xref_set_corrupt(sc
, sc
->ip
->i_ino
);
490 /* Check nblocks against the inode. */
491 if (count
+ acount
!= be64_to_cpu(dip
->di_nblocks
))
492 xchk_ino_xref_set_corrupt(sc
, sc
->ip
->i_ino
);
495 /* Cross-reference with the other btrees. */
498 struct xfs_scrub
*sc
,
500 struct xfs_dinode
*dip
)
506 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
509 agno
= XFS_INO_TO_AGNO(sc
->mp
, ino
);
510 agbno
= XFS_INO_TO_AGBNO(sc
->mp
, ino
);
512 error
= xchk_ag_init(sc
, agno
, &sc
->sa
);
513 if (!xchk_xref_process_error(sc
, agno
, agbno
, &error
))
516 xchk_xref_is_used_space(sc
, agbno
, 1);
517 xchk_inode_xref_finobt(sc
, ino
);
518 xchk_xref_is_owned_by(sc
, agbno
, 1, &XFS_RMAP_OINFO_INODES
);
519 xchk_xref_is_not_shared(sc
, agbno
, 1);
520 xchk_inode_xref_bmap(sc
, dip
);
522 xchk_ag_free(sc
, &sc
->sa
);
526 * If the reflink iflag disagrees with a scan for shared data fork extents,
527 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
528 * any shared extents). We already checked for reflink iflag set on a non
529 * reflink filesystem.
532 xchk_inode_check_reflink_iflag(
533 struct xfs_scrub
*sc
,
536 struct xfs_mount
*mp
= sc
->mp
;
540 if (!xfs_sb_version_hasreflink(&mp
->m_sb
))
543 error
= xfs_reflink_inode_has_shared_extents(sc
->tp
, sc
->ip
,
545 if (!xchk_xref_process_error(sc
, XFS_INO_TO_AGNO(mp
, ino
),
546 XFS_INO_TO_AGBNO(mp
, ino
), &error
))
548 if (xfs_is_reflink_inode(sc
->ip
) && !has_shared
)
549 xchk_ino_set_preen(sc
, ino
);
550 else if (!xfs_is_reflink_inode(sc
->ip
) && has_shared
)
551 xchk_ino_set_corrupt(sc
, ino
);
554 /* Scrub an inode. */
557 struct xfs_scrub
*sc
)
559 struct xfs_dinode di
;
563 * If sc->ip is NULL, that means that the setup function called
564 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
565 * and a NULL inode, so flag the corruption error and return.
568 xchk_ino_set_corrupt(sc
, sc
->sm
->sm_ino
);
572 /* Scrub the inode core. */
573 xfs_inode_to_disk(sc
->ip
, &di
, 0);
574 xchk_dinode(sc
, &di
, sc
->ip
->i_ino
);
575 if (sc
->sm
->sm_flags
& XFS_SCRUB_OFLAG_CORRUPT
)
579 * Look for discrepancies between file's data blocks and the reflink
580 * iflag. We already checked the iflag against the file mode when
581 * we scrubbed the dinode.
583 if (S_ISREG(VFS_I(sc
->ip
)->i_mode
))
584 xchk_inode_check_reflink_iflag(sc
, sc
->ip
->i_ino
);
586 xchk_inode_xref(sc
, sc
->ip
->i_ino
, &di
);