1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
17 #include "xfs_inode.h"
18 #include "xfs_icache.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
22 #include "xfs_bmap_btree.h"
23 #include "xfs_dir2_priv.h"
24 #include "xfs_trans_space.h"
25 #include "xfs_health.h"
26 #include "xfs_exchmaps.h"
27 #include "xfs_parent.h"
31 #include "scrub/xfs_scrub.h"
32 #include "scrub/scrub.h"
33 #include "scrub/common.h"
34 #include "scrub/trace.h"
35 #include "scrub/repair.h"
36 #include "scrub/iscan.h"
37 #include "scrub/findparent.h"
38 #include "scrub/readdir.h"
39 #include "scrub/tempfile.h"
40 #include "scrub/tempexch.h"
41 #include "scrub/orphanage.h"
42 #include "scrub/xfile.h"
43 #include "scrub/xfarray.h"
44 #include "scrub/xfblob.h"
45 #include "scrub/attr_repair.h"
46 #include "scrub/listxattr.h"
49 * Repairing The Directory Parent Pointer
50 * ======================================
52 * Currently, only directories support parent pointers (in the form of '..'
53 * entries), so we simply scan the filesystem and update the '..' entry.
55 * Note that because the only parent pointer is the dotdot entry, we won't
56 * touch an unhealthy directory, since the directory repair code is perfectly
57 * capable of rebuilding a directory with the proper parent inode.
59 * See the section on locking issues in dir_repair.c for more information about
60 * conflicts with the VFS. The findparent code will keep our incore parent
63 * If parent pointers are enabled, we instead reconstruct the parent pointer
64 * information by visiting every directory entry of every directory in the
65 * system and translating the relevant dirents into parent pointers. In this
66 * case, it is advantageous to stash all parent pointers created from dirents
67 * from a single parent file before replaying them into the temporary file. To
68 * save memory, the live filesystem scan reuses the findparent object. Parent
69 * pointer repair chooses either directory scanning or findparent, but not
72 * When salvaging completes, the remaining stashed entries are replayed to the
73 * temporary file. All non-parent pointer extended attributes are copied to
74 * the temporary file's extended attributes. An atomic file mapping exchange
75 * is used to commit the new xattr blocks to the file being repaired. This
76 * will disrupt attrmulti cursors.
79 /* Create a parent pointer in the tempfile. */
80 #define XREP_PPTR_ADD (1)
82 /* Remove a parent pointer from the tempfile. */
83 #define XREP_PPTR_REMOVE (2)
85 /* A stashed parent pointer update. */
87 /* Cookie for retrieval of the pptr name. */
88 xfblob_cookie name_cookie
;
90 /* Parent pointer record. */
91 struct xfs_parent_rec pptr_rec
;
93 /* Length of the pptr name. */
96 /* XREP_PPTR_{ADD,REMOVE} */
101 * Stash up to 8 pages of recovered parent pointers in pptr_recs and
102 * pptr_names before we write them to the temp file.
104 #define XREP_PARENT_MAX_STASH_BYTES (PAGE_SIZE * 8)
107 struct xfs_scrub
*sc
;
109 /* Fixed-size array of xrep_pptr structures. */
110 struct xfarray
*pptr_recs
;
112 /* Blobs containing parent pointer names. */
113 struct xfblob
*pptr_names
;
116 struct xfarray
*xattr_records
;
119 struct xfblob
*xattr_blobs
;
121 /* Scratch buffers for saving extended attributes */
122 unsigned char *xattr_name
;
124 unsigned int xattr_value_sz
;
127 * Information used to exchange the attr fork mappings, if the fs
128 * supports parent pointers.
130 struct xrep_tempexch tx
;
133 * Information used to scan the filesystem to find the inumber of the
134 * dotdot entry for this directory. On filesystems without parent
135 * pointers, we use the findparent_* functions on this object and
136 * access only the parent_ino field directly.
138 * When parent pointers are enabled, the directory entry scanner uses
139 * the iscan, hooks, and lock fields of this object directly.
140 * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and
141 * pptr_scratch. This reduces the memory requirements of this
144 * The lock also controls access to xattr_records and xattr_blobs(?)
146 struct xrep_parent_scan_info pscan
;
148 /* Orphanage reparenting request. */
149 struct xrep_adoption adoption
;
151 /* Directory entry name, plus the trailing null. */
152 struct xfs_name xname
;
153 unsigned char namebuf
[MAXNAMELEN
];
155 /* Scratch buffer for scanning pptr xattrs */
156 struct xfs_da_args pptr_args
;
158 /* Have we seen any live updates of parent pointers recently? */
159 bool saw_pptr_updates
;
161 /* Number of parents we found after all other repairs */
162 unsigned long long parents
;
165 struct xrep_parent_xattr
{
166 /* Cookie for retrieval of the xattr name. */
167 xfblob_cookie name_cookie
;
169 /* Cookie for retrieval of the xattr value. */
170 xfblob_cookie value_cookie
;
172 /* XFS_ATTR_* flags */
175 /* Length of the value and name. */
181 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
182 * them to the temp file.
184 #define XREP_PARENT_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8)
186 /* Tear down all the incore stuff we created. */
188 xrep_parent_teardown(
189 struct xrep_parent
*rp
)
191 xrep_findparent_scan_teardown(&rp
->pscan
);
192 kvfree(rp
->xattr_name
);
193 rp
->xattr_name
= NULL
;
194 kvfree(rp
->xattr_value
);
195 rp
->xattr_value
= NULL
;
197 xfblob_destroy(rp
->xattr_blobs
);
198 rp
->xattr_blobs
= NULL
;
199 if (rp
->xattr_records
)
200 xfarray_destroy(rp
->xattr_records
);
201 rp
->xattr_records
= NULL
;
203 xfblob_destroy(rp
->pptr_names
);
204 rp
->pptr_names
= NULL
;
206 xfarray_destroy(rp
->pptr_recs
);
207 rp
->pptr_recs
= NULL
;
210 /* Set up for a parent repair. */
213 struct xfs_scrub
*sc
)
215 struct xrep_parent
*rp
;
218 xchk_fsgates_enable(sc
, XCHK_FSGATES_DIRENTS
);
220 rp
= kvzalloc(sizeof(struct xrep_parent
), XCHK_GFP_FLAGS
);
224 rp
->xname
.name
= rp
->namebuf
;
227 error
= xrep_tempfile_create(sc
, S_IFREG
);
231 return xrep_orphanage_try_create(sc
);
235 * Scan all files in the filesystem for a child dirent that we can turn into
236 * the dotdot entry for this directory.
239 xrep_parent_find_dotdot(
240 struct xrep_parent
*rp
)
242 struct xfs_scrub
*sc
= rp
->sc
;
244 unsigned int sick
, checked
;
248 * Avoid sick directories. There shouldn't be anyone else clearing the
249 * directory's sick status.
251 xfs_inode_measure_sickness(sc
->ip
, &sick
, &checked
);
252 if (sick
& XFS_SICK_INO_DIR
)
253 return -EFSCORRUPTED
;
255 ino
= xrep_findparent_self_reference(sc
);
256 if (ino
!= NULLFSINO
) {
257 xrep_findparent_scan_finish_early(&rp
->pscan
, ino
);
262 * Drop the ILOCK on this directory so that we can scan for the dotdot
263 * entry. Figure out who is going to be the parent of this directory,
264 * then retake the ILOCK so that we can salvage directory entries.
266 xchk_iunlock(sc
, XFS_ILOCK_EXCL
);
268 /* Does the VFS dcache have an answer for us? */
269 ino
= xrep_findparent_from_dcache(sc
);
270 if (ino
!= NULLFSINO
) {
271 error
= xrep_findparent_confirm(sc
, &ino
);
272 if (!error
&& ino
!= NULLFSINO
) {
273 xrep_findparent_scan_finish_early(&rp
->pscan
, ino
);
278 /* Scan the entire filesystem for a parent. */
279 error
= xrep_findparent_scan(&rp
->pscan
);
281 xchk_ilock(sc
, XFS_ILOCK_EXCL
);
287 * Add this stashed incore parent pointer to the temporary file.
288 * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
289 * must not be in transaction context.
292 xrep_parent_replay_update(
293 struct xrep_parent
*rp
,
294 const struct xfs_name
*xname
,
295 struct xrep_pptr
*pptr
)
297 struct xfs_scrub
*sc
= rp
->sc
;
299 switch (pptr
->action
) {
301 /* Create parent pointer. */
302 trace_xrep_parent_replay_parentadd(sc
->tempip
, xname
,
305 return xfs_parent_set(sc
->tempip
, sc
->ip
->i_ino
, xname
,
306 &pptr
->pptr_rec
, &rp
->pptr_args
);
307 case XREP_PPTR_REMOVE
:
308 /* Remove parent pointer. */
309 trace_xrep_parent_replay_parentremove(sc
->tempip
, xname
,
312 return xfs_parent_unset(sc
->tempip
, sc
->ip
->i_ino
, xname
,
313 &pptr
->pptr_rec
, &rp
->pptr_args
);
321 * Flush stashed parent pointer updates that have been recorded by the scanner.
322 * This is done to reduce the memory requirements of the parent pointer
323 * rebuild, since files can have a lot of hardlinks and the fs can be busy.
325 * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
329 xrep_parent_replay_updates(
330 struct xrep_parent
*rp
)
332 xfarray_idx_t array_cur
;
335 mutex_lock(&rp
->pscan
.lock
);
336 foreach_xfarray_idx(rp
->pptr_recs
, array_cur
) {
337 struct xrep_pptr pptr
;
339 error
= xfarray_load(rp
->pptr_recs
, array_cur
, &pptr
);
343 error
= xfblob_loadname(rp
->pptr_names
, pptr
.name_cookie
,
344 &rp
->xname
, pptr
.namelen
);
347 rp
->xname
.len
= pptr
.namelen
;
348 mutex_unlock(&rp
->pscan
.lock
);
350 error
= xrep_parent_replay_update(rp
, &rp
->xname
, &pptr
);
354 mutex_lock(&rp
->pscan
.lock
);
357 /* Empty out both arrays now that we've added the entries. */
358 xfarray_truncate(rp
->pptr_recs
);
359 xfblob_truncate(rp
->pptr_names
);
360 mutex_unlock(&rp
->pscan
.lock
);
363 mutex_unlock(&rp
->pscan
.lock
);
368 * Remember that we want to create a parent pointer in the tempfile. These
369 * stashed actions will be replayed later.
372 xrep_parent_stash_parentadd(
373 struct xrep_parent
*rp
,
374 const struct xfs_name
*name
,
375 const struct xfs_inode
*dp
)
377 struct xrep_pptr pptr
= {
378 .action
= XREP_PPTR_ADD
,
379 .namelen
= name
->len
,
383 trace_xrep_parent_stash_parentadd(rp
->sc
->tempip
, dp
, name
);
385 xfs_inode_to_parent_rec(&pptr
.pptr_rec
, dp
);
386 error
= xfblob_storename(rp
->pptr_names
, &pptr
.name_cookie
, name
);
390 return xfarray_append(rp
->pptr_recs
, &pptr
);
394 * Remember that we want to remove a parent pointer from the tempfile. These
395 * stashed actions will be replayed later.
398 xrep_parent_stash_parentremove(
399 struct xrep_parent
*rp
,
400 const struct xfs_name
*name
,
401 const struct xfs_inode
*dp
)
403 struct xrep_pptr pptr
= {
404 .action
= XREP_PPTR_REMOVE
,
405 .namelen
= name
->len
,
409 trace_xrep_parent_stash_parentremove(rp
->sc
->tempip
, dp
, name
);
411 xfs_inode_to_parent_rec(&pptr
.pptr_rec
, dp
);
412 error
= xfblob_storename(rp
->pptr_names
, &pptr
.name_cookie
, name
);
416 return xfarray_append(rp
->pptr_recs
, &pptr
);
420 * Examine an entry of a directory. If this dirent leads us back to the file
421 * whose parent pointers we're rebuilding, add a pptr to the temporary
425 xrep_parent_scan_dirent(
426 struct xfs_scrub
*sc
,
427 struct xfs_inode
*dp
,
428 xfs_dir2_dataptr_t dapos
,
429 const struct xfs_name
*name
,
433 struct xrep_parent
*rp
= priv
;
436 /* Dirent doesn't point to this directory. */
437 if (ino
!= rp
->sc
->ip
->i_ino
)
440 /* No weird looking names. */
441 if (name
->len
== 0 || !xfs_dir2_namecheck(name
->name
, name
->len
))
442 return -EFSCORRUPTED
;
444 /* No mismatching ftypes. */
445 if (name
->type
!= xfs_mode_to_ftype(VFS_I(sc
->ip
)->i_mode
))
446 return -EFSCORRUPTED
;
448 /* Don't pick up dot or dotdot entries; we only want child dirents. */
449 if (xfs_dir2_samename(name
, &xfs_name_dotdot
) ||
450 xfs_dir2_samename(name
, &xfs_name_dot
))
454 * Transform this dirent into a parent pointer and queue it for later
455 * addition to the temporary file.
457 mutex_lock(&rp
->pscan
.lock
);
458 error
= xrep_parent_stash_parentadd(rp
, name
, dp
);
459 mutex_unlock(&rp
->pscan
.lock
);
464 * Decide if we want to look for dirents in this directory. Skip the file
465 * being repaired and any files being used to stage repairs.
468 xrep_parent_want_scan(
469 struct xrep_parent
*rp
,
470 const struct xfs_inode
*ip
)
472 return ip
!= rp
->sc
->ip
&& !xrep_is_tempfile(ip
);
476 * Take ILOCK on a file that we want to scan.
478 * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt.
479 * Otherwise, take ILOCK_SHARED.
481 static inline unsigned int
482 xrep_parent_scan_ilock(
483 struct xrep_parent
*rp
,
484 struct xfs_inode
*ip
)
486 uint lock_mode
= XFS_ILOCK_SHARED
;
488 /* Still need to take the shared ILOCK to advance the iscan cursor. */
489 if (!xrep_parent_want_scan(rp
, ip
))
492 if (S_ISDIR(VFS_I(ip
)->i_mode
) && xfs_need_iread_extents(&ip
->i_df
)) {
493 lock_mode
= XFS_ILOCK_EXCL
;
498 xfs_ilock(ip
, lock_mode
);
503 * Scan this file for relevant child dirents that point to the file whose
504 * parent pointers we're rebuilding.
507 xrep_parent_scan_file(
508 struct xrep_parent
*rp
,
509 struct xfs_inode
*ip
)
511 unsigned int lock_mode
;
514 lock_mode
= xrep_parent_scan_ilock(rp
, ip
);
516 if (!xrep_parent_want_scan(rp
, ip
))
519 if (S_ISDIR(VFS_I(ip
)->i_mode
)) {
521 * If the directory looks as though it has been zapped by the
522 * inode record repair code, we cannot scan for child dirents.
524 if (xchk_dir_looks_zapped(ip
)) {
529 error
= xchk_dir_walk(rp
->sc
, ip
, xrep_parent_scan_dirent
, rp
);
535 xchk_iscan_mark_visited(&rp
->pscan
.iscan
, ip
);
536 xfs_iunlock(ip
, lock_mode
);
540 /* Decide if we've stashed too much pptr data in memory. */
542 xrep_parent_want_flush_stashed(
543 struct xrep_parent
*rp
)
545 unsigned long long bytes
;
547 bytes
= xfarray_bytes(rp
->pptr_recs
) + xfblob_bytes(rp
->pptr_names
);
548 return bytes
> XREP_PARENT_MAX_STASH_BYTES
;
552 * Scan all directories in the filesystem to look for dirents that we can turn
553 * into parent pointers.
556 xrep_parent_scan_dirtree(
557 struct xrep_parent
*rp
)
559 struct xfs_scrub
*sc
= rp
->sc
;
560 struct xfs_inode
*ip
;
564 * Filesystem scans are time consuming. Drop the file ILOCK and all
565 * other resources for the duration of the scan and hope for the best.
566 * The live update hooks will keep our scan information up to date.
568 xchk_trans_cancel(sc
);
569 if (sc
->ilock_flags
& (XFS_ILOCK_SHARED
| XFS_ILOCK_EXCL
))
570 xchk_iunlock(sc
, sc
->ilock_flags
& (XFS_ILOCK_SHARED
|
572 error
= xchk_trans_alloc_empty(sc
);
576 while ((error
= xchk_iscan_iter(&rp
->pscan
.iscan
, &ip
)) == 1) {
579 error
= xrep_parent_scan_file(rp
, ip
);
584 /* Flush stashed pptr updates to constrain memory usage. */
585 mutex_lock(&rp
->pscan
.lock
);
586 flush
= xrep_parent_want_flush_stashed(rp
);
587 mutex_unlock(&rp
->pscan
.lock
);
589 xchk_trans_cancel(sc
);
591 error
= xrep_tempfile_iolock_polled(sc
);
595 error
= xrep_parent_replay_updates(rp
);
596 xrep_tempfile_iounlock(sc
);
600 error
= xchk_trans_alloc_empty(sc
);
605 if (xchk_should_terminate(sc
, &error
))
608 xchk_iscan_iter_finish(&rp
->pscan
.iscan
);
611 * If we couldn't grab an inode that was busy with a state
612 * change, change the error code so that we exit to userspace
613 * as quickly as possible.
621 * Retake sc->ip's ILOCK now that we're done flushing stashed parent
622 * pointers. We end this function with an empty transaction and the
625 xchk_ilock(rp
->sc
, XFS_ILOCK_EXCL
);
630 * Capture dirent updates being made by other threads which are relevant to the
631 * file being repaired.
634 xrep_parent_live_update(
635 struct notifier_block
*nb
,
636 unsigned long action
,
639 struct xfs_dir_update_params
*p
= data
;
640 struct xrep_parent
*rp
;
641 struct xfs_scrub
*sc
;
644 rp
= container_of(nb
, struct xrep_parent
, pscan
.dhook
.dirent_hook
.nb
);
648 * This thread updated a dirent that points to the file that we're
649 * repairing, so stash the update for replay against the temporary
652 if (p
->ip
->i_ino
== sc
->ip
->i_ino
&&
653 xchk_iscan_want_live_update(&rp
->pscan
.iscan
, p
->dp
->i_ino
)) {
654 mutex_lock(&rp
->pscan
.lock
);
656 error
= xrep_parent_stash_parentadd(rp
, p
->name
, p
->dp
);
658 error
= xrep_parent_stash_parentremove(rp
, p
->name
,
661 rp
->saw_pptr_updates
= true;
662 mutex_unlock(&rp
->pscan
.lock
);
669 xchk_iscan_abort(&rp
->pscan
.iscan
);
673 /* Reset a directory's dotdot entry, if needed. */
675 xrep_parent_reset_dotdot(
676 struct xrep_parent
*rp
)
678 struct xfs_scrub
*sc
= rp
->sc
;
680 unsigned int spaceres
;
683 ASSERT(sc
->ilock_flags
& XFS_ILOCK_EXCL
);
685 error
= xchk_dir_lookup(sc
, sc
->ip
, &xfs_name_dotdot
, &ino
);
686 if (error
|| ino
== rp
->pscan
.parent_ino
)
689 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
691 trace_xrep_parent_reset_dotdot(sc
->ip
, rp
->pscan
.parent_ino
);
694 * Reserve more space just in case we have to expand the dir. We're
695 * allowed to exceed quota to repair inconsistent metadata.
697 spaceres
= xfs_rename_space_res(sc
->mp
, 0, false, xfs_name_dotdot
.len
,
699 error
= xfs_trans_reserve_more_inode(sc
->tp
, sc
->ip
, spaceres
, 0,
704 error
= xfs_dir_replace(sc
->tp
, sc
->ip
, &xfs_name_dotdot
,
705 rp
->pscan
.parent_ino
, spaceres
);
710 * Roll transaction to detach the inode from the transaction but retain
713 return xfs_trans_roll(&sc
->tp
);
716 /* Pass back the parent inumber if this is a parent pointer */
718 xrep_parent_lookup_pptr(
719 struct xfs_scrub
*sc
,
720 struct xfs_inode
*ip
,
721 unsigned int attr_flags
,
722 const unsigned char *name
,
723 unsigned int namelen
,
725 unsigned int valuelen
,
728 xfs_ino_t
*inop
= priv
;
729 xfs_ino_t parent_ino
;
732 if (!(attr_flags
& XFS_ATTR_PARENT
))
735 error
= xfs_parent_from_attr(sc
->mp
, attr_flags
, name
, namelen
, value
,
736 valuelen
, &parent_ino
, NULL
);
745 * Find the first parent of the scrub target by walking parent pointers for
746 * the purpose of deciding if we're going to move it to the orphanage.
747 * We don't care if the attr fork is zapped.
750 xrep_parent_lookup_pptrs(
751 struct xfs_scrub
*sc
,
758 error
= xchk_xattr_walk(sc
, sc
->ip
, xrep_parent_lookup_pptr
, NULL
,
760 if (error
&& error
!= -ECANCELED
)
766 * Move the current file to the orphanage.
768 * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
769 * successful return, the scrub transaction will have enough extra reservation
770 * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
771 * orphanage; and both inodes will be ijoined.
774 xrep_parent_move_to_orphanage(
775 struct xrep_parent
*rp
)
777 struct xfs_scrub
*sc
= rp
->sc
;
778 xfs_ino_t orig_parent
, new_parent
;
781 if (S_ISDIR(VFS_I(sc
->ip
)->i_mode
)) {
783 * We are about to drop the ILOCK on sc->ip to lock the
784 * orphanage and prepare for the adoption. Therefore, look up
785 * the old dotdot entry for sc->ip so that we can compare it
786 * after we re-lock sc->ip.
788 error
= xchk_dir_lookup(sc
, sc
->ip
, &xfs_name_dotdot
,
794 * We haven't dropped the ILOCK since we committed the new
795 * xattr structure (and hence the new parent pointer records),
796 * which means that the file cannot have been moved in the
797 * directory tree, and there are no parents.
799 orig_parent
= NULLFSINO
;
803 * Drop the ILOCK on the scrub target and commit the transaction.
804 * Adoption computes its own resource requirements and gathers the
805 * necessary components.
807 error
= xrep_trans_commit(sc
);
810 xchk_iunlock(sc
, XFS_ILOCK_EXCL
);
812 /* If we can take the orphanage's iolock then we're ready to move. */
813 if (!xrep_orphanage_ilock_nowait(sc
, XFS_IOLOCK_EXCL
)) {
814 xchk_iunlock(sc
, sc
->ilock_flags
);
815 error
= xrep_orphanage_iolock_two(sc
);
820 /* Grab transaction and ILOCK the two files. */
821 error
= xrep_adoption_trans_alloc(sc
, &rp
->adoption
);
825 error
= xrep_adoption_compute_name(&rp
->adoption
, &rp
->xname
);
830 * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
831 * entry again. If the parent changed or the child was unlinked while
832 * the child directory was unlocked, we don't need to move the child to
833 * the orphanage after all. For a non-directory, we have to scan for
834 * the first parent pointer to see if one has been added.
836 if (S_ISDIR(VFS_I(sc
->ip
)->i_mode
))
837 error
= xchk_dir_lookup(sc
, sc
->ip
, &xfs_name_dotdot
,
840 error
= xrep_parent_lookup_pptrs(sc
, &new_parent
);
845 * Attach to the orphanage if we still have a linked directory and it
848 if (orig_parent
== new_parent
&& VFS_I(sc
->ip
)->i_nlink
> 0) {
849 error
= xrep_adoption_move(&rp
->adoption
);
855 * Launder the scrub transaction so we can drop the orphanage ILOCK
856 * and IOLOCK. Return holding the scrub target's ILOCK and IOLOCK.
858 error
= xrep_adoption_trans_roll(&rp
->adoption
);
862 xrep_orphanage_iunlock(sc
, XFS_ILOCK_EXCL
);
863 xrep_orphanage_iunlock(sc
, XFS_IOLOCK_EXCL
);
867 /* Ensure that the xattr value buffer is large enough. */
869 xrep_parent_alloc_xattr_value(
870 struct xrep_parent
*rp
,
875 if (rp
->xattr_value_sz
>= bufsize
)
878 if (rp
->xattr_value
) {
879 kvfree(rp
->xattr_value
);
880 rp
->xattr_value
= NULL
;
881 rp
->xattr_value_sz
= 0;
884 new_val
= kvmalloc(bufsize
, XCHK_GFP_FLAGS
);
888 rp
->xattr_value
= new_val
;
889 rp
->xattr_value_sz
= bufsize
;
893 /* Retrieve the (remote) value of a non-pptr xattr. */
895 xrep_parent_fetch_xattr_remote(
896 struct xrep_parent
*rp
,
897 struct xfs_inode
*ip
,
898 unsigned int attr_flags
,
899 const unsigned char *name
,
900 unsigned int namelen
,
901 unsigned int valuelen
)
903 struct xfs_scrub
*sc
= rp
->sc
;
904 struct xfs_da_args args
= {
905 .attr_filter
= attr_flags
& XFS_ATTR_NSP_ONDISK_MASK
,
906 .geo
= sc
->mp
->m_attr_geo
,
907 .whichfork
= XFS_ATTR_FORK
,
912 .valuelen
= valuelen
,
918 * If we need a larger value buffer, try to allocate one. If that
919 * fails, return with -EDEADLOCK to try harder.
921 error
= xrep_parent_alloc_xattr_value(rp
, valuelen
);
922 if (error
== -ENOMEM
)
927 args
.value
= rp
->xattr_value
;
928 xfs_attr_sethash(&args
);
929 return xfs_attr_get_ilocked(&args
);
932 /* Stash non-pptr attributes for later replay into the temporary file. */
934 xrep_parent_stash_xattr(
935 struct xfs_scrub
*sc
,
936 struct xfs_inode
*ip
,
937 unsigned int attr_flags
,
938 const unsigned char *name
,
939 unsigned int namelen
,
941 unsigned int valuelen
,
944 struct xrep_parent_xattr key
= {
945 .valuelen
= valuelen
,
947 .flags
= attr_flags
& XFS_ATTR_NSP_ONDISK_MASK
,
949 struct xrep_parent
*rp
= priv
;
952 if (attr_flags
& (XFS_ATTR_INCOMPLETE
| XFS_ATTR_PARENT
))
956 error
= xrep_parent_fetch_xattr_remote(rp
, ip
, attr_flags
,
957 name
, namelen
, valuelen
);
961 value
= rp
->xattr_value
;
964 trace_xrep_parent_stash_xattr(rp
->sc
->tempip
, key
.flags
, (void *)name
,
965 key
.namelen
, key
.valuelen
);
967 error
= xfblob_store(rp
->xattr_blobs
, &key
.name_cookie
, name
,
972 error
= xfblob_store(rp
->xattr_blobs
, &key
.value_cookie
, value
,
977 return xfarray_append(rp
->xattr_records
, &key
);
980 /* Insert one xattr key/value. */
982 xrep_parent_insert_xattr(
983 struct xrep_parent
*rp
,
984 const struct xrep_parent_xattr
*key
)
986 struct xfs_da_args args
= {
987 .dp
= rp
->sc
->tempip
,
988 .attr_filter
= key
->flags
,
989 .namelen
= key
->namelen
,
990 .valuelen
= key
->valuelen
,
991 .owner
= rp
->sc
->ip
->i_ino
,
992 .geo
= rp
->sc
->mp
->m_attr_geo
,
993 .whichfork
= XFS_ATTR_FORK
,
994 .op_flags
= XFS_DA_OP_OKNOENT
,
998 ASSERT(!(key
->flags
& XFS_ATTR_PARENT
));
1001 * Grab pointers to the scrub buffer so that we can use them to insert
1002 * attrs into the temp file.
1004 args
.name
= rp
->xattr_name
;
1005 args
.value
= rp
->xattr_value
;
1008 * The attribute name is stored near the end of the in-core buffer,
1009 * though we reserve one more byte to ensure null termination.
1011 rp
->xattr_name
[XATTR_NAME_MAX
] = 0;
1013 error
= xfblob_load(rp
->xattr_blobs
, key
->name_cookie
, rp
->xattr_name
,
1018 error
= xfblob_free(rp
->xattr_blobs
, key
->name_cookie
);
1022 error
= xfblob_load(rp
->xattr_blobs
, key
->value_cookie
, args
.value
,
1027 error
= xfblob_free(rp
->xattr_blobs
, key
->value_cookie
);
1031 rp
->xattr_name
[key
->namelen
] = 0;
1033 trace_xrep_parent_insert_xattr(rp
->sc
->tempip
, key
->flags
,
1034 rp
->xattr_name
, key
->namelen
, key
->valuelen
);
1036 xfs_attr_sethash(&args
);
1037 return xfs_attr_set(&args
, XFS_ATTRUPDATE_UPSERT
, false);
1041 * Periodically flush salvaged attributes to the temporary file. This is done
1042 * to reduce the memory requirements of the xattr rebuild because files can
1043 * contain millions of attributes.
1046 xrep_parent_flush_xattrs(
1047 struct xrep_parent
*rp
)
1049 xfarray_idx_t array_cur
;
1053 * Entering this function, the scrub context has a reference to the
1054 * inode being repaired, the temporary file, and the empty scrub
1055 * transaction that we created for the xattr scan. We hold ILOCK_EXCL
1056 * on the inode being repaired.
1058 * To constrain kernel memory use, we occasionally flush salvaged
1059 * xattrs from the xfarray and xfblob structures into the temporary
1060 * file in preparation for exchanging the xattr structures at the end.
1061 * Updating the temporary file requires a transaction, so we commit the
1062 * scrub transaction and drop the ILOCK so that xfs_attr_set can
1063 * allocate whatever transaction it wants.
1065 * We still hold IOLOCK_EXCL on the inode being repaired, which
1066 * prevents anyone from adding xattrs (or parent pointers) while we're
1069 xchk_trans_cancel(rp
->sc
);
1070 xchk_iunlock(rp
->sc
, XFS_ILOCK_EXCL
);
1073 * Take the IOLOCK of the temporary file while we modify xattrs. This
1074 * isn't strictly required because the temporary file is never revealed
1075 * to userspace, but we follow the same locking rules. We still hold
1078 error
= xrep_tempfile_iolock_polled(rp
->sc
);
1082 /* Add all the salvaged attrs to the temporary file. */
1083 foreach_xfarray_idx(rp
->xattr_records
, array_cur
) {
1084 struct xrep_parent_xattr key
;
1086 error
= xfarray_load(rp
->xattr_records
, array_cur
, &key
);
1090 error
= xrep_parent_insert_xattr(rp
, &key
);
1095 /* Empty out both arrays now that we've added the entries. */
1096 xfarray_truncate(rp
->xattr_records
);
1097 xfblob_truncate(rp
->xattr_blobs
);
1099 xrep_tempfile_iounlock(rp
->sc
);
1101 /* Recreate the empty transaction and relock the inode. */
1102 error
= xchk_trans_alloc_empty(rp
->sc
);
1105 xchk_ilock(rp
->sc
, XFS_ILOCK_EXCL
);
1109 /* Decide if we've stashed too much xattr data in memory. */
1111 xrep_parent_want_flush_xattrs(
1112 struct xrep_parent
*rp
)
1114 unsigned long long bytes
;
1116 bytes
= xfarray_bytes(rp
->xattr_records
) +
1117 xfblob_bytes(rp
->xattr_blobs
);
1118 return bytes
> XREP_PARENT_XATTR_MAX_STASH_BYTES
;
1121 /* Flush staged attributes to the temporary file if we're over the limit. */
1123 xrep_parent_try_flush_xattrs(
1124 struct xfs_scrub
*sc
,
1127 struct xrep_parent
*rp
= priv
;
1130 if (!xrep_parent_want_flush_xattrs(rp
))
1133 error
= xrep_parent_flush_xattrs(rp
);
1138 * If there were any parent pointer updates to the xattr structure
1139 * while we dropped the ILOCK, the xattr structure is now stale.
1140 * Signal to the attr copy process that we need to start over, but
1141 * this time without opportunistic attr flushing.
1143 * This is unlikely to happen, so we're ok with restarting the copy.
1145 mutex_lock(&rp
->pscan
.lock
);
1146 if (rp
->saw_pptr_updates
)
1148 mutex_unlock(&rp
->pscan
.lock
);
1152 /* Copy all the non-pptr extended attributes into the temporary file. */
1154 xrep_parent_copy_xattrs(
1155 struct xrep_parent
*rp
)
1157 struct xfs_scrub
*sc
= rp
->sc
;
1161 * Clear the pptr updates flag. We hold sc->ip ILOCKed, so there
1162 * can't be any parent pointer updates in progress.
1164 mutex_lock(&rp
->pscan
.lock
);
1165 rp
->saw_pptr_updates
= false;
1166 mutex_unlock(&rp
->pscan
.lock
);
1168 /* Copy xattrs, stopping periodically to flush the incore buffers. */
1169 error
= xchk_xattr_walk(sc
, sc
->ip
, xrep_parent_stash_xattr
,
1170 xrep_parent_try_flush_xattrs
, rp
);
1171 if (error
&& error
!= -ESTALE
)
1174 if (error
== -ESTALE
) {
1176 * The xattr copy collided with a parent pointer update.
1177 * Restart the copy, but this time hold the ILOCK all the way
1178 * to the end to lock out any directory parent pointer updates.
1180 error
= xchk_xattr_walk(sc
, sc
->ip
, xrep_parent_stash_xattr
,
1186 /* Flush any remaining stashed xattrs to the temporary file. */
1187 if (xfarray_bytes(rp
->xattr_records
) == 0)
1190 return xrep_parent_flush_xattrs(rp
);
1194 * Ensure that @sc->ip and @sc->tempip both have attribute forks before we head
1195 * into the attr fork exchange transaction. All files on a filesystem with
1196 * parent pointers must have an attr fork because the parent pointer code does
1197 * not itself add attribute forks.
1199 * Note: Unlinkable unlinked files don't need one, but the overhead of having
1200 * an unnecessary attr fork is not justified by the additional code complexity
1201 * that would be needed to track that state correctly.
1204 xrep_parent_ensure_attr_fork(
1205 struct xrep_parent
*rp
)
1207 struct xfs_scrub
*sc
= rp
->sc
;
1210 error
= xfs_attr_add_fork(sc
->tempip
,
1211 sizeof(struct xfs_attr_sf_hdr
), 1);
1214 return xfs_attr_add_fork(sc
->ip
, sizeof(struct xfs_attr_sf_hdr
), 1);
1218 * Finish replaying stashed parent pointer updates, allocate a transaction for
1219 * exchanging extent mappings, and take the ILOCKs of both files before we
1220 * commit the new attribute structure.
1223 xrep_parent_finalize_tempfile(
1224 struct xrep_parent
*rp
)
1226 struct xfs_scrub
*sc
= rp
->sc
;
1230 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1231 * Replay all queued parent pointer updates into the tempfile before
1232 * exchanging the contents, even if that means dropping the ILOCKs and
1236 error
= xrep_parent_replay_updates(rp
);
1240 error
= xrep_parent_ensure_attr_fork(rp
);
1244 error
= xrep_tempexch_trans_alloc(sc
, XFS_ATTR_FORK
, &rp
->tx
);
1248 if (xfarray_length(rp
->pptr_recs
) == 0)
1251 xchk_trans_cancel(sc
);
1252 xrep_tempfile_iunlock_both(sc
);
1253 } while (!xchk_should_terminate(sc
, &error
));
1258 * Replay all the stashed parent pointers into the temporary file, copy all
1259 * the non-pptr xattrs from the file being repaired into the temporary file,
1260 * and exchange the attr fork contents atomically.
1263 xrep_parent_rebuild_pptrs(
1264 struct xrep_parent
*rp
)
1266 struct xfs_scrub
*sc
= rp
->sc
;
1267 xfs_ino_t parent_ino
= NULLFSINO
;
1271 * Copy non-pptr xattrs from the file being repaired into the
1272 * temporary file's xattr structure. We hold sc->ip's IOLOCK, which
1273 * prevents setxattr/removexattr calls from occurring, but renames
1274 * update the parent pointers without holding IOLOCK. If we detect
1275 * stale attr structures, we restart the scan but only flush at the
1278 error
= xrep_parent_copy_xattrs(rp
);
1283 * Cancel the empty transaction that we used to walk and copy attrs,
1284 * and drop the ILOCK so that we can take the IOLOCK on the temporary
1285 * file. We still hold sc->ip's IOLOCK.
1287 xchk_trans_cancel(sc
);
1288 xchk_iunlock(sc
, XFS_ILOCK_EXCL
);
1290 error
= xrep_tempfile_iolock_polled(sc
);
1295 * Allocate transaction, lock inodes, and make sure that we've replayed
1296 * all the stashed pptr updates to the tempdir. After this point,
1297 * we're ready to exchange the attr fork mappings.
1299 error
= xrep_parent_finalize_tempfile(rp
);
1303 /* Last chance to abort before we start committing pptr fixes. */
1304 if (xchk_should_terminate(sc
, &error
))
1307 if (xchk_iscan_aborted(&rp
->pscan
.iscan
))
1311 * Exchange the attr fork contents and junk the old attr fork contents,
1312 * which are now in the tempfile.
1314 error
= xrep_xattr_swap(sc
, &rp
->tx
);
1317 error
= xrep_xattr_reset_tempfile_fork(sc
);
1322 * Roll to get a transaction without any inodes joined to it. Then we
1323 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1324 * the scrub target file.
1326 error
= xfs_trans_roll(&sc
->tp
);
1329 xrep_tempfile_iunlock(sc
);
1330 xrep_tempfile_iounlock(sc
);
1333 * We've committed the new parent pointers. Find at least one parent
1334 * so that we can decide if we're moving this file to the orphanage.
1335 * For this purpose, root directories are their own parents.
1337 if (sc
->ip
== sc
->mp
->m_rootip
) {
1338 xrep_findparent_scan_found(&rp
->pscan
, sc
->ip
->i_ino
);
1340 error
= xrep_parent_lookup_pptrs(sc
, &parent_ino
);
1343 if (parent_ino
!= NULLFSINO
)
1344 xrep_findparent_scan_found(&rp
->pscan
, parent_ino
);
1350 * Commit the new parent pointer structure (currently only the dotdot entry) to
1351 * the file that we're repairing.
/*
 * xrep_parent_rebuild_tree() - commit the outcome of the parent scan to the
 * file being repaired (rp->sc->ip).
 *
 * @rp: parent repair context; rp->pscan.parent_ino is the parent that the
 *      scan settled on, or NULLFSINO if none was found.
 *
 * Visible behaviour:
 *   - When xfs_has_parent(rp->sc->mp) is true, xrep_parent_rebuild_pptrs(rp)
 *     is called first.
 *   - When rp->pscan.parent_ino is NULLFSINO, the result of
 *     xrep_parent_move_to_orphanage(rp) is returned if
 *     xrep_orphanage_can_adopt(rp->sc) allows it; otherwise -EFSCORRUPTED
 *     is returned.
 *   - When the file is a directory (S_ISDIR on the VFS inode mode), the
 *     result of xrep_parent_reset_dotdot(rp) is returned.
 *
 * NOTE(review): the handling of the error from xrep_parent_rebuild_pptrs()
 * and the return value for a non-directory that has a parent are not
 * visible in this excerpt -- confirm against the full source.
 */
1354 xrep_parent_rebuild_tree(
1355 struct xrep_parent
*rp
)
1359 if (xfs_has_parent(rp
->sc
->mp
)) {
1360 error
= xrep_parent_rebuild_pptrs(rp
);
1365 if (rp
->pscan
.parent_ino
== NULLFSINO
) {
1366 if (xrep_orphanage_can_adopt(rp
->sc
))
1367 return xrep_parent_move_to_orphanage(rp
);
1368 return -EFSCORRUPTED
;
1371 if (S_ISDIR(VFS_I(rp
->sc
->ip
)->i_mode
))
1372 return xrep_parent_reset_dotdot(rp
);
1377 /* Count the number of parent pointers. */
/*
 * xrep_parent_count_pptr() - per-attribute callback used while counting
 * parent pointers; xrep_parent_set_nondir_nlink() passes it to
 * xchk_xattr_walk() with the struct xrep_parent as the private pointer.
 *
 * @sc:         scrub context; sc->mp is passed to xfs_parent_from_attr().
 * @ip:         inode whose attributes are being walked.
 * @attr_flags: attribute namespace flags; anything without XFS_ATTR_PARENT
 *              is filtered out before validation.
 * @name:       attribute name bytes.
 * @namelen:    length of @name.
 * @valuelen:   length of the attribute value.
 *
 * Attributes that carry XFS_ATTR_PARENT are checked with
 * xfs_parent_from_attr(); the two output arguments are NULL, so the call is
 * used only for its return code.
 *
 * NOTE(review): the declarations of the value and priv parameters, the
 * early-exit statement for non-parent attributes, the handling of error and
 * the statement that updates the count are not visible in this excerpt.
 * Presumably rp->parents is incremented for every valid parent pointer --
 * confirm against the full source.
 */
1379 xrep_parent_count_pptr(
1380 struct xfs_scrub
*sc
,
1381 struct xfs_inode
*ip
,
1382 unsigned int attr_flags
,
1383 const unsigned char *name
,
1384 unsigned int namelen
,
1386 unsigned int valuelen
,
1389 struct xrep_parent
*rp
= priv
;
1392 if (!(attr_flags
& XFS_ATTR_PARENT
))
1395 error
= xfs_parent_from_attr(sc
->mp
, attr_flags
, name
, namelen
, value
,
1396 valuelen
, NULL
, NULL
);
1405 * After all parent pointer rebuilding and adoption activity completes, reset
1406 * the link count of this nondirectory, having scanned the fs to rebuild all
/*
 * xrep_parent_set_nondir_nlink() - make the link count and unlinked-list
 * membership of sc->ip agree with the number of parent pointers it holds.
 *
 * @rp: parent repair context; rp->parents is the parent pointer count that
 *      the decisions below are based on.
 *
 * Visible behaviour:
 *   1. Walk the attributes of the file with xchk_xattr_walk(), using
 *      xrep_parent_count_pptr as the callback and rp as its private data.
 *   2. rp->parents > 0 and the inode is on the unlinked list: join the
 *      inode to sc->tp, look up the perag for its inode number and call
 *      xfs_iunlink_remove().  -EFSCORRUPTED is returned on a path inside
 *      this branch.
 *   3. rp->parents == 0 and the inode is not on the unlinked list: join the
 *      inode to sc->tp and call xfs_iunlink().
 *   4. If the VFS link count differs from rp->parents, join the inode and
 *      call set_nlink() with a min_t(unsigned long long, ...) clamp of
 *      rp->parents.
 *   5. Log the inode core with xfs_trans_log_inode(..., XFS_ILOG_CORE).
 *
 * NOTE(review): several statements are not visible in this excerpt: any
 * reset of rp->parents before the walk, the updates and tests of the joined
 * flag (presumably it prevents joining the inode twice and gates the final
 * logging), the condition guarding -EFSCORRUPTED (presumably a failed perag
 * lookup), the release of the pag reference after xfs_iunlink_remove(), and
 * the second operand of min_t().  Confirm each against the full source.
 */
1410 xrep_parent_set_nondir_nlink(
1411 struct xrep_parent
*rp
)
1413 struct xfs_scrub
*sc
= rp
->sc
;
1414 struct xfs_inode
*ip
= sc
->ip
;
1415 struct xfs_perag
*pag
;
1416 bool joined
= false;
1419 /* Count parent pointers so we can reset the file link count. */
1421 error
= xchk_xattr_walk(sc
, ip
, xrep_parent_count_pptr
, NULL
, rp
);
1425 if (rp
->parents
> 0 && xfs_inode_on_unlinked_list(ip
)) {
1426 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
1430 * The file is on the unlinked list but we found parents.
1431 * Remove the file from the unlinked list.
1433 pag
= xfs_perag_get(sc
->mp
, XFS_INO_TO_AGNO(sc
->mp
, ip
->i_ino
));
1436 return -EFSCORRUPTED
;
1439 error
= xfs_iunlink_remove(sc
->tp
, pag
, ip
);
1443 } else if (rp
->parents
== 0 && !xfs_inode_on_unlinked_list(ip
)) {
1444 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
1448 * The file is not on the unlinked list but we found no
1449 * parents. Add the file to the unlinked list.
1451 error
= xfs_iunlink(sc
->tp
, ip
);
1456 /* Set the correct link count. */
1457 if (VFS_I(ip
)->i_nlink
!= rp
->parents
) {
1459 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
1463 set_nlink(VFS_I(ip
), min_t(unsigned long long, rp
->parents
,
1467 /* Log the inode to keep it moving forward if we dirtied anything. */
1469 xfs_trans_log_inode(sc
->tp
, ip
, XFS_ILOG_CORE
);
1473 /* Set up the filesystem scan so we can look for parents. */
/*
 * xrep_parent_setup_scan() - allocate the staging structures for a parent
 * repair and start the findparent scan.
 *
 * @rp: parent repair context that receives the allocations listed below.
 *
 * Without the parent pointer feature (!xfs_has_parent(sc->mp)) the function
 * returns the result of xrep_findparent_scan_start(sc, &rp->pscan) and
 * allocates nothing.
 *
 * With parent pointers, the visible setup order is:
 *   1. rp->xattr_name: kvmalloc() of XATTR_NAME_MAX + 1 bytes.
 *   2. xattr value buffer via xrep_parent_alloc_xattr_value(rp, max_len),
 *      where max_len is XATTR_SIZE_MAX when XCHK_TRY_HARDER is set in
 *      sc->flags and xfs_attr_leaf_entsize_local_max(geo->blksize)
 *      otherwise (geo is sc->mp->m_attr_geo).
 *   3. An xfarray of struct xrep_pptr, described as
 *      "parent pointer entries".
 *   4. rp->pptr_names: an xfblob described as "parent pointer names".
 *   5. rp->xattr_records: an xfarray of struct xrep_parent_xattr.
 *   6. rp->xattr_blobs: an xfblob for the retained xattr values.
 *   7. __xrep_findparent_scan_start() with xrep_parent_live_update as the
 *      hook argument.
 *
 * NOTE(review): the declarations of descr, max_len and error, the failure
 * return for the kvmalloc() check, the destination argument of the first
 * xfarray_create(), any freeing of descr, some of the goto statements, the
 * label definitions and the final return statements are not visible in
 * this excerpt.  Presumably the first xfarray lands in rp->pptr_recs, since
 * that is what the unwind path destroys -- confirm against the full source.
 */
1475 xrep_parent_setup_scan(
1476 struct xrep_parent
*rp
)
1478 struct xfs_scrub
*sc
= rp
->sc
;
1480 struct xfs_da_geometry
*geo
= sc
->mp
->m_attr_geo
;
1484 if (!xfs_has_parent(sc
->mp
))
1485 return xrep_findparent_scan_start(sc
, &rp
->pscan
);
1487 /* Buffers for copying non-pptr attrs to the tempfile */
1488 rp
->xattr_name
= kvmalloc(XATTR_NAME_MAX
+ 1, XCHK_GFP_FLAGS
);
1489 if (!rp
->xattr_name
)
1493 * Allocate enough memory to handle loading local attr values from the
1494 * xfblob data while flushing stashed attrs to the temporary file.
1495 * We only realloc the buffer when salvaging remote attr values, so
1496 * TRY_HARDER means we allocate the maximal attr value size.
1498 if (sc
->flags
& XCHK_TRY_HARDER
)
1499 max_len
= XATTR_SIZE_MAX
;
1501 max_len
= xfs_attr_leaf_entsize_local_max(geo
->blksize
);
1502 error
= xrep_parent_alloc_xattr_value(rp
, max_len
);
1504 goto out_xattr_name
;
1506 /* Set up some staging memory for logging parent pointer updates. */
1507 descr
= xchk_xfile_ino_descr(sc
, "parent pointer entries");
1508 error
= xfarray_create(descr
, 0, sizeof(struct xrep_pptr
),
1512 goto out_xattr_value
;
1514 descr
= xchk_xfile_ino_descr(sc
, "parent pointer names");
1515 error
= xfblob_create(descr
, &rp
->pptr_names
);
1520 /* Set up some storage for copying attrs before the mapping exchange */
1521 descr
= xchk_xfile_ino_descr(sc
,
1522 "parent pointer retained xattr entries");
1523 error
= xfarray_create(descr
, 0, sizeof(struct xrep_parent_xattr
),
1524 &rp
->xattr_records
);
1529 descr
= xchk_xfile_ino_descr(sc
,
1530 "parent pointer retained xattr values");
1531 error
= xfblob_create(descr
, &rp
->xattr_blobs
);
1536 error
= __xrep_findparent_scan_start(sc
, &rp
->pscan
,
1537 xrep_parent_live_update
);
1539 goto out_attr_values
;
/*
 * Error unwind: the staging structures are torn down in the reverse of the
 * order in which they were set up, and each pointer is cleared afterwards.
 */
1544 xfblob_destroy(rp
->xattr_blobs
);
1545 rp
->xattr_blobs
= NULL
;
1547 xfarray_destroy(rp
->xattr_records
);
1548 rp
->xattr_records
= NULL
;
1550 xfblob_destroy(rp
->pptr_names
);
1551 rp
->pptr_names
= NULL
;
1553 xfarray_destroy(rp
->pptr_recs
);
1554 rp
->pptr_recs
= NULL
;
1556 kvfree(rp
->xattr_value
);
1557 rp
->xattr_value
= NULL
;
1559 kvfree(rp
->xattr_name
);
1560 rp
->xattr_name
= NULL
;
1566 struct xfs_scrub
*sc
)
1568 struct xrep_parent
*rp
= sc
->buf
;
1572 * When the parent pointers feature is enabled, repairs are committed
1573 * by atomically committing a new xattr structure and reaping the old
1574 * attr fork. Reaping requires rmap and exchange-range to be enabled.
1576 if (xfs_has_parent(sc
->mp
)) {
1577 if (!xfs_has_rmapbt(sc
->mp
))
1579 if (!xfs_has_exchange_range(sc
->mp
))
1583 error
= xrep_parent_setup_scan(rp
);
1587 if (xfs_has_parent(sc
->mp
))
1588 error
= xrep_parent_scan_dirtree(rp
);
1590 error
= xrep_parent_find_dotdot(rp
);
1594 /* Last chance to abort before we start committing dotdot fixes. */
1595 if (xchk_should_terminate(sc
, &error
))
1598 error
= xrep_parent_rebuild_tree(rp
);
1601 if (xfs_has_parent(sc
->mp
) && !S_ISDIR(VFS_I(sc
->ip
)->i_mode
)) {
1602 error
= xrep_parent_set_nondir_nlink(rp
);
1607 error
= xrep_defer_finish(sc
);
1610 xrep_parent_teardown(rp
);