1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
19 #include "xfs_icache.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_parent.h"
23 #include "xfs_attr_sf.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/readdir.h"
31 #include <linux/namei.h>
37 * If the directory tree is damaged, children of that directory become
38 * inaccessible via that file path. If a child has no other parents, the file
39 * is said to be orphaned. xfs_repair fixes this situation by creating an
40 * orphanage directory (specifically, /lost+found) and creating a directory
41 * entry pointing to the orphaned file.
43 * Online repair follows this tactic by creating a root-owned /lost+found
44 * directory if one does not exist. If an orphan is found, it will move that
45 * file into the orphanage.
48 /* Make the orphanage owned by root. */
55 struct xfs_mount
*mp
= sc
->mp
;
56 struct xfs_dquot
*udqp
= NULL
, *gdqp
= NULL
, *pdqp
= NULL
;
57 struct xfs_dquot
*oldu
= NULL
, *oldg
= NULL
, *oldp
= NULL
;
58 struct inode
*inode
= VFS_I(dp
);
61 error
= xfs_qm_vop_dqalloc(dp
, GLOBAL_ROOT_UID
, GLOBAL_ROOT_GID
, 0,
62 XFS_QMOPT_QUOTALL
, &udqp
, &gdqp
, &pdqp
);
66 error
= xfs_trans_alloc_ichange(dp
, udqp
, gdqp
, pdqp
, true, &tp
);
71 * Always clear setuid/setgid/sticky on the orphanage since we don't
72 * normally want that functionality on this directory and xfs_repair
73 * doesn't create it this way either. Leave the other access bits
76 inode
->i_mode
&= ~(S_ISUID
| S_ISGID
| S_ISVTX
);
79 * Change the ownerships and register quota modifications
82 if (!uid_eq(inode
->i_uid
, GLOBAL_ROOT_UID
)) {
83 if (XFS_IS_UQUOTA_ON(mp
))
84 oldu
= xfs_qm_vop_chown(tp
, dp
, &dp
->i_udquot
, udqp
);
85 inode
->i_uid
= GLOBAL_ROOT_UID
;
87 if (!gid_eq(inode
->i_gid
, GLOBAL_ROOT_GID
)) {
88 if (XFS_IS_GQUOTA_ON(mp
))
89 oldg
= xfs_qm_vop_chown(tp
, dp
, &dp
->i_gdquot
, gdqp
);
90 inode
->i_gid
= GLOBAL_ROOT_GID
;
92 if (dp
->i_projid
!= 0) {
93 if (XFS_IS_PQUOTA_ON(mp
))
94 oldp
= xfs_qm_vop_chown(tp
, dp
, &dp
->i_pdquot
, pdqp
);
98 dp
->i_diflags
&= ~(XFS_DIFLAG_REALTIME
| XFS_DIFLAG_RTINHERIT
);
99 xfs_trans_log_inode(tp
, dp
, XFS_ILOG_CORE
);
101 XFS_STATS_INC(mp
, xs_ig_attrchg
);
103 if (xfs_has_wsync(mp
))
104 xfs_trans_set_sync(tp
);
105 error
= xfs_trans_commit(tp
);
118 #define ORPHANAGE "lost+found"
120 /* Create the orphanage directory, and set sc->orphanage to it. */
122 xrep_orphanage_create(
123 struct xfs_scrub
*sc
)
125 struct xfs_mount
*mp
= sc
->mp
;
126 struct dentry
*root_dentry
, *orphanage_dentry
;
127 struct inode
*root_inode
= VFS_I(sc
->mp
->m_rootip
);
128 struct inode
*orphanage_inode
;
131 if (xfs_is_shutdown(mp
))
133 if (xfs_is_readonly(mp
)) {
134 sc
->orphanage
= NULL
;
138 ASSERT(sc
->tp
== NULL
);
139 ASSERT(sc
->orphanage
== NULL
);
141 /* Find the dentry for the root directory... */
142 root_dentry
= d_find_alias(root_inode
);
144 error
= -EFSCORRUPTED
;
148 /* ...which is a directory, right? */
149 if (!d_is_dir(root_dentry
)) {
150 error
= -EFSCORRUPTED
;
154 /* Try to find the orphanage directory. */
155 inode_lock_nested(root_inode
, I_MUTEX_PARENT
);
156 orphanage_dentry
= lookup_one_len(ORPHANAGE
, root_dentry
,
158 if (IS_ERR(orphanage_dentry
)) {
159 error
= PTR_ERR(orphanage_dentry
);
160 goto out_unlock_root
;
164 * Nothing found? Call mkdir to create the orphanage. Create the
165 * directory without other-user access because we're live and someone
166 * could have been relying partly on minimal access to a parent
167 * directory to control access to a file we put in here.
169 if (d_really_is_negative(orphanage_dentry
)) {
170 error
= vfs_mkdir(&nop_mnt_idmap
, root_inode
, orphanage_dentry
,
173 goto out_dput_orphanage
;
176 /* Not a directory? Bail out. */
177 if (!d_is_dir(orphanage_dentry
)) {
179 goto out_dput_orphanage
;
183 * Grab a reference to the orphanage. This /should/ succeed since
184 * we hold the root directory locked and therefore nobody can delete
187 orphanage_inode
= igrab(d_inode(orphanage_dentry
));
188 if (!orphanage_inode
) {
190 goto out_dput_orphanage
;
193 /* Make sure the orphanage is owned by root. */
194 error
= xrep_chown_orphanage(sc
, XFS_I(orphanage_inode
));
196 goto out_dput_orphanage
;
198 /* Stash the reference for later and bail out. */
199 sc
->orphanage
= XFS_I(orphanage_inode
);
200 sc
->orphanage_ilock_flags
= 0;
203 dput(orphanage_dentry
);
205 inode_unlock(VFS_I(sc
->mp
->m_rootip
));
213 xrep_orphanage_ilock(
214 struct xfs_scrub
*sc
,
215 unsigned int ilock_flags
)
217 sc
->orphanage_ilock_flags
|= ilock_flags
;
218 xfs_ilock(sc
->orphanage
, ilock_flags
);
222 xrep_orphanage_ilock_nowait(
223 struct xfs_scrub
*sc
,
224 unsigned int ilock_flags
)
226 if (xfs_ilock_nowait(sc
->orphanage
, ilock_flags
)) {
227 sc
->orphanage_ilock_flags
|= ilock_flags
;
235 xrep_orphanage_iunlock(
236 struct xfs_scrub
*sc
,
237 unsigned int ilock_flags
)
239 xfs_iunlock(sc
->orphanage
, ilock_flags
);
240 sc
->orphanage_ilock_flags
&= ~ilock_flags
;
243 /* Grab the IOLOCK of the orphanage and sc->ip. */
245 xrep_orphanage_iolock_two(
246 struct xfs_scrub
*sc
)
251 if (xchk_should_terminate(sc
, &error
))
255 * Normal XFS takes the IOLOCK before grabbing a transaction.
256 * Scrub holds a transaction, which means that we can't block
259 if (xrep_orphanage_ilock_nowait(sc
, XFS_IOLOCK_EXCL
)) {
260 if (xchk_ilock_nowait(sc
, XFS_IOLOCK_EXCL
))
262 xrep_orphanage_iunlock(sc
, XFS_IOLOCK_EXCL
);
270 /* Release the orphanage. */
273 struct xfs_scrub
*sc
)
278 if (sc
->orphanage_ilock_flags
)
279 xfs_iunlock(sc
->orphanage
, sc
->orphanage_ilock_flags
);
281 xchk_irele(sc
, sc
->orphanage
);
282 sc
->orphanage
= NULL
;
285 /* Adoption moves a file into /lost+found */
287 /* Can the orphanage adopt @sc->ip? */
289 xrep_orphanage_can_adopt(
290 struct xfs_scrub
*sc
)
292 ASSERT(sc
->ip
!= NULL
);
296 if (sc
->ip
== sc
->orphanage
)
298 if (xchk_inode_is_sb_rooted(sc
->ip
))
300 if (xfs_is_internal_inode(sc
->ip
))
306 * Create a new transaction to send a child to the orphanage.
308 * Allocate a new transaction with sufficient disk space to handle the
309 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, join them to the
310 * transaction, and reserve quota to reparent the latter. Caller must hold the
311 * IOLOCK of the orphanage and sc->ip.
314 xrep_adoption_trans_alloc(
315 struct xfs_scrub
*sc
,
316 struct xrep_adoption
*adopt
)
318 struct xfs_mount
*mp
= sc
->mp
;
319 unsigned int child_blkres
= 0;
322 ASSERT(sc
->tp
== NULL
);
323 ASSERT(sc
->ip
!= NULL
);
324 ASSERT(sc
->orphanage
!= NULL
);
325 ASSERT(sc
->ilock_flags
& XFS_IOLOCK_EXCL
);
326 ASSERT(sc
->orphanage_ilock_flags
& XFS_IOLOCK_EXCL
);
327 ASSERT(!(sc
->ilock_flags
& (XFS_ILOCK_SHARED
| XFS_ILOCK_EXCL
)));
328 ASSERT(!(sc
->orphanage_ilock_flags
&
329 (XFS_ILOCK_SHARED
| XFS_ILOCK_EXCL
)));
331 /* Compute the worst case space reservation that we need. */
333 adopt
->orphanage_blkres
= xfs_link_space_res(mp
, MAXNAMELEN
);
334 if (S_ISDIR(VFS_I(sc
->ip
)->i_mode
))
335 child_blkres
= xfs_rename_space_res(mp
, 0, false,
336 xfs_name_dotdot
.len
, false);
337 if (xfs_has_parent(mp
))
338 child_blkres
+= XFS_ADDAFORK_SPACE_RES(mp
);
339 adopt
->child_blkres
= child_blkres
;
342 * Allocate a transaction to link the child into the parent, along with
343 * enough disk space to handle expansion of both the orphanage and the
344 * dotdot entry of a child directory.
346 error
= xfs_trans_alloc(mp
, &M_RES(mp
)->tr_link
,
347 adopt
->orphanage_blkres
+ adopt
->child_blkres
, 0, 0,
352 xfs_lock_two_inodes(sc
->orphanage
, XFS_ILOCK_EXCL
,
353 sc
->ip
, XFS_ILOCK_EXCL
);
354 sc
->ilock_flags
|= XFS_ILOCK_EXCL
;
355 sc
->orphanage_ilock_flags
|= XFS_ILOCK_EXCL
;
357 xfs_trans_ijoin(sc
->tp
, sc
->orphanage
, 0);
358 xfs_trans_ijoin(sc
->tp
, sc
->ip
, 0);
361 * Reserve enough quota in the orphan directory to add the new name.
362 * Normally the orphanage should have user/group/project ids of zero
363 * and hence is not subject to quota enforcement, but we're allowed to
364 * exceed quota to reattach disconnected parts of the directory tree.
366 error
= xfs_trans_reserve_quota_nblks(sc
->tp
, sc
->orphanage
,
367 adopt
->orphanage_blkres
, 0, true);
372 * Reserve enough quota in the child directory to change dotdot.
373 * Here we're also allowed to exceed file quota to repair inconsistent
376 if (adopt
->child_blkres
) {
377 error
= xfs_trans_reserve_quota_nblks(sc
->tp
, sc
->ip
,
378 adopt
->child_blkres
, 0, true);
385 xchk_trans_cancel(sc
);
386 xrep_orphanage_iunlock(sc
, XFS_ILOCK_EXCL
);
387 xchk_iunlock(sc
, XFS_ILOCK_EXCL
);
392 * Compute the xfs_name for the directory entry that we're adding to the
393 * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not
394 * reuse namebuf until the adoption completes or is dissolved.
397 xrep_adoption_compute_name(
398 struct xrep_adoption
*adopt
,
399 struct xfs_name
*xname
)
401 struct xfs_scrub
*sc
= adopt
->sc
;
402 char *namebuf
= (void *)xname
->name
;
404 unsigned int incr
= 0;
407 adopt
->xname
= xname
;
408 xname
->len
= snprintf(namebuf
, MAXNAMELEN
, "%llu", sc
->ip
->i_ino
);
409 xname
->type
= xfs_mode_to_ftype(VFS_I(sc
->ip
)->i_mode
);
411 /* Make sure the filename is unique in the lost+found. */
412 error
= xchk_dir_lookup(sc
, sc
->orphanage
, xname
, &ino
);
413 while (error
== 0 && incr
< 10000) {
414 xname
->len
= snprintf(namebuf
, MAXNAMELEN
, "%llu.%u",
415 sc
->ip
->i_ino
, ++incr
);
416 error
= xchk_dir_lookup(sc
, sc
->orphanage
, xname
, &ino
);
419 /* We already have 10,000 entries in the orphanage? */
420 return -EFSCORRUPTED
;
423 if (error
!= -ENOENT
)
429 * Make sure the dcache does not have a positive dentry for the name we've
430 * chosen. The caller should have checked with the ondisk directory, so any
431 * discrepancy is a sign that something is seriously wrong.
434 xrep_adoption_check_dcache(
435 struct xrep_adoption
*adopt
)
437 struct qstr qname
= QSTR_INIT(adopt
->xname
->name
,
439 struct xfs_scrub
*sc
= adopt
->sc
;
440 struct dentry
*d_orphanage
, *d_child
;
443 d_orphanage
= d_find_alias(VFS_I(sc
->orphanage
));
447 d_child
= d_hash_and_lookup(d_orphanage
, &qname
);
449 trace_xrep_adoption_check_child(sc
->mp
, d_child
);
451 if (d_is_positive(d_child
)) {
452 ASSERT(d_is_negative(d_child
));
453 error
= -EFSCORRUPTED
;
464 * Invalidate all dentries for the name that was added to the orphanage
465 * directory, and all dentries pointing to the child inode that was moved.
467 * There should not be any positive entries for the name, since we've
468 * maintained our lock on the orphanage directory.
471 xrep_adoption_zap_dcache(
472 struct xrep_adoption
*adopt
)
474 struct qstr qname
= QSTR_INIT(adopt
->xname
->name
,
476 struct xfs_scrub
*sc
= adopt
->sc
;
477 struct dentry
*d_orphanage
, *d_child
;
479 /* Invalidate all dentries for the adoption name */
480 d_orphanage
= d_find_alias(VFS_I(sc
->orphanage
));
484 d_child
= d_hash_and_lookup(d_orphanage
, &qname
);
485 while (d_child
!= NULL
) {
486 trace_xrep_adoption_invalidate_child(sc
->mp
, d_child
);
488 ASSERT(d_is_negative(d_child
));
489 d_invalidate(d_child
);
491 d_child
= d_lookup(d_orphanage
, &qname
);
496 /* Invalidate all the dentries pointing down to this file. */
497 while ((d_child
= d_find_alias(VFS_I(sc
->ip
))) != NULL
) {
498 trace_xrep_adoption_invalidate_child(sc
->mp
, d_child
);
500 d_invalidate(d_child
);
506 * If we have to add an attr fork ahead of a parent pointer update, how much
507 * space should we ask for?
510 xrep_adoption_attr_sizeof(
511 const struct xrep_adoption
*adopt
)
513 return sizeof(struct xfs_attr_sf_hdr
) +
514 xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec
),
519 * Move the current file to the orphanage under the computed name.
521 * Returns with a dirty transaction so that the caller can handle any other
522 * work, such as fixing up unlinked lists or resetting link counts.
526 struct xrep_adoption
*adopt
)
528 struct xfs_scrub
*sc
= adopt
->sc
;
529 bool isdir
= S_ISDIR(VFS_I(sc
->ip
)->i_mode
);
532 trace_xrep_adoption_reparent(sc
->orphanage
, adopt
->xname
,
535 error
= xrep_adoption_check_dcache(adopt
);
540 * If this filesystem has parent pointers, ensure that the file being
541 * moved to the orphanage has an attribute fork. This is required
542 * because the parent pointer code does not itself add attr forks.
544 if (!xfs_inode_has_attr_fork(sc
->ip
) && xfs_has_parent(sc
->mp
)) {
545 int sf_size
= xrep_adoption_attr_sizeof(adopt
);
547 error
= xfs_bmap_add_attrfork(sc
->tp
, sc
->ip
, sf_size
, true);
552 /* Create the new name in the orphanage. */
553 error
= xfs_dir_createname(sc
->tp
, sc
->orphanage
, adopt
->xname
,
554 sc
->ip
->i_ino
, adopt
->orphanage_blkres
);
559 * Bump the link count of the orphanage if we just added a
560 * subdirectory, and update its timestamps.
562 xfs_trans_ichgtime(sc
->tp
, sc
->orphanage
,
563 XFS_ICHGTIME_MOD
| XFS_ICHGTIME_CHG
);
565 xfs_bumplink(sc
->tp
, sc
->orphanage
);
566 xfs_trans_log_inode(sc
->tp
, sc
->orphanage
, XFS_ILOG_CORE
);
568 /* Bump the link count of the child. */
569 if (adopt
->bump_child_nlink
) {
570 xfs_bumplink(sc
->tp
, sc
->ip
);
571 xfs_trans_log_inode(sc
->tp
, sc
->ip
, XFS_ILOG_CORE
);
574 /* Replace the dotdot entry if the child is a subdirectory. */
576 error
= xfs_dir_replace(sc
->tp
, sc
->ip
, &xfs_name_dotdot
,
577 sc
->orphanage
->i_ino
, adopt
->child_blkres
);
582 /* Add a parent pointer from the file back to the lost+found. */
583 if (xfs_has_parent(sc
->mp
)) {
584 error
= xfs_parent_addname(sc
->tp
, &adopt
->ppargs
,
585 sc
->orphanage
, adopt
->xname
, sc
->ip
);
591 * Notify dirent hooks that we moved the file to /lost+found, and
592 * finish all the deferred work so that we know the adoption is fully
593 * recorded in the log.
595 xfs_dir_update_hook(sc
->orphanage
, sc
->ip
, 1, adopt
->xname
);
597 /* Remove negative dentries from the lost+found's dcache */
598 xrep_adoption_zap_dcache(adopt
);
603 * Roll to a clean scrub transaction so that we can release the orphanage,
604 * even if xrep_adoption_move was not called.
606 * Commits all the work and deferred ops attached to an adoption request and
607 * rolls to a clean scrub transaction. On success, returns 0 with the scrub
608 * context holding a clean transaction with no inodes joined. On failure,
609 * returns negative errno with no scrub transaction. All inode locks are
610 * still held after this function returns.
613 xrep_adoption_trans_roll(
614 struct xrep_adoption
*adopt
)
616 struct xfs_scrub
*sc
= adopt
->sc
;
619 trace_xrep_adoption_trans_roll(sc
->orphanage
, sc
->ip
,
620 !!(sc
->tp
->t_flags
& XFS_TRANS_DIRTY
));
622 /* Finish all the deferred ops to commit all repairs. */
623 error
= xrep_defer_finish(sc
);
627 /* Roll the transaction once more to detach the inodes. */
628 return xfs_trans_roll(&sc
->tp
);