1 // SPDX-License-Identifier: GPL-2.0
5 #include "btree_cache.h"
6 #include "btree_update.h"
12 #include "fs-common.h"
16 #include "recovery_passes.h"
21 #include <linux/bsearch.h>
22 #include <linux/dcache.h> /* struct qstr */
24 static bool inode_points_to_dirent(struct bch_inode_unpacked
*inode
,
25 struct bkey_s_c_dirent d
)
27 return inode
->bi_dir
== d
.k
->p
.inode
&&
28 inode
->bi_dir_offset
== d
.k
->p
.offset
;
31 static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d
,
32 struct bch_inode_unpacked
*inode
)
34 if (d
.v
->d_type
== DT_SUBVOL
35 ? le32_to_cpu(d
.v
->d_child_subvol
) == inode
->bi_subvol
36 : le64_to_cpu(d
.v
->d_inum
) == inode
->bi_inum
)
38 return -BCH_ERR_ENOENT_dirent_doesnt_match_inode
;
41 static void dirent_inode_mismatch_msg(struct printbuf
*out
,
43 struct bkey_s_c_dirent dirent
,
44 struct bch_inode_unpacked
*inode
)
46 prt_str(out
, "inode points to dirent that does not point back:");
48 bch2_bkey_val_to_text(out
, c
, dirent
.s_c
);
50 bch2_inode_unpacked_to_text(out
, inode
);
53 static int dirent_points_to_inode(struct bch_fs
*c
,
54 struct bkey_s_c_dirent dirent
,
55 struct bch_inode_unpacked
*inode
)
57 int ret
= dirent_points_to_inode_nowarn(dirent
, inode
);
59 struct printbuf buf
= PRINTBUF
;
60 dirent_inode_mismatch_msg(&buf
, c
, dirent
, inode
);
61 bch_warn(c
, "%s", buf
.buf
);
68 * XXX: this is handling transaction restarts without returning
69 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
71 static s64
bch2_count_inode_sectors(struct btree_trans
*trans
, u64 inum
,
76 int ret
= for_each_btree_key_upto(trans
, iter
, BTREE_ID_extents
,
77 SPOS(inum
, 0, snapshot
),
80 if (bkey_extent_is_allocation(k
.k
))
85 return ret
?: sectors
;
88 static s64
bch2_count_subdirs(struct btree_trans
*trans
, u64 inum
,
93 int ret
= for_each_btree_key_upto(trans
, iter
, BTREE_ID_dirents
,
94 SPOS(inum
, 0, snapshot
),
97 if (k
.k
->type
== KEY_TYPE_dirent
&&
98 bkey_s_c_to_dirent(k
).v
->d_type
== DT_DIR
)
103 return ret
?: subdirs
;
106 static int subvol_lookup(struct btree_trans
*trans
, u32 subvol
,
107 u32
*snapshot
, u64
*inum
)
109 struct bch_subvolume s
;
110 int ret
= bch2_subvolume_get(trans
, subvol
, false, 0, &s
);
112 *snapshot
= le32_to_cpu(s
.snapshot
);
113 *inum
= le64_to_cpu(s
.inode
);
117 static int lookup_first_inode(struct btree_trans
*trans
, u64 inode_nr
,
118 struct bch_inode_unpacked
*inode
)
120 struct btree_iter iter
;
124 for_each_btree_key_norestart(trans
, iter
, BTREE_ID_inodes
, POS(0, inode_nr
),
125 BTREE_ITER_all_snapshots
, k
, ret
) {
126 if (k
.k
->p
.offset
!= inode_nr
)
128 if (!bkey_is_inode(k
.k
))
130 ret
= bch2_inode_unpack(k
, inode
);
133 ret
= -BCH_ERR_ENOENT_inode
;
135 bch_err_msg(trans
->c
, ret
, "fetching inode %llu", inode_nr
);
136 bch2_trans_iter_exit(trans
, &iter
);
140 static int lookup_inode(struct btree_trans
*trans
, u64 inode_nr
, u32 snapshot
,
141 struct bch_inode_unpacked
*inode
)
143 struct btree_iter iter
;
147 k
= bch2_bkey_get_iter(trans
, &iter
, BTREE_ID_inodes
,
148 SPOS(0, inode_nr
, snapshot
), 0);
153 ret
= bkey_is_inode(k
.k
)
154 ? bch2_inode_unpack(k
, inode
)
155 : -BCH_ERR_ENOENT_inode
;
157 bch2_trans_iter_exit(trans
, &iter
);
161 static int lookup_dirent_in_snapshot(struct btree_trans
*trans
,
162 struct bch_hash_info hash_info
,
163 subvol_inum dir
, struct qstr
*name
,
164 u64
*target
, unsigned *type
, u32 snapshot
)
166 struct btree_iter iter
;
167 struct bkey_s_c k
= bch2_hash_lookup_in_snapshot(trans
, &iter
, bch2_dirent_hash_desc
,
168 &hash_info
, dir
, name
, 0, snapshot
);
169 int ret
= bkey_err(k
);
173 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter
));
174 *target
= le64_to_cpu(d
.v
->d_inum
);
176 bch2_trans_iter_exit(trans
, &iter
);
180 static int __remove_dirent(struct btree_trans
*trans
, struct bpos pos
)
182 struct bch_fs
*c
= trans
->c
;
183 struct btree_iter iter
;
184 struct bch_inode_unpacked dir_inode
;
185 struct bch_hash_info dir_hash_info
;
188 ret
= lookup_first_inode(trans
, pos
.inode
, &dir_inode
);
192 dir_hash_info
= bch2_hash_info_init(c
, &dir_inode
);
194 bch2_trans_iter_init(trans
, &iter
, BTREE_ID_dirents
, pos
, BTREE_ITER_intent
);
196 ret
= bch2_btree_iter_traverse(&iter
) ?:
197 bch2_hash_delete_at(trans
, bch2_dirent_hash_desc
,
198 &dir_hash_info
, &iter
,
199 BTREE_UPDATE_internal_snapshot_node
);
200 bch2_trans_iter_exit(trans
, &iter
);
206 /* Get lost+found, create if it doesn't exist: */
207 static int lookup_lostfound(struct btree_trans
*trans
, u32 snapshot
,
208 struct bch_inode_unpacked
*lostfound
,
209 u64 reattaching_inum
)
211 struct bch_fs
*c
= trans
->c
;
212 struct qstr lostfound_str
= QSTR("lost+found");
217 struct bch_snapshot_tree st
;
218 ret
= bch2_snapshot_tree_lookup(trans
,
219 bch2_snapshot_tree(c
, snapshot
), &st
);
223 subvol_inum root_inum
= { .subvol
= le32_to_cpu(st
.master_subvol
) };
225 struct bch_subvolume subvol
;
226 ret
= bch2_subvolume_get(trans
, le32_to_cpu(st
.master_subvol
),
228 bch_err_msg(c
, ret
, "looking up root subvol %u for snapshot %u",
229 le32_to_cpu(st
.master_subvol
), snapshot
);
234 struct btree_iter iter
;
235 struct bkey_i_subvolume
*subvol
= bch2_bkey_get_mut_typed(trans
, &iter
,
236 BTREE_ID_subvolumes
, POS(0, le32_to_cpu(st
.master_subvol
)),
238 ret
= PTR_ERR_OR_ZERO(subvol
);
242 subvol
->v
.inode
= cpu_to_le64(reattaching_inum
);
243 bch2_trans_iter_exit(trans
, &iter
);
246 root_inum
.inum
= le64_to_cpu(subvol
.inode
);
248 struct bch_inode_unpacked root_inode
;
249 struct bch_hash_info root_hash_info
;
250 ret
= lookup_inode(trans
, root_inum
.inum
, snapshot
, &root_inode
);
251 bch_err_msg(c
, ret
, "looking up root inode %llu for subvol %u",
252 root_inum
.inum
, le32_to_cpu(st
.master_subvol
));
256 root_hash_info
= bch2_hash_info_init(c
, &root_inode
);
258 ret
= lookup_dirent_in_snapshot(trans
, root_hash_info
, root_inum
,
259 &lostfound_str
, &inum
, &d_type
, snapshot
);
260 if (bch2_err_matches(ret
, ENOENT
))
261 goto create_lostfound
;
267 if (d_type
!= DT_DIR
) {
268 bch_err(c
, "error looking up lost+found: not a directory");
269 return -BCH_ERR_ENOENT_not_directory
;
273 * The bch2_check_dirents pass has already run, dangling dirents
274 * shouldn't exist here:
276 ret
= lookup_inode(trans
, inum
, snapshot
, lostfound
);
277 bch_err_msg(c
, ret
, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
278 inum
, snapshot
, root_inum
.inum
, bch2_snapshot_root(c
, snapshot
));
283 * we always create lost+found in the root snapshot; we don't want
284 * different branches of the snapshot tree to have different lost+found
286 snapshot
= le32_to_cpu(st
.root_snapshot
);
288 * XXX: we could have a nicer log message here if we had a nice way to
289 * walk backpointers to print a path
291 bch_notice(c
, "creating lost+found in subvol %llu snapshot %u",
292 root_inum
.subvol
, le32_to_cpu(st
.root_snapshot
));
294 u64 now
= bch2_current_time(c
);
295 struct btree_iter lostfound_iter
= { NULL
};
296 u64 cpu
= raw_smp_processor_id();
298 bch2_inode_init_early(c
, lostfound
);
299 bch2_inode_init_late(lostfound
, now
, 0, 0, S_IFDIR
|0700, 0, &root_inode
);
300 lostfound
->bi_dir
= root_inode
.bi_inum
;
301 lostfound
->bi_snapshot
= le32_to_cpu(st
.root_snapshot
);
303 root_inode
.bi_nlink
++;
305 ret
= bch2_inode_create(trans
, &lostfound_iter
, lostfound
, snapshot
, cpu
);
309 bch2_btree_iter_set_snapshot(&lostfound_iter
, snapshot
);
310 ret
= bch2_btree_iter_traverse(&lostfound_iter
);
314 ret
= bch2_dirent_create_snapshot(trans
,
315 0, root_inode
.bi_inum
, snapshot
, &root_hash_info
,
316 mode_to_type(lostfound
->bi_mode
),
319 &lostfound
->bi_dir_offset
,
320 STR_HASH_must_create
) ?:
321 bch2_inode_write_flags(trans
, &lostfound_iter
, lostfound
,
322 BTREE_UPDATE_internal_snapshot_node
);
324 bch_err_msg(c
, ret
, "creating lost+found");
325 bch2_trans_iter_exit(trans
, &lostfound_iter
);
329 static inline bool inode_should_reattach(struct bch_inode_unpacked
*inode
)
331 if (inode
->bi_inum
== BCACHEFS_ROOT_INO
&&
332 inode
->bi_subvol
== BCACHEFS_ROOT_SUBVOL
)
335 return !inode
->bi_dir
&& !(inode
->bi_flags
& BCH_INODE_unlinked
);
338 static int maybe_delete_dirent(struct btree_trans
*trans
, struct bpos d_pos
, u32 snapshot
)
340 struct btree_iter iter
;
341 struct bkey_s_c k
= bch2_bkey_get_iter(trans
, &iter
, BTREE_ID_dirents
,
342 SPOS(d_pos
.inode
, d_pos
.offset
, snapshot
),
344 BTREE_ITER_with_updates
);
345 int ret
= bkey_err(k
);
349 if (bpos_eq(k
.k
->p
, d_pos
)) {
351 * delet_at() doesn't work because the update path doesn't
352 * internally use BTREE_ITER_with_updates yet
354 struct bkey_i
*k
= bch2_trans_kmalloc(trans
, sizeof(*k
));
355 ret
= PTR_ERR_OR_ZERO(k
);
360 k
->k
.type
= KEY_TYPE_whiteout
;
362 ret
= bch2_trans_update(trans
, &iter
, k
, BTREE_UPDATE_internal_snapshot_node
);
365 bch2_trans_iter_exit(trans
, &iter
);
369 static int reattach_inode(struct btree_trans
*trans
, struct bch_inode_unpacked
*inode
)
371 struct bch_fs
*c
= trans
->c
;
372 struct bch_inode_unpacked lostfound
;
376 u32 dirent_snapshot
= inode
->bi_snapshot
;
377 if (inode
->bi_subvol
) {
378 inode
->bi_parent_subvol
= BCACHEFS_ROOT_SUBVOL
;
381 ret
= subvol_lookup(trans
, inode
->bi_parent_subvol
,
382 &dirent_snapshot
, &root_inum
);
386 snprintf(name_buf
, sizeof(name_buf
), "subvol-%u", inode
->bi_subvol
);
388 snprintf(name_buf
, sizeof(name_buf
), "%llu", inode
->bi_inum
);
391 ret
= lookup_lostfound(trans
, dirent_snapshot
, &lostfound
, inode
->bi_inum
);
395 lostfound
.bi_nlink
+= S_ISDIR(inode
->bi_mode
);
397 /* ensure lost+found inode is also present in inode snapshot */
398 if (!inode
->bi_subvol
) {
399 BUG_ON(!bch2_snapshot_is_ancestor(c
, inode
->bi_snapshot
, lostfound
.bi_snapshot
));
400 lostfound
.bi_snapshot
= inode
->bi_snapshot
;
403 ret
= __bch2_fsck_write_inode(trans
, &lostfound
);
407 struct bch_hash_info dir_hash
= bch2_hash_info_init(c
, &lostfound
);
408 struct qstr name
= (struct qstr
) QSTR(name_buf
);
410 inode
->bi_dir
= lostfound
.bi_inum
;
412 ret
= bch2_dirent_create_snapshot(trans
,
413 inode
->bi_parent_subvol
, lostfound
.bi_inum
,
418 inode
->bi_subvol
?: inode
->bi_inum
,
419 &inode
->bi_dir_offset
,
420 STR_HASH_must_create
);
422 bch_err_msg(c
, ret
, "error creating dirent");
426 ret
= __bch2_fsck_write_inode(trans
, inode
);
431 * Fix up inodes in child snapshots: if they should also be reattached
432 * update the backpointer field, if they should not be we need to emit
433 * whiteouts for the dirent we just created.
435 if (!inode
->bi_subvol
&& bch2_snapshot_is_leaf(c
, inode
->bi_snapshot
) <= 0) {
436 snapshot_id_list whiteouts_done
;
437 struct btree_iter iter
;
440 darray_init(&whiteouts_done
);
442 for_each_btree_key_reverse_norestart(trans
, iter
,
443 BTREE_ID_inodes
, SPOS(0, inode
->bi_inum
, inode
->bi_snapshot
- 1),
444 BTREE_ITER_all_snapshots
|BTREE_ITER_intent
, k
, ret
) {
445 if (k
.k
->p
.offset
!= inode
->bi_inum
)
448 if (!bkey_is_inode(k
.k
) ||
449 !bch2_snapshot_is_ancestor(c
, k
.k
->p
.snapshot
, inode
->bi_snapshot
) ||
450 snapshot_list_has_ancestor(c
, &whiteouts_done
, k
.k
->p
.snapshot
))
453 struct bch_inode_unpacked child_inode
;
454 bch2_inode_unpack(k
, &child_inode
);
456 if (!inode_should_reattach(&child_inode
)) {
457 ret
= maybe_delete_dirent(trans
,
458 SPOS(lostfound
.bi_inum
, inode
->bi_dir_offset
,
464 ret
= snapshot_list_add(c
, &whiteouts_done
, k
.k
->p
.snapshot
);
468 iter
.snapshot
= k
.k
->p
.snapshot
;
469 child_inode
.bi_dir
= inode
->bi_dir
;
470 child_inode
.bi_dir_offset
= inode
->bi_dir_offset
;
472 ret
= bch2_inode_write_flags(trans
, &iter
, &child_inode
,
473 BTREE_UPDATE_internal_snapshot_node
);
478 darray_exit(&whiteouts_done
);
479 bch2_trans_iter_exit(trans
, &iter
);
485 static int remove_backpointer(struct btree_trans
*trans
,
486 struct bch_inode_unpacked
*inode
)
491 struct bch_fs
*c
= trans
->c
;
492 struct btree_iter iter
;
493 struct bkey_s_c_dirent d
=
494 bch2_bkey_get_iter_typed(trans
, &iter
, BTREE_ID_dirents
,
495 SPOS(inode
->bi_dir
, inode
->bi_dir_offset
, inode
->bi_snapshot
), 0,
497 int ret
= bkey_err(d
) ?:
498 dirent_points_to_inode(c
, d
, inode
) ?:
499 __remove_dirent(trans
, d
.k
->p
);
500 bch2_trans_iter_exit(trans
, &iter
);
504 static int reattach_subvol(struct btree_trans
*trans
, struct bkey_s_c_subvolume s
)
506 struct bch_fs
*c
= trans
->c
;
508 struct bch_inode_unpacked inode
;
509 int ret
= bch2_inode_find_by_inum_trans(trans
,
510 (subvol_inum
) { s
.k
->p
.offset
, le64_to_cpu(s
.v
->inode
) },
515 ret
= remove_backpointer(trans
, &inode
);
516 if (!bch2_err_matches(ret
, ENOENT
))
517 bch_err_msg(c
, ret
, "removing dirent");
521 ret
= reattach_inode(trans
, &inode
);
522 bch_err_msg(c
, ret
, "reattaching inode %llu", inode
.bi_inum
);
526 static int reconstruct_subvol(struct btree_trans
*trans
, u32 snapshotid
, u32 subvolid
, u64 inum
)
528 struct bch_fs
*c
= trans
->c
;
530 if (!bch2_snapshot_is_leaf(c
, snapshotid
)) {
531 bch_err(c
, "need to reconstruct subvol, but have interior node snapshot");
532 return -BCH_ERR_fsck_repair_unimplemented
;
536 * If inum isn't set, that means we're being called from check_dirents,
537 * not check_inodes - the root of this subvolume doesn't exist or we
538 * would have found it there:
541 struct btree_iter inode_iter
= {};
542 struct bch_inode_unpacked new_inode
;
543 u64 cpu
= raw_smp_processor_id();
545 bch2_inode_init_early(c
, &new_inode
);
546 bch2_inode_init_late(&new_inode
, bch2_current_time(c
), 0, 0, S_IFDIR
|0755, 0, NULL
);
548 new_inode
.bi_subvol
= subvolid
;
550 int ret
= bch2_inode_create(trans
, &inode_iter
, &new_inode
, snapshotid
, cpu
) ?:
551 bch2_btree_iter_traverse(&inode_iter
) ?:
552 bch2_inode_write(trans
, &inode_iter
, &new_inode
);
553 bch2_trans_iter_exit(trans
, &inode_iter
);
557 inum
= new_inode
.bi_inum
;
560 bch_info(c
, "reconstructing subvol %u with root inode %llu", subvolid
, inum
);
562 struct bkey_i_subvolume
*new_subvol
= bch2_trans_kmalloc(trans
, sizeof(*new_subvol
));
563 int ret
= PTR_ERR_OR_ZERO(new_subvol
);
567 bkey_subvolume_init(&new_subvol
->k_i
);
568 new_subvol
->k
.p
.offset
= subvolid
;
569 new_subvol
->v
.snapshot
= cpu_to_le32(snapshotid
);
570 new_subvol
->v
.inode
= cpu_to_le64(inum
);
571 ret
= bch2_btree_insert_trans(trans
, BTREE_ID_subvolumes
, &new_subvol
->k_i
, 0);
575 struct btree_iter iter
;
576 struct bkey_i_snapshot
*s
= bch2_bkey_get_mut_typed(trans
, &iter
,
577 BTREE_ID_snapshots
, POS(0, snapshotid
),
579 ret
= PTR_ERR_OR_ZERO(s
);
580 bch_err_msg(c
, ret
, "getting snapshot %u", snapshotid
);
584 u32 snapshot_tree
= le32_to_cpu(s
->v
.tree
);
586 s
->v
.subvol
= cpu_to_le32(subvolid
);
587 SET_BCH_SNAPSHOT_SUBVOL(&s
->v
, true);
588 bch2_trans_iter_exit(trans
, &iter
);
590 struct bkey_i_snapshot_tree
*st
= bch2_bkey_get_mut_typed(trans
, &iter
,
591 BTREE_ID_snapshot_trees
, POS(0, snapshot_tree
),
593 ret
= PTR_ERR_OR_ZERO(st
);
594 bch_err_msg(c
, ret
, "getting snapshot tree %u", snapshot_tree
);
598 if (!st
->v
.master_subvol
)
599 st
->v
.master_subvol
= cpu_to_le32(subvolid
);
601 bch2_trans_iter_exit(trans
, &iter
);
605 static int reconstruct_inode(struct btree_trans
*trans
, enum btree_id btree
, u32 snapshot
, u64 inum
)
607 struct bch_fs
*c
= trans
->c
;
608 unsigned i_mode
= S_IFREG
;
612 case BTREE_ID_extents
: {
613 struct btree_iter iter
= {};
615 bch2_trans_iter_init(trans
, &iter
, BTREE_ID_extents
, SPOS(inum
, U64_MAX
, snapshot
), 0);
616 struct bkey_s_c k
= bch2_btree_iter_peek_prev(&iter
);
617 bch2_trans_iter_exit(trans
, &iter
);
618 int ret
= bkey_err(k
);
622 i_size
= k
.k
->p
.offset
<< 9;
625 case BTREE_ID_dirents
:
628 case BTREE_ID_xattrs
:
634 struct bch_inode_unpacked new_inode
;
635 bch2_inode_init_early(c
, &new_inode
);
636 bch2_inode_init_late(&new_inode
, bch2_current_time(c
), 0, 0, i_mode
|0600, 0, NULL
);
637 new_inode
.bi_size
= i_size
;
638 new_inode
.bi_inum
= inum
;
639 new_inode
.bi_snapshot
= snapshot
;
641 return __bch2_fsck_write_inode(trans
, &new_inode
);
644 struct snapshots_seen
{
646 snapshot_id_list ids
;
649 static inline void snapshots_seen_exit(struct snapshots_seen
*s
)
651 darray_exit(&s
->ids
);
654 static inline void snapshots_seen_init(struct snapshots_seen
*s
)
656 memset(s
, 0, sizeof(*s
));
659 static int snapshots_seen_add_inorder(struct bch_fs
*c
, struct snapshots_seen
*s
, u32 id
)
662 __darray_for_each(s
->ids
, i
) {
669 int ret
= darray_insert_item(&s
->ids
, i
- s
->ids
.data
, id
);
671 bch_err(c
, "error reallocating snapshots_seen table (size %zu)",
676 static int snapshots_seen_update(struct bch_fs
*c
, struct snapshots_seen
*s
,
677 enum btree_id btree_id
, struct bpos pos
)
679 if (!bkey_eq(s
->pos
, pos
))
683 return snapshot_list_add_nodup(c
, &s
->ids
, pos
.snapshot
);
687 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
688 * and @ancestor hasn't been overwritten in @seen
690 * @c: filesystem handle
691 * @seen: list of snapshot ids already seen at current position
692 * @id: descendent snapshot id
693 * @ancestor: ancestor snapshot id
695 * Returns: whether key in @ancestor snapshot is visible in @id snapshot
697 static bool key_visible_in_snapshot(struct bch_fs
*c
, struct snapshots_seen
*seen
,
698 u32 id
, u32 ancestor
)
702 EBUG_ON(id
> ancestor
);
704 /* @ancestor should be the snapshot most recently added to @seen */
705 EBUG_ON(ancestor
!= seen
->pos
.snapshot
);
706 EBUG_ON(ancestor
!= darray_last(seen
->ids
));
711 if (!bch2_snapshot_is_ancestor(c
, id
, ancestor
))
715 * We know that @id is a descendant of @ancestor, we're checking if
716 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
717 * @ascestor and with @id as a descendent.
719 * But we already know that we're scanning IDs between @id and @ancestor
720 * numerically, since snapshot ID lists are kept sorted, so if we find
721 * an id that's an ancestor of @id we're done:
724 for (i
= seen
->ids
.nr
- 2;
725 i
>= 0 && seen
->ids
.data
[i
] >= id
;
727 if (bch2_snapshot_is_ancestor(c
, id
, seen
->ids
.data
[i
]))
734 * ref_visible - given a key with snapshot id @src that points to a key with
735 * snapshot id @dst, test whether there is some snapshot in which @dst is
738 * @c: filesystem handle
739 * @s: list of snapshot IDs already seen at @src
740 * @src: snapshot ID of src key
741 * @dst: snapshot ID of dst key
742 * Returns: true if there is some snapshot in which @dst is visible
744 * Assumes we're visiting @src keys in natural key order
746 static bool ref_visible(struct bch_fs
*c
, struct snapshots_seen
*s
,
750 ? key_visible_in_snapshot(c
, s
, dst
, src
)
751 : bch2_snapshot_is_ancestor(c
, src
, dst
);
754 static int ref_visible2(struct bch_fs
*c
,
755 u32 src
, struct snapshots_seen
*src_seen
,
756 u32 dst
, struct snapshots_seen
*dst_seen
)
760 swap(dst_seen
, src_seen
);
762 return key_visible_in_snapshot(c
, src_seen
, dst
, src
);
765 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
766 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
767 (_i)->snapshot <= (_snapshot); _i++) \
768 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
770 struct inode_walker_entry
{
771 struct bch_inode_unpacked inode
;
776 struct inode_walker
{
777 bool first_this_inode
;
779 bool recalculate_sums
;
780 struct bpos last_pos
;
782 DARRAY(struct inode_walker_entry
) inodes
;
785 static void inode_walker_exit(struct inode_walker
*w
)
787 darray_exit(&w
->inodes
);
790 static struct inode_walker
inode_walker_init(void)
792 return (struct inode_walker
) { 0, };
795 static int add_inode(struct bch_fs
*c
, struct inode_walker
*w
,
796 struct bkey_s_c inode
)
798 struct bch_inode_unpacked u
;
800 BUG_ON(bch2_inode_unpack(inode
, &u
));
802 return darray_push(&w
->inodes
, ((struct inode_walker_entry
) {
804 .snapshot
= inode
.k
->p
.snapshot
,
808 static int get_inodes_all_snapshots(struct btree_trans
*trans
,
809 struct inode_walker
*w
, u64 inum
)
811 struct bch_fs
*c
= trans
->c
;
812 struct btree_iter iter
;
817 * We no longer have inodes for w->last_pos; clear this to avoid
818 * screwing up check_i_sectors/check_subdir_count if we take a
819 * transaction restart here:
821 w
->have_inodes
= false;
822 w
->recalculate_sums
= false;
825 for_each_btree_key_norestart(trans
, iter
, BTREE_ID_inodes
, POS(0, inum
),
826 BTREE_ITER_all_snapshots
, k
, ret
) {
827 if (k
.k
->p
.offset
!= inum
)
830 if (bkey_is_inode(k
.k
))
833 bch2_trans_iter_exit(trans
, &iter
);
838 w
->first_this_inode
= true;
839 w
->have_inodes
= true;
843 static struct inode_walker_entry
*
844 lookup_inode_for_snapshot(struct bch_fs
*c
, struct inode_walker
*w
, struct bkey_s_c k
)
846 bool is_whiteout
= k
.k
->type
== KEY_TYPE_whiteout
;
848 struct inode_walker_entry
*i
;
849 __darray_for_each(w
->inodes
, i
)
850 if (bch2_snapshot_is_ancestor(c
, k
.k
->p
.snapshot
, i
->snapshot
))
855 BUG_ON(k
.k
->p
.snapshot
> i
->snapshot
);
857 if (k
.k
->p
.snapshot
!= i
->snapshot
&& !is_whiteout
) {
858 struct inode_walker_entry
new = *i
;
860 new.snapshot
= k
.k
->p
.snapshot
;
863 struct printbuf buf
= PRINTBUF
;
864 bch2_bkey_val_to_text(&buf
, c
, k
);
866 bch_info(c
, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
867 "unexpected because we should always update the inode when we update a key in that inode\n"
869 w
->last_pos
.inode
, k
.k
->p
.snapshot
, i
->snapshot
, buf
.buf
);
872 while (i
> w
->inodes
.data
&& i
[-1].snapshot
> k
.k
->p
.snapshot
)
875 size_t pos
= i
- w
->inodes
.data
;
876 int ret
= darray_insert_item(&w
->inodes
, pos
, new);
880 i
= w
->inodes
.data
+ pos
;
886 static struct inode_walker_entry
*walk_inode(struct btree_trans
*trans
,
887 struct inode_walker
*w
,
890 if (w
->last_pos
.inode
!= k
.k
->p
.inode
) {
891 int ret
= get_inodes_all_snapshots(trans
, w
, k
.k
->p
.inode
);
896 w
->last_pos
= k
.k
->p
;
898 return lookup_inode_for_snapshot(trans
->c
, w
, k
);
901 static int get_visible_inodes(struct btree_trans
*trans
,
902 struct inode_walker
*w
,
903 struct snapshots_seen
*s
,
906 struct bch_fs
*c
= trans
->c
;
907 struct btree_iter iter
;
913 for_each_btree_key_norestart(trans
, iter
, BTREE_ID_inodes
, POS(0, inum
),
914 BTREE_ITER_all_snapshots
, k
, ret
) {
915 if (k
.k
->p
.offset
!= inum
)
918 if (!ref_visible(c
, s
, s
->pos
.snapshot
, k
.k
->p
.snapshot
))
921 if (bkey_is_inode(k
.k
))
924 if (k
.k
->p
.snapshot
>= s
->pos
.snapshot
)
927 bch2_trans_iter_exit(trans
, &iter
);
932 static int dirent_has_target(struct btree_trans
*trans
, struct bkey_s_c_dirent d
)
934 if (d
.v
->d_type
== DT_SUBVOL
) {
937 int ret
= subvol_lookup(trans
, le32_to_cpu(d
.v
->d_child_subvol
), &snap
, &inum
);
938 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
942 struct btree_iter iter
;
943 struct bkey_s_c k
= bch2_bkey_get_iter(trans
, &iter
, BTREE_ID_inodes
,
944 SPOS(0, le64_to_cpu(d
.v
->d_inum
), d
.k
->p
.snapshot
), 0);
945 int ret
= bkey_err(k
);
949 ret
= bkey_is_inode(k
.k
);
950 bch2_trans_iter_exit(trans
, &iter
);
956 * Prefer to delete the first one, since that will be the one at the wrong
958 * return value: 0 -> delete k1, 1 -> delete k2
960 static int hash_pick_winner(struct btree_trans
*trans
,
961 const struct bch_hash_desc desc
,
962 struct bch_hash_info
*hash_info
,
966 if (bkey_val_bytes(k1
.k
) == bkey_val_bytes(k2
.k
) &&
967 !memcmp(k1
.v
, k2
.v
, bkey_val_bytes(k1
.k
)))
970 switch (desc
.btree_id
) {
971 case BTREE_ID_dirents
: {
972 int ret
= dirent_has_target(trans
, bkey_s_c_to_dirent(k1
));
978 ret
= dirent_has_target(trans
, bkey_s_c_to_dirent(k2
));
990 static int fsck_update_backpointers(struct btree_trans
*trans
,
991 struct snapshots_seen
*s
,
992 const struct bch_hash_desc desc
,
993 struct bch_hash_info
*hash_info
,
996 if (new->k
.type
!= KEY_TYPE_dirent
)
999 struct bkey_i_dirent
*d
= bkey_i_to_dirent(new);
1000 struct inode_walker target
= inode_walker_init();
1003 if (d
->v
.d_type
== DT_SUBVOL
) {
1006 ret
= get_visible_inodes(trans
, &target
, s
, le64_to_cpu(d
->v
.d_inum
));
1010 darray_for_each(target
.inodes
, i
) {
1011 i
->inode
.bi_dir_offset
= d
->k
.p
.offset
;
1012 ret
= __bch2_fsck_write_inode(trans
, &i
->inode
);
1018 inode_walker_exit(&target
);
1022 static int fsck_rename_dirent(struct btree_trans
*trans
,
1023 struct snapshots_seen
*s
,
1024 const struct bch_hash_desc desc
,
1025 struct bch_hash_info
*hash_info
,
1026 struct bkey_s_c_dirent old
)
1028 struct qstr old_name
= bch2_dirent_get_name(old
);
1029 struct bkey_i_dirent
*new = bch2_trans_kmalloc(trans
, bkey_bytes(old
.k
) + 32);
1030 int ret
= PTR_ERR_OR_ZERO(new);
1034 bkey_dirent_init(&new->k_i
);
1035 dirent_copy_target(new, old
);
1036 new->k
.p
= old
.k
->p
;
1038 for (unsigned i
= 0; i
< 1000; i
++) {
1039 unsigned len
= sprintf(new->v
.d_name
, "%.*s.fsck_renamed-%u",
1040 old_name
.len
, old_name
.name
, i
);
1041 unsigned u64s
= BKEY_U64s
+ dirent_val_u64s(len
);
1048 ret
= bch2_hash_set_in_snapshot(trans
, bch2_dirent_hash_desc
, hash_info
,
1049 (subvol_inum
) { 0, old
.k
->p
.inode
},
1050 old
.k
->p
.snapshot
, &new->k_i
,
1051 BTREE_UPDATE_internal_snapshot_node
);
1052 if (!bch2_err_matches(ret
, EEXIST
))
1059 return fsck_update_backpointers(trans
, s
, desc
, hash_info
, &new->k_i
);
1062 static int hash_check_key(struct btree_trans
*trans
,
1063 struct snapshots_seen
*s
,
1064 const struct bch_hash_desc desc
,
1065 struct bch_hash_info
*hash_info
,
1066 struct btree_iter
*k_iter
, struct bkey_s_c hash_k
)
1068 struct bch_fs
*c
= trans
->c
;
1069 struct btree_iter iter
= { NULL
};
1070 struct printbuf buf
= PRINTBUF
;
1075 if (hash_k
.k
->type
!= desc
.key_type
)
1078 hash
= desc
.hash_bkey(hash_info
, hash_k
);
1080 if (likely(hash
== hash_k
.k
->p
.offset
))
1083 if (hash_k
.k
->p
.offset
< hash
)
1086 for_each_btree_key_norestart(trans
, iter
, desc
.btree_id
,
1087 SPOS(hash_k
.k
->p
.inode
, hash
, hash_k
.k
->p
.snapshot
),
1088 BTREE_ITER_slots
, k
, ret
) {
1089 if (bkey_eq(k
.k
->p
, hash_k
.k
->p
))
1092 if (k
.k
->type
== desc
.key_type
&&
1093 !desc
.cmp_bkey(k
, hash_k
))
1094 goto duplicate_entries
;
1096 if (bkey_deleted(k
.k
)) {
1097 bch2_trans_iter_exit(trans
, &iter
);
1102 bch2_trans_iter_exit(trans
, &iter
);
1103 printbuf_exit(&buf
);
1106 if (fsck_err(trans
, hash_table_key_wrong_offset
,
1107 "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
1108 bch2_btree_id_str(desc
.btree_id
), hash_k
.k
->p
.inode
, hash_k
.k
->p
.offset
, hash
,
1109 (printbuf_reset(&buf
),
1110 bch2_bkey_val_to_text(&buf
, c
, hash_k
), buf
.buf
))) {
1111 struct bkey_i
*new = bch2_bkey_make_mut_noupdate(trans
, hash_k
);
1113 return PTR_ERR(new);
1115 k
= bch2_hash_set_or_get_in_snapshot(trans
, &iter
, desc
, hash_info
,
1116 (subvol_inum
) { 0, hash_k
.k
->p
.inode
},
1117 hash_k
.k
->p
.snapshot
, new,
1118 STR_HASH_must_create
|
1119 BTREE_ITER_with_updates
|
1120 BTREE_UPDATE_internal_snapshot_node
);
1125 goto duplicate_entries
;
1127 ret
= bch2_hash_delete_at(trans
, desc
, hash_info
, k_iter
,
1128 BTREE_UPDATE_internal_snapshot_node
) ?:
1129 fsck_update_backpointers(trans
, s
, desc
, hash_info
, new) ?:
1130 bch2_trans_commit(trans
, NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
) ?:
1131 -BCH_ERR_transaction_restart_nested
;
1137 ret
= hash_pick_winner(trans
, desc
, hash_info
, hash_k
, k
);
1141 if (!fsck_err(trans
, hash_table_key_duplicate
,
1142 "duplicate hash table keys%s:\n%s",
1143 ret
!= 2 ? "" : ", both point to valid inodes",
1144 (printbuf_reset(&buf
),
1145 bch2_bkey_val_to_text(&buf
, c
, hash_k
),
1147 bch2_bkey_val_to_text(&buf
, c
, k
),
1153 ret
= bch2_hash_delete_at(trans
, desc
, hash_info
, k_iter
, 0);
1156 ret
= bch2_hash_delete_at(trans
, desc
, hash_info
, &iter
, 0);
1159 ret
= fsck_rename_dirent(trans
, s
, desc
, hash_info
, bkey_s_c_to_dirent(hash_k
)) ?:
1160 bch2_hash_delete_at(trans
, desc
, hash_info
, k_iter
, 0);
1164 ret
= bch2_trans_commit(trans
, NULL
, NULL
, 0) ?:
1165 -BCH_ERR_transaction_restart_nested
;
1169 static struct bkey_s_c_dirent
dirent_get_by_pos(struct btree_trans
*trans
,
1170 struct btree_iter
*iter
,
1173 return bch2_bkey_get_iter_typed(trans
, iter
, BTREE_ID_dirents
, pos
, 0, dirent
);
1176 static struct bkey_s_c_dirent
inode_get_dirent(struct btree_trans
*trans
,
1177 struct btree_iter
*iter
,
1178 struct bch_inode_unpacked
*inode
,
1181 if (inode
->bi_subvol
) {
1183 int ret
= subvol_lookup(trans
, inode
->bi_parent_subvol
, snapshot
, &inum
);
1185 return ((struct bkey_s_c_dirent
) { .k
= ERR_PTR(ret
) });
1188 return dirent_get_by_pos(trans
, iter
, SPOS(inode
->bi_dir
, inode
->bi_dir_offset
, *snapshot
));
1191 static int check_inode_deleted_list(struct btree_trans
*trans
, struct bpos p
)
1193 struct btree_iter iter
;
1194 struct bkey_s_c k
= bch2_bkey_get_iter(trans
, &iter
, BTREE_ID_deleted_inodes
, p
, 0);
1195 int ret
= bkey_err(k
) ?: k
.k
->type
== KEY_TYPE_set
;
1196 bch2_trans_iter_exit(trans
, &iter
);
1200 static int check_inode_dirent_inode(struct btree_trans
*trans
,
1201 struct bch_inode_unpacked
*inode
,
1204 struct bch_fs
*c
= trans
->c
;
1205 struct printbuf buf
= PRINTBUF
;
1207 u32 inode_snapshot
= inode
->bi_snapshot
;
1208 struct btree_iter dirent_iter
= {};
1209 struct bkey_s_c_dirent d
= inode_get_dirent(trans
, &dirent_iter
, inode
, &inode_snapshot
);
1210 int ret
= bkey_err(d
);
1211 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
1214 if (fsck_err_on(ret
,
1215 trans
, inode_points_to_missing_dirent
,
1216 "inode points to missing dirent\n%s",
1217 (bch2_inode_unpacked_to_text(&buf
, inode
), buf
.buf
)) ||
1218 fsck_err_on(!ret
&& dirent_points_to_inode_nowarn(d
, inode
),
1219 trans
, inode_points_to_wrong_dirent
,
1221 (printbuf_reset(&buf
),
1222 dirent_inode_mismatch_msg(&buf
, c
, d
, inode
),
1225 * We just clear the backpointer fields for now. If we find a
1226 * dirent that points to this inode in check_dirents(), we'll
1227 * update it then; then when we get to check_path() if the
1228 * backpointer is still 0 we'll reattach it.
1231 inode
->bi_dir_offset
= 0;
1232 *write_inode
= true;
1237 bch2_trans_iter_exit(trans
, &dirent_iter
);
1238 printbuf_exit(&buf
);
1243 static int get_snapshot_root_inode(struct btree_trans
*trans
,
1244 struct bch_inode_unpacked
*root
,
1247 struct btree_iter iter
;
1251 for_each_btree_key_reverse_norestart(trans
, iter
, BTREE_ID_inodes
,
1252 SPOS(0, inum
, U32_MAX
),
1253 BTREE_ITER_all_snapshots
, k
, ret
) {
1254 if (k
.k
->p
.offset
!= inum
)
1256 if (bkey_is_inode(k
.k
))
1263 BUG_ON(bch2_inode_unpack(k
, root
));
1265 bch2_trans_iter_exit(trans
, &iter
);
1269 static int check_inode(struct btree_trans
*trans
,
1270 struct btree_iter
*iter
,
1272 struct bch_inode_unpacked
*snapshot_root
,
1273 struct snapshots_seen
*s
)
1275 struct bch_fs
*c
= trans
->c
;
1276 struct printbuf buf
= PRINTBUF
;
1277 struct bch_inode_unpacked u
;
1278 bool do_update
= false;
1281 ret
= bch2_check_key_has_snapshot(trans
, iter
, k
);
1287 ret
= snapshots_seen_update(c
, s
, iter
->btree_id
, k
.k
->p
);
1291 if (!bkey_is_inode(k
.k
))
1294 BUG_ON(bch2_inode_unpack(k
, &u
));
1296 if (snapshot_root
->bi_inum
!= u
.bi_inum
) {
1297 ret
= get_snapshot_root_inode(trans
, snapshot_root
, u
.bi_inum
);
1302 if (fsck_err_on(u
.bi_hash_seed
!= snapshot_root
->bi_hash_seed
||
1303 INODE_STR_HASH(&u
) != INODE_STR_HASH(snapshot_root
),
1304 trans
, inode_snapshot_mismatch
,
1305 "inodes in different snapshots don't match")) {
1306 u
.bi_hash_seed
= snapshot_root
->bi_hash_seed
;
1307 SET_INODE_STR_HASH(&u
, INODE_STR_HASH(snapshot_root
));
1311 if (u
.bi_dir
|| u
.bi_dir_offset
) {
1312 ret
= check_inode_dirent_inode(trans
, &u
, &do_update
);
1317 if (fsck_err_on(u
.bi_dir
&& (u
.bi_flags
& BCH_INODE_unlinked
),
1318 trans
, inode_unlinked_but_has_dirent
,
1319 "inode unlinked but has dirent\n%s",
1320 (printbuf_reset(&buf
),
1321 bch2_inode_unpacked_to_text(&buf
, &u
),
1323 u
.bi_flags
&= ~BCH_INODE_unlinked
;
1327 if (S_ISDIR(u
.bi_mode
) && (u
.bi_flags
& BCH_INODE_unlinked
)) {
1328 /* Check for this early so that check_unreachable_inode() will reattach it */
1330 ret
= bch2_empty_dir_snapshot(trans
, k
.k
->p
.offset
, 0, k
.k
->p
.snapshot
);
1331 if (ret
&& ret
!= -BCH_ERR_ENOTEMPTY_dir_not_empty
)
1334 fsck_err_on(ret
, trans
, inode_dir_unlinked_but_not_empty
,
1335 "dir unlinked but not empty\n%s",
1336 (printbuf_reset(&buf
),
1337 bch2_inode_unpacked_to_text(&buf
, &u
),
1339 u
.bi_flags
&= ~BCH_INODE_unlinked
;
1344 ret
= bch2_inode_has_child_snapshots(trans
, k
.k
->p
);
1348 if (fsck_err_on(ret
!= !!(u
.bi_flags
& BCH_INODE_has_child_snapshot
),
1349 trans
, inode_has_child_snapshots_wrong
,
1350 "inode has_child_snapshots flag wrong (should be %u)\n%s",
1352 (printbuf_reset(&buf
),
1353 bch2_inode_unpacked_to_text(&buf
, &u
),
1356 u
.bi_flags
|= BCH_INODE_has_child_snapshot
;
1358 u
.bi_flags
&= ~BCH_INODE_has_child_snapshot
;
1363 if ((u
.bi_flags
& BCH_INODE_unlinked
) &&
1364 !(u
.bi_flags
& BCH_INODE_has_child_snapshot
)) {
1365 if (!test_bit(BCH_FS_started
, &c
->flags
)) {
1367 * If we're not in online fsck, don't delete unlinked
1368 * inodes, just make sure they're on the deleted list.
1370 * They might be referred to by a logged operation -
1371 * i.e. we might have crashed in the middle of a
1372 * truncate on an unlinked but open file - so we want to
1373 * let the delete_dead_inodes kill it after resuming
1376 ret
= check_inode_deleted_list(trans
, k
.k
->p
);
1381 trans
, unlinked_inode_not_on_deleted_list
,
1382 "inode %llu:%u unlinked, but not on deleted list",
1383 u
.bi_inum
, k
.k
->p
.snapshot
);
1385 ret
= bch2_btree_bit_mod_buffered(trans
, BTREE_ID_deleted_inodes
, k
.k
->p
, 1);
1389 ret
= bch2_inode_or_descendents_is_open(trans
, k
.k
->p
);
1393 if (fsck_err_on(!ret
,
1394 trans
, inode_unlinked_and_not_open
,
1395 "inode %llu%u unlinked and not open",
1396 u
.bi_inum
, u
.bi_snapshot
)) {
1397 ret
= bch2_inode_rm_snapshot(trans
, u
.bi_inum
, iter
->pos
.snapshot
);
1398 bch_err_msg(c
, ret
, "in fsck deleting inode");
1405 if (fsck_err_on(u
.bi_parent_subvol
&&
1406 (u
.bi_subvol
== 0 ||
1407 u
.bi_subvol
== BCACHEFS_ROOT_SUBVOL
),
1408 trans
, inode_bi_parent_nonzero
,
1409 "inode %llu:%u has subvol %u but nonzero parent subvol %u",
1410 u
.bi_inum
, k
.k
->p
.snapshot
, u
.bi_subvol
, u
.bi_parent_subvol
)) {
1411 u
.bi_parent_subvol
= 0;
1416 struct bch_subvolume s
;
1418 ret
= bch2_subvolume_get(trans
, u
.bi_subvol
, false, 0, &s
);
1419 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
1422 if (ret
&& (c
->sb
.btrees_lost_data
& BIT_ULL(BTREE_ID_subvolumes
))) {
1423 ret
= reconstruct_subvol(trans
, k
.k
->p
.snapshot
, u
.bi_subvol
, u
.bi_inum
);
1427 if (fsck_err_on(ret
,
1428 trans
, inode_bi_subvol_missing
,
1429 "inode %llu:%u bi_subvol points to missing subvolume %u",
1430 u
.bi_inum
, k
.k
->p
.snapshot
, u
.bi_subvol
) ||
1431 fsck_err_on(le64_to_cpu(s
.inode
) != u
.bi_inum
||
1432 !bch2_snapshot_is_ancestor(c
, le32_to_cpu(s
.snapshot
),
1434 trans
, inode_bi_subvol_wrong
,
1435 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
1436 u
.bi_inum
, k
.k
->p
.snapshot
, u
.bi_subvol
,
1437 le64_to_cpu(s
.inode
),
1438 le32_to_cpu(s
.snapshot
))) {
1440 u
.bi_parent_subvol
= 0;
1446 ret
= __bch2_fsck_write_inode(trans
, &u
);
1447 bch_err_msg(c
, ret
, "in fsck updating inode");
1455 printbuf_exit(&buf
);
1459 int bch2_check_inodes(struct bch_fs
*c
)
1461 struct bch_inode_unpacked snapshot_root
= {};
1462 struct snapshots_seen s
;
1464 snapshots_seen_init(&s
);
1466 int ret
= bch2_trans_run(c
,
1467 for_each_btree_key_commit(trans
, iter
, BTREE_ID_inodes
,
1469 BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
, k
,
1470 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
1471 check_inode(trans
, &iter
, k
, &snapshot_root
, &s
)));
1473 snapshots_seen_exit(&s
);
1478 static int find_oldest_inode_needs_reattach(struct btree_trans
*trans
,
1479 struct bch_inode_unpacked
*inode
)
1481 struct bch_fs
*c
= trans
->c
;
1482 struct btree_iter iter
;
1487 * We look for inodes to reattach in natural key order, leaves first,
1488 * but we should do the reattach at the oldest version that needs to be
1491 for_each_btree_key_norestart(trans
, iter
,
1493 SPOS(0, inode
->bi_inum
, inode
->bi_snapshot
+ 1),
1494 BTREE_ITER_all_snapshots
, k
, ret
) {
1495 if (k
.k
->p
.offset
!= inode
->bi_inum
)
1498 if (!bch2_snapshot_is_ancestor(c
, inode
->bi_snapshot
, k
.k
->p
.snapshot
))
1501 if (!bkey_is_inode(k
.k
))
1504 struct bch_inode_unpacked parent_inode
;
1505 bch2_inode_unpack(k
, &parent_inode
);
1507 if (!inode_should_reattach(&parent_inode
))
1510 *inode
= parent_inode
;
1512 bch2_trans_iter_exit(trans
, &iter
);
1517 static int check_unreachable_inode(struct btree_trans
*trans
,
1518 struct btree_iter
*iter
,
1521 struct printbuf buf
= PRINTBUF
;
1524 if (!bkey_is_inode(k
.k
))
1527 struct bch_inode_unpacked inode
;
1528 BUG_ON(bch2_inode_unpack(k
, &inode
));
1530 if (!inode_should_reattach(&inode
))
1533 ret
= find_oldest_inode_needs_reattach(trans
, &inode
);
1537 if (fsck_err(trans
, inode_unreachable
,
1538 "unreachable inode:\n%s",
1539 (bch2_inode_unpacked_to_text(&buf
, &inode
),
1541 ret
= reattach_inode(trans
, &inode
);
1543 printbuf_exit(&buf
);
1548 * Reattach unreachable (but not unlinked) inodes
1550 * Run after check_inodes() and check_dirents(), so we node that inode
1551 * backpointer fields point to valid dirents, and every inode that has a dirent
1552 * that points to it has its backpointer field set - so we're just looking for
1553 * non-unlinked inodes without backpointers:
1555 * XXX: this is racy w.r.t. hardlink removal in online fsck
1557 int bch2_check_unreachable_inodes(struct bch_fs
*c
)
1559 int ret
= bch2_trans_run(c
,
1560 for_each_btree_key_commit(trans
, iter
, BTREE_ID_inodes
,
1562 BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
, k
,
1563 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
1564 check_unreachable_inode(trans
, &iter
, k
)));
1569 static inline bool btree_matches_i_mode(enum btree_id btree
, unsigned mode
)
1572 case BTREE_ID_extents
:
1573 return S_ISREG(mode
) || S_ISLNK(mode
);
1574 case BTREE_ID_dirents
:
1575 return S_ISDIR(mode
);
1576 case BTREE_ID_xattrs
:
1583 static int check_key_has_inode(struct btree_trans
*trans
,
1584 struct btree_iter
*iter
,
1585 struct inode_walker
*inode
,
1586 struct inode_walker_entry
*i
,
1589 struct bch_fs
*c
= trans
->c
;
1590 struct printbuf buf
= PRINTBUF
;
1591 int ret
= PTR_ERR_OR_ZERO(i
);
1595 if (k
.k
->type
== KEY_TYPE_whiteout
)
1598 if (!i
&& (c
->sb
.btrees_lost_data
& BIT_ULL(BTREE_ID_inodes
))) {
1599 ret
= reconstruct_inode(trans
, iter
->btree_id
, k
.k
->p
.snapshot
, k
.k
->p
.inode
) ?:
1600 bch2_trans_commit(trans
, NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
);
1604 inode
->last_pos
.inode
--;
1605 ret
= -BCH_ERR_transaction_restart_nested
;
1610 trans
, key_in_missing_inode
,
1611 "key in missing inode:\n %s",
1612 (printbuf_reset(&buf
),
1613 bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
)))
1616 if (fsck_err_on(i
&& !btree_matches_i_mode(iter
->btree_id
, i
->inode
.bi_mode
),
1617 trans
, key_in_wrong_inode_type
,
1618 "key for wrong inode mode %o:\n %s",
1620 (printbuf_reset(&buf
),
1621 bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
)))
1626 printbuf_exit(&buf
);
1630 ret
= bch2_btree_delete_at(trans
, iter
, BTREE_UPDATE_internal_snapshot_node
);
1634 static int check_i_sectors_notnested(struct btree_trans
*trans
, struct inode_walker
*w
)
1636 struct bch_fs
*c
= trans
->c
;
1640 darray_for_each(w
->inodes
, i
) {
1641 if (i
->inode
.bi_sectors
== i
->count
)
1644 count2
= bch2_count_inode_sectors(trans
, w
->last_pos
.inode
, i
->snapshot
);
1646 if (w
->recalculate_sums
)
1649 if (i
->count
!= count2
) {
1650 bch_err_ratelimited(c
, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
1651 w
->last_pos
.inode
, i
->snapshot
, i
->count
, count2
);
1652 return -BCH_ERR_internal_fsck_err
;
1655 if (fsck_err_on(!(i
->inode
.bi_flags
& BCH_INODE_i_sectors_dirty
),
1656 trans
, inode_i_sectors_wrong
,
1657 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
1658 w
->last_pos
.inode
, i
->snapshot
,
1659 i
->inode
.bi_sectors
, i
->count
)) {
1660 i
->inode
.bi_sectors
= i
->count
;
1661 ret
= bch2_fsck_write_inode(trans
, &i
->inode
);
1671 static int check_i_sectors(struct btree_trans
*trans
, struct inode_walker
*w
)
1673 u32 restart_count
= trans
->restart_count
;
1674 return check_i_sectors_notnested(trans
, w
) ?:
1675 trans_was_restarted(trans
, restart_count
);
1681 struct snapshots_seen seen
;
1684 struct extent_ends
{
1685 struct bpos last_pos
;
1686 DARRAY(struct extent_end
) e
;
1689 static void extent_ends_reset(struct extent_ends
*extent_ends
)
1691 darray_for_each(extent_ends
->e
, i
)
1692 snapshots_seen_exit(&i
->seen
);
1693 extent_ends
->e
.nr
= 0;
1696 static void extent_ends_exit(struct extent_ends
*extent_ends
)
1698 extent_ends_reset(extent_ends
);
1699 darray_exit(&extent_ends
->e
);
1702 static void extent_ends_init(struct extent_ends
*extent_ends
)
1704 memset(extent_ends
, 0, sizeof(*extent_ends
));
1707 static int extent_ends_at(struct bch_fs
*c
,
1708 struct extent_ends
*extent_ends
,
1709 struct snapshots_seen
*seen
,
1712 struct extent_end
*i
, n
= (struct extent_end
) {
1713 .offset
= k
.k
->p
.offset
,
1714 .snapshot
= k
.k
->p
.snapshot
,
1718 n
.seen
.ids
.data
= kmemdup(seen
->ids
.data
,
1719 sizeof(seen
->ids
.data
[0]) * seen
->ids
.size
,
1721 if (!n
.seen
.ids
.data
)
1722 return -BCH_ERR_ENOMEM_fsck_extent_ends_at
;
1724 __darray_for_each(extent_ends
->e
, i
) {
1725 if (i
->snapshot
== k
.k
->p
.snapshot
) {
1726 snapshots_seen_exit(&i
->seen
);
1731 if (i
->snapshot
>= k
.k
->p
.snapshot
)
1735 return darray_insert_item(&extent_ends
->e
, i
- extent_ends
->e
.data
, n
);
1738 static int overlapping_extents_found(struct btree_trans
*trans
,
1739 enum btree_id btree
,
1740 struct bpos pos1
, struct snapshots_seen
*pos1_seen
,
1743 struct extent_end
*extent_end
)
1745 struct bch_fs
*c
= trans
->c
;
1746 struct printbuf buf
= PRINTBUF
;
1747 struct btree_iter iter1
, iter2
= { NULL
};
1748 struct bkey_s_c k1
, k2
;
1751 BUG_ON(bkey_le(pos1
, bkey_start_pos(&pos2
)));
1753 bch2_trans_iter_init(trans
, &iter1
, btree
, pos1
,
1754 BTREE_ITER_all_snapshots
|
1755 BTREE_ITER_not_extents
);
1756 k1
= bch2_btree_iter_peek_upto(&iter1
, POS(pos1
.inode
, U64_MAX
));
1761 prt_str(&buf
, "\n ");
1762 bch2_bkey_val_to_text(&buf
, c
, k1
);
1764 if (!bpos_eq(pos1
, k1
.k
->p
)) {
1765 prt_str(&buf
, "\n wanted\n ");
1766 bch2_bpos_to_text(&buf
, pos1
);
1767 prt_str(&buf
, "\n ");
1768 bch2_bkey_to_text(&buf
, &pos2
);
1770 bch_err(c
, "%s: error finding first overlapping extent when repairing, got%s",
1772 ret
= -BCH_ERR_internal_fsck_err
;
1776 bch2_trans_copy_iter(&iter2
, &iter1
);
1779 bch2_btree_iter_advance(&iter2
);
1781 k2
= bch2_btree_iter_peek_upto(&iter2
, POS(pos1
.inode
, U64_MAX
));
1786 if (bpos_ge(k2
.k
->p
, pos2
.p
))
1790 prt_str(&buf
, "\n ");
1791 bch2_bkey_val_to_text(&buf
, c
, k2
);
1793 if (bpos_gt(k2
.k
->p
, pos2
.p
) ||
1794 pos2
.size
!= k2
.k
->size
) {
1795 bch_err(c
, "%s: error finding seconding overlapping extent when repairing%s",
1797 ret
= -BCH_ERR_internal_fsck_err
;
1801 prt_printf(&buf
, "\n overwriting %s extent",
1802 pos1
.snapshot
>= pos2
.p
.snapshot
? "first" : "second");
1804 if (fsck_err(trans
, extent_overlapping
,
1805 "overlapping extents%s", buf
.buf
)) {
1806 struct btree_iter
*old_iter
= &iter1
;
1807 struct disk_reservation res
= { 0 };
1809 if (pos1
.snapshot
< pos2
.p
.snapshot
) {
1814 trans
->extra_disk_res
+= bch2_bkey_sectors_compressed(k2
);
1816 ret
= bch2_trans_update_extent_overwrite(trans
, old_iter
,
1817 BTREE_UPDATE_internal_snapshot_node
,
1819 bch2_trans_commit(trans
, &res
, NULL
, BCH_TRANS_COMMIT_no_enospc
);
1820 bch2_disk_reservation_put(c
, &res
);
1827 if (pos1
.snapshot
== pos2
.p
.snapshot
) {
1829 * We overwrote the first extent, and did the overwrite
1830 * in the same snapshot:
1832 extent_end
->offset
= bkey_start_offset(&pos2
);
1833 } else if (pos1
.snapshot
> pos2
.p
.snapshot
) {
1835 * We overwrote the first extent in pos2's snapshot:
1837 ret
= snapshots_seen_add_inorder(c
, pos1_seen
, pos2
.p
.snapshot
);
1840 * We overwrote the second extent - restart
1841 * check_extent() from the top:
1843 ret
= -BCH_ERR_transaction_restart_nested
;
1848 bch2_trans_iter_exit(trans
, &iter2
);
1849 bch2_trans_iter_exit(trans
, &iter1
);
1850 printbuf_exit(&buf
);
1854 static int check_overlapping_extents(struct btree_trans
*trans
,
1855 struct snapshots_seen
*seen
,
1856 struct extent_ends
*extent_ends
,
1858 struct btree_iter
*iter
,
1861 struct bch_fs
*c
= trans
->c
;
1864 /* transaction restart, running again */
1865 if (bpos_eq(extent_ends
->last_pos
, k
.k
->p
))
1868 if (extent_ends
->last_pos
.inode
!= k
.k
->p
.inode
)
1869 extent_ends_reset(extent_ends
);
1871 darray_for_each(extent_ends
->e
, i
) {
1872 if (i
->offset
<= bkey_start_offset(k
.k
))
1875 if (!ref_visible2(c
,
1876 k
.k
->p
.snapshot
, seen
,
1877 i
->snapshot
, &i
->seen
))
1880 ret
= overlapping_extents_found(trans
, iter
->btree_id
,
1881 SPOS(iter
->pos
.inode
,
1890 extent_ends
->last_pos
= k
.k
->p
;
1895 static int check_extent_overbig(struct btree_trans
*trans
, struct btree_iter
*iter
,
1898 struct bch_fs
*c
= trans
->c
;
1899 struct bkey_ptrs_c ptrs
= bch2_bkey_ptrs_c(k
);
1900 struct bch_extent_crc_unpacked crc
;
1901 const union bch_extent_entry
*i
;
1902 unsigned encoded_extent_max_sectors
= c
->opts
.encoded_extent_max
>> 9;
1904 bkey_for_each_crc(k
.k
, ptrs
, crc
, i
)
1905 if (crc_is_encoded(crc
) &&
1906 crc
.uncompressed_size
> encoded_extent_max_sectors
) {
1907 struct printbuf buf
= PRINTBUF
;
1909 bch2_bkey_val_to_text(&buf
, c
, k
);
1910 bch_err(c
, "overbig encoded extent, please report this:\n %s", buf
.buf
);
1911 printbuf_exit(&buf
);
1917 static int check_extent(struct btree_trans
*trans
, struct btree_iter
*iter
,
1919 struct inode_walker
*inode
,
1920 struct snapshots_seen
*s
,
1921 struct extent_ends
*extent_ends
,
1922 struct disk_reservation
*res
)
1924 struct bch_fs
*c
= trans
->c
;
1925 struct printbuf buf
= PRINTBUF
;
1928 ret
= bch2_check_key_has_snapshot(trans
, iter
, k
);
1930 ret
= ret
< 0 ? ret
: 0;
1934 if (inode
->last_pos
.inode
!= k
.k
->p
.inode
&& inode
->have_inodes
) {
1935 ret
= check_i_sectors(trans
, inode
);
1940 ret
= snapshots_seen_update(c
, s
, iter
->btree_id
, k
.k
->p
);
1944 struct inode_walker_entry
*extent_i
= walk_inode(trans
, inode
, k
);
1945 ret
= PTR_ERR_OR_ZERO(extent_i
);
1949 ret
= check_key_has_inode(trans
, iter
, inode
, extent_i
, k
);
1953 if (k
.k
->type
!= KEY_TYPE_whiteout
) {
1954 ret
= check_overlapping_extents(trans
, s
, extent_ends
, k
, iter
,
1955 &inode
->recalculate_sums
);
1960 * Check inodes in reverse order, from oldest snapshots to
1961 * newest, starting from the inode that matches this extent's
1962 * snapshot. If we didn't have one, iterate over all inodes:
1964 for (struct inode_walker_entry
*i
= extent_i
?: &darray_last(inode
->inodes
);
1965 inode
->inodes
.data
&& i
>= inode
->inodes
.data
;
1967 if (i
->snapshot
> k
.k
->p
.snapshot
||
1968 !key_visible_in_snapshot(c
, s
, i
->snapshot
, k
.k
->p
.snapshot
))
1971 if (fsck_err_on(k
.k
->p
.offset
> round_up(i
->inode
.bi_size
, block_bytes(c
)) >> 9 &&
1972 !bkey_extent_is_reservation(k
),
1973 trans
, extent_past_end_of_inode
,
1974 "extent type past end of inode %llu:%u, i_size %llu\n %s",
1975 i
->inode
.bi_inum
, i
->snapshot
, i
->inode
.bi_size
,
1976 (bch2_bkey_val_to_text(&buf
, c
, k
), buf
.buf
))) {
1977 struct btree_iter iter2
;
1979 bch2_trans_copy_iter(&iter2
, iter
);
1980 bch2_btree_iter_set_snapshot(&iter2
, i
->snapshot
);
1981 ret
= bch2_btree_iter_traverse(&iter2
) ?:
1982 bch2_btree_delete_at(trans
, &iter2
,
1983 BTREE_UPDATE_internal_snapshot_node
);
1984 bch2_trans_iter_exit(trans
, &iter2
);
1988 iter
->k
.type
= KEY_TYPE_whiteout
;
1994 ret
= bch2_trans_commit(trans
, res
, NULL
, BCH_TRANS_COMMIT_no_enospc
);
1998 if (bkey_extent_is_allocation(k
.k
)) {
1999 for (struct inode_walker_entry
*i
= extent_i
?: &darray_last(inode
->inodes
);
2000 inode
->inodes
.data
&& i
>= inode
->inodes
.data
;
2002 if (i
->snapshot
> k
.k
->p
.snapshot
||
2003 !key_visible_in_snapshot(c
, s
, i
->snapshot
, k
.k
->p
.snapshot
))
2006 i
->count
+= k
.k
->size
;
2010 if (k
.k
->type
!= KEY_TYPE_whiteout
) {
2011 ret
= extent_ends_at(c
, extent_ends
, s
, k
);
2018 printbuf_exit(&buf
);
2024 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
2025 * that i_size an i_sectors are consistent
2027 int bch2_check_extents(struct bch_fs
*c
)
2029 struct inode_walker w
= inode_walker_init();
2030 struct snapshots_seen s
;
2031 struct extent_ends extent_ends
;
2032 struct disk_reservation res
= { 0 };
2034 snapshots_seen_init(&s
);
2035 extent_ends_init(&extent_ends
);
2037 int ret
= bch2_trans_run(c
,
2038 for_each_btree_key(trans
, iter
, BTREE_ID_extents
,
2039 POS(BCACHEFS_ROOT_INO
, 0),
2040 BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
, k
, ({
2041 bch2_disk_reservation_put(c
, &res
);
2042 check_extent(trans
, &iter
, k
, &w
, &s
, &extent_ends
, &res
) ?:
2043 check_extent_overbig(trans
, &iter
, k
);
2045 check_i_sectors_notnested(trans
, &w
));
2047 bch2_disk_reservation_put(c
, &res
);
2048 extent_ends_exit(&extent_ends
);
2049 inode_walker_exit(&w
);
2050 snapshots_seen_exit(&s
);
2056 int bch2_check_indirect_extents(struct bch_fs
*c
)
2058 struct disk_reservation res
= { 0 };
2060 int ret
= bch2_trans_run(c
,
2061 for_each_btree_key_commit(trans
, iter
, BTREE_ID_reflink
,
2063 BTREE_ITER_prefetch
, k
,
2065 BCH_TRANS_COMMIT_no_enospc
, ({
2066 bch2_disk_reservation_put(c
, &res
);
2067 check_extent_overbig(trans
, &iter
, k
);
2070 bch2_disk_reservation_put(c
, &res
);
2075 static int check_subdir_count_notnested(struct btree_trans
*trans
, struct inode_walker
*w
)
2077 struct bch_fs
*c
= trans
->c
;
2081 darray_for_each(w
->inodes
, i
) {
2082 if (i
->inode
.bi_nlink
== i
->count
)
2085 count2
= bch2_count_subdirs(trans
, w
->last_pos
.inode
, i
->snapshot
);
2089 if (i
->count
!= count2
) {
2090 bch_err_ratelimited(c
, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
2091 w
->last_pos
.inode
, i
->snapshot
, i
->count
, count2
);
2093 if (i
->inode
.bi_nlink
== i
->count
)
2097 if (fsck_err_on(i
->inode
.bi_nlink
!= i
->count
,
2098 trans
, inode_dir_wrong_nlink
,
2099 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
2100 w
->last_pos
.inode
, i
->snapshot
, i
->inode
.bi_nlink
, i
->count
)) {
2101 i
->inode
.bi_nlink
= i
->count
;
2102 ret
= bch2_fsck_write_inode(trans
, &i
->inode
);
2112 static int check_subdir_count(struct btree_trans
*trans
, struct inode_walker
*w
)
2114 u32 restart_count
= trans
->restart_count
;
2115 return check_subdir_count_notnested(trans
, w
) ?:
2116 trans_was_restarted(trans
, restart_count
);
2120 static int check_dirent_inode_dirent(struct btree_trans
*trans
,
2121 struct btree_iter
*iter
,
2122 struct bkey_s_c_dirent d
,
2123 struct bch_inode_unpacked
*target
)
2125 struct bch_fs
*c
= trans
->c
;
2126 struct printbuf buf
= PRINTBUF
;
2127 struct btree_iter bp_iter
= { NULL
};
2130 if (inode_points_to_dirent(target
, d
))
2133 if (!target
->bi_dir
&&
2134 !target
->bi_dir_offset
) {
2135 fsck_err_on(S_ISDIR(target
->bi_mode
),
2136 trans
, inode_dir_missing_backpointer
,
2137 "directory with missing backpointer\n%s",
2138 (printbuf_reset(&buf
),
2139 bch2_bkey_val_to_text(&buf
, c
, d
.s_c
),
2140 prt_printf(&buf
, "\n"),
2141 bch2_inode_unpacked_to_text(&buf
, target
),
2144 fsck_err_on(target
->bi_flags
& BCH_INODE_unlinked
,
2145 trans
, inode_unlinked_but_has_dirent
,
2146 "inode unlinked but has dirent\n%s",
2147 (printbuf_reset(&buf
),
2148 bch2_bkey_val_to_text(&buf
, c
, d
.s_c
),
2149 prt_printf(&buf
, "\n"),
2150 bch2_inode_unpacked_to_text(&buf
, target
),
2153 target
->bi_flags
&= ~BCH_INODE_unlinked
;
2154 target
->bi_dir
= d
.k
->p
.inode
;
2155 target
->bi_dir_offset
= d
.k
->p
.offset
;
2156 return __bch2_fsck_write_inode(trans
, target
);
2159 if (bch2_inode_should_have_bp(target
) &&
2160 !fsck_err(trans
, inode_wrong_backpointer
,
2161 "dirent points to inode that does not point back:\n %s",
2162 (bch2_bkey_val_to_text(&buf
, c
, d
.s_c
),
2163 prt_printf(&buf
, "\n "),
2164 bch2_inode_unpacked_to_text(&buf
, target
),
2168 struct bkey_s_c_dirent bp_dirent
= dirent_get_by_pos(trans
, &bp_iter
,
2169 SPOS(target
->bi_dir
, target
->bi_dir_offset
, target
->bi_snapshot
));
2170 ret
= bkey_err(bp_dirent
);
2171 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2174 bool backpointer_exists
= !ret
;
2177 if (fsck_err_on(!backpointer_exists
,
2178 trans
, inode_wrong_backpointer
,
2179 "inode %llu:%u has wrong backpointer:\n"
2181 "should be %llu:%llu",
2182 target
->bi_inum
, target
->bi_snapshot
,
2184 target
->bi_dir_offset
,
2187 target
->bi_dir
= d
.k
->p
.inode
;
2188 target
->bi_dir_offset
= d
.k
->p
.offset
;
2189 ret
= __bch2_fsck_write_inode(trans
, target
);
2193 bch2_bkey_val_to_text(&buf
, c
, d
.s_c
);
2195 if (backpointer_exists
)
2196 bch2_bkey_val_to_text(&buf
, c
, bp_dirent
.s_c
);
2198 if (fsck_err_on(backpointer_exists
&&
2199 (S_ISDIR(target
->bi_mode
) ||
2201 trans
, inode_dir_multiple_links
,
2202 "%s %llu:%u with multiple links\n%s",
2203 S_ISDIR(target
->bi_mode
) ? "directory" : "subvolume",
2204 target
->bi_inum
, target
->bi_snapshot
, buf
.buf
)) {
2205 ret
= __remove_dirent(trans
, d
.k
->p
);
2210 * hardlinked file with nlink 0:
2211 * We're just adjusting nlink here so check_nlinks() will pick
2212 * it up, it ignores inodes with nlink 0
2214 if (fsck_err_on(backpointer_exists
&& !target
->bi_nlink
,
2215 trans
, inode_multiple_links_but_nlink_0
,
2216 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
2217 target
->bi_inum
, target
->bi_snapshot
, bch2_d_types
[d
.v
->d_type
], buf
.buf
)) {
2219 target
->bi_flags
&= ~BCH_INODE_unlinked
;
2220 ret
= __bch2_fsck_write_inode(trans
, target
);
2227 bch2_trans_iter_exit(trans
, &bp_iter
);
2228 printbuf_exit(&buf
);
2234 static int check_dirent_target(struct btree_trans
*trans
,
2235 struct btree_iter
*iter
,
2236 struct bkey_s_c_dirent d
,
2237 struct bch_inode_unpacked
*target
)
2239 struct bch_fs
*c
= trans
->c
;
2240 struct bkey_i_dirent
*n
;
2241 struct printbuf buf
= PRINTBUF
;
2244 ret
= check_dirent_inode_dirent(trans
, iter
, d
, target
);
2248 if (fsck_err_on(d
.v
->d_type
!= inode_d_type(target
),
2249 trans
, dirent_d_type_wrong
,
2250 "incorrect d_type: got %s, should be %s:\n%s",
2251 bch2_d_type_str(d
.v
->d_type
),
2252 bch2_d_type_str(inode_d_type(target
)),
2253 (printbuf_reset(&buf
),
2254 bch2_bkey_val_to_text(&buf
, c
, d
.s_c
), buf
.buf
))) {
2255 n
= bch2_trans_kmalloc(trans
, bkey_bytes(d
.k
));
2256 ret
= PTR_ERR_OR_ZERO(n
);
2260 bkey_reassemble(&n
->k_i
, d
.s_c
);
2261 n
->v
.d_type
= inode_d_type(target
);
2262 if (n
->v
.d_type
== DT_SUBVOL
) {
2263 n
->v
.d_parent_subvol
= cpu_to_le32(target
->bi_parent_subvol
);
2264 n
->v
.d_child_subvol
= cpu_to_le32(target
->bi_subvol
);
2266 n
->v
.d_inum
= cpu_to_le64(target
->bi_inum
);
2269 ret
= bch2_trans_update(trans
, iter
, &n
->k_i
, 0);
2273 d
= dirent_i_to_s_c(n
);
2277 printbuf_exit(&buf
);
2282 /* find a subvolume that's a descendent of @snapshot: */
2283 static int find_snapshot_subvol(struct btree_trans
*trans
, u32 snapshot
, u32
*subvolid
)
2285 struct btree_iter iter
;
2289 for_each_btree_key_norestart(trans
, iter
, BTREE_ID_subvolumes
, POS_MIN
, 0, k
, ret
) {
2290 if (k
.k
->type
!= KEY_TYPE_subvolume
)
2293 struct bkey_s_c_subvolume s
= bkey_s_c_to_subvolume(k
);
2294 if (bch2_snapshot_is_ancestor(trans
->c
, le32_to_cpu(s
.v
->snapshot
), snapshot
)) {
2295 bch2_trans_iter_exit(trans
, &iter
);
2296 *subvolid
= k
.k
->p
.offset
;
2303 bch2_trans_iter_exit(trans
, &iter
);
2308 static int check_dirent_to_subvol(struct btree_trans
*trans
, struct btree_iter
*iter
,
2309 struct bkey_s_c_dirent d
)
2311 struct bch_fs
*c
= trans
->c
;
2312 struct btree_iter subvol_iter
= {};
2313 struct bch_inode_unpacked subvol_root
;
2314 u32 parent_subvol
= le32_to_cpu(d
.v
->d_parent_subvol
);
2315 u32 target_subvol
= le32_to_cpu(d
.v
->d_child_subvol
);
2316 u32 parent_snapshot
;
2317 u32 new_parent_subvol
= 0;
2319 struct printbuf buf
= PRINTBUF
;
2322 ret
= subvol_lookup(trans
, parent_subvol
, &parent_snapshot
, &parent_inum
);
2323 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2327 (!ret
&& !bch2_snapshot_is_ancestor(c
, parent_snapshot
, d
.k
->p
.snapshot
))) {
2328 int ret2
= find_snapshot_subvol(trans
, d
.k
->p
.snapshot
, &new_parent_subvol
);
2329 if (ret2
&& !bch2_err_matches(ret
, ENOENT
))
2334 !new_parent_subvol
&&
2335 (c
->sb
.btrees_lost_data
& BIT_ULL(BTREE_ID_subvolumes
))) {
2337 * Couldn't find a subvol for dirent's snapshot - but we lost
2338 * subvols, so we need to reconstruct:
2340 ret
= reconstruct_subvol(trans
, d
.k
->p
.snapshot
, parent_subvol
, 0);
2344 parent_snapshot
= d
.k
->p
.snapshot
;
2347 if (fsck_err_on(ret
,
2348 trans
, dirent_to_missing_parent_subvol
,
2349 "dirent parent_subvol points to missing subvolume\n%s",
2350 (bch2_bkey_val_to_text(&buf
, c
, d
.s_c
), buf
.buf
)) ||
2351 fsck_err_on(!ret
&& !bch2_snapshot_is_ancestor(c
, parent_snapshot
, d
.k
->p
.snapshot
),
2352 trans
, dirent_not_visible_in_parent_subvol
,
2353 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
2355 (bch2_bkey_val_to_text(&buf
, c
, d
.s_c
), buf
.buf
))) {
2356 if (!new_parent_subvol
) {
2357 bch_err(c
, "could not find a subvol for snapshot %u", d
.k
->p
.snapshot
);
2358 return -BCH_ERR_fsck_repair_unimplemented
;
2361 struct bkey_i_dirent
*new_dirent
= bch2_bkey_make_mut_typed(trans
, iter
, &d
.s_c
, 0, dirent
);
2362 ret
= PTR_ERR_OR_ZERO(new_dirent
);
2366 new_dirent
->v
.d_parent_subvol
= cpu_to_le32(new_parent_subvol
);
2369 struct bkey_s_c_subvolume s
=
2370 bch2_bkey_get_iter_typed(trans
, &subvol_iter
,
2371 BTREE_ID_subvolumes
, POS(0, target_subvol
),
2373 ret
= bkey_err(s
.s_c
);
2374 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2378 if (fsck_err(trans
, dirent_to_missing_subvol
,
2379 "dirent points to missing subvolume\n%s",
2380 (bch2_bkey_val_to_text(&buf
, c
, d
.s_c
), buf
.buf
)))
2381 return __remove_dirent(trans
, d
.k
->p
);
2386 if (fsck_err_on(le32_to_cpu(s
.v
->fs_path_parent
) != parent_subvol
,
2387 trans
, subvol_fs_path_parent_wrong
,
2388 "subvol with wrong fs_path_parent, should be be %u\n%s",
2390 (bch2_bkey_val_to_text(&buf
, c
, s
.s_c
), buf
.buf
))) {
2391 struct bkey_i_subvolume
*n
=
2392 bch2_bkey_make_mut_typed(trans
, &subvol_iter
, &s
.s_c
, 0, subvolume
);
2393 ret
= PTR_ERR_OR_ZERO(n
);
2397 n
->v
.fs_path_parent
= cpu_to_le32(parent_subvol
);
2400 u64 target_inum
= le64_to_cpu(s
.v
->inode
);
2401 u32 target_snapshot
= le32_to_cpu(s
.v
->snapshot
);
2403 ret
= lookup_inode(trans
, target_inum
, target_snapshot
, &subvol_root
);
2404 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2408 bch_err(c
, "subvol %u points to missing inode root %llu", target_subvol
, target_inum
);
2409 ret
= -BCH_ERR_fsck_repair_unimplemented
;
2413 if (fsck_err_on(!ret
&& parent_subvol
!= subvol_root
.bi_parent_subvol
,
2414 trans
, inode_bi_parent_wrong
,
2415 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
2417 subvol_root
.bi_parent_subvol
, parent_subvol
)) {
2418 subvol_root
.bi_parent_subvol
= parent_subvol
;
2419 subvol_root
.bi_snapshot
= le32_to_cpu(s
.v
->snapshot
);
2420 ret
= __bch2_fsck_write_inode(trans
, &subvol_root
);
2425 ret
= check_dirent_target(trans
, iter
, d
, &subvol_root
);
2431 bch2_trans_iter_exit(trans
, &subvol_iter
);
2432 printbuf_exit(&buf
);
2436 static int check_dirent(struct btree_trans
*trans
, struct btree_iter
*iter
,
2438 struct bch_hash_info
*hash_info
,
2439 struct inode_walker
*dir
,
2440 struct inode_walker
*target
,
2441 struct snapshots_seen
*s
)
2443 struct bch_fs
*c
= trans
->c
;
2444 struct inode_walker_entry
*i
;
2445 struct printbuf buf
= PRINTBUF
;
2448 ret
= bch2_check_key_has_snapshot(trans
, iter
, k
);
2450 ret
= ret
< 0 ? ret
: 0;
2454 ret
= snapshots_seen_update(c
, s
, iter
->btree_id
, k
.k
->p
);
2458 if (k
.k
->type
== KEY_TYPE_whiteout
)
2461 if (dir
->last_pos
.inode
!= k
.k
->p
.inode
&& dir
->have_inodes
) {
2462 ret
= check_subdir_count(trans
, dir
);
2467 i
= walk_inode(trans
, dir
, k
);
2468 ret
= PTR_ERR_OR_ZERO(i
);
2472 ret
= check_key_has_inode(trans
, iter
, dir
, i
, k
);
2479 if (dir
->first_this_inode
)
2480 *hash_info
= bch2_hash_info_init(c
, &i
->inode
);
2481 dir
->first_this_inode
= false;
2483 ret
= hash_check_key(trans
, s
, bch2_dirent_hash_desc
, hash_info
, iter
, k
);
2487 /* dirent has been deleted */
2492 if (k
.k
->type
!= KEY_TYPE_dirent
)
2495 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
2497 if (d
.v
->d_type
== DT_SUBVOL
) {
2498 ret
= check_dirent_to_subvol(trans
, iter
, d
);
2502 ret
= get_visible_inodes(trans
, target
, s
, le64_to_cpu(d
.v
->d_inum
));
2506 if (fsck_err_on(!target
->inodes
.nr
,
2507 trans
, dirent_to_missing_inode
,
2508 "dirent points to missing inode:\n%s",
2509 (printbuf_reset(&buf
),
2510 bch2_bkey_val_to_text(&buf
, c
, k
),
2512 ret
= __remove_dirent(trans
, d
.k
->p
);
2517 darray_for_each(target
->inodes
, i
) {
2518 ret
= check_dirent_target(trans
, iter
, d
, &i
->inode
);
2524 ret
= bch2_trans_commit(trans
, NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
);
2528 if (d
.v
->d_type
== DT_DIR
)
2529 for_each_visible_inode(c
, s
, dir
, d
.k
->p
.snapshot
, i
)
2534 printbuf_exit(&buf
);
2540 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
2543 int bch2_check_dirents(struct bch_fs
*c
)
2545 struct inode_walker dir
= inode_walker_init();
2546 struct inode_walker target
= inode_walker_init();
2547 struct snapshots_seen s
;
2548 struct bch_hash_info hash_info
;
2550 snapshots_seen_init(&s
);
2552 int ret
= bch2_trans_run(c
,
2553 for_each_btree_key(trans
, iter
, BTREE_ID_dirents
,
2554 POS(BCACHEFS_ROOT_INO
, 0),
2555 BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
, k
,
2556 check_dirent(trans
, &iter
, k
, &hash_info
, &dir
, &target
, &s
)) ?:
2557 check_subdir_count_notnested(trans
, &dir
));
2559 snapshots_seen_exit(&s
);
2560 inode_walker_exit(&dir
);
2561 inode_walker_exit(&target
);
2566 static int check_xattr(struct btree_trans
*trans
, struct btree_iter
*iter
,
2568 struct bch_hash_info
*hash_info
,
2569 struct inode_walker
*inode
)
2571 struct bch_fs
*c
= trans
->c
;
2572 struct inode_walker_entry
*i
;
2575 ret
= bch2_check_key_has_snapshot(trans
, iter
, k
);
2581 i
= walk_inode(trans
, inode
, k
);
2582 ret
= PTR_ERR_OR_ZERO(i
);
2586 ret
= check_key_has_inode(trans
, iter
, inode
, i
, k
);
2593 if (inode
->first_this_inode
)
2594 *hash_info
= bch2_hash_info_init(c
, &i
->inode
);
2595 inode
->first_this_inode
= false;
2597 ret
= hash_check_key(trans
, NULL
, bch2_xattr_hash_desc
, hash_info
, iter
, k
);
2603 * Walk xattrs: verify that they all have a corresponding inode
2605 int bch2_check_xattrs(struct bch_fs
*c
)
2607 struct inode_walker inode
= inode_walker_init();
2608 struct bch_hash_info hash_info
;
2611 ret
= bch2_trans_run(c
,
2612 for_each_btree_key_commit(trans
, iter
, BTREE_ID_xattrs
,
2613 POS(BCACHEFS_ROOT_INO
, 0),
2614 BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
,
2617 BCH_TRANS_COMMIT_no_enospc
,
2618 check_xattr(trans
, &iter
, k
, &hash_info
, &inode
)));
2620 inode_walker_exit(&inode
);
2625 static int check_root_trans(struct btree_trans
*trans
)
2627 struct bch_fs
*c
= trans
->c
;
2628 struct bch_inode_unpacked root_inode
;
2633 ret
= subvol_lookup(trans
, BCACHEFS_ROOT_SUBVOL
, &snapshot
, &inum
);
2634 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2637 if (mustfix_fsck_err_on(ret
, trans
, root_subvol_missing
,
2638 "root subvol missing")) {
2639 struct bkey_i_subvolume
*root_subvol
=
2640 bch2_trans_kmalloc(trans
, sizeof(*root_subvol
));
2641 ret
= PTR_ERR_OR_ZERO(root_subvol
);
2646 inum
= BCACHEFS_ROOT_INO
;
2648 bkey_subvolume_init(&root_subvol
->k_i
);
2649 root_subvol
->k
.p
.offset
= BCACHEFS_ROOT_SUBVOL
;
2650 root_subvol
->v
.flags
= 0;
2651 root_subvol
->v
.snapshot
= cpu_to_le32(snapshot
);
2652 root_subvol
->v
.inode
= cpu_to_le64(inum
);
2653 ret
= bch2_btree_insert_trans(trans
, BTREE_ID_subvolumes
, &root_subvol
->k_i
, 0);
2654 bch_err_msg(c
, ret
, "writing root subvol");
2659 ret
= lookup_inode(trans
, BCACHEFS_ROOT_INO
, snapshot
, &root_inode
);
2660 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2663 if (mustfix_fsck_err_on(ret
,
2664 trans
, root_dir_missing
,
2665 "root directory missing") ||
2666 mustfix_fsck_err_on(!S_ISDIR(root_inode
.bi_mode
),
2667 trans
, root_inode_not_dir
,
2668 "root inode not a directory")) {
2669 bch2_inode_init(c
, &root_inode
, 0, 0, S_IFDIR
|0755,
2671 root_inode
.bi_inum
= inum
;
2672 root_inode
.bi_snapshot
= snapshot
;
2674 ret
= __bch2_fsck_write_inode(trans
, &root_inode
);
2675 bch_err_msg(c
, ret
, "writing root inode");
2682 /* Get root directory, create if it doesn't exist: */
2683 int bch2_check_root(struct bch_fs
*c
)
2685 int ret
= bch2_trans_commit_do(c
, NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
2686 check_root_trans(trans
));
2691 typedef DARRAY(u32
) darray_u32
;
2693 static bool darray_u32_has(darray_u32
*d
, u32 v
)
2695 darray_for_each(*d
, i
)
2701 static int check_subvol_path(struct btree_trans
*trans
, struct btree_iter
*iter
, struct bkey_s_c k
)
2703 struct bch_fs
*c
= trans
->c
;
2704 struct btree_iter parent_iter
= {};
2705 darray_u32 subvol_path
= {};
2706 struct printbuf buf
= PRINTBUF
;
2709 if (k
.k
->type
!= KEY_TYPE_subvolume
)
2712 while (k
.k
->p
.offset
!= BCACHEFS_ROOT_SUBVOL
) {
2713 ret
= darray_push(&subvol_path
, k
.k
->p
.offset
);
2717 struct bkey_s_c_subvolume s
= bkey_s_c_to_subvolume(k
);
2719 struct bch_inode_unpacked subvol_root
;
2720 ret
= bch2_inode_find_by_inum_trans(trans
,
2721 (subvol_inum
) { s
.k
->p
.offset
, le64_to_cpu(s
.v
->inode
) },
2726 u32 parent
= le32_to_cpu(s
.v
->fs_path_parent
);
2728 if (darray_u32_has(&subvol_path
, parent
)) {
2729 if (fsck_err(c
, subvol_loop
, "subvolume loop"))
2730 ret
= reattach_subvol(trans
, s
);
2734 bch2_trans_iter_exit(trans
, &parent_iter
);
2735 bch2_trans_iter_init(trans
, &parent_iter
,
2736 BTREE_ID_subvolumes
, POS(0, parent
), 0);
2737 k
= bch2_btree_iter_peek_slot(&parent_iter
);
2742 if (fsck_err_on(k
.k
->type
!= KEY_TYPE_subvolume
,
2743 trans
, subvol_unreachable
,
2744 "unreachable subvolume %s",
2745 (bch2_bkey_val_to_text(&buf
, c
, s
.s_c
),
2747 ret
= reattach_subvol(trans
, s
);
2753 printbuf_exit(&buf
);
2754 darray_exit(&subvol_path
);
2755 bch2_trans_iter_exit(trans
, &parent_iter
);
2759 int bch2_check_subvolume_structure(struct bch_fs
*c
)
2761 int ret
= bch2_trans_run(c
,
2762 for_each_btree_key_commit(trans
, iter
,
2763 BTREE_ID_subvolumes
, POS_MIN
, BTREE_ITER_prefetch
, k
,
2764 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
2765 check_subvol_path(trans
, &iter
, k
)));
2770 struct pathbuf_entry
{
2775 typedef DARRAY(struct pathbuf_entry
) pathbuf
;
2777 static bool path_is_dup(pathbuf
*p
, u64 inum
, u32 snapshot
)
2779 darray_for_each(*p
, i
)
2780 if (i
->inum
== inum
&&
2781 i
->snapshot
== snapshot
)
2786 static int check_path(struct btree_trans
*trans
, pathbuf
*p
, struct bkey_s_c inode_k
)
2788 struct bch_fs
*c
= trans
->c
;
2789 struct btree_iter inode_iter
= {};
2790 struct bch_inode_unpacked inode
;
2791 struct printbuf buf
= PRINTBUF
;
2792 u32 snapshot
= inode_k
.k
->p
.snapshot
;
2797 BUG_ON(bch2_inode_unpack(inode_k
, &inode
));
2799 if (!S_ISDIR(inode
.bi_mode
))
2802 while (!inode
.bi_subvol
) {
2803 struct btree_iter dirent_iter
;
2804 struct bkey_s_c_dirent d
;
2805 u32 parent_snapshot
= snapshot
;
2807 d
= inode_get_dirent(trans
, &dirent_iter
, &inode
, &parent_snapshot
);
2808 ret
= bkey_err(d
.s_c
);
2809 if (ret
&& !bch2_err_matches(ret
, ENOENT
))
2812 if (!ret
&& (ret
= dirent_points_to_inode(c
, d
, &inode
)))
2813 bch2_trans_iter_exit(trans
, &dirent_iter
);
2815 if (bch2_err_matches(ret
, ENOENT
)) {
2816 printbuf_reset(&buf
);
2817 bch2_bkey_val_to_text(&buf
, c
, inode_k
);
2818 bch_err(c
, "unreachable inode in check_directory_structure: %s\n%s",
2819 bch2_err_str(ret
), buf
.buf
);
2823 bch2_trans_iter_exit(trans
, &dirent_iter
);
2825 ret
= darray_push(p
, ((struct pathbuf_entry
) {
2826 .inum
= inode
.bi_inum
,
2827 .snapshot
= snapshot
,
2832 snapshot
= parent_snapshot
;
2834 bch2_trans_iter_exit(trans
, &inode_iter
);
2835 inode_k
= bch2_bkey_get_iter(trans
, &inode_iter
, BTREE_ID_inodes
,
2836 SPOS(0, inode
.bi_dir
, snapshot
), 0);
2837 ret
= bkey_err(inode_k
) ?:
2838 !bkey_is_inode(inode_k
.k
) ? -BCH_ERR_ENOENT_inode
2839 : bch2_inode_unpack(inode_k
, &inode
);
2841 /* Should have been caught in dirents pass */
2842 bch_err_msg(c
, ret
, "error looking up parent directory");
2846 snapshot
= inode_k
.k
->p
.snapshot
;
2848 if (path_is_dup(p
, inode
.bi_inum
, snapshot
)) {
2849 /* XXX print path */
2850 bch_err(c
, "directory structure loop");
2852 darray_for_each(*p
, i
)
2853 pr_err("%llu:%u", i
->inum
, i
->snapshot
);
2854 pr_err("%llu:%u", inode
.bi_inum
, snapshot
);
2856 if (fsck_err(trans
, dir_loop
, "directory structure loop")) {
2857 ret
= remove_backpointer(trans
, &inode
);
2858 bch_err_msg(c
, ret
, "removing dirent");
2862 ret
= reattach_inode(trans
, &inode
);
2863 bch_err_msg(c
, ret
, "reattaching inode %llu", inode
.bi_inum
);
2870 bch2_trans_iter_exit(trans
, &inode_iter
);
2871 printbuf_exit(&buf
);
2877 * Check for loops in the directory structure: all other connectivity issues
2878 * have been fixed by prior passes
2880 int bch2_check_directory_structure(struct bch_fs
*c
)
2882 pathbuf path
= { 0, };
2885 ret
= bch2_trans_run(c
,
2886 for_each_btree_key_commit(trans
, iter
, BTREE_ID_inodes
, POS_MIN
,
2888 BTREE_ITER_prefetch
|
2889 BTREE_ITER_all_snapshots
, k
,
2890 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
, ({
2891 if (!bkey_is_inode(k
.k
))
2894 if (bch2_inode_flags(k
) & BCH_INODE_unlinked
)
2897 check_path(trans
, &path
, k
);
2905 struct nlink_table
{
2916 static int add_nlink(struct bch_fs
*c
, struct nlink_table
*t
,
2917 u64 inum
, u32 snapshot
)
2919 if (t
->nr
== t
->size
) {
2920 size_t new_size
= max_t(size_t, 128UL, t
->size
* 2);
2921 void *d
= kvmalloc_array(new_size
, sizeof(t
->d
[0]), GFP_KERNEL
);
2924 bch_err(c
, "fsck: error allocating memory for nlink_table, size %zu",
2926 return -BCH_ERR_ENOMEM_fsck_add_nlink
;
2930 memcpy(d
, t
->d
, t
->size
* sizeof(t
->d
[0]));
2938 t
->d
[t
->nr
++] = (struct nlink
) {
2940 .snapshot
= snapshot
,
2946 static int nlink_cmp(const void *_l
, const void *_r
)
2948 const struct nlink
*l
= _l
;
2949 const struct nlink
*r
= _r
;
2951 return cmp_int(l
->inum
, r
->inum
);
2954 static void inc_link(struct bch_fs
*c
, struct snapshots_seen
*s
,
2955 struct nlink_table
*links
,
2956 u64 range_start
, u64 range_end
, u64 inum
, u32 snapshot
)
2958 struct nlink
*link
, key
= {
2959 .inum
= inum
, .snapshot
= U32_MAX
,
2962 if (inum
< range_start
|| inum
>= range_end
)
2965 link
= __inline_bsearch(&key
, links
->d
, links
->nr
,
2966 sizeof(links
->d
[0]), nlink_cmp
);
2970 while (link
> links
->d
&& link
[0].inum
== link
[-1].inum
)
2973 for (; link
< links
->d
+ links
->nr
&& link
->inum
== inum
; link
++)
2974 if (ref_visible(c
, s
, snapshot
, link
->snapshot
)) {
2976 if (link
->snapshot
>= snapshot
)
2982 static int check_nlinks_find_hardlinks(struct bch_fs
*c
,
2983 struct nlink_table
*t
,
2984 u64 start
, u64
*end
)
2986 int ret
= bch2_trans_run(c
,
2987 for_each_btree_key(trans
, iter
, BTREE_ID_inodes
,
2990 BTREE_ITER_prefetch
|
2991 BTREE_ITER_all_snapshots
, k
, ({
2992 if (!bkey_is_inode(k
.k
))
2995 /* Should never fail, checked by bch2_inode_invalid: */
2996 struct bch_inode_unpacked u
;
2997 BUG_ON(bch2_inode_unpack(k
, &u
));
3000 * Backpointer and directory structure checks are sufficient for
3001 * directories, since they can't have hardlinks:
3003 if (S_ISDIR(u
.bi_mode
))
3007 * Previous passes ensured that bi_nlink is nonzero if
3008 * it had multiple hardlinks:
3013 ret
= add_nlink(c
, t
, k
.k
->p
.offset
, k
.k
->p
.snapshot
);
3015 *end
= k
.k
->p
.offset
;
3027 static int check_nlinks_walk_dirents(struct bch_fs
*c
, struct nlink_table
*links
,
3028 u64 range_start
, u64 range_end
)
3030 struct snapshots_seen s
;
3032 snapshots_seen_init(&s
);
3034 int ret
= bch2_trans_run(c
,
3035 for_each_btree_key(trans
, iter
, BTREE_ID_dirents
, POS_MIN
,
3037 BTREE_ITER_prefetch
|
3038 BTREE_ITER_all_snapshots
, k
, ({
3039 ret
= snapshots_seen_update(c
, &s
, iter
.btree_id
, k
.k
->p
);
3043 if (k
.k
->type
== KEY_TYPE_dirent
) {
3044 struct bkey_s_c_dirent d
= bkey_s_c_to_dirent(k
);
3046 if (d
.v
->d_type
!= DT_DIR
&&
3047 d
.v
->d_type
!= DT_SUBVOL
)
3048 inc_link(c
, &s
, links
, range_start
, range_end
,
3049 le64_to_cpu(d
.v
->d_inum
), d
.k
->p
.snapshot
);
3054 snapshots_seen_exit(&s
);
3060 static int check_nlinks_update_inode(struct btree_trans
*trans
, struct btree_iter
*iter
,
3062 struct nlink_table
*links
,
3063 size_t *idx
, u64 range_end
)
3065 struct bch_inode_unpacked u
;
3066 struct nlink
*link
= &links
->d
[*idx
];
3069 if (k
.k
->p
.offset
>= range_end
)
3072 if (!bkey_is_inode(k
.k
))
3075 BUG_ON(bch2_inode_unpack(k
, &u
));
3077 if (S_ISDIR(u
.bi_mode
))
3083 while ((cmp_int(link
->inum
, k
.k
->p
.offset
) ?:
3084 cmp_int(link
->snapshot
, k
.k
->p
.snapshot
)) < 0) {
3085 BUG_ON(*idx
== links
->nr
);
3086 link
= &links
->d
[++*idx
];
3089 if (fsck_err_on(bch2_inode_nlink_get(&u
) != link
->count
,
3090 trans
, inode_wrong_nlink
,
3091 "inode %llu type %s has wrong i_nlink (%u, should be %u)",
3092 u
.bi_inum
, bch2_d_types
[mode_to_type(u
.bi_mode
)],
3093 bch2_inode_nlink_get(&u
), link
->count
)) {
3094 bch2_inode_nlink_set(&u
, link
->count
);
3095 ret
= __bch2_fsck_write_inode(trans
, &u
);
3102 static int check_nlinks_update_hardlinks(struct bch_fs
*c
,
3103 struct nlink_table
*links
,
3104 u64 range_start
, u64 range_end
)
3108 int ret
= bch2_trans_run(c
,
3109 for_each_btree_key_commit(trans
, iter
, BTREE_ID_inodes
,
3110 POS(0, range_start
),
3111 BTREE_ITER_intent
|BTREE_ITER_prefetch
|BTREE_ITER_all_snapshots
, k
,
3112 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
3113 check_nlinks_update_inode(trans
, &iter
, k
, links
, &idx
, range_end
)));
3115 bch_err(c
, "error in fsck walking inodes: %s", bch2_err_str(ret
));
3122 int bch2_check_nlinks(struct bch_fs
*c
)
3124 struct nlink_table links
= { 0 };
3125 u64 this_iter_range_start
, next_iter_range_start
= 0;
3129 this_iter_range_start
= next_iter_range_start
;
3130 next_iter_range_start
= U64_MAX
;
3132 ret
= check_nlinks_find_hardlinks(c
, &links
,
3133 this_iter_range_start
,
3134 &next_iter_range_start
);
3136 ret
= check_nlinks_walk_dirents(c
, &links
,
3137 this_iter_range_start
,
3138 next_iter_range_start
);
3142 ret
= check_nlinks_update_hardlinks(c
, &links
,
3143 this_iter_range_start
,
3144 next_iter_range_start
);
3149 } while (next_iter_range_start
!= U64_MAX
);
3156 static int fix_reflink_p_key(struct btree_trans
*trans
, struct btree_iter
*iter
,
3159 struct bkey_s_c_reflink_p p
;
3160 struct bkey_i_reflink_p
*u
;
3162 if (k
.k
->type
!= KEY_TYPE_reflink_p
)
3165 p
= bkey_s_c_to_reflink_p(k
);
3167 if (!p
.v
->front_pad
&& !p
.v
->back_pad
)
3170 u
= bch2_trans_kmalloc(trans
, sizeof(*u
));
3171 int ret
= PTR_ERR_OR_ZERO(u
);
3175 bkey_reassemble(&u
->k_i
, k
);
3179 return bch2_trans_update(trans
, iter
, &u
->k_i
, BTREE_TRIGGER_norun
);
3182 int bch2_fix_reflink_p(struct bch_fs
*c
)
3184 if (c
->sb
.version
>= bcachefs_metadata_version_reflink_p_fix
)
3187 int ret
= bch2_trans_run(c
,
3188 for_each_btree_key_commit(trans
, iter
,
3189 BTREE_ID_extents
, POS_MIN
,
3190 BTREE_ITER_intent
|BTREE_ITER_prefetch
|
3191 BTREE_ITER_all_snapshots
, k
,
3192 NULL
, NULL
, BCH_TRANS_COMMIT_no_enospc
,
3193 fix_reflink_p_key(trans
, &iter
, k
)));