2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
54 TASK_NOTHING
, /* have to be the last element */
59 enum task_position tp
;
61 struct task_info
*info
;
65 u64 total_csum_bytes
= 0;
66 u64 total_btree_bytes
= 0;
67 u64 total_fs_tree_bytes
= 0;
68 u64 total_extent_tree_bytes
= 0;
69 u64 btree_space_waste
= 0;
70 u64 data_bytes_allocated
= 0;
71 u64 data_bytes_referenced
= 0;
72 LIST_HEAD(duplicate_extents
);
73 LIST_HEAD(delete_items
);
75 int init_extent_tree
= 0;
76 int check_data_csum
= 0;
77 struct btrfs_fs_info
*global_info
;
78 struct task_ctx ctx
= { 0 };
79 struct cache_tree
*roots_info_cache
= NULL
;
/*
 * Which check implementation to run.  The original mode keeps everything in
 * memory; lowmem trades speed for a much smaller footprint.
 * NOTE(review): enumerator list reconstructed — only CHECK_MODE_DEFAULT's
 * initializer was legible in the corrupted extraction; confirm against git.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
90 static int compare_data_backref(struct rb_node
*node1
, struct rb_node
*node2
)
92 struct extent_backref
*ext1
= rb_node_to_extent_backref(node1
);
93 struct extent_backref
*ext2
= rb_node_to_extent_backref(node2
);
94 struct data_backref
*back1
= to_data_backref(ext1
);
95 struct data_backref
*back2
= to_data_backref(ext2
);
97 WARN_ON(!ext1
->is_data
);
98 WARN_ON(!ext2
->is_data
);
100 /* parent and root are a union, so this covers both */
101 if (back1
->parent
> back2
->parent
)
103 if (back1
->parent
< back2
->parent
)
106 /* This is a full backref and the parents match. */
107 if (back1
->node
.full_backref
)
110 if (back1
->owner
> back2
->owner
)
112 if (back1
->owner
< back2
->owner
)
115 if (back1
->offset
> back2
->offset
)
117 if (back1
->offset
< back2
->offset
)
120 if (back1
->found_ref
&& back2
->found_ref
) {
121 if (back1
->disk_bytenr
> back2
->disk_bytenr
)
123 if (back1
->disk_bytenr
< back2
->disk_bytenr
)
126 if (back1
->bytes
> back2
->bytes
)
128 if (back1
->bytes
< back2
->bytes
)
135 static int compare_tree_backref(struct rb_node
*node1
, struct rb_node
*node2
)
137 struct extent_backref
*ext1
= rb_node_to_extent_backref(node1
);
138 struct extent_backref
*ext2
= rb_node_to_extent_backref(node2
);
139 struct tree_backref
*back1
= to_tree_backref(ext1
);
140 struct tree_backref
*back2
= to_tree_backref(ext2
);
142 WARN_ON(ext1
->is_data
);
143 WARN_ON(ext2
->is_data
);
145 /* parent and root are a union, so this covers both */
146 if (back1
->parent
> back2
->parent
)
148 if (back1
->parent
< back2
->parent
)
154 static int compare_extent_backref(struct rb_node
*node1
, struct rb_node
*node2
)
156 struct extent_backref
*ext1
= rb_node_to_extent_backref(node1
);
157 struct extent_backref
*ext2
= rb_node_to_extent_backref(node2
);
159 if (ext1
->is_data
> ext2
->is_data
)
162 if (ext1
->is_data
< ext2
->is_data
)
165 if (ext1
->full_backref
> ext2
->full_backref
)
167 if (ext1
->full_backref
< ext2
->full_backref
)
171 return compare_data_backref(node1
, node2
);
173 return compare_tree_backref(node1
, node2
);
177 static void *print_status_check(void *p
)
179 struct task_ctx
*priv
= p
;
180 const char work_indicator
[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string
[] = {
184 "checking free space cache",
188 task_period_start(priv
->info
, 1000 /* 1s */);
190 if (priv
->tp
== TASK_NOTHING
)
194 printf("%s [%c]\r", task_position_string
[priv
->tp
],
195 work_indicator
[count
% 4]);
198 task_period_wait(priv
->info
);
/*
 * Progress-task finish callback: terminate the in-place status line.
 * NOTE(review): body was dropped entirely by the corrupted extraction;
 * reconstructed as the conventional newline+flush — confirm against git.
 */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);
	return 0;
}
211 static enum btrfs_check_mode
parse_check_mode(const char *str
)
213 if (strcmp(str
, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM
;
215 if (strcmp(str
, "orig") == 0)
216 return CHECK_MODE_ORIGINAL
;
217 if (strcmp(str
, "original") == 0)
218 return CHECK_MODE_ORIGINAL
;
220 return CHECK_MODE_UNKNOWN
;
223 /* Compatible function to allow reuse of old codes */
224 static u64
first_extent_gap(struct rb_root
*holes
)
226 struct file_extent_hole
*hole
;
228 if (RB_EMPTY_ROOT(holes
))
231 hole
= rb_entry(rb_first(holes
), struct file_extent_hole
, node
);
235 static int compare_hole(struct rb_node
*node1
, struct rb_node
*node2
)
237 struct file_extent_hole
*hole1
;
238 struct file_extent_hole
*hole2
;
240 hole1
= rb_entry(node1
, struct file_extent_hole
, node
);
241 hole2
= rb_entry(node2
, struct file_extent_hole
, node
);
243 if (hole1
->start
> hole2
->start
)
245 if (hole1
->start
< hole2
->start
)
247 /* Now hole1->start == hole2->start */
248 if (hole1
->len
>= hole2
->len
)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
264 static int add_file_extent_hole(struct rb_root
*holes
,
267 struct file_extent_hole
*hole
;
268 struct file_extent_hole
*prev
= NULL
;
269 struct file_extent_hole
*next
= NULL
;
271 hole
= malloc(sizeof(*hole
));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes
, &hole
->node
, compare_hole
);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole
->node
))
281 prev
= rb_entry(rb_prev(&hole
->node
), struct file_extent_hole
,
283 if (prev
&& prev
->start
+ prev
->len
>= hole
->start
) {
284 hole
->len
= hole
->start
+ hole
->len
- prev
->start
;
285 hole
->start
= prev
->start
;
286 rb_erase(&prev
->node
, holes
);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole
->node
))
295 next
= rb_entry(rb_next(&hole
->node
), struct file_extent_hole
,
297 if (hole
->start
+ hole
->len
>= next
->start
) {
298 if (hole
->start
+ hole
->len
<= next
->start
+ next
->len
)
299 hole
->len
= next
->start
+ next
->len
-
301 rb_erase(&next
->node
, holes
);
310 static int compare_hole_range(struct rb_node
*node
, void *data
)
312 struct file_extent_hole
*hole
;
315 hole
= (struct file_extent_hole
*)data
;
318 hole
= rb_entry(node
, struct file_extent_hole
, node
);
319 if (start
< hole
->start
)
321 if (start
>= hole
->start
&& start
< hole
->start
+ hole
->len
)
327 * Delete a hole in the record
329 * This will do the hole split and is much restrict than add.
331 static int del_file_extent_hole(struct rb_root
*holes
,
334 struct file_extent_hole
*hole
;
335 struct file_extent_hole tmp
;
340 struct rb_node
*node
;
347 node
= rb_search(holes
, &tmp
, compare_hole_range
, NULL
);
350 hole
= rb_entry(node
, struct file_extent_hole
, node
);
351 if (start
+ len
> hole
->start
+ hole
->len
)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
358 if (start
> hole
->start
) {
359 prev_start
= hole
->start
;
360 prev_len
= start
- hole
->start
;
363 if (hole
->start
+ hole
->len
> start
+ len
) {
364 next_start
= start
+ len
;
365 next_len
= hole
->start
+ hole
->len
- start
- len
;
368 rb_erase(node
, holes
);
371 ret
= add_file_extent_hole(holes
, prev_start
, prev_len
);
376 ret
= add_file_extent_hole(holes
, next_start
, next_len
);
383 static int copy_file_extent_holes(struct rb_root
*dst
,
386 struct file_extent_hole
*hole
;
387 struct rb_node
*node
;
390 node
= rb_first(src
);
392 hole
= rb_entry(node
, struct file_extent_hole
, node
);
393 ret
= add_file_extent_hole(dst
, hole
->start
, hole
->len
);
396 node
= rb_next(node
);
401 static void free_file_extent_holes(struct rb_root
*holes
)
403 struct rb_node
*node
;
404 struct file_extent_hole
*hole
;
406 node
= rb_first(holes
);
408 hole
= rb_entry(node
, struct file_extent_hole
, node
);
409 rb_erase(node
, holes
);
411 node
= rb_first(holes
);
415 static void record_root_in_trans(struct btrfs_trans_handle
*trans
,
416 struct btrfs_root
*root
)
418 if (root
->last_trans
!= trans
->transid
) {
419 root
->track_dirty
= 1;
420 root
->last_trans
= trans
->transid
;
421 root
->commit_root
= root
->node
;
422 extent_buffer_get(root
->node
);
426 static int device_record_compare(struct rb_node
*node1
, struct rb_node
*node2
)
428 struct device_record
*rec1
;
429 struct device_record
*rec2
;
431 rec1
= rb_entry(node1
, struct device_record
, node
);
432 rec2
= rb_entry(node2
, struct device_record
, node
);
433 if (rec1
->devid
> rec2
->devid
)
435 else if (rec1
->devid
< rec2
->devid
)
441 static struct inode_record
*clone_inode_rec(struct inode_record
*orig_rec
)
443 struct inode_record
*rec
;
444 struct inode_backref
*backref
;
445 struct inode_backref
*orig
;
446 struct inode_backref
*tmp
;
447 struct orphan_data_extent
*src_orphan
;
448 struct orphan_data_extent
*dst_orphan
;
453 rec
= malloc(sizeof(*rec
));
455 return ERR_PTR(-ENOMEM
);
456 memcpy(rec
, orig_rec
, sizeof(*rec
));
458 INIT_LIST_HEAD(&rec
->backrefs
);
459 INIT_LIST_HEAD(&rec
->orphan_extents
);
460 rec
->holes
= RB_ROOT
;
462 list_for_each_entry(orig
, &orig_rec
->backrefs
, list
) {
463 size
= sizeof(*orig
) + orig
->namelen
+ 1;
464 backref
= malloc(size
);
469 memcpy(backref
, orig
, size
);
470 list_add_tail(&backref
->list
, &rec
->backrefs
);
472 list_for_each_entry(src_orphan
, &orig_rec
->orphan_extents
, list
) {
473 dst_orphan
= malloc(sizeof(*dst_orphan
));
478 memcpy(dst_orphan
, src_orphan
, sizeof(*src_orphan
));
479 list_add_tail(&dst_orphan
->list
, &rec
->orphan_extents
);
481 ret
= copy_file_extent_holes(&rec
->holes
, &orig_rec
->holes
);
488 rb
= rb_first(&rec
->holes
);
490 struct file_extent_hole
*hole
;
492 hole
= rb_entry(rb
, struct file_extent_hole
, node
);
498 if (!list_empty(&rec
->backrefs
))
499 list_for_each_entry_safe(orig
, tmp
, &rec
->backrefs
, list
) {
500 list_del(&orig
->list
);
504 if (!list_empty(&rec
->orphan_extents
))
505 list_for_each_entry_safe(orig
, tmp
, &rec
->orphan_extents
, list
) {
506 list_del(&orig
->list
);
515 static void print_orphan_data_extents(struct list_head
*orphan_extents
,
518 struct orphan_data_extent
*orphan
;
520 if (list_empty(orphan_extents
))
522 printf("The following data extent is lost in tree %llu:\n",
524 list_for_each_entry(orphan
, orphan_extents
, list
) {
525 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
526 orphan
->objectid
, orphan
->offset
, orphan
->disk_bytenr
,
531 static void print_inode_error(struct btrfs_root
*root
, struct inode_record
*rec
)
533 u64 root_objectid
= root
->root_key
.objectid
;
534 int errors
= rec
->errors
;
538 /* reloc root errors, we print its corresponding fs root objectid*/
539 if (root_objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
540 root_objectid
= root
->root_key
.offset
;
541 fprintf(stderr
, "reloc");
543 fprintf(stderr
, "root %llu inode %llu errors %x",
544 (unsigned long long) root_objectid
,
545 (unsigned long long) rec
->ino
, rec
->errors
);
547 if (errors
& I_ERR_NO_INODE_ITEM
)
548 fprintf(stderr
, ", no inode item");
549 if (errors
& I_ERR_NO_ORPHAN_ITEM
)
550 fprintf(stderr
, ", no orphan item");
551 if (errors
& I_ERR_DUP_INODE_ITEM
)
552 fprintf(stderr
, ", dup inode item");
553 if (errors
& I_ERR_DUP_DIR_INDEX
)
554 fprintf(stderr
, ", dup dir index");
555 if (errors
& I_ERR_ODD_DIR_ITEM
)
556 fprintf(stderr
, ", odd dir item");
557 if (errors
& I_ERR_ODD_FILE_EXTENT
)
558 fprintf(stderr
, ", odd file extent");
559 if (errors
& I_ERR_BAD_FILE_EXTENT
)
560 fprintf(stderr
, ", bad file extent");
561 if (errors
& I_ERR_FILE_EXTENT_OVERLAP
)
562 fprintf(stderr
, ", file extent overlap");
563 if (errors
& I_ERR_FILE_EXTENT_TOO_LARGE
)
564 fprintf(stderr
, ", inline file extent too large");
565 if (errors
& I_ERR_FILE_EXTENT_DISCOUNT
)
566 fprintf(stderr
, ", file extent discount");
567 if (errors
& I_ERR_DIR_ISIZE_WRONG
)
568 fprintf(stderr
, ", dir isize wrong");
569 if (errors
& I_ERR_FILE_NBYTES_WRONG
)
570 fprintf(stderr
, ", nbytes wrong");
571 if (errors
& I_ERR_ODD_CSUM_ITEM
)
572 fprintf(stderr
, ", odd csum item");
573 if (errors
& I_ERR_SOME_CSUM_MISSING
)
574 fprintf(stderr
, ", some csum missing");
575 if (errors
& I_ERR_LINK_COUNT_WRONG
)
576 fprintf(stderr
, ", link count wrong");
577 if (errors
& I_ERR_FILE_EXTENT_ORPHAN
)
578 fprintf(stderr
, ", orphan file extent");
579 if (errors
& I_ERR_ODD_INODE_FLAGS
)
580 fprintf(stderr
, ", odd inode flags");
581 if (errors
& I_ERR_INLINE_RAM_BYTES_WRONG
)
582 fprintf(stderr
, ", invalid inline ram bytes");
583 fprintf(stderr
, "\n");
584 /* Print the orphan extents if needed */
585 if (errors
& I_ERR_FILE_EXTENT_ORPHAN
)
586 print_orphan_data_extents(&rec
->orphan_extents
, root
->objectid
);
588 /* Print the holes if needed */
589 if (errors
& I_ERR_FILE_EXTENT_DISCOUNT
) {
590 struct file_extent_hole
*hole
;
591 struct rb_node
*node
;
594 node
= rb_first(&rec
->holes
);
595 fprintf(stderr
, "Found file extent holes:\n");
598 hole
= rb_entry(node
, struct file_extent_hole
, node
);
599 fprintf(stderr
, "\tstart: %llu, len: %llu\n",
600 hole
->start
, hole
->len
);
601 node
= rb_next(node
);
604 fprintf(stderr
, "\tstart: 0, len: %llu\n",
606 root
->fs_info
->sectorsize
));
610 static void print_ref_error(int errors
)
612 if (errors
& REF_ERR_NO_DIR_ITEM
)
613 fprintf(stderr
, ", no dir item");
614 if (errors
& REF_ERR_NO_DIR_INDEX
)
615 fprintf(stderr
, ", no dir index");
616 if (errors
& REF_ERR_NO_INODE_REF
)
617 fprintf(stderr
, ", no inode ref");
618 if (errors
& REF_ERR_DUP_DIR_ITEM
)
619 fprintf(stderr
, ", dup dir item");
620 if (errors
& REF_ERR_DUP_DIR_INDEX
)
621 fprintf(stderr
, ", dup dir index");
622 if (errors
& REF_ERR_DUP_INODE_REF
)
623 fprintf(stderr
, ", dup inode ref");
624 if (errors
& REF_ERR_INDEX_UNMATCH
)
625 fprintf(stderr
, ", index mismatch");
626 if (errors
& REF_ERR_FILETYPE_UNMATCH
)
627 fprintf(stderr
, ", filetype mismatch");
628 if (errors
& REF_ERR_NAME_TOO_LONG
)
629 fprintf(stderr
, ", name too long");
630 if (errors
& REF_ERR_NO_ROOT_REF
)
631 fprintf(stderr
, ", no root ref");
632 if (errors
& REF_ERR_NO_ROOT_BACKREF
)
633 fprintf(stderr
, ", no root backref");
634 if (errors
& REF_ERR_DUP_ROOT_REF
)
635 fprintf(stderr
, ", dup root ref");
636 if (errors
& REF_ERR_DUP_ROOT_BACKREF
)
637 fprintf(stderr
, ", dup root backref");
638 fprintf(stderr
, "\n");
641 static struct inode_record
*get_inode_rec(struct cache_tree
*inode_cache
,
644 struct ptr_node
*node
;
645 struct cache_extent
*cache
;
646 struct inode_record
*rec
= NULL
;
649 cache
= lookup_cache_extent(inode_cache
, ino
, 1);
651 node
= container_of(cache
, struct ptr_node
, cache
);
653 if (mod
&& rec
->refs
> 1) {
654 node
->data
= clone_inode_rec(rec
);
655 if (IS_ERR(node
->data
))
661 rec
= calloc(1, sizeof(*rec
));
663 return ERR_PTR(-ENOMEM
);
665 rec
->extent_start
= (u64
)-1;
667 INIT_LIST_HEAD(&rec
->backrefs
);
668 INIT_LIST_HEAD(&rec
->orphan_extents
);
669 rec
->holes
= RB_ROOT
;
671 node
= malloc(sizeof(*node
));
674 return ERR_PTR(-ENOMEM
);
676 node
->cache
.start
= ino
;
677 node
->cache
.size
= 1;
680 if (ino
== BTRFS_FREE_INO_OBJECTID
)
683 ret
= insert_cache_extent(inode_cache
, &node
->cache
);
685 return ERR_PTR(-EEXIST
);
690 static void free_orphan_data_extents(struct list_head
*orphan_extents
)
692 struct orphan_data_extent
*orphan
;
694 while (!list_empty(orphan_extents
)) {
695 orphan
= list_entry(orphan_extents
->next
,
696 struct orphan_data_extent
, list
);
697 list_del(&orphan
->list
);
702 static void free_inode_rec(struct inode_record
*rec
)
704 struct inode_backref
*backref
;
709 while (!list_empty(&rec
->backrefs
)) {
710 backref
= to_inode_backref(rec
->backrefs
.next
);
711 list_del(&backref
->list
);
714 free_orphan_data_extents(&rec
->orphan_extents
);
715 free_file_extent_holes(&rec
->holes
);
719 static int can_free_inode_rec(struct inode_record
*rec
)
721 if (!rec
->errors
&& rec
->checked
&& rec
->found_inode_item
&&
722 rec
->nlink
== rec
->found_link
&& list_empty(&rec
->backrefs
))
727 static void maybe_free_inode_rec(struct cache_tree
*inode_cache
,
728 struct inode_record
*rec
)
730 struct cache_extent
*cache
;
731 struct inode_backref
*tmp
, *backref
;
732 struct ptr_node
*node
;
735 if (!rec
->found_inode_item
)
738 filetype
= imode_to_type(rec
->imode
);
739 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
740 if (backref
->found_dir_item
&& backref
->found_dir_index
) {
741 if (backref
->filetype
!= filetype
)
742 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
743 if (!backref
->errors
&& backref
->found_inode_ref
&&
744 rec
->nlink
== rec
->found_link
) {
745 list_del(&backref
->list
);
751 if (!rec
->checked
|| rec
->merging
)
754 if (S_ISDIR(rec
->imode
)) {
755 if (rec
->found_size
!= rec
->isize
)
756 rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
757 if (rec
->found_file_extent
)
758 rec
->errors
|= I_ERR_ODD_FILE_EXTENT
;
759 } else if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
760 if (rec
->found_dir_item
)
761 rec
->errors
|= I_ERR_ODD_DIR_ITEM
;
762 if (rec
->found_size
!= rec
->nbytes
)
763 rec
->errors
|= I_ERR_FILE_NBYTES_WRONG
;
764 if (rec
->nlink
> 0 && !no_holes
&&
765 (rec
->extent_end
< rec
->isize
||
766 first_extent_gap(&rec
->holes
) < rec
->isize
))
767 rec
->errors
|= I_ERR_FILE_EXTENT_DISCOUNT
;
770 if (S_ISREG(rec
->imode
) || S_ISLNK(rec
->imode
)) {
771 if (rec
->found_csum_item
&& rec
->nodatasum
)
772 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
773 if (rec
->some_csum_missing
&& !rec
->nodatasum
)
774 rec
->errors
|= I_ERR_SOME_CSUM_MISSING
;
777 BUG_ON(rec
->refs
!= 1);
778 if (can_free_inode_rec(rec
)) {
779 cache
= lookup_cache_extent(inode_cache
, rec
->ino
, 1);
780 node
= container_of(cache
, struct ptr_node
, cache
);
781 BUG_ON(node
->data
!= rec
);
782 remove_cache_extent(inode_cache
, &node
->cache
);
788 static int check_orphan_item(struct btrfs_root
*root
, u64 ino
)
790 struct btrfs_path path
;
791 struct btrfs_key key
;
794 key
.objectid
= BTRFS_ORPHAN_OBJECTID
;
795 key
.type
= BTRFS_ORPHAN_ITEM_KEY
;
798 btrfs_init_path(&path
);
799 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
800 btrfs_release_path(&path
);
806 static int process_inode_item(struct extent_buffer
*eb
,
807 int slot
, struct btrfs_key
*key
,
808 struct shared_node
*active_node
)
810 struct inode_record
*rec
;
811 struct btrfs_inode_item
*item
;
814 rec
= active_node
->current
;
815 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
816 if (rec
->found_inode_item
) {
817 rec
->errors
|= I_ERR_DUP_INODE_ITEM
;
820 item
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_item
);
821 rec
->nlink
= btrfs_inode_nlink(eb
, item
);
822 rec
->isize
= btrfs_inode_size(eb
, item
);
823 rec
->nbytes
= btrfs_inode_nbytes(eb
, item
);
824 rec
->imode
= btrfs_inode_mode(eb
, item
);
825 if (btrfs_inode_flags(eb
, item
) & BTRFS_INODE_NODATASUM
)
827 rec
->found_inode_item
= 1;
829 rec
->errors
|= I_ERR_NO_ORPHAN_ITEM
;
830 flags
= btrfs_inode_flags(eb
, item
);
831 if (S_ISLNK(rec
->imode
) &&
832 flags
& (BTRFS_INODE_IMMUTABLE
| BTRFS_INODE_APPEND
))
833 rec
->errors
|= I_ERR_ODD_INODE_FLAGS
;
834 maybe_free_inode_rec(&active_node
->inode_cache
, rec
);
838 static struct inode_backref
*get_inode_backref(struct inode_record
*rec
,
840 int namelen
, u64 dir
)
842 struct inode_backref
*backref
;
844 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
845 if (rec
->ino
== BTRFS_MULTIPLE_OBJECTIDS
)
847 if (backref
->dir
!= dir
|| backref
->namelen
!= namelen
)
849 if (memcmp(name
, backref
->name
, namelen
))
854 backref
= malloc(sizeof(*backref
) + namelen
+ 1);
857 memset(backref
, 0, sizeof(*backref
));
859 backref
->namelen
= namelen
;
860 memcpy(backref
->name
, name
, namelen
);
861 backref
->name
[namelen
] = '\0';
862 list_add_tail(&backref
->list
, &rec
->backrefs
);
866 static int add_inode_backref(struct cache_tree
*inode_cache
,
867 u64 ino
, u64 dir
, u64 index
,
868 const char *name
, int namelen
,
869 u8 filetype
, u8 itemtype
, int errors
)
871 struct inode_record
*rec
;
872 struct inode_backref
*backref
;
874 rec
= get_inode_rec(inode_cache
, ino
, 1);
876 backref
= get_inode_backref(rec
, name
, namelen
, dir
);
879 backref
->errors
|= errors
;
880 if (itemtype
== BTRFS_DIR_INDEX_KEY
) {
881 if (backref
->found_dir_index
)
882 backref
->errors
|= REF_ERR_DUP_DIR_INDEX
;
883 if (backref
->found_inode_ref
&& backref
->index
!= index
)
884 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
885 if (backref
->found_dir_item
&& backref
->filetype
!= filetype
)
886 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
888 backref
->index
= index
;
889 backref
->filetype
= filetype
;
890 backref
->found_dir_index
= 1;
891 } else if (itemtype
== BTRFS_DIR_ITEM_KEY
) {
893 if (backref
->found_dir_item
)
894 backref
->errors
|= REF_ERR_DUP_DIR_ITEM
;
895 if (backref
->found_dir_index
&& backref
->filetype
!= filetype
)
896 backref
->errors
|= REF_ERR_FILETYPE_UNMATCH
;
898 backref
->filetype
= filetype
;
899 backref
->found_dir_item
= 1;
900 } else if ((itemtype
== BTRFS_INODE_REF_KEY
) ||
901 (itemtype
== BTRFS_INODE_EXTREF_KEY
)) {
902 if (backref
->found_inode_ref
)
903 backref
->errors
|= REF_ERR_DUP_INODE_REF
;
904 if (backref
->found_dir_index
&& backref
->index
!= index
)
905 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
907 backref
->index
= index
;
909 backref
->ref_type
= itemtype
;
910 backref
->found_inode_ref
= 1;
915 maybe_free_inode_rec(inode_cache
, rec
);
919 static int merge_inode_recs(struct inode_record
*src
, struct inode_record
*dst
,
920 struct cache_tree
*dst_cache
)
922 struct inode_backref
*backref
;
927 list_for_each_entry(backref
, &src
->backrefs
, list
) {
928 if (backref
->found_dir_index
) {
929 add_inode_backref(dst_cache
, dst
->ino
, backref
->dir
,
930 backref
->index
, backref
->name
,
931 backref
->namelen
, backref
->filetype
,
932 BTRFS_DIR_INDEX_KEY
, backref
->errors
);
934 if (backref
->found_dir_item
) {
936 add_inode_backref(dst_cache
, dst
->ino
,
937 backref
->dir
, 0, backref
->name
,
938 backref
->namelen
, backref
->filetype
,
939 BTRFS_DIR_ITEM_KEY
, backref
->errors
);
941 if (backref
->found_inode_ref
) {
942 add_inode_backref(dst_cache
, dst
->ino
,
943 backref
->dir
, backref
->index
,
944 backref
->name
, backref
->namelen
, 0,
945 backref
->ref_type
, backref
->errors
);
949 if (src
->found_dir_item
)
950 dst
->found_dir_item
= 1;
951 if (src
->found_file_extent
)
952 dst
->found_file_extent
= 1;
953 if (src
->found_csum_item
)
954 dst
->found_csum_item
= 1;
955 if (src
->some_csum_missing
)
956 dst
->some_csum_missing
= 1;
957 if (first_extent_gap(&dst
->holes
) > first_extent_gap(&src
->holes
)) {
958 ret
= copy_file_extent_holes(&dst
->holes
, &src
->holes
);
963 BUG_ON(src
->found_link
< dir_count
);
964 dst
->found_link
+= src
->found_link
- dir_count
;
965 dst
->found_size
+= src
->found_size
;
966 if (src
->extent_start
!= (u64
)-1) {
967 if (dst
->extent_start
== (u64
)-1) {
968 dst
->extent_start
= src
->extent_start
;
969 dst
->extent_end
= src
->extent_end
;
971 if (dst
->extent_end
> src
->extent_start
)
972 dst
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
973 else if (dst
->extent_end
< src
->extent_start
) {
974 ret
= add_file_extent_hole(&dst
->holes
,
976 src
->extent_start
- dst
->extent_end
);
978 if (dst
->extent_end
< src
->extent_end
)
979 dst
->extent_end
= src
->extent_end
;
983 dst
->errors
|= src
->errors
;
984 if (src
->found_inode_item
) {
985 if (!dst
->found_inode_item
) {
986 dst
->nlink
= src
->nlink
;
987 dst
->isize
= src
->isize
;
988 dst
->nbytes
= src
->nbytes
;
989 dst
->imode
= src
->imode
;
990 dst
->nodatasum
= src
->nodatasum
;
991 dst
->found_inode_item
= 1;
993 dst
->errors
|= I_ERR_DUP_INODE_ITEM
;
1001 static int splice_shared_node(struct shared_node
*src_node
,
1002 struct shared_node
*dst_node
)
1004 struct cache_extent
*cache
;
1005 struct ptr_node
*node
, *ins
;
1006 struct cache_tree
*src
, *dst
;
1007 struct inode_record
*rec
, *conflict
;
1008 u64 current_ino
= 0;
1012 if (--src_node
->refs
== 0)
1014 if (src_node
->current
)
1015 current_ino
= src_node
->current
->ino
;
1017 src
= &src_node
->root_cache
;
1018 dst
= &dst_node
->root_cache
;
1020 cache
= search_cache_extent(src
, 0);
1022 node
= container_of(cache
, struct ptr_node
, cache
);
1024 cache
= next_cache_extent(cache
);
1027 remove_cache_extent(src
, &node
->cache
);
1030 ins
= malloc(sizeof(*ins
));
1032 ins
->cache
.start
= node
->cache
.start
;
1033 ins
->cache
.size
= node
->cache
.size
;
1037 ret
= insert_cache_extent(dst
, &ins
->cache
);
1038 if (ret
== -EEXIST
) {
1039 conflict
= get_inode_rec(dst
, rec
->ino
, 1);
1040 BUG_ON(IS_ERR(conflict
));
1041 merge_inode_recs(rec
, conflict
, dst
);
1043 conflict
->checked
= 1;
1044 if (dst_node
->current
== conflict
)
1045 dst_node
->current
= NULL
;
1047 maybe_free_inode_rec(dst
, conflict
);
1048 free_inode_rec(rec
);
1055 if (src
== &src_node
->root_cache
) {
1056 src
= &src_node
->inode_cache
;
1057 dst
= &dst_node
->inode_cache
;
1061 if (current_ino
> 0 && (!dst_node
->current
||
1062 current_ino
> dst_node
->current
->ino
)) {
1063 if (dst_node
->current
) {
1064 dst_node
->current
->checked
= 1;
1065 maybe_free_inode_rec(dst
, dst_node
->current
);
1067 dst_node
->current
= get_inode_rec(dst
, current_ino
, 1);
1068 BUG_ON(IS_ERR(dst_node
->current
));
1073 static void free_inode_ptr(struct cache_extent
*cache
)
1075 struct ptr_node
*node
;
1076 struct inode_record
*rec
;
1078 node
= container_of(cache
, struct ptr_node
, cache
);
1080 free_inode_rec(rec
);
1084 FREE_EXTENT_CACHE_BASED_TREE(inode_recs
, free_inode_ptr
);
1086 static struct shared_node
*find_shared_node(struct cache_tree
*shared
,
1089 struct cache_extent
*cache
;
1090 struct shared_node
*node
;
1092 cache
= lookup_cache_extent(shared
, bytenr
, 1);
1094 node
= container_of(cache
, struct shared_node
, cache
);
1100 static int add_shared_node(struct cache_tree
*shared
, u64 bytenr
, u32 refs
)
1103 struct shared_node
*node
;
1105 node
= calloc(1, sizeof(*node
));
1108 node
->cache
.start
= bytenr
;
1109 node
->cache
.size
= 1;
1110 cache_tree_init(&node
->root_cache
);
1111 cache_tree_init(&node
->inode_cache
);
1114 ret
= insert_cache_extent(shared
, &node
->cache
);
1119 static int enter_shared_node(struct btrfs_root
*root
, u64 bytenr
, u32 refs
,
1120 struct walk_control
*wc
, int level
)
1122 struct shared_node
*node
;
1123 struct shared_node
*dest
;
1126 if (level
== wc
->active_node
)
1129 BUG_ON(wc
->active_node
<= level
);
1130 node
= find_shared_node(&wc
->shared
, bytenr
);
1132 ret
= add_shared_node(&wc
->shared
, bytenr
, refs
);
1134 node
= find_shared_node(&wc
->shared
, bytenr
);
1135 wc
->nodes
[level
] = node
;
1136 wc
->active_node
= level
;
1140 if (wc
->root_level
== wc
->active_node
&&
1141 btrfs_root_refs(&root
->root_item
) == 0) {
1142 if (--node
->refs
== 0) {
1143 free_inode_recs_tree(&node
->root_cache
);
1144 free_inode_recs_tree(&node
->inode_cache
);
1145 remove_cache_extent(&wc
->shared
, &node
->cache
);
1151 dest
= wc
->nodes
[wc
->active_node
];
1152 splice_shared_node(node
, dest
);
1153 if (node
->refs
== 0) {
1154 remove_cache_extent(&wc
->shared
, &node
->cache
);
1160 static int leave_shared_node(struct btrfs_root
*root
,
1161 struct walk_control
*wc
, int level
)
1163 struct shared_node
*node
;
1164 struct shared_node
*dest
;
1167 if (level
== wc
->root_level
)
1170 for (i
= level
+ 1; i
< BTRFS_MAX_LEVEL
; i
++) {
1174 BUG_ON(i
>= BTRFS_MAX_LEVEL
);
1176 node
= wc
->nodes
[wc
->active_node
];
1177 wc
->nodes
[wc
->active_node
] = NULL
;
1178 wc
->active_node
= i
;
1180 dest
= wc
->nodes
[wc
->active_node
];
1181 if (wc
->active_node
< wc
->root_level
||
1182 btrfs_root_refs(&root
->root_item
) > 0) {
1183 BUG_ON(node
->refs
<= 1);
1184 splice_shared_node(node
, dest
);
1186 BUG_ON(node
->refs
< 2);
1195 * 1 - if the root with id child_root_id is a child of root parent_root_id
1196 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1197 * has other root(s) as parent(s)
1198 * 2 - if the root child_root_id doesn't have any parent roots
1200 static int is_child_root(struct btrfs_root
*root
, u64 parent_root_id
,
1203 struct btrfs_path path
;
1204 struct btrfs_key key
;
1205 struct extent_buffer
*leaf
;
1209 btrfs_init_path(&path
);
1211 key
.objectid
= parent_root_id
;
1212 key
.type
= BTRFS_ROOT_REF_KEY
;
1213 key
.offset
= child_root_id
;
1214 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
1218 btrfs_release_path(&path
);
1222 key
.objectid
= child_root_id
;
1223 key
.type
= BTRFS_ROOT_BACKREF_KEY
;
1225 ret
= btrfs_search_slot(NULL
, root
->fs_info
->tree_root
, &key
, &path
,
1231 leaf
= path
.nodes
[0];
1232 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
1233 ret
= btrfs_next_leaf(root
->fs_info
->tree_root
, &path
);
1236 leaf
= path
.nodes
[0];
1239 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
1240 if (key
.objectid
!= child_root_id
||
1241 key
.type
!= BTRFS_ROOT_BACKREF_KEY
)
1246 if (key
.offset
== parent_root_id
) {
1247 btrfs_release_path(&path
);
1254 btrfs_release_path(&path
);
1257 return has_parent
? 0 : 2;
1260 static int process_dir_item(struct extent_buffer
*eb
,
1261 int slot
, struct btrfs_key
*key
,
1262 struct shared_node
*active_node
)
1272 struct btrfs_dir_item
*di
;
1273 struct inode_record
*rec
;
1274 struct cache_tree
*root_cache
;
1275 struct cache_tree
*inode_cache
;
1276 struct btrfs_key location
;
1277 char namebuf
[BTRFS_NAME_LEN
];
1279 root_cache
= &active_node
->root_cache
;
1280 inode_cache
= &active_node
->inode_cache
;
1281 rec
= active_node
->current
;
1282 rec
->found_dir_item
= 1;
1284 di
= btrfs_item_ptr(eb
, slot
, struct btrfs_dir_item
);
1285 total
= btrfs_item_size_nr(eb
, slot
);
1286 while (cur
< total
) {
1288 btrfs_dir_item_key_to_cpu(eb
, di
, &location
);
1289 name_len
= btrfs_dir_name_len(eb
, di
);
1290 data_len
= btrfs_dir_data_len(eb
, di
);
1291 filetype
= btrfs_dir_type(eb
, di
);
1293 rec
->found_size
+= name_len
;
1294 if (cur
+ sizeof(*di
) + name_len
> total
||
1295 name_len
> BTRFS_NAME_LEN
) {
1296 error
= REF_ERR_NAME_TOO_LONG
;
1298 if (cur
+ sizeof(*di
) > total
)
1300 len
= min_t(u32
, total
- cur
- sizeof(*di
),
1307 read_extent_buffer(eb
, namebuf
, (unsigned long)(di
+ 1), len
);
1309 if (key
->type
== BTRFS_DIR_ITEM_KEY
&&
1310 key
->offset
!= btrfs_name_hash(namebuf
, len
)) {
1311 rec
->errors
|= I_ERR_ODD_DIR_ITEM
;
1312 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1313 key
->objectid
, key
->offset
, namebuf
, len
, filetype
,
1314 key
->offset
, btrfs_name_hash(namebuf
, len
));
1317 if (location
.type
== BTRFS_INODE_ITEM_KEY
) {
1318 add_inode_backref(inode_cache
, location
.objectid
,
1319 key
->objectid
, key
->offset
, namebuf
,
1320 len
, filetype
, key
->type
, error
);
1321 } else if (location
.type
== BTRFS_ROOT_ITEM_KEY
) {
1322 add_inode_backref(root_cache
, location
.objectid
,
1323 key
->objectid
, key
->offset
,
1324 namebuf
, len
, filetype
,
1328 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1329 location
.type
, key
->objectid
, key
->offset
);
1330 add_inode_backref(inode_cache
, BTRFS_MULTIPLE_OBJECTIDS
,
1331 key
->objectid
, key
->offset
, namebuf
,
1332 len
, filetype
, key
->type
, error
);
1335 len
= sizeof(*di
) + name_len
+ data_len
;
1336 di
= (struct btrfs_dir_item
*)((char *)di
+ len
);
1339 if (key
->type
== BTRFS_DIR_INDEX_KEY
&& nritems
> 1)
1340 rec
->errors
|= I_ERR_DUP_DIR_INDEX
;
1345 static int process_inode_ref(struct extent_buffer
*eb
,
1346 int slot
, struct btrfs_key
*key
,
1347 struct shared_node
*active_node
)
1355 struct cache_tree
*inode_cache
;
1356 struct btrfs_inode_ref
*ref
;
1357 char namebuf
[BTRFS_NAME_LEN
];
1359 inode_cache
= &active_node
->inode_cache
;
1361 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_ref
);
1362 total
= btrfs_item_size_nr(eb
, slot
);
1363 while (cur
< total
) {
1364 name_len
= btrfs_inode_ref_name_len(eb
, ref
);
1365 index
= btrfs_inode_ref_index(eb
, ref
);
1367 /* inode_ref + namelen should not cross item boundary */
1368 if (cur
+ sizeof(*ref
) + name_len
> total
||
1369 name_len
> BTRFS_NAME_LEN
) {
1370 if (total
< cur
+ sizeof(*ref
))
1373 /* Still try to read out the remaining part */
1374 len
= min_t(u32
, total
- cur
- sizeof(*ref
),
1376 error
= REF_ERR_NAME_TOO_LONG
;
1382 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
1383 add_inode_backref(inode_cache
, key
->objectid
, key
->offset
,
1384 index
, namebuf
, len
, 0, key
->type
, error
);
1386 len
= sizeof(*ref
) + name_len
;
1387 ref
= (struct btrfs_inode_ref
*)((char *)ref
+ len
);
1393 static int process_inode_extref(struct extent_buffer
*eb
,
1394 int slot
, struct btrfs_key
*key
,
1395 struct shared_node
*active_node
)
1404 struct cache_tree
*inode_cache
;
1405 struct btrfs_inode_extref
*extref
;
1406 char namebuf
[BTRFS_NAME_LEN
];
1408 inode_cache
= &active_node
->inode_cache
;
1410 extref
= btrfs_item_ptr(eb
, slot
, struct btrfs_inode_extref
);
1411 total
= btrfs_item_size_nr(eb
, slot
);
1412 while (cur
< total
) {
1413 name_len
= btrfs_inode_extref_name_len(eb
, extref
);
1414 index
= btrfs_inode_extref_index(eb
, extref
);
1415 parent
= btrfs_inode_extref_parent(eb
, extref
);
1416 if (name_len
<= BTRFS_NAME_LEN
) {
1420 len
= BTRFS_NAME_LEN
;
1421 error
= REF_ERR_NAME_TOO_LONG
;
1423 read_extent_buffer(eb
, namebuf
,
1424 (unsigned long)(extref
+ 1), len
);
1425 add_inode_backref(inode_cache
, key
->objectid
, parent
,
1426 index
, namebuf
, len
, 0, key
->type
, error
);
1428 len
= sizeof(*extref
) + name_len
;
1429 extref
= (struct btrfs_inode_extref
*)((char *)extref
+ len
);
1436 static int process_file_extent(struct btrfs_root
*root
,
1437 struct extent_buffer
*eb
,
1438 int slot
, struct btrfs_key
*key
,
1439 struct shared_node
*active_node
)
1441 struct inode_record
*rec
;
1442 struct btrfs_file_extent_item
*fi
;
1444 u64 disk_bytenr
= 0;
1445 u64 extent_offset
= 0;
1446 u64 mask
= root
->fs_info
->sectorsize
- 1;
1447 u32 max_inline_size
= min_t(u32
, mask
,
1448 BTRFS_MAX_INLINE_DATA_SIZE(root
->fs_info
));
1452 rec
= active_node
->current
;
1453 BUG_ON(rec
->ino
!= key
->objectid
|| rec
->refs
> 1);
1454 rec
->found_file_extent
= 1;
1456 if (rec
->extent_start
== (u64
)-1) {
1457 rec
->extent_start
= key
->offset
;
1458 rec
->extent_end
= key
->offset
;
1461 if (rec
->extent_end
> key
->offset
)
1462 rec
->errors
|= I_ERR_FILE_EXTENT_OVERLAP
;
1463 else if (rec
->extent_end
< key
->offset
) {
1464 ret
= add_file_extent_hole(&rec
->holes
, rec
->extent_end
,
1465 key
->offset
- rec
->extent_end
);
1470 fi
= btrfs_item_ptr(eb
, slot
, struct btrfs_file_extent_item
);
1471 extent_type
= btrfs_file_extent_type(eb
, fi
);
1473 if (extent_type
== BTRFS_FILE_EXTENT_INLINE
) {
1474 u8 compression
= btrfs_file_extent_compression(eb
, fi
);
1475 struct btrfs_item
*item
= btrfs_item_nr(slot
);
1477 num_bytes
= btrfs_file_extent_ram_bytes(eb
, fi
);
1479 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1481 if (btrfs_file_extent_inline_item_len(eb
, item
) >
1483 num_bytes
> root
->fs_info
->sectorsize
)
1484 rec
->errors
|= I_ERR_FILE_EXTENT_TOO_LARGE
;
1486 if (num_bytes
> max_inline_size
)
1487 rec
->errors
|= I_ERR_FILE_EXTENT_TOO_LARGE
;
1488 if (btrfs_file_extent_inline_item_len(eb
, item
) !=
1490 rec
->errors
|= I_ERR_INLINE_RAM_BYTES_WRONG
;
1492 rec
->found_size
+= num_bytes
;
1493 num_bytes
= (num_bytes
+ mask
) & ~mask
;
1494 } else if (extent_type
== BTRFS_FILE_EXTENT_REG
||
1495 extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1496 num_bytes
= btrfs_file_extent_num_bytes(eb
, fi
);
1497 disk_bytenr
= btrfs_file_extent_disk_bytenr(eb
, fi
);
1498 extent_offset
= btrfs_file_extent_offset(eb
, fi
);
1499 if (num_bytes
== 0 || (num_bytes
& mask
))
1500 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1501 if (num_bytes
+ extent_offset
>
1502 btrfs_file_extent_ram_bytes(eb
, fi
))
1503 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1504 if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
&&
1505 (btrfs_file_extent_compression(eb
, fi
) ||
1506 btrfs_file_extent_encryption(eb
, fi
) ||
1507 btrfs_file_extent_other_encoding(eb
, fi
)))
1508 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1509 if (disk_bytenr
> 0)
1510 rec
->found_size
+= num_bytes
;
1512 rec
->errors
|= I_ERR_BAD_FILE_EXTENT
;
1514 rec
->extent_end
= key
->offset
+ num_bytes
;
1517 * The data reloc tree will copy full extents into its inode and then
1518 * copy the corresponding csums. Because the extent it copied could be
1519 * a preallocated extent that hasn't been written to yet there may be no
1520 * csums to copy, ergo we won't have csums for our file extent. This is
1521 * ok so just don't bother checking csums if the inode belongs to the
1524 if (disk_bytenr
> 0 &&
1525 btrfs_header_owner(eb
) != BTRFS_DATA_RELOC_TREE_OBJECTID
) {
1527 if (btrfs_file_extent_compression(eb
, fi
))
1528 num_bytes
= btrfs_file_extent_disk_num_bytes(eb
, fi
);
1530 disk_bytenr
+= extent_offset
;
1532 ret
= count_csum_range(root
->fs_info
, disk_bytenr
, num_bytes
,
1536 if (extent_type
== BTRFS_FILE_EXTENT_REG
) {
1538 rec
->found_csum_item
= 1;
1539 if (found
< num_bytes
)
1540 rec
->some_csum_missing
= 1;
1541 } else if (extent_type
== BTRFS_FILE_EXTENT_PREALLOC
) {
1543 ret
= check_prealloc_extent_written(root
->fs_info
,
1549 rec
->errors
|= I_ERR_ODD_CSUM_ITEM
;
1556 static int process_one_leaf(struct btrfs_root
*root
, struct extent_buffer
*eb
,
1557 struct walk_control
*wc
)
1559 struct btrfs_key key
;
1563 struct cache_tree
*inode_cache
;
1564 struct shared_node
*active_node
;
1566 if (wc
->root_level
== wc
->active_node
&&
1567 btrfs_root_refs(&root
->root_item
) == 0)
1570 active_node
= wc
->nodes
[wc
->active_node
];
1571 inode_cache
= &active_node
->inode_cache
;
1572 nritems
= btrfs_header_nritems(eb
);
1573 for (i
= 0; i
< nritems
; i
++) {
1574 btrfs_item_key_to_cpu(eb
, &key
, i
);
1576 if (key
.objectid
== BTRFS_FREE_SPACE_OBJECTID
)
1578 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
)
1581 if (active_node
->current
== NULL
||
1582 active_node
->current
->ino
< key
.objectid
) {
1583 if (active_node
->current
) {
1584 active_node
->current
->checked
= 1;
1585 maybe_free_inode_rec(inode_cache
,
1586 active_node
->current
);
1588 active_node
->current
= get_inode_rec(inode_cache
,
1590 BUG_ON(IS_ERR(active_node
->current
));
1593 case BTRFS_DIR_ITEM_KEY
:
1594 case BTRFS_DIR_INDEX_KEY
:
1595 ret
= process_dir_item(eb
, i
, &key
, active_node
);
1597 case BTRFS_INODE_REF_KEY
:
1598 ret
= process_inode_ref(eb
, i
, &key
, active_node
);
1600 case BTRFS_INODE_EXTREF_KEY
:
1601 ret
= process_inode_extref(eb
, i
, &key
, active_node
);
1603 case BTRFS_INODE_ITEM_KEY
:
1604 ret
= process_inode_item(eb
, i
, &key
, active_node
);
1606 case BTRFS_EXTENT_DATA_KEY
:
1607 ret
= process_file_extent(root
, eb
, i
, &key
,
1617 static int walk_down_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1618 struct walk_control
*wc
, int *level
,
1619 struct node_refs
*nrefs
)
1621 enum btrfs_tree_block_status status
;
1624 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1625 struct extent_buffer
*next
;
1626 struct extent_buffer
*cur
;
1630 WARN_ON(*level
< 0);
1631 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1633 if (path
->nodes
[*level
]->start
== nrefs
->bytenr
[*level
]) {
1634 refs
= nrefs
->refs
[*level
];
1637 ret
= btrfs_lookup_extent_info(NULL
, fs_info
,
1638 path
->nodes
[*level
]->start
,
1639 *level
, 1, &refs
, NULL
);
1644 nrefs
->bytenr
[*level
] = path
->nodes
[*level
]->start
;
1645 nrefs
->refs
[*level
] = refs
;
1649 ret
= enter_shared_node(root
, path
->nodes
[*level
]->start
,
1657 while (*level
>= 0) {
1658 WARN_ON(*level
< 0);
1659 WARN_ON(*level
>= BTRFS_MAX_LEVEL
);
1660 cur
= path
->nodes
[*level
];
1662 if (btrfs_header_level(cur
) != *level
)
1665 if (path
->slots
[*level
] >= btrfs_header_nritems(cur
))
1668 ret
= process_one_leaf(root
, cur
, wc
);
1673 bytenr
= btrfs_node_blockptr(cur
, path
->slots
[*level
]);
1674 ptr_gen
= btrfs_node_ptr_generation(cur
, path
->slots
[*level
]);
1676 if (bytenr
== nrefs
->bytenr
[*level
- 1]) {
1677 refs
= nrefs
->refs
[*level
- 1];
1679 ret
= btrfs_lookup_extent_info(NULL
, fs_info
, bytenr
,
1680 *level
- 1, 1, &refs
, NULL
);
1684 nrefs
->bytenr
[*level
- 1] = bytenr
;
1685 nrefs
->refs
[*level
- 1] = refs
;
1690 ret
= enter_shared_node(root
, bytenr
, refs
,
1693 path
->slots
[*level
]++;
1698 next
= btrfs_find_tree_block(fs_info
, bytenr
, fs_info
->nodesize
);
1699 if (!next
|| !btrfs_buffer_uptodate(next
, ptr_gen
)) {
1700 free_extent_buffer(next
);
1701 reada_walk_down(root
, cur
, path
->slots
[*level
]);
1702 next
= read_tree_block(root
->fs_info
, bytenr
, ptr_gen
);
1703 if (!extent_buffer_uptodate(next
)) {
1704 struct btrfs_key node_key
;
1706 btrfs_node_key_to_cpu(path
->nodes
[*level
],
1708 path
->slots
[*level
]);
1709 btrfs_add_corrupt_extent_record(root
->fs_info
,
1711 path
->nodes
[*level
]->start
,
1712 root
->fs_info
->nodesize
,
1719 ret
= check_child_node(cur
, path
->slots
[*level
], next
);
1721 free_extent_buffer(next
);
1726 if (btrfs_is_leaf(next
))
1727 status
= btrfs_check_leaf(root
, NULL
, next
);
1729 status
= btrfs_check_node(root
, NULL
, next
);
1730 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
1731 free_extent_buffer(next
);
1736 *level
= *level
- 1;
1737 free_extent_buffer(path
->nodes
[*level
]);
1738 path
->nodes
[*level
] = next
;
1739 path
->slots
[*level
] = 0;
1742 path
->slots
[*level
] = btrfs_header_nritems(path
->nodes
[*level
]);
1746 static int walk_up_tree(struct btrfs_root
*root
, struct btrfs_path
*path
,
1747 struct walk_control
*wc
, int *level
)
1750 struct extent_buffer
*leaf
;
1752 for (i
= *level
; i
< BTRFS_MAX_LEVEL
- 1 && path
->nodes
[i
]; i
++) {
1753 leaf
= path
->nodes
[i
];
1754 if (path
->slots
[i
] + 1 < btrfs_header_nritems(leaf
)) {
1759 free_extent_buffer(path
->nodes
[*level
]);
1760 path
->nodes
[*level
] = NULL
;
1761 BUG_ON(*level
> wc
->active_node
);
1762 if (*level
== wc
->active_node
)
1763 leave_shared_node(root
, wc
, *level
);
1769 static int check_root_dir(struct inode_record
*rec
)
1771 struct inode_backref
*backref
;
1774 if (!rec
->found_inode_item
|| rec
->errors
)
1776 if (rec
->nlink
!= 1 || rec
->found_link
!= 0)
1778 if (list_empty(&rec
->backrefs
))
1780 backref
= to_inode_backref(rec
->backrefs
.next
);
1781 if (!backref
->found_inode_ref
)
1783 if (backref
->index
!= 0 || backref
->namelen
!= 2 ||
1784 memcmp(backref
->name
, "..", 2))
1786 if (backref
->found_dir_index
|| backref
->found_dir_item
)
1793 static int repair_inode_isize(struct btrfs_trans_handle
*trans
,
1794 struct btrfs_root
*root
, struct btrfs_path
*path
,
1795 struct inode_record
*rec
)
1797 struct btrfs_inode_item
*ei
;
1798 struct btrfs_key key
;
1801 key
.objectid
= rec
->ino
;
1802 key
.type
= BTRFS_INODE_ITEM_KEY
;
1803 key
.offset
= (u64
)-1;
1805 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
1809 if (!path
->slots
[0]) {
1816 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1817 if (key
.objectid
!= rec
->ino
) {
1822 ei
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
1823 struct btrfs_inode_item
);
1824 btrfs_set_inode_size(path
->nodes
[0], ei
, rec
->found_size
);
1825 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1826 rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1827 printf("reset isize for dir %llu root %llu\n", rec
->ino
,
1828 root
->root_key
.objectid
);
1830 btrfs_release_path(path
);
1834 static int repair_inode_orphan_item(struct btrfs_trans_handle
*trans
,
1835 struct btrfs_root
*root
,
1836 struct btrfs_path
*path
,
1837 struct inode_record
*rec
)
1841 ret
= btrfs_add_orphan_item(trans
, root
, path
, rec
->ino
);
1842 btrfs_release_path(path
);
1844 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
1848 static int repair_inode_nbytes(struct btrfs_trans_handle
*trans
,
1849 struct btrfs_root
*root
,
1850 struct btrfs_path
*path
,
1851 struct inode_record
*rec
)
1853 struct btrfs_inode_item
*ei
;
1854 struct btrfs_key key
;
1857 key
.objectid
= rec
->ino
;
1858 key
.type
= BTRFS_INODE_ITEM_KEY
;
1861 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
1868 /* Since ret == 0, no need to check anything */
1869 ei
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
1870 struct btrfs_inode_item
);
1871 btrfs_set_inode_nbytes(path
->nodes
[0], ei
, rec
->found_size
);
1872 btrfs_mark_buffer_dirty(path
->nodes
[0]);
1873 rec
->errors
&= ~I_ERR_FILE_NBYTES_WRONG
;
1874 printf("reset nbytes for ino %llu root %llu\n",
1875 rec
->ino
, root
->root_key
.objectid
);
1877 btrfs_release_path(path
);
1881 static int add_missing_dir_index(struct btrfs_root
*root
,
1882 struct cache_tree
*inode_cache
,
1883 struct inode_record
*rec
,
1884 struct inode_backref
*backref
)
1886 struct btrfs_path path
;
1887 struct btrfs_trans_handle
*trans
;
1888 struct btrfs_dir_item
*dir_item
;
1889 struct extent_buffer
*leaf
;
1890 struct btrfs_key key
;
1891 struct btrfs_disk_key disk_key
;
1892 struct inode_record
*dir_rec
;
1893 unsigned long name_ptr
;
1894 u32 data_size
= sizeof(*dir_item
) + backref
->namelen
;
1897 trans
= btrfs_start_transaction(root
, 1);
1899 return PTR_ERR(trans
);
1901 fprintf(stderr
, "repairing missing dir index item for inode %llu\n",
1902 (unsigned long long)rec
->ino
);
1904 btrfs_init_path(&path
);
1905 key
.objectid
= backref
->dir
;
1906 key
.type
= BTRFS_DIR_INDEX_KEY
;
1907 key
.offset
= backref
->index
;
1908 ret
= btrfs_insert_empty_item(trans
, root
, &path
, &key
, data_size
);
1911 leaf
= path
.nodes
[0];
1912 dir_item
= btrfs_item_ptr(leaf
, path
.slots
[0], struct btrfs_dir_item
);
1914 disk_key
.objectid
= cpu_to_le64(rec
->ino
);
1915 disk_key
.type
= BTRFS_INODE_ITEM_KEY
;
1916 disk_key
.offset
= 0;
1918 btrfs_set_dir_item_key(leaf
, dir_item
, &disk_key
);
1919 btrfs_set_dir_type(leaf
, dir_item
, imode_to_type(rec
->imode
));
1920 btrfs_set_dir_data_len(leaf
, dir_item
, 0);
1921 btrfs_set_dir_name_len(leaf
, dir_item
, backref
->namelen
);
1922 name_ptr
= (unsigned long)(dir_item
+ 1);
1923 write_extent_buffer(leaf
, backref
->name
, name_ptr
, backref
->namelen
);
1924 btrfs_mark_buffer_dirty(leaf
);
1925 btrfs_release_path(&path
);
1926 btrfs_commit_transaction(trans
, root
);
1928 backref
->found_dir_index
= 1;
1929 dir_rec
= get_inode_rec(inode_cache
, backref
->dir
, 0);
1930 BUG_ON(IS_ERR(dir_rec
));
1933 dir_rec
->found_size
+= backref
->namelen
;
1934 if (dir_rec
->found_size
== dir_rec
->isize
&&
1935 (dir_rec
->errors
& I_ERR_DIR_ISIZE_WRONG
))
1936 dir_rec
->errors
&= ~I_ERR_DIR_ISIZE_WRONG
;
1937 if (dir_rec
->found_size
!= dir_rec
->isize
)
1938 dir_rec
->errors
|= I_ERR_DIR_ISIZE_WRONG
;
1943 static int delete_dir_index(struct btrfs_root
*root
,
1944 struct inode_backref
*backref
)
1946 struct btrfs_trans_handle
*trans
;
1947 struct btrfs_dir_item
*di
;
1948 struct btrfs_path path
;
1951 trans
= btrfs_start_transaction(root
, 1);
1953 return PTR_ERR(trans
);
1955 fprintf(stderr
, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
1956 (unsigned long long)backref
->dir
,
1957 BTRFS_DIR_INDEX_KEY
, (unsigned long long)backref
->index
,
1958 (unsigned long long)root
->objectid
);
1960 btrfs_init_path(&path
);
1961 di
= btrfs_lookup_dir_index(trans
, root
, &path
, backref
->dir
,
1962 backref
->name
, backref
->namelen
,
1963 backref
->index
, -1);
1966 btrfs_release_path(&path
);
1967 btrfs_commit_transaction(trans
, root
);
1974 ret
= btrfs_del_item(trans
, root
, &path
);
1976 ret
= btrfs_delete_one_dir_name(trans
, root
, &path
, di
);
1978 btrfs_release_path(&path
);
1979 btrfs_commit_transaction(trans
, root
);
1983 static int create_inode_item(struct btrfs_root
*root
,
1984 struct inode_record
*rec
, int root_dir
)
1986 struct btrfs_trans_handle
*trans
;
1992 trans
= btrfs_start_transaction(root
, 1);
1993 if (IS_ERR(trans
)) {
1994 ret
= PTR_ERR(trans
);
1998 nlink
= root_dir
? 1 : rec
->found_link
;
1999 if (rec
->found_dir_item
) {
2000 if (rec
->found_file_extent
)
2001 fprintf(stderr
, "root %llu inode %llu has both a dir "
2002 "item and extents, unsure if it is a dir or a "
2003 "regular file so setting it as a directory\n",
2004 (unsigned long long)root
->objectid
,
2005 (unsigned long long)rec
->ino
);
2006 mode
= S_IFDIR
| 0755;
2007 size
= rec
->found_size
;
2008 } else if (!rec
->found_dir_item
) {
2009 size
= rec
->extent_end
;
2010 mode
= S_IFREG
| 0755;
2013 ret
= insert_inode_item(trans
, root
, rec
->ino
, size
, rec
->nbytes
,
2015 btrfs_commit_transaction(trans
, root
);
2019 static int repair_inode_backrefs(struct btrfs_root
*root
,
2020 struct inode_record
*rec
,
2021 struct cache_tree
*inode_cache
,
2024 struct inode_backref
*tmp
, *backref
;
2025 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
2029 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
2030 if (!delete && rec
->ino
== root_dirid
) {
2031 if (!rec
->found_inode_item
) {
2032 ret
= create_inode_item(root
, rec
, 1);
2039 /* Index 0 for root dir's are special, don't mess with it */
2040 if (rec
->ino
== root_dirid
&& backref
->index
== 0)
2044 ((backref
->found_dir_index
&& !backref
->found_inode_ref
) ||
2045 (backref
->found_dir_index
&& backref
->found_inode_ref
&&
2046 (backref
->errors
& REF_ERR_INDEX_UNMATCH
)))) {
2047 ret
= delete_dir_index(root
, backref
);
2051 list_del(&backref
->list
);
2056 if (!delete && !backref
->found_dir_index
&&
2057 backref
->found_dir_item
&& backref
->found_inode_ref
) {
2058 ret
= add_missing_dir_index(root
, inode_cache
, rec
,
2063 if (backref
->found_dir_item
&&
2064 backref
->found_dir_index
) {
2065 if (!backref
->errors
&&
2066 backref
->found_inode_ref
) {
2067 list_del(&backref
->list
);
2074 if (!delete && (!backref
->found_dir_index
&&
2075 !backref
->found_dir_item
&&
2076 backref
->found_inode_ref
)) {
2077 struct btrfs_trans_handle
*trans
;
2078 struct btrfs_key location
;
2080 ret
= check_dir_conflict(root
, backref
->name
,
2086 * let nlink fixing routine to handle it,
2087 * which can do it better.
2092 location
.objectid
= rec
->ino
;
2093 location
.type
= BTRFS_INODE_ITEM_KEY
;
2094 location
.offset
= 0;
2096 trans
= btrfs_start_transaction(root
, 1);
2097 if (IS_ERR(trans
)) {
2098 ret
= PTR_ERR(trans
);
2101 fprintf(stderr
, "adding missing dir index/item pair "
2103 (unsigned long long)rec
->ino
);
2104 ret
= btrfs_insert_dir_item(trans
, root
, backref
->name
,
2106 backref
->dir
, &location
,
2107 imode_to_type(rec
->imode
),
2110 btrfs_commit_transaction(trans
, root
);
2114 if (!delete && (backref
->found_inode_ref
&&
2115 backref
->found_dir_index
&&
2116 backref
->found_dir_item
&&
2117 !(backref
->errors
& REF_ERR_INDEX_UNMATCH
) &&
2118 !rec
->found_inode_item
)) {
2119 ret
= create_inode_item(root
, rec
, 0);
2126 return ret
? ret
: repaired
;
2130 * To determine the file type for nlink/inode_item repair
2132 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2133 * Return -ENOENT if file type is not found.
2135 static int find_file_type(struct inode_record
*rec
, u8
*type
)
2137 struct inode_backref
*backref
;
2139 /* For inode item recovered case */
2140 if (rec
->found_inode_item
) {
2141 *type
= imode_to_type(rec
->imode
);
2145 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2146 if (backref
->found_dir_index
|| backref
->found_dir_item
) {
2147 *type
= backref
->filetype
;
2155 * To determine the file name for nlink repair
2157 * Return 0 if file name is found, set name and namelen.
2158 * Return -ENOENT if file name is not found.
2160 static int find_file_name(struct inode_record
*rec
,
2161 char *name
, int *namelen
)
2163 struct inode_backref
*backref
;
2165 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2166 if (backref
->found_dir_index
|| backref
->found_dir_item
||
2167 backref
->found_inode_ref
) {
2168 memcpy(name
, backref
->name
, backref
->namelen
);
2169 *namelen
= backref
->namelen
;
2176 /* Reset the nlink of the inode to the correct one */
2177 static int reset_nlink(struct btrfs_trans_handle
*trans
,
2178 struct btrfs_root
*root
,
2179 struct btrfs_path
*path
,
2180 struct inode_record
*rec
)
2182 struct inode_backref
*backref
;
2183 struct inode_backref
*tmp
;
2184 struct btrfs_key key
;
2185 struct btrfs_inode_item
*inode_item
;
2188 /* We don't believe this either, reset it and iterate backref */
2189 rec
->found_link
= 0;
2191 /* Remove all backref including the valid ones */
2192 list_for_each_entry_safe(backref
, tmp
, &rec
->backrefs
, list
) {
2193 ret
= btrfs_unlink(trans
, root
, rec
->ino
, backref
->dir
,
2194 backref
->index
, backref
->name
,
2195 backref
->namelen
, 0);
2199 /* remove invalid backref, so it won't be added back */
2200 if (!(backref
->found_dir_index
&&
2201 backref
->found_dir_item
&&
2202 backref
->found_inode_ref
)) {
2203 list_del(&backref
->list
);
2210 /* Set nlink to 0 */
2211 key
.objectid
= rec
->ino
;
2212 key
.type
= BTRFS_INODE_ITEM_KEY
;
2214 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
2221 inode_item
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2222 struct btrfs_inode_item
);
2223 btrfs_set_inode_nlink(path
->nodes
[0], inode_item
, 0);
2224 btrfs_mark_buffer_dirty(path
->nodes
[0]);
2225 btrfs_release_path(path
);
2228 * Add back valid inode_ref/dir_item/dir_index,
2229 * add_link() will handle the nlink inc, so new nlink must be correct
2231 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2232 ret
= btrfs_add_link(trans
, root
, rec
->ino
, backref
->dir
,
2233 backref
->name
, backref
->namelen
,
2234 backref
->filetype
, &backref
->index
, 1, 0);
2239 btrfs_release_path(path
);
2243 static int repair_inode_nlinks(struct btrfs_trans_handle
*trans
,
2244 struct btrfs_root
*root
,
2245 struct btrfs_path
*path
,
2246 struct inode_record
*rec
)
2248 char namebuf
[BTRFS_NAME_LEN
] = {0};
2251 int name_recovered
= 0;
2252 int type_recovered
= 0;
2256 * Get file name and type first before these invalid inode ref
2257 * are deleted by remove_all_invalid_backref()
2259 name_recovered
= !find_file_name(rec
, namebuf
, &namelen
);
2260 type_recovered
= !find_file_type(rec
, &type
);
2262 if (!name_recovered
) {
2263 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2264 rec
->ino
, rec
->ino
);
2265 namelen
= count_digits(rec
->ino
);
2266 sprintf(namebuf
, "%llu", rec
->ino
);
2269 if (!type_recovered
) {
2270 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2272 type
= BTRFS_FT_REG_FILE
;
2276 ret
= reset_nlink(trans
, root
, path
, rec
);
2279 "Failed to reset nlink for inode %llu: %s\n",
2280 rec
->ino
, strerror(-ret
));
2284 if (rec
->found_link
== 0) {
2285 ret
= link_inode_to_lostfound(trans
, root
, path
, rec
->ino
,
2286 namebuf
, namelen
, type
,
2287 (u64
*)&rec
->found_link
);
2291 printf("Fixed the nlink of inode %llu\n", rec
->ino
);
2294 * Clear the flag anyway, or we will loop forever for the same inode
2295 * as it will not be removed from the bad inode list and the dead loop
2298 rec
->errors
&= ~I_ERR_LINK_COUNT_WRONG
;
2299 btrfs_release_path(path
);
2304 * Check if there is any normal(reg or prealloc) file extent for given
2306 * This is used to determine the file type when neither its dir_index/item or
2307 * inode_item exists.
2309 * This will *NOT* report error, if any error happens, just consider it does
2310 * not have any normal file extent.
2312 static int find_normal_file_extent(struct btrfs_root
*root
, u64 ino
)
2314 struct btrfs_path path
;
2315 struct btrfs_key key
;
2316 struct btrfs_key found_key
;
2317 struct btrfs_file_extent_item
*fi
;
2321 btrfs_init_path(&path
);
2323 key
.type
= BTRFS_EXTENT_DATA_KEY
;
2326 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
2331 if (ret
&& path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
2332 ret
= btrfs_next_leaf(root
, &path
);
2339 btrfs_item_key_to_cpu(path
.nodes
[0], &found_key
,
2341 if (found_key
.objectid
!= ino
||
2342 found_key
.type
!= BTRFS_EXTENT_DATA_KEY
)
2344 fi
= btrfs_item_ptr(path
.nodes
[0], path
.slots
[0],
2345 struct btrfs_file_extent_item
);
2346 type
= btrfs_file_extent_type(path
.nodes
[0], fi
);
2347 if (type
!= BTRFS_FILE_EXTENT_INLINE
) {
2353 btrfs_release_path(&path
);
2357 static u32
btrfs_type_to_imode(u8 type
)
2359 static u32 imode_by_btrfs_type
[] = {
2360 [BTRFS_FT_REG_FILE
] = S_IFREG
,
2361 [BTRFS_FT_DIR
] = S_IFDIR
,
2362 [BTRFS_FT_CHRDEV
] = S_IFCHR
,
2363 [BTRFS_FT_BLKDEV
] = S_IFBLK
,
2364 [BTRFS_FT_FIFO
] = S_IFIFO
,
2365 [BTRFS_FT_SOCK
] = S_IFSOCK
,
2366 [BTRFS_FT_SYMLINK
] = S_IFLNK
,
2369 return imode_by_btrfs_type
[(type
)];
2372 static int repair_inode_no_item(struct btrfs_trans_handle
*trans
,
2373 struct btrfs_root
*root
,
2374 struct btrfs_path
*path
,
2375 struct inode_record
*rec
)
2379 int type_recovered
= 0;
2382 printf("Trying to rebuild inode:%llu\n", rec
->ino
);
2384 type_recovered
= !find_file_type(rec
, &filetype
);
2387 * Try to determine inode type if type not found.
2389 * For found regular file extent, it must be FILE.
2390 * For found dir_item/index, it must be DIR.
2392 * For undetermined one, use FILE as fallback.
2395 * 1. If found backref(inode_index/item is already handled) to it,
2397 * Need new inode-inode ref structure to allow search for that.
2399 if (!type_recovered
) {
2400 if (rec
->found_file_extent
&&
2401 find_normal_file_extent(root
, rec
->ino
)) {
2403 filetype
= BTRFS_FT_REG_FILE
;
2404 } else if (rec
->found_dir_item
) {
2406 filetype
= BTRFS_FT_DIR
;
2407 } else if (!list_empty(&rec
->orphan_extents
)) {
2409 filetype
= BTRFS_FT_REG_FILE
;
2411 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2414 filetype
= BTRFS_FT_REG_FILE
;
2418 ret
= btrfs_new_inode(trans
, root
, rec
->ino
,
2419 mode
| btrfs_type_to_imode(filetype
));
2424 * Here inode rebuild is done, we only rebuild the inode item,
2425 * don't repair the nlink(like move to lost+found).
2426 * That is the job of nlink repair.
2428 * We just fill the record and return
2430 rec
->found_dir_item
= 1;
2431 rec
->imode
= mode
| btrfs_type_to_imode(filetype
);
2433 rec
->errors
&= ~I_ERR_NO_INODE_ITEM
;
2434 /* Ensure the inode_nlinks repair function will be called */
2435 rec
->errors
|= I_ERR_LINK_COUNT_WRONG
;
2440 static int repair_inode_orphan_extent(struct btrfs_trans_handle
*trans
,
2441 struct btrfs_root
*root
,
2442 struct btrfs_path
*path
,
2443 struct inode_record
*rec
)
2445 struct orphan_data_extent
*orphan
;
2446 struct orphan_data_extent
*tmp
;
2449 list_for_each_entry_safe(orphan
, tmp
, &rec
->orphan_extents
, list
) {
2451 * Check for conflicting file extents
2453 * Here we don't know whether the extents is compressed or not,
2454 * so we can only assume it not compressed nor data offset,
2455 * and use its disk_len as extent length.
2457 ret
= btrfs_get_extent(NULL
, root
, path
, orphan
->objectid
,
2458 orphan
->offset
, orphan
->disk_len
, 0);
2459 btrfs_release_path(path
);
2464 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2465 orphan
->disk_bytenr
, orphan
->disk_len
);
2466 ret
= btrfs_free_extent(trans
,
2467 root
->fs_info
->extent_root
,
2468 orphan
->disk_bytenr
, orphan
->disk_len
,
2469 0, root
->objectid
, orphan
->objectid
,
2474 ret
= btrfs_insert_file_extent(trans
, root
, orphan
->objectid
,
2475 orphan
->offset
, orphan
->disk_bytenr
,
2476 orphan
->disk_len
, orphan
->disk_len
);
2480 /* Update file size info */
2481 rec
->found_size
+= orphan
->disk_len
;
2482 if (rec
->found_size
== rec
->nbytes
)
2483 rec
->errors
&= ~I_ERR_FILE_NBYTES_WRONG
;
2485 /* Update the file extent hole info too */
2486 ret
= del_file_extent_hole(&rec
->holes
, orphan
->offset
,
2490 if (RB_EMPTY_ROOT(&rec
->holes
))
2491 rec
->errors
&= ~I_ERR_FILE_EXTENT_DISCOUNT
;
2493 list_del(&orphan
->list
);
2496 rec
->errors
&= ~I_ERR_FILE_EXTENT_ORPHAN
;
2501 static int repair_inode_discount_extent(struct btrfs_trans_handle
*trans
,
2502 struct btrfs_root
*root
,
2503 struct btrfs_path
*path
,
2504 struct inode_record
*rec
)
2506 struct rb_node
*node
;
2507 struct file_extent_hole
*hole
;
2511 node
= rb_first(&rec
->holes
);
2515 hole
= rb_entry(node
, struct file_extent_hole
, node
);
2516 ret
= btrfs_punch_hole(trans
, root
, rec
->ino
,
2517 hole
->start
, hole
->len
);
2520 ret
= del_file_extent_hole(&rec
->holes
, hole
->start
,
2524 if (RB_EMPTY_ROOT(&rec
->holes
))
2525 rec
->errors
&= ~I_ERR_FILE_EXTENT_DISCOUNT
;
2526 node
= rb_first(&rec
->holes
);
2528 /* special case for a file losing all its file extent */
2530 ret
= btrfs_punch_hole(trans
, root
, rec
->ino
, 0,
2531 round_up(rec
->isize
,
2532 root
->fs_info
->sectorsize
));
2536 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2537 rec
->ino
, root
->objectid
);
2542 static int repair_inline_ram_bytes(struct btrfs_trans_handle
*trans
,
2543 struct btrfs_root
*root
,
2544 struct btrfs_path
*path
,
2545 struct inode_record
*rec
)
2547 struct btrfs_key key
;
2548 struct btrfs_file_extent_item
*fi
;
2549 struct btrfs_item
*i
;
2550 u64 on_disk_item_len
;
2553 key
.objectid
= rec
->ino
;
2555 key
.type
= BTRFS_EXTENT_DATA_KEY
;
2557 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
2563 i
= btrfs_item_nr(path
->slots
[0]);
2564 on_disk_item_len
= btrfs_file_extent_inline_item_len(path
->nodes
[0], i
);
2565 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
2566 struct btrfs_file_extent_item
);
2567 btrfs_set_file_extent_ram_bytes(path
->nodes
[0], fi
, on_disk_item_len
);
2568 btrfs_mark_buffer_dirty(path
->nodes
[0]);
2569 printf("Repaired inline ram_bytes for root %llu ino %llu\n",
2570 root
->objectid
, rec
->ino
);
2571 rec
->errors
&= ~I_ERR_INLINE_RAM_BYTES_WRONG
;
2573 btrfs_release_path(path
);
2577 static int try_repair_inode(struct btrfs_root
*root
, struct inode_record
*rec
)
2579 struct btrfs_trans_handle
*trans
;
2580 struct btrfs_path path
;
2583 if (!(rec
->errors
& (I_ERR_DIR_ISIZE_WRONG
|
2584 I_ERR_NO_ORPHAN_ITEM
|
2585 I_ERR_LINK_COUNT_WRONG
|
2586 I_ERR_NO_INODE_ITEM
|
2587 I_ERR_FILE_EXTENT_ORPHAN
|
2588 I_ERR_FILE_EXTENT_DISCOUNT
|
2589 I_ERR_FILE_NBYTES_WRONG
|
2590 I_ERR_INLINE_RAM_BYTES_WRONG
)))
2594 * For nlink repair, it may create a dir and add link, so
2595 * 2 for parent(256)'s dir_index and dir_item
2596 * 2 for lost+found dir's inode_item and inode_ref
2597 * 1 for the new inode_ref of the file
2598 * 2 for lost+found dir's dir_index and dir_item for the file
2600 trans
= btrfs_start_transaction(root
, 7);
2602 return PTR_ERR(trans
);
2604 btrfs_init_path(&path
);
2605 if (rec
->errors
& I_ERR_NO_INODE_ITEM
)
2606 ret
= repair_inode_no_item(trans
, root
, &path
, rec
);
2607 if (!ret
&& rec
->errors
& I_ERR_FILE_EXTENT_ORPHAN
)
2608 ret
= repair_inode_orphan_extent(trans
, root
, &path
, rec
);
2609 if (!ret
&& rec
->errors
& I_ERR_FILE_EXTENT_DISCOUNT
)
2610 ret
= repair_inode_discount_extent(trans
, root
, &path
, rec
);
2611 if (!ret
&& rec
->errors
& I_ERR_DIR_ISIZE_WRONG
)
2612 ret
= repair_inode_isize(trans
, root
, &path
, rec
);
2613 if (!ret
&& rec
->errors
& I_ERR_NO_ORPHAN_ITEM
)
2614 ret
= repair_inode_orphan_item(trans
, root
, &path
, rec
);
2615 if (!ret
&& rec
->errors
& I_ERR_LINK_COUNT_WRONG
)
2616 ret
= repair_inode_nlinks(trans
, root
, &path
, rec
);
2617 if (!ret
&& rec
->errors
& I_ERR_FILE_NBYTES_WRONG
)
2618 ret
= repair_inode_nbytes(trans
, root
, &path
, rec
);
2619 if (!ret
&& rec
->errors
& I_ERR_INLINE_RAM_BYTES_WRONG
)
2620 ret
= repair_inline_ram_bytes(trans
, root
, &path
, rec
);
2621 btrfs_commit_transaction(trans
, root
);
2622 btrfs_release_path(&path
);
2626 static int check_inode_recs(struct btrfs_root
*root
,
2627 struct cache_tree
*inode_cache
)
2629 struct cache_extent
*cache
;
2630 struct ptr_node
*node
;
2631 struct inode_record
*rec
;
2632 struct inode_backref
*backref
;
2637 u64 root_dirid
= btrfs_root_dirid(&root
->root_item
);
2639 if (btrfs_root_refs(&root
->root_item
) == 0) {
2640 if (!cache_tree_empty(inode_cache
))
2641 fprintf(stderr
, "warning line %d\n", __LINE__
);
2646 * We need to repair backrefs first because we could change some of the
2647 * errors in the inode recs.
2649 * We also need to go through and delete invalid backrefs first and then
2650 * add the correct ones second. We do this because we may get EEXIST
2651 * when adding back the correct index because we hadn't yet deleted the
2654 * For example, if we were missing a dir index then the directories
2655 * isize would be wrong, so if we fixed the isize to what we thought it
2656 * would be and then fixed the backref we'd still have a invalid fs, so
2657 * we need to add back the dir index and then check to see if the isize
2662 if (stage
== 3 && !err
)
2665 cache
= search_cache_extent(inode_cache
, 0);
2666 while (repair
&& cache
) {
2667 node
= container_of(cache
, struct ptr_node
, cache
);
2669 cache
= next_cache_extent(cache
);
2671 /* Need to free everything up and rescan */
2673 remove_cache_extent(inode_cache
, &node
->cache
);
2675 free_inode_rec(rec
);
2679 if (list_empty(&rec
->backrefs
))
2682 ret
= repair_inode_backrefs(root
, rec
, inode_cache
,
2696 rec
= get_inode_rec(inode_cache
, root_dirid
, 0);
2697 BUG_ON(IS_ERR(rec
));
2699 ret
= check_root_dir(rec
);
2701 fprintf(stderr
, "root %llu root dir %llu error\n",
2702 (unsigned long long)root
->root_key
.objectid
,
2703 (unsigned long long)root_dirid
);
2704 print_inode_error(root
, rec
);
2709 struct btrfs_trans_handle
*trans
;
2711 trans
= btrfs_start_transaction(root
, 1);
2712 if (IS_ERR(trans
)) {
2713 err
= PTR_ERR(trans
);
2718 "root %llu missing its root dir, recreating\n",
2719 (unsigned long long)root
->objectid
);
2721 ret
= btrfs_make_root_dir(trans
, root
, root_dirid
);
2724 btrfs_commit_transaction(trans
, root
);
2728 fprintf(stderr
, "root %llu root dir %llu not found\n",
2729 (unsigned long long)root
->root_key
.objectid
,
2730 (unsigned long long)root_dirid
);
2734 cache
= search_cache_extent(inode_cache
, 0);
2737 node
= container_of(cache
, struct ptr_node
, cache
);
2739 remove_cache_extent(inode_cache
, &node
->cache
);
2741 if (rec
->ino
== root_dirid
||
2742 rec
->ino
== BTRFS_ORPHAN_OBJECTID
) {
2743 free_inode_rec(rec
);
2747 if (rec
->errors
& I_ERR_NO_ORPHAN_ITEM
) {
2748 ret
= check_orphan_item(root
, rec
->ino
);
2750 rec
->errors
&= ~I_ERR_NO_ORPHAN_ITEM
;
2751 if (can_free_inode_rec(rec
)) {
2752 free_inode_rec(rec
);
2757 if (!rec
->found_inode_item
)
2758 rec
->errors
|= I_ERR_NO_INODE_ITEM
;
2759 if (rec
->found_link
!= rec
->nlink
)
2760 rec
->errors
|= I_ERR_LINK_COUNT_WRONG
;
2762 ret
= try_repair_inode(root
, rec
);
2763 if (ret
== 0 && can_free_inode_rec(rec
)) {
2764 free_inode_rec(rec
);
2770 if (!(repair
&& ret
== 0))
2772 print_inode_error(root
, rec
);
2773 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2774 if (!backref
->found_dir_item
)
2775 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
2776 if (!backref
->found_dir_index
)
2777 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
2778 if (!backref
->found_inode_ref
)
2779 backref
->errors
|= REF_ERR_NO_INODE_REF
;
2780 fprintf(stderr
, "\tunresolved ref dir %llu index %llu"
2781 " namelen %u name %s filetype %d errors %x",
2782 (unsigned long long)backref
->dir
,
2783 (unsigned long long)backref
->index
,
2784 backref
->namelen
, backref
->name
,
2785 backref
->filetype
, backref
->errors
);
2786 print_ref_error(backref
->errors
);
2788 free_inode_rec(rec
);
2790 return (error
> 0) ? -1 : 0;
2793 static struct root_record
*get_root_rec(struct cache_tree
*root_cache
,
2796 struct cache_extent
*cache
;
2797 struct root_record
*rec
= NULL
;
2800 cache
= lookup_cache_extent(root_cache
, objectid
, 1);
2802 rec
= container_of(cache
, struct root_record
, cache
);
2804 rec
= calloc(1, sizeof(*rec
));
2806 return ERR_PTR(-ENOMEM
);
2807 rec
->objectid
= objectid
;
2808 INIT_LIST_HEAD(&rec
->backrefs
);
2809 rec
->cache
.start
= objectid
;
2810 rec
->cache
.size
= 1;
2812 ret
= insert_cache_extent(root_cache
, &rec
->cache
);
2814 return ERR_PTR(-EEXIST
);
2819 static struct root_backref
*get_root_backref(struct root_record
*rec
,
2820 u64 ref_root
, u64 dir
, u64 index
,
2821 const char *name
, int namelen
)
2823 struct root_backref
*backref
;
2825 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2826 if (backref
->ref_root
!= ref_root
|| backref
->dir
!= dir
||
2827 backref
->namelen
!= namelen
)
2829 if (memcmp(name
, backref
->name
, namelen
))
2834 backref
= calloc(1, sizeof(*backref
) + namelen
+ 1);
2837 backref
->ref_root
= ref_root
;
2839 backref
->index
= index
;
2840 backref
->namelen
= namelen
;
2841 memcpy(backref
->name
, name
, namelen
);
2842 backref
->name
[namelen
] = '\0';
2843 list_add_tail(&backref
->list
, &rec
->backrefs
);
2847 static void free_root_record(struct cache_extent
*cache
)
2849 struct root_record
*rec
;
2850 struct root_backref
*backref
;
2852 rec
= container_of(cache
, struct root_record
, cache
);
2853 while (!list_empty(&rec
->backrefs
)) {
2854 backref
= to_root_backref(rec
->backrefs
.next
);
2855 list_del(&backref
->list
);
2862 FREE_EXTENT_CACHE_BASED_TREE(root_recs
, free_root_record
);
2864 static int add_root_backref(struct cache_tree
*root_cache
,
2865 u64 root_id
, u64 ref_root
, u64 dir
, u64 index
,
2866 const char *name
, int namelen
,
2867 int item_type
, int errors
)
2869 struct root_record
*rec
;
2870 struct root_backref
*backref
;
2872 rec
= get_root_rec(root_cache
, root_id
);
2873 BUG_ON(IS_ERR(rec
));
2874 backref
= get_root_backref(rec
, ref_root
, dir
, index
, name
, namelen
);
2877 backref
->errors
|= errors
;
2879 if (item_type
!= BTRFS_DIR_ITEM_KEY
) {
2880 if (backref
->found_dir_index
|| backref
->found_back_ref
||
2881 backref
->found_forward_ref
) {
2882 if (backref
->index
!= index
)
2883 backref
->errors
|= REF_ERR_INDEX_UNMATCH
;
2885 backref
->index
= index
;
2889 if (item_type
== BTRFS_DIR_ITEM_KEY
) {
2890 if (backref
->found_forward_ref
)
2892 backref
->found_dir_item
= 1;
2893 } else if (item_type
== BTRFS_DIR_INDEX_KEY
) {
2894 backref
->found_dir_index
= 1;
2895 } else if (item_type
== BTRFS_ROOT_REF_KEY
) {
2896 if (backref
->found_forward_ref
)
2897 backref
->errors
|= REF_ERR_DUP_ROOT_REF
;
2898 else if (backref
->found_dir_item
)
2900 backref
->found_forward_ref
= 1;
2901 } else if (item_type
== BTRFS_ROOT_BACKREF_KEY
) {
2902 if (backref
->found_back_ref
)
2903 backref
->errors
|= REF_ERR_DUP_ROOT_BACKREF
;
2904 backref
->found_back_ref
= 1;
2909 if (backref
->found_forward_ref
&& backref
->found_dir_item
)
2910 backref
->reachable
= 1;
2914 static int merge_root_recs(struct btrfs_root
*root
,
2915 struct cache_tree
*src_cache
,
2916 struct cache_tree
*dst_cache
)
2918 struct cache_extent
*cache
;
2919 struct ptr_node
*node
;
2920 struct inode_record
*rec
;
2921 struct inode_backref
*backref
;
2924 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
2925 free_inode_recs_tree(src_cache
);
2930 cache
= search_cache_extent(src_cache
, 0);
2933 node
= container_of(cache
, struct ptr_node
, cache
);
2935 remove_cache_extent(src_cache
, &node
->cache
);
2938 ret
= is_child_root(root
, root
->objectid
, rec
->ino
);
2944 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2945 BUG_ON(backref
->found_inode_ref
);
2946 if (backref
->found_dir_item
)
2947 add_root_backref(dst_cache
, rec
->ino
,
2948 root
->root_key
.objectid
, backref
->dir
,
2949 backref
->index
, backref
->name
,
2950 backref
->namelen
, BTRFS_DIR_ITEM_KEY
,
2952 if (backref
->found_dir_index
)
2953 add_root_backref(dst_cache
, rec
->ino
,
2954 root
->root_key
.objectid
, backref
->dir
,
2955 backref
->index
, backref
->name
,
2956 backref
->namelen
, BTRFS_DIR_INDEX_KEY
,
2960 free_inode_rec(rec
);
2967 static int check_root_refs(struct btrfs_root
*root
,
2968 struct cache_tree
*root_cache
)
2970 struct root_record
*rec
;
2971 struct root_record
*ref_root
;
2972 struct root_backref
*backref
;
2973 struct cache_extent
*cache
;
2979 rec
= get_root_rec(root_cache
, BTRFS_FS_TREE_OBJECTID
);
2980 BUG_ON(IS_ERR(rec
));
2983 /* fixme: this can not detect circular references */
2986 cache
= search_cache_extent(root_cache
, 0);
2990 rec
= container_of(cache
, struct root_record
, cache
);
2991 cache
= next_cache_extent(cache
);
2993 if (rec
->found_ref
== 0)
2996 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
2997 if (!backref
->reachable
)
3000 ref_root
= get_root_rec(root_cache
,
3002 BUG_ON(IS_ERR(ref_root
));
3003 if (ref_root
->found_ref
> 0)
3006 backref
->reachable
= 0;
3008 if (rec
->found_ref
== 0)
3014 cache
= search_cache_extent(root_cache
, 0);
3018 rec
= container_of(cache
, struct root_record
, cache
);
3019 cache
= next_cache_extent(cache
);
3021 if (rec
->found_ref
== 0 &&
3022 rec
->objectid
>= BTRFS_FIRST_FREE_OBJECTID
&&
3023 rec
->objectid
<= BTRFS_LAST_FREE_OBJECTID
) {
3024 ret
= check_orphan_item(root
->fs_info
->tree_root
,
3030 * If we don't have a root item then we likely just have
3031 * a dir item in a snapshot for this root but no actual
3032 * ref key or anything so it's meaningless.
3034 if (!rec
->found_root_item
)
3037 fprintf(stderr
, "fs tree %llu not referenced\n",
3038 (unsigned long long)rec
->objectid
);
3042 if (rec
->found_ref
> 0 && !rec
->found_root_item
)
3044 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3045 if (!backref
->found_dir_item
)
3046 backref
->errors
|= REF_ERR_NO_DIR_ITEM
;
3047 if (!backref
->found_dir_index
)
3048 backref
->errors
|= REF_ERR_NO_DIR_INDEX
;
3049 if (!backref
->found_back_ref
)
3050 backref
->errors
|= REF_ERR_NO_ROOT_BACKREF
;
3051 if (!backref
->found_forward_ref
)
3052 backref
->errors
|= REF_ERR_NO_ROOT_REF
;
3053 if (backref
->reachable
&& backref
->errors
)
3060 fprintf(stderr
, "fs tree %llu refs %u %s\n",
3061 (unsigned long long)rec
->objectid
, rec
->found_ref
,
3062 rec
->found_root_item
? "" : "not found");
3064 list_for_each_entry(backref
, &rec
->backrefs
, list
) {
3065 if (!backref
->reachable
)
3067 if (!backref
->errors
&& rec
->found_root_item
)
3069 fprintf(stderr
, "\tunresolved ref root %llu dir %llu"
3070 " index %llu namelen %u name %s errors %x\n",
3071 (unsigned long long)backref
->ref_root
,
3072 (unsigned long long)backref
->dir
,
3073 (unsigned long long)backref
->index
,
3074 backref
->namelen
, backref
->name
,
3076 print_ref_error(backref
->errors
);
3079 return errors
> 0 ? 1 : 0;
3082 static int process_root_ref(struct extent_buffer
*eb
, int slot
,
3083 struct btrfs_key
*key
,
3084 struct cache_tree
*root_cache
)
3090 struct btrfs_root_ref
*ref
;
3091 char namebuf
[BTRFS_NAME_LEN
];
3094 ref
= btrfs_item_ptr(eb
, slot
, struct btrfs_root_ref
);
3096 dirid
= btrfs_root_ref_dirid(eb
, ref
);
3097 index
= btrfs_root_ref_sequence(eb
, ref
);
3098 name_len
= btrfs_root_ref_name_len(eb
, ref
);
3100 if (name_len
<= BTRFS_NAME_LEN
) {
3104 len
= BTRFS_NAME_LEN
;
3105 error
= REF_ERR_NAME_TOO_LONG
;
3107 read_extent_buffer(eb
, namebuf
, (unsigned long)(ref
+ 1), len
);
3109 if (key
->type
== BTRFS_ROOT_REF_KEY
) {
3110 add_root_backref(root_cache
, key
->offset
, key
->objectid
, dirid
,
3111 index
, namebuf
, len
, key
->type
, error
);
3113 add_root_backref(root_cache
, key
->objectid
, key
->offset
, dirid
,
3114 index
, namebuf
, len
, key
->type
, error
);
3119 static void free_corrupt_block(struct cache_extent
*cache
)
3121 struct btrfs_corrupt_block
*corrupt
;
3123 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
3127 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks
, free_corrupt_block
);
3130 * Repair the btree of the given root.
3132 * The fix is to remove the node key in corrupt_blocks cache_tree.
3133 * and rebalance the tree.
3134 * After the fix, the btree should be writeable.
3136 static int repair_btree(struct btrfs_root
*root
,
3137 struct cache_tree
*corrupt_blocks
)
3139 struct btrfs_trans_handle
*trans
;
3140 struct btrfs_path path
;
3141 struct btrfs_corrupt_block
*corrupt
;
3142 struct cache_extent
*cache
;
3143 struct btrfs_key key
;
3148 if (cache_tree_empty(corrupt_blocks
))
3151 trans
= btrfs_start_transaction(root
, 1);
3152 if (IS_ERR(trans
)) {
3153 ret
= PTR_ERR(trans
);
3154 fprintf(stderr
, "Error starting transaction: %s\n",
3158 btrfs_init_path(&path
);
3159 cache
= first_cache_extent(corrupt_blocks
);
3161 corrupt
= container_of(cache
, struct btrfs_corrupt_block
,
3163 level
= corrupt
->level
;
3164 path
.lowest_level
= level
;
3165 key
.objectid
= corrupt
->key
.objectid
;
3166 key
.type
= corrupt
->key
.type
;
3167 key
.offset
= corrupt
->key
.offset
;
3170 * Here we don't want to do any tree balance, since it may
3171 * cause a balance with corrupted brother leaf/node,
3172 * so ins_len set to 0 here.
3173 * Balance will be done after all corrupt node/leaf is deleted.
3175 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, 0, 1);
3178 offset
= btrfs_node_blockptr(path
.nodes
[level
],
3181 /* Remove the ptr */
3182 ret
= btrfs_del_ptr(root
, &path
, level
, path
.slots
[level
]);
3186 * Remove the corresponding extent
3187 * return value is not concerned.
3189 btrfs_release_path(&path
);
3190 ret
= btrfs_free_extent(trans
, root
, offset
,
3191 root
->fs_info
->nodesize
, 0,
3192 root
->root_key
.objectid
, level
- 1, 0);
3193 cache
= next_cache_extent(cache
);
3196 /* Balance the btree using btrfs_search_slot() */
3197 cache
= first_cache_extent(corrupt_blocks
);
3199 corrupt
= container_of(cache
, struct btrfs_corrupt_block
,
3201 memcpy(&key
, &corrupt
->key
, sizeof(key
));
3202 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
3205 /* return will always >0 since it won't find the item */
3207 btrfs_release_path(&path
);
3208 cache
= next_cache_extent(cache
);
3211 btrfs_commit_transaction(trans
, root
);
3212 btrfs_release_path(&path
);
3216 static int check_fs_root(struct btrfs_root
*root
,
3217 struct cache_tree
*root_cache
,
3218 struct walk_control
*wc
)
3224 struct btrfs_path path
;
3225 struct shared_node root_node
;
3226 struct root_record
*rec
;
3227 struct btrfs_root_item
*root_item
= &root
->root_item
;
3228 struct cache_tree corrupt_blocks
;
3229 struct orphan_data_extent
*orphan
;
3230 struct orphan_data_extent
*tmp
;
3231 enum btrfs_tree_block_status status
;
3232 struct node_refs nrefs
;
3235 * Reuse the corrupt_block cache tree to record corrupted tree block
3237 * Unlike the usage in extent tree check, here we do it in a per
3238 * fs/subvol tree base.
3240 cache_tree_init(&corrupt_blocks
);
3241 root
->fs_info
->corrupt_blocks
= &corrupt_blocks
;
3243 if (root
->root_key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
3244 rec
= get_root_rec(root_cache
, root
->root_key
.objectid
);
3245 BUG_ON(IS_ERR(rec
));
3246 if (btrfs_root_refs(root_item
) > 0)
3247 rec
->found_root_item
= 1;
3250 btrfs_init_path(&path
);
3251 memset(&root_node
, 0, sizeof(root_node
));
3252 cache_tree_init(&root_node
.root_cache
);
3253 cache_tree_init(&root_node
.inode_cache
);
3254 memset(&nrefs
, 0, sizeof(nrefs
));
3256 /* Move the orphan extent record to corresponding inode_record */
3257 list_for_each_entry_safe(orphan
, tmp
,
3258 &root
->orphan_data_extents
, list
) {
3259 struct inode_record
*inode
;
3261 inode
= get_inode_rec(&root_node
.inode_cache
, orphan
->objectid
,
3263 BUG_ON(IS_ERR(inode
));
3264 inode
->errors
|= I_ERR_FILE_EXTENT_ORPHAN
;
3265 list_move(&orphan
->list
, &inode
->orphan_extents
);
3268 level
= btrfs_header_level(root
->node
);
3269 memset(wc
->nodes
, 0, sizeof(wc
->nodes
));
3270 wc
->nodes
[level
] = &root_node
;
3271 wc
->active_node
= level
;
3272 wc
->root_level
= level
;
3274 /* We may not have checked the root block, lets do that now */
3275 if (btrfs_is_leaf(root
->node
))
3276 status
= btrfs_check_leaf(root
, NULL
, root
->node
);
3278 status
= btrfs_check_node(root
, NULL
, root
->node
);
3279 if (status
!= BTRFS_TREE_BLOCK_CLEAN
)
3282 if (btrfs_root_refs(root_item
) > 0 ||
3283 btrfs_disk_key_objectid(&root_item
->drop_progress
) == 0) {
3284 path
.nodes
[level
] = root
->node
;
3285 extent_buffer_get(root
->node
);
3286 path
.slots
[level
] = 0;
3288 struct btrfs_key key
;
3289 struct btrfs_disk_key found_key
;
3291 btrfs_disk_key_to_cpu(&key
, &root_item
->drop_progress
);
3292 level
= root_item
->drop_level
;
3293 path
.lowest_level
= level
;
3294 if (level
> btrfs_header_level(root
->node
) ||
3295 level
>= BTRFS_MAX_LEVEL
) {
3296 error("ignoring invalid drop level: %u", level
);
3299 wret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
3302 btrfs_node_key(path
.nodes
[level
], &found_key
,
3304 WARN_ON(memcmp(&found_key
, &root_item
->drop_progress
,
3305 sizeof(found_key
)));
3309 wret
= walk_down_tree(root
, &path
, wc
, &level
, &nrefs
);
3315 wret
= walk_up_tree(root
, &path
, wc
, &level
);
3322 btrfs_release_path(&path
);
3324 if (!cache_tree_empty(&corrupt_blocks
)) {
3325 struct cache_extent
*cache
;
3326 struct btrfs_corrupt_block
*corrupt
;
3328 printf("The following tree block(s) is corrupted in tree %llu:\n",
3329 root
->root_key
.objectid
);
3330 cache
= first_cache_extent(&corrupt_blocks
);
3332 corrupt
= container_of(cache
,
3333 struct btrfs_corrupt_block
,
3335 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3336 cache
->start
, corrupt
->level
,
3337 corrupt
->key
.objectid
, corrupt
->key
.type
,
3338 corrupt
->key
.offset
);
3339 cache
= next_cache_extent(cache
);
3342 printf("Try to repair the btree for root %llu\n",
3343 root
->root_key
.objectid
);
3344 ret
= repair_btree(root
, &corrupt_blocks
);
3346 fprintf(stderr
, "Failed to repair btree: %s\n",
3349 printf("Btree for root %llu is fixed\n",
3350 root
->root_key
.objectid
);
3354 err
= merge_root_recs(root
, &root_node
.root_cache
, root_cache
);
3358 if (root_node
.current
) {
3359 root_node
.current
->checked
= 1;
3360 maybe_free_inode_rec(&root_node
.inode_cache
,
3364 err
= check_inode_recs(root
, &root_node
.inode_cache
);
3368 free_corrupt_blocks_tree(&corrupt_blocks
);
3369 root
->fs_info
->corrupt_blocks
= NULL
;
3370 free_orphan_data_extents(&root
->orphan_data_extents
);
3374 static int check_fs_roots(struct btrfs_fs_info
*fs_info
,
3375 struct cache_tree
*root_cache
)
3377 struct btrfs_path path
;
3378 struct btrfs_key key
;
3379 struct walk_control wc
;
3380 struct extent_buffer
*leaf
, *tree_node
;
3381 struct btrfs_root
*tmp_root
;
3382 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
3386 if (ctx
.progress_enabled
) {
3387 ctx
.tp
= TASK_FS_ROOTS
;
3388 task_start(ctx
.info
);
3392 * Just in case we made any changes to the extent tree that weren't
3393 * reflected into the free space cache yet.
3396 reset_cached_block_groups(fs_info
);
3397 memset(&wc
, 0, sizeof(wc
));
3398 cache_tree_init(&wc
.shared
);
3399 btrfs_init_path(&path
);
3404 key
.type
= BTRFS_ROOT_ITEM_KEY
;
3405 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, &path
, 0, 0);
3410 tree_node
= tree_root
->node
;
3412 if (tree_node
!= tree_root
->node
) {
3413 free_root_recs_tree(root_cache
);
3414 btrfs_release_path(&path
);
3417 leaf
= path
.nodes
[0];
3418 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
3419 ret
= btrfs_next_leaf(tree_root
, &path
);
3425 leaf
= path
.nodes
[0];
3427 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
3428 if (key
.type
== BTRFS_ROOT_ITEM_KEY
&&
3429 fs_root_objectid(key
.objectid
)) {
3430 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
3431 tmp_root
= btrfs_read_fs_root_no_cache(
3434 key
.offset
= (u64
)-1;
3435 tmp_root
= btrfs_read_fs_root(
3438 if (IS_ERR(tmp_root
)) {
3442 ret
= check_fs_root(tmp_root
, root_cache
, &wc
);
3443 if (ret
== -EAGAIN
) {
3444 free_root_recs_tree(root_cache
);
3445 btrfs_release_path(&path
);
3450 if (key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
3451 btrfs_free_fs_root(tmp_root
);
3452 } else if (key
.type
== BTRFS_ROOT_REF_KEY
||
3453 key
.type
== BTRFS_ROOT_BACKREF_KEY
) {
3454 process_root_ref(leaf
, path
.slots
[0], &key
,
3461 btrfs_release_path(&path
);
3463 free_extent_cache_tree(&wc
.shared
);
3464 if (!cache_tree_empty(&wc
.shared
))
3465 fprintf(stderr
, "warning line %d\n", __LINE__
);
3467 task_stop(ctx
.info
);
3472 static struct tree_backref
*find_tree_backref(struct extent_record
*rec
,
3473 u64 parent
, u64 root
)
3475 struct rb_node
*node
;
3476 struct tree_backref
*back
= NULL
;
3477 struct tree_backref match
= {
3484 match
.parent
= parent
;
3485 match
.node
.full_backref
= 1;
3490 node
= rb_search(&rec
->backref_tree
, &match
.node
.node
,
3491 (rb_compare_keys
)compare_extent_backref
, NULL
);
3493 back
= to_tree_backref(rb_node_to_extent_backref(node
));
3498 static struct data_backref
*find_data_backref(struct extent_record
*rec
,
3499 u64 parent
, u64 root
,
3500 u64 owner
, u64 offset
,
3502 u64 disk_bytenr
, u64 bytes
)
3504 struct rb_node
*node
;
3505 struct data_backref
*back
= NULL
;
3506 struct data_backref match
= {
3513 .found_ref
= found_ref
,
3514 .disk_bytenr
= disk_bytenr
,
3518 match
.parent
= parent
;
3519 match
.node
.full_backref
= 1;
3524 node
= rb_search(&rec
->backref_tree
, &match
.node
.node
,
3525 (rb_compare_keys
)compare_extent_backref
, NULL
);
3527 back
= to_data_backref(rb_node_to_extent_backref(node
));
3532 static int do_check_fs_roots(struct btrfs_fs_info
*fs_info
,
3533 struct cache_tree
*root_cache
)
3537 if (!ctx
.progress_enabled
)
3538 fprintf(stderr
, "checking fs roots\n");
3539 if (check_mode
== CHECK_MODE_LOWMEM
)
3540 ret
= check_fs_roots_lowmem(fs_info
);
3542 ret
= check_fs_roots(fs_info
, root_cache
);
3547 static int all_backpointers_checked(struct extent_record
*rec
, int print_errs
)
3549 struct extent_backref
*back
, *tmp
;
3550 struct tree_backref
*tback
;
3551 struct data_backref
*dback
;
3555 rbtree_postorder_for_each_entry_safe(back
, tmp
,
3556 &rec
->backref_tree
, node
) {
3557 if (!back
->found_extent_tree
) {
3561 if (back
->is_data
) {
3562 dback
= to_data_backref(back
);
3564 "data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
3565 (unsigned long long)rec
->start
,
3566 back
->full_backref
?
3568 back
->full_backref
?
3569 (unsigned long long)dback
->parent
:
3570 (unsigned long long)dback
->root
,
3571 (unsigned long long)dback
->owner
,
3572 (unsigned long long)dback
->offset
,
3573 (unsigned long)dback
->num_refs
);
3575 tback
= to_tree_backref(back
);
3577 "tree backref %llu parent %llu root %llu not found in extent tree\n",
3578 (unsigned long long)rec
->start
,
3579 (unsigned long long)tback
->parent
,
3580 (unsigned long long)tback
->root
);
3583 if (!back
->is_data
&& !back
->found_ref
) {
3587 tback
= to_tree_backref(back
);
3589 "backref %llu %s %llu not referenced back %p\n",
3590 (unsigned long long)rec
->start
,
3591 back
->full_backref
? "parent" : "root",
3592 back
->full_backref
?
3593 (unsigned long long)tback
->parent
:
3594 (unsigned long long)tback
->root
, back
);
3596 if (back
->is_data
) {
3597 dback
= to_data_backref(back
);
3598 if (dback
->found_ref
!= dback
->num_refs
) {
3603 "incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
3604 (unsigned long long)rec
->start
,
3605 back
->full_backref
?
3607 back
->full_backref
?
3608 (unsigned long long)dback
->parent
:
3609 (unsigned long long)dback
->root
,
3610 (unsigned long long)dback
->owner
,
3611 (unsigned long long)dback
->offset
,
3612 dback
->found_ref
, dback
->num_refs
,
3615 if (dback
->disk_bytenr
!= rec
->start
) {
3620 "backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
3621 (unsigned long long)rec
->start
,
3622 (unsigned long long)dback
->disk_bytenr
);
3625 if (dback
->bytes
!= rec
->nr
) {
3630 "backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
3631 (unsigned long long)rec
->start
,
3632 (unsigned long long)rec
->nr
,
3633 (unsigned long long)dback
->bytes
);
3636 if (!back
->is_data
) {
3639 dback
= to_data_backref(back
);
3640 found
+= dback
->found_ref
;
3643 if (found
!= rec
->refs
) {
3648 "incorrect global backref count on %llu found %llu wanted %llu\n",
3649 (unsigned long long)rec
->start
,
3650 (unsigned long long)found
,
3651 (unsigned long long)rec
->refs
);
/* rb_free_nodes() callback: free the extent_backref embedding @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3664 static void free_all_extent_backrefs(struct extent_record
*rec
)
3666 rb_free_nodes(&rec
->backref_tree
, __free_one_backref
);
3669 static void free_extent_record_cache(struct cache_tree
*extent_cache
)
3671 struct cache_extent
*cache
;
3672 struct extent_record
*rec
;
3675 cache
= first_cache_extent(extent_cache
);
3678 rec
= container_of(cache
, struct extent_record
, cache
);
3679 remove_cache_extent(extent_cache
, cache
);
3680 free_all_extent_backrefs(rec
);
3685 static int maybe_free_extent_rec(struct cache_tree
*extent_cache
,
3686 struct extent_record
*rec
)
3688 if (rec
->content_checked
&& rec
->owner_ref_checked
&&
3689 rec
->extent_item_refs
== rec
->refs
&& rec
->refs
> 0 &&
3690 rec
->num_duplicates
== 0 && !all_backpointers_checked(rec
, 0) &&
3691 !rec
->bad_full_backref
&& !rec
->crossing_stripes
&&
3692 !rec
->wrong_chunk_type
) {
3693 remove_cache_extent(extent_cache
, &rec
->cache
);
3694 free_all_extent_backrefs(rec
);
3695 list_del_init(&rec
->list
);
3701 static int check_owner_ref(struct btrfs_root
*root
,
3702 struct extent_record
*rec
,
3703 struct extent_buffer
*buf
)
3705 struct extent_backref
*node
, *tmp
;
3706 struct tree_backref
*back
;
3707 struct btrfs_root
*ref_root
;
3708 struct btrfs_key key
;
3709 struct btrfs_path path
;
3710 struct extent_buffer
*parent
;
3715 rbtree_postorder_for_each_entry_safe(node
, tmp
,
3716 &rec
->backref_tree
, node
) {
3719 if (!node
->found_ref
)
3721 if (node
->full_backref
)
3723 back
= to_tree_backref(node
);
3724 if (btrfs_header_owner(buf
) == back
->root
)
3727 BUG_ON(rec
->is_root
);
3729 /* try to find the block by search corresponding fs tree */
3730 key
.objectid
= btrfs_header_owner(buf
);
3731 key
.type
= BTRFS_ROOT_ITEM_KEY
;
3732 key
.offset
= (u64
)-1;
3734 ref_root
= btrfs_read_fs_root(root
->fs_info
, &key
);
3735 if (IS_ERR(ref_root
))
3738 level
= btrfs_header_level(buf
);
3740 btrfs_item_key_to_cpu(buf
, &key
, 0);
3742 btrfs_node_key_to_cpu(buf
, &key
, 0);
3744 btrfs_init_path(&path
);
3745 path
.lowest_level
= level
+ 1;
3746 ret
= btrfs_search_slot(NULL
, ref_root
, &key
, &path
, 0, 0);
3750 parent
= path
.nodes
[level
+ 1];
3751 if (parent
&& buf
->start
== btrfs_node_blockptr(parent
,
3752 path
.slots
[level
+ 1]))
3755 btrfs_release_path(&path
);
3756 return found
? 0 : 1;
3759 static int is_extent_tree_record(struct extent_record
*rec
)
3761 struct extent_backref
*node
, *tmp
;
3762 struct tree_backref
*back
;
3765 rbtree_postorder_for_each_entry_safe(node
, tmp
,
3766 &rec
->backref_tree
, node
) {
3769 back
= to_tree_backref(node
);
3770 if (node
->full_backref
)
3772 if (back
->root
== BTRFS_EXTENT_TREE_OBJECTID
)
3779 static int record_bad_block_io(struct btrfs_fs_info
*info
,
3780 struct cache_tree
*extent_cache
,
3783 struct extent_record
*rec
;
3784 struct cache_extent
*cache
;
3785 struct btrfs_key key
;
3787 cache
= lookup_cache_extent(extent_cache
, start
, len
);
3791 rec
= container_of(cache
, struct extent_record
, cache
);
3792 if (!is_extent_tree_record(rec
))
3795 btrfs_disk_key_to_cpu(&key
, &rec
->parent_key
);
3796 return btrfs_add_corrupt_extent_record(info
, &key
, start
, len
, 0);
3799 static int swap_values(struct btrfs_root
*root
, struct btrfs_path
*path
,
3800 struct extent_buffer
*buf
, int slot
)
3802 if (btrfs_header_level(buf
)) {
3803 struct btrfs_key_ptr ptr1
, ptr2
;
3805 read_extent_buffer(buf
, &ptr1
, btrfs_node_key_ptr_offset(slot
),
3806 sizeof(struct btrfs_key_ptr
));
3807 read_extent_buffer(buf
, &ptr2
,
3808 btrfs_node_key_ptr_offset(slot
+ 1),
3809 sizeof(struct btrfs_key_ptr
));
3810 write_extent_buffer(buf
, &ptr1
,
3811 btrfs_node_key_ptr_offset(slot
+ 1),
3812 sizeof(struct btrfs_key_ptr
));
3813 write_extent_buffer(buf
, &ptr2
,
3814 btrfs_node_key_ptr_offset(slot
),
3815 sizeof(struct btrfs_key_ptr
));
3817 struct btrfs_disk_key key
;
3819 btrfs_node_key(buf
, &key
, 0);
3820 btrfs_fixup_low_keys(root
, path
, &key
,
3821 btrfs_header_level(buf
) + 1);
3824 struct btrfs_item
*item1
, *item2
;
3825 struct btrfs_key k1
, k2
;
3826 char *item1_data
, *item2_data
;
3827 u32 item1_offset
, item2_offset
, item1_size
, item2_size
;
3829 item1
= btrfs_item_nr(slot
);
3830 item2
= btrfs_item_nr(slot
+ 1);
3831 btrfs_item_key_to_cpu(buf
, &k1
, slot
);
3832 btrfs_item_key_to_cpu(buf
, &k2
, slot
+ 1);
3833 item1_offset
= btrfs_item_offset(buf
, item1
);
3834 item2_offset
= btrfs_item_offset(buf
, item2
);
3835 item1_size
= btrfs_item_size(buf
, item1
);
3836 item2_size
= btrfs_item_size(buf
, item2
);
3838 item1_data
= malloc(item1_size
);
3841 item2_data
= malloc(item2_size
);
3847 read_extent_buffer(buf
, item1_data
, item1_offset
, item1_size
);
3848 read_extent_buffer(buf
, item2_data
, item2_offset
, item2_size
);
3850 write_extent_buffer(buf
, item1_data
, item2_offset
, item2_size
);
3851 write_extent_buffer(buf
, item2_data
, item1_offset
, item1_size
);
3855 btrfs_set_item_offset(buf
, item1
, item2_offset
);
3856 btrfs_set_item_offset(buf
, item2
, item1_offset
);
3857 btrfs_set_item_size(buf
, item1
, item2_size
);
3858 btrfs_set_item_size(buf
, item2
, item1_size
);
3860 path
->slots
[0] = slot
;
3861 btrfs_set_item_key_unsafe(root
, path
, &k2
);
3862 path
->slots
[0] = slot
+ 1;
3863 btrfs_set_item_key_unsafe(root
, path
, &k1
);
3868 static int fix_key_order(struct btrfs_root
*root
, struct btrfs_path
*path
)
3870 struct extent_buffer
*buf
;
3871 struct btrfs_key k1
, k2
;
3873 int level
= path
->lowest_level
;
3876 buf
= path
->nodes
[level
];
3877 for (i
= 0; i
< btrfs_header_nritems(buf
) - 1; i
++) {
3879 btrfs_node_key_to_cpu(buf
, &k1
, i
);
3880 btrfs_node_key_to_cpu(buf
, &k2
, i
+ 1);
3882 btrfs_item_key_to_cpu(buf
, &k1
, i
);
3883 btrfs_item_key_to_cpu(buf
, &k2
, i
+ 1);
3885 if (btrfs_comp_cpu_keys(&k1
, &k2
) < 0)
3887 ret
= swap_values(root
, path
, buf
, i
);
3890 btrfs_mark_buffer_dirty(buf
);
3896 static int delete_bogus_item(struct btrfs_root
*root
,
3897 struct btrfs_path
*path
,
3898 struct extent_buffer
*buf
, int slot
)
3900 struct btrfs_key key
;
3901 int nritems
= btrfs_header_nritems(buf
);
3903 btrfs_item_key_to_cpu(buf
, &key
, slot
);
3905 /* These are all the keys we can deal with missing. */
3906 if (key
.type
!= BTRFS_DIR_INDEX_KEY
&&
3907 key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
3908 key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
3909 key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
3910 key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
)
3913 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
3914 (unsigned long long)key
.objectid
, key
.type
,
3915 (unsigned long long)key
.offset
, slot
, buf
->start
);
3916 memmove_extent_buffer(buf
, btrfs_item_nr_offset(slot
),
3917 btrfs_item_nr_offset(slot
+ 1),
3918 sizeof(struct btrfs_item
) *
3919 (nritems
- slot
- 1));
3920 btrfs_set_header_nritems(buf
, nritems
- 1);
3922 struct btrfs_disk_key disk_key
;
3924 btrfs_item_key(buf
, &disk_key
, 0);
3925 btrfs_fixup_low_keys(root
, path
, &disk_key
, 1);
3927 btrfs_mark_buffer_dirty(buf
);
3931 static int fix_item_offset(struct btrfs_root
*root
, struct btrfs_path
*path
)
3933 struct extent_buffer
*buf
;
3937 /* We should only get this for leaves */
3938 BUG_ON(path
->lowest_level
);
3939 buf
= path
->nodes
[0];
3941 for (i
= 0; i
< btrfs_header_nritems(buf
); i
++) {
3942 unsigned int shift
= 0, offset
;
3944 if (i
== 0 && btrfs_item_end_nr(buf
, i
) !=
3945 BTRFS_LEAF_DATA_SIZE(root
->fs_info
)) {
3946 if (btrfs_item_end_nr(buf
, i
) >
3947 BTRFS_LEAF_DATA_SIZE(root
->fs_info
)) {
3948 ret
= delete_bogus_item(root
, path
, buf
, i
);
3952 "item is off the end of the leaf, can't fix\n");
3956 shift
= BTRFS_LEAF_DATA_SIZE(root
->fs_info
) -
3957 btrfs_item_end_nr(buf
, i
);
3958 } else if (i
> 0 && btrfs_item_end_nr(buf
, i
) !=
3959 btrfs_item_offset_nr(buf
, i
- 1)) {
3960 if (btrfs_item_end_nr(buf
, i
) >
3961 btrfs_item_offset_nr(buf
, i
- 1)) {
3962 ret
= delete_bogus_item(root
, path
, buf
, i
);
3965 fprintf(stderr
, "items overlap, can't fix\n");
3969 shift
= btrfs_item_offset_nr(buf
, i
- 1) -
3970 btrfs_item_end_nr(buf
, i
);
3975 printf("Shifting item nr %d by %u bytes in block %llu\n",
3976 i
, shift
, (unsigned long long)buf
->start
);
3977 offset
= btrfs_item_offset_nr(buf
, i
);
3978 memmove_extent_buffer(buf
,
3979 btrfs_leaf_data(buf
) + offset
+ shift
,
3980 btrfs_leaf_data(buf
) + offset
,
3981 btrfs_item_size_nr(buf
, i
));
3982 btrfs_set_item_offset(buf
, btrfs_item_nr(i
),
3984 btrfs_mark_buffer_dirty(buf
);
3988 * We may have moved things, in which case we want to exit so we don't
3989 * write those changes out. Once we have proper abort functionality in
3990 * progs this can be changed to something nicer.
3997 * Attempt to fix basic block failures. If we can't fix it for whatever reason
3998 * then just return -EIO.
4000 static int try_to_fix_bad_block(struct btrfs_root
*root
,
4001 struct extent_buffer
*buf
,
4002 enum btrfs_tree_block_status status
)
4004 struct btrfs_trans_handle
*trans
;
4005 struct ulist
*roots
;
4006 struct ulist_node
*node
;
4007 struct btrfs_root
*search_root
;
4008 struct btrfs_path path
;
4009 struct ulist_iterator iter
;
4010 struct btrfs_key root_key
, key
;
4013 if (status
!= BTRFS_TREE_BLOCK_BAD_KEY_ORDER
&&
4014 status
!= BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
4017 ret
= btrfs_find_all_roots(NULL
, root
->fs_info
, buf
->start
, 0, &roots
);
4021 btrfs_init_path(&path
);
4022 ULIST_ITER_INIT(&iter
);
4023 while ((node
= ulist_next(roots
, &iter
))) {
4024 root_key
.objectid
= node
->val
;
4025 root_key
.type
= BTRFS_ROOT_ITEM_KEY
;
4026 root_key
.offset
= (u64
)-1;
4028 search_root
= btrfs_read_fs_root(root
->fs_info
, &root_key
);
4035 trans
= btrfs_start_transaction(search_root
, 0);
4036 if (IS_ERR(trans
)) {
4037 ret
= PTR_ERR(trans
);
4041 path
.lowest_level
= btrfs_header_level(buf
);
4042 path
.skip_check_block
= 1;
4043 if (path
.lowest_level
)
4044 btrfs_node_key_to_cpu(buf
, &key
, 0);
4046 btrfs_item_key_to_cpu(buf
, &key
, 0);
4047 ret
= btrfs_search_slot(trans
, search_root
, &key
, &path
, 0, 1);
4050 btrfs_commit_transaction(trans
, search_root
);
4053 if (status
== BTRFS_TREE_BLOCK_BAD_KEY_ORDER
)
4054 ret
= fix_key_order(search_root
, &path
);
4055 else if (status
== BTRFS_TREE_BLOCK_INVALID_OFFSETS
)
4056 ret
= fix_item_offset(search_root
, &path
);
4058 btrfs_commit_transaction(trans
, search_root
);
4061 btrfs_release_path(&path
);
4062 btrfs_commit_transaction(trans
, search_root
);
4065 btrfs_release_path(&path
);
4069 static int check_block(struct btrfs_root
*root
,
4070 struct cache_tree
*extent_cache
,
4071 struct extent_buffer
*buf
, u64 flags
)
4073 struct extent_record
*rec
;
4074 struct cache_extent
*cache
;
4075 struct btrfs_key key
;
4076 enum btrfs_tree_block_status status
;
4080 cache
= lookup_cache_extent(extent_cache
, buf
->start
, buf
->len
);
4083 rec
= container_of(cache
, struct extent_record
, cache
);
4084 rec
->generation
= btrfs_header_generation(buf
);
4086 level
= btrfs_header_level(buf
);
4087 if (btrfs_header_nritems(buf
) > 0) {
4090 btrfs_item_key_to_cpu(buf
, &key
, 0);
4092 btrfs_node_key_to_cpu(buf
, &key
, 0);
4094 rec
->info_objectid
= key
.objectid
;
4096 rec
->info_level
= level
;
4098 if (btrfs_is_leaf(buf
))
4099 status
= btrfs_check_leaf(root
, &rec
->parent_key
, buf
);
4101 status
= btrfs_check_node(root
, &rec
->parent_key
, buf
);
4103 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
4105 status
= try_to_fix_bad_block(root
, buf
, status
);
4106 if (status
!= BTRFS_TREE_BLOCK_CLEAN
) {
4108 fprintf(stderr
, "bad block %llu\n",
4109 (unsigned long long)buf
->start
);
4112 * Signal to callers we need to start the scan over
4113 * again since we'll have cowed blocks.
4118 rec
->content_checked
= 1;
4119 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
)
4120 rec
->owner_ref_checked
= 1;
4122 ret
= check_owner_ref(root
, rec
, buf
);
4124 rec
->owner_ref_checked
= 1;
4128 maybe_free_extent_rec(extent_cache
, rec
);
4133 static struct tree_backref
*find_tree_backref(struct extent_record
*rec
,
4134 u64 parent
, u64 root
)
4136 struct list_head
*cur
= rec
->backrefs
.next
;
4137 struct extent_backref
*node
;
4138 struct tree_backref
*back
;
4140 while (cur
!= &rec
->backrefs
) {
4141 node
= to_extent_backref(cur
);
4145 back
= to_tree_backref(node
);
4147 if (!node
->full_backref
)
4149 if (parent
== back
->parent
)
4152 if (node
->full_backref
)
4154 if (back
->root
== root
)
4162 static struct tree_backref
*alloc_tree_backref(struct extent_record
*rec
,
4163 u64 parent
, u64 root
)
4165 struct tree_backref
*ref
= malloc(sizeof(*ref
));
4169 memset(&ref
->node
, 0, sizeof(ref
->node
));
4171 ref
->parent
= parent
;
4172 ref
->node
.full_backref
= 1;
4175 ref
->node
.full_backref
= 0;
4182 static struct data_backref
*find_data_backref(struct extent_record
*rec
,
4183 u64 parent
, u64 root
,
4184 u64 owner
, u64 offset
,
4186 u64 disk_bytenr
, u64 bytes
)
4188 struct list_head
*cur
= rec
->backrefs
.next
;
4189 struct extent_backref
*node
;
4190 struct data_backref
*back
;
4192 while (cur
!= &rec
->backrefs
) {
4193 node
= to_extent_backref(cur
);
4197 back
= to_data_backref(node
);
4199 if (!node
->full_backref
)
4201 if (parent
== back
->parent
)
4204 if (node
->full_backref
)
4206 if (back
->root
== root
&& back
->owner
== owner
&&
4207 back
->offset
== offset
) {
4208 if (found_ref
&& node
->found_ref
&&
4209 (back
->bytes
!= bytes
||
4210 back
->disk_bytenr
!= disk_bytenr
))
4220 static struct data_backref
*alloc_data_backref(struct extent_record
*rec
,
4221 u64 parent
, u64 root
,
4222 u64 owner
, u64 offset
,
4225 struct data_backref
*ref
= malloc(sizeof(*ref
));
4229 memset(&ref
->node
, 0, sizeof(ref
->node
));
4230 ref
->node
.is_data
= 1;
4233 ref
->parent
= parent
;
4236 ref
->node
.full_backref
= 1;
4240 ref
->offset
= offset
;
4241 ref
->node
.full_backref
= 0;
4243 ref
->bytes
= max_size
;
4246 if (max_size
> rec
->max_size
)
4247 rec
->max_size
= max_size
;
4251 /* Check if the type of extent matches with its chunk */
4252 static void check_extent_type(struct extent_record
*rec
)
4254 struct btrfs_block_group_cache
*bg_cache
;
4256 bg_cache
= btrfs_lookup_first_block_group(global_info
, rec
->start
);
4260 /* data extent, check chunk directly*/
4261 if (!rec
->metadata
) {
4262 if (!(bg_cache
->flags
& BTRFS_BLOCK_GROUP_DATA
))
4263 rec
->wrong_chunk_type
= 1;
4267 /* metadata extent, check the obvious case first */
4268 if (!(bg_cache
->flags
& (BTRFS_BLOCK_GROUP_SYSTEM
|
4269 BTRFS_BLOCK_GROUP_METADATA
))) {
4270 rec
->wrong_chunk_type
= 1;
4275 * Check SYSTEM extent, as it's also marked as metadata, we can only
4276 * make sure it's a SYSTEM extent by its backref
4278 if (!RB_EMPTY_ROOT(&rec
->backref_tree
)) {
4279 struct extent_backref
*node
;
4280 struct tree_backref
*tback
;
4283 node
= rb_node_to_extent_backref(rb_first(&rec
->backref_tree
));
4284 if (node
->is_data
) {
4285 /* tree block shouldn't have data backref */
4286 rec
->wrong_chunk_type
= 1;
4289 tback
= container_of(node
, struct tree_backref
, node
);
4291 if (tback
->root
== BTRFS_CHUNK_TREE_OBJECTID
)
4292 bg_type
= BTRFS_BLOCK_GROUP_SYSTEM
;
4294 bg_type
= BTRFS_BLOCK_GROUP_METADATA
;
4295 if (!(bg_cache
->flags
& bg_type
))
4296 rec
->wrong_chunk_type
= 1;
4301 * Allocate a new extent record, fill default values from @tmpl and insert int
4302 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4303 * the cache, otherwise it fails.
4305 static int add_extent_rec_nolookup(struct cache_tree
*extent_cache
,
4306 struct extent_record
*tmpl
)
4308 struct extent_record
*rec
;
4311 BUG_ON(tmpl
->max_size
== 0);
4312 rec
= malloc(sizeof(*rec
));
4315 rec
->start
= tmpl
->start
;
4316 rec
->max_size
= tmpl
->max_size
;
4317 rec
->nr
= max(tmpl
->nr
, tmpl
->max_size
);
4318 rec
->found_rec
= tmpl
->found_rec
;
4319 rec
->content_checked
= tmpl
->content_checked
;
4320 rec
->owner_ref_checked
= tmpl
->owner_ref_checked
;
4321 rec
->num_duplicates
= 0;
4322 rec
->metadata
= tmpl
->metadata
;
4323 rec
->flag_block_full_backref
= FLAG_UNSET
;
4324 rec
->bad_full_backref
= 0;
4325 rec
->crossing_stripes
= 0;
4326 rec
->wrong_chunk_type
= 0;
4327 rec
->is_root
= tmpl
->is_root
;
4328 rec
->refs
= tmpl
->refs
;
4329 rec
->extent_item_refs
= tmpl
->extent_item_refs
;
4330 rec
->parent_generation
= tmpl
->parent_generation
;
4331 INIT_LIST_HEAD(&rec
->backrefs
);
4332 INIT_LIST_HEAD(&rec
->dups
);
4333 INIT_LIST_HEAD(&rec
->list
);
4334 rec
->backref_tree
= RB_ROOT
;
4335 memcpy(&rec
->parent_key
, &tmpl
->parent_key
, sizeof(tmpl
->parent_key
));
4336 rec
->cache
.start
= tmpl
->start
;
4337 rec
->cache
.size
= tmpl
->nr
;
4338 ret
= insert_cache_extent(extent_cache
, &rec
->cache
);
4343 bytes_used
+= rec
->nr
;
4346 rec
->crossing_stripes
= check_crossing_stripes(global_info
,
4347 rec
->start
, global_info
->nodesize
);
4348 check_extent_type(rec
);
4353 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4355 * - refs - if found, increase refs
4356 * - is_root - if found, set
4357 * - content_checked - if found, set
4358 * - owner_ref_checked - if found, set
4360 * If not found, create a new one, initialize and insert.
4362 static int add_extent_rec(struct cache_tree
*extent_cache
,
4363 struct extent_record
*tmpl
)
4365 struct extent_record
*rec
;
4366 struct cache_extent
*cache
;
4370 cache
= lookup_cache_extent(extent_cache
, tmpl
->start
, tmpl
->nr
);
4372 rec
= container_of(cache
, struct extent_record
, cache
);
4376 rec
->nr
= max(tmpl
->nr
, tmpl
->max_size
);
4379 * We need to make sure to reset nr to whatever the extent
4380 * record says was the real size, this way we can compare it to
4383 if (tmpl
->found_rec
) {
4384 if (tmpl
->start
!= rec
->start
|| rec
->found_rec
) {
4385 struct extent_record
*tmp
;
4388 if (list_empty(&rec
->list
))
4389 list_add_tail(&rec
->list
,
4390 &duplicate_extents
);
4393 * We have to do this song and dance in case we
4394 * find an extent record that falls inside of
4395 * our current extent record but does not have
4396 * the same objectid.
4398 tmp
= malloc(sizeof(*tmp
));
4401 tmp
->start
= tmpl
->start
;
4402 tmp
->max_size
= tmpl
->max_size
;
4405 tmp
->metadata
= tmpl
->metadata
;
4406 tmp
->extent_item_refs
= tmpl
->extent_item_refs
;
4407 INIT_LIST_HEAD(&tmp
->list
);
4408 list_add_tail(&tmp
->list
, &rec
->dups
);
4409 rec
->num_duplicates
++;
4416 if (tmpl
->extent_item_refs
&& !dup
) {
4417 if (rec
->extent_item_refs
) {
4419 "block %llu rec extent_item_refs %llu, passed %llu\n",
4420 (unsigned long long)tmpl
->start
,
4421 (unsigned long long)
4422 rec
->extent_item_refs
,
4423 (unsigned long long)
4424 tmpl
->extent_item_refs
);
4426 rec
->extent_item_refs
= tmpl
->extent_item_refs
;
4430 if (tmpl
->content_checked
)
4431 rec
->content_checked
= 1;
4432 if (tmpl
->owner_ref_checked
)
4433 rec
->owner_ref_checked
= 1;
4434 memcpy(&rec
->parent_key
, &tmpl
->parent_key
,
4435 sizeof(tmpl
->parent_key
));
4436 if (tmpl
->parent_generation
)
4437 rec
->parent_generation
= tmpl
->parent_generation
;
4438 if (rec
->max_size
< tmpl
->max_size
)
4439 rec
->max_size
= tmpl
->max_size
;
4442 * A metadata extent can't cross stripe_len boundary, otherwise
4443 * kernel scrub won't be able to handle it.
4444 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4448 rec
->crossing_stripes
= check_crossing_stripes(
4449 global_info
, rec
->start
,
4450 global_info
->nodesize
);
4451 check_extent_type(rec
);
4452 maybe_free_extent_rec(extent_cache
, rec
);
4456 ret
= add_extent_rec_nolookup(extent_cache
, tmpl
);
4461 static int add_tree_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
4462 u64 parent
, u64 root
, int found_ref
)
4464 struct extent_record
*rec
;
4465 struct tree_backref
*back
;
4466 struct cache_extent
*cache
;
4468 bool insert
= false;
4470 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4472 struct extent_record tmpl
;
4474 memset(&tmpl
, 0, sizeof(tmpl
));
4475 tmpl
.start
= bytenr
;
4480 ret
= add_extent_rec_nolookup(extent_cache
, &tmpl
);
4484 /* really a bug in cache_extent implement now */
4485 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4490 rec
= container_of(cache
, struct extent_record
, cache
);
4491 if (rec
->start
!= bytenr
) {
4493 * Several cause, from unaligned bytenr to over lapping extents
4498 back
= find_tree_backref(rec
, parent
, root
);
4500 back
= alloc_tree_backref(rec
, parent
, root
);
4507 if (back
->node
.found_ref
) {
4509 "Extent back ref already exists for %llu parent %llu root %llu\n",
4510 (unsigned long long)bytenr
,
4511 (unsigned long long)parent
,
4512 (unsigned long long)root
);
4514 back
->node
.found_ref
= 1;
4516 if (back
->node
.found_extent_tree
) {
4518 "extent back ref already exists for %llu parent %llu root %llu\n",
4519 (unsigned long long)bytenr
,
4520 (unsigned long long)parent
,
4521 (unsigned long long)root
);
4523 back
->node
.found_extent_tree
= 1;
4526 WARN_ON(rb_insert(&rec
->backref_tree
, &back
->node
.node
,
4527 compare_extent_backref
));
4528 check_extent_type(rec
);
4529 maybe_free_extent_rec(extent_cache
, rec
);
4533 static int add_data_backref(struct cache_tree
*extent_cache
, u64 bytenr
,
4534 u64 parent
, u64 root
, u64 owner
, u64 offset
,
4535 u32 num_refs
, int found_ref
, u64 max_size
)
4537 struct extent_record
*rec
;
4538 struct data_backref
*back
;
4539 struct cache_extent
*cache
;
4541 bool insert
= false;
4543 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4545 struct extent_record tmpl
;
4547 memset(&tmpl
, 0, sizeof(tmpl
));
4548 tmpl
.start
= bytenr
;
4550 tmpl
.max_size
= max_size
;
4552 ret
= add_extent_rec_nolookup(extent_cache
, &tmpl
);
4556 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
4561 rec
= container_of(cache
, struct extent_record
, cache
);
4562 if (rec
->max_size
< max_size
)
4563 rec
->max_size
= max_size
;
4566 * If found_ref is set then max_size is the real size and must match the
4567 * existing refs. So if we have already found a ref then we need to
4568 * make sure that this ref matches the existing one, otherwise we need
4569 * to add a new backref so we can notice that the backrefs don't match
4570 * and we need to figure out who is telling the truth. This is to
4571 * account for that awful fsync bug I introduced where we'd end up with
4572 * a btrfs_file_extent_item that would have its length include multiple
4573 * prealloc extents or point inside of a prealloc extent.
4575 back
= find_data_backref(rec
, parent
, root
, owner
, offset
, found_ref
,
4578 back
= alloc_data_backref(rec
, parent
, root
, owner
, offset
,
4585 BUG_ON(num_refs
!= 1);
4586 if (back
->node
.found_ref
)
4587 BUG_ON(back
->bytes
!= max_size
);
4588 back
->node
.found_ref
= 1;
4589 back
->found_ref
+= 1;
4590 if (back
->bytes
!= max_size
|| back
->disk_bytenr
!= bytenr
) {
4591 back
->bytes
= max_size
;
4592 back
->disk_bytenr
= bytenr
;
4594 /* Need to reinsert if not already in the tree */
4596 rb_erase(&back
->node
.node
, &rec
->backref_tree
);
4601 rec
->content_checked
= 1;
4602 rec
->owner_ref_checked
= 1;
4604 if (back
->node
.found_extent_tree
) {
4606 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4607 (unsigned long long)bytenr
,
4608 (unsigned long long)parent
,
4609 (unsigned long long)root
,
4610 (unsigned long long)owner
,
4611 (unsigned long long)offset
,
4612 (unsigned long)num_refs
);
4614 back
->num_refs
= num_refs
;
4615 back
->node
.found_extent_tree
= 1;
4618 WARN_ON(rb_insert(&rec
->backref_tree
, &back
->node
.node
,
4619 compare_extent_backref
));
4621 maybe_free_extent_rec(extent_cache
, rec
);
4625 static int add_pending(struct cache_tree
*pending
,
4626 struct cache_tree
*seen
, u64 bytenr
, u32 size
)
4630 ret
= add_cache_extent(seen
, bytenr
, size
);
4633 add_cache_extent(pending
, bytenr
, size
);
4637 static int pick_next_pending(struct cache_tree
*pending
,
4638 struct cache_tree
*reada
,
4639 struct cache_tree
*nodes
,
4640 u64 last
, struct block_info
*bits
, int bits_nr
,
4643 unsigned long node_start
= last
;
4644 struct cache_extent
*cache
;
4647 cache
= search_cache_extent(reada
, 0);
4649 bits
[0].start
= cache
->start
;
4650 bits
[0].size
= cache
->size
;
4655 if (node_start
> 32768)
4656 node_start
-= 32768;
4658 cache
= search_cache_extent(nodes
, node_start
);
4660 cache
= search_cache_extent(nodes
, 0);
4663 cache
= search_cache_extent(pending
, 0);
4668 bits
[ret
].start
= cache
->start
;
4669 bits
[ret
].size
= cache
->size
;
4670 cache
= next_cache_extent(cache
);
4672 } while (cache
&& ret
< bits_nr
);
4678 bits
[ret
].start
= cache
->start
;
4679 bits
[ret
].size
= cache
->size
;
4680 cache
= next_cache_extent(cache
);
4682 } while (cache
&& ret
< bits_nr
);
4684 if (bits_nr
- ret
> 8) {
4685 u64 lookup
= bits
[0].start
+ bits
[0].size
;
4686 struct cache_extent
*next
;
4688 next
= search_cache_extent(pending
, lookup
);
4690 if (next
->start
- lookup
> 32768)
4692 bits
[ret
].start
= next
->start
;
4693 bits
[ret
].size
= next
->size
;
4694 lookup
= next
->start
+ next
->size
;
4698 next
= next_cache_extent(next
);
4706 static void free_chunk_record(struct cache_extent
*cache
)
4708 struct chunk_record
*rec
;
4710 rec
= container_of(cache
, struct chunk_record
, cache
);
4711 list_del_init(&rec
->list
);
4712 list_del_init(&rec
->dextents
);
4716 void free_chunk_cache_tree(struct cache_tree
*chunk_cache
)
4718 cache_tree_free_extents(chunk_cache
, free_chunk_record
);
4721 static void free_device_record(struct rb_node
*node
)
4723 struct device_record
*rec
;
4725 rec
= container_of(node
, struct device_record
, node
);
/* Expands to free_device_cache(), tearing down the rb-tree of devices. */
FREE_RB_BASED_TREE(device_cache, free_device_record);
4731 int insert_block_group_record(struct block_group_tree
*tree
,
4732 struct block_group_record
*bg_rec
)
4736 ret
= insert_cache_extent(&tree
->tree
, &bg_rec
->cache
);
4740 list_add_tail(&bg_rec
->list
, &tree
->block_groups
);
4744 static void free_block_group_record(struct cache_extent
*cache
)
4746 struct block_group_record
*rec
;
4748 rec
= container_of(cache
, struct block_group_record
, cache
);
4749 list_del_init(&rec
->list
);
4753 void free_block_group_tree(struct block_group_tree
*tree
)
4755 cache_tree_free_extents(&tree
->tree
, free_block_group_record
);
4758 int insert_device_extent_record(struct device_extent_tree
*tree
,
4759 struct device_extent_record
*de_rec
)
4764 * Device extent is a bit different from the other extents, because
4765 * the extents which belong to the different devices may have the
4766 * same start and size, so we need use the special extent cache
4767 * search/insert functions.
4769 ret
= insert_cache_extent2(&tree
->tree
, &de_rec
->cache
);
4773 list_add_tail(&de_rec
->chunk_list
, &tree
->no_chunk_orphans
);
4774 list_add_tail(&de_rec
->device_list
, &tree
->no_device_orphans
);
4778 static void free_device_extent_record(struct cache_extent
*cache
)
4780 struct device_extent_record
*rec
;
4782 rec
= container_of(cache
, struct device_extent_record
, cache
);
4783 if (!list_empty(&rec
->chunk_list
))
4784 list_del_init(&rec
->chunk_list
);
4785 if (!list_empty(&rec
->device_list
))
4786 list_del_init(&rec
->device_list
);
4790 void free_device_extent_tree(struct device_extent_tree
*tree
)
4792 cache_tree_free_extents(&tree
->tree
, free_device_extent_record
);
4795 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4796 static int process_extent_ref_v0(struct cache_tree
*extent_cache
,
4797 struct extent_buffer
*leaf
, int slot
)
4799 struct btrfs_extent_ref_v0
*ref0
;
4800 struct btrfs_key key
;
4803 btrfs_item_key_to_cpu(leaf
, &key
, slot
);
4804 ref0
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_ref_v0
);
4805 if (btrfs_ref_objectid_v0(leaf
, ref0
) < BTRFS_FIRST_FREE_OBJECTID
) {
4806 ret
= add_tree_backref(extent_cache
, key
.objectid
, key
.offset
,
4809 ret
= add_data_backref(extent_cache
, key
.objectid
, key
.offset
,
4810 0, 0, 0, btrfs_ref_count_v0(leaf
, ref0
), 0, 0);
4816 struct chunk_record
*btrfs_new_chunk_record(struct extent_buffer
*leaf
,
4817 struct btrfs_key
*key
,
4820 struct btrfs_chunk
*ptr
;
4821 struct chunk_record
*rec
;
4824 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_chunk
);
4825 num_stripes
= btrfs_chunk_num_stripes(leaf
, ptr
);
4827 rec
= calloc(1, btrfs_chunk_record_size(num_stripes
));
4829 fprintf(stderr
, "memory allocation failed\n");
4833 INIT_LIST_HEAD(&rec
->list
);
4834 INIT_LIST_HEAD(&rec
->dextents
);
4837 rec
->cache
.start
= key
->offset
;
4838 rec
->cache
.size
= btrfs_chunk_length(leaf
, ptr
);
4840 rec
->generation
= btrfs_header_generation(leaf
);
4842 rec
->objectid
= key
->objectid
;
4843 rec
->type
= key
->type
;
4844 rec
->offset
= key
->offset
;
4846 rec
->length
= rec
->cache
.size
;
4847 rec
->owner
= btrfs_chunk_owner(leaf
, ptr
);
4848 rec
->stripe_len
= btrfs_chunk_stripe_len(leaf
, ptr
);
4849 rec
->type_flags
= btrfs_chunk_type(leaf
, ptr
);
4850 rec
->io_width
= btrfs_chunk_io_width(leaf
, ptr
);
4851 rec
->io_align
= btrfs_chunk_io_align(leaf
, ptr
);
4852 rec
->sector_size
= btrfs_chunk_sector_size(leaf
, ptr
);
4853 rec
->num_stripes
= num_stripes
;
4854 rec
->sub_stripes
= btrfs_chunk_sub_stripes(leaf
, ptr
);
4856 for (i
= 0; i
< rec
->num_stripes
; ++i
) {
4857 rec
->stripes
[i
].devid
=
4858 btrfs_stripe_devid_nr(leaf
, ptr
, i
);
4859 rec
->stripes
[i
].offset
=
4860 btrfs_stripe_offset_nr(leaf
, ptr
, i
);
4861 read_extent_buffer(leaf
, rec
->stripes
[i
].dev_uuid
,
4862 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr
, i
),
4869 static int process_chunk_item(struct cache_tree
*chunk_cache
,
4870 struct btrfs_key
*key
, struct extent_buffer
*eb
,
4873 struct chunk_record
*rec
;
4874 struct btrfs_chunk
*chunk
;
4877 chunk
= btrfs_item_ptr(eb
, slot
, struct btrfs_chunk
);
4879 * Do extra check for this chunk item,
4881 * It's still possible one can craft a leaf with CHUNK_ITEM, with
4882 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4883 * and owner<->key_type check.
4885 ret
= btrfs_check_chunk_valid(global_info
, eb
, chunk
, slot
,
4888 error("chunk(%llu, %llu) is not valid, ignore it",
4889 key
->offset
, btrfs_chunk_length(eb
, chunk
));
4892 rec
= btrfs_new_chunk_record(eb
, key
, slot
);
4893 ret
= insert_cache_extent(chunk_cache
, &rec
->cache
);
4895 fprintf(stderr
, "Chunk[%llu, %llu] existed.\n",
4896 rec
->offset
, rec
->length
);
4903 static int process_device_item(struct rb_root
*dev_cache
,
4904 struct btrfs_key
*key
, struct extent_buffer
*eb
, int slot
)
4906 struct btrfs_dev_item
*ptr
;
4907 struct device_record
*rec
;
4910 ptr
= btrfs_item_ptr(eb
,
4911 slot
, struct btrfs_dev_item
);
4913 rec
= malloc(sizeof(*rec
));
4915 fprintf(stderr
, "memory allocation failed\n");
4919 rec
->devid
= key
->offset
;
4920 rec
->generation
= btrfs_header_generation(eb
);
4922 rec
->objectid
= key
->objectid
;
4923 rec
->type
= key
->type
;
4924 rec
->offset
= key
->offset
;
4926 rec
->devid
= btrfs_device_id(eb
, ptr
);
4927 rec
->total_byte
= btrfs_device_total_bytes(eb
, ptr
);
4928 rec
->byte_used
= btrfs_device_bytes_used(eb
, ptr
);
4930 ret
= rb_insert(dev_cache
, &rec
->node
, device_record_compare
);
4932 fprintf(stderr
, "Device[%llu] existed.\n", rec
->devid
);
4939 struct block_group_record
*
4940 btrfs_new_block_group_record(struct extent_buffer
*leaf
, struct btrfs_key
*key
,
4943 struct btrfs_block_group_item
*ptr
;
4944 struct block_group_record
*rec
;
4946 rec
= calloc(1, sizeof(*rec
));
4948 fprintf(stderr
, "memory allocation failed\n");
4952 rec
->cache
.start
= key
->objectid
;
4953 rec
->cache
.size
= key
->offset
;
4955 rec
->generation
= btrfs_header_generation(leaf
);
4957 rec
->objectid
= key
->objectid
;
4958 rec
->type
= key
->type
;
4959 rec
->offset
= key
->offset
;
4961 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_block_group_item
);
4962 rec
->flags
= btrfs_disk_block_group_flags(leaf
, ptr
);
4964 INIT_LIST_HEAD(&rec
->list
);
4969 static int process_block_group_item(struct block_group_tree
*block_group_cache
,
4970 struct btrfs_key
*key
,
4971 struct extent_buffer
*eb
, int slot
)
4973 struct block_group_record
*rec
;
4976 rec
= btrfs_new_block_group_record(eb
, key
, slot
);
4977 ret
= insert_block_group_record(block_group_cache
, rec
);
4979 fprintf(stderr
, "Block Group[%llu, %llu] existed.\n",
4980 rec
->objectid
, rec
->offset
);
4987 struct device_extent_record
*
4988 btrfs_new_device_extent_record(struct extent_buffer
*leaf
,
4989 struct btrfs_key
*key
, int slot
)
4991 struct device_extent_record
*rec
;
4992 struct btrfs_dev_extent
*ptr
;
4994 rec
= calloc(1, sizeof(*rec
));
4996 fprintf(stderr
, "memory allocation failed\n");
5000 rec
->cache
.objectid
= key
->objectid
;
5001 rec
->cache
.start
= key
->offset
;
5003 rec
->generation
= btrfs_header_generation(leaf
);
5005 rec
->objectid
= key
->objectid
;
5006 rec
->type
= key
->type
;
5007 rec
->offset
= key
->offset
;
5009 ptr
= btrfs_item_ptr(leaf
, slot
, struct btrfs_dev_extent
);
5010 rec
->chunk_objecteid
=
5011 btrfs_dev_extent_chunk_objectid(leaf
, ptr
);
5013 btrfs_dev_extent_chunk_offset(leaf
, ptr
);
5014 rec
->length
= btrfs_dev_extent_length(leaf
, ptr
);
5015 rec
->cache
.size
= rec
->length
;
5017 INIT_LIST_HEAD(&rec
->chunk_list
);
5018 INIT_LIST_HEAD(&rec
->device_list
);
5024 process_device_extent_item(struct device_extent_tree
*dev_extent_cache
,
5025 struct btrfs_key
*key
, struct extent_buffer
*eb
,
5028 struct device_extent_record
*rec
;
5031 rec
= btrfs_new_device_extent_record(eb
, key
, slot
);
5032 ret
= insert_device_extent_record(dev_extent_cache
, rec
);
5035 "Device extent[%llu, %llu, %llu] existed.\n",
5036 rec
->objectid
, rec
->offset
, rec
->length
);
5043 static int process_extent_item(struct btrfs_root
*root
,
5044 struct cache_tree
*extent_cache
,
5045 struct extent_buffer
*eb
, int slot
)
5047 struct btrfs_extent_item
*ei
;
5048 struct btrfs_extent_inline_ref
*iref
;
5049 struct btrfs_extent_data_ref
*dref
;
5050 struct btrfs_shared_data_ref
*sref
;
5051 struct btrfs_key key
;
5052 struct extent_record tmpl
;
5057 u32 item_size
= btrfs_item_size_nr(eb
, slot
);
5063 btrfs_item_key_to_cpu(eb
, &key
, slot
);
5065 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
5067 num_bytes
= root
->fs_info
->nodesize
;
5069 num_bytes
= key
.offset
;
5072 if (!IS_ALIGNED(key
.objectid
, root
->fs_info
->sectorsize
)) {
5073 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5074 key
.objectid
, root
->fs_info
->sectorsize
);
5077 if (item_size
< sizeof(*ei
)) {
5078 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5079 struct btrfs_extent_item_v0
*ei0
;
5081 if (item_size
!= sizeof(*ei0
)) {
5083 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5084 key
.objectid
, key
.type
, key
.offset
,
5085 btrfs_header_bytenr(eb
), slot
);
5088 ei0
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item_v0
);
5089 refs
= btrfs_extent_refs_v0(eb
, ei0
);
5093 memset(&tmpl
, 0, sizeof(tmpl
));
5094 tmpl
.start
= key
.objectid
;
5095 tmpl
.nr
= num_bytes
;
5096 tmpl
.extent_item_refs
= refs
;
5097 tmpl
.metadata
= metadata
;
5099 tmpl
.max_size
= num_bytes
;
5101 return add_extent_rec(extent_cache
, &tmpl
);
5104 ei
= btrfs_item_ptr(eb
, slot
, struct btrfs_extent_item
);
5105 refs
= btrfs_extent_refs(eb
, ei
);
5106 if (btrfs_extent_flags(eb
, ei
) & BTRFS_EXTENT_FLAG_TREE_BLOCK
)
5110 if (metadata
&& num_bytes
!= root
->fs_info
->nodesize
) {
5111 error("ignore invalid metadata extent, length %llu does not equal to %u",
5112 num_bytes
, root
->fs_info
->nodesize
);
5115 if (!metadata
&& !IS_ALIGNED(num_bytes
, root
->fs_info
->sectorsize
)) {
5116 error("ignore invalid data extent, length %llu is not aligned to %u",
5117 num_bytes
, root
->fs_info
->sectorsize
);
5121 memset(&tmpl
, 0, sizeof(tmpl
));
5122 tmpl
.start
= key
.objectid
;
5123 tmpl
.nr
= num_bytes
;
5124 tmpl
.extent_item_refs
= refs
;
5125 tmpl
.metadata
= metadata
;
5127 tmpl
.max_size
= num_bytes
;
5128 add_extent_rec(extent_cache
, &tmpl
);
5130 ptr
= (unsigned long)(ei
+ 1);
5131 if (btrfs_extent_flags(eb
, ei
) & BTRFS_EXTENT_FLAG_TREE_BLOCK
&&
5132 key
.type
== BTRFS_EXTENT_ITEM_KEY
)
5133 ptr
+= sizeof(struct btrfs_tree_block_info
);
5135 end
= (unsigned long)ei
+ item_size
;
5137 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
5138 type
= btrfs_extent_inline_ref_type(eb
, iref
);
5139 offset
= btrfs_extent_inline_ref_offset(eb
, iref
);
5141 case BTRFS_TREE_BLOCK_REF_KEY
:
5142 ret
= add_tree_backref(extent_cache
, key
.objectid
,
5146 "add_tree_backref failed (extent items tree block): %s",
5149 case BTRFS_SHARED_BLOCK_REF_KEY
:
5150 ret
= add_tree_backref(extent_cache
, key
.objectid
,
5154 "add_tree_backref failed (extent items shared block): %s",
5157 case BTRFS_EXTENT_DATA_REF_KEY
:
5158 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
5159 add_data_backref(extent_cache
, key
.objectid
, 0,
5160 btrfs_extent_data_ref_root(eb
, dref
),
5161 btrfs_extent_data_ref_objectid(eb
,
5163 btrfs_extent_data_ref_offset(eb
, dref
),
5164 btrfs_extent_data_ref_count(eb
, dref
),
5167 case BTRFS_SHARED_DATA_REF_KEY
:
5168 sref
= (struct btrfs_shared_data_ref
*)(iref
+ 1);
5169 add_data_backref(extent_cache
, key
.objectid
, offset
,
5171 btrfs_shared_data_ref_count(eb
, sref
),
5176 "corrupt extent record: key [%llu,%u,%llu]\n",
5177 key
.objectid
, key
.type
, num_bytes
);
5180 ptr
+= btrfs_extent_inline_ref_size(type
);
5187 static int check_cache_range(struct btrfs_root
*root
,
5188 struct btrfs_block_group_cache
*cache
,
5189 u64 offset
, u64 bytes
)
5191 struct btrfs_free_space
*entry
;
5197 for (i
= 0; i
< BTRFS_SUPER_MIRROR_MAX
; i
++) {
5198 bytenr
= btrfs_sb_offset(i
);
5199 ret
= btrfs_rmap_block(root
->fs_info
,
5200 cache
->key
.objectid
, bytenr
,
5201 &logical
, &nr
, &stripe_len
);
5206 if (logical
[nr
] + stripe_len
<= offset
)
5208 if (offset
+ bytes
<= logical
[nr
])
5210 if (logical
[nr
] == offset
) {
5211 if (stripe_len
>= bytes
) {
5215 bytes
-= stripe_len
;
5216 offset
+= stripe_len
;
5217 } else if (logical
[nr
] < offset
) {
5218 if (logical
[nr
] + stripe_len
>=
5223 bytes
= (offset
+ bytes
) -
5224 (logical
[nr
] + stripe_len
);
5225 offset
= logical
[nr
] + stripe_len
;
5228 * Could be tricky, the super may land in the
5229 * middle of the area we're checking. First
5230 * check the easiest case, it's at the end.
5232 if (logical
[nr
] + stripe_len
>=
5234 bytes
= logical
[nr
] - offset
;
5238 /* Check the left side */
5239 ret
= check_cache_range(root
, cache
,
5241 logical
[nr
] - offset
);
5247 /* Now we continue with the right side */
5248 bytes
= (offset
+ bytes
) -
5249 (logical
[nr
] + stripe_len
);
5250 offset
= logical
[nr
] + stripe_len
;
5257 entry
= btrfs_find_free_space(cache
->free_space_ctl
, offset
, bytes
);
5259 fprintf(stderr
, "there is no free space entry for %llu-%llu\n",
5260 offset
, offset
+bytes
);
5264 if (entry
->offset
!= offset
) {
5265 fprintf(stderr
, "wanted offset %llu, found %llu\n", offset
,
5270 if (entry
->bytes
!= bytes
) {
5271 fprintf(stderr
, "wanted bytes %llu, found %llu for off %llu\n",
5272 bytes
, entry
->bytes
, offset
);
5276 unlink_free_space(cache
->free_space_ctl
, entry
);
5281 static int verify_space_cache(struct btrfs_root
*root
,
5282 struct btrfs_block_group_cache
*cache
)
5284 struct btrfs_path path
;
5285 struct extent_buffer
*leaf
;
5286 struct btrfs_key key
;
5290 root
= root
->fs_info
->extent_root
;
5292 last
= max_t(u64
, cache
->key
.objectid
, BTRFS_SUPER_INFO_OFFSET
);
5294 btrfs_init_path(&path
);
5295 key
.objectid
= last
;
5297 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5298 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
5303 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
5304 ret
= btrfs_next_leaf(root
, &path
);
5312 leaf
= path
.nodes
[0];
5313 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
5314 if (key
.objectid
>= cache
->key
.offset
+ cache
->key
.objectid
)
5316 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
5317 key
.type
!= BTRFS_METADATA_ITEM_KEY
) {
5322 if (last
== key
.objectid
) {
5323 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
5324 last
= key
.objectid
+ key
.offset
;
5326 last
= key
.objectid
+ root
->fs_info
->nodesize
;
5331 ret
= check_cache_range(root
, cache
, last
,
5332 key
.objectid
- last
);
5335 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
)
5336 last
= key
.objectid
+ key
.offset
;
5338 last
= key
.objectid
+ root
->fs_info
->nodesize
;
5342 if (last
< cache
->key
.objectid
+ cache
->key
.offset
)
5343 ret
= check_cache_range(root
, cache
, last
,
5344 cache
->key
.objectid
+
5345 cache
->key
.offset
- last
);
5348 btrfs_release_path(&path
);
5351 !RB_EMPTY_ROOT(&cache
->free_space_ctl
->free_space_offset
)) {
5352 fprintf(stderr
, "There are still entries left in the space "
5360 static int check_space_cache(struct btrfs_root
*root
)
5362 struct btrfs_block_group_cache
*cache
;
5363 u64 start
= BTRFS_SUPER_INFO_OFFSET
+ BTRFS_SUPER_INFO_SIZE
;
5367 if (btrfs_super_cache_generation(root
->fs_info
->super_copy
) != -1ULL &&
5368 btrfs_super_generation(root
->fs_info
->super_copy
) !=
5369 btrfs_super_cache_generation(root
->fs_info
->super_copy
)) {
5370 printf("cache and super generation don't match, space cache "
5371 "will be invalidated\n");
5375 if (ctx
.progress_enabled
) {
5376 ctx
.tp
= TASK_FREE_SPACE
;
5377 task_start(ctx
.info
);
5381 cache
= btrfs_lookup_first_block_group(root
->fs_info
, start
);
5385 start
= cache
->key
.objectid
+ cache
->key
.offset
;
5386 if (!cache
->free_space_ctl
) {
5387 if (btrfs_init_free_space_ctl(cache
,
5388 root
->fs_info
->sectorsize
)) {
5393 btrfs_remove_free_space_cache(cache
);
5396 if (btrfs_fs_compat_ro(root
->fs_info
, FREE_SPACE_TREE
)) {
5397 ret
= exclude_super_stripes(root
, cache
);
5399 fprintf(stderr
, "could not exclude super stripes: %s\n",
5404 ret
= load_free_space_tree(root
->fs_info
, cache
);
5405 free_excluded_extents(root
, cache
);
5407 fprintf(stderr
, "could not load free space tree: %s\n",
5414 ret
= load_free_space_cache(root
->fs_info
, cache
);
5421 ret
= verify_space_cache(root
, cache
);
5423 fprintf(stderr
, "cache appears valid but isn't %llu\n",
5424 cache
->key
.objectid
);
5429 task_stop(ctx
.info
);
5431 return error
? -EINVAL
: 0;
5435 * Check data checksum for [@bytenr, @bytenr + @num_bytes).
5437 * Return <0 for fatal error (fails to read checksum/data or allocate memory).
5438 * Return >0 for csum mismatch for any copy.
5439 * Return 0 if everything is OK.
5441 static int check_extent_csums(struct btrfs_root
*root
, u64 bytenr
,
5442 u64 num_bytes
, unsigned long leaf_offset
,
5443 struct extent_buffer
*eb
)
5445 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
5447 u16 csum_size
= btrfs_super_csum_size(fs_info
->super_copy
);
5449 unsigned long csum_offset
;
5453 u64 data_checked
= 0;
5458 bool csum_mismatch
= false;
5460 if (num_bytes
% fs_info
->sectorsize
)
5463 data
= malloc(num_bytes
);
5467 num_copies
= btrfs_num_copies(root
->fs_info
, bytenr
, num_bytes
);
5468 while (offset
< num_bytes
) {
5470 * Mirror 0 means 'read from any valid copy', so it's skipped.
5471 * The indexes 1-N represent the n-th copy for levels with
5474 for (mirror
= 1; mirror
<= num_copies
; mirror
++) {
5475 read_len
= num_bytes
- offset
;
5476 /* read as much space once a time */
5477 ret
= read_extent_data(fs_info
, data
+ offset
,
5478 bytenr
+ offset
, &read_len
, mirror
);
5483 /* verify every 4k data's checksum */
5484 while (data_checked
< read_len
) {
5486 tmp
= offset
+ data_checked
;
5488 csum
= btrfs_csum_data((char *)data
+ tmp
,
5489 csum
, fs_info
->sectorsize
);
5490 btrfs_csum_final(csum
, (u8
*)&csum
);
5492 csum_offset
= leaf_offset
+
5493 tmp
/ fs_info
->sectorsize
* csum_size
;
5494 read_extent_buffer(eb
, (char *)&csum_expected
,
5495 csum_offset
, csum_size
);
5496 if (csum
!= csum_expected
) {
5497 csum_mismatch
= true;
5499 "mirror %d bytenr %llu csum %u expected csum %u\n",
5500 mirror
, bytenr
+ tmp
,
5501 csum
, csum_expected
);
5503 data_checked
+= fs_info
->sectorsize
;
5510 if (!ret
&& csum_mismatch
)
5515 static int check_extent_exists(struct btrfs_root
*root
, u64 bytenr
,
5518 struct btrfs_path path
;
5519 struct extent_buffer
*leaf
;
5520 struct btrfs_key key
;
5523 btrfs_init_path(&path
);
5524 key
.objectid
= bytenr
;
5525 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
5526 key
.offset
= (u64
)-1;
5529 ret
= btrfs_search_slot(NULL
, root
->fs_info
->extent_root
, &key
, &path
,
5532 fprintf(stderr
, "Error looking up extent record %d\n", ret
);
5533 btrfs_release_path(&path
);
5536 if (path
.slots
[0] > 0) {
5539 ret
= btrfs_prev_leaf(root
, &path
);
5542 } else if (ret
> 0) {
5549 btrfs_item_key_to_cpu(path
.nodes
[0], &key
, path
.slots
[0]);
5552 * Block group items come before extent items if they have the same
5553 * bytenr, so walk back one more just in case. Dear future traveller,
5554 * first congrats on mastering time travel. Now if it's not too much
5555 * trouble could you go back to 2006 and tell Chris to make the
5556 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5557 * EXTENT_ITEM_KEY please?
5559 while (key
.type
> BTRFS_EXTENT_ITEM_KEY
) {
5560 if (path
.slots
[0] > 0) {
5563 ret
= btrfs_prev_leaf(root
, &path
);
5566 } else if (ret
> 0) {
5571 btrfs_item_key_to_cpu(path
.nodes
[0], &key
, path
.slots
[0]);
5575 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
5576 ret
= btrfs_next_leaf(root
, &path
);
5578 fprintf(stderr
, "Error going to next leaf "
5580 btrfs_release_path(&path
);
5586 leaf
= path
.nodes
[0];
5587 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
5588 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
5592 if (key
.objectid
+ key
.offset
< bytenr
) {
5596 if (key
.objectid
> bytenr
+ num_bytes
)
5599 if (key
.objectid
== bytenr
) {
5600 if (key
.offset
>= num_bytes
) {
5604 num_bytes
-= key
.offset
;
5605 bytenr
+= key
.offset
;
5606 } else if (key
.objectid
< bytenr
) {
5607 if (key
.objectid
+ key
.offset
>= bytenr
+ num_bytes
) {
5611 num_bytes
= (bytenr
+ num_bytes
) -
5612 (key
.objectid
+ key
.offset
);
5613 bytenr
= key
.objectid
+ key
.offset
;
5615 if (key
.objectid
+ key
.offset
< bytenr
+ num_bytes
) {
5616 u64 new_start
= key
.objectid
+ key
.offset
;
5617 u64 new_bytes
= bytenr
+ num_bytes
- new_start
;
5620 * Weird case, the extent is in the middle of
5621 * our range, we'll have to search one side
5622 * and then the other. Not sure if this happens
5623 * in real life, but no harm in coding it up
5624 * anyway just in case.
5626 btrfs_release_path(&path
);
5627 ret
= check_extent_exists(root
, new_start
,
5630 fprintf(stderr
, "Right section didn't "
5634 num_bytes
= key
.objectid
- bytenr
;
5637 num_bytes
= key
.objectid
- bytenr
;
5644 if (num_bytes
&& !ret
) {
5646 "there are no extents for csum range %llu-%llu\n",
5647 bytenr
, bytenr
+num_bytes
);
5651 btrfs_release_path(&path
);
5655 static int check_csums(struct btrfs_root
*root
)
5657 struct btrfs_path path
;
5658 struct extent_buffer
*leaf
;
5659 struct btrfs_key key
;
5660 u64 offset
= 0, num_bytes
= 0;
5661 u16 csum_size
= btrfs_super_csum_size(root
->fs_info
->super_copy
);
5665 unsigned long leaf_offset
;
5666 bool verify_csum
= !!check_data_csum
;
5668 root
= root
->fs_info
->csum_root
;
5669 if (!extent_buffer_uptodate(root
->node
)) {
5670 fprintf(stderr
, "No valid csum tree found\n");
5674 btrfs_init_path(&path
);
5675 key
.objectid
= BTRFS_EXTENT_CSUM_OBJECTID
;
5676 key
.type
= BTRFS_EXTENT_CSUM_KEY
;
5678 ret
= btrfs_search_slot(NULL
, root
, &key
, &path
, 0, 0);
5680 fprintf(stderr
, "Error searching csum tree %d\n", ret
);
5681 btrfs_release_path(&path
);
5685 if (ret
> 0 && path
.slots
[0])
5690 * For metadata dump (btrfs-image) all data is wiped so verifying data
5691 * csum is meaningless and will always report csum error.
5693 if (check_data_csum
&& (btrfs_super_flags(root
->fs_info
->super_copy
) &
5694 (BTRFS_SUPER_FLAG_METADUMP
| BTRFS_SUPER_FLAG_METADUMP_V2
))) {
5695 printf("skip data csum verification for metadata dump\n");
5696 verify_csum
= false;
5700 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
5701 ret
= btrfs_next_leaf(root
, &path
);
5703 fprintf(stderr
, "Error going to next leaf "
5710 leaf
= path
.nodes
[0];
5712 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
5713 if (key
.type
!= BTRFS_EXTENT_CSUM_KEY
) {
5718 data_len
= (btrfs_item_size_nr(leaf
, path
.slots
[0]) /
5719 csum_size
) * root
->fs_info
->sectorsize
;
5721 goto skip_csum_check
;
5722 leaf_offset
= btrfs_item_ptr_offset(leaf
, path
.slots
[0]);
5723 ret
= check_extent_csums(root
, key
.offset
, data_len
,
5726 * Only break for fatal errors, if mismatch is found, continue
5727 * checking until all extents are checked.
5735 offset
= key
.offset
;
5736 } else if (key
.offset
!= offset
+ num_bytes
) {
5737 ret
= check_extent_exists(root
, offset
, num_bytes
);
5740 "csum exists for %llu-%llu but there is no extent record\n",
5741 offset
, offset
+num_bytes
);
5744 offset
= key
.offset
;
5747 num_bytes
+= data_len
;
5751 btrfs_release_path(&path
);
5755 static int is_dropped_key(struct btrfs_key
*key
,
5756 struct btrfs_key
*drop_key
)
5758 if (key
->objectid
< drop_key
->objectid
)
5760 else if (key
->objectid
== drop_key
->objectid
) {
5761 if (key
->type
< drop_key
->type
)
5763 else if (key
->type
== drop_key
->type
) {
5764 if (key
->offset
< drop_key
->offset
)
5772 * Here are the rules for FULL_BACKREF.
5774 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5775 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5777 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5778 * if it happened after the relocation occurred since we'll have dropped the
5779 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5780 * have no real way to know for sure.
5782 * We process the blocks one root at a time, and we start from the lowest root
5783 * objectid and go to the highest. So we can just lookup the owner backref for
5784 * the record and if we don't find it then we know it doesn't exist and we have
5787 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5788 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5789 * be set or not and then we can check later once we've gathered all the refs.
5791 static int calc_extent_flag(struct cache_tree
*extent_cache
,
5792 struct extent_buffer
*buf
,
5793 struct root_item_record
*ri
,
5796 struct extent_record
*rec
;
5797 struct cache_extent
*cache
;
5798 struct tree_backref
*tback
;
5801 cache
= lookup_cache_extent(extent_cache
, buf
->start
, 1);
5802 /* we have added this extent before */
5806 rec
= container_of(cache
, struct extent_record
, cache
);
5809 * Except file/reloc tree, we can not have
5812 if (ri
->objectid
< BTRFS_FIRST_FREE_OBJECTID
)
5817 if (buf
->start
== ri
->bytenr
)
5820 if (btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))
5823 owner
= btrfs_header_owner(buf
);
5824 if (owner
== ri
->objectid
)
5827 tback
= find_tree_backref(rec
, 0, owner
);
5832 if (rec
->flag_block_full_backref
!= FLAG_UNSET
&&
5833 rec
->flag_block_full_backref
!= 0)
5834 rec
->bad_full_backref
= 1;
5837 *flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5838 if (rec
->flag_block_full_backref
!= FLAG_UNSET
&&
5839 rec
->flag_block_full_backref
!= 1)
5840 rec
->bad_full_backref
= 1;
5844 static void report_mismatch_key_root(u8 key_type
, u64 rootid
)
5846 fprintf(stderr
, "Invalid key type(");
5847 print_key_type(stderr
, 0, key_type
);
5848 fprintf(stderr
, ") found in root(");
5849 print_objectid(stderr
, rootid
, 0);
5850 fprintf(stderr
, ")\n");
5854 * Check if the key is valid with its extent buffer.
5856 * This is a early check in case invalid key exists in a extent buffer
5857 * This is not comprehensive yet, but should prevent wrong key/item passed
5860 static int check_type_with_root(u64 rootid
, u8 key_type
)
5863 /* Only valid in chunk tree */
5864 case BTRFS_DEV_ITEM_KEY
:
5865 case BTRFS_CHUNK_ITEM_KEY
:
5866 if (rootid
!= BTRFS_CHUNK_TREE_OBJECTID
)
5869 /* valid in csum and log tree */
5870 case BTRFS_CSUM_TREE_OBJECTID
:
5871 if (!(rootid
== BTRFS_TREE_LOG_OBJECTID
||
5875 case BTRFS_EXTENT_ITEM_KEY
:
5876 case BTRFS_METADATA_ITEM_KEY
:
5877 case BTRFS_BLOCK_GROUP_ITEM_KEY
:
5878 if (rootid
!= BTRFS_EXTENT_TREE_OBJECTID
)
5881 case BTRFS_ROOT_ITEM_KEY
:
5882 if (rootid
!= BTRFS_ROOT_TREE_OBJECTID
)
5885 case BTRFS_DEV_EXTENT_KEY
:
5886 if (rootid
!= BTRFS_DEV_TREE_OBJECTID
)
5892 report_mismatch_key_root(key_type
, rootid
);
5896 static int run_next_block(struct btrfs_root
*root
,
5897 struct block_info
*bits
,
5900 struct cache_tree
*pending
,
5901 struct cache_tree
*seen
,
5902 struct cache_tree
*reada
,
5903 struct cache_tree
*nodes
,
5904 struct cache_tree
*extent_cache
,
5905 struct cache_tree
*chunk_cache
,
5906 struct rb_root
*dev_cache
,
5907 struct block_group_tree
*block_group_cache
,
5908 struct device_extent_tree
*dev_extent_cache
,
5909 struct root_item_record
*ri
)
5911 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
5912 struct extent_buffer
*buf
;
5913 struct extent_record
*rec
= NULL
;
5924 struct btrfs_key key
;
5925 struct cache_extent
*cache
;
5928 nritems
= pick_next_pending(pending
, reada
, nodes
, *last
, bits
,
5929 bits_nr
, &reada_bits
);
5934 for (i
= 0; i
< nritems
; i
++) {
5935 ret
= add_cache_extent(reada
, bits
[i
].start
,
5940 /* fixme, get the parent transid */
5941 readahead_tree_block(fs_info
, bits
[i
].start
, 0);
5944 *last
= bits
[0].start
;
5945 bytenr
= bits
[0].start
;
5946 size
= bits
[0].size
;
5948 cache
= lookup_cache_extent(pending
, bytenr
, size
);
5950 remove_cache_extent(pending
, cache
);
5953 cache
= lookup_cache_extent(reada
, bytenr
, size
);
5955 remove_cache_extent(reada
, cache
);
5958 cache
= lookup_cache_extent(nodes
, bytenr
, size
);
5960 remove_cache_extent(nodes
, cache
);
5963 cache
= lookup_cache_extent(extent_cache
, bytenr
, size
);
5965 rec
= container_of(cache
, struct extent_record
, cache
);
5966 gen
= rec
->parent_generation
;
5969 /* fixme, get the real parent transid */
5970 buf
= read_tree_block(root
->fs_info
, bytenr
, gen
);
5971 if (!extent_buffer_uptodate(buf
)) {
5972 record_bad_block_io(root
->fs_info
,
5973 extent_cache
, bytenr
, size
);
5977 nritems
= btrfs_header_nritems(buf
);
5980 if (!init_extent_tree
) {
5981 ret
= btrfs_lookup_extent_info(NULL
, fs_info
, bytenr
,
5982 btrfs_header_level(buf
), 1, NULL
,
5985 ret
= calc_extent_flag(extent_cache
, buf
, ri
, &flags
);
5987 fprintf(stderr
, "Couldn't calc extent flags\n");
5988 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
5993 ret
= calc_extent_flag(extent_cache
, buf
, ri
, &flags
);
5995 fprintf(stderr
, "Couldn't calc extent flags\n");
5996 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
6000 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
6002 ri
->objectid
!= BTRFS_TREE_RELOC_OBJECTID
&&
6003 ri
->objectid
== btrfs_header_owner(buf
)) {
6005 * Ok we got to this block from it's original owner and
6006 * we have FULL_BACKREF set. Relocation can leave
6007 * converted blocks over so this is altogether possible,
6008 * however it's not possible if the generation > the
6009 * last snapshot, so check for this case.
6011 if (!btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
) &&
6012 btrfs_header_generation(buf
) > ri
->last_snapshot
) {
6013 flags
&= ~BTRFS_BLOCK_FLAG_FULL_BACKREF
;
6014 rec
->bad_full_backref
= 1;
6019 (ri
->objectid
== BTRFS_TREE_RELOC_OBJECTID
||
6020 btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_RELOC
))) {
6021 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
6022 rec
->bad_full_backref
= 1;
6026 if (flags
& BTRFS_BLOCK_FLAG_FULL_BACKREF
) {
6027 rec
->flag_block_full_backref
= 1;
6031 rec
->flag_block_full_backref
= 0;
6033 owner
= btrfs_header_owner(buf
);
6036 ret
= check_block(root
, extent_cache
, buf
, flags
);
6040 if (btrfs_is_leaf(buf
)) {
6041 btree_space_waste
+= btrfs_leaf_free_space(buf
);
6042 for (i
= 0; i
< nritems
; i
++) {
6043 struct btrfs_file_extent_item
*fi
;
6045 btrfs_item_key_to_cpu(buf
, &key
, i
);
6047 * Check key type against the leaf owner.
6048 * Could filter quite a lot of early error if
6051 if (check_type_with_root(btrfs_header_owner(buf
),
6053 fprintf(stderr
, "ignoring invalid key\n");
6056 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
) {
6057 process_extent_item(root
, extent_cache
, buf
,
6061 if (key
.type
== BTRFS_METADATA_ITEM_KEY
) {
6062 process_extent_item(root
, extent_cache
, buf
,
6066 if (key
.type
== BTRFS_EXTENT_CSUM_KEY
) {
6068 btrfs_item_size_nr(buf
, i
);
6071 if (key
.type
== BTRFS_CHUNK_ITEM_KEY
) {
6072 process_chunk_item(chunk_cache
, &key
, buf
, i
);
6075 if (key
.type
== BTRFS_DEV_ITEM_KEY
) {
6076 process_device_item(dev_cache
, &key
, buf
, i
);
6079 if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
6080 process_block_group_item(block_group_cache
,
6084 if (key
.type
== BTRFS_DEV_EXTENT_KEY
) {
6085 process_device_extent_item(dev_extent_cache
,
6090 if (key
.type
== BTRFS_EXTENT_REF_V0_KEY
) {
6091 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6092 process_extent_ref_v0(extent_cache
, buf
, i
);
6099 if (key
.type
== BTRFS_TREE_BLOCK_REF_KEY
) {
6100 ret
= add_tree_backref(extent_cache
,
6101 key
.objectid
, 0, key
.offset
, 0);
6104 "add_tree_backref failed (leaf tree block): %s",
6108 if (key
.type
== BTRFS_SHARED_BLOCK_REF_KEY
) {
6109 ret
= add_tree_backref(extent_cache
,
6110 key
.objectid
, key
.offset
, 0, 0);
6113 "add_tree_backref failed (leaf shared block): %s",
6117 if (key
.type
== BTRFS_EXTENT_DATA_REF_KEY
) {
6118 struct btrfs_extent_data_ref
*ref
;
6120 ref
= btrfs_item_ptr(buf
, i
,
6121 struct btrfs_extent_data_ref
);
6122 add_data_backref(extent_cache
,
6124 btrfs_extent_data_ref_root(buf
, ref
),
6125 btrfs_extent_data_ref_objectid(buf
,
6127 btrfs_extent_data_ref_offset(buf
, ref
),
6128 btrfs_extent_data_ref_count(buf
, ref
),
6129 0, root
->fs_info
->sectorsize
);
6132 if (key
.type
== BTRFS_SHARED_DATA_REF_KEY
) {
6133 struct btrfs_shared_data_ref
*ref
;
6135 ref
= btrfs_item_ptr(buf
, i
,
6136 struct btrfs_shared_data_ref
);
6137 add_data_backref(extent_cache
,
6138 key
.objectid
, key
.offset
, 0, 0, 0,
6139 btrfs_shared_data_ref_count(buf
, ref
),
6140 0, root
->fs_info
->sectorsize
);
6143 if (key
.type
== BTRFS_ORPHAN_ITEM_KEY
) {
6144 struct bad_item
*bad
;
6146 if (key
.objectid
== BTRFS_ORPHAN_OBJECTID
)
6150 bad
= malloc(sizeof(struct bad_item
));
6153 INIT_LIST_HEAD(&bad
->list
);
6154 memcpy(&bad
->key
, &key
,
6155 sizeof(struct btrfs_key
));
6156 bad
->root_id
= owner
;
6157 list_add_tail(&bad
->list
, &delete_items
);
6160 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
6162 fi
= btrfs_item_ptr(buf
, i
,
6163 struct btrfs_file_extent_item
);
6164 if (btrfs_file_extent_type(buf
, fi
) ==
6165 BTRFS_FILE_EXTENT_INLINE
)
6167 if (btrfs_file_extent_disk_bytenr(buf
, fi
) == 0)
6170 data_bytes_allocated
+=
6171 btrfs_file_extent_disk_num_bytes(buf
, fi
);
6172 if (data_bytes_allocated
< root
->fs_info
->sectorsize
)
6175 data_bytes_referenced
+=
6176 btrfs_file_extent_num_bytes(buf
, fi
);
6177 add_data_backref(extent_cache
,
6178 btrfs_file_extent_disk_bytenr(buf
, fi
),
6179 parent
, owner
, key
.objectid
, key
.offset
-
6180 btrfs_file_extent_offset(buf
, fi
), 1, 1,
6181 btrfs_file_extent_disk_num_bytes(buf
, fi
));
6186 level
= btrfs_header_level(buf
);
6187 for (i
= 0; i
< nritems
; i
++) {
6188 struct extent_record tmpl
;
6190 ptr
= btrfs_node_blockptr(buf
, i
);
6191 size
= root
->fs_info
->nodesize
;
6192 btrfs_node_key_to_cpu(buf
, &key
, i
);
6194 if ((level
== ri
->drop_level
)
6195 && is_dropped_key(&key
, &ri
->drop_key
)) {
6200 memset(&tmpl
, 0, sizeof(tmpl
));
6201 btrfs_cpu_key_to_disk(&tmpl
.parent_key
, &key
);
6202 tmpl
.parent_generation
=
6203 btrfs_node_ptr_generation(buf
, i
);
6208 tmpl
.max_size
= size
;
6209 ret
= add_extent_rec(extent_cache
, &tmpl
);
6213 ret
= add_tree_backref(extent_cache
, ptr
, parent
,
6217 "add_tree_backref failed (non-leaf block): %s",
6223 add_pending(nodes
, seen
, ptr
, size
);
6225 add_pending(pending
, seen
, ptr
, size
);
6227 btree_space_waste
+= (BTRFS_NODEPTRS_PER_BLOCK(fs_info
) -
6228 nritems
) * sizeof(struct btrfs_key_ptr
);
6230 total_btree_bytes
+= buf
->len
;
6231 if (fs_root_objectid(btrfs_header_owner(buf
)))
6232 total_fs_tree_bytes
+= buf
->len
;
6233 if (btrfs_header_owner(buf
) == BTRFS_EXTENT_TREE_OBJECTID
)
6234 total_extent_tree_bytes
+= buf
->len
;
6236 free_extent_buffer(buf
);
6240 static int add_root_to_pending(struct extent_buffer
*buf
,
6241 struct cache_tree
*extent_cache
,
6242 struct cache_tree
*pending
,
6243 struct cache_tree
*seen
,
6244 struct cache_tree
*nodes
,
6247 struct extent_record tmpl
;
6250 if (btrfs_header_level(buf
) > 0)
6251 add_pending(nodes
, seen
, buf
->start
, buf
->len
);
6253 add_pending(pending
, seen
, buf
->start
, buf
->len
);
6255 memset(&tmpl
, 0, sizeof(tmpl
));
6256 tmpl
.start
= buf
->start
;
6261 tmpl
.max_size
= buf
->len
;
6262 add_extent_rec(extent_cache
, &tmpl
);
6264 if (objectid
== BTRFS_TREE_RELOC_OBJECTID
||
6265 btrfs_header_backref_rev(buf
) < BTRFS_MIXED_BACKREF_REV
)
6266 ret
= add_tree_backref(extent_cache
, buf
->start
, buf
->start
,
6269 ret
= add_tree_backref(extent_cache
, buf
->start
, 0, objectid
,
6274 /* as we fix the tree, we might be deleting blocks that
6275 * we're tracking for repair. This hook makes sure we
6276 * remove any backrefs for blocks as we are fixing them.
6278 static int free_extent_hook(struct btrfs_trans_handle
*trans
,
6279 struct btrfs_root
*root
,
6280 u64 bytenr
, u64 num_bytes
, u64 parent
,
6281 u64 root_objectid
, u64 owner
, u64 offset
,
6284 struct extent_record
*rec
;
6285 struct cache_extent
*cache
;
6287 struct cache_tree
*extent_cache
= root
->fs_info
->fsck_extent_cache
;
6289 is_data
= owner
>= BTRFS_FIRST_FREE_OBJECTID
;
6290 cache
= lookup_cache_extent(extent_cache
, bytenr
, num_bytes
);
6294 rec
= container_of(cache
, struct extent_record
, cache
);
6296 struct data_backref
*back
;
6298 back
= find_data_backref(rec
, parent
, root_objectid
, owner
,
6299 offset
, 1, bytenr
, num_bytes
);
6302 if (back
->node
.found_ref
) {
6303 back
->found_ref
-= refs_to_drop
;
6305 rec
->refs
-= refs_to_drop
;
6307 if (back
->node
.found_extent_tree
) {
6308 back
->num_refs
-= refs_to_drop
;
6309 if (rec
->extent_item_refs
)
6310 rec
->extent_item_refs
-= refs_to_drop
;
6312 if (back
->found_ref
== 0)
6313 back
->node
.found_ref
= 0;
6314 if (back
->num_refs
== 0)
6315 back
->node
.found_extent_tree
= 0;
6317 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
6318 rb_erase(&back
->node
.node
, &rec
->backref_tree
);
6322 struct tree_backref
*back
;
6324 back
= find_tree_backref(rec
, parent
, root_objectid
);
6327 if (back
->node
.found_ref
) {
6330 back
->node
.found_ref
= 0;
6332 if (back
->node
.found_extent_tree
) {
6333 if (rec
->extent_item_refs
)
6334 rec
->extent_item_refs
--;
6335 back
->node
.found_extent_tree
= 0;
6337 if (!back
->node
.found_extent_tree
&& back
->node
.found_ref
) {
6338 rb_erase(&back
->node
.node
, &rec
->backref_tree
);
6342 maybe_free_extent_rec(extent_cache
, rec
);
6347 static int delete_extent_records(struct btrfs_trans_handle
*trans
,
6348 struct btrfs_path
*path
,
6351 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
6352 struct btrfs_key key
;
6353 struct btrfs_key found_key
;
6354 struct extent_buffer
*leaf
;
6359 key
.objectid
= bytenr
;
6361 key
.offset
= (u64
)-1;
6364 ret
= btrfs_search_slot(trans
, fs_info
->extent_root
, &key
,
6371 if (path
->slots
[0] == 0)
6377 leaf
= path
->nodes
[0];
6378 slot
= path
->slots
[0];
6380 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
6381 if (found_key
.objectid
!= bytenr
)
6384 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
6385 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
&&
6386 found_key
.type
!= BTRFS_TREE_BLOCK_REF_KEY
&&
6387 found_key
.type
!= BTRFS_EXTENT_DATA_REF_KEY
&&
6388 found_key
.type
!= BTRFS_EXTENT_REF_V0_KEY
&&
6389 found_key
.type
!= BTRFS_SHARED_BLOCK_REF_KEY
&&
6390 found_key
.type
!= BTRFS_SHARED_DATA_REF_KEY
) {
6391 btrfs_release_path(path
);
6392 if (found_key
.type
== 0) {
6393 if (found_key
.offset
== 0)
6395 key
.offset
= found_key
.offset
- 1;
6396 key
.type
= found_key
.type
;
6398 key
.type
= found_key
.type
- 1;
6399 key
.offset
= (u64
)-1;
6404 "repair deleting extent record: key [%llu,%u,%llu]\n",
6405 found_key
.objectid
, found_key
.type
, found_key
.offset
);
6407 ret
= btrfs_del_item(trans
, fs_info
->extent_root
, path
);
6410 btrfs_release_path(path
);
6412 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
||
6413 found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
6414 u64 bytes
= (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
) ?
6415 found_key
.offset
: fs_info
->nodesize
;
6417 ret
= btrfs_update_block_group(fs_info
->extent_root
,
6418 bytenr
, bytes
, 0, 0);
6424 btrfs_release_path(path
);
6429 * for a single backref, this will allocate a new extent
6430 * and add the backref to it.
6432 static int record_extent(struct btrfs_trans_handle
*trans
,
6433 struct btrfs_fs_info
*info
,
6434 struct btrfs_path
*path
,
6435 struct extent_record
*rec
,
6436 struct extent_backref
*back
,
6437 int allocated
, u64 flags
)
6440 struct btrfs_root
*extent_root
= info
->extent_root
;
6441 struct extent_buffer
*leaf
;
6442 struct btrfs_key ins_key
;
6443 struct btrfs_extent_item
*ei
;
6444 struct data_backref
*dback
;
6445 struct btrfs_tree_block_info
*bi
;
6448 rec
->max_size
= max_t(u64
, rec
->max_size
,
6452 u32 item_size
= sizeof(*ei
);
6455 item_size
+= sizeof(*bi
);
6457 ins_key
.objectid
= rec
->start
;
6458 ins_key
.offset
= rec
->max_size
;
6459 ins_key
.type
= BTRFS_EXTENT_ITEM_KEY
;
6461 ret
= btrfs_insert_empty_item(trans
, extent_root
, path
,
6462 &ins_key
, item_size
);
6466 leaf
= path
->nodes
[0];
6467 ei
= btrfs_item_ptr(leaf
, path
->slots
[0],
6468 struct btrfs_extent_item
);
6470 btrfs_set_extent_refs(leaf
, ei
, 0);
6471 btrfs_set_extent_generation(leaf
, ei
, rec
->generation
);
6473 if (back
->is_data
) {
6474 btrfs_set_extent_flags(leaf
, ei
,
6475 BTRFS_EXTENT_FLAG_DATA
);
6477 struct btrfs_disk_key copy_key
;
6479 bi
= (struct btrfs_tree_block_info
*)(ei
+ 1);
6480 memset_extent_buffer(leaf
, 0, (unsigned long)bi
,
6483 btrfs_set_disk_key_objectid(©_key
,
6484 rec
->info_objectid
);
6485 btrfs_set_disk_key_type(©_key
, 0);
6486 btrfs_set_disk_key_offset(©_key
, 0);
6488 btrfs_set_tree_block_level(leaf
, bi
, rec
->info_level
);
6489 btrfs_set_tree_block_key(leaf
, bi
, ©_key
);
6491 btrfs_set_extent_flags(leaf
, ei
,
6492 flags
| BTRFS_EXTENT_FLAG_TREE_BLOCK
);
6495 btrfs_mark_buffer_dirty(leaf
);
6496 ret
= btrfs_update_block_group(extent_root
, rec
->start
,
6497 rec
->max_size
, 1, 0);
6500 btrfs_release_path(path
);
6503 if (back
->is_data
) {
6507 dback
= to_data_backref(back
);
6508 if (back
->full_backref
)
6509 parent
= dback
->parent
;
6513 for (i
= 0; i
< dback
->found_ref
; i
++) {
6514 /* if parent != 0, we're doing a full backref
6515 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6516 * just makes the backref allocator create a data
6519 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
6520 rec
->start
, rec
->max_size
,
6524 BTRFS_FIRST_FREE_OBJECTID
:
6531 "adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
6532 (unsigned long long)rec
->start
,
6533 back
->full_backref
? "parent" : "root",
6534 back
->full_backref
? (unsigned long long)parent
:
6535 (unsigned long long)dback
->root
,
6536 (unsigned long long)dback
->owner
,
6537 (unsigned long long)dback
->offset
, dback
->found_ref
);
6540 struct tree_backref
*tback
;
6542 tback
= to_tree_backref(back
);
6543 if (back
->full_backref
)
6544 parent
= tback
->parent
;
6548 ret
= btrfs_inc_extent_ref(trans
, info
->extent_root
,
6549 rec
->start
, rec
->max_size
,
6550 parent
, tback
->root
, 0, 0);
6552 "adding new tree backref on start %llu len %llu parent %llu root %llu\n",
6553 rec
->start
, rec
->max_size
, parent
, tback
->root
);
6556 btrfs_release_path(path
);
6560 static struct extent_entry
*find_entry(struct list_head
*entries
,
6561 u64 bytenr
, u64 bytes
)
6563 struct extent_entry
*entry
= NULL
;
6565 list_for_each_entry(entry
, entries
, list
) {
6566 if (entry
->bytenr
== bytenr
&& entry
->bytes
== bytes
)
6573 static struct extent_entry
*find_most_right_entry(struct list_head
*entries
)
6575 struct extent_entry
*entry
, *best
= NULL
, *prev
= NULL
;
6577 list_for_each_entry(entry
, entries
, list
) {
6579 * If there are as many broken entries as entries then we know
6580 * not to trust this particular entry.
6582 if (entry
->broken
== entry
->count
)
6586 * Special case, when there are only two entries and 'best' is
6596 * If our current entry == best then we can't be sure our best
6597 * is really the best, so we need to keep searching.
6599 if (best
&& best
->count
== entry
->count
) {
6605 /* Prev == entry, not good enough, have to keep searching */
6606 if (!prev
->broken
&& prev
->count
== entry
->count
)
6610 best
= (prev
->count
> entry
->count
) ? prev
: entry
;
6611 else if (best
->count
< entry
->count
)
6619 static int repair_ref(struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
6620 struct data_backref
*dback
, struct extent_entry
*entry
)
6622 struct btrfs_trans_handle
*trans
;
6623 struct btrfs_root
*root
;
6624 struct btrfs_file_extent_item
*fi
;
6625 struct extent_buffer
*leaf
;
6626 struct btrfs_key key
;
6630 key
.objectid
= dback
->root
;
6631 key
.type
= BTRFS_ROOT_ITEM_KEY
;
6632 key
.offset
= (u64
)-1;
6633 root
= btrfs_read_fs_root(info
, &key
);
6635 fprintf(stderr
, "Couldn't find root for our ref\n");
6640 * The backref points to the original offset of the extent if it was
6641 * split, so we need to search down to the offset we have and then walk
6642 * forward until we find the backref we're looking for.
6644 key
.objectid
= dback
->owner
;
6645 key
.type
= BTRFS_EXTENT_DATA_KEY
;
6646 key
.offset
= dback
->offset
;
6647 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
6649 fprintf(stderr
, "Error looking up ref %d\n", ret
);
6654 if (path
->slots
[0] >= btrfs_header_nritems(path
->nodes
[0])) {
6655 ret
= btrfs_next_leaf(root
, path
);
6657 fprintf(stderr
, "Couldn't find our ref, next\n");
6661 leaf
= path
->nodes
[0];
6662 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
6663 if (key
.objectid
!= dback
->owner
||
6664 key
.type
!= BTRFS_EXTENT_DATA_KEY
) {
6665 fprintf(stderr
, "Couldn't find our ref, search\n");
6668 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6669 struct btrfs_file_extent_item
);
6670 bytenr
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
6671 bytes
= btrfs_file_extent_disk_num_bytes(leaf
, fi
);
6673 if (bytenr
== dback
->disk_bytenr
&& bytes
== dback
->bytes
)
6678 btrfs_release_path(path
);
6680 trans
= btrfs_start_transaction(root
, 1);
6682 return PTR_ERR(trans
);
6685 * Ok we have the key of the file extent we want to fix, now we can cow
6686 * down to the thing and fix it.
6688 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, 1);
6690 fprintf(stderr
, "error cowing down to ref [%llu,%u,%llu]: %d\n",
6691 key
.objectid
, key
.type
, key
.offset
, ret
);
6696 "well that's odd, we just found this key [%llu,%u,%llu]\n",
6697 key
.objectid
, key
.type
, key
.offset
);
6701 leaf
= path
->nodes
[0];
6702 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
6703 struct btrfs_file_extent_item
);
6705 if (btrfs_file_extent_compression(leaf
, fi
) &&
6706 dback
->disk_bytenr
!= entry
->bytenr
) {
6708 "ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
6709 dback
->disk_bytenr
);
6714 if (dback
->node
.broken
&& dback
->disk_bytenr
!= entry
->bytenr
) {
6715 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6716 } else if (dback
->disk_bytenr
> entry
->bytenr
) {
6717 u64 off_diff
, offset
;
6719 off_diff
= dback
->disk_bytenr
- entry
->bytenr
;
6720 offset
= btrfs_file_extent_offset(leaf
, fi
);
6721 if (dback
->disk_bytenr
+ offset
+
6722 btrfs_file_extent_num_bytes(leaf
, fi
) >
6723 entry
->bytenr
+ entry
->bytes
) {
6725 "ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6726 dback
->disk_bytenr
);
6731 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6732 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
6733 } else if (dback
->disk_bytenr
< entry
->bytenr
) {
6736 offset
= btrfs_file_extent_offset(leaf
, fi
);
6737 if (dback
->disk_bytenr
+ offset
< entry
->bytenr
) {
6739 "ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
6740 dback
->disk_bytenr
);
6745 offset
+= dback
->disk_bytenr
;
6746 offset
-= entry
->bytenr
;
6747 btrfs_set_file_extent_disk_bytenr(leaf
, fi
, entry
->bytenr
);
6748 btrfs_set_file_extent_offset(leaf
, fi
, offset
);
6751 btrfs_set_file_extent_disk_num_bytes(leaf
, fi
, entry
->bytes
);
6754 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6755 * only do this if we aren't using compression, otherwise it's a
6758 if (!btrfs_file_extent_compression(leaf
, fi
))
6759 btrfs_set_file_extent_ram_bytes(leaf
, fi
, entry
->bytes
);
6761 printf("ram bytes may be wrong?\n");
6762 btrfs_mark_buffer_dirty(leaf
);
6764 err
= btrfs_commit_transaction(trans
, root
);
6765 btrfs_release_path(path
);
6766 return ret
? ret
: err
;
6769 static int verify_backrefs(struct btrfs_fs_info
*info
, struct btrfs_path
*path
,
6770 struct extent_record
*rec
)
6772 struct extent_backref
*back
, *tmp
;
6773 struct data_backref
*dback
;
6774 struct extent_entry
*entry
, *best
= NULL
;
6777 int broken_entries
= 0;
6782 * Metadata is easy and the backrefs should always agree on bytenr and
6783 * size, if not we've got bigger issues.
6788 rbtree_postorder_for_each_entry_safe(back
, tmp
,
6789 &rec
->backref_tree
, node
) {
6790 if (back
->full_backref
|| !back
->is_data
)
6793 dback
= to_data_backref(back
);
6796 * We only pay attention to backrefs that we found a real
6799 if (dback
->found_ref
== 0)
6803 * For now we only catch when the bytes don't match, not the
6804 * bytenr. We can easily do this at the same time, but I want
6805 * to have a fs image to test on before we just add repair
6806 * functionality willy-nilly so we know we won't screw up the
6810 entry
= find_entry(&entries
, dback
->disk_bytenr
,
6813 entry
= malloc(sizeof(struct extent_entry
));
6818 memset(entry
, 0, sizeof(*entry
));
6819 entry
->bytenr
= dback
->disk_bytenr
;
6820 entry
->bytes
= dback
->bytes
;
6821 list_add_tail(&entry
->list
, &entries
);
6826 * If we only have on entry we may think the entries agree when
6827 * in reality they don't so we have to do some extra checking.
6829 if (dback
->disk_bytenr
!= rec
->start
||
6830 dback
->bytes
!= rec
->nr
|| back
->broken
)
6841 /* Yay all the backrefs agree, carry on good sir */
6842 if (nr_entries
<= 1 && !mismatch
)
6846 "attempting to repair backref discrepency for bytenr %llu\n",
6850 * First we want to see if the backrefs can agree amongst themselves who
6851 * is right, so figure out which one of the entries has the highest
6854 best
= find_most_right_entry(&entries
);
6857 * Ok so we may have an even split between what the backrefs think, so
6858 * this is where we use the extent ref to see what it thinks.
6861 entry
= find_entry(&entries
, rec
->start
, rec
->nr
);
6862 if (!entry
&& (!broken_entries
|| !rec
->found_rec
)) {
6864 "backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
6865 rec
->start
, rec
->nr
);
6868 } else if (!entry
) {
6870 * Ok our backrefs were broken, we'll assume this is the
6871 * correct value and add an entry for this range.
6873 entry
= malloc(sizeof(struct extent_entry
));
6878 memset(entry
, 0, sizeof(*entry
));
6879 entry
->bytenr
= rec
->start
;
6880 entry
->bytes
= rec
->nr
;
6881 list_add_tail(&entry
->list
, &entries
);
6885 best
= find_most_right_entry(&entries
);
6888 "backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
6889 rec
->start
, rec
->nr
);
6896 * I don't think this can happen currently as we'll abort() if we catch
6897 * this case higher up, but in case somebody removes that we still can't
6898 * deal with it properly here yet, so just bail out of that's the case.
6900 if (best
->bytenr
!= rec
->start
) {
6902 "extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case. bytenr is %llu, bytes is %llu\n",
6903 rec
->start
, rec
->nr
);
6909 * Ok great we all agreed on an extent record, let's go find the real
6910 * references and fix up the ones that don't match.
6912 rbtree_postorder_for_each_entry_safe(back
, tmp
,
6913 &rec
->backref_tree
, node
) {
6914 if (back
->full_backref
|| !back
->is_data
)
6917 dback
= to_data_backref(back
);
6920 * Still ignoring backrefs that don't have a real ref attached
6923 if (dback
->found_ref
== 0)
6926 if (dback
->bytes
== best
->bytes
&&
6927 dback
->disk_bytenr
== best
->bytenr
)
6930 ret
= repair_ref(info
, path
, dback
, best
);
6936 * Ok we messed with the actual refs, which means we need to drop our
6937 * entire cache and go back and rescan. I know this is a huge pain and
6938 * adds a lot of extra work, but it's the only way to be safe. Once all
6939 * the backrefs agree we may not need to do anything to the extent
6944 while (!list_empty(&entries
)) {
6945 entry
= list_entry(entries
.next
, struct extent_entry
, list
);
6946 list_del_init(&entry
->list
);
6952 static int process_duplicates(struct cache_tree
*extent_cache
,
6953 struct extent_record
*rec
)
6955 struct extent_record
*good
, *tmp
;
6956 struct cache_extent
*cache
;
6960 * If we found a extent record for this extent then return, or if we
6961 * have more than one duplicate we are likely going to need to delete
6964 if (rec
->found_rec
|| rec
->num_duplicates
> 1)
6967 /* Shouldn't happen but just in case */
6968 BUG_ON(!rec
->num_duplicates
);
6971 * So this happens if we end up with a backref that doesn't match the
6972 * actual extent entry. So either the backref is bad or the extent
6973 * entry is bad. Either way we want to have the extent_record actually
6974 * reflect what we found in the extent_tree, so we need to take the
6975 * duplicate out and use that as the extent_record since the only way we
6976 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6978 remove_cache_extent(extent_cache
, &rec
->cache
);
6980 good
= to_extent_record(rec
->dups
.next
);
6981 list_del_init(&good
->list
);
6982 INIT_LIST_HEAD(&good
->backrefs
);
6983 INIT_LIST_HEAD(&good
->dups
);
6984 good
->cache
.start
= good
->start
;
6985 good
->cache
.size
= good
->nr
;
6986 good
->content_checked
= 0;
6987 good
->owner_ref_checked
= 0;
6988 good
->num_duplicates
= 0;
6989 good
->refs
= rec
->refs
;
6990 list_splice_init(&rec
->backrefs
, &good
->backrefs
);
6992 cache
= lookup_cache_extent(extent_cache
, good
->start
,
6996 tmp
= container_of(cache
, struct extent_record
, cache
);
6999 * If we find another overlapping extent and it's found_rec is
7000 * set then it's a duplicate and we need to try and delete
7003 if (tmp
->found_rec
|| tmp
->num_duplicates
> 0) {
7004 if (list_empty(&good
->list
))
7005 list_add_tail(&good
->list
,
7006 &duplicate_extents
);
7007 good
->num_duplicates
+= tmp
->num_duplicates
+ 1;
7008 list_splice_init(&tmp
->dups
, &good
->dups
);
7009 list_del_init(&tmp
->list
);
7010 list_add_tail(&tmp
->list
, &good
->dups
);
7011 remove_cache_extent(extent_cache
, &tmp
->cache
);
7016 * Ok we have another non extent item backed extent rec, so lets
7017 * just add it to this extent and carry on like we did above.
7019 good
->refs
+= tmp
->refs
;
7020 list_splice_init(&tmp
->backrefs
, &good
->backrefs
);
7021 remove_cache_extent(extent_cache
, &tmp
->cache
);
7024 ret
= insert_cache_extent(extent_cache
, &good
->cache
);
7027 return good
->num_duplicates
? 0 : 1;
7030 static int delete_duplicate_records(struct btrfs_root
*root
,
7031 struct extent_record
*rec
)
7033 struct btrfs_trans_handle
*trans
;
7034 LIST_HEAD(delete_list
);
7035 struct btrfs_path path
;
7036 struct extent_record
*tmp
, *good
, *n
;
7039 struct btrfs_key key
;
7041 btrfs_init_path(&path
);
7044 /* Find the record that covers all of the duplicates. */
7045 list_for_each_entry(tmp
, &rec
->dups
, list
) {
7046 if (good
->start
< tmp
->start
)
7048 if (good
->nr
> tmp
->nr
)
7051 if (tmp
->start
+ tmp
->nr
< good
->start
+ good
->nr
) {
7053 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
7054 tmp
->start
, tmp
->nr
, good
->start
, good
->nr
);
7061 list_add_tail(&rec
->list
, &delete_list
);
7063 list_for_each_entry_safe(tmp
, n
, &rec
->dups
, list
) {
7066 list_move_tail(&tmp
->list
, &delete_list
);
7069 root
= root
->fs_info
->extent_root
;
7070 trans
= btrfs_start_transaction(root
, 1);
7071 if (IS_ERR(trans
)) {
7072 ret
= PTR_ERR(trans
);
7076 list_for_each_entry(tmp
, &delete_list
, list
) {
7077 if (tmp
->found_rec
== 0)
7079 key
.objectid
= tmp
->start
;
7080 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
7081 key
.offset
= tmp
->nr
;
7083 /* Shouldn't happen but just in case */
7084 if (tmp
->metadata
) {
7086 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7087 tmp
->start
, tmp
->nr
);
7091 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
7097 ret
= btrfs_del_item(trans
, root
, &path
);
7100 btrfs_release_path(&path
);
7103 err
= btrfs_commit_transaction(trans
, root
);
7107 while (!list_empty(&delete_list
)) {
7108 tmp
= to_extent_record(delete_list
.next
);
7109 list_del_init(&tmp
->list
);
7115 while (!list_empty(&rec
->dups
)) {
7116 tmp
= to_extent_record(rec
->dups
.next
);
7117 list_del_init(&tmp
->list
);
7121 btrfs_release_path(&path
);
7123 if (!ret
&& !nr_del
)
7124 rec
->num_duplicates
= 0;
7126 return ret
? ret
: nr_del
;
7129 static int find_possible_backrefs(struct btrfs_fs_info
*info
,
7130 struct btrfs_path
*path
,
7131 struct cache_tree
*extent_cache
,
7132 struct extent_record
*rec
)
7134 struct btrfs_root
*root
;
7135 struct extent_backref
*back
, *tmp
;
7136 struct data_backref
*dback
;
7137 struct cache_extent
*cache
;
7138 struct btrfs_file_extent_item
*fi
;
7139 struct btrfs_key key
;
7143 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7144 &rec
->backref_tree
, node
) {
7145 /* Don't care about full backrefs (poor unloved backrefs) */
7146 if (back
->full_backref
|| !back
->is_data
)
7149 dback
= to_data_backref(back
);
7151 /* We found this one, we don't need to do a lookup */
7152 if (dback
->found_ref
)
7155 key
.objectid
= dback
->root
;
7156 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7157 key
.offset
= (u64
)-1;
7159 root
= btrfs_read_fs_root(info
, &key
);
7161 /* No root, definitely a bad ref, skip */
7162 if (IS_ERR(root
) && PTR_ERR(root
) == -ENOENT
)
7164 /* Other err, exit */
7166 return PTR_ERR(root
);
7168 key
.objectid
= dback
->owner
;
7169 key
.type
= BTRFS_EXTENT_DATA_KEY
;
7170 key
.offset
= dback
->offset
;
7171 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
7173 btrfs_release_path(path
);
7176 /* Didn't find it, we can carry on */
7181 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
7182 struct btrfs_file_extent_item
);
7183 bytenr
= btrfs_file_extent_disk_bytenr(path
->nodes
[0], fi
);
7184 bytes
= btrfs_file_extent_disk_num_bytes(path
->nodes
[0], fi
);
7185 btrfs_release_path(path
);
7186 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
7188 struct extent_record
*tmp
;
7190 tmp
= container_of(cache
, struct extent_record
, cache
);
7193 * If we found an extent record for the bytenr for this
7194 * particular backref then we can't add it to our
7195 * current extent record. We only want to add backrefs
7196 * that don't have a corresponding extent item in the
7197 * extent tree since they likely belong to this record
7198 * and we need to fix it if it doesn't match bytenrs.
7204 dback
->found_ref
+= 1;
7205 dback
->disk_bytenr
= bytenr
;
7206 dback
->bytes
= bytes
;
7209 * Set this so the verify backref code knows not to trust the
7210 * values in this backref.
7219 * Record orphan data ref into corresponding root.
7221 * Return 0 if the extent item contains data ref and recorded.
7222 * Return 1 if the extent item contains no useful data ref
7223 * On that case, it may contains only shared_dataref or metadata backref
7224 * or the file extent exists(this should be handled by the extent bytenr
7226 * Return <0 if something goes wrong.
7228 static int record_orphan_data_extents(struct btrfs_fs_info
*fs_info
,
7229 struct extent_record
*rec
)
7231 struct btrfs_key key
;
7232 struct btrfs_root
*dest_root
;
7233 struct extent_backref
*back
, *tmp
;
7234 struct data_backref
*dback
;
7235 struct orphan_data_extent
*orphan
;
7236 struct btrfs_path path
;
7237 int recorded_data_ref
= 0;
7242 btrfs_init_path(&path
);
7243 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7244 &rec
->backref_tree
, node
) {
7245 if (back
->full_backref
|| !back
->is_data
||
7246 !back
->found_extent_tree
)
7248 dback
= to_data_backref(back
);
7249 if (dback
->found_ref
)
7251 key
.objectid
= dback
->root
;
7252 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7253 key
.offset
= (u64
)-1;
7255 dest_root
= btrfs_read_fs_root(fs_info
, &key
);
7257 /* For non-exist root we just skip it */
7258 if (IS_ERR(dest_root
) || !dest_root
)
7261 key
.objectid
= dback
->owner
;
7262 key
.type
= BTRFS_EXTENT_DATA_KEY
;
7263 key
.offset
= dback
->offset
;
7265 ret
= btrfs_search_slot(NULL
, dest_root
, &key
, &path
, 0, 0);
7266 btrfs_release_path(&path
);
7268 * For ret < 0, it's OK since the fs-tree may be corrupted,
7269 * we need to record it for inode/file extent rebuild.
7270 * For ret > 0, we record it only for file extent rebuild.
7271 * For ret == 0, the file extent exists but only bytenr
7272 * mismatch, let the original bytenr fix routine to handle,
7278 orphan
= malloc(sizeof(*orphan
));
7283 INIT_LIST_HEAD(&orphan
->list
);
7284 orphan
->root
= dback
->root
;
7285 orphan
->objectid
= dback
->owner
;
7286 orphan
->offset
= dback
->offset
;
7287 orphan
->disk_bytenr
= rec
->cache
.start
;
7288 orphan
->disk_len
= rec
->cache
.size
;
7289 list_add(&dest_root
->orphan_data_extents
, &orphan
->list
);
7290 recorded_data_ref
= 1;
7293 btrfs_release_path(&path
);
7295 return !recorded_data_ref
;
7301 * when an incorrect extent item is found, this will delete
7302 * all of the existing entries for it and recreate them
7303 * based on what the tree scan found.
7305 static int fixup_extent_refs(struct btrfs_fs_info
*info
,
7306 struct cache_tree
*extent_cache
,
7307 struct extent_record
*rec
)
7309 struct btrfs_trans_handle
*trans
= NULL
;
7311 struct btrfs_path path
;
7312 struct cache_extent
*cache
;
7313 struct extent_backref
*back
, *tmp
;
7317 if (rec
->flag_block_full_backref
)
7318 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7320 btrfs_init_path(&path
);
7321 if (rec
->refs
!= rec
->extent_item_refs
&& !rec
->metadata
) {
7323 * Sometimes the backrefs themselves are so broken they don't
7324 * get attached to any meaningful rec, so first go back and
7325 * check any of our backrefs that we couldn't find and throw
7326 * them into the list if we find the backref so that
7327 * verify_backrefs can figure out what to do.
7329 ret
= find_possible_backrefs(info
, &path
, extent_cache
, rec
);
7334 /* step one, make sure all of the backrefs agree */
7335 ret
= verify_backrefs(info
, &path
, rec
);
7339 trans
= btrfs_start_transaction(info
->extent_root
, 1);
7340 if (IS_ERR(trans
)) {
7341 ret
= PTR_ERR(trans
);
7345 /* step two, delete all the existing records */
7346 ret
= delete_extent_records(trans
, &path
, rec
->start
);
7351 /* was this block corrupt? If so, don't add references to it */
7352 cache
= lookup_cache_extent(info
->corrupt_blocks
,
7353 rec
->start
, rec
->max_size
);
7359 /* step three, recreate all the refs we did find */
7360 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7361 &rec
->backref_tree
, node
) {
7363 * if we didn't find any references, don't create a
7366 if (!back
->found_ref
)
7369 rec
->bad_full_backref
= 0;
7370 ret
= record_extent(trans
, info
, &path
, rec
, back
, allocated
,
7379 int err
= btrfs_commit_transaction(trans
, info
->extent_root
);
7386 fprintf(stderr
, "Repaired extent references for %llu\n",
7387 (unsigned long long)rec
->start
);
7389 btrfs_release_path(&path
);
7393 static int fixup_extent_flags(struct btrfs_fs_info
*fs_info
,
7394 struct extent_record
*rec
)
7396 struct btrfs_trans_handle
*trans
;
7397 struct btrfs_root
*root
= fs_info
->extent_root
;
7398 struct btrfs_path path
;
7399 struct btrfs_extent_item
*ei
;
7400 struct btrfs_key key
;
7404 key
.objectid
= rec
->start
;
7405 if (rec
->metadata
) {
7406 key
.type
= BTRFS_METADATA_ITEM_KEY
;
7407 key
.offset
= rec
->info_level
;
7409 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
7410 key
.offset
= rec
->max_size
;
7413 trans
= btrfs_start_transaction(root
, 0);
7415 return PTR_ERR(trans
);
7417 btrfs_init_path(&path
);
7418 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, 0, 1);
7420 btrfs_release_path(&path
);
7421 btrfs_commit_transaction(trans
, root
);
7424 fprintf(stderr
, "Didn't find extent for %llu\n",
7425 (unsigned long long)rec
->start
);
7426 btrfs_release_path(&path
);
7427 btrfs_commit_transaction(trans
, root
);
7431 ei
= btrfs_item_ptr(path
.nodes
[0], path
.slots
[0],
7432 struct btrfs_extent_item
);
7433 flags
= btrfs_extent_flags(path
.nodes
[0], ei
);
7434 if (rec
->flag_block_full_backref
) {
7435 fprintf(stderr
, "setting full backref on %llu\n",
7436 (unsigned long long)key
.objectid
);
7437 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7439 fprintf(stderr
, "clearing full backref on %llu\n",
7440 (unsigned long long)key
.objectid
);
7441 flags
&= ~BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7443 btrfs_set_extent_flags(path
.nodes
[0], ei
, flags
);
7444 btrfs_mark_buffer_dirty(path
.nodes
[0]);
7445 btrfs_release_path(&path
);
7446 ret
= btrfs_commit_transaction(trans
, root
);
7448 fprintf(stderr
, "Repaired extent flags for %llu\n",
7449 (unsigned long long)rec
->start
);
7454 /* right now we only prune from the extent allocation tree */
7455 static int prune_one_block(struct btrfs_trans_handle
*trans
,
7456 struct btrfs_fs_info
*info
,
7457 struct btrfs_corrupt_block
*corrupt
)
7460 struct btrfs_path path
;
7461 struct extent_buffer
*eb
;
7465 int level
= corrupt
->level
+ 1;
7467 btrfs_init_path(&path
);
7469 /* we want to stop at the parent to our busted block */
7470 path
.lowest_level
= level
;
7472 ret
= btrfs_search_slot(trans
, info
->extent_root
,
7473 &corrupt
->key
, &path
, -1, 1);
7478 eb
= path
.nodes
[level
];
7485 * hopefully the search gave us the block we want to prune,
7486 * lets try that first
7488 slot
= path
.slots
[level
];
7489 found
= btrfs_node_blockptr(eb
, slot
);
7490 if (found
== corrupt
->cache
.start
)
7493 nritems
= btrfs_header_nritems(eb
);
7495 /* the search failed, lets scan this node and hope we find it */
7496 for (slot
= 0; slot
< nritems
; slot
++) {
7497 found
= btrfs_node_blockptr(eb
, slot
);
7498 if (found
== corrupt
->cache
.start
)
7502 * We couldn't find the bad block.
7503 * TODO: search all the nodes for pointers to this block
7505 if (eb
== info
->extent_root
->node
) {
7510 btrfs_release_path(&path
);
7515 printk("deleting pointer to block %llu\n", corrupt
->cache
.start
);
7516 ret
= btrfs_del_ptr(info
->extent_root
, &path
, level
, slot
);
7519 btrfs_release_path(&path
);
7523 static int prune_corrupt_blocks(struct btrfs_fs_info
*info
)
7525 struct btrfs_trans_handle
*trans
= NULL
;
7526 struct cache_extent
*cache
;
7527 struct btrfs_corrupt_block
*corrupt
;
7530 cache
= search_cache_extent(info
->corrupt_blocks
, 0);
7534 trans
= btrfs_start_transaction(info
->extent_root
, 1);
7536 return PTR_ERR(trans
);
7538 corrupt
= container_of(cache
, struct btrfs_corrupt_block
, cache
);
7539 prune_one_block(trans
, info
, corrupt
);
7540 remove_cache_extent(info
->corrupt_blocks
, cache
);
7543 return btrfs_commit_transaction(trans
, info
->extent_root
);
7547 static int check_extent_refs(struct btrfs_root
*root
,
7548 struct cache_tree
*extent_cache
)
7550 struct extent_record
*rec
;
7551 struct cache_extent
*cache
;
7558 * if we're doing a repair, we have to make sure
7559 * we don't allocate from the problem extents.
7560 * In the worst case, this will be all the
7563 cache
= search_cache_extent(extent_cache
, 0);
7565 rec
= container_of(cache
, struct extent_record
, cache
);
7566 set_extent_dirty(root
->fs_info
->excluded_extents
,
7568 rec
->start
+ rec
->max_size
- 1);
7569 cache
= next_cache_extent(cache
);
7572 /* pin down all the corrupted blocks too */
7573 cache
= search_cache_extent(root
->fs_info
->corrupt_blocks
, 0);
7575 set_extent_dirty(root
->fs_info
->excluded_extents
,
7577 cache
->start
+ cache
->size
- 1);
7578 cache
= next_cache_extent(cache
);
7580 prune_corrupt_blocks(root
->fs_info
);
7581 reset_cached_block_groups(root
->fs_info
);
7584 reset_cached_block_groups(root
->fs_info
);
7587 * We need to delete any duplicate entries we find first otherwise we
7588 * could mess up the extent tree when we have backrefs that actually
7589 * belong to a different extent item and not the weird duplicate one.
7591 while (repair
&& !list_empty(&duplicate_extents
)) {
7592 rec
= to_extent_record(duplicate_extents
.next
);
7593 list_del_init(&rec
->list
);
7595 /* Sometimes we can find a backref before we find an actual
7596 * extent, so we need to process it a little bit to see if there
7597 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7598 * if this is a backref screwup. If we need to delete stuff
7599 * process_duplicates() will return 0, otherwise it will return
7602 if (process_duplicates(extent_cache
, rec
))
7604 ret
= delete_duplicate_records(root
, rec
);
7608 * delete_duplicate_records will return the number of entries
7609 * deleted, so if it's greater than 0 then we know we actually
7610 * did something and we need to remove.
7623 cache
= search_cache_extent(extent_cache
, 0);
7626 rec
= container_of(cache
, struct extent_record
, cache
);
7627 if (rec
->num_duplicates
) {
7629 "extent item %llu has multiple extent items\n",
7630 (unsigned long long)rec
->start
);
7634 if (rec
->refs
!= rec
->extent_item_refs
) {
7635 fprintf(stderr
, "ref mismatch on [%llu %llu] ",
7636 (unsigned long long)rec
->start
,
7637 (unsigned long long)rec
->nr
);
7638 fprintf(stderr
, "extent item %llu, found %llu\n",
7639 (unsigned long long)rec
->extent_item_refs
,
7640 (unsigned long long)rec
->refs
);
7641 ret
= record_orphan_data_extents(root
->fs_info
, rec
);
7647 if (all_backpointers_checked(rec
, 1)) {
7648 fprintf(stderr
, "backpointer mismatch on [%llu %llu]\n",
7649 (unsigned long long)rec
->start
,
7650 (unsigned long long)rec
->nr
);
7654 if (!rec
->owner_ref_checked
) {
7655 fprintf(stderr
, "owner ref check failed [%llu %llu]\n",
7656 (unsigned long long)rec
->start
,
7657 (unsigned long long)rec
->nr
);
7662 if (repair
&& fix
) {
7663 ret
= fixup_extent_refs(root
->fs_info
, extent_cache
,
7670 if (rec
->bad_full_backref
) {
7671 fprintf(stderr
, "bad full backref, on [%llu]\n",
7672 (unsigned long long)rec
->start
);
7674 ret
= fixup_extent_flags(root
->fs_info
, rec
);
7682 * Although it's not a extent ref's problem, we reuse this
7683 * routine for error reporting.
7684 * No repair function yet.
7686 if (rec
->crossing_stripes
) {
7688 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7689 rec
->start
, rec
->start
+ rec
->max_size
);
7693 if (rec
->wrong_chunk_type
) {
7695 "bad extent [%llu, %llu), type mismatch with chunk\n",
7696 rec
->start
, rec
->start
+ rec
->max_size
);
7701 remove_cache_extent(extent_cache
, cache
);
7702 free_all_extent_backrefs(rec
);
7703 if (!init_extent_tree
&& repair
&& (!cur_err
|| fix
))
7704 clear_extent_dirty(root
->fs_info
->excluded_extents
,
7706 rec
->start
+ rec
->max_size
- 1);
7711 if (ret
&& ret
!= -EAGAIN
) {
7712 fprintf(stderr
, "failed to repair damaged filesystem, aborting\n");
7715 struct btrfs_trans_handle
*trans
;
7717 root
= root
->fs_info
->extent_root
;
7718 trans
= btrfs_start_transaction(root
, 1);
7719 if (IS_ERR(trans
)) {
7720 ret
= PTR_ERR(trans
);
7724 ret
= btrfs_fix_block_accounting(trans
);
7727 ret
= btrfs_commit_transaction(trans
, root
);
7740 * Check the chunk with its block group/dev list ref:
7741 * Return 0 if all refs seems valid.
7742 * Return 1 if part of refs seems valid, need later check for rebuild ref
7743 * like missing block group and needs to search extent tree to rebuild them.
7744 * Return -1 if essential refs are missing and unable to rebuild.
7746 static int check_chunk_refs(struct chunk_record
*chunk_rec
,
7747 struct block_group_tree
*block_group_cache
,
7748 struct device_extent_tree
*dev_extent_cache
,
7751 struct cache_extent
*block_group_item
;
7752 struct block_group_record
*block_group_rec
;
7753 struct cache_extent
*dev_extent_item
;
7754 struct device_extent_record
*dev_extent_rec
;
7758 int metadump_v2
= 0;
7762 block_group_item
= lookup_cache_extent(&block_group_cache
->tree
,
7765 if (block_group_item
) {
7766 block_group_rec
= container_of(block_group_item
,
7767 struct block_group_record
,
7769 if (chunk_rec
->length
!= block_group_rec
->offset
||
7770 chunk_rec
->offset
!= block_group_rec
->objectid
||
7772 chunk_rec
->type_flags
!= block_group_rec
->flags
)) {
7775 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7776 chunk_rec
->objectid
,
7781 chunk_rec
->type_flags
,
7782 block_group_rec
->objectid
,
7783 block_group_rec
->type
,
7784 block_group_rec
->offset
,
7785 block_group_rec
->offset
,
7786 block_group_rec
->objectid
,
7787 block_group_rec
->flags
);
7790 list_del_init(&block_group_rec
->list
);
7791 chunk_rec
->bg_rec
= block_group_rec
;
7796 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7797 chunk_rec
->objectid
,
7802 chunk_rec
->type_flags
);
7809 length
= calc_stripe_length(chunk_rec
->type_flags
, chunk_rec
->length
,
7810 chunk_rec
->num_stripes
);
7811 for (i
= 0; i
< chunk_rec
->num_stripes
; ++i
) {
7812 devid
= chunk_rec
->stripes
[i
].devid
;
7813 offset
= chunk_rec
->stripes
[i
].offset
;
7814 dev_extent_item
= lookup_cache_extent2(&dev_extent_cache
->tree
,
7815 devid
, offset
, length
);
7816 if (dev_extent_item
) {
7817 dev_extent_rec
= container_of(dev_extent_item
,
7818 struct device_extent_record
,
7820 if (dev_extent_rec
->objectid
!= devid
||
7821 dev_extent_rec
->offset
!= offset
||
7822 dev_extent_rec
->chunk_offset
!= chunk_rec
->offset
||
7823 dev_extent_rec
->length
!= length
) {
7826 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7827 chunk_rec
->objectid
,
7830 chunk_rec
->stripes
[i
].devid
,
7831 chunk_rec
->stripes
[i
].offset
,
7832 dev_extent_rec
->objectid
,
7833 dev_extent_rec
->offset
,
7834 dev_extent_rec
->length
);
7837 list_move(&dev_extent_rec
->chunk_list
,
7838 &chunk_rec
->dextents
);
7843 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7844 chunk_rec
->objectid
,
7847 chunk_rec
->stripes
[i
].devid
,
7848 chunk_rec
->stripes
[i
].offset
);
7855 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7856 int check_chunks(struct cache_tree
*chunk_cache
,
7857 struct block_group_tree
*block_group_cache
,
7858 struct device_extent_tree
*dev_extent_cache
,
7859 struct list_head
*good
, struct list_head
*bad
,
7860 struct list_head
*rebuild
, int silent
)
7862 struct cache_extent
*chunk_item
;
7863 struct chunk_record
*chunk_rec
;
7864 struct block_group_record
*bg_rec
;
7865 struct device_extent_record
*dext_rec
;
7869 chunk_item
= first_cache_extent(chunk_cache
);
7870 while (chunk_item
) {
7871 chunk_rec
= container_of(chunk_item
, struct chunk_record
,
7873 err
= check_chunk_refs(chunk_rec
, block_group_cache
,
7874 dev_extent_cache
, silent
);
7877 if (err
== 0 && good
)
7878 list_add_tail(&chunk_rec
->list
, good
);
7879 if (err
> 0 && rebuild
)
7880 list_add_tail(&chunk_rec
->list
, rebuild
);
7882 list_add_tail(&chunk_rec
->list
, bad
);
7883 chunk_item
= next_cache_extent(chunk_item
);
7886 list_for_each_entry(bg_rec
, &block_group_cache
->block_groups
, list
) {
7889 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7897 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_chunk_orphans
,
7901 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7912 static int check_device_used(struct device_record
*dev_rec
,
7913 struct device_extent_tree
*dext_cache
)
7915 struct cache_extent
*cache
;
7916 struct device_extent_record
*dev_extent_rec
;
7919 cache
= search_cache_extent2(&dext_cache
->tree
, dev_rec
->devid
, 0);
7921 dev_extent_rec
= container_of(cache
,
7922 struct device_extent_record
,
7924 if (dev_extent_rec
->objectid
!= dev_rec
->devid
)
7927 list_del_init(&dev_extent_rec
->device_list
);
7928 total_byte
+= dev_extent_rec
->length
;
7929 cache
= next_cache_extent(cache
);
7932 if (total_byte
!= dev_rec
->byte_used
) {
7934 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7935 total_byte
, dev_rec
->byte_used
, dev_rec
->objectid
,
7936 dev_rec
->type
, dev_rec
->offset
);
7944 * Unlike device size alignment check above, some super total_bytes check
7945 * failure can lead to mount failure for newer kernel.
7947 * So this function will return the error for a fatal super total_bytes problem.
7949 static bool is_super_size_valid(struct btrfs_fs_info
*fs_info
)
7951 struct btrfs_device
*dev
;
7952 struct list_head
*dev_list
= &fs_info
->fs_devices
->devices
;
7953 u64 total_bytes
= 0;
7954 u64 super_bytes
= btrfs_super_total_bytes(fs_info
->super_copy
);
7956 list_for_each_entry(dev
, dev_list
, dev_list
)
7957 total_bytes
+= dev
->total_bytes
;
7959 /* Important check, which can cause unmountable fs */
7960 if (super_bytes
< total_bytes
) {
7961 error("super total bytes %llu smaller than real device(s) size %llu",
7962 super_bytes
, total_bytes
);
7963 error("mounting this fs may fail for newer kernels");
7964 error("this can be fixed by 'btrfs rescue fix-device-size'");
7969 * Optional check, just to make everything aligned and match with each
7972 * For a btrfs-image restored fs, we don't need to check it anyway.
7974 if (btrfs_super_flags(fs_info
->super_copy
) &
7975 (BTRFS_SUPER_FLAG_METADUMP
| BTRFS_SUPER_FLAG_METADUMP_V2
))
7977 if (!IS_ALIGNED(super_bytes
, fs_info
->sectorsize
) ||
7978 !IS_ALIGNED(total_bytes
, fs_info
->sectorsize
) ||
7979 super_bytes
!= total_bytes
) {
7980 warning("minor unaligned/mismatch device size detected");
7982 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7987 /* check btrfs_dev_item -> btrfs_dev_extent */
7988 static int check_devices(struct rb_root
*dev_cache
,
7989 struct device_extent_tree
*dev_extent_cache
)
7991 struct rb_node
*dev_node
;
7992 struct device_record
*dev_rec
;
7993 struct device_extent_record
*dext_rec
;
7997 dev_node
= rb_first(dev_cache
);
7999 dev_rec
= container_of(dev_node
, struct device_record
, node
);
8000 err
= check_device_used(dev_rec
, dev_extent_cache
);
8004 check_dev_size_alignment(dev_rec
->devid
, dev_rec
->total_byte
,
8005 global_info
->sectorsize
);
8006 dev_node
= rb_next(dev_node
);
8008 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_device_orphans
,
8011 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8012 dext_rec
->objectid
, dext_rec
->offset
, dext_rec
->length
);
8019 static int add_root_item_to_list(struct list_head
*head
,
8020 u64 objectid
, u64 bytenr
, u64 last_snapshot
,
8021 u8 level
, u8 drop_level
,
8022 struct btrfs_key
*drop_key
)
8024 struct root_item_record
*ri_rec
;
8026 ri_rec
= malloc(sizeof(*ri_rec
));
8029 ri_rec
->bytenr
= bytenr
;
8030 ri_rec
->objectid
= objectid
;
8031 ri_rec
->level
= level
;
8032 ri_rec
->drop_level
= drop_level
;
8033 ri_rec
->last_snapshot
= last_snapshot
;
8035 memcpy(&ri_rec
->drop_key
, drop_key
, sizeof(*drop_key
));
8036 list_add_tail(&ri_rec
->list
, head
);
8041 static void free_root_item_list(struct list_head
*list
)
8043 struct root_item_record
*ri_rec
;
8045 while (!list_empty(list
)) {
8046 ri_rec
= list_first_entry(list
, struct root_item_record
,
8048 list_del_init(&ri_rec
->list
);
8053 static int deal_root_from_list(struct list_head
*list
,
8054 struct btrfs_root
*root
,
8055 struct block_info
*bits
,
8057 struct cache_tree
*pending
,
8058 struct cache_tree
*seen
,
8059 struct cache_tree
*reada
,
8060 struct cache_tree
*nodes
,
8061 struct cache_tree
*extent_cache
,
8062 struct cache_tree
*chunk_cache
,
8063 struct rb_root
*dev_cache
,
8064 struct block_group_tree
*block_group_cache
,
8065 struct device_extent_tree
*dev_extent_cache
)
8070 while (!list_empty(list
)) {
8071 struct root_item_record
*rec
;
8072 struct extent_buffer
*buf
;
8074 rec
= list_entry(list
->next
,
8075 struct root_item_record
, list
);
8077 buf
= read_tree_block(root
->fs_info
, rec
->bytenr
, 0);
8078 if (!extent_buffer_uptodate(buf
)) {
8079 free_extent_buffer(buf
);
8083 ret
= add_root_to_pending(buf
, extent_cache
, pending
,
8084 seen
, nodes
, rec
->objectid
);
8088 * To rebuild extent tree, we need deal with snapshot
8089 * one by one, otherwise we deal with node firstly which
8090 * can maximize readahead.
8093 ret
= run_next_block(root
, bits
, bits_nr
, &last
,
8094 pending
, seen
, reada
, nodes
,
8095 extent_cache
, chunk_cache
,
8096 dev_cache
, block_group_cache
,
8097 dev_extent_cache
, rec
);
8101 free_extent_buffer(buf
);
8102 list_del(&rec
->list
);
8108 ret
= run_next_block(root
, bits
, bits_nr
, &last
, pending
, seen
,
8109 reada
, nodes
, extent_cache
, chunk_cache
,
8110 dev_cache
, block_group_cache
,
8111 dev_extent_cache
, NULL
);
8122 * parse_tree_roots - Go over all roots in the tree root and add each one to
8125 * @fs_info - pointer to fs_info struct of the file system.
8127 * @normal_trees - list to contains all roots which don't have a drop
8128 * operation in progress
8130 * @dropping_trees - list containing all roots which have a drop operation
8133 * Returns 0 on success or a negative value indicating an error.
8135 static int parse_tree_roots(struct btrfs_fs_info
*fs_info
,
8136 struct list_head
*normal_trees
,
8137 struct list_head
*dropping_trees
)
8139 struct btrfs_path path
;
8140 struct btrfs_key key
;
8141 struct btrfs_key found_key
;
8142 struct btrfs_root_item ri
;
8143 struct extent_buffer
*leaf
;
8147 btrfs_init_path(&path
);
8150 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8151 ret
= btrfs_search_slot(NULL
, fs_info
->tree_root
, &key
, &path
, 0, 0);
8155 leaf
= path
.nodes
[0];
8156 slot
= path
.slots
[0];
8157 if (slot
>= btrfs_header_nritems(path
.nodes
[0])) {
8158 ret
= btrfs_next_leaf(fs_info
->tree_root
, &path
);
8161 leaf
= path
.nodes
[0];
8162 slot
= path
.slots
[0];
8164 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
8165 if (found_key
.type
== BTRFS_ROOT_ITEM_KEY
) {
8166 unsigned long offset
;
8170 offset
= btrfs_item_ptr_offset(leaf
, path
.slots
[0]);
8171 read_extent_buffer(leaf
, &ri
, offset
, sizeof(ri
));
8172 last_snapshot
= btrfs_root_last_snapshot(&ri
);
8173 level
= btrfs_root_level(&ri
);
8174 if (btrfs_disk_key_objectid(&ri
.drop_progress
) == 0) {
8175 ret
= add_root_item_to_list(normal_trees
,
8177 btrfs_root_bytenr(&ri
),
8178 last_snapshot
, level
,
8183 u64 objectid
= found_key
.objectid
;
8185 btrfs_disk_key_to_cpu(&found_key
,
8187 ret
= add_root_item_to_list(dropping_trees
,
8189 btrfs_root_bytenr(&ri
),
8190 last_snapshot
, level
,
8191 ri
.drop_level
, &found_key
);
8200 btrfs_release_path(&path
);
8204 static int check_chunks_and_extents(struct btrfs_fs_info
*fs_info
)
8206 struct rb_root dev_cache
;
8207 struct cache_tree chunk_cache
;
8208 struct block_group_tree block_group_cache
;
8209 struct device_extent_tree dev_extent_cache
;
8210 struct cache_tree extent_cache
;
8211 struct cache_tree seen
;
8212 struct cache_tree pending
;
8213 struct cache_tree reada
;
8214 struct cache_tree nodes
;
8215 struct extent_io_tree excluded_extents
;
8216 struct cache_tree corrupt_blocks
;
8218 struct block_info
*bits
;
8220 struct list_head dropping_trees
;
8221 struct list_head normal_trees
;
8222 struct btrfs_root
*root1
;
8223 struct btrfs_root
*root
;
8226 root
= fs_info
->fs_root
;
8227 dev_cache
= RB_ROOT
;
8228 cache_tree_init(&chunk_cache
);
8229 block_group_tree_init(&block_group_cache
);
8230 device_extent_tree_init(&dev_extent_cache
);
8232 cache_tree_init(&extent_cache
);
8233 cache_tree_init(&seen
);
8234 cache_tree_init(&pending
);
8235 cache_tree_init(&nodes
);
8236 cache_tree_init(&reada
);
8237 cache_tree_init(&corrupt_blocks
);
8238 extent_io_tree_init(&excluded_extents
);
8239 INIT_LIST_HEAD(&dropping_trees
);
8240 INIT_LIST_HEAD(&normal_trees
);
8243 fs_info
->excluded_extents
= &excluded_extents
;
8244 fs_info
->fsck_extent_cache
= &extent_cache
;
8245 fs_info
->free_extent_hook
= free_extent_hook
;
8246 fs_info
->corrupt_blocks
= &corrupt_blocks
;
8250 bits
= malloc(bits_nr
* sizeof(struct block_info
));
8256 if (ctx
.progress_enabled
) {
8257 ctx
.tp
= TASK_EXTENTS
;
8258 task_start(ctx
.info
);
8262 root1
= fs_info
->tree_root
;
8263 level
= btrfs_header_level(root1
->node
);
8264 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
8265 root1
->node
->start
, 0, level
, 0, NULL
);
8268 root1
= fs_info
->chunk_root
;
8269 level
= btrfs_header_level(root1
->node
);
8270 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
8271 root1
->node
->start
, 0, level
, 0, NULL
);
8275 ret
= parse_tree_roots(fs_info
, &normal_trees
, &dropping_trees
);
8280 * check_block can return -EAGAIN if it fixes something, please keep
8281 * this in mind when dealing with return values from these functions, if
8282 * we get -EAGAIN we want to fall through and restart the loop.
8284 ret
= deal_root_from_list(&normal_trees
, root
, bits
, bits_nr
, &pending
,
8285 &seen
, &reada
, &nodes
, &extent_cache
,
8286 &chunk_cache
, &dev_cache
, &block_group_cache
,
8293 ret
= deal_root_from_list(&dropping_trees
, root
, bits
, bits_nr
,
8294 &pending
, &seen
, &reada
, &nodes
,
8295 &extent_cache
, &chunk_cache
, &dev_cache
,
8296 &block_group_cache
, &dev_extent_cache
);
8303 ret
= check_chunks(&chunk_cache
, &block_group_cache
,
8304 &dev_extent_cache
, NULL
, NULL
, NULL
, 0);
8311 ret
= check_extent_refs(root
, &extent_cache
);
8318 ret
= check_devices(&dev_cache
, &dev_extent_cache
);
8323 task_stop(ctx
.info
);
8325 free_corrupt_blocks_tree(fs_info
->corrupt_blocks
);
8326 extent_io_tree_cleanup(&excluded_extents
);
8327 fs_info
->fsck_extent_cache
= NULL
;
8328 fs_info
->free_extent_hook
= NULL
;
8329 fs_info
->corrupt_blocks
= NULL
;
8330 fs_info
->excluded_extents
= NULL
;
8333 free_chunk_cache_tree(&chunk_cache
);
8334 free_device_cache_tree(&dev_cache
);
8335 free_block_group_tree(&block_group_cache
);
8336 free_device_extent_tree(&dev_extent_cache
);
8337 free_extent_cache_tree(&seen
);
8338 free_extent_cache_tree(&pending
);
8339 free_extent_cache_tree(&reada
);
8340 free_extent_cache_tree(&nodes
);
8341 free_root_item_list(&normal_trees
);
8342 free_root_item_list(&dropping_trees
);
8345 free_corrupt_blocks_tree(fs_info
->corrupt_blocks
);
8346 free_extent_cache_tree(&seen
);
8347 free_extent_cache_tree(&pending
);
8348 free_extent_cache_tree(&reada
);
8349 free_extent_cache_tree(&nodes
);
8350 free_chunk_cache_tree(&chunk_cache
);
8351 free_block_group_tree(&block_group_cache
);
8352 free_device_cache_tree(&dev_cache
);
8353 free_device_extent_tree(&dev_extent_cache
);
8354 free_extent_record_cache(&extent_cache
);
8355 free_root_item_list(&normal_trees
);
8356 free_root_item_list(&dropping_trees
);
8357 extent_io_tree_cleanup(&excluded_extents
);
8361 static int do_check_chunks_and_extents(struct btrfs_fs_info
*fs_info
)
8365 if (!ctx
.progress_enabled
)
8366 fprintf(stderr
, "checking extents\n");
8367 if (check_mode
== CHECK_MODE_LOWMEM
)
8368 ret
= check_chunks_and_extents_lowmem(fs_info
);
8370 ret
= check_chunks_and_extents(fs_info
);
8372 /* Also repair device size related problems */
8373 if (repair
&& !ret
) {
8374 ret
= btrfs_fix_device_and_super_size(fs_info
);
8381 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle
*trans
,
8382 struct btrfs_root
*root
, int overwrite
)
8384 struct extent_buffer
*c
;
8385 struct extent_buffer
*old
= root
->node
;
8388 struct btrfs_disk_key disk_key
= {0,0,0};
8394 extent_buffer_get(c
);
8397 c
= btrfs_alloc_free_block(trans
, root
,
8398 root
->fs_info
->nodesize
,
8399 root
->root_key
.objectid
,
8400 &disk_key
, level
, 0, 0);
8403 extent_buffer_get(c
);
8407 memset_extent_buffer(c
, 0, 0, sizeof(struct btrfs_header
));
8408 btrfs_set_header_level(c
, level
);
8409 btrfs_set_header_bytenr(c
, c
->start
);
8410 btrfs_set_header_generation(c
, trans
->transid
);
8411 btrfs_set_header_backref_rev(c
, BTRFS_MIXED_BACKREF_REV
);
8412 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
8414 write_extent_buffer(c
, root
->fs_info
->fsid
,
8415 btrfs_header_fsid(), BTRFS_FSID_SIZE
);
8417 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
8418 btrfs_header_chunk_tree_uuid(c
),
8421 btrfs_mark_buffer_dirty(c
);
8423 * this case can happen in the following case:
8425 * 1.overwrite previous root.
8427 * 2.reinit reloc data root, this is because we skip pin
8428 * down reloc data tree before which means we can allocate
8429 * same block bytenr here.
8431 if (old
->start
== c
->start
) {
8432 btrfs_set_root_generation(&root
->root_item
,
8434 root
->root_item
.level
= btrfs_header_level(root
->node
);
8435 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
8436 &root
->root_key
, &root
->root_item
);
8438 free_extent_buffer(c
);
8442 free_extent_buffer(old
);
8444 add_root_to_dirty_list(root
);
8448 static int reset_block_groups(struct btrfs_fs_info
*fs_info
)
8450 struct btrfs_block_group_cache
*cache
;
8451 struct btrfs_path path
;
8452 struct extent_buffer
*leaf
;
8453 struct btrfs_chunk
*chunk
;
8454 struct btrfs_key key
;
8458 btrfs_init_path(&path
);
8460 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
8462 ret
= btrfs_search_slot(NULL
, fs_info
->chunk_root
, &key
, &path
, 0, 0);
8464 btrfs_release_path(&path
);
8469 * We do this in case the block groups were screwed up and had alloc
8470 * bits that aren't actually set on the chunks. This happens with
8471 * restored images every time and could happen in real life I guess.
8473 fs_info
->avail_data_alloc_bits
= 0;
8474 fs_info
->avail_metadata_alloc_bits
= 0;
8475 fs_info
->avail_system_alloc_bits
= 0;
8477 /* First we need to create the in-memory block groups */
8479 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8480 ret
= btrfs_next_leaf(fs_info
->chunk_root
, &path
);
8482 btrfs_release_path(&path
);
8490 leaf
= path
.nodes
[0];
8491 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
8492 if (key
.type
!= BTRFS_CHUNK_ITEM_KEY
) {
8497 chunk
= btrfs_item_ptr(leaf
, path
.slots
[0], struct btrfs_chunk
);
8498 btrfs_add_block_group(fs_info
, 0,
8499 btrfs_chunk_type(leaf
, chunk
), key
.offset
,
8500 btrfs_chunk_length(leaf
, chunk
));
8501 set_extent_dirty(&fs_info
->free_space_cache
, key
.offset
,
8502 key
.offset
+ btrfs_chunk_length(leaf
, chunk
));
8507 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8511 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8514 btrfs_release_path(&path
);
8518 static int reset_balance(struct btrfs_trans_handle
*trans
,
8519 struct btrfs_fs_info
*fs_info
)
8521 struct btrfs_root
*root
= fs_info
->tree_root
;
8522 struct btrfs_path path
;
8523 struct extent_buffer
*leaf
;
8524 struct btrfs_key key
;
8525 int del_slot
, del_nr
= 0;
8529 btrfs_init_path(&path
);
8530 key
.objectid
= BTRFS_BALANCE_OBJECTID
;
8531 key
.type
= BTRFS_BALANCE_ITEM_KEY
;
8533 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
8538 goto reinit_data_reloc
;
8543 ret
= btrfs_del_item(trans
, root
, &path
);
8546 btrfs_release_path(&path
);
8548 key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
8549 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8551 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
8555 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8560 ret
= btrfs_del_items(trans
, root
, &path
,
8567 btrfs_release_path(&path
);
8570 ret
= btrfs_search_slot(trans
, root
, &key
, &path
,
8577 leaf
= path
.nodes
[0];
8578 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
8579 if (key
.objectid
> BTRFS_TREE_RELOC_OBJECTID
)
8581 if (key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
8586 del_slot
= path
.slots
[0];
8595 ret
= btrfs_del_items(trans
, root
, &path
, del_slot
, del_nr
);
8599 btrfs_release_path(&path
);
8602 key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
8603 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8604 key
.offset
= (u64
)-1;
8605 root
= btrfs_read_fs_root(fs_info
, &key
);
8607 fprintf(stderr
, "Error reading data reloc tree\n");
8608 ret
= PTR_ERR(root
);
8611 record_root_in_trans(trans
, root
);
8612 ret
= btrfs_fsck_reinit_root(trans
, root
, 0);
8615 ret
= btrfs_make_root_dir(trans
, root
, BTRFS_FIRST_FREE_OBJECTID
);
8617 btrfs_release_path(&path
);
8621 static int reinit_extent_tree(struct btrfs_trans_handle
*trans
,
8622 struct btrfs_fs_info
*fs_info
, bool pin
)
8628 * The only reason we don't do this is because right now we're just
8629 * walking the trees we find and pinning down their bytes, we don't look
8630 * at any of the leaves. In order to do mixed groups we'd have to check
8631 * the leaves of any fs roots and pin down the bytes for any file
8632 * extents we find. Not hard but why do it if we don't have to?
8634 if (btrfs_fs_incompat(fs_info
, MIXED_GROUPS
)) {
8635 fprintf(stderr
, "We don't support re-initing the extent tree "
8636 "for mixed block groups yet, please notify a btrfs "
8637 "developer you want to do this so they can add this "
8638 "functionality.\n");
8643 * first we need to walk all of the trees except the extent tree and pin
8644 * down/exclude the bytes that are in use so we don't overwrite any
8645 * existing metadata.
8646 * If pinnned, unpin will be done in the end of transaction.
8647 * If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
8651 ret
= pin_metadata_blocks(fs_info
);
8653 fprintf(stderr
, "error pinning down used bytes\n");
8657 ret
= exclude_metadata_blocks(fs_info
);
8659 fprintf(stderr
, "error excluding used bytes\n");
8660 printf("try to pin down used bytes\n");
8667 * Need to drop all the block groups since we're going to recreate all
8670 btrfs_free_block_groups(fs_info
);
8671 ret
= reset_block_groups(fs_info
);
8673 fprintf(stderr
, "error resetting the block groups\n");
8677 /* Ok we can allocate now, reinit the extent root */
8678 ret
= btrfs_fsck_reinit_root(trans
, fs_info
->extent_root
, 0);
8680 fprintf(stderr
, "extent root initialization failed\n");
8682 * When the transaction code is updated we should end the
8683 * transaction, but for now progs only knows about commit so
8684 * just return an error.
8690 * Now we have all the in-memory block groups setup so we can make
8691 * allocations properly, and the metadata we care about is safe since we
8692 * pinned all of it above.
8695 struct btrfs_block_group_cache
*cache
;
8697 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8700 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8701 ret
= btrfs_insert_item(trans
, fs_info
->extent_root
,
8702 &cache
->key
, &cache
->item
,
8703 sizeof(cache
->item
));
8705 fprintf(stderr
, "Error adding block group\n");
8708 btrfs_extent_post_op(trans
);
8711 ret
= reset_balance(trans
, fs_info
);
8713 fprintf(stderr
, "error resetting the pending balance\n");
8718 static int recow_extent_buffer(struct btrfs_root
*root
, struct extent_buffer
*eb
)
8720 struct btrfs_path path
;
8721 struct btrfs_trans_handle
*trans
;
8722 struct btrfs_key key
;
8725 printf("Recowing metadata block %llu\n", eb
->start
);
8726 key
.objectid
= btrfs_header_owner(eb
);
8727 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8728 key
.offset
= (u64
)-1;
8730 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
8732 fprintf(stderr
, "Couldn't find owner root %llu\n",
8734 return PTR_ERR(root
);
8737 trans
= btrfs_start_transaction(root
, 1);
8739 return PTR_ERR(trans
);
8741 btrfs_init_path(&path
);
8742 path
.lowest_level
= btrfs_header_level(eb
);
8743 if (path
.lowest_level
)
8744 btrfs_node_key_to_cpu(eb
, &key
, 0);
8746 btrfs_item_key_to_cpu(eb
, &key
, 0);
8748 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, 0, 1);
8749 btrfs_commit_transaction(trans
, root
);
8750 btrfs_release_path(&path
);
8754 static int delete_bad_item(struct btrfs_root
*root
, struct bad_item
*bad
)
8756 struct btrfs_path path
;
8757 struct btrfs_trans_handle
*trans
;
8758 struct btrfs_key key
;
8761 printf("Deleting bad item [%llu,%u,%llu]\n", bad
->key
.objectid
,
8762 bad
->key
.type
, bad
->key
.offset
);
8763 key
.objectid
= bad
->root_id
;
8764 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8765 key
.offset
= (u64
)-1;
8767 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
8769 fprintf(stderr
, "Couldn't find owner root %llu\n",
8771 return PTR_ERR(root
);
8774 trans
= btrfs_start_transaction(root
, 1);
8776 return PTR_ERR(trans
);
8778 btrfs_init_path(&path
);
8779 ret
= btrfs_search_slot(trans
, root
, &bad
->key
, &path
, -1, 1);
8785 ret
= btrfs_del_item(trans
, root
, &path
);
8787 btrfs_commit_transaction(trans
, root
);
8788 btrfs_release_path(&path
);
8792 static int zero_log_tree(struct btrfs_root
*root
)
8794 struct btrfs_trans_handle
*trans
;
8797 trans
= btrfs_start_transaction(root
, 1);
8798 if (IS_ERR(trans
)) {
8799 ret
= PTR_ERR(trans
);
8802 btrfs_set_super_log_root(root
->fs_info
->super_copy
, 0);
8803 btrfs_set_super_log_root_level(root
->fs_info
->super_copy
, 0);
8804 ret
= btrfs_commit_transaction(trans
, root
);
8808 static int populate_csum(struct btrfs_trans_handle
*trans
,
8809 struct btrfs_root
*csum_root
, char *buf
, u64 start
,
8812 struct btrfs_fs_info
*fs_info
= csum_root
->fs_info
;
8817 while (offset
< len
) {
8818 sectorsize
= fs_info
->sectorsize
;
8819 ret
= read_extent_data(fs_info
, buf
, start
+ offset
,
8823 ret
= btrfs_csum_file_block(trans
, csum_root
, start
+ len
,
8824 start
+ offset
, buf
, sectorsize
);
8827 offset
+= sectorsize
;
8832 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle
*trans
,
8833 struct btrfs_root
*csum_root
,
8834 struct btrfs_root
*cur_root
)
8836 struct btrfs_path path
;
8837 struct btrfs_key key
;
8838 struct extent_buffer
*node
;
8839 struct btrfs_file_extent_item
*fi
;
8846 buf
= malloc(cur_root
->fs_info
->sectorsize
);
8850 btrfs_init_path(&path
);
8854 ret
= btrfs_search_slot(NULL
, cur_root
, &key
, &path
, 0, 0);
8857 /* Iterate all regular file extents and fill its csum */
8859 btrfs_item_key_to_cpu(path
.nodes
[0], &key
, path
.slots
[0]);
8861 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
8863 node
= path
.nodes
[0];
8864 slot
= path
.slots
[0];
8865 fi
= btrfs_item_ptr(node
, slot
, struct btrfs_file_extent_item
);
8866 if (btrfs_file_extent_type(node
, fi
) != BTRFS_FILE_EXTENT_REG
)
8868 start
= btrfs_file_extent_disk_bytenr(node
, fi
);
8869 len
= btrfs_file_extent_disk_num_bytes(node
, fi
);
8871 ret
= populate_csum(trans
, csum_root
, buf
, start
, len
);
8878 * TODO: if next leaf is corrupted, jump to nearest next valid
8881 ret
= btrfs_next_item(cur_root
, &path
);
8891 btrfs_release_path(&path
);
8896 static int fill_csum_tree_from_fs(struct btrfs_trans_handle
*trans
,
8897 struct btrfs_root
*csum_root
)
8899 struct btrfs_fs_info
*fs_info
= csum_root
->fs_info
;
8900 struct btrfs_path path
;
8901 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
8902 struct btrfs_root
*cur_root
;
8903 struct extent_buffer
*node
;
8904 struct btrfs_key key
;
8908 btrfs_init_path(&path
);
8909 key
.objectid
= BTRFS_FS_TREE_OBJECTID
;
8911 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8912 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, &path
, 0, 0);
8921 node
= path
.nodes
[0];
8922 slot
= path
.slots
[0];
8923 btrfs_item_key_to_cpu(node
, &key
, slot
);
8924 if (key
.objectid
> BTRFS_LAST_FREE_OBJECTID
)
8926 if (key
.type
!= BTRFS_ROOT_ITEM_KEY
)
8928 if (!is_fstree(key
.objectid
))
8930 key
.offset
= (u64
)-1;
8932 cur_root
= btrfs_read_fs_root(fs_info
, &key
);
8933 if (IS_ERR(cur_root
) || !cur_root
) {
8934 fprintf(stderr
, "Fail to read fs/subvol tree: %lld\n",
8938 ret
= fill_csum_tree_from_one_fs_root(trans
, csum_root
,
8943 ret
= btrfs_next_item(tree_root
, &path
);
8953 btrfs_release_path(&path
);
8957 static int fill_csum_tree_from_extent(struct btrfs_trans_handle
*trans
,
8958 struct btrfs_root
*csum_root
)
8960 struct btrfs_root
*extent_root
= csum_root
->fs_info
->extent_root
;
8961 struct btrfs_path path
;
8962 struct btrfs_extent_item
*ei
;
8963 struct extent_buffer
*leaf
;
8965 struct btrfs_key key
;
8968 btrfs_init_path(&path
);
8970 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
8972 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, &path
, 0, 0);
8974 btrfs_release_path(&path
);
8978 buf
= malloc(csum_root
->fs_info
->sectorsize
);
8980 btrfs_release_path(&path
);
8985 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8986 ret
= btrfs_next_leaf(extent_root
, &path
);
8994 leaf
= path
.nodes
[0];
8996 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
8997 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
9002 ei
= btrfs_item_ptr(leaf
, path
.slots
[0],
9003 struct btrfs_extent_item
);
9004 if (!(btrfs_extent_flags(leaf
, ei
) &
9005 BTRFS_EXTENT_FLAG_DATA
)) {
9010 ret
= populate_csum(trans
, csum_root
, buf
, key
.objectid
,
9017 btrfs_release_path(&path
);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree. If @search_fs_tree is set,
 * we will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (search_fs_tree)
		return fill_csum_tree_from_fs(trans, csum_root);
	else
		return fill_csum_tree_from_extent(trans, csum_root);
}
9039 static void free_roots_info_cache(void)
9041 if (!roots_info_cache
)
9044 while (!cache_tree_empty(roots_info_cache
)) {
9045 struct cache_extent
*entry
;
9046 struct root_item_info
*rii
;
9048 entry
= first_cache_extent(roots_info_cache
);
9051 remove_cache_extent(roots_info_cache
, entry
);
9052 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
9056 free(roots_info_cache
);
9057 roots_info_cache
= NULL
;
9060 static int build_roots_info_cache(struct btrfs_fs_info
*info
)
9063 struct btrfs_key key
;
9064 struct extent_buffer
*leaf
;
9065 struct btrfs_path path
;
9067 if (!roots_info_cache
) {
9068 roots_info_cache
= malloc(sizeof(*roots_info_cache
));
9069 if (!roots_info_cache
)
9071 cache_tree_init(roots_info_cache
);
9074 btrfs_init_path(&path
);
9076 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
9078 ret
= btrfs_search_slot(NULL
, info
->extent_root
, &key
, &path
, 0, 0);
9081 leaf
= path
.nodes
[0];
9084 struct btrfs_key found_key
;
9085 struct btrfs_extent_item
*ei
;
9086 struct btrfs_extent_inline_ref
*iref
;
9087 unsigned long item_end
;
9088 int slot
= path
.slots
[0];
9093 struct cache_extent
*entry
;
9094 struct root_item_info
*rii
;
9096 if (slot
>= btrfs_header_nritems(leaf
)) {
9097 ret
= btrfs_next_leaf(info
->extent_root
, &path
);
9104 leaf
= path
.nodes
[0];
9105 slot
= path
.slots
[0];
9108 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
9110 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
9111 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
)
9114 ei
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_item
);
9115 flags
= btrfs_extent_flags(leaf
, ei
);
9116 item_end
= (unsigned long)ei
+ btrfs_item_size_nr(leaf
, slot
);
9118 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
9119 !(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
))
9122 if (found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
9123 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
9124 level
= found_key
.offset
;
9126 struct btrfs_tree_block_info
*binfo
;
9128 binfo
= (struct btrfs_tree_block_info
*)(ei
+ 1);
9129 iref
= (struct btrfs_extent_inline_ref
*)(binfo
+ 1);
9130 level
= btrfs_tree_block_level(leaf
, binfo
);
9134 * It's a valid extent/metadata item that has no inline ref,
9135 * but SHARED_BLOCK_REF or other shared references.
9136 * So we need to do extra check to avoid reading beyond leaf
9139 if ((unsigned long)iref
>= item_end
)
9143 * For a root extent, it must be of the following type and the
9144 * first (and only one) iref in the item.
9146 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
9147 if (type
!= BTRFS_TREE_BLOCK_REF_KEY
)
9150 root_id
= btrfs_extent_inline_ref_offset(leaf
, iref
);
9151 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
9153 rii
= malloc(sizeof(struct root_item_info
));
9158 rii
->cache_extent
.start
= root_id
;
9159 rii
->cache_extent
.size
= 1;
9160 rii
->level
= (u8
)-1;
9161 entry
= &rii
->cache_extent
;
9162 ret
= insert_cache_extent(roots_info_cache
, entry
);
9165 rii
= container_of(entry
, struct root_item_info
,
9169 ASSERT(rii
->cache_extent
.start
== root_id
);
9170 ASSERT(rii
->cache_extent
.size
== 1);
9172 if (level
> rii
->level
|| rii
->level
== (u8
)-1) {
9174 rii
->bytenr
= found_key
.objectid
;
9175 rii
->gen
= btrfs_extent_generation(leaf
, ei
);
9176 rii
->node_count
= 1;
9177 } else if (level
== rii
->level
) {
9185 btrfs_release_path(&path
);
9190 static int maybe_repair_root_item(struct btrfs_path
*path
,
9191 const struct btrfs_key
*root_key
,
9192 const int read_only_mode
)
9194 const u64 root_id
= root_key
->objectid
;
9195 struct cache_extent
*entry
;
9196 struct root_item_info
*rii
;
9197 struct btrfs_root_item ri
;
9198 unsigned long offset
;
9200 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
9203 "Error: could not find extent items for root %llu\n",
9204 root_key
->objectid
);
9208 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
9209 ASSERT(rii
->cache_extent
.start
== root_id
);
9210 ASSERT(rii
->cache_extent
.size
== 1);
9212 if (rii
->node_count
!= 1) {
9214 "Error: could not find btree root extent for root %llu\n",
9219 offset
= btrfs_item_ptr_offset(path
->nodes
[0], path
->slots
[0]);
9220 read_extent_buffer(path
->nodes
[0], &ri
, offset
, sizeof(ri
));
9222 if (btrfs_root_bytenr(&ri
) != rii
->bytenr
||
9223 btrfs_root_level(&ri
) != rii
->level
||
9224 btrfs_root_generation(&ri
) != rii
->gen
) {
9227 * If we're in repair mode but our caller told us to not update
9228 * the root item, i.e. just check if it needs to be updated, don't
9229 * print this message, since the caller will call us again shortly
9230 * for the same root item without read only mode (the caller will
9231 * open a transaction first).
9233 if (!(read_only_mode
&& repair
))
9235 "%sroot item for root %llu,"
9236 " current bytenr %llu, current gen %llu, current level %u,"
9237 " new bytenr %llu, new gen %llu, new level %u\n",
9238 (read_only_mode
? "" : "fixing "),
9240 btrfs_root_bytenr(&ri
), btrfs_root_generation(&ri
),
9241 btrfs_root_level(&ri
),
9242 rii
->bytenr
, rii
->gen
, rii
->level
);
9244 if (btrfs_root_generation(&ri
) > rii
->gen
) {
9246 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9247 root_id
, btrfs_root_generation(&ri
), rii
->gen
);
9251 if (!read_only_mode
) {
9252 btrfs_set_root_bytenr(&ri
, rii
->bytenr
);
9253 btrfs_set_root_level(&ri
, rii
->level
);
9254 btrfs_set_root_generation(&ri
, rii
->gen
);
9255 write_extent_buffer(path
->nodes
[0], &ri
,
9256 offset
, sizeof(ri
));
9266 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9267 * caused read-only snapshots to be corrupted if they were created at a moment
9268 * when the source subvolume/snapshot had orphan items. The issue was that the
9269 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9270 * node instead of the post orphan cleanup root node.
9271 * So this function, and its callees, just detects and fixes those cases. Even
9272 * though the regression was for read-only snapshots, this function applies to
9273 * any snapshot/subvolume root.
9274 * This must be run before any other repair code - not doing it so, makes other
9275 * repair code delete or modify backrefs in the extent tree for example, which
9276 * will result in an inconsistent fs after repairing the root items.
9278 static int repair_root_items(struct btrfs_fs_info
*info
)
9280 struct btrfs_path path
;
9281 struct btrfs_key key
;
9282 struct extent_buffer
*leaf
;
9283 struct btrfs_trans_handle
*trans
= NULL
;
9288 btrfs_init_path(&path
);
9290 ret
= build_roots_info_cache(info
);
9294 key
.objectid
= BTRFS_FIRST_FREE_OBJECTID
;
9295 key
.type
= BTRFS_ROOT_ITEM_KEY
;
9300 * Avoid opening and committing transactions if a leaf doesn't have
9301 * any root items that need to be fixed, so that we avoid rotating
9302 * backup roots unnecessarily.
9305 trans
= btrfs_start_transaction(info
->tree_root
, 1);
9306 if (IS_ERR(trans
)) {
9307 ret
= PTR_ERR(trans
);
9312 ret
= btrfs_search_slot(trans
, info
->tree_root
, &key
, &path
,
9316 leaf
= path
.nodes
[0];
9319 struct btrfs_key found_key
;
9321 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
9322 int no_more_keys
= find_next_key(&path
, &key
);
9324 btrfs_release_path(&path
);
9326 ret
= btrfs_commit_transaction(trans
,
9338 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
9340 if (found_key
.type
!= BTRFS_ROOT_ITEM_KEY
)
9342 if (found_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
9345 ret
= maybe_repair_root_item(&path
, &found_key
, trans
? 0 : 1);
9349 if (!trans
&& repair
) {
9352 btrfs_release_path(&path
);
9362 free_roots_info_cache();
9363 btrfs_release_path(&path
);
9365 btrfs_commit_transaction(trans
, info
->tree_root
);
9372 static int clear_free_space_cache(struct btrfs_fs_info
*fs_info
)
9374 struct btrfs_trans_handle
*trans
;
9375 struct btrfs_block_group_cache
*bg_cache
;
9379 /* Clear all free space cache inodes and its extent data */
9381 bg_cache
= btrfs_lookup_first_block_group(fs_info
, current
);
9384 ret
= btrfs_clear_free_space_cache(fs_info
, bg_cache
);
9387 current
= bg_cache
->key
.objectid
+ bg_cache
->key
.offset
;
9390 /* Don't forget to set cache_generation to -1 */
9391 trans
= btrfs_start_transaction(fs_info
->tree_root
, 0);
9392 if (IS_ERR(trans
)) {
9393 error("failed to update super block cache generation");
9394 return PTR_ERR(trans
);
9396 btrfs_set_super_cache_generation(fs_info
->super_copy
, (u64
)-1);
9397 btrfs_commit_transaction(trans
, fs_info
->tree_root
);
9402 static int do_clear_free_space_cache(struct btrfs_fs_info
*fs_info
,
9407 if (clear_version
== 1) {
9408 if (btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
)) {
9410 "free space cache v2 detected, use --clear-space-cache v2");
9414 printf("Clearing free space cache\n");
9415 ret
= clear_free_space_cache(fs_info
);
9417 error("failed to clear free space cache");
9420 printf("Free space cache cleared\n");
9422 } else if (clear_version
== 2) {
9423 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
)) {
9424 printf("no free space cache v2 to clear\n");
9428 printf("Clear free space cache v2\n");
9429 ret
= btrfs_clear_free_space_tree(fs_info
);
9431 error("failed to clear free space cache v2: %d", ret
);
9434 printf("free space cache v2 cleared\n");
/*
 * Usage text for `btrfs check`, consumed by usage()/usage_command(), which
 * iterate entries until they hit the NULL sentinel — the visible text was
 * missing that terminator, so it is restored here. Entry strings are kept
 * byte-for-byte as in the original.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--force skip mount checks, repair is not possible",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed (experimental)",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
9473 int cmd_check(int argc
, char **argv
)
9475 struct cache_tree root_cache
;
9476 struct btrfs_root
*root
;
9477 struct btrfs_fs_info
*info
;
9480 u64 tree_root_bytenr
= 0;
9481 u64 chunk_root_bytenr
= 0;
9482 char uuidbuf
[BTRFS_UUID_UNPARSED_SIZE
];
9486 int init_csum_tree
= 0;
9488 int clear_space_cache
= 0;
9489 int qgroup_report
= 0;
9490 int qgroups_repaired
= 0;
9491 int qgroup_report_ret
;
9492 unsigned ctree_flags
= OPEN_CTREE_EXCLUSIVE
;
9497 enum { GETOPT_VAL_REPAIR
= 257, GETOPT_VAL_INIT_CSUM
,
9498 GETOPT_VAL_INIT_EXTENT
, GETOPT_VAL_CHECK_CSUM
,
9499 GETOPT_VAL_READONLY
, GETOPT_VAL_CHUNK_TREE
,
9500 GETOPT_VAL_MODE
, GETOPT_VAL_CLEAR_SPACE_CACHE
,
9502 static const struct option long_options
[] = {
9503 { "super", required_argument
, NULL
, 's' },
9504 { "repair", no_argument
, NULL
, GETOPT_VAL_REPAIR
},
9505 { "readonly", no_argument
, NULL
, GETOPT_VAL_READONLY
},
9506 { "init-csum-tree", no_argument
, NULL
,
9507 GETOPT_VAL_INIT_CSUM
},
9508 { "init-extent-tree", no_argument
, NULL
,
9509 GETOPT_VAL_INIT_EXTENT
},
9510 { "check-data-csum", no_argument
, NULL
,
9511 GETOPT_VAL_CHECK_CSUM
},
9512 { "backup", no_argument
, NULL
, 'b' },
9513 { "subvol-extents", required_argument
, NULL
, 'E' },
9514 { "qgroup-report", no_argument
, NULL
, 'Q' },
9515 { "tree-root", required_argument
, NULL
, 'r' },
9516 { "chunk-root", required_argument
, NULL
,
9517 GETOPT_VAL_CHUNK_TREE
},
9518 { "progress", no_argument
, NULL
, 'p' },
9519 { "mode", required_argument
, NULL
,
9521 { "clear-space-cache", required_argument
, NULL
,
9522 GETOPT_VAL_CLEAR_SPACE_CACHE
},
9523 { "force", no_argument
, NULL
, GETOPT_VAL_FORCE
},
9527 c
= getopt_long(argc
, argv
, "as:br:pEQ", long_options
, NULL
);
9531 case 'a': /* ignored */ break;
9533 ctree_flags
|= OPEN_CTREE_BACKUP_ROOT
;
9536 num
= arg_strtou64(optarg
);
9537 if (num
>= BTRFS_SUPER_MIRROR_MAX
) {
9539 "super mirror should be less than %d",
9540 BTRFS_SUPER_MIRROR_MAX
);
9543 bytenr
= btrfs_sb_offset(((int)num
));
9544 printf("using SB copy %llu, bytenr %llu\n", num
,
9545 (unsigned long long)bytenr
);
9551 subvolid
= arg_strtou64(optarg
);
9554 tree_root_bytenr
= arg_strtou64(optarg
);
9556 case GETOPT_VAL_CHUNK_TREE
:
9557 chunk_root_bytenr
= arg_strtou64(optarg
);
9560 ctx
.progress_enabled
= true;
9564 usage(cmd_check_usage
);
9565 case GETOPT_VAL_REPAIR
:
9566 printf("enabling repair mode\n");
9568 ctree_flags
|= OPEN_CTREE_WRITES
;
9570 case GETOPT_VAL_READONLY
:
9573 case GETOPT_VAL_INIT_CSUM
:
9574 printf("Creating a new CRC tree\n");
9577 ctree_flags
|= OPEN_CTREE_WRITES
;
9579 case GETOPT_VAL_INIT_EXTENT
:
9580 init_extent_tree
= 1;
9581 ctree_flags
|= (OPEN_CTREE_WRITES
|
9582 OPEN_CTREE_NO_BLOCK_GROUPS
);
9585 case GETOPT_VAL_CHECK_CSUM
:
9586 check_data_csum
= 1;
9588 case GETOPT_VAL_MODE
:
9589 check_mode
= parse_check_mode(optarg
);
9590 if (check_mode
== CHECK_MODE_UNKNOWN
) {
9591 error("unknown mode: %s", optarg
);
9595 case GETOPT_VAL_CLEAR_SPACE_CACHE
:
9596 if (strcmp(optarg
, "v1") == 0) {
9597 clear_space_cache
= 1;
9598 } else if (strcmp(optarg
, "v2") == 0) {
9599 clear_space_cache
= 2;
9600 ctree_flags
|= OPEN_CTREE_INVALIDATE_FST
;
9603 "invalid argument to --clear-space-cache, must be v1 or v2");
9606 ctree_flags
|= OPEN_CTREE_WRITES
;
9608 case GETOPT_VAL_FORCE
:
9614 if (check_argc_exact(argc
- optind
, 1))
9615 usage(cmd_check_usage
);
9617 if (ctx
.progress_enabled
) {
9618 ctx
.tp
= TASK_NOTHING
;
9619 ctx
.info
= task_init(print_status_check
, print_status_return
, &ctx
);
9622 /* This check is the only reason for --readonly to exist */
9623 if (readonly
&& repair
) {
9624 error("repair options are not compatible with --readonly");
9629 * experimental and dangerous
9631 if (repair
&& check_mode
== CHECK_MODE_LOWMEM
)
9632 warning("low-memory mode repair support is only partial");
9635 cache_tree_init(&root_cache
);
9637 ret
= check_mounted(argv
[optind
]);
9640 error("could not check mount status: %s",
9646 "%s is currently mounted, use --force if you really intend to check the filesystem",
9654 error("repair and --force is not yet supported");
9661 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9665 "filesystem mounted, continuing because of --force");
9667 /* A block device is mounted in exclusive mode by kernel */
9668 ctree_flags
&= ~OPEN_CTREE_EXCLUSIVE
;
9671 /* only allow partial opening under repair mode */
9673 ctree_flags
|= OPEN_CTREE_PARTIAL
;
9675 info
= open_ctree_fs_info(argv
[optind
], bytenr
, tree_root_bytenr
,
9676 chunk_root_bytenr
, ctree_flags
);
9678 error("cannot open file system");
9685 root
= info
->fs_root
;
9686 uuid_unparse(info
->super_copy
->fsid
, uuidbuf
);
9688 printf("Checking filesystem on %s\nUUID: %s\n", argv
[optind
], uuidbuf
);
9691 * Check the bare minimum before starting anything else that could rely
9692 * on it, namely the tree roots, any local consistency checks
9694 if (!extent_buffer_uptodate(info
->tree_root
->node
) ||
9695 !extent_buffer_uptodate(info
->dev_root
->node
) ||
9696 !extent_buffer_uptodate(info
->chunk_root
->node
)) {
9697 error("critical roots corrupted, unable to check the filesystem");
9703 if (clear_space_cache
) {
9704 ret
= do_clear_free_space_cache(info
, clear_space_cache
);
9710 * repair mode will force us to commit transaction which
9711 * will make us fail to load log tree when mounting.
9713 if (repair
&& btrfs_super_log_root(info
->super_copy
)) {
9714 ret
= ask_user("repair mode will force to clear out log tree, are you sure?");
9720 ret
= zero_log_tree(root
);
9723 error("failed to zero log tree: %d", ret
);
9728 if (qgroup_report
) {
9729 printf("Print quota groups for %s\nUUID: %s\n", argv
[optind
],
9731 ret
= qgroup_verify_all(info
);
9734 err
|= !!report_qgroups(1);
9738 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9739 subvolid
, argv
[optind
], uuidbuf
);
9740 ret
= print_extent_state(info
, subvolid
);
9745 if (init_extent_tree
|| init_csum_tree
) {
9746 struct btrfs_trans_handle
*trans
;
9748 trans
= btrfs_start_transaction(info
->extent_root
, 0);
9749 if (IS_ERR(trans
)) {
9750 error("error starting transaction");
9751 ret
= PTR_ERR(trans
);
9756 if (init_extent_tree
) {
9757 printf("Creating a new extent tree\n");
9758 ret
= reinit_extent_tree(trans
, info
,
9759 check_mode
== CHECK_MODE_ORIGINAL
);
9765 if (init_csum_tree
) {
9766 printf("Reinitialize checksum tree\n");
9767 ret
= btrfs_fsck_reinit_root(trans
, info
->csum_root
, 0);
9769 error("checksum tree initialization failed: %d",
9776 ret
= fill_csum_tree(trans
, info
->csum_root
,
9780 error("checksum tree refilling failed: %d", ret
);
9785 * Ok now we commit and run the normal fsck, which will add
9786 * extent entries for all of the items it finds.
9788 ret
= btrfs_commit_transaction(trans
, info
->extent_root
);
9793 if (!extent_buffer_uptodate(info
->extent_root
->node
)) {
9794 error("critical: extent_root, unable to check the filesystem");
9799 if (!extent_buffer_uptodate(info
->csum_root
->node
)) {
9800 error("critical: csum_root, unable to check the filesystem");
9806 if (!init_extent_tree
) {
9807 ret
= repair_root_items(info
);
9810 error("failed to repair root items: %s", strerror(-ret
));
9814 fprintf(stderr
, "Fixed %d roots.\n", ret
);
9816 } else if (ret
> 0) {
9818 "Found %d roots with an outdated root item.\n",
9821 "Please run a filesystem check with the option --repair to fix them.\n");
9828 ret
= do_check_chunks_and_extents(info
);
9832 "errors found in extent allocation tree or chunk allocation");
9834 /* Only re-check super size after we checked and repaired the fs */
9835 err
|= !is_super_size_valid(info
);
9837 if (!ctx
.progress_enabled
) {
9838 if (btrfs_fs_compat_ro(info
, FREE_SPACE_TREE
))
9839 fprintf(stderr
, "checking free space tree\n");
9841 fprintf(stderr
, "checking free space cache\n");
9843 ret
= check_space_cache(root
);
9846 if (btrfs_fs_compat_ro(info
, FREE_SPACE_TREE
))
9847 error("errors found in free space tree");
9849 error("errors found in free space cache");
9854 * We used to have to have these hole extents in between our real
9855 * extents so if we don't have this flag set we need to make sure there
9856 * are no gaps in the file extents for inodes, otherwise we can just
9857 * ignore it when this happens.
9859 no_holes
= btrfs_fs_incompat(root
->fs_info
, NO_HOLES
);
9860 ret
= do_check_fs_roots(info
, &root_cache
);
9863 error("errors found in fs roots");
9867 if (check_data_csum
)
9868 fprintf(stderr
, "checking csums against data\n");
9871 "checking only csum items (without verifying data)\n");
9872 ret
= check_csums(root
);
9874 * Data csum error is not fatal, and it may indicate more serious
9875 * corruption, continue checking.
9878 error("errors found in csum tree");
9881 fprintf(stderr
, "checking root refs\n");
9882 /* For low memory mode, check_fs_roots_v2 handles root refs */
9883 if (check_mode
!= CHECK_MODE_LOWMEM
) {
9884 ret
= check_root_refs(root
, &root_cache
);
9887 error("errors found in root refs");
9892 while (repair
&& !list_empty(&root
->fs_info
->recow_ebs
)) {
9893 struct extent_buffer
*eb
;
9895 eb
= list_first_entry(&root
->fs_info
->recow_ebs
,
9896 struct extent_buffer
, recow
);
9897 list_del_init(&eb
->recow
);
9898 ret
= recow_extent_buffer(root
, eb
);
9901 error("fails to fix transid errors");
9906 while (!list_empty(&delete_items
)) {
9907 struct bad_item
*bad
;
9909 bad
= list_first_entry(&delete_items
, struct bad_item
, list
);
9910 list_del_init(&bad
->list
);
9912 ret
= delete_bad_item(root
, bad
);
9918 if (info
->quota_enabled
) {
9919 fprintf(stderr
, "checking quota groups\n");
9920 ret
= qgroup_verify_all(info
);
9923 error("failed to check quota groups");
9926 qgroup_report_ret
= report_qgroups(0);
9927 ret
= repair_qgroups(info
, &qgroups_repaired
);
9929 error("failed to repair quota groups");
9932 if (qgroup_report_ret
&& (!qgroups_repaired
|| ret
))
9933 err
|= qgroup_report_ret
;
9937 if (!list_empty(&root
->fs_info
->recow_ebs
)) {
9938 error("transid errors in file system");
9943 printf("found %llu bytes used, ",
9944 (unsigned long long)bytes_used
);
9946 printf("error(s) found\n");
9948 printf("no error found\n");
9949 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes
);
9950 printf("total tree bytes: %llu\n",
9951 (unsigned long long)total_btree_bytes
);
9952 printf("total fs tree bytes: %llu\n",
9953 (unsigned long long)total_fs_tree_bytes
);
9954 printf("total extent tree bytes: %llu\n",
9955 (unsigned long long)total_extent_tree_bytes
);
9956 printf("btree space waste bytes: %llu\n",
9957 (unsigned long long)btree_space_waste
);
9958 printf("file data blocks allocated: %llu\n referenced %llu\n",
9959 (unsigned long long)data_bytes_allocated
,
9960 (unsigned long long)data_bytes_referenced
);
9962 free_qgroup_counts();
9963 free_root_recs_tree(&root_cache
);
9967 if (ctx
.progress_enabled
)
9968 task_deinit(ctx
.info
);