/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <sys/types.h>
#include <uuid/uuid.h>
#include "print-tree.h"
#include "task-utils.h"
#include "transaction.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "qgroup-verify.h"
#include "rbtree-utils.h"
#include "kernel-shared/ulist.h"
#include "check/mode-common.h"
#include "check/mode-original.h"
#include "check/mode-lowmem.h"
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
static int is_free_space_tree = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;

enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
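/*
 * The compare_*_backref() helpers below define the ordering of the extent
 * backref rb-tree: backrefs are first separated by is_data and full_backref,
 * then data backrefs are ordered by parent (or root), owner and offset, and
 * finally by the on-disk location and size of the referenced bytes.
 */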
static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
{
	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
	struct data_backref *back1 = to_data_backref(ext1);
	struct data_backref *back2 = to_data_backref(ext2);

	WARN_ON(!ext1->is_data);
	WARN_ON(!ext2->is_data);

	/* parent and root are a union, so this covers both */
	if (back1->parent > back2->parent)
		return 1;
	if (back1->parent < back2->parent)
		return -1;

	/* This is a full backref and the parents match. */
	if (back1->node.full_backref)
		return 0;

	if (back1->owner > back2->owner)
		return 1;
	if (back1->owner < back2->owner)
		return -1;

	if (back1->offset > back2->offset)
		return 1;
	if (back1->offset < back2->offset)
		return -1;

	if (back1->found_ref && back2->found_ref) {
		if (back1->disk_bytenr > back2->disk_bytenr)
			return 1;
		if (back1->disk_bytenr < back2->disk_bytenr)
			return -1;

		if (back1->bytes > back2->bytes)
			return 1;
		if (back1->bytes < back2->bytes)
			return -1;
	}

	return 0;
}
static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
{
	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
	struct tree_backref *back1 = to_tree_backref(ext1);
	struct tree_backref *back2 = to_tree_backref(ext2);

	WARN_ON(ext1->is_data);
	WARN_ON(ext2->is_data);

	/* parent and root are a union, so this covers both */
	if (back1->parent > back2->parent)
		return 1;
	if (back1->parent < back2->parent)
		return -1;

	return 0;
}

static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
{
	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);

	if (ext1->is_data > ext2->is_data)
		return 1;
	if (ext1->is_data < ext2->is_data)
		return -1;

	if (ext1->full_backref > ext2->full_backref)
		return 1;
	if (ext1->full_backref < ext2->full_backref)
		return -1;

	if (ext1->is_data)
		return compare_data_backref(node1, node2);
	else
		return compare_tree_backref(node1, node2);
}
static void print_status_check_line(void *p)
{
	struct task_ctx *priv = p;
	const char *task_position_string[] = {
		"[1/7] checking root items                     ",
		"[2/7] checking extents                        ",
		is_free_space_tree ?
		"[3/7] checking free space tree                " :
		"[3/7] checking free space cache               ",
		"[4/7] checking fs roots                       ",
		check_data_csum ?
		"[5/7] checking csums against data             " :
		"[5/7] checking csums (without verifying data) ",
		"[6/7] checking root refs                      ",
		"[7/7] checking quota groups                   ",
	};
	time_t elapsed;
	int hours;
	int minutes;
	int seconds;

	elapsed = time(NULL) - priv->start_time;
	hours = elapsed / 3600;
	elapsed -= hours * 3600;
	minutes = elapsed / 60;
	elapsed -= minutes * 60;
	seconds = elapsed;

	printf("%s (%d:%02d:%02d elapsed", task_position_string[priv->tp],
	       hours, minutes, seconds);
	if (priv->item_count > 0)
		printf(", %llu items checked)\r", priv->item_count);
	else
		printf(")\r");
	fflush(stdout);
}

static void *print_status_check(void *p)
{
	struct task_ctx *priv = p;

	/* 1 second */
	task_period_start(priv->info, 1000);

	if (priv->tp == TASK_NOTHING)
		return NULL;

	while (1) {
		print_status_check_line(p);
		task_period_wait(priv->info);
	}
	return NULL;
}

static int print_status_return(void *p)
{
	print_status_check_line(p);
	printf("\n");
	fflush(stdout);

	return 0;
}
static enum btrfs_check_mode parse_check_mode(const char *str)
{
	if (strcmp(str, "lowmem") == 0)
		return CHECK_MODE_LOWMEM;
	if (strcmp(str, "orig") == 0)
		return CHECK_MODE_ORIGINAL;
	if (strcmp(str, "original") == 0)
		return CHECK_MODE_ORIGINAL;

	return CHECK_MODE_UNKNOWN;
}

/* Compatibility helper to allow reuse of the old code */
static u64 first_extent_gap(struct rb_root *holes)
{
	struct file_extent_hole *hole;

	if (RB_EMPTY_ROOT(holes))
		return (u64)-1;

	hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
	return hole->start;
}
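/*
 * File extent holes are tracked per inode in an rb-tree of
 * struct file_extent_hole keyed by start offset.  For example, an inode of
 * isize 12K whose extents only cover [0, 4K) and [8K, 12K) ends up with one
 * recorded hole [4K, 8K), which later triggers I_ERR_FILE_EXTENT_DISCOUNT
 * on filesystems without the NO_HOLES feature.
 */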
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
{
	struct file_extent_hole *hole1;
	struct file_extent_hole *hole2;

	hole1 = rb_entry(node1, struct file_extent_hole, node);
	hole2 = rb_entry(node2, struct file_extent_hole, node);

	if (hole1->start > hole2->start)
		return -1;
	if (hole1->start < hole2->start)
		return 1;
	/* Now hole1->start == hole2->start */
	if (hole1->len >= hole2->len)
		/*
		 * Hole 1 will be merge center
		 * Same hole will be merged later
		 */
		return -1;
	/* Hole 2 will be merge center */
	return 1;
}
/*
 * Add a hole to the record
 *
 * This will do hole merging for copy_file_extent_holes(), which ensures
 * there won't be adjacent, unmerged holes left in the tree.
 */
static int add_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	if (!hole)
		return -ENOMEM;
	hole->start = start;
	hole->len = len;
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
				node);
	if (prev && prev->start + prev->len >= hole->start) {
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);
		free(prev);
		prev = NULL;
	}

	/* iterate merge with next holes */
	while (1) {
		if (!rb_next(&hole->node))
			break;
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
				node);
		if (hole->start + hole->len >= next->start) {
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
					    hole->start;
			rb_erase(&next->node, holes);
			free(next);
			next = NULL;
		} else {
			break;
		}
	}
	return 0;
}
static int compare_hole_range(struct rb_node *node, void *data)
{
	struct file_extent_hole *hole;
	u64 start;

	hole = (struct file_extent_hole *)data;
	start = hole->start;

	hole = rb_entry(node, struct file_extent_hole, node);
	if (start < hole->start)
		return -1;
	if (start >= hole->start && start < hole->start + hole->len)
		return 0;
	return 1;
}
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much stricter than add.
 */
static int del_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	u64 prev_start = 0;
	u64 prev_len = 0;
	u64 next_start = 0;
	u64 next_len = 0;
	struct rb_node *node;
	int have_prev = 0;
	int have_next = 0;
	int ret = 0;

	tmp.start = start;
	tmp.len = len;
	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	if (!node)
		return -EEXIST;
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)
		return -EEXIST;

	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exist.
	 */
	if (start > hole->start) {
		prev_start = hole->start;
		prev_len = start - hole->start;
		have_prev = 1;
	}
	if (hole->start + hole->len > start + len) {
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
		have_next = 1;
	}
	rb_erase(node, holes);
	free(hole);
	if (have_prev) {
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		if (ret < 0)
			return ret;
	}
	if (have_next) {
		ret = add_file_extent_hole(holes, next_start, next_len);
		if (ret < 0)
			return ret;
	}
	return 0;
}
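/*
 * Example: with a recorded hole [4K, 16K), del_file_extent_hole(holes, 8K, 4K)
 * removes the middle range and re-adds the two remaining pieces [4K, 8K) and
 * [12K, 16K).  Deleting a range that is not fully covered by a single hole
 * fails with -EEXIST.
 */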
static int copy_file_extent_holes(struct rb_root *dst,
				  struct rb_root *src)
{
	struct file_extent_hole *hole;
	struct rb_node *node;
	int ret = 0;

	node = rb_first(src);
	while (node) {
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = add_file_extent_hole(dst, hole->start, hole->len);
		if (ret)
			break;
		node = rb_next(node);
	}
	return ret;
}

static void free_file_extent_holes(struct rb_root *holes)
{
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(holes);
	while (node) {
		hole = rb_entry(node, struct file_extent_hole, node);
		rb_erase(node, holes);
		free(hole);
		node = rb_first(holes);
	}
}
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
	}
}

static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
{
	struct device_record *rec1;
	struct device_record *rec2;

	rec1 = rb_entry(node1, struct device_record, node);
	rec2 = rb_entry(node2, struct device_record, node);
	if (rec1->devid > rec2->devid)
		return -1;
	else if (rec1->devid < rec2->devid)
		return 1;
	else
		return 0;
}
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;
	struct rb_node *rb;
	size_t size;
	int ret;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return ERR_PTR(-ENOMEM);
	memcpy(rec, orig_rec, sizeof(*rec));
	rec->refs = 1;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		if (!backref) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	}
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		if (!dst_orphan) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	}
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
	if (ret < 0)
		goto cleanup_rb;

	return rec;

cleanup_rb:
	rb = rb_first(&rec->holes);
	while (rb) {
		struct file_extent_hole *hole;

		hole = rb_entry(rb, struct file_extent_hole, node);
		rb = rb_next(rb);
		free(hole);
	}

cleanup:
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
			free(orig);
		}

	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
			free(orig);
		}

	free(rec);

	return ERR_PTR(ret);
}
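/*
 * Note: clone_inode_rec() deep-copies the backref list, the orphan data
 * extents and the hole tree, so the clone can be modified independently of
 * the shared original (see the rec->refs > 1 handling in get_inode_rec()
 * below).
 */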
static void print_orphan_data_extents(struct list_head *orphan_extents,
				      u64 objectid)
{
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
		return;
	printf("The following data extent is lost in tree %llu:\n",
	       objectid);
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
		       orphan->objectid, orphan->offset, orphan->disk_bytenr,
		       orphan->disk_len);
	}
}
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	if (!errors)
		return;
	/* reloc root errors, we print its corresponding fs root objectid */
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	}
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
		fprintf(stderr, ", inline file extent too large");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	if (errors & I_ERR_ODD_INODE_FLAGS)
		fprintf(stderr, ", odd inode flags");
	if (errors & I_ERR_INLINE_RAM_BYTES_WRONG)
		fprintf(stderr, ", invalid inline ram bytes");
	fprintf(stderr, "\n");

	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;
		int found = 0;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
		while (node) {
			found = 1;
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
		}
		if (!found)
			fprintf(stderr, "\tstart: 0, len: %llu\n",
				round_up(rec->isize,
					 root->fs_info->sectorsize));
	}
}
static void print_ref_error(int errors)
{
	if (errors & REF_ERR_NO_DIR_ITEM)
		fprintf(stderr, ", no dir item");
	if (errors & REF_ERR_NO_DIR_INDEX)
		fprintf(stderr, ", no dir index");
	if (errors & REF_ERR_NO_INODE_REF)
		fprintf(stderr, ", no inode ref");
	if (errors & REF_ERR_DUP_DIR_ITEM)
		fprintf(stderr, ", dup dir item");
	if (errors & REF_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & REF_ERR_DUP_INODE_REF)
		fprintf(stderr, ", dup inode ref");
	if (errors & REF_ERR_INDEX_UNMATCH)
		fprintf(stderr, ", index mismatch");
	if (errors & REF_ERR_FILETYPE_UNMATCH)
		fprintf(stderr, ", filetype mismatch");
	if (errors & REF_ERR_NAME_TOO_LONG)
		fprintf(stderr, ", name too long");
	if (errors & REF_ERR_NO_ROOT_REF)
		fprintf(stderr, ", no root ref");
	if (errors & REF_ERR_NO_ROOT_BACKREF)
		fprintf(stderr, ", no root backref");
	if (errors & REF_ERR_DUP_ROOT_REF)
		fprintf(stderr, ", dup root ref");
	if (errors & REF_ERR_DUP_ROOT_BACKREF)
		fprintf(stderr, ", dup root backref");
	fprintf(stderr, "\n");
}
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
					  u64 ino, int mod)
{
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;
	int ret;

	cache = lookup_cache_extent(inode_cache, ino, 1);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		if (mod && rec->refs > 1) {
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
				return node->data;
			rec->refs--;
			rec = node->data;
		}
	} else if (mod) {
		rec = calloc(1, sizeof(*rec));
		if (!rec)
			return ERR_PTR(-ENOMEM);
		rec->ino = ino;
		rec->extent_start = (u64)-1;
		rec->refs = 1;
		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
		if (!node) {
			free(rec);
			return ERR_PTR(-ENOMEM);
		}
		node->cache.start = ino;
		node->cache.size = 1;
		node->data = rec;

		if (ino == BTRFS_FREE_INO_OBJECTID)
			rec->found_link = 1;

		ret = insert_cache_extent(inode_cache, &node->cache);
		if (ret) {
			free(rec);
			free(node);
			return ERR_PTR(-EEXIST);
		}
	}
	return rec;
}
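/*
 * get_inode_rec() usage pattern: with mod == 0 it is a pure lookup and may
 * return NULL; with mod == 1 a missing record is allocated and a shared
 * record (refs > 1) is cloned first, so the caller may modify the result.
 * Errors are returned as ERR_PTR() values and must be checked with IS_ERR().
 */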
static void free_orphan_data_extents(struct list_head *orphan_extents)
{
	struct orphan_data_extent *orphan;

	while (!list_empty(orphan_extents)) {
		orphan = list_entry(orphan_extents->next,
				struct orphan_data_extent, list);
		list_del(&orphan->list);
		free(orphan);
	}
}

static void free_inode_rec(struct inode_record *rec)
{
	struct inode_backref *backref;

	if (--rec->refs > 0)
		return;

	while (!list_empty(&rec->backrefs)) {
		backref = to_inode_backref(rec->backrefs.next);
		list_del(&backref->list);
		free(backref);
	}
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);
	free(rec);
}

static int can_free_inode_rec(struct inode_record *rec)
{
	if (!rec->errors && rec->checked && rec->found_inode_item &&
	    rec->nlink == rec->found_link && list_empty(&rec->backrefs))
		return 1;
	return 0;
}
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
{
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	u8 filetype;

	if (!rec->found_inode_item)
		return;

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
				free(backref);
			}
		}
	}

	if (!rec->checked || rec->merging)
		return;

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
	}

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;
	}

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		free_inode_rec(rec);
	}
}
static int check_orphan_item(struct btrfs_root *root, u64 ino)
{
	struct btrfs_path path;
	struct btrfs_key key;
	int ret;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = ino;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	btrfs_release_path(&path);
	if (ret > 0)
		ret = -ENOENT;
	return ret;
}
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_inode_item *item;
	u64 flags;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
		return 1;
	}
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
		rec->nodatasum = 1;
	rec->found_inode_item = 1;
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	flags = btrfs_inode_flags(eb, item);
	if (S_ISLNK(rec->imode) &&
	    flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND))
		rec->errors |= I_ERR_ODD_INODE_FLAGS;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
	return 0;
}
static struct inode_backref *get_inode_backref(struct inode_record *rec,
					       const char *name,
					       int namelen, u64 dir)
{
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
			break;
		if (backref->dir != dir || backref->namelen != namelen)
			continue;
		if (memcmp(name, backref->name, namelen))
			continue;
		return backref;
	}

	backref = malloc(sizeof(*backref) + namelen + 1);
	if (!backref)
		return NULL;
	memset(backref, 0, sizeof(*backref));
	backref->dir = dir;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
	return backref;
}
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     u8 filetype, u8 itemtype, int errors)
{
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
	BUG_ON(!backref);
	if (errors)
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		rec->found_link++;
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		else
			backref->index = index;

		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	} else {
		BUG_ON(1);
	}

	maybe_free_inode_rec(inode_cache, rec);
	return 0;
}
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
{
	struct inode_backref *backref;
	u32 dir_count = 0;
	int ret = 0;

	dst->merging = 1;
	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					backref->index, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_INDEX_KEY, backref->errors);
		}
		if (backref->found_dir_item) {
			dir_count++;
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, 0, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_ITEM_KEY, backref->errors);
		}
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, backref->index,
					backref->name, backref->namelen, 0,
					backref->ref_type, backref->errors);
		}
	}

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
		if (ret < 0)
			return ret;
	}

	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		} else {
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				ret = add_file_extent_hole(&dst->holes,
						dst->extent_end,
						src->extent_start - dst->extent_end);
			}
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;
		}
	}

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		} else {
			dst->errors |= I_ERR_DUP_INODE_ITEM;
		}
	}
	dst->merging = 0;

	return 0;
}
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
static void free_inode_ptr(struct cache_extent *cache)
{
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	rec = node->data;
	free_inode_rec(rec);
	free(node);
}

FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);

static struct shared_node *find_shared_node(struct cache_tree *shared,
					    u64 bytenr)
{
	struct cache_extent *cache;
	struct shared_node *node;

	cache = lookup_cache_extent(shared, bytenr, 1);
	if (cache) {
		node = container_of(cache, struct shared_node, cache);
		return node;
	}
	return NULL;
}

static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
{
	int ret;
	struct shared_node *node;

	node = calloc(1, sizeof(*node));
	if (!node)
		return -ENOMEM;
	node->cache.start = bytenr;
	node->cache.size = 1;
	cache_tree_init(&node->root_cache);
	cache_tree_init(&node->inode_cache);
	node->refs = refs;

	ret = insert_cache_extent(shared, &node->cache);

	return ret;
}
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int ret;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		ret = add_shared_node(&wc->shared, bytenr, refs);
		BUG_ON(ret);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}

static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
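/*
 * enter_shared_node()/leave_shared_node() maintain wc->nodes[] and
 * wc->active_node while walking a tree: a block referenced more than once
 * gets its own shared_node, so the inode records collected below it can
 * later be spliced into whichever tree level actually owns them.
 */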
/*
 * Returns:
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		has_parent = 1;

		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
static int process_dir_item(struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	u8 filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (cur + sizeof(*di) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			error = REF_ERR_NAME_TOO_LONG;

			if (cur + sizeof(*di) > total)
				break;
			len = min_t(u32, total - cur - sizeof(*di),
				    BTRFS_NAME_LEN);
		} else {
			error = 0;
			len = name_len;
		}

		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		if (key->type == BTRFS_DIR_ITEM_KEY &&
		    key->offset != btrfs_name_hash(namebuf, len)) {
			rec->errors |= I_ERR_ODD_DIR_ITEM;
			error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
			key->objectid, key->offset, namebuf, len, filetype,
			key->offset, btrfs_name_hash(namebuf, len));
		}

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr,
				"unknown location type %d in DIR_ITEM[%llu %llu]\n",
				location.type, key->objectid, key->offset);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
static int process_inode_ref(struct extent_buffer *eb,
			     int slot, struct btrfs_key *key,
			     struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_ref_name_len(eb, ref);
		index = btrfs_inode_ref_index(eb, ref);

		/* inode_ref + namelen should not cross item boundary */
		if (cur + sizeof(*ref) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			if (total < cur + sizeof(*ref))
				break;

			/* Still try to read out the remaining part */
			len = min_t(u32, total - cur - sizeof(*ref),
				    BTRFS_NAME_LEN);
			error = REF_ERR_NAME_TOO_LONG;
		} else {
			len = name_len;
			error = 0;
		}

		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
		add_inode_backref(inode_cache, key->objectid, key->offset,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*ref) + name_len;
		ref = (struct btrfs_inode_ref *)((char *)ref + len);
		cur += len;
	}
	return 0;
}
static int process_inode_extref(struct extent_buffer *eb,
				int slot, struct btrfs_key *key,
				struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u64 index;
	u64 parent;
	int error;
	struct cache_tree *inode_cache;
	struct btrfs_inode_extref *extref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_extref_name_len(eb, extref);
		index = btrfs_inode_extref_index(eb, extref);
		parent = btrfs_inode_extref_parent(eb, extref);
		if (name_len <= BTRFS_NAME_LEN) {
			len = name_len;
			error = 0;
		} else {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		}
		read_extent_buffer(eb, namebuf,
				   (unsigned long)(extref + 1), len);
		add_inode_backref(inode_cache, key->objectid, parent,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*extref) + name_len;
		extref = (struct btrfs_inode_extref *)((char *)extref + len);
		cur += len;
	}
	return 0;
}
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->fs_info->sectorsize - 1;
	u32 max_inline_size = min_t(u32, mask,
				BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		u8 compression = btrfs_file_extent_compression(eb, fi);
		struct btrfs_item *item = btrfs_item_nr(slot);

		num_bytes = btrfs_file_extent_ram_bytes(eb, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (compression) {
			if (btrfs_file_extent_inline_item_len(eb, item) >
			    max_inline_size ||
			    num_bytes > root->fs_info->sectorsize)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
		} else {
			if (num_bytes > max_inline_size)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
			if (btrfs_file_extent_inline_item_len(eb, item) !=
			    num_bytes)
				rec->errors |= I_ERR_INLINE_RAM_BYTES_WRONG;
		}
		rec->found_size += num_bytes;
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be
	 * no csums to copy, ergo we won't have csums for our file extent.
	 * This is ok so just don't bother checking csums if the inode belongs
	 * to the data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;

		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
				       &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			if (found > 0) {
				ret = check_prealloc_extent_written(root->fs_info,
								    disk_bytenr,
								    num_bytes);
				if (ret < 0)
					return ret;
				if (ret == 0)
					rec->errors |= I_ERR_ODD_CSUM_ITEM;
			}
		}
	}
	return 0;
}
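/*
 * The sizes accumulated above feed the final checks in
 * maybe_free_inode_rec(): found_size is compared against nbytes (or isize
 * for directories), and extent_end together with the hole tree is used to
 * detect gaps below isize.
 */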
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
			BUG_ON(IS_ERR(active_node->current));
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		};
	}
	return ret;
}
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	u64 refs;
	int ret;
	int err = 0;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
	} else {
		ret = btrfs_lookup_extent_info(NULL, fs_info,
					       path->nodes[*level]->start,
					       *level, 1, &refs, NULL);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;
	}

	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0)
			goto out;
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
		} else {
			ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
						*level - 1, 1, &refs, NULL);
			if (ret < 0) {
				refs = 0;
			} else {
				nrefs->bytenr[*level - 1] = bytenr;
				nrefs->refs[*level - 1] = refs;
			}
		}

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->fs_info->nodesize,
						*level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(cur, path->slots[*level], next);
		if (ret) {
			free_extent_buffer(next);
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
{
	int i;
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			BUG_ON(*level > wc->active_node);
			if (*level == wc->active_node)
				leave_shared_node(root, wc, *level);
			*level = i + 1;
		}
	}
	return 1;
}
static int check_root_dir(struct inode_record *rec)
{
	struct inode_backref *backref;
	int ret = -1;

	if (!rec->found_inode_item || rec->errors)
		goto out;
	if (rec->nlink != 1 || rec->found_link != 0)
		goto out;
	if (list_empty(&rec->backrefs))
		goto out;
	backref = to_inode_backref(rec->backrefs.next);
	if (!backref->found_inode_ref)
		goto out;
	if (backref->index != 0 || backref->namelen != 2 ||
	    memcmp(backref->name, "..", 2))
		goto out;
	if (backref->found_dir_index || backref->found_dir_item)
		goto out;
	ret = 0;
out:
	return ret;
}
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret) {
		if (!path->slots[0]) {
			ret = -ENOENT;
			goto out;
		}
		path->slots[0]--;
		ret = 0;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {
		ret = -ENOENT;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %llu root %llu\n", rec->ino,
	       root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct inode_record *rec)
{
	int ret;

	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
	btrfs_release_path(path);
	if (!ret)
		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
	return ret;
}

static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret = 0;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	/* Since ret == 0, no need to check anything else */
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
	printf("reset nbytes for ino %llu root %llu\n",
	       rec->ino, root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
static int add_missing_dir_index(struct btrfs_root *root,
				 struct cache_tree *inode_cache,
				 struct inode_record *rec,
				 struct inode_backref *backref)
{
	struct btrfs_path path;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *dir_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct inode_record *dir_rec;
	unsigned long name_ptr;
	u32 data_size = sizeof(*dir_item) + backref->namelen;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
		(unsigned long long)rec->ino);

	btrfs_init_path(&path);
	key.objectid = backref->dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = backref->index;
	ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
	BUG_ON(ret);

	leaf = path.nodes[0];
	dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);

	disk_key.objectid = cpu_to_le64(rec->ino);
	disk_key.type = BTRFS_INODE_ITEM_KEY;
	disk_key.offset = 0;

	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
	btrfs_set_dir_data_len(leaf, dir_item, 0);
	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
	name_ptr = (unsigned long)(dir_item + 1);
	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);

	backref->found_dir_index = 1;
	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
	BUG_ON(IS_ERR(dir_rec));
	if (!dir_rec)
		return 0;
	dir_rec->found_size += backref->namelen;
	if (dir_rec->found_size == dir_rec->isize &&
	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	if (dir_rec->found_size != dir_rec->isize)
		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;

	return 0;
}
static int delete_dir_index(struct btrfs_root *root,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path path;
	int ret = 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	btrfs_init_path(&path);
	di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, root);
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	if (!di)
		ret = btrfs_del_item(trans, root, &path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, &path, di);
	BUG_ON(ret);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
static int create_inode_item(struct btrfs_root *root,
			     struct inode_record *rec, int root_dir)
{
	struct btrfs_trans_handle *trans;
	u64 nlink = 0;
	u32 mode = 0;
	u64 size = 0;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		return ret;
	}

	nlink = root_dir ? 1 : rec->found_link;
	if (rec->found_dir_item) {
		if (rec->found_file_extent)
			fprintf(stderr, "root %llu inode %llu has both a dir "
				"item and extents, unsure if it is a dir or a "
				"regular file so setting it as a directory\n",
				(unsigned long long)root->objectid,
				(unsigned long long)rec->ino);
		mode = S_IFDIR | 0755;
		size = rec->found_size;
	} else if (!rec->found_dir_item) {
		size = rec->extent_end;
		mode = S_IFREG | 0755;
	}

	ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
				nlink, mode);
	btrfs_commit_transaction(trans, root);
	return 0;
}
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
			continue;
		}

		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
		}

		if (backref->found_dir_item &&
		    backref->found_dir_index) {
			if (!backref->errors &&
			    backref->found_inode_ref) {
				list_del(&backref->list);
				free(backref);
				continue;
			}
		}

		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * Let the nlink fixing routine handle it;
				 * it can do a better job.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, 0);
			if (ret)
				break;
			repaired++;
		}
	}
	return ret ? ret : repaired;
}
/*
 * To determine the file type for nlink/inode_item repair
 *
 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
 * Return -ENOENT if file type is not found.
 */
static int find_file_type(struct inode_record *rec, u8 *type)
{
	struct inode_backref *backref;

	/* For inode item recovered case */
	if (rec->found_inode_item) {
		*type = imode_to_type(rec->imode);
		return 0;
	}

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->found_dir_index || backref->found_dir_item) {
			*type = backref->filetype;
			return 0;
		}
	}
	return -ENOENT;
}

/*
 * To determine the file name for nlink repair
 *
 * Return 0 if file name is found, set name and namelen.
 * Return -ENOENT if file name is not found.
 */
static int find_file_name(struct inode_record *rec,
			  char *name, int *namelen)
{
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->found_dir_index || backref->found_dir_item ||
		    backref->found_inode_ref) {
			memcpy(name, backref->name, backref->namelen);
			*namelen = backref->namelen;
			return 0;
		}
	}
	return -ENOENT;
}
/* Reset the nlink of the inode to the correct one */
static int reset_nlink(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct btrfs_path *path,
		       struct inode_record *rec)
{
	struct inode_backref *backref;
	struct inode_backref *tmp;
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	int ret = 0;

	/* We don't believe this either, reset it and iterate backref */
	rec->found_link = 0;

	/* Remove all backref including the valid ones */
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
				   backref->index, backref->name,
				   backref->namelen, 0);
		if (ret < 0)
			goto out;

		/* remove invalid backref, so it won't be added back */
		if (!(backref->found_dir_index &&
		      backref->found_dir_item &&
		      backref->found_inode_ref)) {
			list_del(&backref->list);
			free(backref);
		} else {
			rec->found_link++;
		}
	}

	/* Set nlink to 0 */
	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(path);

	/*
	 * Add back valid inode_ref/dir_item/dir_index,
	 * add_link() will handle the nlink inc, so new nlink must be correct
	 */
	list_for_each_entry(backref, &rec->backrefs, list) {
		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
				     backref->name, backref->namelen,
				     backref->filetype, &backref->index, 1, 0);
		if (ret < 0)
			goto out;
	}
out:
	btrfs_release_path(path);
	return ret;
}
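/*
 * reset_nlink() first unlinks every recorded name (dropping the ones that do
 * not have a complete dir_item/dir_index/inode_ref triple), zeroes nlink in
 * the inode item, then re-adds the remaining valid names with
 * btrfs_add_link(), which bumps nlink back to the correct value.
 */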
static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	char namebuf[BTRFS_NAME_LEN] = {0};
	u8 type = 0;
	int namelen = 0;
	int name_recovered = 0;
	int type_recovered = 0;
	int ret = 0;

	/*
	 * Get the file name and type first, before the invalid inode refs
	 * are deleted by reset_nlink().
	 */
	name_recovered = !find_file_name(rec, namebuf, &namelen);
	type_recovered = !find_file_type(rec, &type);

	if (!name_recovered) {
		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
		       rec->ino, rec->ino);
		namelen = count_digits(rec->ino);
		sprintf(namebuf, "%llu", rec->ino);
		name_recovered = 1;
	}
	if (!type_recovered) {
		printf("Can't get file type for inode %llu, using FILE as fallback\n",
		       rec->ino);
		type = BTRFS_FT_REG_FILE;
		type_recovered = 1;
	}

	ret = reset_nlink(trans, root, path, rec);
	if (ret < 0) {
		fprintf(stderr,
			"Failed to reset nlink for inode %llu: %s\n",
			rec->ino, strerror(-ret));
		goto out;
	}

	if (rec->found_link == 0) {
		ret = link_inode_to_lostfound(trans, root, path, rec->ino,
					      namebuf, namelen, type,
					      (u64 *)&rec->found_link);
		if (ret)
			goto out;
	}
	printf("Fixed the nlink of inode %llu\n", rec->ino);
out:
	/*
	 * Clear the flag anyway, or we will loop forever on the same inode,
	 * as it will never be removed from the bad inode list.
	 */
	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
	btrfs_release_path(path);
	return ret;
}
/*
 * Check if there is any normal (regular or prealloc) file extent for the
 * given inode.
 * This is used to determine the file type when neither its dir_index/item
 * nor its inode_item exists.
 *
 * This will *NOT* report errors; if anything goes wrong, the inode is simply
 * considered to have no normal file extent.
 */
static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_file_extent_item *fi;
	u8 type;
	int ret = 0;

	btrfs_init_path(&path);
	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
		ret = btrfs_next_leaf(root, &path);
		if (ret) {
			ret = 0;
			goto out;
		}
	}
	while (1) {
		btrfs_item_key_to_cpu(path.nodes[0], &found_key,
				      path.slots[0]);
		if (found_key.objectid != ino ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
				    struct btrfs_file_extent_item);
		type = btrfs_file_extent_type(path.nodes[0], fi);
		if (type != BTRFS_FILE_EXTENT_INLINE) {
			ret = 1;
			goto out;
		}
		path.slots[0]++;
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret) {
				ret = 0;
				goto out;
			}
		}
	}
out:
	btrfs_release_path(&path);
	return ret;
}
2373 static u32
btrfs_type_to_imode(u8 type
)
2375 static u32 imode_by_btrfs_type
[] = {
2376 [BTRFS_FT_REG_FILE
] = S_IFREG
,
2377 [BTRFS_FT_DIR
] = S_IFDIR
,
2378 [BTRFS_FT_CHRDEV
] = S_IFCHR
,
2379 [BTRFS_FT_BLKDEV
] = S_IFBLK
,
2380 [BTRFS_FT_FIFO
] = S_IFIFO
,
2381 [BTRFS_FT_SOCK
] = S_IFSOCK
,
2382 [BTRFS_FT_SYMLINK
] = S_IFLNK
,
2385 return imode_by_btrfs_type
[(type
)];
static int repair_inode_no_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct inode_record *rec)
{
	u8 filetype;
	u32 mode = 0700;
	int type_recovered = 0;
	int ret = 0;

	printf("Trying to rebuild inode:%llu\n", rec->ino);

	type_recovered = !find_file_type(rec, &filetype);

	/*
	 * Try to determine the inode type if the type was not found.
	 *
	 * For a found regular file extent, it must be FILE.
	 * For a found dir_item/index, it must be DIR.
	 *
	 * For an undetermined one, use FILE as the fallback.
	 *
	 * TODO:
	 * 1. If a backref (inode_index/item is already handled) to it is
	 *    found, it must be DIR.
	 *    Need a new inode-inode ref structure to allow searching for that.
	 */
	if (!type_recovered) {
		if (rec->found_file_extent &&
		    find_normal_file_extent(root, rec->ino)) {
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		} else if (rec->found_dir_item) {
			type_recovered = 1;
			filetype = BTRFS_FT_DIR;
		} else if (!list_empty(&rec->orphan_extents)) {
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		} else {
			printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
			       rec->ino);
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		}
	}

	ret = btrfs_new_inode(trans, root, rec->ino,
			      mode | btrfs_type_to_imode(filetype));
	if (ret < 0)
		goto out;

	/*
	 * Here the inode rebuild is done; we only rebuild the inode item and
	 * don't repair the nlink (like moving to lost+found).
	 * That is the job of the nlink repair function.
	 *
	 * We just fill the record and return.
	 */
	rec->found_dir_item = 1;
	rec->imode = mode | btrfs_type_to_imode(filetype);
	rec->errors &= ~I_ERR_NO_INODE_ITEM;
	/* Ensure the inode_nlinks repair function will be called */
	rec->errors |= I_ERR_LINK_COUNT_WRONG;
out:
	return ret;
}
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct inode_record *rec)
{
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	int ret = 0;

	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents.
		 *
		 * Here we don't know whether the extent is compressed or not,
		 * so we can only assume it is not compressed and has no data
		 * offset, and use its disk_len as the extent length.
		 */
		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
				       orphan->offset, orphan->disk_len, 0);
		btrfs_release_path(path);
		if (!ret) {
			fprintf(stderr,
		"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
				orphan->disk_bytenr, orphan->disk_len);
			ret = btrfs_free_extent(trans,
					root->fs_info->extent_root,
					orphan->disk_bytenr, orphan->disk_len,
					0, root->objectid, orphan->objectid,
					orphan->offset);
			if (ret < 0)
				goto out;
		}
		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
					       orphan->offset,
					       orphan->disk_bytenr,
					       orphan->disk_len,
					       orphan->disk_len);
		if (ret < 0)
			goto out;

		/* Update file size info */
		rec->found_size += orphan->disk_len;
		if (rec->found_size == rec->nbytes)
			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;

		/* Update the file extent hole info too */
		ret = del_file_extent_hole(&rec->holes, orphan->offset,
					   orphan->disk_len);
		if (ret < 0)
			goto out;
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;

		list_del(&orphan->list);
		free(orphan);
	}
	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
out:
	return ret;
}
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct inode_record *rec)
{
	struct rb_node *node;
	struct file_extent_hole *hole;
	int found = 0;
	int ret = 0;

	node = rb_first(&rec->holes);
	while (node) {
		found = 1;
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = btrfs_punch_hole(trans, root, rec->ino,
				       hole->start, hole->len);
		if (ret < 0)
			goto out;
		ret = del_file_extent_hole(&rec->holes, hole->start,
					   hole->len);
		if (ret < 0)
			goto out;
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
		node = rb_first(&rec->holes);
	}
	/* special case for a file losing all its file extents */
	if (!found) {
		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
				       round_up(rec->isize,
						root->fs_info->sectorsize));
		if (ret < 0)
			goto out;
	}
	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
	       rec->ino, root->objectid);
out:
	return ret;
}
static int repair_inline_ram_bytes(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct inode_record *rec)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *i;
	u64 on_disk_item_len;
	int ret;

	key.objectid = rec->ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	i = btrfs_item_nr(path->slots[0]);
	on_disk_item_len = btrfs_file_extent_inline_item_len(path->nodes[0], i);
	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_item_len);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	printf("Repaired inline ram_bytes for root %llu ino %llu\n",
	       root->objectid, rec->ino);
	rec->errors &= ~I_ERR_INLINE_RAM_BYTES_WRONG;
out:
	btrfs_release_path(path);
	return ret;
}
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path path;
	int ret = 0;

	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT |
			     I_ERR_FILE_NBYTES_WRONG |
			     I_ERR_INLINE_RAM_BYTES_WRONG)))
		return rec->errors;

	/*
	 * For nlink repair, it may create a dir and add a link, so reserve:
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_init_path(&path);
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_INLINE_RAM_BYTES_WRONG)
		ret = repair_inline_ram_bytes(trans, root, &path, rec);
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
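
/*
 * Walk every inode_record collected for @root: repair backrefs first (they
 * can change inode errors), verify the root directory, then print and/or
 * repair each remaining inode record.
 */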
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int stage = 0;
	int ret = 0;
	int err = 0;
	u64 error = 0;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
		return 0;
	}

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second.  We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directory's
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have an invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * still needs to be fixed.
	 */
	while (stage < 3) {
		stage++;
		if (stage == 3 && !err)
			break;

		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			rec = node->data;
			cache = next_cache_extent(cache);

			/* Need to free everything up and rescan */
			if (stage == 3) {
				remove_cache_extent(inode_cache, &node->cache);
				free(node);
				free_inode_rec(rec);
				continue;
			}

			if (list_empty(&rec->backrefs))
				continue;

			ret = repair_inode_backrefs(root, rec, inode_cache,
						    stage == 1);
			if (ret < 0) {
				err = ret;
				break;
			}
		}
	}
	if (err)
		return err;

	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	if (rec) {
		ret = check_root_dir(rec);
		if (ret) {
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			error++;
		}
	} else if (repair) {
		struct btrfs_trans_handle *trans;

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			return err;
		}

		fprintf(stderr,
			"root %llu missing its root dir, recreating\n",
			(unsigned long long)root->objectid);

		ret = btrfs_make_root_dir(trans, root, root_dirid);
		btrfs_commit_transaction(trans, root);
	} else {
		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	}

	while (1) {
		cache = search_cache_extent(inode_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
			continue;
		}

		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
			if (ret == 0)
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
		if (repair) {
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!(repair && ret == 0))
			error++;
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		}
		free_inode_rec(rec);
	}
	return (error > 0) ? -1 : 0;
}
static struct root_record *get_root_rec(struct cache_tree *root_cache,
					u64 objectid)
{
	struct cache_extent *cache;
	struct root_record *rec = NULL;
	int ret;

	cache = lookup_cache_extent(root_cache, objectid, 1);
	if (cache) {
		rec = container_of(cache, struct root_record, cache);
	} else {
		rec = calloc(1, sizeof(*rec));
		if (!rec)
			return ERR_PTR(-ENOMEM);
		rec->objectid = objectid;
		INIT_LIST_HEAD(&rec->backrefs);
		rec->cache.start = objectid;
		rec->cache.size = 1;

		ret = insert_cache_extent(root_cache, &rec->cache);
		if (ret)
			return ERR_PTR(-EEXIST);
	}
	return rec;
}

static struct root_backref *get_root_backref(struct root_record *rec,
					     u64 ref_root, u64 dir, u64 index,
					     const char *name, int namelen)
{
	struct root_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->ref_root != ref_root || backref->dir != dir ||
		    backref->namelen != namelen)
			continue;
		if (memcmp(name, backref->name, namelen))
			continue;
		return backref;
	}

	backref = calloc(1, sizeof(*backref) + namelen + 1);
	if (!backref)
		return NULL;
	backref->ref_root = ref_root;
	backref->dir = dir;
	backref->index = index;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
	return backref;
}

static void free_root_record(struct cache_extent *cache)
{
	struct root_record *rec;
	struct root_backref *backref;

	rec = container_of(cache, struct root_record, cache);
	while (!list_empty(&rec->backrefs)) {
		backref = to_root_backref(rec->backrefs.next);
		list_del(&backref->list);
		free(backref);
	}

	free(rec);
}

FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
{
	struct root_record *rec;
	struct root_backref *backref;

	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	BUG_ON(!backref);

	backref->errors |= errors;

	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
		} else {
			backref->index = index;
		}
	}

	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
			rec->found_ref++;
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
			rec->found_ref++;
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;
	}

	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
	return 0;
}
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int ret = 0;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);
		return 0;
	}

	while (1) {
		cache = search_cache_extent(src_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(src_cache, &node->cache);
		free(node);

		ret = is_child_root(root, root->objectid, rec->ino);
		if (ret < 0)
			break;
		else if (ret == 0)
			goto skip;

		list_for_each_entry(backref, &rec->backrefs, list) {
			BUG_ON(backref->found_inode_ref);
			if (backref->found_dir_item)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_ITEM_KEY,
					backref->errors);
			if (backref->found_dir_index)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_INDEX_KEY,
					backref->errors);
		}
skip:
		free_inode_rec(rec);
	}
	if (ret < 0)
		return ret;
	return 0;
}
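
/*
 * Propagate reachability over the collected root_records and report fs trees
 * that are unreferenced or have unresolved root backrefs.
 */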
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
{
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;
	int loop = 1;
	int ret;
	int error;
	int errors = 0;

	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));
	rec->found_ref = 1;

	/* fixme: this can not detect circular references */
	while (loop) {
		loop = 0;
		cache = search_cache_extent(root_cache, 0);
		while (1) {
			if (!cache)
				break;
			rec = container_of(cache, struct root_record, cache);
			cache = next_cache_extent(cache);

			if (rec->found_ref == 0)
				continue;

			list_for_each_entry(backref, &rec->backrefs, list) {
				if (!backref->reachable)
					continue;

				ref_root = get_root_rec(root_cache,
							backref->ref_root);
				BUG_ON(IS_ERR(ref_root));
				if (ref_root->found_ref > 0)
					continue;

				backref->reachable = 0;
				rec->found_ref--;
				if (rec->found_ref == 0)
					loop = 1;
			}
		}
	}

	cache = search_cache_extent(root_cache, 0);
	while (1) {
		if (!cache)
			break;
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);

		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			ret = check_orphan_item(root->fs_info->tree_root,
						rec->objectid);
			if (ret == 0)
				continue;

			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
				continue;
			errors++;
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);
		}

		error = 0;
		if (rec->found_ref > 0 && !rec->found_root_item)
			error = 1;
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
				error = 1;
		}
		if (!error)
			continue;

		errors++;
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");

		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
				continue;
			if (!backref->errors && rec->found_root_item)
				continue;
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->errors);
			print_ref_error(backref->errors);
		}
	}
	return errors > 0 ? 1 : 0;
}
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
{
	u64 dirid;
	u64 index;
	u32 len;
	u32 name_len;
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];
	int error;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);

	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);

	if (name_len <= BTRFS_NAME_LEN) {
		len = name_len;
		error = 0;
	} else {
		len = BTRFS_NAME_LEN;
		error = REF_ERR_NAME_TOO_LONG;
	}
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);

	if (key->type == BTRFS_ROOT_REF_KEY) {
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
	} else {
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
	}
	return 0;
}

static void free_corrupt_block(struct cache_extent *cache)
{
	struct btrfs_corrupt_block *corrupt;

	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
	free(corrupt);
}

FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node keys recorded in the corrupt_blocks
 * cache_tree and rebalance the tree.
 * After the fix, the btree should be writeable.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;
	u64 offset;
	int level;
	int ret = 0;

	if (cache_tree_empty(corrupt_blocks))
		return 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
			strerror(-ret));
		return ret;
	}
	btrfs_init_path(&path);
	cache = first_cache_extent(corrupt_blocks);
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		level = corrupt->level;
		path.lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;

		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with a corrupted brother leaf/node,
		 * so ins_len is set to 0 here.
		 * Balance will be done after all corrupt nodes/leaves are
		 * deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
		if (ret < 0)
			goto out;
		offset = btrfs_node_blockptr(path.nodes[level],
					     path.slots[level]);

		/* Remove the ptr */
		ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
		if (ret < 0)
			goto out;
		/*
		 * Remove the corresponding extent;
		 * the return value is not of concern.
		 */
		btrfs_release_path(&path);
		ret = btrfs_free_extent(trans, root, offset,
					root->fs_info->nodesize, 0,
					root->root_key.objectid, level - 1, 0);
		cache = next_cache_extent(cache);
	}

	/* Balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		memcpy(&key, &corrupt->key, sizeof(key));
		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
		if (ret < 0)
			goto out;
		/* the return will always be > 0 since it won't find the item */
		ret = 0;
		btrfs_release_path(&path);
		cache = next_cache_extent(cache);
	}
out:
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
{
	int ret = 0;
	int err = 0;
	int wret;
	int level;
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;
	struct node_refs nrefs;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
	 *
	 * Unlike the usage in the extent tree check, here we do it on a per
	 * fs/subvol tree basis.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	}

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);
	memset(&nrefs, 0, sizeof(nrefs));

	/* Move the orphan extent records to the corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
				      1);
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	}

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
	else
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)
		return -EIO;

	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
	} else {
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		if (level > btrfs_header_level(root->node) ||
		    level >= BTRFS_MAX_LEVEL) {
			error("ignoring invalid drop level: %u", level);
			goto skip_walking;
		}
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		if (wret < 0)
			goto skip_walking;
		btrfs_node_key(path.nodes[level], &found_key,
			       path.slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}

	while (1) {
		wret = walk_down_tree(root, &path, wc, &level, &nrefs);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;

		wret = walk_up_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}
skip_walking:
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
		while (cache) {
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
					       cache);
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		}
		if (repair) {
			printf("Try to repair the btree for root %llu\n",
			       root->root_key.objectid);
			ret = repair_btree(root, &corrupt_blocks);
			if (ret < 0)
				fprintf(stderr, "Failed to repair btree: %s\n",
					strerror(-ret));
			if (!ret)
				printf("Btree for root %llu is fixed\n",
				       root->root_key.objectid);
		}
	}

	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (err < 0)
		ret = err;

	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
				     root_node.current);
	}

	err = check_inode_recs(root, &root_node.inode_cache);
	if (!ret)
		ret = err;

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
	return ret;
}
static int check_fs_roots(struct btrfs_fs_info *fs_info,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	u64 skip_root = 0;
	int ret;
	int err = 0;

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	key.objectid = skip_root + 1;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	tree_node = tree_root->node;
	while (1) {
		if (tree_node != tree_root->node) {
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				tmp_root = btrfs_read_fs_root_no_cache(
						fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret) {
				err = 1;
				/*
				 * We failed to repair this root but modified
				 * the tree root; after the again: label we
				 * will still hit this root and fail to repair
				 * it, so we must skip it to avoid an infinite
				 * loop.
				 */
				if (repair)
					skip_root = key.objectid;
			}
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);

	return err;
}
static struct tree_backref *find_tree_backref(struct extent_record *rec,
					      u64 parent, u64 root)
{
	struct rb_node *node;
	struct tree_backref *back = NULL;
	struct tree_backref match = {
		.node = {
			.is_data = 0,
		},
	};

	if (parent) {
		match.parent = parent;
		match.node.full_backref = 1;
	} else {
		match.root = root;
	}

	node = rb_search(&rec->backref_tree, &match.node.node,
			 (rb_compare_keys)compare_extent_backref, NULL);
	if (node)
		back = to_tree_backref(rb_node_to_extent_backref(node));

	return back;
}

static struct data_backref *find_data_backref(struct extent_record *rec,
					      u64 parent, u64 root,
					      u64 owner, u64 offset,
					      int found_ref,
					      u64 disk_bytenr, u64 bytes)
{
	struct rb_node *node;
	struct data_backref *back = NULL;
	struct data_backref match = {
		.node = {
			.is_data = 1,
		},
		.owner = owner,
		.offset = offset,
		.bytes = bytes,
		.found_ref = found_ref,
		.disk_bytenr = disk_bytenr,
	};

	if (parent) {
		match.parent = parent;
		match.node.full_backref = 1;
	} else {
		match.root = root;
	}

	node = rb_search(&rec->backref_tree, &match.node.node,
			 (rb_compare_keys)compare_extent_backref, NULL);
	if (node)
		back = to_data_backref(rb_node_to_extent_backref(node));

	return back;
}

static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
			     struct cache_tree *root_cache)
{
	int ret;

	if (check_mode == CHECK_MODE_LOWMEM)
		ret = check_fs_roots_lowmem(fs_info);
	else
		ret = check_fs_roots(fs_info, root_cache);

	return ret;
}
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct extent_backref *back, *tmp;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr,
"data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = to_tree_backref(back);
				fprintf(stderr,
"tree backref %llu parent %llu root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = to_tree_backref(back);
			fprintf(stderr,
				"backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = to_data_backref(back);
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs,
					back);
			}
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		if (!back->is_data) {
			found += 1;
		} else {
			dback = to_data_backref(back);
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr,
	"incorrect global backref count on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}

static void __free_one_backref(struct rb_node *node)
{
	struct extent_backref *back = rb_node_to_extent_backref(node);

	free(back);
}

static void free_all_extent_backrefs(struct extent_record *rec)
{
	rb_free_nodes(&rec->backref_tree, __free_one_backref);
}

static void free_extent_record_cache(struct cache_tree *extent_cache)
{
	struct cache_extent *cache;
	struct extent_record *rec;

	while (1) {
		cache = first_cache_extent(extent_cache);
		if (!cache)
			break;
		rec = container_of(cache, struct extent_record, cache);
		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
		free(rec);
	}
}
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
{
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
		free(rec);
	}
	return 0;
}

static int check_owner_ref(struct btrfs_root *root,
			   struct extent_record *rec,
			   struct extent_buffer *buf)
{
	struct extent_backref *node, *tmp;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;
	int level;
	int found = 0;
	int ret;

	rbtree_postorder_for_each_entry_safe(node, tmp,
					     &rec->backref_tree, node) {
		if (node->is_data)
			continue;
		if (!node->found_ref)
			continue;
		if (node->full_backref)
			continue;
		back = to_tree_backref(node);
		if (btrfs_header_owner(buf) == back->root)
			return 0;
	}
	/*
	 * Some unexpected root item referring to this one, return 1 to
	 * indicate the owner was not found.
	 */
	if (rec->is_root)
		return 1;

	/* try to find the block by searching the corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))
		return 1;

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key_to_cpu(buf, &key, 0);
	else
		btrfs_node_key_to_cpu(buf, &key, 0);

	btrfs_init_path(&path);
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	if (ret < 0)
		return 0;

	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
						path.slots[level + 1]))
		found = 1;

	btrfs_release_path(&path);
	return found ? 0 : 1;
}
static int is_extent_tree_record(struct extent_record *rec)
{
	struct extent_backref *node, *tmp;
	struct tree_backref *back;
	int is_extent = 0;

	rbtree_postorder_for_each_entry_safe(node, tmp,
					     &rec->backref_tree, node) {
		if (node->is_data)
			return 0;
		back = to_tree_backref(node);
		if (node->full_backref)
			continue;
		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
			is_extent = 1;
	}
	return is_extent;
}

static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
			       u64 start, u64 len)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;

	cache = lookup_cache_extent(extent_cache, start, len);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (!is_extent_tree_record(rec))
		return 0;

	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
}
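
/*
 * Swap the keys (and, for leaves, the item data and offsets) at @slot and
 * @slot + 1; used by fix_key_order() to restore ascending key order.
 */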
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;

			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}

static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int i;
	int level = path->lowest_level;
	int ret = 0;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		if (level) {
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
		} else {
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		}
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
			continue;
		ret = swap_values(root, path, buf, i);
		if (ret)
			break;
		btrfs_mark_buffer_dirty(buf);
	}
	return ret;
}
static int delete_bogus_item(struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}

static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr,
				"item is off the end of the leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out.  Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	return ret;
}
/*
 * Attempt to fix basic block failures.  If we can't fix it for whatever reason
 * then just return -EIO.
 */
static int try_to_fix_bad_block(struct btrfs_root *root,
				struct extent_buffer *buf,
				enum btrfs_tree_block_status status)
{
	struct btrfs_trans_handle *trans;
	struct ulist *roots;
	struct ulist_node *node;
	struct btrfs_root *search_root;
	struct btrfs_path path;
	struct ulist_iterator iter;
	struct btrfs_key root_key, key;
	int ret;

	if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
	    status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
		return -EIO;

	ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
	if (ret)
		return -EIO;

	btrfs_init_path(&path);
	ULIST_ITER_INIT(&iter);
	while ((node = ulist_next(roots, &iter))) {
		root_key.objectid = node->val;
		root_key.type = BTRFS_ROOT_ITEM_KEY;
		root_key.offset = (u64)-1;

		search_root = btrfs_read_fs_root(root->fs_info, &root_key);
		if (IS_ERR(search_root)) {
			ret = -EIO;
			break;
		}

		trans = btrfs_start_transaction(search_root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}

		path.lowest_level = btrfs_header_level(buf);
		path.skip_check_block = 1;
		if (path.lowest_level)
			btrfs_node_key_to_cpu(buf, &key, 0);
		else
			btrfs_item_key_to_cpu(buf, &key, 0);
		ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
		if (ret) {
			ret = -EIO;
			btrfs_commit_transaction(trans, search_root);
			break;
		}
		if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
			ret = fix_key_order(search_root, &path);
		else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
			ret = fix_item_offset(search_root, &path);
		if (ret) {
			btrfs_commit_transaction(trans, search_root);
			break;
		}
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, search_root);
	}
	btrfs_release_path(&path);
	return ret;
}
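
/*
 * Validate a single tree block against its extent record: run the leaf/node
 * checks, optionally try to repair bad key order or item offsets, and mark
 * the record content/owner-ref checked on success.
 */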
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cowed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
static struct tree_backref *find_tree_backref(struct extent_record *rec,
					      u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}

static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
					       u64 parent, u64 root)
{
	struct tree_backref *ref = malloc(sizeof(*ref));

	if (!ref)
		return NULL;
	memset(&ref->node, 0, sizeof(ref->node));
	if (parent > 0) {
		ref->parent = parent;
		ref->node.full_backref = 1;
	} else {
		ref->root = root;
		ref->node.full_backref = 0;
	}

	return ref;
}

static struct data_backref *find_data_backref(struct extent_record *rec,
					      u64 parent, u64 root,
					      u64 owner, u64 offset,
					      int found_ref,
					      u64 disk_bytenr, u64 bytes)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct data_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root && back->owner == owner &&
			    back->offset == offset) {
				if (found_ref && node->found_ref &&
				    (back->bytes != bytes ||
				     back->disk_bytenr != disk_bytenr))
					continue;
				return back;
			}
		}
	}
	return NULL;
}

static struct data_backref *alloc_data_backref(struct extent_record *rec,
					       u64 parent, u64 root,
					       u64 owner, u64 offset,
					       u64 max_size)
{
	struct data_backref *ref = malloc(sizeof(*ref));

	if (!ref)
		return NULL;
	memset(&ref->node, 0, sizeof(ref->node));
	ref->node.is_data = 1;

	if (parent > 0) {
		ref->parent = parent;
		ref->node.full_backref = 1;
	} else {
		ref->root = root;
		ref->owner = owner;
		ref->offset = offset;
		ref->node.full_backref = 0;
	}
	ref->bytes = max_size;
	if (max_size > rec->max_size)
		rec->max_size = max_size;
	return ref;
}
/* Check if the type of extent matches with its chunk */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly */
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extents: as they are also marked as metadata, we can
	 * only make sure an extent is a SYSTEM extent by its backref.
	 */
	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
/*
 * Allocate a new extent record, fill default values from @tmpl and insert it
 * into @extent_cache. The caller is supposed to make sure [start, nr) is not
 * already in the cache, otherwise this fails.
 */
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
				   struct extent_record *tmpl)
{
	struct extent_record *rec;
	int ret = 0;

	BUG_ON(tmpl->max_size == 0);
	rec = malloc(sizeof(*rec));
	if (!rec)
		return -ENOMEM;
	rec->start = tmpl->start;
	rec->max_size = tmpl->max_size;
	rec->nr = max(tmpl->nr, tmpl->max_size);
	rec->found_rec = tmpl->found_rec;
	rec->content_checked = tmpl->content_checked;
	rec->owner_ref_checked = tmpl->owner_ref_checked;
	rec->num_duplicates = 0;
	rec->metadata = tmpl->metadata;
	rec->flag_block_full_backref = FLAG_UNSET;
	rec->bad_full_backref = 0;
	rec->crossing_stripes = 0;
	rec->wrong_chunk_type = 0;
	rec->is_root = tmpl->is_root;
	rec->refs = tmpl->refs;
	rec->extent_item_refs = tmpl->extent_item_refs;
	rec->parent_generation = tmpl->parent_generation;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->dups);
	INIT_LIST_HEAD(&rec->list);
	rec->backref_tree = RB_ROOT;
	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
	rec->cache.start = tmpl->start;
	rec->cache.size = tmpl->nr;
	ret = insert_cache_extent(extent_cache, &rec->cache);
	if (ret) {
		free(rec);
		return ret;
	}
	bytes_used += rec->nr;

	if (tmpl->metadata)
		rec->crossing_stripes = check_crossing_stripes(global_info,
				rec->start, global_info->nodesize);
	check_extent_type(rec);
	return ret;
}
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are consolidated with the existing record:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct extent_record *tmpl)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		if (tmpl->refs)
			rec->refs++;
		if (rec->nr == 1)
			rec->nr = max(tmpl->nr, tmpl->max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backref.
		 */
		if (tmpl->found_rec) {
			if (tmpl->start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				tmp->start = tmpl->start;
				tmp->max_size = tmpl->max_size;
				tmp->nr = tmpl->nr;
				tmp->found_rec = 1;
				tmp->metadata = tmpl->metadata;
				tmp->extent_item_refs = tmpl->extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = tmpl->nr;
				rec->found_rec = 1;
			}
		}

		if (tmpl->extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr,
			"block %llu rec extent_item_refs %llu, passed %llu\n",
					(unsigned long long)tmpl->start,
					(unsigned long long)
					rec->extent_item_refs,
					(unsigned long long)
					tmpl->extent_item_refs);
			}
			rec->extent_item_refs = tmpl->extent_item_refs;
		}
		if (tmpl->is_root)
			rec->is_root = 1;
		if (tmpl->content_checked)
			rec->content_checked = 1;
		if (tmpl->owner_ref_checked)
			rec->owner_ref_checked = 1;
		memcpy(&rec->parent_key, &tmpl->parent_key,
		       sizeof(tmpl->parent_key));
		if (tmpl->parent_generation)
			rec->parent_generation = tmpl->parent_generation;
		if (rec->max_size < tmpl->max_size)
			rec->max_size = tmpl->max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (tmpl->metadata)
			rec->crossing_stripes = check_crossing_stripes(
					global_info, rec->start,
					global_info->nodesize);
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	ret = add_extent_rec_nolookup(extent_cache, tmpl);

	return ret;
}
static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, int found_ref)
{
	struct extent_record *rec;
	struct tree_backref *back;
	struct cache_extent *cache;
	int ret;
	bool insert = false;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.metadata = 1;
		tmpl.max_size = 1;

		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
		if (ret)
			return ret;

		/* really a bug in cache_extent implement now */
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			return -ENOENT;
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->start != bytenr) {
		/*
		 * Several causes, from an unaligned bytenr to overlapping
		 * extents.
		 */
		return -EEXIST;
	}

	back = find_tree_backref(rec, parent, root);
	if (!back) {
		back = alloc_tree_backref(rec, parent, root);
		if (!back)
			return -ENOMEM;
		insert = true;
	}

	if (found_ref) {
		if (back->node.found_ref) {
			fprintf(stderr,
	"Extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_ref = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr,
	"extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_extent_tree = 1;
	}
	if (insert)
		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
			compare_extent_backref));
	check_extent_type(rec);
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, u64 owner, u64 offset,
			    u32 num_refs, int found_ref, u64 max_size)
{
	struct extent_record *rec;
	struct data_backref *back;
	struct cache_extent *cache;
	int ret;
	bool insert = false;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.max_size = max_size;

		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
		if (ret)
			return ret;

		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			return -ENOENT;
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->max_size < max_size)
		rec->max_size = max_size;

	/*
	 * If found_ref is set then max_size is the real size and must match the
	 * existing refs.  So if we have already found a ref then we need to
	 * make sure that this ref matches the existing one, otherwise we need
	 * to add a new backref so we can notice that the backrefs don't match
	 * and we need to figure out who is telling the truth.  This is to
	 * account for that awful fsync bug I introduced where we'd end up with
	 * a btrfs_file_extent_item that would have its length include multiple
	 * prealloc extents or point inside of a prealloc extent.
	 */
	back = find_data_backref(rec, parent, root, owner, offset, found_ref,
				 bytenr, max_size);
	if (!back) {
		back = alloc_data_backref(rec, parent, root, owner, offset,
					  max_size);
		if (!back)
			return -ENOMEM;
		insert = true;
	}

	if (found_ref) {
		BUG_ON(num_refs != 1);
		if (back->node.found_ref)
			BUG_ON(back->bytes != max_size);
		back->node.found_ref = 1;
		back->found_ref += 1;
		if (back->bytes != max_size || back->disk_bytenr != bytenr) {
			back->bytes = max_size;
			back->disk_bytenr = bytenr;

			/* Need to reinsert if not already in the tree */
			if (!insert) {
				rb_erase(&back->node.node, &rec->backref_tree);
				insert = true;
			}
		}
		rec->content_checked = 1;
		rec->owner_ref_checked = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr,
"Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root,
				(unsigned long long)owner,
				(unsigned long long)offset,
				(unsigned long)num_refs);
		}
		back->num_refs = num_refs;
		back->node.found_extent_tree = 1;
	}
	if (insert)
		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
			compare_extent_backref));

	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
static int add_pending(struct cache_tree *pending,
		       struct cache_tree *seen, u64 bytenr, u32 size)
{
	int ret;

	ret = add_cache_extent(seen, bytenr, size);
	if (ret)
		return ret;
	add_cache_extent(pending, bytenr, size);
	return 0;
}

static int pick_next_pending(struct cache_tree *pending,
			     struct cache_tree *reada,
			     struct cache_tree *nodes,
			     u64 last, struct block_info *bits, int bits_nr,
			     int *reada_bits)
{
	unsigned long node_start = last;
	struct cache_extent *cache;
	int ret;

	cache = search_cache_extent(reada, 0);
	if (cache) {
		bits[0].start = cache->start;
		bits[0].size = cache->size;
		*reada_bits = 1;
		return 1;
	}
	*reada_bits = 0;
	if (node_start > 32768)
		node_start -= 32768;

	cache = search_cache_extent(nodes, node_start);
	if (!cache)
		cache = search_cache_extent(nodes, 0);

	if (!cache) {
		cache = search_cache_extent(pending, 0);
		if (!cache)
			return 0;
		ret = 0;
		do {
			bits[ret].start = cache->start;
			bits[ret].size = cache->size;
			cache = next_cache_extent(cache);
			ret++;
		} while (cache && ret < bits_nr);
		return ret;
	}

	ret = 0;
	do {
		bits[ret].start = cache->start;
		bits[ret].size = cache->size;
		cache = next_cache_extent(cache);
		ret++;
	} while (cache && ret < bits_nr);

	if (bits_nr - ret > 8) {
		u64 lookup = bits[0].start + bits[0].size;
		struct cache_extent *next;

		next = search_cache_extent(pending, lookup);
		while (next) {
			if (next->start - lookup > 32768)
				break;
			bits[ret].start = next->start;
			bits[ret].size = next->size;
			lookup = next->start + next->size;
			ret++;
			if (ret == bits_nr)
				break;
			next = next_cache_extent(next);
			if (!next)
				break;
		}
	}
	return ret;
}
static void free_chunk_record(struct cache_extent *cache)
{
	struct chunk_record *rec;

	rec = container_of(cache, struct chunk_record, cache);
	list_del_init(&rec->list);
	list_del_init(&rec->dextents);
	free(rec);
}

void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
	cache_tree_free_extents(chunk_cache, free_chunk_record);
}

static void free_device_record(struct rb_node *node)
{
	struct device_record *rec;

	rec = container_of(node, struct device_record, node);
	free(rec);
}

FREE_RB_BASED_TREE(device_cache, free_device_record);
int insert_block_group_record(struct block_group_tree *tree,
			      struct block_group_record *bg_rec)
{
	int ret;

	ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
	if (ret)
		return ret;

	list_add_tail(&bg_rec->list, &tree->block_groups);
	return 0;
}

static void free_block_group_record(struct cache_extent *cache)
{
	struct block_group_record *rec;

	rec = container_of(cache, struct block_group_record, cache);
	list_del_init(&rec->list);
	free(rec);
}

void free_block_group_tree(struct block_group_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_block_group_record);
}
int insert_device_extent_record(struct device_extent_tree *tree,
				struct device_extent_record *de_rec)
{
	int ret;

	/*
	 * Device extent is a bit different from the other extents, because
	 * the extents which belong to the different devices may have the
	 * same start and size, so we need to use the special extent cache
	 * search/insert functions.
	 */
	ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
	if (ret)
		return ret;

	list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
	list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
	return 0;
}

static void free_device_extent_record(struct cache_extent *cache)
{
	struct device_extent_record *rec;

	rec = container_of(cache, struct device_extent_record, cache);
	if (!list_empty(&rec->chunk_list))
		list_del_init(&rec->chunk_list);
	if (!list_empty(&rec->device_list))
		list_del_init(&rec->device_list);
	free(rec);
}

void free_device_extent_tree(struct device_extent_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				       0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
					    struct btrfs_key *key,
					    int slot)
{
	struct btrfs_chunk *ptr;
	struct chunk_record *rec;
	int num_stripes, i;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, ptr);

	rec = calloc(1, btrfs_chunk_record_size(num_stripes));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	INIT_LIST_HEAD(&rec->list);
	INIT_LIST_HEAD(&rec->dextents);

	rec->cache.start = key->offset;
	rec->cache.size = btrfs_chunk_length(leaf, ptr);

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	rec->length = rec->cache.size;
	rec->owner = btrfs_chunk_owner(leaf, ptr);
	rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
	rec->type_flags = btrfs_chunk_type(leaf, ptr);
	rec->io_width = btrfs_chunk_io_width(leaf, ptr);
	rec->io_align = btrfs_chunk_io_align(leaf, ptr);
	rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
	rec->num_stripes = num_stripes;
	rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);

	for (i = 0; i < rec->num_stripes; ++i) {
		rec->stripes[i].devid =
			btrfs_stripe_devid_nr(leaf, ptr, i);
		rec->stripes[i].offset =
			btrfs_stripe_offset_nr(leaf, ptr, i);
		read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
			(unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
			BTRFS_UUID_SIZE);
	}

	return rec;
}
static int process_chunk_item(struct cache_tree *chunk_cache,
			      struct btrfs_key *key, struct extent_buffer *eb,
			      int slot)
{
	struct chunk_record *rec;
	struct btrfs_chunk *chunk;
	int ret = 0;

	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
	/*
	 * Do extra check for this chunk item,
	 *
	 * It's still possible one can craft a leaf with CHUNK_ITEM, with
	 * wrong owner(3) out of chunk tree, to pass both chunk tree check
	 * and owner<->key_type check.
	 */
	ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
				      key->offset);
	if (ret < 0) {
		error("chunk(%llu, %llu) is not valid, ignore it",
		      key->offset, btrfs_chunk_length(eb, chunk));
		return 0;
	}

	rec = btrfs_new_chunk_record(eb, key, slot);
	ret = insert_cache_extent(chunk_cache, &rec->cache);
	if (ret) {
		fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
			rec->offset, rec->length);
		free(rec);
	}

	return ret;
}
static int process_device_item(struct rb_root *dev_cache,
		struct btrfs_key *key, struct extent_buffer *eb, int slot)
{
	struct btrfs_dev_item *ptr;
	struct device_record *rec;
	int ret = 0;

	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);

	rec = malloc(sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		return -ENOMEM;
	}

	rec->devid = key->offset;
	rec->generation = btrfs_header_generation(eb);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	rec->devid = btrfs_device_id(eb, ptr);
	rec->total_byte = btrfs_device_total_bytes(eb, ptr);
	rec->byte_used = btrfs_device_bytes_used(eb, ptr);

	ret = rb_insert(dev_cache, &rec->node, device_record_compare);
	if (ret) {
		fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
		free(rec);
	}

	return ret;
}
struct block_group_record *
btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
			     int slot)
{
	struct btrfs_block_group_item *ptr;
	struct block_group_record *rec;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	rec->cache.start = key->objectid;
	rec->cache.size = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
	rec->flags = btrfs_disk_block_group_flags(leaf, ptr);

	INIT_LIST_HEAD(&rec->list);

	return rec;
}
static int process_block_group_item(struct block_group_tree *block_group_cache,
				    struct btrfs_key *key,
				    struct extent_buffer *eb, int slot)
{
	struct block_group_record *rec;
	int ret = 0;

	rec = btrfs_new_block_group_record(eb, key, slot);
	ret = insert_block_group_record(block_group_cache, rec);
	if (ret) {
		fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
			rec->objectid, rec->offset);
		free(rec);
	}

	return ret;
}
struct device_extent_record *
btrfs_new_device_extent_record(struct extent_buffer *leaf,
			       struct btrfs_key *key, int slot)
{
	struct device_extent_record *rec;
	struct btrfs_dev_extent *ptr;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	rec->cache.objectid = key->objectid;
	rec->cache.start = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
	rec->chunk_objecteid =
		btrfs_dev_extent_chunk_objectid(leaf, ptr);
	rec->chunk_offset =
		btrfs_dev_extent_chunk_offset(leaf, ptr);
	rec->length = btrfs_dev_extent_length(leaf, ptr);
	rec->cache.size = rec->length;

	INIT_LIST_HEAD(&rec->chunk_list);
	INIT_LIST_HEAD(&rec->device_list);

	return rec;
}
static int
process_device_extent_item(struct device_extent_tree *dev_extent_cache,
			   struct btrfs_key *key, struct extent_buffer *eb,
			   int slot)
{
	struct device_extent_record *rec;
	int ret;

	rec = btrfs_new_device_extent_record(eb, key, slot);
	ret = insert_device_extent_record(dev_extent_cache, rec);
	if (ret) {
		fprintf(stderr,
			"Device extent[%llu, %llu, %llu] existed.\n",
			rec->objectid, rec->offset, rec->length);
		free(rec);
	}

	return ret;
}
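
/*
 * Turn an EXTENT_ITEM/METADATA_ITEM into an extent_record and walk its
 * inline references, registering a tree or data backref for each one.
 * Malformed items (bad alignment or length) are reported and skipped.
 */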
static int process_extent_item(struct btrfs_root *root,
			       struct cache_tree *extent_cache,
			       struct extent_buffer *eb, int slot)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct btrfs_key key;
	struct extent_record tmpl;
	unsigned long end;
	unsigned long ptr;
	int ret;
	int type;
	u32 item_size = btrfs_item_size_nr(eb, slot);
	u64 refs = 0;
	u64 offset;
	u64 num_bytes;
	int metadata = 0;

	btrfs_item_key_to_cpu(eb, &key, slot);

	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		metadata = 1;
		num_bytes = root->fs_info->nodesize;
	} else {
		num_bytes = key.offset;
	}

	if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
		error("ignoring invalid extent, bytenr %llu is not aligned to %u",
		      key.objectid, root->fs_info->sectorsize);
		return -EIO;
	}
	if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		struct btrfs_extent_item_v0 *ei0;

		if (item_size != sizeof(*ei0)) {
			error(
	"invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
			      key.objectid, key.type, key.offset,
			      btrfs_header_bytenr(eb), slot);
			return -EIO;
		}
		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
		refs = btrfs_extent_refs_v0(eb, ei0);
#endif
		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = key.objectid;
		tmpl.nr = num_bytes;
		tmpl.extent_item_refs = refs;
		tmpl.metadata = metadata;
		tmpl.max_size = num_bytes;

		return add_extent_rec(extent_cache, &tmpl);
	}

	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
	refs = btrfs_extent_refs(eb, ei);
	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		metadata = 1;
	else
		metadata = 0;
	if (metadata && num_bytes != root->fs_info->nodesize) {
		error("ignore invalid metadata extent, length %llu does not equal to %u",
		      num_bytes, root->fs_info->nodesize);
		return -EIO;
	}
	if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
		error("ignore invalid data extent, length %llu is not aligned to %u",
		      num_bytes, root->fs_info->sectorsize);
		return -EIO;
	}

	memset(&tmpl, 0, sizeof(tmpl));
	tmpl.start = key.objectid;
	tmpl.nr = num_bytes;
	tmpl.extent_item_refs = refs;
	tmpl.metadata = metadata;
	tmpl.max_size = num_bytes;
	add_extent_rec(extent_cache, &tmpl);

	ptr = (unsigned long)(ei + 1);
	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
	    key.type == BTRFS_EXTENT_ITEM_KEY)
		ptr += sizeof(struct btrfs_tree_block_info);

	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(eb, iref);
		offset = btrfs_extent_inline_ref_offset(eb, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_backref(extent_cache, key.objectid,
					       0, offset, 0);
			if (ret < 0)
				error(
		"add_tree_backref failed (extent items tree block): %s",
				      strerror(-ret));
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_backref(extent_cache, key.objectid,
					       offset, 0, 0);
			if (ret < 0)
				error(
		"add_tree_backref failed (extent items shared block): %s",
				      strerror(-ret));
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			add_data_backref(extent_cache, key.objectid, 0,
					btrfs_extent_data_ref_root(eb, dref),
					btrfs_extent_data_ref_objectid(eb,
								       dref),
					btrfs_extent_data_ref_offset(eb, dref),
					btrfs_extent_data_ref_count(eb, dref),
					0, num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			add_data_backref(extent_cache, key.objectid, offset,
					0, 0, 0,
					btrfs_shared_data_ref_count(eb, sref),
					0, num_bytes);
			break;
		default:
			fprintf(stderr,
				"corrupt extent record: key [%llu,%u,%llu]\n",
				key.objectid, key.type, num_bytes);
			return -EIO;
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return 0;
}
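
/*
 * Verify that the free space cache of @cache covers [@offset, @offset +
 * @bytes), skipping over any superblock mirrors that fall inside the range,
 * and drop the matching free space entry once it has been checked.
 */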
static int check_cache_range(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache,
			     u64 offset, u64 bytes)
{
	struct btrfs_free_space *entry;
	u64 *logical;
	u64 stripe_len;
	u64 bytenr;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(root->fs_info,
				       cache->key.objectid, bytenr,
				       &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			if (logical[nr] + stripe_len <= offset)
				continue;
			if (offset + bytes <= logical[nr])
				continue;
			if (logical[nr] == offset) {
				if (stripe_len >= bytes) {
					free(logical);
					return 0;
				}
				bytes -= stripe_len;
				offset += stripe_len;
			} else if (logical[nr] < offset) {
				if (logical[nr] + stripe_len >=
				    offset + bytes) {
					free(logical);
					return 0;
				}
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			} else {
				/*
				 * Could be tricky, the super may land in the
				 * middle of the area we're checking.  First
				 * check the easiest case, it's at the end.
				 */
				if (logical[nr] + stripe_len >=
				    offset + bytes) {
					bytes = logical[nr] - offset;
					continue;
				}

				/* Check the left side */
				ret = check_cache_range(root, cache, offset,
							logical[nr] - offset);
				if (ret) {
					free(logical);
					return ret;
				}

				/* Now we continue with the right side */
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			}
		}

		free(logical);
	}

	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
	if (!entry) {
		fprintf(stderr, "there is no free space entry for %llu-%llu\n",
			offset, offset + bytes);
		return -EINVAL;
	}

	if (entry->offset != offset) {
		fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
			entry->offset);
		return -EINVAL;
	}

	if (entry->bytes != bytes) {
		fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
			bytes, entry->bytes, offset);
		return -EINVAL;
	}

	unlink_free_space(cache->free_space_ctl, entry);
	free(entry);
	return 0;
}
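
/*
 * Walk the extent tree for the range covered by @cache and cross check every
 * gap between allocated extents against the block group's free space cache.
 */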
static int verify_space_cache(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 last;
	int ret = 0;

	root = root->fs_info->extent_root;

	last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	btrfs_init_path(&path);
	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	ret = 0;
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid >= cache->key.offset + cache->key.objectid)
			break;
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}

		if (last == key.objectid) {
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				last = key.objectid + key.offset;
			else
				last = key.objectid + root->fs_info->nodesize;
			path.slots[0]++;
			continue;
		}

		ret = check_cache_range(root, cache, last,
					key.objectid - last);
		if (ret)
			break;
		if (key.type == BTRFS_EXTENT_ITEM_KEY)
			last = key.objectid + key.offset;
		else
			last = key.objectid + root->fs_info->nodesize;
		path.slots[0]++;
	}

	if (last < cache->key.objectid + cache->key.offset)
		ret = check_cache_range(root, cache, last,
					cache->key.objectid +
					cache->key.offset - last);

out:
	btrfs_release_path(&path);

	if (!ret &&
	    !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
		fprintf(stderr, "There are still entries left in the space "
			"cache\n");
		ret = -EINVAL;
	}

	return ret;
}
static int check_space_cache(struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
	int ret;
	int error = 0;

	if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
	    btrfs_super_generation(root->fs_info->super_copy) !=
	    btrfs_super_cache_generation(root->fs_info->super_copy)) {
		printf("cache and super generation don't match, space cache "
		       "will be invalidated\n");
		return 0;
	}

	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, start);
		if (!cache)
			break;

		start = cache->key.objectid + cache->key.offset;
		if (!cache->free_space_ctl) {
			if (btrfs_init_free_space_ctl(cache,
						root->fs_info->sectorsize)) {
				ret = -ENOMEM;
				break;
			}
		} else {
			btrfs_remove_free_space_cache(cache);
		}

		if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
			ret = exclude_super_stripes(root, cache);
			if (ret) {
				fprintf(stderr, "could not exclude super stripes: %s\n",
					strerror(-ret));
				error++;
				continue;
			}
			ret = load_free_space_tree(root->fs_info, cache);
			free_excluded_extents(root, cache);
			if (ret < 0) {
				fprintf(stderr, "could not load free space tree: %s\n",
					strerror(-ret));
				error++;
				continue;
			}
			error += ret;
		} else {
			ret = load_free_space_cache(root->fs_info, cache);
			if (ret < 0)
				error++;
			if (ret <= 0)
				continue;
		}

		ret = verify_space_cache(root, cache);
		if (ret) {
			fprintf(stderr, "cache appears valid but isn't %llu\n",
				cache->key.objectid);
			error++;
		}
	}

	return error ? -EINVAL : 0;
}
/*
 * Check data checksum for [@bytenr, @bytenr + @num_bytes).
 *
 * Return <0 for fatal error (fails to read checksum/data or allocate memory).
 * Return >0 for csum mismatch for any copy.
 * Return 0 if everything is OK.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, unsigned long leaf_offset,
			      struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 offset = 0;
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
	char *data;
	unsigned long csum_offset;
	u32 csum;
	u32 csum_expected;
	u64 read_len;
	u64 data_checked = 0;
	u64 tmp;
	int ret = 0;
	int mirror;
	int num_copies;
	bool csum_mismatch = false;

	if (num_bytes % fs_info->sectorsize)
		return -EINVAL;
	data = malloc(num_bytes);
	if (!data)
		return -ENOMEM;

	num_copies = btrfs_num_copies(root->fs_info, bytenr, num_bytes);
	while (offset < num_bytes) {
		/*
		 * Mirror 0 means 'read from any valid copy', so it's skipped.
		 * The indexes 1-N represent the n-th copy for levels with
		 * redundancy.
		 */
		for (mirror = 1; mirror <= num_copies; mirror++) {
			read_len = num_bytes - offset;
			/* read as much space once a time */
			ret = read_extent_data(fs_info, data + offset,
					       bytenr + offset, &read_len,
					       mirror);
			if (ret)
				goto out;

			data_checked = 0;
			/* verify every 4k data's checksum */
			while (data_checked < read_len) {
				csum = ~(u32)0;
				tmp = offset + data_checked;

				csum = btrfs_csum_data((char *)data + tmp,
						csum, fs_info->sectorsize);
				btrfs_csum_final(csum, (u8 *)&csum);

				csum_offset = leaf_offset +
					tmp / fs_info->sectorsize * csum_size;
				read_extent_buffer(eb, (char *)&csum_expected,
						   csum_offset, csum_size);
				if (csum != csum_expected) {
					csum_mismatch = true;
					fprintf(stderr,
			"mirror %d bytenr %llu csum %u expected csum %u\n",
						mirror, bytenr + tmp,
						csum, csum_expected);
				}
				data_checked += fs_info->sectorsize;
			}
		}
		offset += read_len;
	}
out:
	free(data);
	if (!ret && csum_mismatch)
		ret = 1;
	return ret;
}
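
/*
 * Make sure the whole range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree; report any part of the range that is not.
 */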
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	btrfs_init_path(&path);
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

again:
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
				0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up extent record %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	} else if (ret) {
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
	}

	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

	/*
	 * Block group items come before extent items if they have the same
	 * bytenr, so walk back one more just in case.  Dear future traveller,
	 * first congrats on mastering time travel.  Now if it's not too much
	 * trouble could you go back to 2006 and tell Chris to make the
	 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
	 * EXTENT_ITEM_KEY please?
	 */
	while (key.type > BTRFS_EXTENT_ITEM_KEY) {
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	}

	while (num_bytes) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				btrfs_release_path(&path);
				return ret;
			} else if (ret) {
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid + key.offset < bytenr) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid > bytenr + num_bytes)
			break;

		if (key.objectid == bytenr) {
			if (key.offset >= num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes -= key.offset;
			bytenr += key.offset;
		} else if (key.objectid < bytenr) {
			if (key.objectid + key.offset >= bytenr + num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes = (bytenr + num_bytes) -
				(key.objectid + key.offset);
			bytenr = key.objectid + key.offset;
		} else {
			if (key.objectid + key.offset < bytenr + num_bytes) {
				u64 new_start = key.objectid + key.offset;
				u64 new_bytes = bytenr + num_bytes - new_start;

				/*
				 * Weird case, the extent is in the middle of
				 * our range, we'll have to search one side
				 * and then the other.  Not sure if this happens
				 * in real life, but no harm in coding it up
				 * anyway just in case.
				 */
				btrfs_release_path(&path);
				ret = check_extent_exists(root, new_start,
							  new_bytes);
				if (ret) {
					fprintf(stderr, "Right section didn't "
						"have a record\n");
					break;
				}
				num_bytes = key.objectid - bytenr;
				goto again;
			}
			num_bytes = key.objectid - bytenr;
		}
		path.slots[0]++;
	}
	ret = 0;

out:
	if (num_bytes && !ret) {
		fprintf(stderr,
			"there are no extents for csum range %llu-%llu\n",
			bytenr, bytenr + num_bytes);
		ret = 1;
	}

	btrfs_release_path(&path);
	return ret;
}
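
/*
 * Walk the csum tree and make sure every checksummed range has a matching
 * data extent; when data csum checking is enabled (check_data_csum) also
 * verify the data itself against the stored checksums.
 */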
static int check_csums(struct btrfs_root *root)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 offset = 0, num_bytes = 0;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
	u64 data_len;
	int ret;
	unsigned long leaf_offset;
	bool verify_csum = !!check_data_csum;

	root = root->fs_info->csum_root;
	if (!extent_buffer_uptodate(root->node)) {
		fprintf(stderr, "No valid csum tree found\n");
		return -ENOENT;
	}

	btrfs_init_path(&path);
	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error searching csum tree %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	}

	if (ret > 0 && path.slots[0])
		path.slots[0]--;
	ret = 0;

	/*
	 * For metadata dump (btrfs-image) all data is wiped so verifying data
	 * csum is meaningless and will always report csum error.
	 */
	if (check_data_csum && (btrfs_super_flags(root->fs_info->super_copy) &
	    (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))) {
		printf("skip data csum verification for metadata dump\n");
		verify_csum = false;
	}

	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				break;
			}
			if (ret)
				break;
		}
		leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
			path.slots[0]++;
			continue;
		}

		data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
			    csum_size) * root->fs_info->sectorsize;
		if (!verify_csum)
			goto skip_csum_check;
		leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
		ret = check_extent_csums(root, key.offset, data_len,
					 leaf_offset, leaf);
		/*
		 * Only break for fatal errors, if mismatch is found, continue
		 * checking until all extents are checked.
		 */
		if (ret < 0)
			break;

skip_csum_check:
		if (!num_bytes) {
			offset = key.offset;
		} else if (key.offset != offset + num_bytes) {
			ret = check_extent_exists(root, offset, num_bytes);
			if (ret) {
				fprintf(stderr,
		"csum exists for %llu-%llu but there is no extent record\n",
					offset, offset + num_bytes);
			}
			offset = key.offset;
			num_bytes = 0;
		}
		num_bytes += data_len;
		path.slots[0]++;
	}

	btrfs_release_path(&path);
	return ret;
}
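
/* Return 1 if @key sorts strictly before @drop_key, 0 otherwise. */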
static int is_dropped_key(struct btrfs_key *key,
			  struct btrfs_key *drop_key)
{
	if (key->objectid < drop_key->objectid)
		return 1;
	else if (key->objectid == drop_key->objectid) {
		if (key->type < drop_key->type)
			return 1;
		else if (key->type == drop_key->type) {
			if (key->offset < drop_key->offset)
				return 1;
		}
	}
	return 0;
}
/*
 * Here are the rules for FULL_BACKREF.
 *
 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 *    FULL_BACKREF set.
 * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
 *    if it happened after the relocation occurred since we'll have dropped the
 *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
 *    have no real way to know for sure.
 *
 * We process the blocks one root at a time, and we start from the lowest root
 * objectid and go to the highest.  So we can just lookup the owner backref for
 * the record and if we don't find it then we know it doesn't exist and we have
 * a full backref.
 *
 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
 * be set or not and then we can check later once we've gathered all the refs.
 */
static int calc_extent_flag(struct cache_tree *extent_cache,
			    struct extent_buffer *buf,
			    struct root_item_record *ri,
			    u64 *flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct tree_backref *tback;
	u64 owner = 0;

	cache = lookup_cache_extent(extent_cache, buf->start, 1);
	/* we have added this extent before */
	if (!cache)
		return -ENOENT;

	rec = container_of(cache, struct extent_record, cache);

	/*
	 * Except file/reloc tree, we can not have full backref mode.
	 */
	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
		goto normal;

	if (buf->start == ri->bytenr)
		goto normal;

	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		goto full_backref;

	owner = btrfs_header_owner(buf);
	if (owner == ri->objectid)
		goto normal;

	tback = find_tree_backref(rec, 0, owner);
	if (!tback)
		goto full_backref;

normal:
	*flags = 0;
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 0)
		rec->bad_full_backref = 1;
	return 0;

full_backref:
	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 1)
		rec->bad_full_backref = 1;
	return 0;
}
static void report_mismatch_key_root(u8 key_type, u64 rootid)
{
	fprintf(stderr, "Invalid key type(");
	print_key_type(stderr, 0, key_type);
	fprintf(stderr, ") found in root(");
	print_objectid(stderr, rootid, 0);
	fprintf(stderr, ")\n");
}
/*
 * Check if the key is valid with its extent buffer.
 *
 * This is an early check in case an invalid key exists in an extent buffer.
 * This is not comprehensive yet, but should prevent a wrong key/item from
 * being passed further into the checker.
 */
static int check_type_with_root(u64 rootid, u8 key_type)
{
	switch (key_type) {
	/* Only valid in chunk tree */
	case BTRFS_DEV_ITEM_KEY:
	case BTRFS_CHUNK_ITEM_KEY:
		if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
			goto err;
		break;
	/* valid in csum and log tree */
	case BTRFS_CSUM_TREE_OBJECTID:
		if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
		      rootid == BTRFS_CSUM_TREE_OBJECTID))
			goto err;
		break;
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
		if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
			goto err;
		break;
	case BTRFS_ROOT_ITEM_KEY:
		if (rootid != BTRFS_ROOT_TREE_OBJECTID)
			goto err;
		break;
	case BTRFS_DEV_EXTENT_KEY:
		if (rootid != BTRFS_DEV_TREE_OBJECTID)
			goto err;
		break;
	}
	return 0;
err:
	report_mismatch_key_root(key_type, rootid);
	return -EINVAL;
}
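
/*
 * Process one pending tree block: read it, work out its backref flags, run
 * the per-leaf item handlers (extent, chunk, device, block group, csum and
 * ref items) and queue any child nodes for later traversal.
 */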
static int run_next_block(struct btrfs_root *root,
			  struct block_info *bits,
			  int bits_nr,
			  u64 *last,
			  struct cache_tree *pending,
			  struct cache_tree *seen,
			  struct cache_tree *reada,
			  struct cache_tree *nodes,
			  struct cache_tree *extent_cache,
			  struct cache_tree *chunk_cache,
			  struct rb_root *dev_cache,
			  struct block_group_tree *block_group_cache,
			  struct device_extent_tree *dev_extent_cache,
			  struct root_item_record *ri)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;
	struct extent_record *rec = NULL;
	u64 bytenr;
	u32 size;
	u64 parent;
	u64 owner;
	u64 flags;
	u64 ptr;
	u64 gen = 0;
	int ret = 0;
	int i;
	int nritems;
	struct btrfs_key key;
	struct cache_extent *cache;
	int reada_bits;

	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
				    bits_nr, &reada_bits);
	if (nritems == 0)
		return 1;

	if (!reada_bits) {
		for (i = 0; i < nritems; i++) {
			ret = add_cache_extent(reada, bits[i].start,
					       bits[i].size);
			if (ret == -EEXIST)
				continue;

			/* fixme, get the parent transid */
			readahead_tree_block(fs_info, bits[i].start, 0);
		}
	}
	*last = bits[0].start;
	bytenr = bits[0].start;
	size = bits[0].size;

	cache = lookup_cache_extent(pending, bytenr, size);
	if (cache) {
		remove_cache_extent(pending, cache);
		free(cache);
	}
	cache = lookup_cache_extent(reada, bytenr, size);
	if (cache) {
		remove_cache_extent(reada, cache);
		free(cache);
	}
	cache = lookup_cache_extent(nodes, bytenr, size);
	if (cache) {
		remove_cache_extent(nodes, cache);
		free(cache);
	}
	cache = lookup_cache_extent(extent_cache, bytenr, size);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		gen = rec->parent_generation;
	}

	/* fixme, get the real parent transid */
	buf = read_tree_block(root->fs_info, bytenr, gen);
	if (!extent_buffer_uptodate(buf)) {
		record_bad_block_io(root->fs_info,
				    extent_cache, bytenr, size);
		goto out;
	}

	nritems = btrfs_header_nritems(buf);

	flags = 0;
	if (!init_extent_tree) {
		ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
					       btrfs_header_level(buf), 1,
					       NULL, &flags);
		if (ret < 0) {
			ret = calc_extent_flag(extent_cache, buf, ri, &flags);
			if (ret < 0) {
				fprintf(stderr, "Couldn't calc extent flags\n");
				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			}
		}
	} else {
		flags = 0;
		ret = calc_extent_flag(extent_cache, buf, ri, &flags);
		if (ret < 0) {
			fprintf(stderr, "Couldn't calc extent flags\n");
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		if (ri != NULL &&
		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    ri->objectid == btrfs_header_owner(buf)) {
			/*
			 * Ok we got to this block from its original owner and
			 * we have FULL_BACKREF set.  Relocation can leave
			 * converted blocks over so this is altogether possible,
			 * however it's not possible if the generation > the
			 * last snapshot, so check for this case.
			 */
			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
			    btrfs_header_generation(buf) > ri->last_snapshot) {
				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
				rec->bad_full_backref = 1;
			}
		}
	} else {
		if (ri != NULL &&
		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			rec->bad_full_backref = 1;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		rec->flag_block_full_backref = 1;
		parent = bytenr;
		owner = 0;
	} else {
		rec->flag_block_full_backref = 0;
		parent = 0;
		owner = btrfs_header_owner(buf);
	}

	ret = check_block(root, extent_cache, buf, flags);
	if (ret)
		goto out;

	if (btrfs_is_leaf(buf)) {
		btree_space_waste += btrfs_leaf_free_space(buf);
		for (i = 0; i < nritems; i++) {
			struct btrfs_file_extent_item *fi;

			btrfs_item_key_to_cpu(buf, &key, i);
			/*
			 * Check key type against the leaf owner.
			 * Could filter quite a lot of early errors if the
			 * owner is correct.
			 */
			if (check_type_with_root(btrfs_header_owner(buf),
						 key.type)) {
				fprintf(stderr, "ignoring invalid key\n");
				continue;
			}
			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
				total_csum_bytes +=
					btrfs_item_size_nr(buf, i);
				continue;
			}
			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
				process_chunk_item(chunk_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_ITEM_KEY) {
				process_device_item(dev_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
				process_block_group_item(block_group_cache,
							 &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_EXTENT_KEY) {
				process_device_extent_item(dev_extent_cache,
							   &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
				process_extent_ref_v0(extent_cache, buf, i);
#endif
				continue;
			}
			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
						key.objectid, 0, key.offset, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf tree block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
						key.objectid, key.offset, 0, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf shared block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
				struct btrfs_extent_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_extent_data_ref);
				add_data_backref(extent_cache,
					key.objectid, 0,
					btrfs_extent_data_ref_root(buf, ref),
					btrfs_extent_data_ref_objectid(buf,
								       ref),
					btrfs_extent_data_ref_offset(buf, ref),
					btrfs_extent_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
				struct btrfs_shared_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_shared_data_ref);
				add_data_backref(extent_cache,
					key.objectid, key.offset, 0, 0, 0,
					btrfs_shared_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
				struct bad_item *bad;

				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
					continue;
				bad = malloc(sizeof(struct bad_item));
				if (!bad)
					continue;
				INIT_LIST_HEAD(&bad->list);
				memcpy(&bad->key, &key,
				       sizeof(struct btrfs_key));
				bad->root_id = owner;
				list_add_tail(&bad->list, &delete_items);
				continue;
			}
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
				continue;

			data_bytes_allocated +=
				btrfs_file_extent_disk_num_bytes(buf, fi);
			if (data_bytes_allocated < root->fs_info->sectorsize)
				abort();

			data_bytes_referenced +=
				btrfs_file_extent_num_bytes(buf, fi);
			add_data_backref(extent_cache,
				btrfs_file_extent_disk_bytenr(buf, fi),
				parent, owner, key.objectid, key.offset -
				btrfs_file_extent_offset(buf, fi), 1, 1,
				btrfs_file_extent_disk_num_bytes(buf, fi));
		}
	} else {
		int level;

		level = btrfs_header_level(buf);
		for (i = 0; i < nritems; i++) {
			struct extent_record tmpl;

			ptr = btrfs_node_blockptr(buf, i);
			size = root->fs_info->nodesize;
			btrfs_node_key_to_cpu(buf, &key, i);
			if (ri != NULL &&
			    (level == ri->drop_level) &&
			    is_dropped_key(&key, &ri->drop_key))
				continue;

			memset(&tmpl, 0, sizeof(tmpl));
			btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
			tmpl.parent_generation =
				btrfs_node_ptr_generation(buf, i);
			tmpl.start = ptr;
			tmpl.nr = size;
			tmpl.metadata = 1;
			tmpl.max_size = size;
			ret = add_extent_rec(extent_cache, &tmpl);
			if (ret < 0)
				goto out;

			ret = add_tree_backref(extent_cache, ptr, parent,
					       owner, 1);
			if (ret < 0) {
				error(
				"add_tree_backref failed (non-leaf block): %s",
				      strerror(-ret));
				continue;
			}

			if (level > 1)
				add_pending(nodes, seen, ptr, size);
			else
				add_pending(pending, seen, ptr, size);
		}
		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
				      nritems) * sizeof(struct btrfs_key_ptr);
	}
	total_btree_bytes += buf->len;
	if (fs_root_objectid(btrfs_header_owner(buf)))
		total_fs_tree_bytes += buf->len;
	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += buf->len;

out:
	free_extent_buffer(buf);
	return ret;
}
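
/*
 * Queue a root's tree block for traversal and record its extent plus the
 * matching tree backref (a full backref for reloc or old-style roots).
 */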
static int add_root_to_pending(struct extent_buffer *buf,
			       struct cache_tree *extent_cache,
			       struct cache_tree *pending,
			       struct cache_tree *seen,
			       struct cache_tree *nodes,
			       u64 objectid)
{
	struct extent_record tmpl;
	int ret;

	if (btrfs_header_level(buf) > 0)
		add_pending(nodes, seen, buf->start, buf->len);
	else
		add_pending(pending, seen, buf->start, buf->len);

	memset(&tmpl, 0, sizeof(tmpl));
	tmpl.start = buf->start;
	tmpl.max_size = buf->len;
	add_extent_rec(extent_cache, &tmpl);

	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
		ret = add_tree_backref(extent_cache, buf->start, buf->start,
				       0, 1);
	else
		ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
				       1);
	return ret;
}
/* as we fix the tree, we might be deleting blocks that
 * we're tracking for repair.  This hook makes sure we
 * remove any backrefs for blocks as we are fixing them.
 */
static int free_extent_hook(struct btrfs_fs_info *fs_info,
			    u64 bytenr, u64 num_bytes, u64 parent,
			    u64 root_objectid, u64 owner, u64 offset,
			    int refs_to_drop)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int is_data;
	struct cache_tree *extent_cache = fs_info->fsck_extent_cache;

	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (is_data) {
		struct data_backref *back;

		back = find_data_backref(rec, parent, root_objectid, owner,
					 offset, 1, bytenr, num_bytes);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			back->found_ref -= refs_to_drop;
			if (rec->refs)
				rec->refs -= refs_to_drop;
		}
		if (back->node.found_extent_tree) {
			back->num_refs -= refs_to_drop;
			if (rec->extent_item_refs)
				rec->extent_item_refs -= refs_to_drop;
		}
		if (back->found_ref == 0)
			back->node.found_ref = 0;
		if (back->num_refs == 0)
			back->node.found_extent_tree = 0;

		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	} else {
		struct tree_backref *back;

		back = find_tree_backref(rec, parent, root_objectid);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			if (rec->refs)
				rec->refs--;
			back->node.found_ref = 0;
		}
		if (back->node.found_extent_tree) {
			if (rec->extent_item_refs)
				rec->extent_item_refs--;
			back->node.found_extent_tree = 0;
		}
		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	}
out:
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
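
/*
 * Delete every extent tree item (extent, metadata and backref items) that
 * references @bytenr, updating the block group accounting as items go away.
 */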
static int delete_extent_records(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret = 0;
	int slot;

	key.objectid = bytenr;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(trans, fs_info->extent_root, &key,
					path, 0, 1);
		if (ret < 0)
			break;

		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
			btrfs_release_path(path);
			if (found_key.type == 0) {
				if (found_key.offset == 0)
					break;
				key.offset = found_key.offset - 1;
				key.type = found_key.type;
			} else {
				key.type = found_key.type - 1;
				key.offset = (u64)-1;
			}
			continue;
		}

		fprintf(stderr,
			"repair deleting extent record: key [%llu,%u,%llu]\n",
			found_key.objectid, found_key.type, found_key.offset);

		ret = btrfs_del_item(trans, fs_info->extent_root, path);
		if (ret)
			break;
		btrfs_release_path(path);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
				found_key.offset : fs_info->nodesize;

			ret = btrfs_update_block_group(fs_info->extent_root,
						       bytenr, bytes, 0, 0);
			if (ret)
				break;
		}
	}

	btrfs_release_path(path);
	return ret;
}
/*
 * for a single backref, this will allocate a new extent
 * and add the backref to it.
 */
static int record_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *info,
			 struct btrfs_path *path,
			 struct extent_record *rec,
			 struct extent_backref *back,
			 int allocated, u64 flags)
{
	int ret = 0;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_key ins_key;
	struct btrfs_extent_item *ei;
	struct data_backref *dback;
	struct btrfs_tree_block_info *bi;

	if (!back->is_data)
		rec->max_size = max_t(u64, rec->max_size,
				      info->nodesize);

	if (!allocated) {
		u32 item_size = sizeof(*ei);

		if (!back->is_data)
			item_size += sizeof(*bi);

		ins_key.objectid = rec->start;
		ins_key.offset = rec->max_size;
		ins_key.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_insert_empty_item(trans, extent_root, path,
					      &ins_key, item_size);
		if (ret)
			goto fail;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);

		btrfs_set_extent_refs(leaf, ei, 0);
		btrfs_set_extent_generation(leaf, ei, rec->generation);

		if (back->is_data) {
			btrfs_set_extent_flags(leaf, ei,
					       BTRFS_EXTENT_FLAG_DATA);
		} else {
			struct btrfs_disk_key copy_key;

			bi = (struct btrfs_tree_block_info *)(ei + 1);
			memset_extent_buffer(leaf, 0, (unsigned long)bi,
					     sizeof(*bi));

			btrfs_set_disk_key_objectid(&copy_key,
						    rec->info_objectid);
			btrfs_set_disk_key_type(&copy_key, 0);
			btrfs_set_disk_key_offset(&copy_key, 0);

			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
			btrfs_set_tree_block_key(leaf, bi, &copy_key);

			btrfs_set_extent_flags(leaf, ei,
					flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
		}

		btrfs_mark_buffer_dirty(leaf);
		ret = btrfs_update_block_group(extent_root, rec->start,
					       rec->max_size, 1, 0);
		if (ret)
			goto fail;
		btrfs_release_path(path);
	}

	if (back->is_data) {
		u64 parent;
		int i;

		dback = to_data_backref(back);
		if (back->full_backref)
			parent = dback->parent;
		else
			parent = 0;

		for (i = 0; i < dback->found_ref; i++) {
			/* if parent != 0, we're doing a full backref
			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
			 * just makes the backref allocator create a data
			 * backref
			 */
			ret = btrfs_inc_extent_ref(trans, info->extent_root,
						   rec->start, rec->max_size,
						   parent,
						   dback->root,
						   parent ?
						   BTRFS_FIRST_FREE_OBJECTID :
						   dback->owner,
						   dback->offset);
			if (ret)
				break;
		}
		fprintf(stderr,
"adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
			(unsigned long long)rec->start,
			back->full_backref ? "parent" : "root",
			back->full_backref ? (unsigned long long)parent :
					     (unsigned long long)dback->root,
			(unsigned long long)dback->owner,
			(unsigned long long)dback->offset, dback->found_ref);
	} else {
		u64 parent;
		struct tree_backref *tback;

		tback = to_tree_backref(back);
		if (back->full_backref)
			parent = tback->parent;
		else
			parent = 0;

		ret = btrfs_inc_extent_ref(trans, info->extent_root,
					   rec->start, rec->max_size,
					   parent, tback->root, 0, 0);
		fprintf(stderr,
"adding new tree backref on start %llu len %llu parent %llu root %llu\n",
			rec->start, rec->max_size, parent, tback->root);
	}
fail:
	btrfs_release_path(path);
	return ret;
}
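
/*
 * Helpers for the backref repair code below: find_entry() looks up a
 * (bytenr, bytes) pair in the candidate list, find_most_right_entry() picks
 * the candidate most of the backrefs agree on, or NULL on a tie.
 */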
static struct extent_entry *find_entry(struct list_head *entries,
				       u64 bytenr, u64 bytes)
{
	struct extent_entry *entry = NULL;

	list_for_each_entry(entry, entries, list) {
		if (entry->bytenr == bytenr && entry->bytes == bytes)
			return entry;
	}

	return NULL;
}

static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
	struct extent_entry *entry, *best = NULL, *prev = NULL;

	list_for_each_entry(entry, entries, list) {
		/*
		 * If there are as many broken entries as entries then we know
		 * not to trust this particular entry.
		 */
		if (entry->broken == entry->count)
			continue;

		/*
		 * Special case, when there are only two entries and 'best' is
		 * the first one we encountered.
		 */
		if (!prev) {
			prev = entry;
			continue;
		}

		/*
		 * If our current entry == best then we can't be sure our best
		 * is really the best, so we need to keep searching.
		 */
		if (best && best->count == entry->count) {
			prev = entry;
			best = NULL;
			continue;
		}

		/* Prev == entry, not good enough, have to keep searching */
		if (!prev->broken && prev->count == entry->count)
			continue;

		if (!best)
			best = (prev->count > entry->count) ? prev : entry;
		else if (best->count < entry->count)
			best = entry;
		prev = entry;
	}

	return best;
}
static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
		      struct data_backref *dback, struct extent_entry *entry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret, err;

	key.objectid = dback->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find root for our ref\n");
		return -EINVAL;
	}

	/*
	 * The backref points to the original offset of the extent if it was
	 * split, so we need to search down to the offset we have and then walk
	 * forward until we find the backref we're looking for.
	 */
	key.objectid = dback->owner;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = dback->offset;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up ref %d\n", ret);
		return ret;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				fprintf(stderr, "Couldn't find our ref, next\n");
				return -EINVAL;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != dback->owner ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			fprintf(stderr, "Couldn't find our ref, search\n");
			return -EINVAL;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

		if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
			break;
		path->slots[0]++;
	}

	btrfs_release_path(path);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Ok we have the key of the file extent we want to fix, now we can cow
	 * down to the thing and fix it.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
			key.objectid, key.type, key.offset, ret);
		goto out;
	}
	if (ret > 0) {
		fprintf(stderr,
		"well that's odd, we just found this key [%llu,%u,%llu]\n",
			key.objectid, key.type, key.offset);
		ret = -EINVAL;
		goto out;
	}

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	if (btrfs_file_extent_compression(leaf, fi) &&
	    dback->disk_bytenr != entry->bytenr) {
		fprintf(stderr,
"ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
			dback->disk_bytenr);
		ret = -EINVAL;
		goto out;
	}

	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
	} else if (dback->disk_bytenr > entry->bytenr) {
		u64 off_diff, offset;

		off_diff = dback->disk_bytenr - entry->bytenr;
		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset +
		    btrfs_file_extent_num_bytes(leaf, fi) >
		    entry->bytenr + entry->bytes) {
			fprintf(stderr,
"ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += off_diff;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	} else if (dback->disk_bytenr < entry->bytenr) {
		u64 offset;

		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset < entry->bytenr) {
			fprintf(stderr,
"ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}

		offset += dback->disk_bytenr;
		offset -= entry->bytenr;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	}

	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);

	/*
	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
	 * only do this if we aren't using compression, otherwise it's a
	 * trickier case.
	 */
	if (!btrfs_file_extent_compression(leaf, fi))
		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
	else
		printf("ram bytes may be wrong?\n");
	btrfs_mark_buffer_dirty(leaf);
out:
	err = btrfs_commit_transaction(trans, root);
	btrfs_release_path(path);
	return ret ? ret : err;
}
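
/*
 * Collect the (bytenr, bytes) values reported by the data backrefs of @rec,
 * decide which value is most likely correct and rewrite the file extents
 * that disagree via repair_ref().  A non-zero positive return tells the
 * caller that refs were changed and a rescan is needed.
 */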
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
			   struct extent_record *rec)
{
	struct extent_backref *back, *tmp;
	struct data_backref *dback;
	struct extent_entry *entry, *best = NULL;
	LIST_HEAD(entries);
	int nr_entries = 0;
	int broken_entries = 0;
	int ret = 0;
	short mismatch = 0;

	/*
	 * Metadata is easy and the backrefs should always agree on bytenr and
	 * size, if not we've got bigger issues.
	 */
	if (rec->metadata)
		return 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * We only pay attention to backrefs that we found a real
		 * backref for.
		 */
		if (dback->found_ref == 0)
			continue;

		/*
		 * For now we only catch when the bytes don't match, not the
		 * bytenr.  We can easily do this at the same time, but I want
		 * to have a fs image to test on before we just add repair
		 * functionality willy-nilly so we know we won't screw up the
		 * repair.
		 */
		entry = find_entry(&entries, dback->disk_bytenr,
				   dback->bytes);
		if (!entry) {
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = dback->disk_bytenr;
			entry->bytes = dback->bytes;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}

		/*
		 * If we only have one entry we may think the entries agree
		 * when in reality they don't so we have to do some extra
		 * checking.
		 */
		if (dback->disk_bytenr != rec->start ||
		    dback->bytes != rec->nr || back->broken)
			mismatch = 1;
	}

	/* Yay all the backrefs agree, carry on good sir */
	if (nr_entries <= 1 && !mismatch)
		goto out;

	fprintf(stderr,
		"attempting to repair backref discrepancy for bytenr %llu\n",
		rec->start);

	/*
	 * First we want to see if the backrefs can agree amongst themselves
	 * who is right, so figure out which one of the entries has the
	 * highest count.
	 */
	best = find_most_right_entry(&entries);

	/*
	 * Ok so we may have an even split between what the backrefs think, so
	 * this is where we use the extent ref to see what it thinks.
	 */
	if (!best) {
		entry = find_entry(&entries, rec->start, rec->nr);
		if (!entry && (!broken_entries || !rec->found_rec)) {
			fprintf(stderr,
"backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		} else if (!entry) {
			/*
			 * Ok our backrefs were broken, we'll assume this is the
			 * correct value and add an entry for this range.
			 */
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = rec->start;
			entry->bytes = rec->nr;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}
		best = find_most_right_entry(&entries);
		if (!best) {
			fprintf(stderr,
"backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * I don't think this can happen currently as we'll abort() if we catch
	 * this case higher up, but in case somebody removes that we still can't
	 * deal with it properly here yet, so just bail out if that's the case.
	 */
	if (best->bytenr != rec->start) {
		fprintf(stderr,
"extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
			rec->start, rec->nr);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Ok great we all agreed on an extent record, let's go find the real
	 * references and fix up the ones that don't match.
	 */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * Still ignoring backrefs that don't have a real ref attached
		 * to them.
		 */
		if (dback->found_ref == 0)
			continue;

		if (dback->bytes == best->bytes &&
		    dback->disk_bytenr == best->bytenr)
			continue;

		ret = repair_ref(info, path, dback, best);
		if (ret)
			goto out;
	}

	/*
	 * Ok we messed with the actual refs, which means we need to drop our
	 * entire cache and go back and rescan.  I know this is a huge pain and
	 * adds a lot of extra work, but it's the only way to be safe.  Once all
	 * the backrefs agree we may not need to do anything to the extent
	 * record itself.
	 */
	ret = 1;
out:
	while (!list_empty(&entries)) {
		entry = list_entry(entries.next, struct extent_entry, list);
		list_del_init(&entry->list);
		free(entry);
	}
	return ret;
}
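
/*
 * When an extent_record only exists because of duplicates, promote the first
 * duplicate to be the real record and merge any overlapping records into it.
 * Returns 1 when nothing else needs deleting, 0 when duplicates remain.
 */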
static int process_duplicates(struct cache_tree *extent_cache,
			      struct extent_record *rec)
{
	struct extent_record *good, *tmp;
	struct cache_extent *cache;
	int ret;

	/*
	 * If we found an extent record for this extent then return, or if we
	 * have more than one duplicate we are likely going to need to delete
	 * something.
	 */
	if (rec->found_rec || rec->num_duplicates > 1)
		return 0;

	/* Shouldn't happen but just in case */
	BUG_ON(!rec->num_duplicates);

	/*
	 * So this happens if we end up with a backref that doesn't match the
	 * actual extent entry.  So either the backref is bad or the extent
	 * entry is bad.  Either way we want to have the extent_record actually
	 * reflect what we found in the extent_tree, so we need to take the
	 * duplicate out and use that as the extent_record since the only way we
	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
	 */
	remove_cache_extent(extent_cache, &rec->cache);

	good = to_extent_record(rec->dups.next);
	list_del_init(&good->list);
	INIT_LIST_HEAD(&good->backrefs);
	INIT_LIST_HEAD(&good->dups);
	good->cache.start = good->start;
	good->cache.size = good->nr;
	good->content_checked = 0;
	good->owner_ref_checked = 0;
	good->num_duplicates = 0;
	good->refs = rec->refs;
	list_splice_init(&rec->backrefs, &good->backrefs);
	while (1) {
		cache = lookup_cache_extent(extent_cache, good->start,
					    good->nr);
		if (!cache)
			break;
		tmp = container_of(cache, struct extent_record, cache);

		/*
		 * If we find another overlapping extent and its found_rec is
		 * set then it's a duplicate and we need to try and delete
		 * something.
		 */
		if (tmp->found_rec || tmp->num_duplicates > 0) {
			if (list_empty(&good->list))
				list_add_tail(&good->list,
					      &duplicate_extents);
			good->num_duplicates += tmp->num_duplicates + 1;
			list_splice_init(&tmp->dups, &good->dups);
			list_del_init(&tmp->list);
			list_add_tail(&tmp->list, &good->dups);
			remove_cache_extent(extent_cache, &tmp->cache);
			continue;
		}

		/*
		 * Ok we have another non extent item backed extent rec, so lets
		 * just add it to this extent and carry on like we did above.
		 */
		good->refs += tmp->refs;
		list_splice_init(&tmp->backrefs, &good->backrefs);
		remove_cache_extent(extent_cache, &tmp->cache);
		free(tmp);
	}
	ret = insert_cache_extent(extent_cache, &good->cache);
	BUG_ON(ret);
	return good->num_duplicates ? 0 : 1;
}
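
/*
 * Remove the duplicate extent items recorded for @rec from the extent tree,
 * keeping only the record that covers all of the duplicates.
 */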
static int delete_duplicate_records(struct btrfs_root *root,
				    struct extent_record *rec)
{
	struct btrfs_trans_handle *trans;
	LIST_HEAD(delete_list);
	struct btrfs_path path;
	struct extent_record *tmp, *good, *n;
	int nr_del = 0;
	int ret = 0, err;
	struct btrfs_key key;

	btrfs_init_path(&path);

	good = rec;
	/* Find the record that covers all of the duplicates. */
	list_for_each_entry(tmp, &rec->dups, list) {
		if (good->start < tmp->start)
			continue;
		if (good->nr > tmp->nr)
			continue;

		if (tmp->start + tmp->nr < good->start + good->nr) {
			fprintf(stderr,
"Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought.  The extents are [%llu-%llu] and [%llu-%llu]\n",
				tmp->start, tmp->nr, good->start, good->nr);
			abort();
		}
		good = tmp;
	}

	if (good != rec)
		list_add_tail(&rec->list, &delete_list);

	list_for_each_entry_safe(tmp, n, &rec->dups, list) {
		if (tmp == good)
			continue;
		list_move_tail(&tmp->list, &delete_list);
	}

	root = root->fs_info->extent_root;
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	list_for_each_entry(tmp, &delete_list, list) {
		if (tmp->found_rec == 0)
			continue;
		key.objectid = tmp->start;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = tmp->nr;

		/* Shouldn't happen but just in case */
		if (tmp->metadata) {
			fprintf(stderr,
"well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
				tmp->start, tmp->nr);
			abort();
		}

		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
		if (ret)
			break;
		ret = btrfs_del_item(trans, root, &path);
		if (ret)
			break;
		btrfs_release_path(&path);
		nr_del++;
	}
	err = btrfs_commit_transaction(trans, root);
	if (err && !ret)
		ret = err;
out:
	while (!list_empty(&delete_list)) {
		tmp = to_extent_record(delete_list.next);
		list_del_init(&tmp->list);
		if (tmp == rec)
			continue;
		free(tmp);
	}

	while (!list_empty(&rec->dups)) {
		tmp = to_extent_record(rec->dups.next);
		list_del_init(&tmp->list);
		free(tmp);
	}

	btrfs_release_path(&path);

	if (!ret && !nr_del)
		rec->num_duplicates = 0;

	return ret ? ret : nr_del;
}
7151 static int find_possible_backrefs(struct btrfs_fs_info
*info
,
7152 struct btrfs_path
*path
,
7153 struct cache_tree
*extent_cache
,
7154 struct extent_record
*rec
)
7156 struct btrfs_root
*root
;
7157 struct extent_backref
*back
, *tmp
;
7158 struct data_backref
*dback
;
7159 struct cache_extent
*cache
;
7160 struct btrfs_file_extent_item
*fi
;
7161 struct btrfs_key key
;
7165 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7166 &rec
->backref_tree
, node
) {
7167 /* Don't care about full backrefs (poor unloved backrefs) */
7168 if (back
->full_backref
|| !back
->is_data
)
7171 dback
= to_data_backref(back
);
7173 /* We found this one, we don't need to do a lookup */
7174 if (dback
->found_ref
)
7177 key
.objectid
= dback
->root
;
7178 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7179 key
.offset
= (u64
)-1;
7181 root
= btrfs_read_fs_root(info
, &key
);
7183 /* No root, definitely a bad ref, skip */
7184 if (IS_ERR(root
) && PTR_ERR(root
) == -ENOENT
)
7186 /* Other err, exit */
7188 return PTR_ERR(root
);
7190 key
.objectid
= dback
->owner
;
7191 key
.type
= BTRFS_EXTENT_DATA_KEY
;
7192 key
.offset
= dback
->offset
;
7193 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
7195 btrfs_release_path(path
);
7198 /* Didn't find it, we can carry on */
7203 fi
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
7204 struct btrfs_file_extent_item
);
7205 bytenr
= btrfs_file_extent_disk_bytenr(path
->nodes
[0], fi
);
7206 bytes
= btrfs_file_extent_disk_num_bytes(path
->nodes
[0], fi
);
7207 btrfs_release_path(path
);
7208 cache
= lookup_cache_extent(extent_cache
, bytenr
, 1);
7210 struct extent_record
*tmp
;
7212 tmp
= container_of(cache
, struct extent_record
, cache
);
7215 * If we found an extent record for the bytenr for this
7216 * particular backref then we can't add it to our
7217 * current extent record. We only want to add backrefs
7218 * that don't have a corresponding extent item in the
7219 * extent tree since they likely belong to this record
7220 * and we need to fix it if it doesn't match bytenrs.
7226 dback
->found_ref
+= 1;
7227 dback
->disk_bytenr
= bytenr
;
7228 dback
->bytes
= bytes
;
7231 * Set this so the verify backref code knows not to trust the
7232 * values in this backref.
 * Record orphan data refs into their corresponding roots.
 *
 * Return 0 if the extent item contains a data ref and it was recorded.
 * Return 1 if the extent item contains no useful data ref.
 *   In that case it may contain only a shared_dataref or metadata backref,
 *   or the file extent exists (this should be handled by the extent bytenr
 *   recovery routine).
 * Return <0 if something goes wrong.
7250 static int record_orphan_data_extents(struct btrfs_fs_info
*fs_info
,
7251 struct extent_record
*rec
)
7253 struct btrfs_key key
;
7254 struct btrfs_root
*dest_root
;
7255 struct extent_backref
*back
, *tmp
;
7256 struct data_backref
*dback
;
7257 struct orphan_data_extent
*orphan
;
7258 struct btrfs_path path
;
7259 int recorded_data_ref
= 0;
7264 btrfs_init_path(&path
);
7265 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7266 &rec
->backref_tree
, node
) {
7267 if (back
->full_backref
|| !back
->is_data
||
7268 !back
->found_extent_tree
)
7270 dback
= to_data_backref(back
);
7271 if (dback
->found_ref
)
7273 key
.objectid
= dback
->root
;
7274 key
.type
= BTRFS_ROOT_ITEM_KEY
;
7275 key
.offset
= (u64
)-1;
7277 dest_root
= btrfs_read_fs_root(fs_info
, &key
);
7279 /* For non-exist root we just skip it */
7280 if (IS_ERR(dest_root
) || !dest_root
)
7283 key
.objectid
= dback
->owner
;
7284 key
.type
= BTRFS_EXTENT_DATA_KEY
;
7285 key
.offset
= dback
->offset
;
7287 ret
= btrfs_search_slot(NULL
, dest_root
, &key
, &path
, 0, 0);
7288 btrfs_release_path(&path
);
7290 * For ret < 0, it's OK since the fs-tree may be corrupted,
7291 * we need to record it for inode/file extent rebuild.
7292 * For ret > 0, we record it only for file extent rebuild.
7293 * For ret == 0, the file extent exists but only bytenr
7294 * mismatch, let the original bytenr fix routine to handle,
7300 orphan
= malloc(sizeof(*orphan
));
7305 INIT_LIST_HEAD(&orphan
->list
);
7306 orphan
->root
= dback
->root
;
7307 orphan
->objectid
= dback
->owner
;
7308 orphan
->offset
= dback
->offset
;
7309 orphan
->disk_bytenr
= rec
->cache
.start
;
7310 orphan
->disk_len
= rec
->cache
.size
;
7311 list_add(&dest_root
->orphan_data_extents
, &orphan
->list
);
7312 recorded_data_ref
= 1;
7315 btrfs_release_path(&path
);
7317 return !recorded_data_ref
;
7323 * when an incorrect extent item is found, this will delete
7324 * all of the existing entries for it and recreate them
7325 * based on what the tree scan found.
7327 static int fixup_extent_refs(struct btrfs_fs_info
*info
,
7328 struct cache_tree
*extent_cache
,
7329 struct extent_record
*rec
)
7331 struct btrfs_trans_handle
*trans
= NULL
;
7333 struct btrfs_path path
;
7334 struct cache_extent
*cache
;
7335 struct extent_backref
*back
, *tmp
;
7339 if (rec
->flag_block_full_backref
)
7340 flags
|= BTRFS_BLOCK_FLAG_FULL_BACKREF
;
7342 btrfs_init_path(&path
);
7343 if (rec
->refs
!= rec
->extent_item_refs
&& !rec
->metadata
) {
7345 * Sometimes the backrefs themselves are so broken they don't
7346 * get attached to any meaningful rec, so first go back and
7347 * check any of our backrefs that we couldn't find and throw
7348 * them into the list if we find the backref so that
7349 * verify_backrefs can figure out what to do.
7351 ret
= find_possible_backrefs(info
, &path
, extent_cache
, rec
);
7356 /* step one, make sure all of the backrefs agree */
7357 ret
= verify_backrefs(info
, &path
, rec
);
7361 trans
= btrfs_start_transaction(info
->extent_root
, 1);
7362 if (IS_ERR(trans
)) {
7363 ret
= PTR_ERR(trans
);
7367 /* step two, delete all the existing records */
7368 ret
= delete_extent_records(trans
, &path
, rec
->start
);
7373 /* was this block corrupt? If so, don't add references to it */
7374 cache
= lookup_cache_extent(info
->corrupt_blocks
,
7375 rec
->start
, rec
->max_size
);
7381 /* step three, recreate all the refs we did find */
7382 rbtree_postorder_for_each_entry_safe(back
, tmp
,
7383 &rec
->backref_tree
, node
) {
7385 * if we didn't find any references, don't create a
7388 if (!back
->found_ref
)
7391 rec
->bad_full_backref
= 0;
7392 ret
= record_extent(trans
, info
, &path
, rec
, back
, allocated
,
7401 int err
= btrfs_commit_transaction(trans
, info
->extent_root
);
7408 fprintf(stderr
, "Repaired extent references for %llu\n",
7409 (unsigned long long)rec
->start
);
7411 btrfs_release_path(&path
);
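/*
 * Rewrite the flags of a single extent item so that FULL_BACKREF matches what
 * the tree scan recorded in rec->flag_block_full_backref, committing the
 * change in its own transaction.
 */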
static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
			      struct extent_record *rec)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_path path;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 flags;
	int ret = 0;

	key.objectid = rec->start;
	if (rec->metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = rec->info_level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = rec->max_size;
	}

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_init_path(&path);
	ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
	if (ret < 0) {
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, root);
		return ret;
	} else if (ret) {
		fprintf(stderr, "Didn't find extent for %llu\n",
			(unsigned long long)rec->start);
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, root);
		return -ENOENT;
	}

	ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
			    struct btrfs_extent_item);
	flags = btrfs_extent_flags(path.nodes[0], ei);
	if (rec->flag_block_full_backref) {
		fprintf(stderr, "setting full backref on %llu\n",
			(unsigned long long)key.objectid);
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else {
		fprintf(stderr, "clearing full backref on %llu\n",
			(unsigned long long)key.objectid);
		flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
	}
	btrfs_set_extent_flags(path.nodes[0], ei, flags);
	btrfs_mark_buffer_dirty(path.nodes[0]);
	btrfs_release_path(&path);
	ret = btrfs_commit_transaction(trans, root);
	if (!ret)
		fprintf(stderr, "Repaired extent flags for %llu\n",
			(unsigned long long)rec->start);

	return ret;
}
7476 /* right now we only prune from the extent allocation tree */
7477 static int prune_one_block(struct btrfs_trans_handle
*trans
,
7478 struct btrfs_fs_info
*info
,
7479 struct btrfs_corrupt_block
*corrupt
)
7482 struct btrfs_path path
;
7483 struct extent_buffer
*eb
;
7487 int level
= corrupt
->level
+ 1;
7489 btrfs_init_path(&path
);
7491 /* we want to stop at the parent to our busted block */
7492 path
.lowest_level
= level
;
7494 ret
= btrfs_search_slot(trans
, info
->extent_root
,
7495 &corrupt
->key
, &path
, -1, 1);
7500 eb
= path
.nodes
[level
];
7507 * hopefully the search gave us the block we want to prune,
7508 * lets try that first
7510 slot
= path
.slots
[level
];
7511 found
= btrfs_node_blockptr(eb
, slot
);
7512 if (found
== corrupt
->cache
.start
)
7515 nritems
= btrfs_header_nritems(eb
);
7517 /* the search failed, lets scan this node and hope we find it */
7518 for (slot
= 0; slot
< nritems
; slot
++) {
7519 found
= btrfs_node_blockptr(eb
, slot
);
7520 if (found
== corrupt
->cache
.start
)
7524 * We couldn't find the bad block.
7525 * TODO: search all the nodes for pointers to this block
7527 if (eb
== info
->extent_root
->node
) {
7532 btrfs_release_path(&path
);
7537 printk("deleting pointer to block %llu\n", corrupt
->cache
.start
);
7538 ret
= btrfs_del_ptr(info
->extent_root
, &path
, level
, slot
);
7541 btrfs_release_path(&path
);
static int prune_corrupt_blocks(struct btrfs_fs_info *info)
{
	struct btrfs_trans_handle *trans = NULL;
	struct cache_extent *cache;
	struct btrfs_corrupt_block *corrupt;

	while (1) {
		cache = search_cache_extent(info->corrupt_blocks, 0);
		if (!cache)
			break;
		if (!trans) {
			trans = btrfs_start_transaction(info->extent_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
		prune_one_block(trans, info, corrupt);
		remove_cache_extent(info->corrupt_blocks, cache);
	}
	if (trans)
		return btrfs_commit_transaction(trans, info->extent_root);
	return 0;
}
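/*
 * Final pass over every extent_record gathered by the scan: report duplicate
 * extent items, extent_item_refs/backref mismatches, unchecked owner refs,
 * bad full backref flags, stripe-crossing metadata and chunk type mismatches,
 * and, in repair mode, try to fix what has a repair routine
 * (fixup_extent_refs()/fixup_extent_flags()) once duplicates are deleted.
 */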
7569 static int check_extent_refs(struct btrfs_root
*root
,
7570 struct cache_tree
*extent_cache
)
7572 struct extent_record
*rec
;
7573 struct cache_extent
*cache
;
7580 * if we're doing a repair, we have to make sure
7581 * we don't allocate from the problem extents.
7582 * In the worst case, this will be all the
7585 cache
= search_cache_extent(extent_cache
, 0);
7587 rec
= container_of(cache
, struct extent_record
, cache
);
7588 set_extent_dirty(root
->fs_info
->excluded_extents
,
7590 rec
->start
+ rec
->max_size
- 1);
7591 cache
= next_cache_extent(cache
);
7594 /* pin down all the corrupted blocks too */
7595 cache
= search_cache_extent(root
->fs_info
->corrupt_blocks
, 0);
7597 set_extent_dirty(root
->fs_info
->excluded_extents
,
7599 cache
->start
+ cache
->size
- 1);
7600 cache
= next_cache_extent(cache
);
7602 prune_corrupt_blocks(root
->fs_info
);
7603 reset_cached_block_groups(root
->fs_info
);
7606 reset_cached_block_groups(root
->fs_info
);
7609 * We need to delete any duplicate entries we find first otherwise we
7610 * could mess up the extent tree when we have backrefs that actually
7611 * belong to a different extent item and not the weird duplicate one.
7613 while (repair
&& !list_empty(&duplicate_extents
)) {
7614 rec
= to_extent_record(duplicate_extents
.next
);
7615 list_del_init(&rec
->list
);
7617 /* Sometimes we can find a backref before we find an actual
7618 * extent, so we need to process it a little bit to see if there
7619 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7620 * if this is a backref screwup. If we need to delete stuff
7621 * process_duplicates() will return 0, otherwise it will return
7624 if (process_duplicates(extent_cache
, rec
))
7626 ret
= delete_duplicate_records(root
, rec
);
7630 * delete_duplicate_records will return the number of entries
7631 * deleted, so if it's greater than 0 then we know we actually
7632 * did something and we need to remove.
7645 cache
= search_cache_extent(extent_cache
, 0);
7648 rec
= container_of(cache
, struct extent_record
, cache
);
7649 if (rec
->num_duplicates
) {
7651 "extent item %llu has multiple extent items\n",
7652 (unsigned long long)rec
->start
);
7656 if (rec
->refs
!= rec
->extent_item_refs
) {
7657 fprintf(stderr
, "ref mismatch on [%llu %llu] ",
7658 (unsigned long long)rec
->start
,
7659 (unsigned long long)rec
->nr
);
7660 fprintf(stderr
, "extent item %llu, found %llu\n",
7661 (unsigned long long)rec
->extent_item_refs
,
7662 (unsigned long long)rec
->refs
);
7663 ret
= record_orphan_data_extents(root
->fs_info
, rec
);
7669 if (all_backpointers_checked(rec
, 1)) {
7670 fprintf(stderr
, "backpointer mismatch on [%llu %llu]\n",
7671 (unsigned long long)rec
->start
,
7672 (unsigned long long)rec
->nr
);
7676 if (!rec
->owner_ref_checked
) {
7677 fprintf(stderr
, "owner ref check failed [%llu %llu]\n",
7678 (unsigned long long)rec
->start
,
7679 (unsigned long long)rec
->nr
);
7684 if (repair
&& fix
) {
7685 ret
= fixup_extent_refs(root
->fs_info
, extent_cache
,
7692 if (rec
->bad_full_backref
) {
7693 fprintf(stderr
, "bad full backref, on [%llu]\n",
7694 (unsigned long long)rec
->start
);
7696 ret
= fixup_extent_flags(root
->fs_info
, rec
);
		 * Although it's not an extent ref problem, we reuse this
		 * routine for error reporting.
		 * No repair function yet.
7708 if (rec
->crossing_stripes
) {
7710 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7711 rec
->start
, rec
->start
+ rec
->max_size
);
7715 if (rec
->wrong_chunk_type
) {
7717 "bad extent [%llu, %llu), type mismatch with chunk\n",
7718 rec
->start
, rec
->start
+ rec
->max_size
);
7723 remove_cache_extent(extent_cache
, cache
);
7724 free_all_extent_backrefs(rec
);
7725 if (!init_extent_tree
&& repair
&& (!cur_err
|| fix
))
7726 clear_extent_dirty(root
->fs_info
->excluded_extents
,
7728 rec
->start
+ rec
->max_size
- 1);
7733 if (ret
&& ret
!= -EAGAIN
) {
7734 fprintf(stderr
, "failed to repair damaged filesystem, aborting\n");
7737 struct btrfs_trans_handle
*trans
;
7739 root
= root
->fs_info
->extent_root
;
7740 trans
= btrfs_start_transaction(root
, 1);
7741 if (IS_ERR(trans
)) {
7742 ret
= PTR_ERR(trans
);
7746 ret
= btrfs_fix_block_accounting(trans
);
7749 ret
= btrfs_commit_transaction(trans
, root
);
7762 * Check the chunk with its block group/dev list ref:
7763 * Return 0 if all refs seems valid.
7764 * Return 1 if part of refs seems valid, need later check for rebuild ref
7765 * like missing block group and needs to search extent tree to rebuild them.
7766 * Return -1 if essential refs are missing and unable to rebuild.
7768 static int check_chunk_refs(struct chunk_record
*chunk_rec
,
7769 struct block_group_tree
*block_group_cache
,
7770 struct device_extent_tree
*dev_extent_cache
,
7773 struct cache_extent
*block_group_item
;
7774 struct block_group_record
*block_group_rec
;
7775 struct cache_extent
*dev_extent_item
;
7776 struct device_extent_record
*dev_extent_rec
;
7780 int metadump_v2
= 0;
7784 block_group_item
= lookup_cache_extent(&block_group_cache
->tree
,
7787 if (block_group_item
) {
7788 block_group_rec
= container_of(block_group_item
,
7789 struct block_group_record
,
7791 if (chunk_rec
->length
!= block_group_rec
->offset
||
7792 chunk_rec
->offset
!= block_group_rec
->objectid
||
7794 chunk_rec
->type_flags
!= block_group_rec
->flags
)) {
7797 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7798 chunk_rec
->objectid
,
7803 chunk_rec
->type_flags
,
7804 block_group_rec
->objectid
,
7805 block_group_rec
->type
,
7806 block_group_rec
->offset
,
7807 block_group_rec
->offset
,
7808 block_group_rec
->objectid
,
7809 block_group_rec
->flags
);
7812 list_del_init(&block_group_rec
->list
);
7813 chunk_rec
->bg_rec
= block_group_rec
;
7818 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7819 chunk_rec
->objectid
,
7824 chunk_rec
->type_flags
);
7831 length
= calc_stripe_length(chunk_rec
->type_flags
, chunk_rec
->length
,
7832 chunk_rec
->num_stripes
);
7833 for (i
= 0; i
< chunk_rec
->num_stripes
; ++i
) {
7834 devid
= chunk_rec
->stripes
[i
].devid
;
7835 offset
= chunk_rec
->stripes
[i
].offset
;
7836 dev_extent_item
= lookup_cache_extent2(&dev_extent_cache
->tree
,
7837 devid
, offset
, length
);
7838 if (dev_extent_item
) {
7839 dev_extent_rec
= container_of(dev_extent_item
,
7840 struct device_extent_record
,
7842 if (dev_extent_rec
->objectid
!= devid
||
7843 dev_extent_rec
->offset
!= offset
||
7844 dev_extent_rec
->chunk_offset
!= chunk_rec
->offset
||
7845 dev_extent_rec
->length
!= length
) {
"Chunk[%llu, %u, %llu] stripe[%llu, %llu] mismatches dev extent[%llu, %llu, %llu]\n",
7849 chunk_rec
->objectid
,
7852 chunk_rec
->stripes
[i
].devid
,
7853 chunk_rec
->stripes
[i
].offset
,
7854 dev_extent_rec
->objectid
,
7855 dev_extent_rec
->offset
,
7856 dev_extent_rec
->length
);
7859 list_move(&dev_extent_rec
->chunk_list
,
7860 &chunk_rec
->dextents
);
7865 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7866 chunk_rec
->objectid
,
7869 chunk_rec
->stripes
[i
].devid
,
7870 chunk_rec
->stripes
[i
].offset
);
7877 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7878 int check_chunks(struct cache_tree
*chunk_cache
,
7879 struct block_group_tree
*block_group_cache
,
7880 struct device_extent_tree
*dev_extent_cache
,
7881 struct list_head
*good
, struct list_head
*bad
,
7882 struct list_head
*rebuild
, int silent
)
7884 struct cache_extent
*chunk_item
;
7885 struct chunk_record
*chunk_rec
;
7886 struct block_group_record
*bg_rec
;
7887 struct device_extent_record
*dext_rec
;
7891 chunk_item
= first_cache_extent(chunk_cache
);
7892 while (chunk_item
) {
7893 chunk_rec
= container_of(chunk_item
, struct chunk_record
,
7895 err
= check_chunk_refs(chunk_rec
, block_group_cache
,
7896 dev_extent_cache
, silent
);
7899 if (err
== 0 && good
)
7900 list_add_tail(&chunk_rec
->list
, good
);
7901 if (err
> 0 && rebuild
)
7902 list_add_tail(&chunk_rec
->list
, rebuild
);
7904 list_add_tail(&chunk_rec
->list
, bad
);
7905 chunk_item
= next_cache_extent(chunk_item
);
7908 list_for_each_entry(bg_rec
, &block_group_cache
->block_groups
, list
) {
7911 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7919 list_for_each_entry(dext_rec
, &dev_extent_cache
->no_chunk_orphans
,
7923 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
static int check_device_used(struct device_record *dev_rec,
			     struct device_extent_tree *dext_cache)
{
	struct cache_extent *cache;
	struct device_extent_record *dev_extent_rec;
	u64 total_byte = 0;

	cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
	while (cache) {
		dev_extent_rec = container_of(cache,
					      struct device_extent_record,
					      cache);
		if (dev_extent_rec->objectid != dev_rec->devid)
			break;

		list_del_init(&dev_extent_rec->device_list);
		total_byte += dev_extent_rec->length;
		cache = next_cache_extent(cache);
	}

	if (total_byte != dev_rec->byte_used) {
		fprintf(stderr,
			"Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
			total_byte, dev_rec->byte_used, dev_rec->objectid,
			dev_rec->type, dev_rec->offset);
		return -1;
	} else {
		return 0;
	}
}
/*
 * Unlike the device size alignment check above, some super total_bytes check
 * failures can lead to mount failure for newer kernels.
 *
 * So this function will return the error for a fatal super total_bytes problem.
 */
static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
{
	struct btrfs_device *dev;
	struct list_head *dev_list = &fs_info->fs_devices->devices;
	u64 total_bytes = 0;
	u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);

	list_for_each_entry(dev, dev_list, dev_list)
		total_bytes += dev->total_bytes;

	/* Important check, which can cause unmountable fs */
	if (super_bytes < total_bytes) {
		error("super total bytes %llu smaller than real device(s) size %llu",
		      super_bytes, total_bytes);
		error("mounting this fs may fail for newer kernels");
		error("this can be fixed by 'btrfs rescue fix-device-size'");
		return false;
	}

	/*
	 * Optional check, just to make everything aligned and match with each
	 * other.
	 *
	 * For a btrfs-image restored fs, we don't need to check it anyway.
	 */
	if (btrfs_super_flags(fs_info->super_copy) &
	    (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
		return true;
	if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
	    !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
	    super_bytes != total_bytes) {
		warning("minor unaligned/mismatch device size detected");
		warning(
	"recommended to use 'btrfs rescue fix-device-size' to fix it");
	}
	return true;
}
/* check btrfs_dev_item -> btrfs_dev_extent */
static int check_devices(struct rb_root *dev_cache,
			 struct device_extent_tree *dev_extent_cache)
{
	struct rb_node *dev_node;
	struct device_record *dev_rec;
	struct device_extent_record *dext_rec;
	int err;
	int ret = 0;

	dev_node = rb_first(dev_cache);
	while (dev_node) {
		dev_rec = container_of(dev_node, struct device_record, node);
		err = check_device_used(dev_rec, dev_extent_cache);
		if (err)
			ret = err;

		check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
					 global_info->sectorsize);
		dev_node = rb_next(dev_node);
	}
	list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
			    device_list) {
		fprintf(stderr,
			"Device extent[%llu, %llu, %llu] didn't find its device.\n",
			dext_rec->objectid, dext_rec->offset, dext_rec->length);
		if (!ret)
			ret = 1;
	}
	return ret;
}
static int add_root_item_to_list(struct list_head *head,
				 u64 objectid, u64 bytenr, u64 last_snapshot,
				 u8 level, u8 drop_level,
				 struct btrfs_key *drop_key)
{
	struct root_item_record *ri_rec;

	ri_rec = malloc(sizeof(*ri_rec));
	if (!ri_rec)
		return -ENOMEM;
	ri_rec->bytenr = bytenr;
	ri_rec->objectid = objectid;
	ri_rec->level = level;
	ri_rec->drop_level = drop_level;
	ri_rec->last_snapshot = last_snapshot;
	if (drop_key)
		memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
	list_add_tail(&ri_rec->list, head);
	return 0;
}
static void free_root_item_list(struct list_head *list)
{
	struct root_item_record *ri_rec;

	while (!list_empty(list)) {
		ri_rec = list_first_entry(list, struct root_item_record,
					  list);
		list_del_init(&ri_rec->list);
		free(ri_rec);
	}
}
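/*
 * Read the root node of every queued root_item_record, add it to the pending
 * cache, and keep calling run_next_block() until the queued trees have been
 * walked into the extent/chunk/device/block-group caches.  When rebuilding
 * the extent tree the queued snapshots are processed one at a time (see the
 * comment inside the loop).
 */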
8075 static int deal_root_from_list(struct list_head
*list
,
8076 struct btrfs_root
*root
,
8077 struct block_info
*bits
,
8079 struct cache_tree
*pending
,
8080 struct cache_tree
*seen
,
8081 struct cache_tree
*reada
,
8082 struct cache_tree
*nodes
,
8083 struct cache_tree
*extent_cache
,
8084 struct cache_tree
*chunk_cache
,
8085 struct rb_root
*dev_cache
,
8086 struct block_group_tree
*block_group_cache
,
8087 struct device_extent_tree
*dev_extent_cache
)
8092 while (!list_empty(list
)) {
8093 struct root_item_record
*rec
;
8094 struct extent_buffer
*buf
;
8096 rec
= list_entry(list
->next
,
8097 struct root_item_record
, list
);
8099 buf
= read_tree_block(root
->fs_info
, rec
->bytenr
, 0);
8100 if (!extent_buffer_uptodate(buf
)) {
8101 free_extent_buffer(buf
);
8105 ret
= add_root_to_pending(buf
, extent_cache
, pending
,
8106 seen
, nodes
, rec
->objectid
);
			 * To rebuild the extent tree we need to deal with
			 * snapshots one by one; otherwise we deal with the
			 * nodes first, which maximizes readahead.
8116 ret
= run_next_block(root
, bits
, bits_nr
, &last
,
8117 pending
, seen
, reada
, nodes
,
8118 extent_cache
, chunk_cache
,
8119 dev_cache
, block_group_cache
,
8120 dev_extent_cache
, rec
);
8124 free_extent_buffer(buf
);
8125 list_del(&rec
->list
);
8131 ret
= run_next_block(root
, bits
, bits_nr
, &last
, pending
, seen
,
8132 reada
, nodes
, extent_cache
, chunk_cache
,
8133 dev_cache
, block_group_cache
,
8134 dev_extent_cache
, NULL
);
 * parse_tree_roots - Go over all roots in the tree root and add each one to
 *                    the appropriate list (normal or dropping).
 *
 * @fs_info        - pointer to fs_info struct of the file system.
 *
 * @normal_trees   - list that contains all roots which don't have a drop
 *                   operation in progress.
 *
 * @dropping_trees - list containing all roots which have a drop operation
 *                   in progress.
 *
 * Returns 0 on success or a negative value indicating an error.
8158 static int parse_tree_roots(struct btrfs_fs_info
*fs_info
,
8159 struct list_head
*normal_trees
,
8160 struct list_head
*dropping_trees
)
8162 struct btrfs_path path
;
8163 struct btrfs_key key
;
8164 struct btrfs_key found_key
;
8165 struct btrfs_root_item ri
;
8166 struct extent_buffer
*leaf
;
8170 btrfs_init_path(&path
);
8173 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8174 ret
= btrfs_search_slot(NULL
, fs_info
->tree_root
, &key
, &path
, 0, 0);
8178 leaf
= path
.nodes
[0];
8179 slot
= path
.slots
[0];
8180 if (slot
>= btrfs_header_nritems(path
.nodes
[0])) {
8181 ret
= btrfs_next_leaf(fs_info
->tree_root
, &path
);
8184 leaf
= path
.nodes
[0];
8185 slot
= path
.slots
[0];
8187 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
8188 if (found_key
.type
== BTRFS_ROOT_ITEM_KEY
) {
8189 unsigned long offset
;
8193 offset
= btrfs_item_ptr_offset(leaf
, path
.slots
[0]);
8194 read_extent_buffer(leaf
, &ri
, offset
, sizeof(ri
));
8195 last_snapshot
= btrfs_root_last_snapshot(&ri
);
8196 level
= btrfs_root_level(&ri
);
8197 if (btrfs_disk_key_objectid(&ri
.drop_progress
) == 0) {
8198 ret
= add_root_item_to_list(normal_trees
,
8200 btrfs_root_bytenr(&ri
),
8201 last_snapshot
, level
,
8206 u64 objectid
= found_key
.objectid
;
8208 btrfs_disk_key_to_cpu(&found_key
,
8210 ret
= add_root_item_to_list(dropping_trees
,
8212 btrfs_root_bytenr(&ri
),
8213 last_snapshot
, level
,
8214 ri
.drop_level
, &found_key
);
8223 btrfs_release_path(&path
);
8227 static int check_chunks_and_extents(struct btrfs_fs_info
*fs_info
)
8229 struct rb_root dev_cache
;
8230 struct cache_tree chunk_cache
;
8231 struct block_group_tree block_group_cache
;
8232 struct device_extent_tree dev_extent_cache
;
8233 struct cache_tree extent_cache
;
8234 struct cache_tree seen
;
8235 struct cache_tree pending
;
8236 struct cache_tree reada
;
8237 struct cache_tree nodes
;
8238 struct extent_io_tree excluded_extents
;
8239 struct cache_tree corrupt_blocks
;
8241 struct block_info
*bits
;
8243 struct list_head dropping_trees
;
8244 struct list_head normal_trees
;
8245 struct btrfs_root
*root1
;
8246 struct btrfs_root
*root
;
8249 root
= fs_info
->fs_root
;
8250 dev_cache
= RB_ROOT
;
8251 cache_tree_init(&chunk_cache
);
8252 block_group_tree_init(&block_group_cache
);
8253 device_extent_tree_init(&dev_extent_cache
);
8255 cache_tree_init(&extent_cache
);
8256 cache_tree_init(&seen
);
8257 cache_tree_init(&pending
);
8258 cache_tree_init(&nodes
);
8259 cache_tree_init(&reada
);
8260 cache_tree_init(&corrupt_blocks
);
8261 extent_io_tree_init(&excluded_extents
);
8262 INIT_LIST_HEAD(&dropping_trees
);
8263 INIT_LIST_HEAD(&normal_trees
);
8266 fs_info
->excluded_extents
= &excluded_extents
;
8267 fs_info
->fsck_extent_cache
= &extent_cache
;
8268 fs_info
->free_extent_hook
= free_extent_hook
;
8269 fs_info
->corrupt_blocks
= &corrupt_blocks
;
8273 bits
= malloc(bits_nr
* sizeof(struct block_info
));
8280 root1
= fs_info
->tree_root
;
8281 level
= btrfs_header_level(root1
->node
);
8282 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
8283 root1
->node
->start
, 0, level
, 0, NULL
);
8286 root1
= fs_info
->chunk_root
;
8287 level
= btrfs_header_level(root1
->node
);
8288 ret
= add_root_item_to_list(&normal_trees
, root1
->root_key
.objectid
,
8289 root1
->node
->start
, 0, level
, 0, NULL
);
8293 ret
= parse_tree_roots(fs_info
, &normal_trees
, &dropping_trees
);
8298 * check_block can return -EAGAIN if it fixes something, please keep
8299 * this in mind when dealing with return values from these functions, if
8300 * we get -EAGAIN we want to fall through and restart the loop.
8302 ret
= deal_root_from_list(&normal_trees
, root
, bits
, bits_nr
, &pending
,
8303 &seen
, &reada
, &nodes
, &extent_cache
,
8304 &chunk_cache
, &dev_cache
, &block_group_cache
,
8311 ret
= deal_root_from_list(&dropping_trees
, root
, bits
, bits_nr
,
8312 &pending
, &seen
, &reada
, &nodes
,
8313 &extent_cache
, &chunk_cache
, &dev_cache
,
8314 &block_group_cache
, &dev_extent_cache
);
8321 ret
= check_chunks(&chunk_cache
, &block_group_cache
,
8322 &dev_extent_cache
, NULL
, NULL
, NULL
, 0);
8329 ret
= check_extent_refs(root
, &extent_cache
);
8336 ret
= check_devices(&dev_cache
, &dev_extent_cache
);
8342 free_corrupt_blocks_tree(fs_info
->corrupt_blocks
);
8343 extent_io_tree_cleanup(&excluded_extents
);
8344 fs_info
->fsck_extent_cache
= NULL
;
8345 fs_info
->free_extent_hook
= NULL
;
8346 fs_info
->corrupt_blocks
= NULL
;
8347 fs_info
->excluded_extents
= NULL
;
8350 free_chunk_cache_tree(&chunk_cache
);
8351 free_device_cache_tree(&dev_cache
);
8352 free_block_group_tree(&block_group_cache
);
8353 free_device_extent_tree(&dev_extent_cache
);
8354 free_extent_cache_tree(&seen
);
8355 free_extent_cache_tree(&pending
);
8356 free_extent_cache_tree(&reada
);
8357 free_extent_cache_tree(&nodes
);
8358 free_root_item_list(&normal_trees
);
8359 free_root_item_list(&dropping_trees
);
8362 free_corrupt_blocks_tree(fs_info
->corrupt_blocks
);
8363 free_extent_cache_tree(&seen
);
8364 free_extent_cache_tree(&pending
);
8365 free_extent_cache_tree(&reada
);
8366 free_extent_cache_tree(&nodes
);
8367 free_chunk_cache_tree(&chunk_cache
);
8368 free_block_group_tree(&block_group_cache
);
8369 free_device_cache_tree(&dev_cache
);
8370 free_device_extent_tree(&dev_extent_cache
);
8371 free_extent_record_cache(&extent_cache
);
8372 free_root_item_list(&normal_trees
);
8373 free_root_item_list(&dropping_trees
);
8374 extent_io_tree_cleanup(&excluded_extents
);
static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
{
	int ret;

	if (check_mode == CHECK_MODE_LOWMEM)
		ret = check_chunks_and_extents_lowmem(fs_info);
	else
		ret = check_chunks_and_extents(fs_info);

	/* Also repair device size related problems */
	if (repair && !ret) {
		ret = btrfs_fix_device_and_super_size(fs_info);
		if (ret > 0)
			ret = 0;
	}
	return ret;
}
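/*
 * Make @root point at a brand-new empty node allocated in @trans.  When the
 * allocator returns the same bytenr as the old root node (see the comment
 * about the reloc data root below), the root item is refreshed explicitly so
 * the new generation and level land on disk.
 */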
8396 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle
*trans
,
8397 struct btrfs_root
*root
)
8399 struct extent_buffer
*c
;
8400 struct extent_buffer
*old
= root
->node
;
8403 struct btrfs_disk_key disk_key
= {0,0,0};
8407 c
= btrfs_alloc_free_block(trans
, root
,
8408 root
->fs_info
->nodesize
,
8409 root
->root_key
.objectid
,
8410 &disk_key
, level
, 0, 0);
8414 memset_extent_buffer(c
, 0, 0, sizeof(struct btrfs_header
));
8415 btrfs_set_header_level(c
, level
);
8416 btrfs_set_header_bytenr(c
, c
->start
);
8417 btrfs_set_header_generation(c
, trans
->transid
);
8418 btrfs_set_header_backref_rev(c
, BTRFS_MIXED_BACKREF_REV
);
8419 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
8421 write_extent_buffer(c
, root
->fs_info
->fsid
,
8422 btrfs_header_fsid(), BTRFS_FSID_SIZE
);
8424 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
8425 btrfs_header_chunk_tree_uuid(c
),
8428 btrfs_mark_buffer_dirty(c
);
8430 * this case can happen in the following case:
8432 * reinit reloc data root, this is because we skip pin
8433 * down reloc data tree before which means we can allocate
8434 * same block bytenr here.
8436 if (old
->start
== c
->start
) {
8437 btrfs_set_root_generation(&root
->root_item
,
8439 root
->root_item
.level
= btrfs_header_level(root
->node
);
8440 ret
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
8441 &root
->root_key
, &root
->root_item
);
8443 free_extent_buffer(c
);
8447 free_extent_buffer(old
);
8449 add_root_to_dirty_list(root
);
8453 static int reset_block_groups(struct btrfs_fs_info
*fs_info
)
8455 struct btrfs_block_group_cache
*cache
;
8456 struct btrfs_path path
;
8457 struct extent_buffer
*leaf
;
8458 struct btrfs_chunk
*chunk
;
8459 struct btrfs_key key
;
8463 btrfs_init_path(&path
);
8465 key
.type
= BTRFS_CHUNK_ITEM_KEY
;
8467 ret
= btrfs_search_slot(NULL
, fs_info
->chunk_root
, &key
, &path
, 0, 0);
8469 btrfs_release_path(&path
);
8474 * We do this in case the block groups were screwed up and had alloc
8475 * bits that aren't actually set on the chunks. This happens with
8476 * restored images every time and could happen in real life I guess.
8478 fs_info
->avail_data_alloc_bits
= 0;
8479 fs_info
->avail_metadata_alloc_bits
= 0;
8480 fs_info
->avail_system_alloc_bits
= 0;
8482 /* First we need to create the in-memory block groups */
8484 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8485 ret
= btrfs_next_leaf(fs_info
->chunk_root
, &path
);
8487 btrfs_release_path(&path
);
8495 leaf
= path
.nodes
[0];
8496 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
8497 if (key
.type
!= BTRFS_CHUNK_ITEM_KEY
) {
8502 chunk
= btrfs_item_ptr(leaf
, path
.slots
[0], struct btrfs_chunk
);
8503 btrfs_add_block_group(fs_info
, 0,
8504 btrfs_chunk_type(leaf
, chunk
), key
.offset
,
8505 btrfs_chunk_length(leaf
, chunk
));
8506 set_extent_dirty(&fs_info
->free_space_cache
, key
.offset
,
8507 key
.offset
+ btrfs_chunk_length(leaf
, chunk
));
8512 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8516 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8519 btrfs_release_path(&path
);
8523 static int reset_balance(struct btrfs_trans_handle
*trans
,
8524 struct btrfs_fs_info
*fs_info
)
8526 struct btrfs_root
*root
= fs_info
->tree_root
;
8527 struct btrfs_path path
;
8528 struct extent_buffer
*leaf
;
8529 struct btrfs_key key
;
8530 int del_slot
, del_nr
= 0;
8534 btrfs_init_path(&path
);
8535 key
.objectid
= BTRFS_BALANCE_OBJECTID
;
8536 key
.type
= BTRFS_BALANCE_ITEM_KEY
;
8538 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
8543 goto reinit_data_reloc
;
8548 ret
= btrfs_del_item(trans
, root
, &path
);
8551 btrfs_release_path(&path
);
8553 key
.objectid
= BTRFS_TREE_RELOC_OBJECTID
;
8554 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8556 ret
= btrfs_search_slot(trans
, root
, &key
, &path
, -1, 1);
8560 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8565 ret
= btrfs_del_items(trans
, root
, &path
,
8572 btrfs_release_path(&path
);
8575 ret
= btrfs_search_slot(trans
, root
, &key
, &path
,
8582 leaf
= path
.nodes
[0];
8583 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
8584 if (key
.objectid
> BTRFS_TREE_RELOC_OBJECTID
)
8586 if (key
.objectid
!= BTRFS_TREE_RELOC_OBJECTID
) {
8591 del_slot
= path
.slots
[0];
8600 ret
= btrfs_del_items(trans
, root
, &path
, del_slot
, del_nr
);
8604 btrfs_release_path(&path
);
8607 key
.objectid
= BTRFS_DATA_RELOC_TREE_OBJECTID
;
8608 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8609 key
.offset
= (u64
)-1;
8610 root
= btrfs_read_fs_root(fs_info
, &key
);
8612 fprintf(stderr
, "Error reading data reloc tree\n");
8613 ret
= PTR_ERR(root
);
8616 record_root_in_trans(trans
, root
);
8617 ret
= btrfs_fsck_reinit_root(trans
, root
);
8620 ret
= btrfs_make_root_dir(trans
, root
, BTRFS_FIRST_FREE_OBJECTID
);
8622 btrfs_release_path(&path
);
8626 static int reinit_extent_tree(struct btrfs_trans_handle
*trans
,
8627 struct btrfs_fs_info
*fs_info
, bool pin
)
8633 * The only reason we don't do this is because right now we're just
8634 * walking the trees we find and pinning down their bytes, we don't look
8635 * at any of the leaves. In order to do mixed groups we'd have to check
8636 * the leaves of any fs roots and pin down the bytes for any file
8637 * extents we find. Not hard but why do it if we don't have to?
8639 if (btrfs_fs_incompat(fs_info
, MIXED_GROUPS
)) {
8640 fprintf(stderr
, "We don't support re-initing the extent tree "
8641 "for mixed block groups yet, please notify a btrfs "
8642 "developer you want to do this so they can add this "
8643 "functionality.\n");
8648 * first we need to walk all of the trees except the extent tree and pin
8649 * down/exclude the bytes that are in use so we don't overwrite any
8650 * existing metadata.
	 * If pinned, unpin will be done at the end of the transaction.
8652 * If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
8656 ret
= pin_metadata_blocks(fs_info
);
8658 fprintf(stderr
, "error pinning down used bytes\n");
8662 ret
= exclude_metadata_blocks(fs_info
);
8664 fprintf(stderr
, "error excluding used bytes\n");
8665 printf("try to pin down used bytes\n");
8672 * Need to drop all the block groups since we're going to recreate all
8675 btrfs_free_block_groups(fs_info
);
8676 ret
= reset_block_groups(fs_info
);
8678 fprintf(stderr
, "error resetting the block groups\n");
8682 /* Ok we can allocate now, reinit the extent root */
8683 ret
= btrfs_fsck_reinit_root(trans
, fs_info
->extent_root
);
8685 fprintf(stderr
, "extent root initialization failed\n");
8687 * When the transaction code is updated we should end the
8688 * transaction, but for now progs only knows about commit so
8689 * just return an error.
8695 * Now we have all the in-memory block groups setup so we can make
8696 * allocations properly, and the metadata we care about is safe since we
8697 * pinned all of it above.
8700 struct btrfs_block_group_cache
*cache
;
8702 cache
= btrfs_lookup_first_block_group(fs_info
, start
);
8705 start
= cache
->key
.objectid
+ cache
->key
.offset
;
8706 ret
= btrfs_insert_item(trans
, fs_info
->extent_root
,
8707 &cache
->key
, &cache
->item
,
8708 sizeof(cache
->item
));
8710 fprintf(stderr
, "Error adding block group\n");
8713 btrfs_extent_post_op(trans
);
8716 ret
= reset_balance(trans
, fs_info
);
8718 fprintf(stderr
, "error resetting the pending balance\n");
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
{
	struct btrfs_path path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	int ret;

	printf("Recowing metadata block %llu\n", eb->start);
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
			key.objectid);
		return PTR_ERR(root);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_init_path(&path);
	path.lowest_level = btrfs_header_level(eb);
	if (path.lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
	else
		btrfs_item_key_to_cpu(eb, &key, 0);

	ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
8759 static int delete_bad_item(struct btrfs_root
*root
, struct bad_item
*bad
)
8761 struct btrfs_path path
;
8762 struct btrfs_trans_handle
*trans
;
8763 struct btrfs_key key
;
8766 printf("Deleting bad item [%llu,%u,%llu]\n", bad
->key
.objectid
,
8767 bad
->key
.type
, bad
->key
.offset
);
8768 key
.objectid
= bad
->root_id
;
8769 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8770 key
.offset
= (u64
)-1;
8772 root
= btrfs_read_fs_root(root
->fs_info
, &key
);
8774 fprintf(stderr
, "Couldn't find owner root %llu\n",
8776 return PTR_ERR(root
);
8779 trans
= btrfs_start_transaction(root
, 1);
8781 return PTR_ERR(trans
);
8783 btrfs_init_path(&path
);
8784 ret
= btrfs_search_slot(trans
, root
, &bad
->key
, &path
, -1, 1);
8790 ret
= btrfs_del_item(trans
, root
, &path
);
8792 btrfs_commit_transaction(trans
, root
);
8793 btrfs_release_path(&path
);
static int zero_log_tree(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		return ret;
	}
	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	ret = btrfs_commit_transaction(trans, root);
	return ret;
}
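/*
 * Compute checksums for the data extent [@start, @start + @len), one sector
 * at a time, reading each sector into @buf and inserting the csum item via
 * btrfs_csum_file_block().
 */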
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_root *csum_root, char *buf, u64 start,
			 u64 len)
{
	struct btrfs_fs_info *fs_info = csum_root->fs_info;
	u64 offset = 0;
	u64 sectorsize;
	int ret = 0;

	while (offset < len) {
		sectorsize = fs_info->sectorsize;
		ret = read_extent_data(fs_info, buf, start + offset,
				       &sectorsize, 0);
		if (ret)
			break;
		ret = btrfs_csum_file_block(trans, csum_root, start + len,
					    start + offset, buf, sectorsize);
		if (ret)
			break;
		offset += sectorsize;
	}
	return ret;
}
8837 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle
*trans
,
8838 struct btrfs_root
*csum_root
,
8839 struct btrfs_root
*cur_root
)
8841 struct btrfs_path path
;
8842 struct btrfs_key key
;
8843 struct extent_buffer
*node
;
8844 struct btrfs_file_extent_item
*fi
;
8851 buf
= malloc(cur_root
->fs_info
->sectorsize
);
8855 btrfs_init_path(&path
);
8859 ret
= btrfs_search_slot(NULL
, cur_root
, &key
, &path
, 0, 0);
8862 /* Iterate all regular file extents and fill its csum */
8864 btrfs_item_key_to_cpu(path
.nodes
[0], &key
, path
.slots
[0]);
8866 if (key
.type
!= BTRFS_EXTENT_DATA_KEY
)
8868 node
= path
.nodes
[0];
8869 slot
= path
.slots
[0];
8870 fi
= btrfs_item_ptr(node
, slot
, struct btrfs_file_extent_item
);
8871 if (btrfs_file_extent_type(node
, fi
) != BTRFS_FILE_EXTENT_REG
)
8873 start
= btrfs_file_extent_disk_bytenr(node
, fi
);
8874 len
= btrfs_file_extent_disk_num_bytes(node
, fi
);
8876 ret
= populate_csum(trans
, csum_root
, buf
, start
, len
);
8883 * TODO: if next leaf is corrupted, jump to nearest next valid
8886 ret
= btrfs_next_item(cur_root
, &path
);
8896 btrfs_release_path(&path
);
8901 static int fill_csum_tree_from_fs(struct btrfs_trans_handle
*trans
,
8902 struct btrfs_root
*csum_root
)
8904 struct btrfs_fs_info
*fs_info
= csum_root
->fs_info
;
8905 struct btrfs_path path
;
8906 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
8907 struct btrfs_root
*cur_root
;
8908 struct extent_buffer
*node
;
8909 struct btrfs_key key
;
8913 btrfs_init_path(&path
);
8914 key
.objectid
= BTRFS_FS_TREE_OBJECTID
;
8916 key
.type
= BTRFS_ROOT_ITEM_KEY
;
8917 ret
= btrfs_search_slot(NULL
, tree_root
, &key
, &path
, 0, 0);
8926 node
= path
.nodes
[0];
8927 slot
= path
.slots
[0];
8928 btrfs_item_key_to_cpu(node
, &key
, slot
);
8929 if (key
.objectid
> BTRFS_LAST_FREE_OBJECTID
)
8931 if (key
.type
!= BTRFS_ROOT_ITEM_KEY
)
8933 if (!is_fstree(key
.objectid
))
8935 key
.offset
= (u64
)-1;
8937 cur_root
= btrfs_read_fs_root(fs_info
, &key
);
8938 if (IS_ERR(cur_root
) || !cur_root
) {
8939 fprintf(stderr
, "Fail to read fs/subvol tree: %lld\n",
8943 ret
= fill_csum_tree_from_one_fs_root(trans
, csum_root
,
8948 ret
= btrfs_next_item(tree_root
, &path
);
8958 btrfs_release_path(&path
);
8962 static int fill_csum_tree_from_extent(struct btrfs_trans_handle
*trans
,
8963 struct btrfs_root
*csum_root
)
8965 struct btrfs_root
*extent_root
= csum_root
->fs_info
->extent_root
;
8966 struct btrfs_path path
;
8967 struct btrfs_extent_item
*ei
;
8968 struct extent_buffer
*leaf
;
8970 struct btrfs_key key
;
8973 btrfs_init_path(&path
);
8975 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
8977 ret
= btrfs_search_slot(NULL
, extent_root
, &key
, &path
, 0, 0);
8979 btrfs_release_path(&path
);
8983 buf
= malloc(csum_root
->fs_info
->sectorsize
);
8985 btrfs_release_path(&path
);
8990 if (path
.slots
[0] >= btrfs_header_nritems(path
.nodes
[0])) {
8991 ret
= btrfs_next_leaf(extent_root
, &path
);
8999 leaf
= path
.nodes
[0];
9001 btrfs_item_key_to_cpu(leaf
, &key
, path
.slots
[0]);
9002 if (key
.type
!= BTRFS_EXTENT_ITEM_KEY
) {
9007 ei
= btrfs_item_ptr(leaf
, path
.slots
[0],
9008 struct btrfs_extent_item
);
9009 if (!(btrfs_extent_flags(leaf
, ei
) &
9010 BTRFS_EXTENT_FLAG_DATA
)) {
9015 ret
= populate_csum(trans
, csum_root
, buf
, key
.objectid
,
9022 btrfs_release_path(&path
);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (search_fs_tree)
		return fill_csum_tree_from_fs(trans, csum_root);
	else
		return fill_csum_tree_from_extent(trans, csum_root);
}
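/*
 * Call-site note (an assumption based on the comment above, not spelled out
 * here): when the extent tree is being re-initialized its data backrefs are
 * gone, so the caller is expected to pass a non-zero search_fs_tree and let
 * the csums be rebuilt from the fs/subvol trees instead.
 */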
static void free_roots_info_cache(void)
{
	if (!roots_info_cache)
		return;

	while (!cache_tree_empty(roots_info_cache)) {
		struct cache_extent *entry;
		struct root_item_info *rii;

		entry = first_cache_extent(roots_info_cache);
		if (!entry)
			break;
		remove_cache_extent(roots_info_cache, entry);
		rii = container_of(entry, struct root_item_info, cache_extent);
		free(rii);
	}

	free(roots_info_cache);
	roots_info_cache = NULL;
}
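/*
 * Scan the extent tree for tree block items that carry an inline
 * TREE_BLOCK_REF and remember, per root objectid, the highest-level node seen
 * (bytenr, generation, level).  maybe_repair_root_item() later compares these
 * cached values against the on-disk root items.
 */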
9065 static int build_roots_info_cache(struct btrfs_fs_info
*info
)
9068 struct btrfs_key key
;
9069 struct extent_buffer
*leaf
;
9070 struct btrfs_path path
;
9072 if (!roots_info_cache
) {
9073 roots_info_cache
= malloc(sizeof(*roots_info_cache
));
9074 if (!roots_info_cache
)
9076 cache_tree_init(roots_info_cache
);
9079 btrfs_init_path(&path
);
9081 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
9083 ret
= btrfs_search_slot(NULL
, info
->extent_root
, &key
, &path
, 0, 0);
9086 leaf
= path
.nodes
[0];
9089 struct btrfs_key found_key
;
9090 struct btrfs_extent_item
*ei
;
9091 struct btrfs_extent_inline_ref
*iref
;
9092 unsigned long item_end
;
9093 int slot
= path
.slots
[0];
9098 struct cache_extent
*entry
;
9099 struct root_item_info
*rii
;
9102 if (slot
>= btrfs_header_nritems(leaf
)) {
9103 ret
= btrfs_next_leaf(info
->extent_root
, &path
);
9110 leaf
= path
.nodes
[0];
9111 slot
= path
.slots
[0];
9114 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
9116 if (found_key
.type
!= BTRFS_EXTENT_ITEM_KEY
&&
9117 found_key
.type
!= BTRFS_METADATA_ITEM_KEY
)
9120 ei
= btrfs_item_ptr(leaf
, slot
, struct btrfs_extent_item
);
9121 flags
= btrfs_extent_flags(leaf
, ei
);
9122 item_end
= (unsigned long)ei
+ btrfs_item_size_nr(leaf
, slot
);
9124 if (found_key
.type
== BTRFS_EXTENT_ITEM_KEY
&&
9125 !(flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
))
9128 if (found_key
.type
== BTRFS_METADATA_ITEM_KEY
) {
9129 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
9130 level
= found_key
.offset
;
9132 struct btrfs_tree_block_info
*binfo
;
9134 binfo
= (struct btrfs_tree_block_info
*)(ei
+ 1);
9135 iref
= (struct btrfs_extent_inline_ref
*)(binfo
+ 1);
9136 level
= btrfs_tree_block_level(leaf
, binfo
);
9140 * It's a valid extent/metadata item that has no inline ref,
9141 * but SHARED_BLOCK_REF or other shared references.
9142 * So we need to do extra check to avoid reading beyond leaf
9145 if ((unsigned long)iref
>= item_end
)
9149 * For a root extent, it must be of the following type and the
9150 * first (and only one) iref in the item.
9152 type
= btrfs_extent_inline_ref_type(leaf
, iref
);
9153 if (type
!= BTRFS_TREE_BLOCK_REF_KEY
)
9156 root_id
= btrfs_extent_inline_ref_offset(leaf
, iref
);
9157 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
9159 rii
= malloc(sizeof(struct root_item_info
));
9164 rii
->cache_extent
.start
= root_id
;
9165 rii
->cache_extent
.size
= 1;
9166 rii
->level
= (u8
)-1;
9167 entry
= &rii
->cache_extent
;
9168 ret
= insert_cache_extent(roots_info_cache
, entry
);
9171 rii
= container_of(entry
, struct root_item_info
,
9175 ASSERT(rii
->cache_extent
.start
== root_id
);
9176 ASSERT(rii
->cache_extent
.size
== 1);
9178 if (level
> rii
->level
|| rii
->level
== (u8
)-1) {
9180 rii
->bytenr
= found_key
.objectid
;
9181 rii
->gen
= btrfs_extent_generation(leaf
, ei
);
9182 rii
->node_count
= 1;
9183 } else if (level
== rii
->level
) {
9191 btrfs_release_path(&path
);
9196 static int maybe_repair_root_item(struct btrfs_path
*path
,
9197 const struct btrfs_key
*root_key
,
9198 const int read_only_mode
)
9200 const u64 root_id
= root_key
->objectid
;
9201 struct cache_extent
*entry
;
9202 struct root_item_info
*rii
;
9203 struct btrfs_root_item ri
;
9204 unsigned long offset
;
9206 entry
= lookup_cache_extent(roots_info_cache
, root_id
, 1);
9209 "Error: could not find extent items for root %llu\n",
9210 root_key
->objectid
);
9214 rii
= container_of(entry
, struct root_item_info
, cache_extent
);
9215 ASSERT(rii
->cache_extent
.start
== root_id
);
9216 ASSERT(rii
->cache_extent
.size
== 1);
9218 if (rii
->node_count
!= 1) {
9220 "Error: could not find btree root extent for root %llu\n",
9225 offset
= btrfs_item_ptr_offset(path
->nodes
[0], path
->slots
[0]);
9226 read_extent_buffer(path
->nodes
[0], &ri
, offset
, sizeof(ri
));
9228 if (btrfs_root_bytenr(&ri
) != rii
->bytenr
||
9229 btrfs_root_level(&ri
) != rii
->level
||
9230 btrfs_root_generation(&ri
) != rii
->gen
) {
9233 * If we're in repair mode but our caller told us to not update
9234 * the root item, i.e. just check if it needs to be updated, don't
9235 * print this message, since the caller will call us again shortly
9236 * for the same root item without read only mode (the caller will
9237 * open a transaction first).
9239 if (!(read_only_mode
&& repair
))
9241 "%sroot item for root %llu,"
9242 " current bytenr %llu, current gen %llu, current level %u,"
9243 " new bytenr %llu, new gen %llu, new level %u\n",
9244 (read_only_mode
? "" : "fixing "),
9246 btrfs_root_bytenr(&ri
), btrfs_root_generation(&ri
),
9247 btrfs_root_level(&ri
),
9248 rii
->bytenr
, rii
->gen
, rii
->level
);
9250 if (btrfs_root_generation(&ri
) > rii
->gen
) {
9252 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9253 root_id
, btrfs_root_generation(&ri
), rii
->gen
);
9257 if (!read_only_mode
) {
9258 btrfs_set_root_bytenr(&ri
, rii
->bytenr
);
9259 btrfs_set_root_level(&ri
, rii
->level
);
9260 btrfs_set_root_generation(&ri
, rii
->gen
);
9261 write_extent_buffer(path
->nodes
[0], &ri
,
9262 offset
, sizeof(ri
));
9272 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9273 * caused read-only snapshots to be corrupted if they were created at a moment
9274 * when the source subvolume/snapshot had orphan items. The issue was that the
9275 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9276 * node instead of the post orphan cleanup root node.
9277 * So this function, and its callees, just detects and fixes those cases. Even
9278 * though the regression was for read-only snapshots, this function applies to
9279 * any snapshot/subvolume root.
9280 * This must be run before any other repair code - not doing it so, makes other
9281 * repair code delete or modify backrefs in the extent tree for example, which
9282 * will result in an inconsistent fs after repairing the root items.
9284 static int repair_root_items(struct btrfs_fs_info
*info
)
9286 struct btrfs_path path
;
9287 struct btrfs_key key
;
9288 struct extent_buffer
*leaf
;
9289 struct btrfs_trans_handle
*trans
= NULL
;
9294 btrfs_init_path(&path
);
9296 ret
= build_roots_info_cache(info
);
9300 key
.objectid
= BTRFS_FIRST_FREE_OBJECTID
;
9301 key
.type
= BTRFS_ROOT_ITEM_KEY
;
9306 * Avoid opening and committing transactions if a leaf doesn't have
9307 * any root items that need to be fixed, so that we avoid rotating
9308 * backup roots unnecessarily.
9311 trans
= btrfs_start_transaction(info
->tree_root
, 1);
9312 if (IS_ERR(trans
)) {
9313 ret
= PTR_ERR(trans
);
9318 ret
= btrfs_search_slot(trans
, info
->tree_root
, &key
, &path
,
9322 leaf
= path
.nodes
[0];
9325 struct btrfs_key found_key
;
9327 if (path
.slots
[0] >= btrfs_header_nritems(leaf
)) {
9328 int no_more_keys
= find_next_key(&path
, &key
);
9330 btrfs_release_path(&path
);
9332 ret
= btrfs_commit_transaction(trans
,
9344 btrfs_item_key_to_cpu(leaf
, &found_key
, path
.slots
[0]);
9346 if (found_key
.type
!= BTRFS_ROOT_ITEM_KEY
)
9348 if (found_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
)
9351 ret
= maybe_repair_root_item(&path
, &found_key
, trans
? 0 : 1);
9355 if (!trans
&& repair
) {
9358 btrfs_release_path(&path
);
9368 free_roots_info_cache();
9369 btrfs_release_path(&path
);
9371 btrfs_commit_transaction(trans
, info
->tree_root
);
static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_block_group_cache *bg_cache;
	u64 current = 0;
	int ret = 0;

	/* Clear all free space cache inodes and its extent data */
	while (1) {
		bg_cache = btrfs_lookup_first_block_group(fs_info, current);
		if (!bg_cache)
			break;
		ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
		if (ret < 0)
			return ret;
		current = bg_cache->key.objectid + bg_cache->key.offset;
	}

	/* Don't forget to set cache_generation to -1 */
	trans = btrfs_start_transaction(fs_info->tree_root, 0);
	if (IS_ERR(trans)) {
		error("failed to update super block cache generation");
		return PTR_ERR(trans);
	}
	btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
	btrfs_commit_transaction(trans, fs_info->tree_root);

	return ret;
}
9408 static int do_clear_free_space_cache(struct btrfs_fs_info
*fs_info
,
9413 if (clear_version
== 1) {
9414 if (btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
)) {
9416 "free space cache v2 detected, use --clear-space-cache v2");
9420 printf("Clearing free space cache\n");
9421 ret
= clear_free_space_cache(fs_info
);
9423 error("failed to clear free space cache");
9426 printf("Free space cache cleared\n");
9428 } else if (clear_version
== 2) {
9429 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
)) {
9430 printf("no free space cache v2 to clear\n");
9434 printf("Clear free space cache v2\n");
9435 ret
= btrfs_clear_free_space_tree(fs_info
);
9437 error("failed to clear free space cache v2: %d", ret
);
9440 printf("free space cache v2 cleared\n");
9447 const char * const cmd_check_usage
[] = {
9448 "btrfs check [options] <device>",
9449 "Check structural integrity of a filesystem (unmounted).",
9450 "Check structural integrity of an unmounted filesystem. Verify internal",
9451 "trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous and should not be used",
	"         without prior analysis of problems found on the filesystem.",
9457 " starting point selection:",
9458 " -s|--super <superblock> use this superblock copy",
9459 " -b|--backup use the first valid backup root copy",
9460 " -r|--tree-root <bytenr> use the given bytenr for the tree root",
9461 " --chunk-root <bytenr> use the given bytenr for the chunk tree root",
9462 " operation modes:",
9463 " --readonly run in read-only mode (default)",
9464 " --repair try to repair the filesystem",
9465 " --force skip mount checks, repair is not possible",
9466 " --mode <MODE> allows choice of memory/IO trade-offs",
9467 " where MODE is one of:",
9468 " original - read inodes and extents to memory (requires",
9469 " more memory, does less IO)",
9470 " lowmem - try to use less memory but read blocks again",
9471 " when needed (experimental)",
9473 " --init-csum-tree create a new CRC tree",
9474 " --init-extent-tree create a new extent tree",
9475 " --clear-space-cache v1|v2 clear space cache for v1 or v2",
9476 " check and reporting options:",
9477 " --check-data-csum verify checksums of data blocks",
9478 " -Q|--qgroup-report print a report on qgroup consistency",
9479 " -E|--subvol-extents <subvolid>",
9480 " print subvolume extents and sharing state",
9481 " -p|--progress indicate progress",
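/*
 * Illustrative invocations (not an exhaustive list; the usage text above is
 * the authoritative reference):
 *
 *   btrfs check /dev/sdX
 *   btrfs check --mode lowmem /dev/sdX
 *   btrfs check --clear-space-cache v1 /dev/sdX
 *   btrfs check --repair /dev/sdX      (dangerous, see the WARNING above)
 */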
9485 int cmd_check(int argc
, char **argv
)
9487 struct cache_tree root_cache
;
9488 struct btrfs_root
*root
;
9489 struct btrfs_fs_info
*info
;
9492 u64 tree_root_bytenr
= 0;
9493 u64 chunk_root_bytenr
= 0;
9494 char uuidbuf
[BTRFS_UUID_UNPARSED_SIZE
];
9498 int init_csum_tree
= 0;
9500 int clear_space_cache
= 0;
9501 int qgroup_report
= 0;
9502 int qgroups_repaired
= 0;
9503 int qgroup_report_ret
;
9504 unsigned ctree_flags
= OPEN_CTREE_EXCLUSIVE
;
9509 enum { GETOPT_VAL_REPAIR
= 257, GETOPT_VAL_INIT_CSUM
,
9510 GETOPT_VAL_INIT_EXTENT
, GETOPT_VAL_CHECK_CSUM
,
9511 GETOPT_VAL_READONLY
, GETOPT_VAL_CHUNK_TREE
,
9512 GETOPT_VAL_MODE
, GETOPT_VAL_CLEAR_SPACE_CACHE
,
		static const struct option long_options[] = {
			{ "super", required_argument, NULL, 's' },
			{ "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
			{ "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
			{ "init-csum-tree", no_argument, NULL,
				GETOPT_VAL_INIT_CSUM },
			{ "init-extent-tree", no_argument, NULL,
				GETOPT_VAL_INIT_EXTENT },
			{ "check-data-csum", no_argument, NULL,
				GETOPT_VAL_CHECK_CSUM },
			{ "backup", no_argument, NULL, 'b' },
			{ "subvol-extents", required_argument, NULL, 'E' },
			{ "qgroup-report", no_argument, NULL, 'Q' },
			{ "tree-root", required_argument, NULL, 'r' },
			{ "chunk-root", required_argument, NULL,
				GETOPT_VAL_CHUNK_TREE },
			{ "progress", no_argument, NULL, 'p' },
			{ "mode", required_argument, NULL, GETOPT_VAL_MODE },
			{ "clear-space-cache", required_argument, NULL,
				GETOPT_VAL_CLEAR_SPACE_CACHE },
			{ "force", no_argument, NULL, GETOPT_VAL_FORCE },
			{ NULL, 0, NULL, 0 }
		};

		c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
		if (c < 0)
			break;
		switch (c) {
		case 'a': /* ignored */ break;
		case 'b':
			ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
			break;
		case 's':
			num = arg_strtou64(optarg);
			if (num >= BTRFS_SUPER_MIRROR_MAX) {
				error(
				"super mirror should be less than %d",
					BTRFS_SUPER_MIRROR_MAX);
				exit(1);
			}
			bytenr = btrfs_sb_offset(((int)num));
			printf("using SB copy %llu, bytenr %llu\n", num,
			       (unsigned long long)bytenr);
			break;
		case 'Q':
			qgroup_report = 1;
			break;
		case 'E':
			subvolid = arg_strtou64(optarg);
			break;
		case 'r':
			tree_root_bytenr = arg_strtou64(optarg);
			break;
		case GETOPT_VAL_CHUNK_TREE:
			chunk_root_bytenr = arg_strtou64(optarg);
			break;
		case 'p':
			ctx.progress_enabled = true;
			break;
		case '?':
		case 'h':
			usage(cmd_check_usage);
		case GETOPT_VAL_REPAIR:
			printf("enabling repair mode\n");
			repair = 1;
			ctree_flags |= OPEN_CTREE_WRITES;
			break;
		case GETOPT_VAL_READONLY:
			readonly = 1;
			break;
		case GETOPT_VAL_INIT_CSUM:
			printf("Creating a new CRC tree\n");
			init_csum_tree = 1;
			repair = 1;
			ctree_flags |= OPEN_CTREE_WRITES;
			break;
		case GETOPT_VAL_INIT_EXTENT:
			init_extent_tree = 1;
			ctree_flags |= (OPEN_CTREE_WRITES |
					OPEN_CTREE_NO_BLOCK_GROUPS);
			repair = 1;
			break;
		case GETOPT_VAL_CHECK_CSUM:
			check_data_csum = 1;
			break;
		case GETOPT_VAL_MODE:
			check_mode = parse_check_mode(optarg);
			if (check_mode == CHECK_MODE_UNKNOWN) {
				error("unknown mode: %s", optarg);
				exit(1);
			}
			break;
		case GETOPT_VAL_CLEAR_SPACE_CACHE:
			if (strcmp(optarg, "v1") == 0) {
				clear_space_cache = 1;
			} else if (strcmp(optarg, "v2") == 0) {
				clear_space_cache = 2;
				ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
			} else {
				error(
		"invalid argument to --clear-space-cache, must be v1 or v2");
				exit(1);
			}
			ctree_flags |= OPEN_CTREE_WRITES;
			break;
		case GETOPT_VAL_FORCE:
			force = 1;
			break;
		}
	}

	if (check_argc_exact(argc - optind, 1))
		usage(cmd_check_usage);
	if (ctx.progress_enabled) {
		ctx.tp = TASK_NOTHING;
		ctx.info = task_init(print_status_check, print_status_return,
				     &ctx);
	}

	/* This check is the only reason for --readonly to exist */
	if (readonly && repair) {
		error("repair options are not compatible with --readonly");
		exit(1);
	}

	/* Repairing in low-memory mode is still experimental and dangerous */
	if (repair && check_mode == CHECK_MODE_LOWMEM)
		warning("low-memory mode repair support is only partial");
	printf("Opening filesystem to check...\n");

	cache_tree_init(&root_cache);

	ret = check_mounted(argv[optind]);
	if (!force) {
		if (ret < 0) {
			error("could not check mount status: %s",
			      strerror(-ret));
			err |= !!ret;
			goto err_out;
		}
		if (ret) {
			error(
"%s is currently mounted, use --force if you really intend to check the filesystem",
			      argv[optind]);
			err |= !!ret;
			goto err_out;
		}
	} else {
		if (repair) {
			error("repair and --force is not yet supported");
			ret = 1;
			err |= !!ret;
			goto err_out;
		}
		if (ret < 0) {
			warning(
"cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
				argv[optind]);
		} else if (ret) {
			warning(
			"filesystem mounted, continuing because of --force");
		}
		/* A block device is mounted in exclusive mode by kernel */
		ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
	}
	/* only allow partial opening under repair mode */
	if (repair)
		ctree_flags |= OPEN_CTREE_PARTIAL;

	info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
				  chunk_root_bytenr, ctree_flags);
	if (!info) {
		error("cannot open file system");
		ret = -EIO;
		err |= !!ret;
		goto err_out;
	}

	global_info = info;
	root = info->fs_root;
	uuid_unparse(info->super_copy->fsid, uuidbuf);

	printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
	/*
	 * Check the bare minimum before starting anything else that could rely
	 * on it, namely the tree roots, any local consistency checks
	 */
	if (!extent_buffer_uptodate(info->tree_root->node) ||
	    !extent_buffer_uptodate(info->dev_root->node) ||
	    !extent_buffer_uptodate(info->chunk_root->node)) {
		error("critical roots corrupted, unable to check the filesystem");
		ret = -EIO;
		err |= !!ret;
		goto close_out;
	}
	if (clear_space_cache) {
		ret = do_clear_free_space_cache(info, clear_space_cache);
		err |= !!ret;
		goto close_out;
	}

	/*
	 * Repair mode will force us to commit a transaction, which would make
	 * the kernel fail to load the log tree when mounting, so the log tree
	 * has to be cleared first.
	 */
	if (repair && btrfs_super_log_root(info->super_copy)) {
		ret = ask_user("repair mode will clear out the log tree, are you sure?");
		if (!ret) {
			ret = 1;
			err |= !!ret;
			goto close_out;
		}
		ret = zero_log_tree(root);
		err |= !!ret;
		if (ret) {
			error("failed to zero log tree: %d", ret);
			goto close_out;
		}
	}
	if (qgroup_report) {
		printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
		       uuidbuf);
		ret = qgroup_verify_all(info);
		err |= !!ret;
		if (ret >= 0)
			err |= !!report_qgroups(1);
		goto close_out;
	}
	if (subvolid) {
		printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
		       subvolid, argv[optind], uuidbuf);
		ret = print_extent_state(info, subvolid);
		err |= !!ret;
		goto close_out;
	}
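	/*
	 * --init-extent-tree and --init-csum-tree rebuild their trees inside a
	 * transaction before the normal check starts, so the passes below run
	 * against (and repopulate) the freshly reinitialized trees.
	 */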
	if (init_extent_tree || init_csum_tree) {
		struct btrfs_trans_handle *trans;

		trans = btrfs_start_transaction(info->extent_root, 0);
		if (IS_ERR(trans)) {
			error("error starting transaction");
			ret = PTR_ERR(trans);
			err |= !!ret;
			goto close_out;
		}

		if (init_extent_tree) {
			printf("Creating a new extent tree\n");
			ret = reinit_extent_tree(trans, info,
					check_mode == CHECK_MODE_ORIGINAL);
			err |= !!ret;
			if (ret)
				goto close_out;
		}

		if (init_csum_tree) {
			printf("Reinitialize checksum tree\n");
			ret = btrfs_fsck_reinit_root(trans, info->csum_root);
			if (ret) {
				error("checksum tree initialization failed: %d",
				      ret);
				ret = -EIO;
				err |= !!ret;
				goto close_out;
			}

			ret = fill_csum_tree(trans, info->csum_root,
					     init_extent_tree);
			err |= !!ret;
			if (ret) {
				error("checksum tree refilling failed: %d", ret);
				ret = -EIO;
				goto close_out;
			}
		}
		/*
		 * Ok now we commit and run the normal fsck, which will add
		 * extent entries for all of the items it finds.
		 */
		ret = btrfs_commit_transaction(trans, info->extent_root);
		err |= !!ret;
		if (ret)
			goto close_out;
	}
	if (!extent_buffer_uptodate(info->extent_root->node)) {
		error("critical: extent_root, unable to check the filesystem");
		ret = -EIO;
		err |= !!ret;
		goto close_out;
	}
	if (!extent_buffer_uptodate(info->csum_root->node)) {
		error("critical: csum_root, unable to check the filesystem");
		ret = -EIO;
		err |= !!ret;
		goto close_out;
	}
	if (!init_extent_tree) {
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[1/7] checking root items\n");
		} else {
			ctx.tp = TASK_ROOT_ITEMS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}
		ret = repair_root_items(info);
		task_stop(ctx.info);
		if (ret < 0) {
			err |= !!ret;
			error("failed to repair root items: %s", strerror(-ret));
			goto close_out;
		}
		if (repair) {
			fprintf(stderr, "Fixed %d roots.\n", ret);
			ret = 0;
		} else if (ret > 0) {
			fprintf(stderr,
				"Found %d roots with an outdated root item.\n",
				ret);
			fprintf(stderr,
		"Please run a filesystem check with the option --repair to fix them.\n");
			ret = 1;
			err |= !!ret;
			goto close_out;
		}
	} else {
		fprintf(stderr, "[1/7] checking root items... skipped\n");
	}
	if (!ctx.progress_enabled) {
		fprintf(stderr, "[2/7] checking extents\n");
	} else {
		ctx.tp = TASK_EXTENTS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}
	ret = do_check_chunks_and_extents(info);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret)
		error(
		"errors found in extent allocation tree or chunk allocation");

	/* Only re-check super size after we checked and repaired the fs */
	err |= !is_super_size_valid(info);
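	/*
	 * Pass 3 checks whichever space cache implementation is in use: the
	 * free space tree when the FREE_SPACE_TREE compat_ro flag is set,
	 * otherwise the v1 free space cache.
	 */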
	is_free_space_tree = btrfs_fs_compat_ro(info, FREE_SPACE_TREE);

	if (!ctx.progress_enabled) {
		if (is_free_space_tree)
			fprintf(stderr, "[3/7] checking free space tree\n");
		else
			fprintf(stderr, "[3/7] checking free space cache\n");
	} else {
		ctx.tp = TASK_FREE_SPACE;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = check_space_cache(root);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret) {
		if (is_free_space_tree)
			error("errors found in free space tree");
		else
			error("errors found in free space cache");
	}
	/*
	 * Filesystems without the NO_HOLES feature must carry explicit hole
	 * extents between their real extents, so in that case make sure no
	 * gaps exist in the file extents of an inode; with NO_HOLES set such
	 * gaps are expected and can simply be ignored.
	 */
	no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);

	if (!ctx.progress_enabled) {
		fprintf(stderr, "[4/7] checking fs roots\n");
	} else {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = do_check_fs_roots(info, &root_cache);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret)
		error("errors found in fs roots");
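	/*
	 * Pass 5 always verifies the csum items themselves; the data blocks
	 * they cover are only read and compared against the stored csums when
	 * --check-data-csum was given.
	 */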
	if (!ctx.progress_enabled) {
		if (check_data_csum)
			fprintf(stderr, "[5/7] checking csums against data\n");
		else
			fprintf(stderr,
		"[5/7] checking only csums items (without verifying data)\n");
	} else {
		ctx.tp = TASK_CSUMS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = check_csums(root);
	task_stop(ctx.info);
	/*
	 * Data csum errors are not fatal on their own, but they may indicate
	 * more serious corruption, so continue checking.
	 */
	if (ret)
		error("errors found in csum tree");
	err |= !!ret;
	/* For low memory mode, check_fs_roots_v2 handles root refs */
	if (check_mode != CHECK_MODE_LOWMEM) {
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[6/7] checking root refs\n");
		} else {
			ctx.tp = TASK_ROOT_REFS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}
		ret = check_root_refs(root, &root_cache);
		task_stop(ctx.info);
		err |= !!ret;
		if (ret)
			error("errors found in root refs");
	} else {
		fprintf(stderr,
	"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
	}
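	/*
	 * In repair mode, tree blocks that earlier passes queued on
	 * fs_info->recow_ebs are rewritten (re-COWed) here to fix their
	 * transid errors; anything still on the list at the end is reported
	 * as an unresolved transid error.
	 */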
	while (repair && !list_empty(&root->fs_info->recow_ebs)) {
		struct extent_buffer *eb;

		eb = list_first_entry(&root->fs_info->recow_ebs,
				      struct extent_buffer, recow);
		list_del_init(&eb->recow);
		ret = recow_extent_buffer(root, eb);
		err |= !!ret;
		if (ret)
			error("failed to fix transid errors");
	}
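	/*
	 * Items flagged as bad during the check sit on the global delete_items
	 * list; when repairing they are deleted from the filesystem here.
	 */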
	while (!list_empty(&delete_items)) {
		struct bad_item *bad;

		bad = list_first_entry(&delete_items, struct bad_item, list);
		list_del_init(&bad->list);
		if (repair) {
			ret = delete_bad_item(root, bad);
			err |= !!ret;
		}
		free(bad);
	}
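	/*
	 * Pass 7 only runs when quotas are enabled: qgroup counters are
	 * verified, a report is printed, and repair_qgroups() gets a chance to
	 * rewrite them; inconsistencies that were not fully repaired keep the
	 * error status set.
	 */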
	if (info->quota_enabled) {
		qgroup_set_item_count_ptr(&ctx.item_count);
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[7/7] checking quota groups\n");
		} else {
			ctx.tp = TASK_QGROUPS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}
		ret = qgroup_verify_all(info);
		task_stop(ctx.info);
		err |= !!ret;
		if (ret) {
			error("failed to check quota groups");
			goto out;
		}
		qgroup_report_ret = report_qgroups(0);
		ret = repair_qgroups(info, &qgroups_repaired);
		if (ret) {
			error("failed to repair quota groups");
			goto out;
		}
		if (qgroup_report_ret && (!qgroups_repaired || ret))
			err |= qgroup_report_ret;
		ret = 0;
	} else {
		fprintf(stderr,
		"[7/7] checking quota groups skipped (not enabled on this FS)\n");
	}

	if (!list_empty(&root->fs_info->recow_ebs)) {
		error("transid errors in file system");
		ret = 1;
		err |= !!ret;
	}
out:
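	/*
	 * Summary: the totals below are global counters accumulated by the
	 * individual passes above.
	 */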
	printf("found %llu bytes used, ",
	       (unsigned long long)bytes_used);
	if (err)
		printf("error(s) found\n");
	else
		printf("no error found\n");
	printf("total csum bytes: %llu\n",
	       (unsigned long long)total_csum_bytes);
	printf("total tree bytes: %llu\n",
	       (unsigned long long)total_btree_bytes);
	printf("total fs tree bytes: %llu\n",
	       (unsigned long long)total_fs_tree_bytes);
	printf("total extent tree bytes: %llu\n",
	       (unsigned long long)total_extent_tree_bytes);
	printf("btree space waste bytes: %llu\n",
	       (unsigned long long)btree_space_waste);
	printf("file data blocks allocated: %llu\n referenced %llu\n",
	       (unsigned long long)data_bytes_allocated,
	       (unsigned long long)data_bytes_referenced);

	free_qgroup_counts();
	free_root_recs_tree(&root_cache);
close_out:
	close_ctree(root);
err_out:
	if (ctx.progress_enabled)
		task_deinit(ctx.info);