btrfs-progs: Don't BUG_ON() if we failed to load one device or one chunk
[btrfs-progs-unstable/devel.git] / check / main.c
blob8db300abb825e292218b74f730e017b95dc04683
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/mode-common.h"
47 #include "check/mode-original.h"
48 #include "check/mode-lowmem.h"
/* Which phase the progress task is currently reporting. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Context shared with the progress-indicator thread. */
struct task_ctx {
	int progress_enabled;		/* non-zero when the spinner is shown */
	enum task_position tp;		/* phase currently being checked */
	struct task_info *info;		/* handle from task-utils */
};

/* Global accounting, accumulated while walking the filesystem trees */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* Command line options (set during option parsing) */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;

/* Which fsck implementation to run (original in-memory vs. lowmem). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * rb-tree comparator for data backrefs.
 *
 * Orders by parent/root first (they share a union, so one comparison
 * covers both).  For non-full backrefs, falls through to owner and
 * offset, and when both sides were actually found on disk, to
 * disk_bytenr and bytes.  Returns -1/0/1.
 */
static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
{
	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
	struct data_backref *back1 = to_data_backref(ext1);
	struct data_backref *back2 = to_data_backref(ext2);

	/* Both entries must really be data backrefs */
	WARN_ON(!ext1->is_data);
	WARN_ON(!ext2->is_data);

	/* parent and root are a union, so this covers both */
	if (back1->parent > back2->parent)
		return 1;
	if (back1->parent < back2->parent)
		return -1;

	/* This is a full backref and the parents match. */
	if (back1->node.full_backref)
		return 0;

	if (back1->owner > back2->owner)
		return 1;
	if (back1->owner < back2->owner)
		return -1;

	if (back1->offset > back2->offset)
		return 1;
	if (back1->offset < back2->offset)
		return -1;

	/* Tie-break on disk location only when both refs were seen on disk */
	if (back1->found_ref && back2->found_ref) {
		if (back1->disk_bytenr > back2->disk_bytenr)
			return 1;
		if (back1->disk_bytenr < back2->disk_bytenr)
			return -1;

		if (back1->bytes > back2->bytes)
			return 1;
		if (back1->bytes < back2->bytes)
			return -1;
	}

	return 0;
}
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
147 return 1;
148 if (back1->parent < back2->parent)
149 return -1;
151 return 0;
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
160 return 1;
162 if (ext1->is_data < ext2->is_data)
163 return -1;
165 if (ext1->full_backref > ext2->full_backref)
166 return 1;
167 if (ext1->full_backref < ext2->full_backref)
168 return -1;
170 if (ext1->is_data)
171 return compare_data_backref(node1, node2);
172 else
173 return compare_tree_backref(node1, node2);
/*
 * Progress-reporting thread body: print a spinner plus the name of the
 * current check phase, refreshed by the task period (1s), until the
 * task framework cancels the thread.  @p is the global struct task_ctx.
 */
static void *print_status_check(void *p)
{
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	uint32_t count = 0;
	static char *task_position_string[] = {
		"checking extents",
		"checking free space cache",
		"checking fs roots",
	};

	task_period_start(priv->info, 1000 /* 1s */);

	/* Nothing to report (also avoids indexing past the string table) */
	if (priv->tp == TASK_NOTHING)
		return NULL;

	/* Loops forever; task_stop() cancels this thread when check ends */
	while (1) {
		printf("%s [%c]\r", task_position_string[priv->tp],
				work_indicator[count % 4]);
		count++;
		fflush(stdout);
		task_period_wait(priv->info);
	}
	return NULL;
}
/*
 * Progress-task completion hook: terminate the spinner line so later
 * output starts on a fresh line.  Always returns 0; @p is unused.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
/* Compatible function to allow reuse of old codes */
/*
 * Return the start offset of the first hole in @holes in rb-tree order,
 * or (u64)-1 when no holes are recorded.
 */
static u64 first_extent_gap(struct rb_root *holes)
{
	struct file_extent_hole *hole;

	if (RB_EMPTY_ROOT(holes))
		return (u64)-1;

	hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
	return hole->start;
}
/*
 * rb-tree comparator for file extent holes.
 *
 * Note the sign convention is inverted relative to the backref
 * comparators above, and equal starts never compare equal — the longer
 * hole becomes the merge center and true duplicates are merged later
 * by add_file_extent_hole().
 */
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
{
	struct file_extent_hole *hole1;
	struct file_extent_hole *hole2;

	hole1 = rb_entry(node1, struct file_extent_hole, node);
	hole2 = rb_entry(node2, struct file_extent_hole, node);

	if (hole1->start > hole2->start)
		return -1;
	if (hole1->start < hole2->start)
		return 1;
	/* Now hole1->start == hole2->start */
	if (hole1->len >= hole2->len)
		/*
		 * Hole 1 will be merge center
		 * Same hole will be merged later
		 */
		return -1;
	/* Hole 2 will be merge center */
	return 1;
}
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be continuous holes.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int add_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	if (!hole)
		return -ENOMEM;
	hole->start = start;
	hole->len = len;
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
				node);
	if (prev && prev->start + prev->len >= hole->start) {
		/* Absorb the previous hole; the new node stays in the tree */
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);
		free(prev);
		prev = NULL;
	}

	/* iterate merge with next holes */
	while (1) {
		if (!rb_next(&hole->node))
			break;
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
				node);
		if (hole->start + hole->len >= next->start) {
			/* Overlapping or touching: swallow the next hole */
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
					hole->start;
			rb_erase(&next->node, holes);
			free(next);
			next = NULL;
		} else
			break;
	}
	return 0;
}
/*
 * rb_search() comparator: @data is a struct file_extent_hole whose
 * ->start holds the offset being looked up.  Returns 0 when that
 * offset falls inside the hole stored at @node, -1/1 to steer the
 * search left/right.
 */
static int compare_hole_range(struct rb_node *node, void *data)
{
	struct file_extent_hole *hole;
	u64 start;

	hole = (struct file_extent_hole *)data;
	start = hole->start;

	hole = rb_entry(node, struct file_extent_hole, node);
	if (start < hole->start)
		return -1;
	if (start >= hole->start && start < hole->start + hole->len)
		return 0;
	return 1;
}
/*
 * Delete a hole in the record
 *
 * This is stricter than add: the range [start, start + len) must lie
 * entirely inside one recorded hole, which is removed and re-added as
 * the remaining piece(s), if any.  Returns -EEXIST when no recorded
 * hole fully covers the range.
 */
static int del_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	u64 prev_start = 0;
	u64 prev_len = 0;
	u64 next_start = 0;
	u64 next_len = 0;
	struct rb_node *node;
	int have_prev = 0;
	int have_next = 0;
	int ret = 0;

	tmp.start = start;
	tmp.len = len;
	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	if (!node)
		return -EEXIST;
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)
		return -EEXIST;

	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exists.
	 */
	if (start > hole->start) {
		/* Left-over piece before the deleted range */
		prev_start = hole->start;
		prev_len = start - hole->start;
		have_prev = 1;
	}
	if (hole->start + hole->len > start + len) {
		/* Left-over piece after the deleted range */
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
		have_next = 1;
	}
	rb_erase(node, holes);
	free(hole);
	if (have_prev) {
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		if (ret < 0)
			return ret;
	}
	if (have_next) {
		ret = add_file_extent_hole(holes, next_start, next_len);
		if (ret < 0)
			return ret;
	}
	return 0;
}
383 static int copy_file_extent_holes(struct rb_root *dst,
384 struct rb_root *src)
386 struct file_extent_hole *hole;
387 struct rb_node *node;
388 int ret = 0;
390 node = rb_first(src);
391 while (node) {
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
394 if (ret)
395 break;
396 node = rb_next(node);
398 return ret;
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
407 while (node) {
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
410 free(hole);
411 node = rb_first(holes);
/*
 * Make sure @root is tracked by the running transaction: on first
 * touch mark it dirty, remember the transid and pin the current node
 * as the commit root (taking an extra extent buffer reference).
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
	}
}
426 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
428 struct device_record *rec1;
429 struct device_record *rec2;
431 rec1 = rb_entry(node1, struct device_record, node);
432 rec2 = rb_entry(node2, struct device_record, node);
433 if (rec1->devid > rec2->devid)
434 return -1;
435 else if (rec1->devid < rec2->devid)
436 return 1;
437 else
438 return 0;
/*
 * Deep-copy an inode record: the record itself, its backref list, its
 * orphan extent list and its hole tree.  The clone starts with a
 * single reference.  Returns the clone or ERR_PTR(-ENOMEM).
 */
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;
	struct rb_node *rb;
	size_t size;
	int ret;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return ERR_PTR(-ENOMEM);
	/* Start from a bitwise copy, then rebuild the owned containers */
	memcpy(rec, orig_rec, sizeof(*rec));
	rec->refs = 1;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		/* backrefs carry an inline name: copy header + name + NUL */
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		if (!backref) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	}
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		if (!dst_orphan) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	}
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
	if (ret < 0)
		goto cleanup_rb;

	return rec;

cleanup_rb:
	/* Free the partially built hole tree; no rb_erase needed since
	 * the whole record is being thrown away. */
	rb = rb_first(&rec->holes);
	while (rb) {
		struct file_extent_hole *hole;

		hole = rb_entry(rb, struct file_extent_hole, node);
		rb = rb_next(rb);
		free(hole);
	}

cleanup:
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
			free(orig);
		}

	/*
	 * NOTE(review): the orphan list is walked with an inode_backref
	 * iterator; this appears to rely on 'list' sitting at the same
	 * offset in both structs — confirm against the struct definitions.
	 */
	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
			free(orig);
		}

	free(rec);

	return ERR_PTR(ret);
}
/*
 * Print every orphan data extent attached to an inode record of tree
 * @objectid; prints nothing when the list is empty.
 */
static void print_orphan_data_extents(struct list_head *orphan_extents,
				      u64 objectid)
{
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
		return;
	printf("The following data extent is lost in tree %llu:\n",
	       objectid);
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
		       orphan->objectid, orphan->offset, orphan->disk_bytenr,
		       orphan->disk_len);
	}
}
/*
 * Print a human-readable summary of every I_ERR_* bit set on @rec,
 * followed by the lost orphan extents and the file extent holes when
 * the corresponding error bits are present.  No output if error free.
 */
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	if (!errors)
		return;
	/* reloc root errors, we print its corresponding fs root objectid*/
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	}
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	/* One short description per error bit, in a fixed order */
	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
		fprintf(stderr, ", inline file extent too large");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	if (errors & I_ERR_ODD_INODE_FLAGS)
		fprintf(stderr, ", odd inode flags");
	if (errors & I_ERR_INLINE_RAM_BYTES_WRONG)
		fprintf(stderr, ", invalid inline ram bytes");
	fprintf(stderr, "\n");
	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;
		int found = 0;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
		while (node) {
			found = 1;
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
		}
		/* No explicit hole recorded: the whole file range counts */
		if (!found)
			fprintf(stderr, "\tstart: 0, len: %llu\n",
				round_up(rec->isize,
					 root->fs_info->sectorsize));
	}
}
610 static void print_ref_error(int errors)
612 if (errors & REF_ERR_NO_DIR_ITEM)
613 fprintf(stderr, ", no dir item");
614 if (errors & REF_ERR_NO_DIR_INDEX)
615 fprintf(stderr, ", no dir index");
616 if (errors & REF_ERR_NO_INODE_REF)
617 fprintf(stderr, ", no inode ref");
618 if (errors & REF_ERR_DUP_DIR_ITEM)
619 fprintf(stderr, ", dup dir item");
620 if (errors & REF_ERR_DUP_DIR_INDEX)
621 fprintf(stderr, ", dup dir index");
622 if (errors & REF_ERR_DUP_INODE_REF)
623 fprintf(stderr, ", dup inode ref");
624 if (errors & REF_ERR_INDEX_UNMATCH)
625 fprintf(stderr, ", index mismatch");
626 if (errors & REF_ERR_FILETYPE_UNMATCH)
627 fprintf(stderr, ", filetype mismatch");
628 if (errors & REF_ERR_NAME_TOO_LONG)
629 fprintf(stderr, ", name too long");
630 if (errors & REF_ERR_NO_ROOT_REF)
631 fprintf(stderr, ", no root ref");
632 if (errors & REF_ERR_NO_ROOT_BACKREF)
633 fprintf(stderr, ", no root backref");
634 if (errors & REF_ERR_DUP_ROOT_REF)
635 fprintf(stderr, ", dup root ref");
636 if (errors & REF_ERR_DUP_ROOT_BACKREF)
637 fprintf(stderr, ", dup root backref");
638 fprintf(stderr, "\n");
/*
 * Get the inode record for @ino from @inode_cache.
 *
 * With @mod non-zero the caller intends to modify the record: a missing
 * record is created, and a record shared between trees (refs > 1) is
 * cloned first (copy-on-write).  Returns the record, NULL when the
 * record is absent and @mod is zero, or an ERR_PTR on failure.
 */
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
					  u64 ino, int mod)
{
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;
	int ret;

	cache = lookup_cache_extent(inode_cache, ino, 1);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		if (mod && rec->refs > 1) {
			/* Copy-on-write before handing out a writable rec */
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
				return node->data;
			rec->refs--;
			rec = node->data;
		}
	} else if (mod) {
		rec = calloc(1, sizeof(*rec));
		if (!rec)
			return ERR_PTR(-ENOMEM);
		rec->ino = ino;
		rec->extent_start = (u64)-1;
		rec->refs = 1;
		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
		if (!node) {
			free(rec);
			return ERR_PTR(-ENOMEM);
		}
		node->cache.start = ino;
		node->cache.size = 1;
		node->data = rec;

		/* The free-ino cache inode is never linked from a dir */
		if (ino == BTRFS_FREE_INO_OBJECTID)
			rec->found_link = 1;

		ret = insert_cache_extent(inode_cache, &node->cache);
		if (ret)
			/* NOTE(review): rec and node leak on this path — verify */
			return ERR_PTR(-EEXIST);
	}
	return rec;
}
690 static void free_orphan_data_extents(struct list_head *orphan_extents)
692 struct orphan_data_extent *orphan;
694 while (!list_empty(orphan_extents)) {
695 orphan = list_entry(orphan_extents->next,
696 struct orphan_data_extent, list);
697 list_del(&orphan->list);
698 free(orphan);
/*
 * Drop one reference on @rec; on the last reference release its
 * backref list, orphan extents, hole tree and the record itself.
 */
static void free_inode_rec(struct inode_record *rec)
{
	struct inode_backref *backref;

	if (--rec->refs > 0)
		return;

	while (!list_empty(&rec->backrefs)) {
		backref = to_inode_backref(rec->backrefs.next);
		list_del(&backref->list);
		free(backref);
	}
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);
	free(rec);
}
719 static int can_free_inode_rec(struct inode_record *rec)
721 if (!rec->errors && rec->checked && rec->found_inode_item &&
722 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
723 return 1;
724 return 0;
/*
 * Finalize the checks on @rec and free it if nothing is pending.
 *
 * First drops backrefs that are fully consistent, then (once the record
 * is checked and not mid-merge) derives the mode-dependent error bits
 * (dir size, file nbytes, extent coverage, csum presence).  If the
 * record ends up clean it is removed from @inode_cache and freed.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
{
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	u8 filetype;

	if (!rec->found_inode_item)
		return;

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* Fully consistent backref: nothing left to report */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
				free(backref);
			}
		}
	}

	if (!rec->checked || rec->merging)
		return;

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* Extents must cover [0, isize) unless NO_HOLES is set */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
	}

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;
	}

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		free_inode_rec(rec);
	}
}
788 static int check_orphan_item(struct btrfs_root *root, u64 ino)
790 struct btrfs_path path;
791 struct btrfs_key key;
792 int ret;
794 key.objectid = BTRFS_ORPHAN_OBJECTID;
795 key.type = BTRFS_ORPHAN_ITEM_KEY;
796 key.offset = ino;
798 btrfs_init_path(&path);
799 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
800 btrfs_release_path(&path);
801 if (ret > 0)
802 ret = -ENOENT;
803 return ret;
/*
 * Record the contents of an INODE_ITEM at @eb/@slot into the shared
 * node's current inode record.  Returns 1 (and flags a duplicate) when
 * the record already carries an inode item, 0 otherwise.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_inode_item *item;
	u64 flags;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
		return 1;
	}
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
		rec->nodatasum = 1;
	rec->found_inode_item = 1;
	/* nlink == 0 requires an orphan item; cleared if one is found */
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	flags = btrfs_inode_flags(eb, item);
	/* Symlinks must not carry immutable/append-only flags */
	if (S_ISLNK(rec->imode) &&
	    flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND))
		rec->errors |= I_ERR_ODD_INODE_FLAGS;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
	return 0;
}
/*
 * Find the backref identified by (@dir, @name) on @rec, or allocate,
 * zero and link a new one.  For BTRFS_MULTIPLE_OBJECTIDS records no
 * lookup is done — a fresh backref is always added.  Returns NULL on
 * allocation failure.
 */
static struct inode_backref *get_inode_backref(struct inode_record *rec,
					       const char *name,
					       int namelen, u64 dir)
{
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
			break;
		if (backref->dir != dir || backref->namelen != namelen)
			continue;
		if (memcmp(name, backref->name, namelen))
			continue;
		return backref;
	}

	/* Not found: the name is stored inline after the struct */
	backref = malloc(sizeof(*backref) + namelen + 1);
	if (!backref)
		return NULL;
	memset(backref, 0, sizeof(*backref));
	backref->dir = dir;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
	return backref;
}
/*
 * Record one reference item for inode @ino in directory @dir.
 *
 * @itemtype says which on-disk item was seen (DIR_INDEX, DIR_ITEM, or
 * INODE_REF/INODE_EXTREF); the matching found_* flag is set and any
 * disagreement with previously seen items (index, filetype) is
 * accumulated in backref->errors.  Always returns 0.
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     u8 filetype, u8 itemtype, int errors)
{
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
	BUG_ON(!backref);
	if (errors)
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		/* Only DIR_ITEMs count toward the found link count */
		rec->found_link++;
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		else
			backref->index = index;

		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	} else {
		/* Caller passed an item type we do not handle */
		BUG_ON(1);
	}

	maybe_free_inode_rec(inode_cache, rec);
	return 0;
}
/*
 * Merge inode record @src into @dst — two partial records for the same
 * inode collected through different shared tree blocks.
 *
 * Backrefs are replayed via add_inode_backref(), counters and flags
 * are combined, and extent coverage plus the hole trees are stitched
 * together.  Returns 0 or a negative errno from hole bookkeeping.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
{
	struct inode_backref *backref;
	u32 dir_count = 0;
	int ret = 0;

	/* Suppress the finalization checks in maybe_free_inode_rec() */
	dst->merging = 1;
	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					backref->index, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_INDEX_KEY, backref->errors);
		}
		if (backref->found_dir_item) {
			dir_count++;
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, 0, backref->name,
					backref->namelen, backref->filetype,
					BTRFS_DIR_ITEM_KEY, backref->errors);
		}
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					backref->dir, backref->index,
					backref->name, backref->namelen, 0,
					backref->ref_type, backref->errors);
		}
	}

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* Import src's holes when its first gap is the lower one */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
		if (ret < 0)
			return ret;
	}

	/* Links added via DIR_ITEM replay above were counted in dir_count */
	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		} else {
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				/* Record the gap between the two ranges */
				ret = add_file_extent_hole(&dst->holes,
						dst->extent_end,
						src->extent_start - dst->extent_end);
			}
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;
		}
	}

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		} else {
			dst->errors |= I_ERR_DUP_INODE_ITEM;
		}
	}
	dst->merging = 0;

	return 0;
}
/*
 * Move (or copy) all inode records collected under @src_node into
 * @dst_node, merging records that already exist there.
 *
 * When the last reference to @src_node is dropped the records are
 * spliced (moved); otherwise they are duplicated with an extra ref.
 * Both the root_cache and the inode_cache trees are processed, and
 * dst_node->current is advanced to the highest in-progress inode.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		/* Advance before the node is possibly removed below */
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* Already tracked in dst: merge and drop our copy */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	/* First pass handled root_cache; repeat for inode_cache */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
/* cache-tree free callback: drop a ptr_node and its inode record. */
static void free_inode_ptr(struct cache_extent *cache)
{
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	rec = node->data;
	free_inode_rec(rec);
	free(node);
}

/* Generates free_inode_recs_tree() for emptying a whole inode cache */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1086 static struct shared_node *find_shared_node(struct cache_tree *shared,
1087 u64 bytenr)
1089 struct cache_extent *cache;
1090 struct shared_node *node;
1092 cache = lookup_cache_extent(shared, bytenr, 1);
1093 if (cache) {
1094 node = container_of(cache, struct shared_node, cache);
1095 return node;
1097 return NULL;
1100 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1102 int ret;
1103 struct shared_node *node;
1105 node = calloc(1, sizeof(*node));
1106 if (!node)
1107 return -ENOMEM;
1108 node->cache.start = bytenr;
1109 node->cache.size = 1;
1110 cache_tree_init(&node->root_cache);
1111 cache_tree_init(&node->inode_cache);
1112 node->refs = refs;
1114 ret = insert_cache_extent(shared, &node->cache);
1116 return ret;
/*
 * Track entry into a shared tree block at @bytenr during a tree walk.
 *
 * Returns 0 when the caller should descend into the block (first
 * visit), or 1 when the block was already processed: its collected
 * records are spliced into the currently active node (or just dropped
 * for dead roots) and the caller skips the subtree.
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int ret;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		/* First time we see this shared block: start tracking it */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		BUG_ON(ret);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	/* Dead root (refs == 0): just drop the cached records */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}
/*
 * Counterpart of enter_shared_node(): when leaving the shared node at
 * @level, splice its records into the nearest tracked node further up
 * the tree (or just drop one reference for dead roots).
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	/* Find the nearest tracked shared node above this level */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		/* Dead root: keep the node, just release our reference */
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
/*
 * Returns:
 * < 0 - on error
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	/* Fast path: a direct ROOT_REF from parent to child in the root tree */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	/* Slow path: scan every ROOT_BACKREF item of the child root */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		has_parent = 1;

		/* key.offset of a ROOT_BACKREF is the parent root id */
		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
/*
 * Record every name packed into one DIR_ITEM/DIR_INDEX item as an inode
 * (or, for subvolume entries, root) backref of the current directory
 * record.  Over-long names are truncated to what fits in the item and
 * flagged with REF_ERR_NAME_TOO_LONG; DIR_ITEM name-hash mismatches set
 * I_ERR_ODD_DIR_ITEM.  Always returns 0.
 */
static int process_dir_item(struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	u8 filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (cur + sizeof(*di) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			/* Name does not fit; copy the readable remainder */
			error = REF_ERR_NAME_TOO_LONG;

			if (cur + sizeof(*di) > total)
				break;
			len = min_t(u32, total - cur - sizeof(*di),
				    BTRFS_NAME_LEN);
		} else {
			len = name_len;
			error = 0;
		}

		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		/* DIR_ITEM keys embed the crc of the name in key->offset */
		if (key->type == BTRFS_DIR_ITEM_KEY &&
		    key->offset != btrfs_name_hash(namebuf, len)) {
			rec->errors |= I_ERR_ODD_DIR_ITEM;
			error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
			key->objectid, key->offset, namebuf, len, filetype,
			key->offset, btrfs_name_hash(namebuf, len));
		}

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			/* Regular directory entry pointing at an inode */
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			/* Subvolume/snapshot entry pointing at a root */
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr,
				"unknown location type %d in DIR_ITEM[%llu %llu]\n",
				location.type, key->objectid, key->offset);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		/* Advance to the next packed entry in the item */
		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	/* A DIR_INDEX item must hold exactly one entry */
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
1345 static int process_inode_ref(struct extent_buffer *eb,
1346 int slot, struct btrfs_key *key,
1347 struct shared_node *active_node)
1349 u32 total;
1350 u32 cur = 0;
1351 u32 len;
1352 u32 name_len;
1353 u64 index;
1354 int error;
1355 struct cache_tree *inode_cache;
1356 struct btrfs_inode_ref *ref;
1357 char namebuf[BTRFS_NAME_LEN];
1359 inode_cache = &active_node->inode_cache;
1361 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1362 total = btrfs_item_size_nr(eb, slot);
1363 while (cur < total) {
1364 name_len = btrfs_inode_ref_name_len(eb, ref);
1365 index = btrfs_inode_ref_index(eb, ref);
1367 /* inode_ref + namelen should not cross item boundary */
1368 if (cur + sizeof(*ref) + name_len > total ||
1369 name_len > BTRFS_NAME_LEN) {
1370 if (total < cur + sizeof(*ref))
1371 break;
1373 /* Still try to read out the remaining part */
1374 len = min_t(u32, total - cur - sizeof(*ref),
1375 BTRFS_NAME_LEN);
1376 error = REF_ERR_NAME_TOO_LONG;
1377 } else {
1378 len = name_len;
1379 error = 0;
1382 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1383 add_inode_backref(inode_cache, key->objectid, key->offset,
1384 index, namebuf, len, 0, key->type, error);
1386 len = sizeof(*ref) + name_len;
1387 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1388 cur += len;
1390 return 0;
1393 static int process_inode_extref(struct extent_buffer *eb,
1394 int slot, struct btrfs_key *key,
1395 struct shared_node *active_node)
1397 u32 total;
1398 u32 cur = 0;
1399 u32 len;
1400 u32 name_len;
1401 u64 index;
1402 u64 parent;
1403 int error;
1404 struct cache_tree *inode_cache;
1405 struct btrfs_inode_extref *extref;
1406 char namebuf[BTRFS_NAME_LEN];
1408 inode_cache = &active_node->inode_cache;
1410 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1411 total = btrfs_item_size_nr(eb, slot);
1412 while (cur < total) {
1413 name_len = btrfs_inode_extref_name_len(eb, extref);
1414 index = btrfs_inode_extref_index(eb, extref);
1415 parent = btrfs_inode_extref_parent(eb, extref);
1416 if (name_len <= BTRFS_NAME_LEN) {
1417 len = name_len;
1418 error = 0;
1419 } else {
1420 len = BTRFS_NAME_LEN;
1421 error = REF_ERR_NAME_TOO_LONG;
1423 read_extent_buffer(eb, namebuf,
1424 (unsigned long)(extref + 1), len);
1425 add_inode_backref(inode_cache, key->objectid, parent,
1426 index, namebuf, len, 0, key->type, error);
1428 len = sizeof(*extref) + name_len;
1429 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1430 cur += len;
1432 return 0;
/*
 * Validate one EXTENT_DATA item of the current inode record: sanity-check
 * the extent type and sizes, detect overlaps and holes between file
 * extents, accumulate the accounted size, and verify checksum coverage
 * for regular extents (prealloc extents must not have csums).
 *
 * Returns 0 on success or a negative errno from the csum/hole helpers.
 */
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->fs_info->sectorsize - 1;
	u32 max_inline_size = min_t(u32, mask,
				BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	/* First extent seen for this inode: initialize the tracked range */
	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		/* Gap between the previous extent end and this one */
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		u8 compression = btrfs_file_extent_compression(eb, fi);
		struct btrfs_item *item = btrfs_item_nr(slot);

		num_bytes = btrfs_file_extent_ram_bytes(eb, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (compression) {
			/* Compressed inline: on-disk and ram limits differ */
			if (btrfs_file_extent_inline_item_len(eb, item) >
			    max_inline_size ||
			    num_bytes > root->fs_info->sectorsize)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
		} else {
			if (num_bytes > max_inline_size)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
			/* Uncompressed inline: item len must equal ram bytes */
			if (btrfs_file_extent_inline_item_len(eb, item) !=
			    num_bytes)
				rec->errors |= I_ERR_INLINE_RAM_BYTES_WRONG;
		}
		rec->found_size += num_bytes;
		/* Round up to sector size for extent_end accounting */
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents may not be compressed/encrypted/encoded */
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 denotes a hole extent: no size accounted */
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
				       &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* csums on a prealloc extent mean it was written */
			if (found > 0) {
				ret = check_prealloc_extent_written(root->fs_info,
								    disk_bytenr,
								    num_bytes);
				if (ret < 0)
					return ret;
				if (ret == 0)
					rec->errors |= I_ERR_ODD_CSUM_ITEM;
			}
		}
	}
	return 0;
}
/*
 * Process every item in fs-tree leaf @eb: switch the active node's current
 * inode record whenever the objectid advances, then dispatch each item
 * type to its dedicated handler.  Returns the result of the last handler
 * invoked (0 if no handler ran).
 */
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	/* Deleted root whose shared records were already collected: skip */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		/* Items are sorted by objectid: moving on means the previous
		 * inode is fully scanned and can be checked/freed. */
		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
			BUG_ON(IS_ERR(active_node->current));
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		}
	}
	return ret;
}
/*
 * Descend from the current position in @path down to a leaf and process it
 * with process_one_leaf().  Extent reference counts are cached per level in
 * @nrefs to avoid repeated extent-tree lookups; blocks shared with an
 * already-walked tree are skipped via enter_shared_node().
 *
 * Returns 0 when a leaf was processed, > 0 when the whole subtree was
 * skipped because it was already handled as a shared node, < 0 on error.
 * On every exit path the current slot is pushed past the node's items so
 * that walk_up_tree() advances correctly.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* Use the cached refcount for the starting block when it matches */
	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
		ret = 0;
	} else {
		ret = btrfs_lookup_extent_info(NULL, fs_info,
					       path->nodes[*level]->start,
					       *level, 1, &refs, NULL);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;
	}

	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0) {
			/* Already processed as shared: skip the subtree */
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		/* Refcount of the child block, cached per child level */
		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
		} else {
			ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
					*level - 1, 1, &refs, NULL);
			if (ret < 0) {
				/* Lookup failed: treat as not shared */
				refs = 0;
			} else {
				nrefs->bytenr[*level - 1] = bytenr;
				nrefs->refs[*level - 1] = refs;
			}
		}

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				/* Shared child already processed: next slot */
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->fs_info->nodesize,
						*level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(cur, path->slots[*level], next);
		if (ret) {
			free_extent_buffer(next);
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Descend into the child block */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* Mark the current node exhausted so walk_up_tree() moves on */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
/*
 * Ascend the tree after walk_down_tree() exhausted the current node: free
 * finished levels (leaving shared-node tracking when passing the active
 * node) until a level with remaining slots is found.
 *
 * Returns 0 with *level updated when there is a next slot to descend into,
 * or 1 when the whole tree has been walked.
 */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
{
	int i;
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			/* More siblings at this level: resume walking here */
			path->slots[i]++;
			*level = i;
			return 0;
		}
		/* Level exhausted: release it and move up one level */
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = NULL;
		BUG_ON(*level > wc->active_node);
		if (*level == wc->active_node)
			leave_shared_node(root, wc, *level);
		*level = i + 1;
	}
	return 1;
}
1769 static int check_root_dir(struct inode_record *rec)
1771 struct inode_backref *backref;
1772 int ret = -1;
1774 if (!rec->found_inode_item || rec->errors)
1775 goto out;
1776 if (rec->nlink != 1 || rec->found_link != 0)
1777 goto out;
1778 if (list_empty(&rec->backrefs))
1779 goto out;
1780 backref = to_inode_backref(rec->backrefs.next);
1781 if (!backref->found_inode_ref)
1782 goto out;
1783 if (backref->index != 0 || backref->namelen != 2 ||
1784 memcmp(backref->name, "..", 2))
1785 goto out;
1786 if (backref->found_dir_index || backref->found_dir_item)
1787 goto out;
1788 ret = 0;
1789 out:
1790 return ret;
/*
 * Rewrite the on-disk isize of directory inode @rec->ino to the size
 * computed from its directory entries (rec->found_size) and clear the
 * I_ERR_DIR_ISIZE_WRONG flag.
 *
 * Returns 0 on success, -ENOENT when no inode item exists for the inode,
 * or a negative errno from the tree search.
 */
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * key.offset was (u64)-1, so an existing inode item sorts
		 * immediately before the insertion point.
		 */
		if (!path->slots[0]) {
			ret = -ENOENT;
			goto out;
		}
		path->slots[0]--;
		ret = 0;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {
		ret = -ENOENT;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %llu root %llu\n", rec->ino,
	       root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
1834 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1835 struct btrfs_root *root,
1836 struct btrfs_path *path,
1837 struct inode_record *rec)
1839 int ret;
1841 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1842 btrfs_release_path(path);
1843 if (!ret)
1844 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1845 return ret;
/*
 * Rewrite the nbytes field of inode @rec->ino to the byte count computed
 * from its file extents (rec->found_size) and clear the
 * I_ERR_FILE_NBYTES_WRONG flag.
 *
 * Returns 0 on success, -ENOENT when the inode item is missing, or a
 * negative errno from the tree search.
 */
static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret = 0;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret) {
		/* ret > 0 means the exact item was not found */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	/* Since ret == 0, no need to check anything */
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
	printf("reset nbytes for ino %llu root %llu\n",
	       rec->ino, root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Insert the DIR_INDEX item that is missing for @backref (which already
 * has a dir item and inode ref) and update the parent directory record's
 * accounted size and isize error flag accordingly.
 *
 * Returns 0 on success or the error from starting the transaction.
 */
static int add_missing_dir_index(struct btrfs_root *root,
				 struct cache_tree *inode_cache,
				 struct inode_record *rec,
				 struct inode_backref *backref)
{
	struct btrfs_path path;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *dir_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct inode_record *dir_rec;
	unsigned long name_ptr;
	u32 data_size = sizeof(*dir_item) + backref->namelen;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
		(unsigned long long)rec->ino);

	btrfs_init_path(&path);
	key.objectid = backref->dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = backref->index;
	ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
	BUG_ON(ret);

	leaf = path.nodes[0];
	dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);

	/* The embedded key points at the child inode item */
	disk_key.objectid = cpu_to_le64(rec->ino);
	disk_key.type = BTRFS_INODE_ITEM_KEY;
	disk_key.offset = 0;

	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
	btrfs_set_dir_data_len(leaf, dir_item, 0);
	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
	name_ptr = (unsigned long)(dir_item + 1);
	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);

	backref->found_dir_index = 1;
	/* Update the parent dir record's size accounting, if cached */
	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
	BUG_ON(IS_ERR(dir_rec));
	if (!dir_rec)
		return 0;
	dir_rec->found_size += backref->namelen;
	if (dir_rec->found_size == dir_rec->isize &&
	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	if (dir_rec->found_size != dir_rec->isize)
		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;

	return 0;
}
/*
 * Delete the bad DIR_INDEX item described by @backref from @root inside
 * its own transaction.  A missing item (-ENOENT) is treated as success.
 */
static int delete_dir_index(struct btrfs_root *root,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path path;
	int ret = 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	btrfs_init_path(&path);
	di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, root);
		/* Already gone: nothing to repair */
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	/*
	 * NOTE(review): di == NULL presumably means the item was located by
	 * key but holds no matching name, so the whole item is removed;
	 * otherwise only the matching name is deleted from it — confirm
	 * against btrfs_lookup_dir_index()/btrfs_delete_one_dir_name().
	 */
	if (!di)
		ret = btrfs_del_item(trans, root, &path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, &path, di);
	BUG_ON(ret);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
1983 static int create_inode_item(struct btrfs_root *root,
1984 struct inode_record *rec, int root_dir)
1986 struct btrfs_trans_handle *trans;
1987 u64 nlink = 0;
1988 u32 mode = 0;
1989 u64 size = 0;
1990 int ret;
1992 trans = btrfs_start_transaction(root, 1);
1993 if (IS_ERR(trans)) {
1994 ret = PTR_ERR(trans);
1995 return ret;
1998 nlink = root_dir ? 1 : rec->found_link;
1999 if (rec->found_dir_item) {
2000 if (rec->found_file_extent)
2001 fprintf(stderr, "root %llu inode %llu has both a dir "
2002 "item and extents, unsure if it is a dir or a "
2003 "regular file so setting it as a directory\n",
2004 (unsigned long long)root->objectid,
2005 (unsigned long long)rec->ino);
2006 mode = S_IFDIR | 0755;
2007 size = rec->found_size;
2008 } else if (!rec->found_dir_item) {
2009 size = rec->extent_end;
2010 mode = S_IFREG | 0755;
2013 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2014 nlink, mode);
2015 btrfs_commit_transaction(trans, root);
2016 return 0;
/*
 * Repair the backrefs of one inode record.  With @delete set, bad
 * DIR_INDEX items (no inode ref, or index mismatch) are removed; without
 * it, missing dir index/item pairs and missing inode items are recreated.
 *
 * Returns a negative errno on failure, otherwise the number of repairs
 * performed.
 */
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		/* The root dir always needs an inode item */
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		/* Delete pass: dir index with no inode ref, or index that
		 * disagrees with the inode ref. */
		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
			continue;
		}

		/* Dir item + inode ref present, only DIR_INDEX missing */
		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
			if (backref->found_dir_item &&
			    backref->found_dir_index) {
				/* Fully consistent now: drop the backref */
				if (!backref->errors &&
				    backref->found_inode_ref) {
					list_del(&backref->list);
					free(backref);
					continue;
				}
			}
		}

		/* Only the inode ref exists: recreate both dir entries */
		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		/* All backref pieces consistent but no inode item: create it */
		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, 0);
			if (ret)
				break;
			repaired++;
		}

	}
	return ret ? ret : repaired;
}
2130 * To determine the file type for nlink/inode_item repair
2132 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2133 * Return -ENOENT if file type is not found.
2135 static int find_file_type(struct inode_record *rec, u8 *type)
2137 struct inode_backref *backref;
2139 /* For inode item recovered case */
2140 if (rec->found_inode_item) {
2141 *type = imode_to_type(rec->imode);
2142 return 0;
2145 list_for_each_entry(backref, &rec->backrefs, list) {
2146 if (backref->found_dir_index || backref->found_dir_item) {
2147 *type = backref->filetype;
2148 return 0;
2151 return -ENOENT;
2155 * To determine the file name for nlink repair
2157 * Return 0 if file name is found, set name and namelen.
2158 * Return -ENOENT if file name is not found.
2160 static int find_file_name(struct inode_record *rec,
2161 char *name, int *namelen)
2163 struct inode_backref *backref;
2165 list_for_each_entry(backref, &rec->backrefs, list) {
2166 if (backref->found_dir_index || backref->found_dir_item ||
2167 backref->found_inode_ref) {
2168 memcpy(name, backref->name, backref->namelen);
2169 *namelen = backref->namelen;
2170 return 0;
2173 return -ENOENT;
/*
 * Reset the nlink of the inode to the correct one.
 *
 * All backrefs are unlinked first; the invalid ones are dropped from the
 * record, then the valid inode_ref/dir_item/dir_index triples are added
 * back via btrfs_add_link(), which re-increments nlink to the true count.
 */
static int reset_nlink(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct btrfs_path *path,
		       struct inode_record *rec)
{
	struct inode_backref *backref;
	struct inode_backref *tmp;
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	int ret = 0;

	/* We don't believe this either, reset it and iterate backref */
	rec->found_link = 0;

	/* Remove all backref including the valid ones */
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
				   backref->index, backref->name,
				   backref->namelen, 0);
		if (ret < 0)
			goto out;

		/* remove invalid backref, so it won't be added back */
		if (!(backref->found_dir_index &&
		      backref->found_dir_item &&
		      backref->found_inode_ref)) {
			list_del(&backref->list);
			free(backref);
		} else {
			rec->found_link++;
		}
	}

	/* Set nlink to 0 */
	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(path);

	/*
	 * Add back valid inode_ref/dir_item/dir_index,
	 * add_link() will handle the nlink inc, so new nlink must be correct
	 */
	list_for_each_entry(backref, &rec->backrefs, list) {
		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
				     backref->name, backref->namelen,
				     backref->filetype, &backref->index, 1, 0);
		if (ret < 0)
			goto out;
	}
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Repair a wrong link count: recover (or synthesize) a name and file type
 * for the inode, rebuild its links via reset_nlink(), and attach orphaned
 * inodes (no remaining link) to lost+found.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared even on failure so the caller does not
 * retry the same inode forever.
 */
static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	char namebuf[BTRFS_NAME_LEN] = {0};
	u8 type = 0;
	int namelen = 0;
	int name_recovered = 0;
	int type_recovered = 0;
	int ret = 0;

	/*
	 * Get file name and type first before these invalid inode ref
	 * are deleted by remove_all_invalid_backref()
	 */
	name_recovered = !find_file_name(rec, namebuf, &namelen);
	type_recovered = !find_file_type(rec, &type);

	if (!name_recovered) {
		/* Synthesize a name from the inode number */
		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
		       rec->ino, rec->ino);
		namelen = count_digits(rec->ino);
		sprintf(namebuf, "%llu", rec->ino);
		name_recovered = 1;
	}
	if (!type_recovered) {
		printf("Can't get file type for inode %llu, using FILE as fallback\n",
		       rec->ino);
		type = BTRFS_FT_REG_FILE;
		type_recovered = 1;
	}

	ret = reset_nlink(trans, root, path, rec);
	if (ret < 0) {
		fprintf(stderr,
			"Failed to reset nlink for inode %llu: %s\n",
			rec->ino, strerror(-ret));
		goto out;
	}

	if (rec->found_link == 0) {
		/* No valid link survived: park the inode in lost+found */
		ret = link_inode_to_lostfound(trans, root, path, rec->ino,
					      namebuf, namelen, type,
					      (u64 *)&rec->found_link);
		if (ret)
			goto out;
	}
	printf("Fixed the nlink of inode %llu\n", rec->ino);
out:
	/*
	 * Clear the flag anyway, or we will loop forever for the same inode
	 * as it will not be removed from the bad inode list and the dead loop
	 * happens.
	 */
	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
	btrfs_release_path(path);
	return ret;
}
2304 * Check if there is any normal(reg or prealloc) file extent for given
2305 * ino.
2306 * This is used to determine the file type when neither its dir_index/item or
2307 * inode_item exists.
2309 * This will *NOT* report error, if any error happens, just consider it does
2310 * not have any normal file extent.
2312 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2314 struct btrfs_path path;
2315 struct btrfs_key key;
2316 struct btrfs_key found_key;
2317 struct btrfs_file_extent_item *fi;
2318 u8 type;
2319 int ret = 0;
2321 btrfs_init_path(&path);
2322 key.objectid = ino;
2323 key.type = BTRFS_EXTENT_DATA_KEY;
2324 key.offset = 0;
2326 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2327 if (ret < 0) {
2328 ret = 0;
2329 goto out;
2331 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2332 ret = btrfs_next_leaf(root, &path);
2333 if (ret) {
2334 ret = 0;
2335 goto out;
2338 while (1) {
2339 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2340 path.slots[0]);
2341 if (found_key.objectid != ino ||
2342 found_key.type != BTRFS_EXTENT_DATA_KEY)
2343 break;
2344 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2345 struct btrfs_file_extent_item);
2346 type = btrfs_file_extent_type(path.nodes[0], fi);
2347 if (type != BTRFS_FILE_EXTENT_INLINE) {
2348 ret = 1;
2349 goto out;
2352 out:
2353 btrfs_release_path(&path);
2354 return ret;
2357 static u32 btrfs_type_to_imode(u8 type)
2359 static u32 imode_by_btrfs_type[] = {
2360 [BTRFS_FT_REG_FILE] = S_IFREG,
2361 [BTRFS_FT_DIR] = S_IFDIR,
2362 [BTRFS_FT_CHRDEV] = S_IFCHR,
2363 [BTRFS_FT_BLKDEV] = S_IFBLK,
2364 [BTRFS_FT_FIFO] = S_IFIFO,
2365 [BTRFS_FT_SOCK] = S_IFSOCK,
2366 [BTRFS_FT_SYMLINK] = S_IFLNK,
2369 return imode_by_btrfs_type[(type)];
2372 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2373 struct btrfs_root *root,
2374 struct btrfs_path *path,
2375 struct inode_record *rec)
2377 u8 filetype;
2378 u32 mode = 0700;
2379 int type_recovered = 0;
2380 int ret = 0;
2382 printf("Trying to rebuild inode:%llu\n", rec->ino);
2384 type_recovered = !find_file_type(rec, &filetype);
2387 * Try to determine inode type if type not found.
2389 * For found regular file extent, it must be FILE.
2390 * For found dir_item/index, it must be DIR.
2392 * For undetermined one, use FILE as fallback.
2394 * TODO:
2395 * 1. If found backref(inode_index/item is already handled) to it,
2396 * it must be DIR.
2397 * Need new inode-inode ref structure to allow search for that.
2399 if (!type_recovered) {
2400 if (rec->found_file_extent &&
2401 find_normal_file_extent(root, rec->ino)) {
2402 type_recovered = 1;
2403 filetype = BTRFS_FT_REG_FILE;
2404 } else if (rec->found_dir_item) {
2405 type_recovered = 1;
2406 filetype = BTRFS_FT_DIR;
2407 } else if (!list_empty(&rec->orphan_extents)) {
2408 type_recovered = 1;
2409 filetype = BTRFS_FT_REG_FILE;
2410 } else{
2411 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2412 rec->ino);
2413 type_recovered = 1;
2414 filetype = BTRFS_FT_REG_FILE;
2418 ret = btrfs_new_inode(trans, root, rec->ino,
2419 mode | btrfs_type_to_imode(filetype));
2420 if (ret < 0)
2421 goto out;
2424 * Here inode rebuild is done, we only rebuild the inode item,
2425 * don't repair the nlink(like move to lost+found).
2426 * That is the job of nlink repair.
2428 * We just fill the record and return
2430 rec->found_dir_item = 1;
2431 rec->imode = mode | btrfs_type_to_imode(filetype);
2432 rec->nlink = 0;
2433 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2434 /* Ensure the inode_nlinks repair function will be called */
2435 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2436 out:
2437 return ret;
/*
 * Re-insert every orphan data extent recorded in @rec->orphan_extents as a
 * real file extent of the inode, updating the record's size/hole accounting
 * as each one lands.  Consumes (frees) every orphan entry it processes.
 *
 * Returns 0 on success, negative errno on the first failure (remaining
 * orphans are left on the list in that case).
 */
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct inode_record *rec)
{
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	int ret = 0;

	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents
		 *
		 * Here we don't know whether the extents is compressed or not,
		 * so we can only assume it not compressed nor data offset,
		 * and use its disk_len as extent length.
		 */
		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
				       orphan->offset, orphan->disk_len, 0);
		btrfs_release_path(path);
		if (ret < 0)
			goto out;
		if (!ret) {
			/*
			 * An extent already covers this file range: drop the
			 * orphan's extent allocation instead of inserting a
			 * second, overlapping file extent.
			 */
			fprintf(stderr,
				"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
				orphan->disk_bytenr, orphan->disk_len);
			ret = btrfs_free_extent(trans,
					root->fs_info->extent_root,
					orphan->disk_bytenr, orphan->disk_len,
					0, root->objectid, orphan->objectid,
					orphan->offset);
			if (ret < 0)
				goto out;
		}
		/*
		 * NOTE(review): the insert below runs even on the conflict
		 * path after the extent was freed — confirm this matches the
		 * intended upstream behavior.
		 */
		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
				orphan->offset, orphan->disk_bytenr,
				orphan->disk_len, orphan->disk_len);
		if (ret < 0)
			goto out;

		/* Update file size info */
		rec->found_size += orphan->disk_len;
		if (rec->found_size == rec->nbytes)
			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;

		/* Update the file extent hole info too */
		ret = del_file_extent_hole(&rec->holes, orphan->offset,
					   orphan->disk_len);
		if (ret < 0)
			goto out;
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;

		list_del(&orphan->list);
		free(orphan);
	}
	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
out:
	return ret;
}
2501 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct rb_node *node;
2507 struct file_extent_hole *hole;
2508 int found = 0;
2509 int ret = 0;
2511 node = rb_first(&rec->holes);
2513 while (node) {
2514 found = 1;
2515 hole = rb_entry(node, struct file_extent_hole, node);
2516 ret = btrfs_punch_hole(trans, root, rec->ino,
2517 hole->start, hole->len);
2518 if (ret < 0)
2519 goto out;
2520 ret = del_file_extent_hole(&rec->holes, hole->start,
2521 hole->len);
2522 if (ret < 0)
2523 goto out;
2524 if (RB_EMPTY_ROOT(&rec->holes))
2525 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2526 node = rb_first(&rec->holes);
2528 /* special case for a file losing all its file extent */
2529 if (!found) {
2530 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2531 round_up(rec->isize,
2532 root->fs_info->sectorsize));
2533 if (ret < 0)
2534 goto out;
2536 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2537 rec->ino, root->objectid);
2538 out:
2539 return ret;
2542 static int repair_inline_ram_bytes(struct btrfs_trans_handle *trans,
2543 struct btrfs_root *root,
2544 struct btrfs_path *path,
2545 struct inode_record *rec)
2547 struct btrfs_key key;
2548 struct btrfs_file_extent_item *fi;
2549 struct btrfs_item *i;
2550 u64 on_disk_item_len;
2551 int ret;
2553 key.objectid = rec->ino;
2554 key.offset = 0;
2555 key.type = BTRFS_EXTENT_DATA_KEY;
2557 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2558 if (ret > 0)
2559 ret = -ENOENT;
2560 if (ret < 0)
2561 goto out;
2563 i = btrfs_item_nr(path->slots[0]);
2564 on_disk_item_len = btrfs_file_extent_inline_item_len(path->nodes[0], i);
2565 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2566 struct btrfs_file_extent_item);
2567 btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_item_len);
2568 btrfs_mark_buffer_dirty(path->nodes[0]);
2569 printf("Repaired inline ram_bytes for root %llu ino %llu\n",
2570 root->objectid, rec->ino);
2571 rec->errors &= ~I_ERR_INLINE_RAM_BYTES_WRONG;
2572 out:
2573 btrfs_release_path(path);
2574 return ret;
2577 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2579 struct btrfs_trans_handle *trans;
2580 struct btrfs_path path;
2581 int ret = 0;
2583 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2584 I_ERR_NO_ORPHAN_ITEM |
2585 I_ERR_LINK_COUNT_WRONG |
2586 I_ERR_NO_INODE_ITEM |
2587 I_ERR_FILE_EXTENT_ORPHAN |
2588 I_ERR_FILE_EXTENT_DISCOUNT |
2589 I_ERR_FILE_NBYTES_WRONG |
2590 I_ERR_INLINE_RAM_BYTES_WRONG)))
2591 return rec->errors;
2594 * For nlink repair, it may create a dir and add link, so
2595 * 2 for parent(256)'s dir_index and dir_item
2596 * 2 for lost+found dir's inode_item and inode_ref
2597 * 1 for the new inode_ref of the file
2598 * 2 for lost+found dir's dir_index and dir_item for the file
2600 trans = btrfs_start_transaction(root, 7);
2601 if (IS_ERR(trans))
2602 return PTR_ERR(trans);
2604 btrfs_init_path(&path);
2605 if (rec->errors & I_ERR_NO_INODE_ITEM)
2606 ret = repair_inode_no_item(trans, root, &path, rec);
2607 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2608 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2609 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2610 ret = repair_inode_discount_extent(trans, root, &path, rec);
2611 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2612 ret = repair_inode_isize(trans, root, &path, rec);
2613 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2614 ret = repair_inode_orphan_item(trans, root, &path, rec);
2615 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2616 ret = repair_inode_nlinks(trans, root, &path, rec);
2617 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2618 ret = repair_inode_nbytes(trans, root, &path, rec);
2619 if (!ret && rec->errors & I_ERR_INLINE_RAM_BYTES_WRONG)
2620 ret = repair_inline_ram_bytes(trans, root, &path, rec);
2621 btrfs_commit_transaction(trans, root);
2622 btrfs_release_path(&path);
2623 return ret;
/*
 * Verify (and, with --repair, fix) every inode record collected for one
 * fs/subvolume root, then drain the whole inode cache.
 *
 * Returns 0 when clean, -1 when unrepaired inode errors remain, -EAGAIN
 * when a repair requires the caller to rescan the root, or a negative
 * errno on hard failure.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int stage = 0;
	int ret = 0;
	int err = 0;
	u64 error = 0;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A dead root (refs == 0) should carry no inode records at all */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
		return 0;
	}

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second.  We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	while (stage < 3) {
		stage++;
		if (stage == 3 && !err)
			break;

		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			rec = node->data;
			cache = next_cache_extent(cache);

			/* Need to free everything up and rescan */
			if (stage == 3) {
				remove_cache_extent(inode_cache, &node->cache);
				free(node);
				free_inode_rec(rec);
				continue;
			}

			if (list_empty(&rec->backrefs))
				continue;

			/* stage 1 deletes bad backrefs, stage 2 re-adds them */
			ret = repair_inode_backrefs(root, rec, inode_cache,
						    stage == 1);
			if (ret < 0) {
				err = ret;
				stage = 2;
				break;
			/*
			 * NOTE(review): looks like a missing "else" before
			 * this if; harmless since the branch above breaks —
			 * confirm against upstream.
			 */
			} if (ret > 0) {
				err = -EAGAIN;
			}
		}
	}
	if (err)
		return err;

	/* The root directory (objectid root_dirid) gets special treatment */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	if (rec) {
		ret = check_root_dir(rec);
		if (ret) {
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			error++;
		}
	} else {
		if (repair) {
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				return err;
			}

			fprintf(stderr,
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);

			ret = btrfs_make_root_dir(trans, root, root_dirid);
			BUG_ON(ret);

			btrfs_commit_transaction(trans, root);
			/* Caller must rescan after the recreation */
			return -EAGAIN;
		}

		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	}

	/* Drain the cache, reporting/repairing each remaining record */
	while (1) {
		cache = search_cache_extent(inode_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
			continue;
		}

		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
			if (ret == 0)
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
		if (repair) {
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
			ret = 0;
		}

		if (!(repair && ret == 0))
			error++;
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		}
		free_inode_rec(rec);
	}
	return (error > 0) ? -1 : 0;
}
2793 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2794 u64 objectid)
2796 struct cache_extent *cache;
2797 struct root_record *rec = NULL;
2798 int ret;
2800 cache = lookup_cache_extent(root_cache, objectid, 1);
2801 if (cache) {
2802 rec = container_of(cache, struct root_record, cache);
2803 } else {
2804 rec = calloc(1, sizeof(*rec));
2805 if (!rec)
2806 return ERR_PTR(-ENOMEM);
2807 rec->objectid = objectid;
2808 INIT_LIST_HEAD(&rec->backrefs);
2809 rec->cache.start = objectid;
2810 rec->cache.size = 1;
2812 ret = insert_cache_extent(root_cache, &rec->cache);
2813 if (ret)
2814 return ERR_PTR(-EEXIST);
2816 return rec;
2819 static struct root_backref *get_root_backref(struct root_record *rec,
2820 u64 ref_root, u64 dir, u64 index,
2821 const char *name, int namelen)
2823 struct root_backref *backref;
2825 list_for_each_entry(backref, &rec->backrefs, list) {
2826 if (backref->ref_root != ref_root || backref->dir != dir ||
2827 backref->namelen != namelen)
2828 continue;
2829 if (memcmp(name, backref->name, namelen))
2830 continue;
2831 return backref;
2834 backref = calloc(1, sizeof(*backref) + namelen + 1);
2835 if (!backref)
2836 return NULL;
2837 backref->ref_root = ref_root;
2838 backref->dir = dir;
2839 backref->index = index;
2840 backref->namelen = namelen;
2841 memcpy(backref->name, name, namelen);
2842 backref->name[namelen] = '\0';
2843 list_add_tail(&backref->list, &rec->backrefs);
2844 return backref;
2847 static void free_root_record(struct cache_extent *cache)
2849 struct root_record *rec;
2850 struct root_backref *backref;
2852 rec = container_of(cache, struct root_record, cache);
2853 while (!list_empty(&rec->backrefs)) {
2854 backref = to_root_backref(rec->backrefs.next);
2855 list_del(&backref->list);
2856 free(backref);
2859 free(rec);
/* Expands to free_root_recs_tree(): frees every root_record in a cache tree */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
2864 static int add_root_backref(struct cache_tree *root_cache,
2865 u64 root_id, u64 ref_root, u64 dir, u64 index,
2866 const char *name, int namelen,
2867 int item_type, int errors)
2869 struct root_record *rec;
2870 struct root_backref *backref;
2872 rec = get_root_rec(root_cache, root_id);
2873 BUG_ON(IS_ERR(rec));
2874 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
2875 BUG_ON(!backref);
2877 backref->errors |= errors;
2879 if (item_type != BTRFS_DIR_ITEM_KEY) {
2880 if (backref->found_dir_index || backref->found_back_ref ||
2881 backref->found_forward_ref) {
2882 if (backref->index != index)
2883 backref->errors |= REF_ERR_INDEX_UNMATCH;
2884 } else {
2885 backref->index = index;
2889 if (item_type == BTRFS_DIR_ITEM_KEY) {
2890 if (backref->found_forward_ref)
2891 rec->found_ref++;
2892 backref->found_dir_item = 1;
2893 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
2894 backref->found_dir_index = 1;
2895 } else if (item_type == BTRFS_ROOT_REF_KEY) {
2896 if (backref->found_forward_ref)
2897 backref->errors |= REF_ERR_DUP_ROOT_REF;
2898 else if (backref->found_dir_item)
2899 rec->found_ref++;
2900 backref->found_forward_ref = 1;
2901 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
2902 if (backref->found_back_ref)
2903 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
2904 backref->found_back_ref = 1;
2905 } else {
2906 BUG_ON(1);
2909 if (backref->found_forward_ref && backref->found_dir_item)
2910 backref->reachable = 1;
2911 return 0;
2914 static int merge_root_recs(struct btrfs_root *root,
2915 struct cache_tree *src_cache,
2916 struct cache_tree *dst_cache)
2918 struct cache_extent *cache;
2919 struct ptr_node *node;
2920 struct inode_record *rec;
2921 struct inode_backref *backref;
2922 int ret = 0;
2924 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2925 free_inode_recs_tree(src_cache);
2926 return 0;
2929 while (1) {
2930 cache = search_cache_extent(src_cache, 0);
2931 if (!cache)
2932 break;
2933 node = container_of(cache, struct ptr_node, cache);
2934 rec = node->data;
2935 remove_cache_extent(src_cache, &node->cache);
2936 free(node);
2938 ret = is_child_root(root, root->objectid, rec->ino);
2939 if (ret < 0)
2940 break;
2941 else if (ret == 0)
2942 goto skip;
2944 list_for_each_entry(backref, &rec->backrefs, list) {
2945 BUG_ON(backref->found_inode_ref);
2946 if (backref->found_dir_item)
2947 add_root_backref(dst_cache, rec->ino,
2948 root->root_key.objectid, backref->dir,
2949 backref->index, backref->name,
2950 backref->namelen, BTRFS_DIR_ITEM_KEY,
2951 backref->errors);
2952 if (backref->found_dir_index)
2953 add_root_backref(dst_cache, rec->ino,
2954 root->root_key.objectid, backref->dir,
2955 backref->index, backref->name,
2956 backref->namelen, BTRFS_DIR_INDEX_KEY,
2957 backref->errors);
2959 skip:
2960 free_inode_rec(rec);
2962 if (ret < 0)
2963 return ret;
2964 return 0;
/*
 * Verify that every fs/subvolume root is reachable from the top-level fs
 * tree via its recorded backrefs, and report any root that is orphaned or
 * has inconsistent references.
 *
 * Returns 1 when any reference error was found, 0 otherwise.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
{
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;
	int loop = 1;
	int ret;
	int error;
	int errors = 0;

	/* The top-level fs tree is reachable by definition */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));
	rec->found_ref = 1;

	/* fixme: this can not detect circular references */
	while (loop) {
		/*
		 * Fixpoint pass: a backref is only valid if the referencing
		 * root is itself reachable.  Dropping one ref can orphan
		 * another root, so iterate until nothing changes.
		 */
		loop = 0;
		cache = search_cache_extent(root_cache, 0);
		while (1) {
			if (!cache)
				break;
			rec = container_of(cache, struct root_record, cache);
			cache = next_cache_extent(cache);

			if (rec->found_ref == 0)
				continue;

			list_for_each_entry(backref, &rec->backrefs, list) {
				if (!backref->reachable)
					continue;

				ref_root = get_root_rec(root_cache,
							backref->ref_root);
				BUG_ON(IS_ERR(ref_root));
				if (ref_root->found_ref > 0)
					continue;

				/* Referencing root is dead: drop this ref */
				backref->reachable = 0;
				rec->found_ref--;
				if (rec->found_ref == 0)
					loop = 1;
			}
		}
	}

	/* Reporting pass over the stabilized reachability info */
	cache = search_cache_extent(root_cache, 0);
	while (1) {
		if (!cache)
			break;
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);

		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			/* An orphan item legitimizes an unreferenced root */
			ret = check_orphan_item(root->fs_info->tree_root,
						rec->objectid);
			if (ret == 0)
				continue;

			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
				continue;
			errors++;
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);
		}

		error = 0;
		if (rec->found_ref > 0 && !rec->found_root_item)
			error = 1;
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
				error = 1;
		}
		if (!error)
			continue;

		errors++;
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");

		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
				continue;
			if (!backref->errors && rec->found_root_item)
				continue;
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->errors);
			print_ref_error(backref->errors);
		}
	}
	return errors > 0 ? 1 : 0;
}
3082 static int process_root_ref(struct extent_buffer *eb, int slot,
3083 struct btrfs_key *key,
3084 struct cache_tree *root_cache)
3086 u64 dirid;
3087 u64 index;
3088 u32 len;
3089 u32 name_len;
3090 struct btrfs_root_ref *ref;
3091 char namebuf[BTRFS_NAME_LEN];
3092 int error;
3094 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3096 dirid = btrfs_root_ref_dirid(eb, ref);
3097 index = btrfs_root_ref_sequence(eb, ref);
3098 name_len = btrfs_root_ref_name_len(eb, ref);
3100 if (name_len <= BTRFS_NAME_LEN) {
3101 len = name_len;
3102 error = 0;
3103 } else {
3104 len = BTRFS_NAME_LEN;
3105 error = REF_ERR_NAME_TOO_LONG;
3107 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3109 if (key->type == BTRFS_ROOT_REF_KEY) {
3110 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3111 index, namebuf, len, key->type, error);
3112 } else {
3113 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3114 index, namebuf, len, key->type, error);
3116 return 0;
3119 static void free_corrupt_block(struct cache_extent *cache)
3121 struct btrfs_corrupt_block *corrupt;
3123 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3124 free(corrupt);
/* Expands to free_corrupt_blocks_tree(): frees a tree of corrupt-block records */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node key in corrupt_blocks cache_tree.
 * and rebalance the tree.
 * After the fix, the btree should be writeable.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;
	u64 offset;
	int level;
	int ret = 0;

	if (cache_tree_empty(corrupt_blocks))
		return 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
			strerror(-ret));
		return ret;
	}
	btrfs_init_path(&path);
	cache = first_cache_extent(corrupt_blocks);
	/* Pass 1: unlink every corrupted block from its parent node */
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		level = corrupt->level;
		path.lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;

		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with corrupted brother leaf/node,
		 * so ins_len set to 0 here.
		 * Balance will be done after all corrupt node/leaf is deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
		if (ret < 0)
			goto out;
		offset = btrfs_node_blockptr(path.nodes[level],
					     path.slots[level]);

		/* Remove the ptr */
		ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
		if (ret < 0)
			goto out;
		/*
		 * Remove the corresponding extent
		 * return value is not concerned.
		 */
		btrfs_release_path(&path);
		ret = btrfs_free_extent(trans, root, offset,
					root->fs_info->nodesize, 0,
					root->root_key.objectid, level - 1, 0);
		cache = next_cache_extent(cache);
	}

	/* Balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
	/* Pass 2: search with ins_len=-1 so each touched path gets balanced */
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		memcpy(&key, &corrupt->key, sizeof(key));
		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
		if (ret < 0)
			goto out;
		/* return will always >0 since it won't find the item */
		ret = 0;
		btrfs_release_path(&path);
		cache = next_cache_extent(cache);
	}
out:
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
3216 static int check_fs_root(struct btrfs_root *root,
3217 struct cache_tree *root_cache,
3218 struct walk_control *wc)
3220 int ret = 0;
3221 int err = 0;
3222 int wret;
3223 int level;
3224 struct btrfs_path path;
3225 struct shared_node root_node;
3226 struct root_record *rec;
3227 struct btrfs_root_item *root_item = &root->root_item;
3228 struct cache_tree corrupt_blocks;
3229 struct orphan_data_extent *orphan;
3230 struct orphan_data_extent *tmp;
3231 enum btrfs_tree_block_status status;
3232 struct node_refs nrefs;
3235 * Reuse the corrupt_block cache tree to record corrupted tree block
3237 * Unlike the usage in extent tree check, here we do it in a per
3238 * fs/subvol tree base.
3240 cache_tree_init(&corrupt_blocks);
3241 root->fs_info->corrupt_blocks = &corrupt_blocks;
3243 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3244 rec = get_root_rec(root_cache, root->root_key.objectid);
3245 BUG_ON(IS_ERR(rec));
3246 if (btrfs_root_refs(root_item) > 0)
3247 rec->found_root_item = 1;
3250 btrfs_init_path(&path);
3251 memset(&root_node, 0, sizeof(root_node));
3252 cache_tree_init(&root_node.root_cache);
3253 cache_tree_init(&root_node.inode_cache);
3254 memset(&nrefs, 0, sizeof(nrefs));
3256 /* Move the orphan extent record to corresponding inode_record */
3257 list_for_each_entry_safe(orphan, tmp,
3258 &root->orphan_data_extents, list) {
3259 struct inode_record *inode;
3261 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3263 BUG_ON(IS_ERR(inode));
3264 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3265 list_move(&orphan->list, &inode->orphan_extents);
3268 level = btrfs_header_level(root->node);
3269 memset(wc->nodes, 0, sizeof(wc->nodes));
3270 wc->nodes[level] = &root_node;
3271 wc->active_node = level;
3272 wc->root_level = level;
3274 /* We may not have checked the root block, lets do that now */
3275 if (btrfs_is_leaf(root->node))
3276 status = btrfs_check_leaf(root, NULL, root->node);
3277 else
3278 status = btrfs_check_node(root, NULL, root->node);
3279 if (status != BTRFS_TREE_BLOCK_CLEAN)
3280 return -EIO;
3282 if (btrfs_root_refs(root_item) > 0 ||
3283 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3284 path.nodes[level] = root->node;
3285 extent_buffer_get(root->node);
3286 path.slots[level] = 0;
3287 } else {
3288 struct btrfs_key key;
3289 struct btrfs_disk_key found_key;
3291 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3292 level = root_item->drop_level;
3293 path.lowest_level = level;
3294 if (level > btrfs_header_level(root->node) ||
3295 level >= BTRFS_MAX_LEVEL) {
3296 error("ignoring invalid drop level: %u", level);
3297 goto skip_walking;
3299 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3300 if (wret < 0)
3301 goto skip_walking;
3302 btrfs_node_key(path.nodes[level], &found_key,
3303 path.slots[level]);
3304 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3305 sizeof(found_key)));
3308 while (1) {
3309 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3310 if (wret < 0)
3311 ret = wret;
3312 if (wret != 0)
3313 break;
3315 wret = walk_up_tree(root, &path, wc, &level);
3316 if (wret < 0)
3317 ret = wret;
3318 if (wret != 0)
3319 break;
3321 skip_walking:
3322 btrfs_release_path(&path);
3324 if (!cache_tree_empty(&corrupt_blocks)) {
3325 struct cache_extent *cache;
3326 struct btrfs_corrupt_block *corrupt;
3328 printf("The following tree block(s) is corrupted in tree %llu:\n",
3329 root->root_key.objectid);
3330 cache = first_cache_extent(&corrupt_blocks);
3331 while (cache) {
3332 corrupt = container_of(cache,
3333 struct btrfs_corrupt_block,
3334 cache);
3335 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3336 cache->start, corrupt->level,
3337 corrupt->key.objectid, corrupt->key.type,
3338 corrupt->key.offset);
3339 cache = next_cache_extent(cache);
3341 if (repair) {
3342 printf("Try to repair the btree for root %llu\n",
3343 root->root_key.objectid);
3344 ret = repair_btree(root, &corrupt_blocks);
3345 if (ret < 0)
3346 fprintf(stderr, "Failed to repair btree: %s\n",
3347 strerror(-ret));
3348 if (!ret)
3349 printf("Btree for root %llu is fixed\n",
3350 root->root_key.objectid);
3354 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3355 if (err < 0)
3356 ret = err;
3358 if (root_node.current) {
3359 root_node.current->checked = 1;
3360 maybe_free_inode_rec(&root_node.inode_cache,
3361 root_node.current);
3364 err = check_inode_recs(root, &root_node.inode_cache);
3365 if (!ret)
3366 ret = err;
3368 free_corrupt_blocks_tree(&corrupt_blocks);
3369 root->fs_info->corrupt_blocks = NULL;
3370 free_orphan_data_extents(&root->orphan_data_extents);
3371 return ret;
/*
 * Scan the tree of tree roots and run check_fs_root() on every
 * fs/subvolume root found, feeding ROOT_REF/ROOT_BACKREF items into
 * @root_cache along the way.
 *
 * Restarts the whole scan (label "again") whenever the tree root was
 * COWed underneath us or a repair returned -EAGAIN, since collected
 * records may then be stale.
 *
 * Returns 0 when everything checked out, 1 on any error.
 */
static int check_fs_roots(struct btrfs_fs_info *fs_info,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	int ret;
	int err = 0;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);
	}

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	tree_node = tree_root->node;
	while (1) {
		/* Tree root COWed (e.g. by a repair): restart the scan */
		if (tree_node != tree_root->node) {
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* Reloc roots are read uncached and freed below */
				tmp_root = btrfs_read_fs_root_no_cache(
						fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				/* A repair invalidated our records: rescan */
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret)
				err = 1;
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);
	if (ctx.progress_enabled)
		task_stop(ctx.info);

	return err;
}
3472 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3473 u64 parent, u64 root)
3475 struct rb_node *node;
3476 struct tree_backref *back = NULL;
3477 struct tree_backref match = {
3478 .node = {
3479 .is_data = 0,
3483 if (parent) {
3484 match.parent = parent;
3485 match.node.full_backref = 1;
3486 } else {
3487 match.root = root;
3490 node = rb_search(&rec->backref_tree, &match.node.node,
3491 (rb_compare_keys)compare_extent_backref, NULL);
3492 if (node)
3493 back = to_tree_backref(rb_node_to_extent_backref(node));
3495 return back;
3498 static struct data_backref *find_data_backref(struct extent_record *rec,
3499 u64 parent, u64 root,
3500 u64 owner, u64 offset,
3501 int found_ref,
3502 u64 disk_bytenr, u64 bytes)
3504 struct rb_node *node;
3505 struct data_backref *back = NULL;
3506 struct data_backref match = {
3507 .node = {
3508 .is_data = 1,
3510 .owner = owner,
3511 .offset = offset,
3512 .bytes = bytes,
3513 .found_ref = found_ref,
3514 .disk_bytenr = disk_bytenr,
3517 if (parent) {
3518 match.parent = parent;
3519 match.node.full_backref = 1;
3520 } else {
3521 match.root = root;
3524 node = rb_search(&rec->backref_tree, &match.node.node,
3525 (rb_compare_keys)compare_extent_backref, NULL);
3526 if (node)
3527 back = to_data_backref(rb_node_to_extent_backref(node));
3529 return back;
3532 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3533 struct cache_tree *root_cache)
3535 int ret;
3537 if (!ctx.progress_enabled)
3538 fprintf(stderr, "checking fs roots\n");
3539 if (check_mode == CHECK_MODE_LOWMEM)
3540 ret = check_fs_roots_lowmem(fs_info);
3541 else
3542 ret = check_fs_roots(fs_info, root_cache);
3544 return ret;
/*
 * Verify that every backref recorded for @rec was seen both in the extent
 * tree and via an actual reference, and that the per-backref counts sum up
 * to the extent's global reference count.
 *
 * Returns 0 when everything is consistent, 1 otherwise.  When @print_errs
 * is zero the function bails out at the first inconsistency without
 * printing anything (used as a cheap predicate by maybe_free_extent_rec()).
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct extent_backref *back, *tmp;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* Every backref must have been seen in the extent tree */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr,
"data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = to_tree_backref(back);
				fprintf(stderr,
"tree backref %llu parent %llu root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* Tree backrefs must also have a real referencing block */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = to_tree_backref(back);
			fprintf(stderr,
				"backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = to_data_backref(back);
			/* Per-backref found count must match the item count */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs,
					back);
			}
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/* Accumulate refs: tree backrefs count 1, data count found_ref */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = to_data_backref(back);
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr,
	"incorrect global backref count on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
/* rb-tree destructor callback: free a single embedded backref node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
3664 static void free_all_extent_backrefs(struct extent_record *rec)
3666 rb_free_nodes(&rec->backref_tree, __free_one_backref);
3669 static void free_extent_record_cache(struct cache_tree *extent_cache)
3671 struct cache_extent *cache;
3672 struct extent_record *rec;
3674 while (1) {
3675 cache = first_cache_extent(extent_cache);
3676 if (!cache)
3677 break;
3678 rec = container_of(cache, struct extent_record, cache);
3679 remove_cache_extent(extent_cache, cache);
3680 free_all_extent_backrefs(rec);
3681 free(rec);
3685 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3686 struct extent_record *rec)
3688 if (rec->content_checked && rec->owner_ref_checked &&
3689 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3690 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3691 !rec->bad_full_backref && !rec->crossing_stripes &&
3692 !rec->wrong_chunk_type) {
3693 remove_cache_extent(extent_cache, &rec->cache);
3694 free_all_extent_backrefs(rec);
3695 list_del_init(&rec->list);
3696 free(rec);
3698 return 0;
/*
 * Verify that the owner recorded in the header of @buf is backed either by
 * one of the tree backrefs collected for @rec, or by the owning fs tree
 * still pointing at this block.
 *
 * Returns 0 when the owner ref checks out, 1 otherwise.
 */
static int check_owner_ref(struct btrfs_root *root,
			   struct extent_record *rec,
			   struct extent_buffer *buf)
{
	struct extent_backref *node, *tmp;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;
	int level;
	int found = 0;
	int ret;

	/* Fast path: a normal (non-full) backref matching the header owner */
	rbtree_postorder_for_each_entry_safe(node, tmp,
					     &rec->backref_tree, node) {
		if (node->is_data)
			continue;
		if (!node->found_ref)
			continue;
		if (node->full_backref)
			continue;
		back = to_tree_backref(node);
		if (btrfs_header_owner(buf) == back->root)
			return 0;
	}
	/*
	 * NOTE(review): a tree root without a matching owner backref aborts
	 * fsck here; consider returning an error instead of BUG_ON() so the
	 * check can continue.
	 */
	BUG_ON(rec->is_root);

	/* try to find the block by search corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))
		return 1;

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key_to_cpu(buf, &key, 0);
	else
		btrfs_node_key_to_cpu(buf, &key, 0);

	btrfs_init_path(&path);
	/* Stop the search one level above @buf so we can inspect its parent */
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	if (ret < 0)
		return 0;

	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
						   path.slots[level + 1]))
		found = 1;

	btrfs_release_path(&path);
	return found ? 0 : 1;
}
3759 static int is_extent_tree_record(struct extent_record *rec)
3761 struct extent_backref *node, *tmp;
3762 struct tree_backref *back;
3763 int is_extent = 0;
3765 rbtree_postorder_for_each_entry_safe(node, tmp,
3766 &rec->backref_tree, node) {
3767 if (node->is_data)
3768 return 0;
3769 back = to_tree_backref(node);
3770 if (node->full_backref)
3771 return 0;
3772 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3773 is_extent = 1;
3775 return is_extent;
3779 static int record_bad_block_io(struct btrfs_fs_info *info,
3780 struct cache_tree *extent_cache,
3781 u64 start, u64 len)
3783 struct extent_record *rec;
3784 struct cache_extent *cache;
3785 struct btrfs_key key;
3787 cache = lookup_cache_extent(extent_cache, start, len);
3788 if (!cache)
3789 return 0;
3791 rec = container_of(cache, struct extent_record, cache);
3792 if (!is_extent_tree_record(rec))
3793 return 0;
3795 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3796 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 in @buf to restore key order.
 * For internal nodes the two key pointers are exchanged; for leaves the
 * item payloads, offsets/sizes and item keys are exchanged.
 *
 * Returns 0 on success, -ENOMEM when a temporary buffer can't be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;

			/* First key changed, propagate it up to the parents */
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1's data is written back with item2's
		 * size (and vice versa) -- verify behavior when the two
		 * items have different sizes.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Swap the offset/size bookkeeping to match the payloads */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
/*
 * Walk the keys of the node/leaf at path->lowest_level and swap any
 * adjacent pair that is out of order, restarting the scan after each
 * successful swap.
 *
 * Returns 0 after a successful pass, -EIO if no swap was ever performed
 * (the block was called "bad" but nothing could be fixed), or the error
 * returned by swap_values().
 */
static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int i;
	int level = path->lowest_level;
	int ret = -EIO;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		if (level) {
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
		} else {
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		}
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
			continue;
		ret = swap_values(root, path, buf, i);
		if (ret)
			break;
		btrfs_mark_buffer_dirty(buf);
		/*
		 * Restart the scan.  NOTE(review): the loop's i++ makes the
		 * next pass begin at pair (1,2), so pair (0,1) is not
		 * re-checked after a swap -- confirm this is intended.
		 */
		i = 0;
	}
	return ret;
}
/*
 * Remove the item at @slot from leaf @buf when its key is one the rest of
 * fsck knows how to regenerate.  Only the item header array is compacted;
 * the stale data region is reclaimed later by fix_item_offset().
 *
 * Returns 0 on success, -1 when the key type can't be safely dropped.
 */
static int delete_bogus_item(struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Shift the item headers down over the deleted slot */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		struct btrfs_disk_key disk_key;

		/* The leaf's first key changed, update the parent pointers */
		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair item data offsets in the leaf at path->nodes[0] so every item's
 * data region is contiguous with its neighbour (or with the end of the
 * leaf for item 0).  Items whose data runs off the leaf or overlaps a
 * neighbour are dropped via delete_bogus_item() when possible.
 *
 * Returns 0 on success, -EIO when the leaf can't be fixed.
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
				/* Item 0 runs past the leaf: try to drop it */
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr,
				"item is off the end of the leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				/* Data overlaps the previous item: drop it */
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;

		/* Slide the item's data up by @shift to close the gap */
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out. Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
3997 * Attempt to fix basic block failures. If we can't fix it for whatever reason
3998 * then just return -EIO.
4000 static int try_to_fix_bad_block(struct btrfs_root *root,
4001 struct extent_buffer *buf,
4002 enum btrfs_tree_block_status status)
4004 struct btrfs_trans_handle *trans;
4005 struct ulist *roots;
4006 struct ulist_node *node;
4007 struct btrfs_root *search_root;
4008 struct btrfs_path path;
4009 struct ulist_iterator iter;
4010 struct btrfs_key root_key, key;
4011 int ret;
4013 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4014 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4015 return -EIO;
4017 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4018 if (ret)
4019 return -EIO;
4021 btrfs_init_path(&path);
4022 ULIST_ITER_INIT(&iter);
4023 while ((node = ulist_next(roots, &iter))) {
4024 root_key.objectid = node->val;
4025 root_key.type = BTRFS_ROOT_ITEM_KEY;
4026 root_key.offset = (u64)-1;
4028 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4029 if (IS_ERR(root)) {
4030 ret = -EIO;
4031 break;
4035 trans = btrfs_start_transaction(search_root, 0);
4036 if (IS_ERR(trans)) {
4037 ret = PTR_ERR(trans);
4038 break;
4041 path.lowest_level = btrfs_header_level(buf);
4042 path.skip_check_block = 1;
4043 if (path.lowest_level)
4044 btrfs_node_key_to_cpu(buf, &key, 0);
4045 else
4046 btrfs_item_key_to_cpu(buf, &key, 0);
4047 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4048 if (ret) {
4049 ret = -EIO;
4050 btrfs_commit_transaction(trans, search_root);
4051 break;
4053 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4054 ret = fix_key_order(search_root, &path);
4055 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4056 ret = fix_item_offset(search_root, &path);
4057 if (ret) {
4058 btrfs_commit_transaction(trans, search_root);
4059 break;
4061 btrfs_release_path(&path);
4062 btrfs_commit_transaction(trans, search_root);
4064 ulist_free(roots);
4065 btrfs_release_path(&path);
4066 return ret;
/*
 * Validate the tree block @buf against its extent record: run the generic
 * leaf/node checks, optionally try to repair a bad block, and verify the
 * block's owner reference.
 *
 * Returns 0 on success, 1 when no extent record covers the block, -EIO on
 * an unfixable bad block, or -EAGAIN when a repair cowed blocks and the
 * caller must restart its scan.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		/* Remember the first key for later error reporting */
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cowed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		/* Full backrefs don't name an owner, nothing more to verify */
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
#if 0
/*
 * NOTE(review): dead list-based implementation kept for reference only;
 * the live find_tree_backref() uses the rb-tree index instead.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
					      u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
4162 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4163 u64 parent, u64 root)
4165 struct tree_backref *ref = malloc(sizeof(*ref));
4167 if (!ref)
4168 return NULL;
4169 memset(&ref->node, 0, sizeof(ref->node));
4170 if (parent > 0) {
4171 ref->parent = parent;
4172 ref->node.full_backref = 1;
4173 } else {
4174 ref->root = root;
4175 ref->node.full_backref = 0;
4178 return ref;
#if 0
/*
 * NOTE(review): dead list-based implementation kept for reference only;
 * the live find_data_backref() uses the rb-tree index instead.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
					      u64 parent, u64 root,
					      u64 owner, u64 offset,
					      int found_ref,
					      u64 disk_bytenr, u64 bytes)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct data_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root && back->owner == owner &&
			    back->offset == offset) {
				if (found_ref && node->found_ref &&
				    (back->bytes != bytes ||
				    back->disk_bytenr != disk_bytenr))
					continue;
				return back;
			}
		}
	}
	return NULL;
}
#endif
4220 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4221 u64 parent, u64 root,
4222 u64 owner, u64 offset,
4223 u64 max_size)
4225 struct data_backref *ref = malloc(sizeof(*ref));
4227 if (!ref)
4228 return NULL;
4229 memset(&ref->node, 0, sizeof(ref->node));
4230 ref->node.is_data = 1;
4232 if (parent > 0) {
4233 ref->parent = parent;
4234 ref->owner = 0;
4235 ref->offset = 0;
4236 ref->node.full_backref = 1;
4237 } else {
4238 ref->root = root;
4239 ref->owner = owner;
4240 ref->offset = offset;
4241 ref->node.full_backref = 0;
4243 ref->bytes = max_size;
4244 ref->found_ref = 0;
4245 ref->num_refs = 0;
4246 if (max_size > rec->max_size)
4247 rec->max_size = max_size;
4248 return ref;
4251 /* Check if the type of extent matches with its chunk */
4252 static void check_extent_type(struct extent_record *rec)
4254 struct btrfs_block_group_cache *bg_cache;
4256 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4257 if (!bg_cache)
4258 return;
4260 /* data extent, check chunk directly*/
4261 if (!rec->metadata) {
4262 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4263 rec->wrong_chunk_type = 1;
4264 return;
4267 /* metadata extent, check the obvious case first */
4268 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4269 BTRFS_BLOCK_GROUP_METADATA))) {
4270 rec->wrong_chunk_type = 1;
4271 return;
4275 * Check SYSTEM extent, as it's also marked as metadata, we can only
4276 * make sure it's a SYSTEM extent by its backref
4278 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4279 struct extent_backref *node;
4280 struct tree_backref *tback;
4281 u64 bg_type;
4283 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4284 if (node->is_data) {
4285 /* tree block shouldn't have data backref */
4286 rec->wrong_chunk_type = 1;
4287 return;
4289 tback = container_of(node, struct tree_backref, node);
4291 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4292 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4293 else
4294 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4295 if (!(bg_cache->flags & bg_type))
4296 rec->wrong_chunk_type = 1;
/*
 * Allocate a new extent record, fill default values from @tmpl and insert
 * it into @extent_cache. Caller is supposed to make sure the [start,nr)
 * is not in the cache, otherwise it fails.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * insert_cache_extent() when the range collides with an existing record.
 */
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
				   struct extent_record *tmpl)
{
	struct extent_record *rec;
	int ret = 0;

	/* A zero max_size means the caller forgot to fill the template */
	BUG_ON(tmpl->max_size == 0);
	rec = malloc(sizeof(*rec));
	if (!rec)
		return -ENOMEM;
	/*
	 * NOTE(review): members are copied one by one; any field not listed
	 * here (e.g. generation) stays uninitialized until a later path
	 * assigns it -- confirm every reader sets it first.
	 */
	rec->start = tmpl->start;
	rec->max_size = tmpl->max_size;
	rec->nr = max(tmpl->nr, tmpl->max_size);
	rec->found_rec = tmpl->found_rec;
	rec->content_checked = tmpl->content_checked;
	rec->owner_ref_checked = tmpl->owner_ref_checked;
	rec->num_duplicates = 0;
	rec->metadata = tmpl->metadata;
	rec->flag_block_full_backref = FLAG_UNSET;
	rec->bad_full_backref = 0;
	rec->crossing_stripes = 0;
	rec->wrong_chunk_type = 0;
	rec->is_root = tmpl->is_root;
	rec->refs = tmpl->refs;
	rec->extent_item_refs = tmpl->extent_item_refs;
	rec->parent_generation = tmpl->parent_generation;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->dups);
	INIT_LIST_HEAD(&rec->list);
	rec->backref_tree = RB_ROOT;
	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
	rec->cache.start = tmpl->start;
	rec->cache.size = tmpl->nr;
	ret = insert_cache_extent(extent_cache, &rec->cache);
	if (ret) {
		free(rec);
		return ret;
	}
	bytes_used += rec->nr;

	/* Metadata must not cross a stripe boundary, flag it if it does */
	if (tmpl->metadata)
		rec->crossing_stripes = check_crossing_stripes(global_info,
				rec->start, global_info->nodesize);
	check_extent_type(rec);
	return ret;
}
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted
 * verbatim, some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 * Returns 0 on success or a negative errno.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct extent_record *tmpl)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		if (tmpl->refs)
			rec->refs++;
		/* nr == 1 marks a placeholder created from a backref */
		if (rec->nr == 1)
			rec->nr = max(tmpl->nr, tmpl->max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backrefs.
		 */
		if (tmpl->found_rec) {
			if (tmpl->start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				/*
				 * NOTE(review): only a subset of @tmp's
				 * fields is initialized here -- confirm the
				 * dup-processing code reads nothing else.
				 */
				tmp->start = tmpl->start;
				tmp->max_size = tmpl->max_size;
				tmp->nr = tmpl->nr;
				tmp->found_rec = 1;
				tmp->metadata = tmpl->metadata;
				tmp->extent_item_refs = tmpl->extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = tmpl->nr;
				rec->found_rec = 1;
			}
		}

		if (tmpl->extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr,
			"block %llu rec extent_item_refs %llu, passed %llu\n",
					(unsigned long long)tmpl->start,
					(unsigned long long)
							rec->extent_item_refs,
					(unsigned long long)
							tmpl->extent_item_refs);
			}
			rec->extent_item_refs = tmpl->extent_item_refs;
		}
		if (tmpl->is_root)
			rec->is_root = 1;
		if (tmpl->content_checked)
			rec->content_checked = 1;
		if (tmpl->owner_ref_checked)
			rec->owner_ref_checked = 1;
		memcpy(&rec->parent_key, &tmpl->parent_key,
		       sizeof(tmpl->parent_key));
		if (tmpl->parent_generation)
			rec->parent_generation = tmpl->parent_generation;
		if (rec->max_size < tmpl->max_size)
			rec->max_size = tmpl->max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (tmpl->metadata)
			rec->crossing_stripes = check_crossing_stripes(
					global_info, rec->start,
					global_info->nodesize);
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	ret = add_extent_rec_nolookup(extent_cache, tmpl);

	return ret;
}
/*
 * Record a tree backref for the metadata extent at @bytenr, creating a
 * placeholder extent record (nr == 1) when the extent wasn't seen before.
 * @found_ref tells whether the ref was seen via an actual referencing
 * block (vs. an extent-tree item).
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -ENOENT/-EEXIST on
 * extent cache inconsistencies.
 */
static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, int found_ref)
{
	struct extent_record *rec;
	struct tree_backref *back;
	struct cache_extent *cache;
	int ret;
	bool insert = false;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.metadata = 1;
		tmpl.max_size = 1;

		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
		if (ret)
			return ret;

		/* really a bug in cache_extent implement now */
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			return -ENOENT;
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->start != bytenr) {
		/*
		 * Several cause, from unaligned bytenr to over lapping extents
		 */
		return -EEXIST;
	}

	back = find_tree_backref(rec, parent, root);
	if (!back) {
		back = alloc_tree_backref(rec, parent, root);
		if (!back)
			return -ENOMEM;
		insert = true;
	}

	if (found_ref) {
		if (back->node.found_ref) {
			fprintf(stderr,
	"Extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_ref = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr,
	"extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_extent_tree = 1;
	}

	if (insert)
		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
			compare_extent_backref));
	check_extent_type(rec);
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
4533 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4534 u64 parent, u64 root, u64 owner, u64 offset,
4535 u32 num_refs, int found_ref, u64 max_size)
4537 struct extent_record *rec;
4538 struct data_backref *back;
4539 struct cache_extent *cache;
4540 int ret;
4541 bool insert = false;
4543 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4544 if (!cache) {
4545 struct extent_record tmpl;
4547 memset(&tmpl, 0, sizeof(tmpl));
4548 tmpl.start = bytenr;
4549 tmpl.nr = 1;
4550 tmpl.max_size = max_size;
4552 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4553 if (ret)
4554 return ret;
4556 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4557 if (!cache)
4558 abort();
4561 rec = container_of(cache, struct extent_record, cache);
4562 if (rec->max_size < max_size)
4563 rec->max_size = max_size;
4566 * If found_ref is set then max_size is the real size and must match the
4567 * existing refs. So if we have already found a ref then we need to
4568 * make sure that this ref matches the existing one, otherwise we need
4569 * to add a new backref so we can notice that the backrefs don't match
4570 * and we need to figure out who is telling the truth. This is to
4571 * account for that awful fsync bug I introduced where we'd end up with
4572 * a btrfs_file_extent_item that would have its length include multiple
4573 * prealloc extents or point inside of a prealloc extent.
4575 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4576 bytenr, max_size);
4577 if (!back) {
4578 back = alloc_data_backref(rec, parent, root, owner, offset,
4579 max_size);
4580 BUG_ON(!back);
4581 insert = true;
4584 if (found_ref) {
4585 BUG_ON(num_refs != 1);
4586 if (back->node.found_ref)
4587 BUG_ON(back->bytes != max_size);
4588 back->node.found_ref = 1;
4589 back->found_ref += 1;
4590 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4591 back->bytes = max_size;
4592 back->disk_bytenr = bytenr;
4594 /* Need to reinsert if not already in the tree */
4595 if (!insert) {
4596 rb_erase(&back->node.node, &rec->backref_tree);
4597 insert = true;
4600 rec->refs += 1;
4601 rec->content_checked = 1;
4602 rec->owner_ref_checked = 1;
4603 } else {
4604 if (back->node.found_extent_tree) {
4605 fprintf(stderr,
4606 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4607 (unsigned long long)bytenr,
4608 (unsigned long long)parent,
4609 (unsigned long long)root,
4610 (unsigned long long)owner,
4611 (unsigned long long)offset,
4612 (unsigned long)num_refs);
4614 back->num_refs = num_refs;
4615 back->node.found_extent_tree = 1;
4617 if (insert)
4618 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4619 compare_extent_backref));
4621 maybe_free_extent_rec(extent_cache, rec);
4622 return 0;
4625 static int add_pending(struct cache_tree *pending,
4626 struct cache_tree *seen, u64 bytenr, u32 size)
4628 int ret;
4630 ret = add_cache_extent(seen, bytenr, size);
4631 if (ret)
4632 return ret;
4633 add_cache_extent(pending, bytenr, size);
4634 return 0;
/*
 * Select the next batch of block ranges to process.  Preference order:
 * 1) anything queued for readahead (@reada) -- returned one entry at a
 *    time with *reada_bits set,
 * 2) tree nodes (@nodes) near the last processed position @last,
 * 3) plain pending extents (@pending).
 *
 * Fills up to @bits_nr entries of @bits and returns how many were filled
 * (0 when all trees are empty).
 */
static int pick_next_pending(struct cache_tree *pending,
			     struct cache_tree *reada,
			     struct cache_tree *nodes,
			     u64 last, struct block_info *bits, int bits_nr,
			     int *reada_bits)
{
	unsigned long node_start = last;
	struct cache_extent *cache;
	int ret;

	cache = search_cache_extent(reada, 0);
	if (cache) {
		bits[0].start = cache->start;
		bits[0].size = cache->size;
		*reada_bits = 1;
		return 1;
	}
	*reada_bits = 0;
	/* Bias the node search a bit before @last for locality */
	if (node_start > 32768)
		node_start -= 32768;

	cache = search_cache_extent(nodes, node_start);
	if (!cache)
		cache = search_cache_extent(nodes, 0);

	if (!cache) {
		/* No nodes left, fall back to the pending extents */
		cache = search_cache_extent(pending, 0);
		if (!cache)
			return 0;
		ret = 0;
		do {
			bits[ret].start = cache->start;
			bits[ret].size = cache->size;
			cache = next_cache_extent(cache);
			ret++;
		} while (cache && ret < bits_nr);
		return ret;
	}

	ret = 0;
	do {
		bits[ret].start = cache->start;
		bits[ret].size = cache->size;
		cache = next_cache_extent(cache);
		ret++;
	} while (cache && ret < bits_nr);

	/* Top up with nearby pending extents when there's plenty of room */
	if (bits_nr - ret > 8) {
		u64 lookup = bits[0].start + bits[0].size;
		struct cache_extent *next;

		next = search_cache_extent(pending, lookup);
		while (next) {
			if (next->start - lookup > 32768)
				break;
			bits[ret].start = next->start;
			bits[ret].size = next->size;
			lookup = next->start + next->size;
			ret++;
			if (ret == bits_nr)
				break;
			next = next_cache_extent(next);
			if (!next)
				break;
		}
	}
	return ret;
}
4706 static void free_chunk_record(struct cache_extent *cache)
4708 struct chunk_record *rec;
4710 rec = container_of(cache, struct chunk_record, cache);
4711 list_del_init(&rec->list);
4712 list_del_init(&rec->dextents);
4713 free(rec);
/* Free every chunk_record held in @chunk_cache. */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
	cache_tree_free_extents(chunk_cache, free_chunk_record);
}
/* rb-tree destructor callback: free a single device_record. */
static void free_device_record(struct rb_node *node)
{
	struct device_record *rec;

	rec = container_of(node, struct device_record, node);
	free(rec);
}
/* Expands to free_device_cache(): frees all device_records in the rb-tree. */
FREE_RB_BASED_TREE(device_cache, free_device_record);
4731 int insert_block_group_record(struct block_group_tree *tree,
4732 struct block_group_record *bg_rec)
4734 int ret;
4736 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4737 if (ret)
4738 return ret;
4740 list_add_tail(&bg_rec->list, &tree->block_groups);
4741 return 0;
4744 static void free_block_group_record(struct cache_extent *cache)
4746 struct block_group_record *rec;
4748 rec = container_of(cache, struct block_group_record, cache);
4749 list_del_init(&rec->list);
4750 free(rec);
/* Free every block_group_record held in @tree. */
void free_block_group_tree(struct block_group_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_block_group_record);
}
4758 int insert_device_extent_record(struct device_extent_tree *tree,
4759 struct device_extent_record *de_rec)
4761 int ret;
4764 * Device extent is a bit different from the other extents, because
4765 * the extents which belong to the different devices may have the
4766 * same start and size, so we need use the special extent cache
4767 * search/insert functions.
4769 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4770 if (ret)
4771 return ret;
4773 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4774 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4775 return 0;
4778 static void free_device_extent_record(struct cache_extent *cache)
4780 struct device_extent_record *rec;
4782 rec = container_of(cache, struct device_extent_record, cache);
4783 if (!list_empty(&rec->chunk_list))
4784 list_del_init(&rec->chunk_list);
4785 if (!list_empty(&rec->device_list))
4786 list_del_init(&rec->device_list);
4787 free(rec);
/* Free every device_extent_record held in @tree. */
void free_device_extent_tree(struct device_extent_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a v0-format extent ref as a backref: refs whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID belong to internal trees and become tree
 * backrefs, everything else becomes a data backref.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf.
 *
 * The record is sized for the item's stripe count and each stripe's devid,
 * offset and device uuid are copied in.  Exits the program on allocation
 * failure; otherwise the caller owns the returned record.
 */
struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
					    struct btrfs_key *key,
					    int slot)
{
	struct btrfs_chunk *ptr;
	struct chunk_record *rec;
	int num_stripes, i;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, ptr);

	/* Flexible-array allocation: record plus num_stripes stripe slots */
	rec = calloc(1, btrfs_chunk_record_size(num_stripes));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	INIT_LIST_HEAD(&rec->list);
	INIT_LIST_HEAD(&rec->dextents);
	rec->bg_rec = NULL;

	/* Chunk items are keyed by logical start (key->offset) */
	rec->cache.start = key->offset;
	rec->cache.size = btrfs_chunk_length(leaf, ptr);

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	rec->length = rec->cache.size;
	rec->owner = btrfs_chunk_owner(leaf, ptr);
	rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
	rec->type_flags = btrfs_chunk_type(leaf, ptr);
	rec->io_width = btrfs_chunk_io_width(leaf, ptr);
	rec->io_align = btrfs_chunk_io_align(leaf, ptr);
	rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
	rec->num_stripes = num_stripes;
	rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);

	for (i = 0; i < rec->num_stripes; ++i) {
		rec->stripes[i].devid =
			btrfs_stripe_devid_nr(leaf, ptr, i);
		rec->stripes[i].offset =
			btrfs_stripe_offset_nr(leaf, ptr, i);
		read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
				   (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
				   BTRFS_UUID_SIZE);
	}

	return rec;
}
4869 static int process_chunk_item(struct cache_tree *chunk_cache,
4870 struct btrfs_key *key, struct extent_buffer *eb,
4871 int slot)
4873 struct chunk_record *rec;
4874 struct btrfs_chunk *chunk;
4875 int ret = 0;
4877 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
4879 * Do extra check for this chunk item,
4881 * It's still possible one can craft a leaf with CHUNK_ITEM, with
4882 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
4883 * and owner<->key_type check.
4885 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
4886 key->offset);
4887 if (ret < 0) {
4888 error("chunk(%llu, %llu) is not valid, ignore it",
4889 key->offset, btrfs_chunk_length(eb, chunk));
4890 return 0;
4892 rec = btrfs_new_chunk_record(eb, key, slot);
4893 ret = insert_cache_extent(chunk_cache, &rec->cache);
4894 if (ret) {
4895 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4896 rec->offset, rec->length);
4897 free(rec);
4900 return ret;
4903 static int process_device_item(struct rb_root *dev_cache,
4904 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4906 struct btrfs_dev_item *ptr;
4907 struct device_record *rec;
4908 int ret = 0;
4910 ptr = btrfs_item_ptr(eb,
4911 slot, struct btrfs_dev_item);
4913 rec = malloc(sizeof(*rec));
4914 if (!rec) {
4915 fprintf(stderr, "memory allocation failed\n");
4916 return -ENOMEM;
4919 rec->devid = key->offset;
4920 rec->generation = btrfs_header_generation(eb);
4922 rec->objectid = key->objectid;
4923 rec->type = key->type;
4924 rec->offset = key->offset;
4926 rec->devid = btrfs_device_id(eb, ptr);
4927 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4928 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4930 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4931 if (ret) {
4932 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4933 free(rec);
4936 return ret;
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at @slot
 * of @leaf.  Exits the program on allocation failure; otherwise the caller
 * owns the returned record.
 */
struct block_group_record *
btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
			     int slot)
{
	struct btrfs_block_group_item *ptr;
	struct block_group_record *rec;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	/* Block group items are keyed by (start, BLOCK_GROUP_ITEM, length) */
	rec->cache.start = key->objectid;
	rec->cache.size = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
	rec->flags = btrfs_disk_block_group_flags(leaf, ptr);

	INIT_LIST_HEAD(&rec->list);

	return rec;
}
4969 static int process_block_group_item(struct block_group_tree *block_group_cache,
4970 struct btrfs_key *key,
4971 struct extent_buffer *eb, int slot)
4973 struct block_group_record *rec;
4974 int ret = 0;
4976 rec = btrfs_new_block_group_record(eb, key, slot);
4977 ret = insert_block_group_record(block_group_cache, rec);
4978 if (ret) {
4979 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
4980 rec->objectid, rec->offset);
4981 free(rec);
4984 return ret;
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at @slot
 * of @leaf.  Exits the program on allocation failure; otherwise the caller
 * owns the returned record.
 */
struct device_extent_record *
btrfs_new_device_extent_record(struct extent_buffer *leaf,
			       struct btrfs_key *key, int slot)
{
	struct device_extent_record *rec;
	struct btrfs_dev_extent *ptr;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	/* Dev extents are keyed by (devid, DEV_EXTENT, physical offset) */
	rec->cache.objectid = key->objectid;
	rec->cache.start = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	/* Back-pointer to the owning chunk (field name typo is historical) */
	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
	rec->chunk_objecteid =
		btrfs_dev_extent_chunk_objectid(leaf, ptr);
	rec->chunk_offset =
		btrfs_dev_extent_chunk_offset(leaf, ptr);
	rec->length = btrfs_dev_extent_length(leaf, ptr);
	rec->cache.size = rec->length;

	INIT_LIST_HEAD(&rec->chunk_list);
	INIT_LIST_HEAD(&rec->device_list);

	return rec;
}
5023 static int
5024 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5025 struct btrfs_key *key, struct extent_buffer *eb,
5026 int slot)
5028 struct device_extent_record *rec;
5029 int ret;
5031 rec = btrfs_new_device_extent_record(eb, key, slot);
5032 ret = insert_device_extent_record(dev_extent_cache, rec);
5033 if (ret) {
5034 fprintf(stderr,
5035 "Device extent[%llu, %llu, %llu] existed.\n",
5036 rec->objectid, rec->offset, rec->length);
5037 free(rec);
5040 return ret;
5043 static int process_extent_item(struct btrfs_root *root,
5044 struct cache_tree *extent_cache,
5045 struct extent_buffer *eb, int slot)
5047 struct btrfs_extent_item *ei;
5048 struct btrfs_extent_inline_ref *iref;
5049 struct btrfs_extent_data_ref *dref;
5050 struct btrfs_shared_data_ref *sref;
5051 struct btrfs_key key;
5052 struct extent_record tmpl;
5053 unsigned long end;
5054 unsigned long ptr;
5055 int ret;
5056 int type;
5057 u32 item_size = btrfs_item_size_nr(eb, slot);
5058 u64 refs = 0;
5059 u64 offset;
5060 u64 num_bytes;
5061 int metadata = 0;
5063 btrfs_item_key_to_cpu(eb, &key, slot);
5065 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5066 metadata = 1;
5067 num_bytes = root->fs_info->nodesize;
5068 } else {
5069 num_bytes = key.offset;
5072 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
5073 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5074 key.objectid, root->fs_info->sectorsize);
5075 return -EIO;
5077 if (item_size < sizeof(*ei)) {
5078 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5079 struct btrfs_extent_item_v0 *ei0;
5081 if (item_size != sizeof(*ei0)) {
5082 error(
5083 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
5084 key.objectid, key.type, key.offset,
5085 btrfs_header_bytenr(eb), slot);
5086 BUG();
5088 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5089 refs = btrfs_extent_refs_v0(eb, ei0);
5090 #else
5091 BUG();
5092 #endif
5093 memset(&tmpl, 0, sizeof(tmpl));
5094 tmpl.start = key.objectid;
5095 tmpl.nr = num_bytes;
5096 tmpl.extent_item_refs = refs;
5097 tmpl.metadata = metadata;
5098 tmpl.found_rec = 1;
5099 tmpl.max_size = num_bytes;
5101 return add_extent_rec(extent_cache, &tmpl);
5104 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5105 refs = btrfs_extent_refs(eb, ei);
5106 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5107 metadata = 1;
5108 else
5109 metadata = 0;
5110 if (metadata && num_bytes != root->fs_info->nodesize) {
5111 error("ignore invalid metadata extent, length %llu does not equal to %u",
5112 num_bytes, root->fs_info->nodesize);
5113 return -EIO;
5115 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
5116 error("ignore invalid data extent, length %llu is not aligned to %u",
5117 num_bytes, root->fs_info->sectorsize);
5118 return -EIO;
5121 memset(&tmpl, 0, sizeof(tmpl));
5122 tmpl.start = key.objectid;
5123 tmpl.nr = num_bytes;
5124 tmpl.extent_item_refs = refs;
5125 tmpl.metadata = metadata;
5126 tmpl.found_rec = 1;
5127 tmpl.max_size = num_bytes;
5128 add_extent_rec(extent_cache, &tmpl);
5130 ptr = (unsigned long)(ei + 1);
5131 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5132 key.type == BTRFS_EXTENT_ITEM_KEY)
5133 ptr += sizeof(struct btrfs_tree_block_info);
5135 end = (unsigned long)ei + item_size;
5136 while (ptr < end) {
5137 iref = (struct btrfs_extent_inline_ref *)ptr;
5138 type = btrfs_extent_inline_ref_type(eb, iref);
5139 offset = btrfs_extent_inline_ref_offset(eb, iref);
5140 switch (type) {
5141 case BTRFS_TREE_BLOCK_REF_KEY:
5142 ret = add_tree_backref(extent_cache, key.objectid,
5143 0, offset, 0);
5144 if (ret < 0)
5145 error(
5146 "add_tree_backref failed (extent items tree block): %s",
5147 strerror(-ret));
5148 break;
5149 case BTRFS_SHARED_BLOCK_REF_KEY:
5150 ret = add_tree_backref(extent_cache, key.objectid,
5151 offset, 0, 0);
5152 if (ret < 0)
5153 error(
5154 "add_tree_backref failed (extent items shared block): %s",
5155 strerror(-ret));
5156 break;
5157 case BTRFS_EXTENT_DATA_REF_KEY:
5158 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5159 add_data_backref(extent_cache, key.objectid, 0,
5160 btrfs_extent_data_ref_root(eb, dref),
5161 btrfs_extent_data_ref_objectid(eb,
5162 dref),
5163 btrfs_extent_data_ref_offset(eb, dref),
5164 btrfs_extent_data_ref_count(eb, dref),
5165 0, num_bytes);
5166 break;
5167 case BTRFS_SHARED_DATA_REF_KEY:
5168 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5169 add_data_backref(extent_cache, key.objectid, offset,
5170 0, 0, 0,
5171 btrfs_shared_data_ref_count(eb, sref),
5172 0, num_bytes);
5173 break;
5174 default:
5175 fprintf(stderr,
5176 "corrupt extent record: key [%llu,%u,%llu]\n",
5177 key.objectid, key.type, num_bytes);
5178 goto out;
5180 ptr += btrfs_extent_inline_ref_size(type);
5182 WARN_ON(ptr > end);
5183 out:
5184 return 0;
/*
 * Verify that the free-space cache of @cache has exactly one entry covering
 * [@offset, @offset + @bytes), after carving out any superblock mirror
 * stripes inside the range (superblocks are never free space).
 *
 * The matched entry is removed from the free-space ctl as a side effect, so
 * after a fully verified block group the ctl should be empty (checked by the
 * caller).  Returns 0 on success, -EINVAL on any mismatch, or the error from
 * btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache,
			     u64 offset, u64 bytes)
{
	struct btrfs_free_space *entry;
	u64 *logical;
	u64 bytenr;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(root->fs_info,
				       cache->key.objectid, bytenr,
				       &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			/* Skip stripes that don't intersect our range */
			if (logical[nr] + stripe_len <= offset)
				continue;
			if (offset + bytes <= logical[nr])
				continue;
			if (logical[nr] == offset) {
				if (stripe_len >= bytes) {
					free(logical);
					return 0;
				}
				bytes -= stripe_len;
				offset += stripe_len;
			} else if (logical[nr] < offset) {
				if (logical[nr] + stripe_len >=
				    offset + bytes) {
					free(logical);
					return 0;
				}
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			} else {
				/*
				 * Could be tricky, the super may land in the
				 * middle of the area we're checking.  First
				 * check the easiest case, it's at the end.
				 */
				if (logical[nr] + stripe_len >=
				    bytes + offset) {
					bytes = logical[nr] - offset;
					continue;
				}

				/* Check the left side */
				ret = check_cache_range(root, cache,
							offset,
							logical[nr] - offset);
				if (ret) {
					free(logical);
					return ret;
				}

				/* Now we continue with the right side */
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			}
		}

		free(logical);
	}

	/* The remaining range must match one cache entry exactly */
	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
	if (!entry) {
		fprintf(stderr, "there is no free space entry for %llu-%llu\n",
			offset, offset+bytes);
		return -EINVAL;
	}

	if (entry->offset != offset) {
		fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
			entry->offset);
		return -EINVAL;
	}

	if (entry->bytes != bytes) {
		fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
			bytes, entry->bytes, offset);
		return -EINVAL;
	}

	/* Consume the verified entry; leftovers are detected by the caller */
	unlink_free_space(cache->free_space_ctl, entry);
	free(entry);
	return 0;
}
/*
 * Walk the extent tree across @cache's range and check that every gap
 * between allocated extents matches a free-space cache entry.
 * check_cache_range() removes entries as they are verified, so any entry
 * left afterwards means the cache disagrees with the extent tree (-EINVAL).
 */
static int verify_space_cache(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 last;
	int ret = 0;

	root = root->fs_info->extent_root;

	/* Never look below the first superblock copy */
	last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	btrfs_init_path(&path);
	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	ret = 0;
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid >= cache->key.offset + cache->key.objectid)
			break;
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}

		/* Contiguous with the previous extent: just advance @last */
		if (last == key.objectid) {
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				last = key.objectid + key.offset;
			else
				/* METADATA_ITEM offset is the level */
				last = key.objectid + root->fs_info->nodesize;
			path.slots[0]++;
			continue;
		}

		/* Gap [last, key.objectid) must be free space */
		ret = check_cache_range(root, cache, last,
					key.objectid - last);
		if (ret)
			break;
		if (key.type == BTRFS_EXTENT_ITEM_KEY)
			last = key.objectid + key.offset;
		else
			last = key.objectid + root->fs_info->nodesize;
		path.slots[0]++;
	}

	/* Trailing gap up to the end of the block group */
	if (last < cache->key.objectid + cache->key.offset)
		ret = check_cache_range(root, cache, last,
					cache->key.objectid +
					cache->key.offset - last);

out:
	btrfs_release_path(&path);

	if (!ret &&
	    !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
		fprintf(stderr, "There are still entries left in the space "
			"cache\n");
		ret = -EINVAL;
	}

	return ret;
}
5360 static int check_space_cache(struct btrfs_root *root)
5362 struct btrfs_block_group_cache *cache;
5363 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5364 int ret;
5365 int error = 0;
5367 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5368 btrfs_super_generation(root->fs_info->super_copy) !=
5369 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5370 printf("cache and super generation don't match, space cache "
5371 "will be invalidated\n");
5372 return 0;
5375 if (ctx.progress_enabled) {
5376 ctx.tp = TASK_FREE_SPACE;
5377 task_start(ctx.info);
5380 while (1) {
5381 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5382 if (!cache)
5383 break;
5385 start = cache->key.objectid + cache->key.offset;
5386 if (!cache->free_space_ctl) {
5387 if (btrfs_init_free_space_ctl(cache,
5388 root->fs_info->sectorsize)) {
5389 ret = -ENOMEM;
5390 break;
5392 } else {
5393 btrfs_remove_free_space_cache(cache);
5396 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5397 ret = exclude_super_stripes(root, cache);
5398 if (ret) {
5399 fprintf(stderr, "could not exclude super stripes: %s\n",
5400 strerror(-ret));
5401 error++;
5402 continue;
5404 ret = load_free_space_tree(root->fs_info, cache);
5405 free_excluded_extents(root, cache);
5406 if (ret < 0) {
5407 fprintf(stderr, "could not load free space tree: %s\n",
5408 strerror(-ret));
5409 error++;
5410 continue;
5412 error += ret;
5413 } else {
5414 ret = load_free_space_cache(root->fs_info, cache);
5415 if (ret < 0)
5416 error++;
5417 if (ret <= 0)
5418 continue;
5421 ret = verify_space_cache(root, cache);
5422 if (ret) {
5423 fprintf(stderr, "cache appears valid but isn't %llu\n",
5424 cache->key.objectid);
5425 error++;
5429 task_stop(ctx.info);
5431 return error ? -EINVAL : 0;
/*
 * Check data checksum for [@bytenr, @bytenr + @num_bytes).
 *
 * Every mirror copy of the data is read back and each sectorsize chunk is
 * checksummed and compared against the csums stored in @eb at @leaf_offset.
 *
 * Return <0 for fatal error (fails to read checksum/data or allocate memory).
 * Return >0 for csum mismatch for any copy.
 * Return 0 if everything is OK.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, unsigned long leaf_offset,
			      struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 offset = 0;
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
	char *data;
	unsigned long csum_offset;
	u32 csum;
	u32 csum_expected;
	u64 read_len;
	u64 data_checked = 0;
	u64 tmp;
	int ret = 0;
	int mirror;
	int num_copies;
	bool csum_mismatch = false;

	/* Csums cover whole sectors only */
	if (num_bytes % fs_info->sectorsize)
		return -EINVAL;

	data = malloc(num_bytes);
	if (!data)
		return -ENOMEM;

	num_copies = btrfs_num_copies(root->fs_info, bytenr, num_bytes);
	while (offset < num_bytes) {
		/*
		 * Mirror 0 means 'read from any valid copy', so it's skipped.
		 * The indexes 1-N represent the n-th copy for levels with
		 * redundancy.
		 */
		for (mirror = 1; mirror <= num_copies; mirror++) {
			read_len = num_bytes - offset;
			/* read as much space once a time */
			ret = read_extent_data(fs_info, data + offset,
					bytenr + offset, &read_len, mirror);
			if (ret)
				goto out;

			data_checked = 0;
			/* verify every 4k data's checksum */
			while (data_checked < read_len) {
				csum = ~(u32)0;
				tmp = offset + data_checked;

				csum = btrfs_csum_data((char *)data + tmp,
						csum, fs_info->sectorsize);
				btrfs_csum_final(csum, (u8 *)&csum);

				/* Stored csums are packed one per sector */
				csum_offset = leaf_offset +
					 tmp / fs_info->sectorsize * csum_size;
				read_extent_buffer(eb, (char *)&csum_expected,
						   csum_offset, csum_size);
				/* Report but keep scanning the rest */
				if (csum != csum_expected) {
					csum_mismatch = true;
					fprintf(stderr,
			"mirror %d bytenr %llu csum %u expected csum %u\n",
						mirror, bytenr + tmp,
						csum, csum_expected);
				}
				data_checked += fs_info->sectorsize;
			}
		}
		offset += read_len;
	}
out:
	free(data);
	if (!ret && csum_mismatch)
		ret = 1;
	return ret;
}
/*
 * Verify that the whole range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.  Used to cross-check csum ranges against
 * extent records.
 *
 * Returns 0 when fully covered, 1 when some part has no extent record
 * (reported to stderr), <0 on search errors.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	btrfs_init_path(&path);
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

again:
	/*
	 * NOTE(review): the search uses fs_info->extent_root while the
	 * next/prev-leaf calls below use @root -- presumably callers always
	 * pass a root sharing that fs_info; confirm against callers.
	 */
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
				0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up extent record %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	} else if (ret) {
		/* Not found exactly: step back to the preceding item */
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
	}

	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

	/*
	 * Block group items come before extent items if they have the same
	 * bytenr, so walk back one more just in case.  Dear future traveller,
	 * first congrats on mastering time travel.  Now if it's not too much
	 * trouble could you go back to 2006 and tell Chris to make the
	 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
	 * EXTENT_ITEM_KEY please?
	 */
	while (key.type > BTRFS_EXTENT_ITEM_KEY) {
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	}

	/* Shrink [bytenr, bytenr+num_bytes) as extents are found */
	while (num_bytes) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				btrfs_release_path(&path);
				return ret;
			} else if (ret) {
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid + key.offset < bytenr) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid > bytenr + num_bytes)
			break;

		if (key.objectid == bytenr) {
			/* Extent starts exactly at our range start */
			if (key.offset >= num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes -= key.offset;
			bytenr += key.offset;
		} else if (key.objectid < bytenr) {
			/* Extent overlaps the front of our range */
			if (key.objectid + key.offset >= bytenr + num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes = (bytenr + num_bytes) -
				(key.objectid + key.offset);
			bytenr = key.objectid + key.offset;
		} else {
			if (key.objectid + key.offset < bytenr + num_bytes) {
				u64 new_start = key.objectid + key.offset;
				u64 new_bytes = bytenr + num_bytes - new_start;

				/*
				 * Weird case, the extent is in the middle of
				 * our range, we'll have to search one side
				 * and then the other.  Not sure if this
				 * happens in real life, but no harm in coding
				 * it up anyway just in case.
				 */
				btrfs_release_path(&path);
				ret = check_extent_exists(root, new_start,
							  new_bytes);
				if (ret) {
					fprintf(stderr, "Right section didn't "
						"have a record\n");
					break;
				}
				num_bytes = key.objectid - bytenr;
				goto again;
			}
			num_bytes = key.objectid - bytenr;
		}
		path.slots[0]++;
	}
	ret = 0;

out:
	/* Anything left uncovered means a missing extent record */
	if (num_bytes && !ret) {
		fprintf(stderr,
			"there are no extents for csum range %llu-%llu\n",
			bytenr, bytenr+num_bytes);
		ret = 1;
	}

	btrfs_release_path(&path);
	return ret;
}
/*
 * Walk the whole csum tree.  When data-csum checking is enabled (and the
 * image is not a metadata dump) every checksum is verified against the data
 * via check_extent_csums(); independently, every contiguous csum range is
 * cross-checked against the extent tree via check_extent_exists().
 *
 * Returns the number of soft errors found, or <0 on fatal error.
 */
static int check_csums(struct btrfs_root *root)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 offset = 0, num_bytes = 0;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
	int errors = 0;
	int ret;
	u64 data_len;
	unsigned long leaf_offset;
	bool verify_csum = !!check_data_csum;

	root = root->fs_info->csum_root;
	if (!extent_buffer_uptodate(root->node)) {
		fprintf(stderr, "No valid csum tree found\n");
		return -ENOENT;
	}

	btrfs_init_path(&path);
	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error searching csum tree %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	}

	if (ret > 0 && path.slots[0])
		path.slots[0]--;
	ret = 0;

	/*
	 * For metadata dump (btrfs-image) all data is wiped so verifying data
	 * csum is meaningless and will always report csum error.
	 */
	if (check_data_csum && (btrfs_super_flags(root->fs_info->super_copy) &
	    (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))) {
		printf("skip data csum verification for metadata dump\n");
		verify_csum = false;
	}

	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				break;
			}
			if (ret)
				break;
		}
		leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
			path.slots[0]++;
			continue;
		}

		/* Each item stores one csum per sector of data */
		data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
			      csum_size) * root->fs_info->sectorsize;
		if (!verify_csum)
			goto skip_csum_check;
		leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
		ret = check_extent_csums(root, key.offset, data_len,
					 leaf_offset, leaf);
		/*
		 * Only break for fatal errors, if mismatch is found, continue
		 * checking until all extents are checked.
		 */
		if (ret < 0)
			break;
		if (ret > 0)
			errors++;
skip_csum_check:
		/*
		 * Accumulate contiguous csum coverage in [offset,
		 * offset+num_bytes); on a discontinuity, verify the finished
		 * range has extent records backing it.
		 */
		if (!num_bytes) {
			offset = key.offset;
		} else if (key.offset != offset + num_bytes) {
			ret = check_extent_exists(root, offset, num_bytes);
			if (ret) {
				fprintf(stderr,
		"csum exists for %llu-%llu but there is no extent record\n",
					offset, offset+num_bytes);
				errors++;
			}
			offset = key.offset;
			num_bytes = 0;
		}
		num_bytes += data_len;
		path.slots[0]++;
	}

	btrfs_release_path(&path);
	return errors;
}
5755 static int is_dropped_key(struct btrfs_key *key,
5756 struct btrfs_key *drop_key)
5758 if (key->objectid < drop_key->objectid)
5759 return 1;
5760 else if (key->objectid == drop_key->objectid) {
5761 if (key->type < drop_key->type)
5762 return 1;
5763 else if (key->type == drop_key->type) {
5764 if (key->offset < drop_key->offset)
5765 return 1;
5768 return 0;
5772 * Here are the rules for FULL_BACKREF.
5774 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5775 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5776 * FULL_BACKREF set.
5777 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5778 * if it happened after the relocation occurred since we'll have dropped the
5779 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5780 * have no real way to know for sure.
5782 * We process the blocks one root at a time, and we start from the lowest root
5783 * objectid and go to the highest. So we can just lookup the owner backref for
5784 * the record and if we don't find it then we know it doesn't exist and we have
5785 * a FULL BACKREF.
5787 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5788 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5789 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF (see the
 * rules in the comment above) and cross-check the decision against what was
 * recorded in the extent record, marking bad_full_backref on disagreement.
 * Returns -ENOENT when the extent record is missing, 0 otherwise.
 */
static int calc_extent_flag(struct cache_tree *extent_cache,
			    struct extent_buffer *buf,
			    struct root_item_record *ri,
			    u64 *flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct tree_backref *tback;
	u64 owner = 0;

	cache = lookup_cache_extent(extent_cache, buf->start, 1);
	/* we have added this extent before */
	if (!cache)
		return -ENOENT;

	rec = container_of(cache, struct extent_record, cache);

	/*
	 * Except file/reloc tree, we can not have
	 * FULL BACKREF MODE
	 */
	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
		goto normal;
	/*
	 * root node
	 */
	if (buf->start == ri->bytenr)
		goto normal;

	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		goto full_backref;

	owner = btrfs_header_owner(buf);
	if (owner == ri->objectid)
		goto normal;

	/* No owner backref recorded -> rule 2: must be FULL_BACKREF */
	tback = find_tree_backref(rec, 0, owner);
	if (!tback)
		goto full_backref;
normal:
	*flags = 0;
	/* Extent record claimed FULL_BACKREF but we concluded otherwise */
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 0)
		rec->bad_full_backref = 1;
	return 0;
full_backref:
	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	/* Extent record claimed no FULL_BACKREF but we concluded otherwise */
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 1)
		rec->bad_full_backref = 1;
	return 0;
}
5844 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5846 fprintf(stderr, "Invalid key type(");
5847 print_key_type(stderr, 0, key_type);
5848 fprintf(stderr, ") found in root(");
5849 print_objectid(stderr, rootid, 0);
5850 fprintf(stderr, ")\n");
/*
 * Check if the key is valid with its extent buffer.
 *
 * This is an early check in case an invalid key exists in an extent buffer.
 * This is not comprehensive yet, but should prevent a wrong key/item from
 * being passed further.
 */
5860 static int check_type_with_root(u64 rootid, u8 key_type)
5862 switch (key_type) {
5863 /* Only valid in chunk tree */
5864 case BTRFS_DEV_ITEM_KEY:
5865 case BTRFS_CHUNK_ITEM_KEY:
5866 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5867 goto err;
5868 break;
5869 /* valid in csum and log tree */
5870 case BTRFS_CSUM_TREE_OBJECTID:
5871 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5872 is_fstree(rootid)))
5873 goto err;
5874 break;
5875 case BTRFS_EXTENT_ITEM_KEY:
5876 case BTRFS_METADATA_ITEM_KEY:
5877 case BTRFS_BLOCK_GROUP_ITEM_KEY:
5878 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5879 goto err;
5880 break;
5881 case BTRFS_ROOT_ITEM_KEY:
5882 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5883 goto err;
5884 break;
5885 case BTRFS_DEV_EXTENT_KEY:
5886 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5887 goto err;
5888 break;
5890 return 0;
5891 err:
5892 report_mismatch_key_root(key_type, rootid);
5893 return -EINVAL;
/*
 * Process the next pending tree block of the extent tree scan: read it,
 * decide its backref mode (normal vs FULL_BACKREF), sanity check it, then
 * record everything it implies into the various caches (extent records and
 * tree/data backrefs for children, chunk/device/block-group/device-extent
 * items found in leaves, csum byte accounting, orphans to delete).
 *
 * Returns 1 when there is nothing left to process, 0 on success, and a
 * negative value on error (e.g. failed extent record insertion).
 */
static int run_next_block(struct btrfs_root *root,
			  struct block_info *bits,
			  int bits_nr,
			  u64 *last,
			  struct cache_tree *pending,
			  struct cache_tree *seen,
			  struct cache_tree *reada,
			  struct cache_tree *nodes,
			  struct cache_tree *extent_cache,
			  struct cache_tree *chunk_cache,
			  struct rb_root *dev_cache,
			  struct block_group_tree *block_group_cache,
			  struct device_extent_tree *dev_extent_cache,
			  struct root_item_record *ri)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;
	struct extent_record *rec = NULL;
	u64 bytenr;
	u32 size;
	u64 parent;
	u64 owner;
	u64 flags;
	u64 ptr;
	u64 gen = 0;
	int ret = 0;
	int i;
	int nritems;
	struct btrfs_key key;
	struct cache_extent *cache;
	int reada_bits;

	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
				    bits_nr, &reada_bits);
	if (nritems == 0)
		return 1;

	/* Kick off readahead for the freshly picked batch of blocks */
	if (!reada_bits) {
		for (i = 0; i < nritems; i++) {
			ret = add_cache_extent(reada, bits[i].start,
					       bits[i].size);
			if (ret == -EEXIST)
				continue;

			/* fixme, get the parent transid */
			readahead_tree_block(fs_info, bits[i].start, 0);
		}
	}
	*last = bits[0].start;
	bytenr = bits[0].start;
	size = bits[0].size;

	/* This block is now being processed, drop it from the queues */
	cache = lookup_cache_extent(pending, bytenr, size);
	if (cache) {
		remove_cache_extent(pending, cache);
		free(cache);
	}
	cache = lookup_cache_extent(reada, bytenr, size);
	if (cache) {
		remove_cache_extent(reada, cache);
		free(cache);
	}
	cache = lookup_cache_extent(nodes, bytenr, size);
	if (cache) {
		remove_cache_extent(nodes, cache);
		free(cache);
	}
	cache = lookup_cache_extent(extent_cache, bytenr, size);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		gen = rec->parent_generation;
	}

	/* fixme, get the real parent transid */
	buf = read_tree_block(root->fs_info, bytenr, gen);
	if (!extent_buffer_uptodate(buf)) {
		record_bad_block_io(root->fs_info,
				    extent_cache, bytenr, size);
		goto out;
	}

	nritems = btrfs_header_nritems(buf);

	flags = 0;
	if (!init_extent_tree) {
		/* Prefer the flags stored in the extent tree itself */
		ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
				       btrfs_header_level(buf), 1, NULL,
				       &flags);
		if (ret < 0) {
			ret = calc_extent_flag(extent_cache, buf, ri, &flags);
			if (ret < 0) {
				fprintf(stderr, "Couldn't calc extent flags\n");
				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			}
		}
	} else {
		/* Extent tree is being rebuilt, derive the flags ourselves */
		flags = 0;
		ret = calc_extent_flag(extent_cache, buf, ri, &flags);
		if (ret < 0) {
			fprintf(stderr, "Couldn't calc extent flags\n");
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		if (ri != NULL &&
		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    ri->objectid == btrfs_header_owner(buf)) {
			/*
			 * Ok we got to this block from it's original owner and
			 * we have FULL_BACKREF set.  Relocation can leave
			 * converted blocks over so this is altogether possible,
			 * however it's not possible if the generation > the
			 * last snapshot, so check for this case.
			 */
			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
			    btrfs_header_generation(buf) > ri->last_snapshot) {
				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
				rec->bad_full_backref = 1;
			}
		}
	} else {
		/* Reloc trees always imply FULL_BACKREF on their blocks */
		if (ri != NULL &&
		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			rec->bad_full_backref = 1;
		}
	}

	/*
	 * Full backref children are referenced by this block's bytenr,
	 * normal children by the owner objectid.
	 */
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		rec->flag_block_full_backref = 1;
		parent = bytenr;
		owner = 0;
	} else {
		rec->flag_block_full_backref = 0;
		parent = 0;
		owner = btrfs_header_owner(buf);
	}

	ret = check_block(root, extent_cache, buf, flags);
	if (ret)
		goto out;

	if (btrfs_is_leaf(buf)) {
		btree_space_waste += btrfs_leaf_free_space(buf);
		for (i = 0; i < nritems; i++) {
			struct btrfs_file_extent_item *fi;

			btrfs_item_key_to_cpu(buf, &key, i);
			/*
			 * Check key type against the leaf owner.
			 * Could filter quite a lot of early error if
			 * owner is correct
			 */
			if (check_type_with_root(btrfs_header_owner(buf),
						 key.type)) {
				fprintf(stderr, "ignoring invalid key\n");
				continue;
			}
			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
				total_csum_bytes +=
					btrfs_item_size_nr(buf, i);
				continue;
			}
			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
				process_chunk_item(chunk_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_ITEM_KEY) {
				process_device_item(dev_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
				process_block_group_item(block_group_cache,
							 &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_EXTENT_KEY) {
				process_device_extent_item(dev_extent_cache,
							   &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
				process_extent_ref_v0(extent_cache, buf, i);
#else
				BUG();
#endif
				continue;
			}

			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
					key.objectid, 0, key.offset, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf tree block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
					key.objectid, key.offset, 0, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf shared block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
				struct btrfs_extent_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_extent_data_ref);
				add_data_backref(extent_cache,
					key.objectid, 0,
					btrfs_extent_data_ref_root(buf, ref),
					btrfs_extent_data_ref_objectid(buf,
								       ref),
					btrfs_extent_data_ref_offset(buf, ref),
					btrfs_extent_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
				struct btrfs_shared_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_shared_data_ref);
				add_data_backref(extent_cache,
					key.objectid, key.offset, 0, 0, 0,
					btrfs_shared_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
				struct bad_item *bad;

				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
					continue;
				if (!owner)
					continue;
				/* Queue the orphan for later deletion */
				bad = malloc(sizeof(struct bad_item));
				if (!bad)
					continue;
				INIT_LIST_HEAD(&bad->list);
				memcpy(&bad->key, &key,
				       sizeof(struct btrfs_key));
				bad->root_id = owner;
				list_add_tail(&bad->list, &delete_items);
				continue;
			}
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* Inline extents and holes reference no extent */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
				continue;

			data_bytes_allocated +=
				btrfs_file_extent_disk_num_bytes(buf, fi);
			if (data_bytes_allocated < root->fs_info->sectorsize)
				abort();

			data_bytes_referenced +=
				btrfs_file_extent_num_bytes(buf, fi);
			add_data_backref(extent_cache,
				btrfs_file_extent_disk_bytenr(buf, fi),
				parent, owner, key.objectid, key.offset -
				btrfs_file_extent_offset(buf, fi), 1, 1,
				btrfs_file_extent_disk_num_bytes(buf, fi));
		}
	} else {
		int level;

		level = btrfs_header_level(buf);
		for (i = 0; i < nritems; i++) {
			struct extent_record tmpl;

			ptr = btrfs_node_blockptr(buf, i);
			size = root->fs_info->nodesize;
			btrfs_node_key_to_cpu(buf, &key, i);
			/* Skip subtrees already dropped by a pending drop */
			if (ri != NULL) {
				if ((level == ri->drop_level)
				    && is_dropped_key(&key, &ri->drop_key)) {
					continue;
				}
			}

			memset(&tmpl, 0, sizeof(tmpl));
			btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
			tmpl.parent_generation =
				btrfs_node_ptr_generation(buf, i);
			tmpl.start = ptr;
			tmpl.nr = size;
			tmpl.refs = 1;
			tmpl.metadata = 1;
			tmpl.max_size = size;
			ret = add_extent_rec(extent_cache, &tmpl);
			if (ret < 0)
				goto out;

			ret = add_tree_backref(extent_cache, ptr, parent,
					       owner, 1);
			if (ret < 0) {
				error(
				"add_tree_backref failed (non-leaf block): %s",
				      strerror(-ret));
				continue;
			}

			if (level > 1)
				add_pending(nodes, seen, ptr, size);
			else
				add_pending(pending, seen, ptr, size);
		}
		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
				      nritems) * sizeof(struct btrfs_key_ptr);
	}
	total_btree_bytes += buf->len;
	if (fs_root_objectid(btrfs_header_owner(buf)))
		total_fs_tree_bytes += buf->len;
	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += buf->len;
out:
	free_extent_buffer(buf);
	return ret;
}
6240 static int add_root_to_pending(struct extent_buffer *buf,
6241 struct cache_tree *extent_cache,
6242 struct cache_tree *pending,
6243 struct cache_tree *seen,
6244 struct cache_tree *nodes,
6245 u64 objectid)
6247 struct extent_record tmpl;
6248 int ret;
6250 if (btrfs_header_level(buf) > 0)
6251 add_pending(nodes, seen, buf->start, buf->len);
6252 else
6253 add_pending(pending, seen, buf->start, buf->len);
6255 memset(&tmpl, 0, sizeof(tmpl));
6256 tmpl.start = buf->start;
6257 tmpl.nr = buf->len;
6258 tmpl.is_root = 1;
6259 tmpl.refs = 1;
6260 tmpl.metadata = 1;
6261 tmpl.max_size = buf->len;
6262 add_extent_rec(extent_cache, &tmpl);
6264 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6265 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6266 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6267 0, 1);
6268 else
6269 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6271 return ret;
/*
 * as we fix the tree, we might be deleting blocks that
 * we're tracking for repair.  This hook makes sure we
 * remove any backrefs for blocks as we are fixing them.
 */
static int free_extent_hook(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 bytenr, u64 num_bytes, u64 parent,
			    u64 root_objectid, u64 owner, u64 offset,
			    int refs_to_drop)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int is_data;
	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;

	/* Data extents are owned by inode objectids, metadata by tree ids */
	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (is_data) {
		struct data_backref *back;

		back = find_data_backref(rec, parent, root_objectid, owner,
					 offset, 1, bytenr, num_bytes);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			back->found_ref -= refs_to_drop;
			if (rec->refs)
				rec->refs -= refs_to_drop;
		}
		if (back->node.found_extent_tree) {
			back->num_refs -= refs_to_drop;
			if (rec->extent_item_refs)
				rec->extent_item_refs -= refs_to_drop;
		}
		if (back->found_ref == 0)
			back->node.found_ref = 0;
		if (back->num_refs == 0)
			back->node.found_extent_tree = 0;

		/*
		 * NOTE(review): this frees the backref while found_ref is
		 * still set (and only when found_extent_tree is clear) —
		 * looks like the condition may be inverted; confirm intent.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	} else {
		struct tree_backref *back;

		back = find_tree_backref(rec, parent, root_objectid);
		if (!back)
			goto out;
		if (back->node.found_ref) {
			if (rec->refs)
				rec->refs--;
			back->node.found_ref = 0;
		}
		if (back->node.found_extent_tree) {
			if (rec->extent_item_refs)
				rec->extent_item_refs--;
			back->node.found_extent_tree = 0;
		}
		/* Unreachable here: found_ref was just cleared above */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	}
	/* Drop the whole record if nothing references it any more */
	maybe_free_extent_rec(extent_cache, rec);
out:
	return 0;
}
/*
 * Delete every extent tree item (extent items and all backref item types)
 * whose key objectid equals @bytenr, updating the block group accounting
 * for the extent/metadata items removed.
 *
 * Returns 0 on success or the first error from search/delete/update.
 */
static int delete_extent_records(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;
	int slot;

	/* Start past every possible item at this bytenr and walk backwards */
	key.objectid = bytenr;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(trans, fs_info->extent_root, &key,
					path, 0, 1);
		if (ret < 0)
			break;

		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		ret = 0;

		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
			/* Not an extent-related item, step to the prior type */
			btrfs_release_path(path);
			if (found_key.type == 0) {
				if (found_key.offset == 0)
					break;
				/*
				 * NOTE(review): these two assignments are
				 * immediately overwritten below — dead code;
				 * confirm whether an else branch was intended.
				 */
				key.offset = found_key.offset - 1;
				key.type = found_key.type;
			}
			key.type = found_key.type - 1;
			key.offset = (u64)-1;
			continue;
		}

		fprintf(stderr,
			"repair deleting extent record: key [%llu,%u,%llu]\n",
			found_key.objectid, found_key.type, found_key.offset);

		ret = btrfs_del_item(trans, fs_info->extent_root, path);
		if (ret)
			break;
		btrfs_release_path(path);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Metadata item offset is the level, not the length */
			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
				found_key.offset : fs_info->nodesize;

			ret = btrfs_update_block_group(fs_info->extent_root,
						       bytenr, bytes, 0, 0);
			if (ret)
				break;
		}
	}

	btrfs_release_path(path);
	return ret;
}
/*
 * for a single backref, this will allocate a new extent
 * and add the backref to it.
 *
 * @allocated: nonzero if the extent item already exists in the extent tree,
 *	       in which case only the backref is added.
 */
static int record_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *info,
			 struct btrfs_path *path,
			 struct extent_record *rec,
			 struct extent_backref *back,
			 int allocated, u64 flags)
{
	int ret = 0;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_key ins_key;
	struct btrfs_extent_item *ei;
	struct data_backref *dback;
	struct btrfs_tree_block_info *bi;

	/* A metadata extent is at least one tree block in size */
	if (!back->is_data)
		rec->max_size = max_t(u64, rec->max_size,
				      info->nodesize);

	if (!allocated) {
		u32 item_size = sizeof(*ei);

		/* Tree blocks carry an extra btrfs_tree_block_info payload */
		if (!back->is_data)
			item_size += sizeof(*bi);

		ins_key.objectid = rec->start;
		ins_key.offset = rec->max_size;
		ins_key.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_insert_empty_item(trans, extent_root, path,
					      &ins_key, item_size);
		if (ret)
			goto fail;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);

		btrfs_set_extent_refs(leaf, ei, 0);
		btrfs_set_extent_generation(leaf, ei, rec->generation);

		if (back->is_data) {
			btrfs_set_extent_flags(leaf, ei,
					       BTRFS_EXTENT_FLAG_DATA);
		} else {
			struct btrfs_disk_key copy_key;

			bi = (struct btrfs_tree_block_info *)(ei + 1);
			memset_extent_buffer(leaf, 0, (unsigned long)bi,
					     sizeof(*bi));

			btrfs_set_disk_key_objectid(&copy_key,
						    rec->info_objectid);
			btrfs_set_disk_key_type(&copy_key, 0);
			btrfs_set_disk_key_offset(&copy_key, 0);

			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
			btrfs_set_tree_block_key(leaf, bi, &copy_key);

			btrfs_set_extent_flags(leaf, ei,
					flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
		}

		btrfs_mark_buffer_dirty(leaf);
		ret = btrfs_update_block_group(extent_root, rec->start,
					       rec->max_size, 1, 0);
		if (ret)
			goto fail;
		btrfs_release_path(path);
	}

	if (back->is_data) {
		u64 parent;
		int i;

		dback = to_data_backref(back);
		if (back->full_backref)
			parent = dback->parent;
		else
			parent = 0;

		/* Add one ref item per reference we actually found */
		for (i = 0; i < dback->found_ref; i++) {
			/* if parent != 0, we're doing a full backref
			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
			 * just makes the backref allocator create a data
			 * backref
			 */
			ret = btrfs_inc_extent_ref(trans, info->extent_root,
						   rec->start, rec->max_size,
						   parent,
						   dback->root,
						   parent ?
						   BTRFS_FIRST_FREE_OBJECTID :
						   dback->owner,
						   dback->offset);
			if (ret)
				break;
		}
		fprintf(stderr,
"adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
			(unsigned long long)rec->start,
			back->full_backref ? "parent" : "root",
			back->full_backref ? (unsigned long long)parent :
					     (unsigned long long)dback->root,
			(unsigned long long)dback->owner,
			(unsigned long long)dback->offset, dback->found_ref);
	} else {
		u64 parent;
		struct tree_backref *tback;

		tback = to_tree_backref(back);
		if (back->full_backref)
			parent = tback->parent;
		else
			parent = 0;

		ret = btrfs_inc_extent_ref(trans, info->extent_root,
					   rec->start, rec->max_size,
					   parent, tback->root, 0, 0);
		fprintf(stderr,
"adding new tree backref on start %llu len %llu parent %llu root %llu\n",
			rec->start, rec->max_size, parent, tback->root);
	}
fail:
	btrfs_release_path(path);
	return ret;
}
6560 static struct extent_entry *find_entry(struct list_head *entries,
6561 u64 bytenr, u64 bytes)
6563 struct extent_entry *entry = NULL;
6565 list_for_each_entry(entry, entries, list) {
6566 if (entry->bytenr == bytenr && entry->bytes == bytes)
6567 return entry;
6570 return NULL;
/*
 * Pick the entry with the strictly highest vote count from @entries,
 * skipping entries whose refs were all broken.  Returns NULL when the
 * vote is tied (no clear winner).
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
	struct extent_entry *entry, *best = NULL, *prev = NULL;

	list_for_each_entry(entry, entries, list) {
		/*
		 * If there are as many broken entries as entries then we know
		 * not to trust this particular entry.
		 */
		if (entry->broken == entry->count)
			continue;

		/*
		 * Special case, when there are only two entries and 'best' is
		 * the first one
		 */
		if (!prev) {
			best = entry;
			prev = entry;
			continue;
		}

		/*
		 * If our current entry == best then we can't be sure our best
		 * is really the best, so we need to keep searching.
		 */
		if (best && best->count == entry->count) {
			prev = entry;
			best = NULL;
			continue;
		}

		/* Prev == entry, not good enough, have to keep searching */
		if (!prev->broken && prev->count == entry->count)
			continue;

		if (!best)
			best = (prev->count > entry->count) ? prev : entry;
		else if (best->count < entry->count)
			best = entry;
		prev = entry;
	}

	return best;
}
/*
 * Rewrite one file extent item so its disk_bytenr/disk_num_bytes (and
 * extent offset) agree with the winning extent @entry, committing the
 * change in its own transaction.
 *
 * Returns 0 on success, a negative errno when the ref cannot be found or
 * cannot be repaired safely (e.g. compressed extents with mismatched
 * start).
 */
static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
		      struct data_backref *dback, struct extent_entry *entry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret, err;

	key.objectid = dback->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find root for our ref\n");
		return -EINVAL;
	}

	/*
	 * The backref points to the original offset of the extent if it was
	 * split, so we need to search down to the offset we have and then walk
	 * forward until we find the backref we're looking for.
	 */
	key.objectid = dback->owner;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = dback->offset;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up ref %d\n", ret);
		return ret;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				fprintf(stderr, "Couldn't find our ref, next\n");
				return -EINVAL;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != dback->owner ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			fprintf(stderr, "Couldn't find our ref, search\n");
			return -EINVAL;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

		if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
			break;
		path->slots[0]++;
	}

	btrfs_release_path(path);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Ok we have the key of the file extent we want to fix, now we can cow
	 * down to the thing and fix it.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
			key.objectid, key.type, key.offset, ret);
		goto out;
	}
	if (ret > 0) {
		fprintf(stderr,
		"well that's odd, we just found this key [%llu,%u,%llu]\n",
			key.objectid, key.type, key.offset);
		ret = -EINVAL;
		goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	if (btrfs_file_extent_compression(leaf, fi) &&
	    dback->disk_bytenr != entry->bytenr) {
		fprintf(stderr,
"ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
			dback->disk_bytenr);
		ret = -EINVAL;
		goto out;
	}

	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
		/* Broken ref: just point it at the agreed-upon extent */
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
	} else if (dback->disk_bytenr > entry->bytenr) {
		/* Ref starts inside the entry: fold the delta into offset */
		u64 off_diff, offset;

		off_diff = dback->disk_bytenr - entry->bytenr;
		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset +
		    btrfs_file_extent_num_bytes(leaf, fi) >
		    entry->bytenr + entry->bytes) {
			fprintf(stderr,
"ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += off_diff;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	} else if (dback->disk_bytenr < entry->bytenr) {
		/* Ref starts before the entry: shrink the offset instead */
		u64 offset;

		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset < entry->bytenr) {
			fprintf(stderr,
"ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}

		offset += dback->disk_bytenr;
		offset -= entry->bytenr;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	}

	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);

	/*
	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
	 * only do this if we aren't using compression, otherwise it's a
	 * trickier case.
	 */
	if (!btrfs_file_extent_compression(leaf, fi))
		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
	else
		printf("ram bytes may be wrong?\n");
	btrfs_mark_buffer_dirty(leaf);
out:
	err = btrfs_commit_transaction(trans, root);
	btrfs_release_path(path);
	return ret ? ret : err;
}
/*
 * For a data extent record whose backrefs disagree about (bytenr, bytes),
 * let the backrefs (and, on a tie, the extent record itself) vote on the
 * correct range, then repair every file extent ref that doesn't match the
 * winner.
 *
 * Returns 0 when nothing needed fixing, -EAGAIN after repairs (caller must
 * rescan), or a negative errno when no winner can be determined.
 */
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
			   struct extent_record *rec)
{
	struct extent_backref *back, *tmp;
	struct data_backref *dback;
	struct extent_entry *entry, *best = NULL;
	LIST_HEAD(entries);
	int nr_entries = 0;
	int broken_entries = 0;
	int ret = 0;
	short mismatch = 0;

	/*
	 * Metadata is easy and the backrefs should always agree on bytenr and
	 * size, if not we've got bigger issues.
	 */
	if (rec->metadata)
		return 0;

	/* Tally one vote entry per distinct (bytenr, bytes) seen in refs */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * We only pay attention to backrefs that we found a real
		 * backref for.
		 */
		if (dback->found_ref == 0)
			continue;

		/*
		 * For now we only catch when the bytes don't match, not the
		 * bytenr.  We can easily do this at the same time, but I want
		 * to have a fs image to test on before we just add repair
		 * functionality willy-nilly so we know we won't screw up the
		 * repair.
		 */

		entry = find_entry(&entries, dback->disk_bytenr,
				   dback->bytes);
		if (!entry) {
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = dback->disk_bytenr;
			entry->bytes = dback->bytes;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}

		/*
		 * If we only have one entry we may think the entries agree
		 * when in reality they don't so we have to do some extra
		 * checking.
		 */
		if (dback->disk_bytenr != rec->start ||
		    dback->bytes != rec->nr || back->broken)
			mismatch = 1;

		if (back->broken) {
			entry->broken++;
			broken_entries++;
		}

		entry->count++;
	}

	/* Yay all the backrefs agree, carry on good sir */
	if (nr_entries <= 1 && !mismatch)
		goto out;

	fprintf(stderr,
		"attempting to repair backref discrepency for bytenr %llu\n",
		rec->start);

	/*
	 * First we want to see if the backrefs can agree amongst themselves who
	 * is right, so figure out which one of the entries has the highest
	 * count.
	 */
	best = find_most_right_entry(&entries);

	/*
	 * Ok so we may have an even split between what the backrefs think, so
	 * this is where we use the extent ref to see what it thinks.
	 */
	if (!best) {
		entry = find_entry(&entries, rec->start, rec->nr);
		if (!entry && (!broken_entries || !rec->found_rec)) {
			fprintf(stderr,
"backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		} else if (!entry) {
			/*
			 * Ok our backrefs were broken, we'll assume this is the
			 * correct value and add an entry for this range.
			 */
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = rec->start;
			entry->bytes = rec->nr;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}
		/* The extent record itself gets a vote to break the tie */
		entry->count++;
		best = find_most_right_entry(&entries);
		if (!best) {
			fprintf(stderr,
"backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * I don't think this can happen currently as we'll abort() if we catch
	 * this case higher up, but in case somebody removes that we still
	 * can't deal with it properly here yet, so just bail out if that's
	 * the case.
	 */
	if (best->bytenr != rec->start) {
		fprintf(stderr,
"extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
			rec->start, rec->nr);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Ok great we all agreed on an extent record, let's go find the real
	 * references and fix up the ones that don't match.
	 */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * Still ignoring backrefs that don't have a real ref attached
		 * to them.
		 */
		if (dback->found_ref == 0)
			continue;

		if (dback->bytes == best->bytes &&
		    dback->disk_bytenr == best->bytenr)
			continue;

		ret = repair_ref(info, path, dback, best);
		if (ret)
			goto out;
	}

	/*
	 * Ok we messed with the actual refs, which means we need to drop our
	 * entire cache and go back and rescan.  I know this is a huge pain and
	 * adds a lot of extra work, but it's the only way to be safe.  Once all
	 * the backrefs agree we may not need to do anything to the extent
	 * record itself.
	 */
	ret = -EAGAIN;
out:
	while (!list_empty(&entries)) {
		entry = list_entry(entries.next, struct extent_entry, list);
		list_del_init(&entry->list);
		free(entry);
	}
	return ret;
}
/*
 * Collapse a record that only exists because of a mismatched backref into
 * its duplicate that was backed by a real extent item, merging refs,
 * backrefs and any further overlapping records.
 *
 * Returns 1 when the merge fully resolved the duplicates, 0 when duplicates
 * remain (queued on the duplicate_extents list) or nothing was done.
 */
static int process_duplicates(struct cache_tree *extent_cache,
			      struct extent_record *rec)
{
	struct extent_record *good, *tmp;
	struct cache_extent *cache;
	int ret;

	/*
	 * If we found a extent record for this extent then return, or if we
	 * have more than one duplicate we are likely going to need to delete
	 * something.
	 */
	if (rec->found_rec || rec->num_duplicates > 1)
		return 0;

	/* Shouldn't happen but just in case */
	BUG_ON(!rec->num_duplicates);

	/*
	 * So this happens if we end up with a backref that doesn't match the
	 * actual extent entry.  So either the backref is bad or the extent
	 * entry is bad.  Either way we want to have the extent_record actually
	 * reflect what we found in the extent_tree, so we need to take the
	 * duplicate out and use that as the extent_record since the only way we
	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
	 */
	remove_cache_extent(extent_cache, &rec->cache);

	/* Promote the first duplicate (item-backed) to be the record */
	good = to_extent_record(rec->dups.next);
	list_del_init(&good->list);
	INIT_LIST_HEAD(&good->backrefs);
	INIT_LIST_HEAD(&good->dups);
	good->cache.start = good->start;
	good->cache.size = good->nr;
	good->content_checked = 0;
	good->owner_ref_checked = 0;
	good->num_duplicates = 0;
	good->refs = rec->refs;
	list_splice_init(&rec->backrefs, &good->backrefs);
	/* Absorb every record still overlapping the promoted range */
	while (1) {
		cache = lookup_cache_extent(extent_cache, good->start,
					    good->nr);
		if (!cache)
			break;
		tmp = container_of(cache, struct extent_record, cache);

		/*
		 * If we find another overlapping extent and it's found_rec is
		 * set then it's a duplicate and we need to try and delete
		 * something.
		 */
		if (tmp->found_rec || tmp->num_duplicates > 0) {
			if (list_empty(&good->list))
				list_add_tail(&good->list,
					      &duplicate_extents);
			good->num_duplicates += tmp->num_duplicates + 1;
			list_splice_init(&tmp->dups, &good->dups);
			list_del_init(&tmp->list);
			list_add_tail(&tmp->list, &good->dups);
			remove_cache_extent(extent_cache, &tmp->cache);
			continue;
		}

		/*
		 * Ok we have another non extent item backed extent rec, so lets
		 * just add it to this extent and carry on like we did above.
		 */
		good->refs += tmp->refs;
		list_splice_init(&tmp->backrefs, &good->backrefs);
		remove_cache_extent(extent_cache, &tmp->cache);
		free(tmp);
	}
	ret = insert_cache_extent(extent_cache, &good->cache);
	BUG_ON(ret);
	free(rec);
	return good->num_duplicates ? 0 : 1;
}
7030 static int delete_duplicate_records(struct btrfs_root *root,
7031 struct extent_record *rec)
7033 struct btrfs_trans_handle *trans;
7034 LIST_HEAD(delete_list);
7035 struct btrfs_path path;
7036 struct extent_record *tmp, *good, *n;
7037 int nr_del = 0;
7038 int ret = 0, err;
7039 struct btrfs_key key;
7041 btrfs_init_path(&path);
7043 good = rec;
7044 /* Find the record that covers all of the duplicates. */
7045 list_for_each_entry(tmp, &rec->dups, list) {
7046 if (good->start < tmp->start)
7047 continue;
7048 if (good->nr > tmp->nr)
7049 continue;
7051 if (tmp->start + tmp->nr < good->start + good->nr) {
7052 fprintf(stderr,
7053 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
7054 tmp->start, tmp->nr, good->start, good->nr);
7055 abort();
7057 good = tmp;
7060 if (good != rec)
7061 list_add_tail(&rec->list, &delete_list);
7063 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7064 if (tmp == good)
7065 continue;
7066 list_move_tail(&tmp->list, &delete_list);
7069 root = root->fs_info->extent_root;
7070 trans = btrfs_start_transaction(root, 1);
7071 if (IS_ERR(trans)) {
7072 ret = PTR_ERR(trans);
7073 goto out;
7076 list_for_each_entry(tmp, &delete_list, list) {
7077 if (tmp->found_rec == 0)
7078 continue;
7079 key.objectid = tmp->start;
7080 key.type = BTRFS_EXTENT_ITEM_KEY;
7081 key.offset = tmp->nr;
7083 /* Shouldn't happen but just in case */
7084 if (tmp->metadata) {
7085 fprintf(stderr,
7086 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7087 tmp->start, tmp->nr);
7088 abort();
7091 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7092 if (ret) {
7093 if (ret > 0)
7094 ret = -EINVAL;
7095 break;
7097 ret = btrfs_del_item(trans, root, &path);
7098 if (ret)
7099 break;
7100 btrfs_release_path(&path);
7101 nr_del++;
7103 err = btrfs_commit_transaction(trans, root);
7104 if (err && !ret)
7105 ret = err;
7106 out:
7107 while (!list_empty(&delete_list)) {
7108 tmp = to_extent_record(delete_list.next);
7109 list_del_init(&tmp->list);
7110 if (tmp == rec)
7111 continue;
7112 free(tmp);
7115 while (!list_empty(&rec->dups)) {
7116 tmp = to_extent_record(rec->dups.next);
7117 list_del_init(&tmp->list);
7118 free(tmp);
7121 btrfs_release_path(&path);
7123 if (!ret && !nr_del)
7124 rec->num_duplicates = 0;
7126 return ret ? ret : nr_del;
7129 static int find_possible_backrefs(struct btrfs_fs_info *info,
7130 struct btrfs_path *path,
7131 struct cache_tree *extent_cache,
7132 struct extent_record *rec)
7134 struct btrfs_root *root;
7135 struct extent_backref *back, *tmp;
7136 struct data_backref *dback;
7137 struct cache_extent *cache;
7138 struct btrfs_file_extent_item *fi;
7139 struct btrfs_key key;
7140 u64 bytenr, bytes;
7141 int ret;
7143 rbtree_postorder_for_each_entry_safe(back, tmp,
7144 &rec->backref_tree, node) {
7145 /* Don't care about full backrefs (poor unloved backrefs) */
7146 if (back->full_backref || !back->is_data)
7147 continue;
7149 dback = to_data_backref(back);
7151 /* We found this one, we don't need to do a lookup */
7152 if (dback->found_ref)
7153 continue;
7155 key.objectid = dback->root;
7156 key.type = BTRFS_ROOT_ITEM_KEY;
7157 key.offset = (u64)-1;
7159 root = btrfs_read_fs_root(info, &key);
7161 /* No root, definitely a bad ref, skip */
7162 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7163 continue;
7164 /* Other err, exit */
7165 if (IS_ERR(root))
7166 return PTR_ERR(root);
7168 key.objectid = dback->owner;
7169 key.type = BTRFS_EXTENT_DATA_KEY;
7170 key.offset = dback->offset;
7171 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7172 if (ret) {
7173 btrfs_release_path(path);
7174 if (ret < 0)
7175 return ret;
7176 /* Didn't find it, we can carry on */
7177 ret = 0;
7178 continue;
7181 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7182 struct btrfs_file_extent_item);
7183 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7184 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7185 btrfs_release_path(path);
7186 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7187 if (cache) {
7188 struct extent_record *tmp;
7190 tmp = container_of(cache, struct extent_record, cache);
7193 * If we found an extent record for the bytenr for this
7194 * particular backref then we can't add it to our
7195 * current extent record. We only want to add backrefs
7196 * that don't have a corresponding extent item in the
7197 * extent tree since they likely belong to this record
7198 * and we need to fix it if it doesn't match bytenrs.
7200 if (tmp->found_rec)
7201 continue;
7204 dback->found_ref += 1;
7205 dback->disk_bytenr = bytenr;
7206 dback->bytes = bytes;
7209 * Set this so the verify backref code knows not to trust the
7210 * values in this backref.
7212 back->broken = 1;
7215 return 0;
7219 * Record orphan data ref into corresponding root.
7221 * Return 0 if the extent item contains data ref and recorded.
7222 * Return 1 if the extent item contains no useful data ref
7223 * On that case, it may contains only shared_dataref or metadata backref
7224 * or the file extent exists(this should be handled by the extent bytenr
7225 * recovery routine)
7226 * Return <0 if something goes wrong.
7228 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7229 struct extent_record *rec)
7231 struct btrfs_key key;
7232 struct btrfs_root *dest_root;
7233 struct extent_backref *back, *tmp;
7234 struct data_backref *dback;
7235 struct orphan_data_extent *orphan;
7236 struct btrfs_path path;
7237 int recorded_data_ref = 0;
7238 int ret = 0;
7240 if (rec->metadata)
7241 return 1;
7242 btrfs_init_path(&path);
7243 rbtree_postorder_for_each_entry_safe(back, tmp,
7244 &rec->backref_tree, node) {
7245 if (back->full_backref || !back->is_data ||
7246 !back->found_extent_tree)
7247 continue;
7248 dback = to_data_backref(back);
7249 if (dback->found_ref)
7250 continue;
7251 key.objectid = dback->root;
7252 key.type = BTRFS_ROOT_ITEM_KEY;
7253 key.offset = (u64)-1;
7255 dest_root = btrfs_read_fs_root(fs_info, &key);
7257 /* For non-exist root we just skip it */
7258 if (IS_ERR(dest_root) || !dest_root)
7259 continue;
7261 key.objectid = dback->owner;
7262 key.type = BTRFS_EXTENT_DATA_KEY;
7263 key.offset = dback->offset;
7265 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7266 btrfs_release_path(&path);
7268 * For ret < 0, it's OK since the fs-tree may be corrupted,
7269 * we need to record it for inode/file extent rebuild.
7270 * For ret > 0, we record it only for file extent rebuild.
7271 * For ret == 0, the file extent exists but only bytenr
7272 * mismatch, let the original bytenr fix routine to handle,
7273 * don't record it.
7275 if (ret == 0)
7276 continue;
7277 ret = 0;
7278 orphan = malloc(sizeof(*orphan));
7279 if (!orphan) {
7280 ret = -ENOMEM;
7281 goto out;
7283 INIT_LIST_HEAD(&orphan->list);
7284 orphan->root = dback->root;
7285 orphan->objectid = dback->owner;
7286 orphan->offset = dback->offset;
7287 orphan->disk_bytenr = rec->cache.start;
7288 orphan->disk_len = rec->cache.size;
7289 list_add(&dest_root->orphan_data_extents, &orphan->list);
7290 recorded_data_ref = 1;
7292 out:
7293 btrfs_release_path(&path);
7294 if (!ret)
7295 return !recorded_data_ref;
7296 else
7297 return ret;
7301 * when an incorrect extent item is found, this will delete
7302 * all of the existing entries for it and recreate them
7303 * based on what the tree scan found.
7305 static int fixup_extent_refs(struct btrfs_fs_info *info,
7306 struct cache_tree *extent_cache,
7307 struct extent_record *rec)
7309 struct btrfs_trans_handle *trans = NULL;
7310 int ret;
7311 struct btrfs_path path;
7312 struct cache_extent *cache;
7313 struct extent_backref *back, *tmp;
7314 int allocated = 0;
7315 u64 flags = 0;
7317 if (rec->flag_block_full_backref)
7318 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7320 btrfs_init_path(&path);
7321 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7323 * Sometimes the backrefs themselves are so broken they don't
7324 * get attached to any meaningful rec, so first go back and
7325 * check any of our backrefs that we couldn't find and throw
7326 * them into the list if we find the backref so that
7327 * verify_backrefs can figure out what to do.
7329 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7330 if (ret < 0)
7331 goto out;
7334 /* step one, make sure all of the backrefs agree */
7335 ret = verify_backrefs(info, &path, rec);
7336 if (ret < 0)
7337 goto out;
7339 trans = btrfs_start_transaction(info->extent_root, 1);
7340 if (IS_ERR(trans)) {
7341 ret = PTR_ERR(trans);
7342 goto out;
7345 /* step two, delete all the existing records */
7346 ret = delete_extent_records(trans, &path, rec->start);
7348 if (ret < 0)
7349 goto out;
7351 /* was this block corrupt? If so, don't add references to it */
7352 cache = lookup_cache_extent(info->corrupt_blocks,
7353 rec->start, rec->max_size);
7354 if (cache) {
7355 ret = 0;
7356 goto out;
7359 /* step three, recreate all the refs we did find */
7360 rbtree_postorder_for_each_entry_safe(back, tmp,
7361 &rec->backref_tree, node) {
7363 * if we didn't find any references, don't create a
7364 * new extent record
7366 if (!back->found_ref)
7367 continue;
7369 rec->bad_full_backref = 0;
7370 ret = record_extent(trans, info, &path, rec, back, allocated,
7371 flags);
7372 allocated = 1;
7374 if (ret)
7375 goto out;
7377 out:
7378 if (trans) {
7379 int err = btrfs_commit_transaction(trans, info->extent_root);
7381 if (!ret)
7382 ret = err;
7385 if (!ret)
7386 fprintf(stderr, "Repaired extent references for %llu\n",
7387 (unsigned long long)rec->start);
7389 btrfs_release_path(&path);
7390 return ret;
7393 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7394 struct extent_record *rec)
7396 struct btrfs_trans_handle *trans;
7397 struct btrfs_root *root = fs_info->extent_root;
7398 struct btrfs_path path;
7399 struct btrfs_extent_item *ei;
7400 struct btrfs_key key;
7401 u64 flags;
7402 int ret = 0;
7404 key.objectid = rec->start;
7405 if (rec->metadata) {
7406 key.type = BTRFS_METADATA_ITEM_KEY;
7407 key.offset = rec->info_level;
7408 } else {
7409 key.type = BTRFS_EXTENT_ITEM_KEY;
7410 key.offset = rec->max_size;
7413 trans = btrfs_start_transaction(root, 0);
7414 if (IS_ERR(trans))
7415 return PTR_ERR(trans);
7417 btrfs_init_path(&path);
7418 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7419 if (ret < 0) {
7420 btrfs_release_path(&path);
7421 btrfs_commit_transaction(trans, root);
7422 return ret;
7423 } else if (ret) {
7424 fprintf(stderr, "Didn't find extent for %llu\n",
7425 (unsigned long long)rec->start);
7426 btrfs_release_path(&path);
7427 btrfs_commit_transaction(trans, root);
7428 return -ENOENT;
7431 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7432 struct btrfs_extent_item);
7433 flags = btrfs_extent_flags(path.nodes[0], ei);
7434 if (rec->flag_block_full_backref) {
7435 fprintf(stderr, "setting full backref on %llu\n",
7436 (unsigned long long)key.objectid);
7437 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7438 } else {
7439 fprintf(stderr, "clearing full backref on %llu\n",
7440 (unsigned long long)key.objectid);
7441 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7443 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7444 btrfs_mark_buffer_dirty(path.nodes[0]);
7445 btrfs_release_path(&path);
7446 ret = btrfs_commit_transaction(trans, root);
7447 if (!ret)
7448 fprintf(stderr, "Repaired extent flags for %llu\n",
7449 (unsigned long long)rec->start);
7451 return ret;
7454 /* right now we only prune from the extent allocation tree */
7455 static int prune_one_block(struct btrfs_trans_handle *trans,
7456 struct btrfs_fs_info *info,
7457 struct btrfs_corrupt_block *corrupt)
7459 int ret;
7460 struct btrfs_path path;
7461 struct extent_buffer *eb;
7462 u64 found;
7463 int slot;
7464 int nritems;
7465 int level = corrupt->level + 1;
7467 btrfs_init_path(&path);
7468 again:
7469 /* we want to stop at the parent to our busted block */
7470 path.lowest_level = level;
7472 ret = btrfs_search_slot(trans, info->extent_root,
7473 &corrupt->key, &path, -1, 1);
7475 if (ret < 0)
7476 goto out;
7478 eb = path.nodes[level];
7479 if (!eb) {
7480 ret = -ENOENT;
7481 goto out;
7485 * hopefully the search gave us the block we want to prune,
7486 * lets try that first
7488 slot = path.slots[level];
7489 found = btrfs_node_blockptr(eb, slot);
7490 if (found == corrupt->cache.start)
7491 goto del_ptr;
7493 nritems = btrfs_header_nritems(eb);
7495 /* the search failed, lets scan this node and hope we find it */
7496 for (slot = 0; slot < nritems; slot++) {
7497 found = btrfs_node_blockptr(eb, slot);
7498 if (found == corrupt->cache.start)
7499 goto del_ptr;
7502 * We couldn't find the bad block.
7503 * TODO: search all the nodes for pointers to this block
7505 if (eb == info->extent_root->node) {
7506 ret = -ENOENT;
7507 goto out;
7508 } else {
7509 level++;
7510 btrfs_release_path(&path);
7511 goto again;
7514 del_ptr:
7515 printk("deleting pointer to block %llu\n", corrupt->cache.start);
7516 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
7518 out:
7519 btrfs_release_path(&path);
7520 return ret;
7523 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7525 struct btrfs_trans_handle *trans = NULL;
7526 struct cache_extent *cache;
7527 struct btrfs_corrupt_block *corrupt;
7529 while (1) {
7530 cache = search_cache_extent(info->corrupt_blocks, 0);
7531 if (!cache)
7532 break;
7533 if (!trans) {
7534 trans = btrfs_start_transaction(info->extent_root, 1);
7535 if (IS_ERR(trans))
7536 return PTR_ERR(trans);
7538 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7539 prune_one_block(trans, info, corrupt);
7540 remove_cache_extent(info->corrupt_blocks, cache);
7542 if (trans)
7543 return btrfs_commit_transaction(trans, info->extent_root);
7544 return 0;
7547 static int check_extent_refs(struct btrfs_root *root,
7548 struct cache_tree *extent_cache)
7550 struct extent_record *rec;
7551 struct cache_extent *cache;
7552 int ret = 0;
7553 int had_dups = 0;
7554 int err = 0;
7556 if (repair) {
7558 * if we're doing a repair, we have to make sure
7559 * we don't allocate from the problem extents.
7560 * In the worst case, this will be all the
7561 * extents in the FS
7563 cache = search_cache_extent(extent_cache, 0);
7564 while (cache) {
7565 rec = container_of(cache, struct extent_record, cache);
7566 set_extent_dirty(root->fs_info->excluded_extents,
7567 rec->start,
7568 rec->start + rec->max_size - 1);
7569 cache = next_cache_extent(cache);
7572 /* pin down all the corrupted blocks too */
7573 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7574 while (cache) {
7575 set_extent_dirty(root->fs_info->excluded_extents,
7576 cache->start,
7577 cache->start + cache->size - 1);
7578 cache = next_cache_extent(cache);
7580 prune_corrupt_blocks(root->fs_info);
7581 reset_cached_block_groups(root->fs_info);
7584 reset_cached_block_groups(root->fs_info);
7587 * We need to delete any duplicate entries we find first otherwise we
7588 * could mess up the extent tree when we have backrefs that actually
7589 * belong to a different extent item and not the weird duplicate one.
7591 while (repair && !list_empty(&duplicate_extents)) {
7592 rec = to_extent_record(duplicate_extents.next);
7593 list_del_init(&rec->list);
7595 /* Sometimes we can find a backref before we find an actual
7596 * extent, so we need to process it a little bit to see if there
7597 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7598 * if this is a backref screwup. If we need to delete stuff
7599 * process_duplicates() will return 0, otherwise it will return
7600 * 1 and we
7602 if (process_duplicates(extent_cache, rec))
7603 continue;
7604 ret = delete_duplicate_records(root, rec);
7605 if (ret < 0)
7606 return ret;
7608 * delete_duplicate_records will return the number of entries
7609 * deleted, so if it's greater than 0 then we know we actually
7610 * did something and we need to remove.
7612 if (ret)
7613 had_dups = 1;
7616 if (had_dups)
7617 return -EAGAIN;
7619 while (1) {
7620 int cur_err = 0;
7621 int fix = 0;
7623 cache = search_cache_extent(extent_cache, 0);
7624 if (!cache)
7625 break;
7626 rec = container_of(cache, struct extent_record, cache);
7627 if (rec->num_duplicates) {
7628 fprintf(stderr,
7629 "extent item %llu has multiple extent items\n",
7630 (unsigned long long)rec->start);
7631 cur_err = 1;
7634 if (rec->refs != rec->extent_item_refs) {
7635 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7636 (unsigned long long)rec->start,
7637 (unsigned long long)rec->nr);
7638 fprintf(stderr, "extent item %llu, found %llu\n",
7639 (unsigned long long)rec->extent_item_refs,
7640 (unsigned long long)rec->refs);
7641 ret = record_orphan_data_extents(root->fs_info, rec);
7642 if (ret < 0)
7643 goto repair_abort;
7644 fix = ret;
7645 cur_err = 1;
7647 if (all_backpointers_checked(rec, 1)) {
7648 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7649 (unsigned long long)rec->start,
7650 (unsigned long long)rec->nr);
7651 fix = 1;
7652 cur_err = 1;
7654 if (!rec->owner_ref_checked) {
7655 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7656 (unsigned long long)rec->start,
7657 (unsigned long long)rec->nr);
7658 fix = 1;
7659 cur_err = 1;
7662 if (repair && fix) {
7663 ret = fixup_extent_refs(root->fs_info, extent_cache,
7664 rec);
7665 if (ret)
7666 goto repair_abort;
7670 if (rec->bad_full_backref) {
7671 fprintf(stderr, "bad full backref, on [%llu]\n",
7672 (unsigned long long)rec->start);
7673 if (repair) {
7674 ret = fixup_extent_flags(root->fs_info, rec);
7675 if (ret)
7676 goto repair_abort;
7677 fix = 1;
7679 cur_err = 1;
7682 * Although it's not a extent ref's problem, we reuse this
7683 * routine for error reporting.
7684 * No repair function yet.
7686 if (rec->crossing_stripes) {
7687 fprintf(stderr,
7688 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7689 rec->start, rec->start + rec->max_size);
7690 cur_err = 1;
7693 if (rec->wrong_chunk_type) {
7694 fprintf(stderr,
7695 "bad extent [%llu, %llu), type mismatch with chunk\n",
7696 rec->start, rec->start + rec->max_size);
7697 cur_err = 1;
7700 err = cur_err;
7701 remove_cache_extent(extent_cache, cache);
7702 free_all_extent_backrefs(rec);
7703 if (!init_extent_tree && repair && (!cur_err || fix))
7704 clear_extent_dirty(root->fs_info->excluded_extents,
7705 rec->start,
7706 rec->start + rec->max_size - 1);
7707 free(rec);
7709 repair_abort:
7710 if (repair) {
7711 if (ret && ret != -EAGAIN) {
7712 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7713 exit(1);
7714 } else if (!ret) {
7715 struct btrfs_trans_handle *trans;
7717 root = root->fs_info->extent_root;
7718 trans = btrfs_start_transaction(root, 1);
7719 if (IS_ERR(trans)) {
7720 ret = PTR_ERR(trans);
7721 goto repair_abort;
7724 ret = btrfs_fix_block_accounting(trans);
7725 if (ret)
7726 goto repair_abort;
7727 ret = btrfs_commit_transaction(trans, root);
7728 if (ret)
7729 goto repair_abort;
7731 return ret;
7734 if (err)
7735 err = -EIO;
7736 return err;
7740 * Check the chunk with its block group/dev list ref:
7741 * Return 0 if all refs seems valid.
7742 * Return 1 if part of refs seems valid, need later check for rebuild ref
7743 * like missing block group and needs to search extent tree to rebuild them.
7744 * Return -1 if essential refs are missing and unable to rebuild.
7746 static int check_chunk_refs(struct chunk_record *chunk_rec,
7747 struct block_group_tree *block_group_cache,
7748 struct device_extent_tree *dev_extent_cache,
7749 int silent)
7751 struct cache_extent *block_group_item;
7752 struct block_group_record *block_group_rec;
7753 struct cache_extent *dev_extent_item;
7754 struct device_extent_record *dev_extent_rec;
7755 u64 devid;
7756 u64 offset;
7757 u64 length;
7758 int metadump_v2 = 0;
7759 int i;
7760 int ret = 0;
7762 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7763 chunk_rec->offset,
7764 chunk_rec->length);
7765 if (block_group_item) {
7766 block_group_rec = container_of(block_group_item,
7767 struct block_group_record,
7768 cache);
7769 if (chunk_rec->length != block_group_rec->offset ||
7770 chunk_rec->offset != block_group_rec->objectid ||
7771 (!metadump_v2 &&
7772 chunk_rec->type_flags != block_group_rec->flags)) {
7773 if (!silent)
7774 fprintf(stderr,
7775 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7776 chunk_rec->objectid,
7777 chunk_rec->type,
7778 chunk_rec->offset,
7779 chunk_rec->length,
7780 chunk_rec->offset,
7781 chunk_rec->type_flags,
7782 block_group_rec->objectid,
7783 block_group_rec->type,
7784 block_group_rec->offset,
7785 block_group_rec->offset,
7786 block_group_rec->objectid,
7787 block_group_rec->flags);
7788 ret = -1;
7789 } else {
7790 list_del_init(&block_group_rec->list);
7791 chunk_rec->bg_rec = block_group_rec;
7793 } else {
7794 if (!silent)
7795 fprintf(stderr,
7796 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7797 chunk_rec->objectid,
7798 chunk_rec->type,
7799 chunk_rec->offset,
7800 chunk_rec->length,
7801 chunk_rec->offset,
7802 chunk_rec->type_flags);
7803 ret = 1;
7806 if (metadump_v2)
7807 return ret;
7809 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7810 chunk_rec->num_stripes);
7811 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7812 devid = chunk_rec->stripes[i].devid;
7813 offset = chunk_rec->stripes[i].offset;
7814 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7815 devid, offset, length);
7816 if (dev_extent_item) {
7817 dev_extent_rec = container_of(dev_extent_item,
7818 struct device_extent_record,
7819 cache);
7820 if (dev_extent_rec->objectid != devid ||
7821 dev_extent_rec->offset != offset ||
7822 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7823 dev_extent_rec->length != length) {
7824 if (!silent)
7825 fprintf(stderr,
7826 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7827 chunk_rec->objectid,
7828 chunk_rec->type,
7829 chunk_rec->offset,
7830 chunk_rec->stripes[i].devid,
7831 chunk_rec->stripes[i].offset,
7832 dev_extent_rec->objectid,
7833 dev_extent_rec->offset,
7834 dev_extent_rec->length);
7835 ret = -1;
7836 } else {
7837 list_move(&dev_extent_rec->chunk_list,
7838 &chunk_rec->dextents);
7840 } else {
7841 if (!silent)
7842 fprintf(stderr,
7843 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7844 chunk_rec->objectid,
7845 chunk_rec->type,
7846 chunk_rec->offset,
7847 chunk_rec->stripes[i].devid,
7848 chunk_rec->stripes[i].offset);
7849 ret = -1;
7852 return ret;
7855 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7856 int check_chunks(struct cache_tree *chunk_cache,
7857 struct block_group_tree *block_group_cache,
7858 struct device_extent_tree *dev_extent_cache,
7859 struct list_head *good, struct list_head *bad,
7860 struct list_head *rebuild, int silent)
7862 struct cache_extent *chunk_item;
7863 struct chunk_record *chunk_rec;
7864 struct block_group_record *bg_rec;
7865 struct device_extent_record *dext_rec;
7866 int err;
7867 int ret = 0;
7869 chunk_item = first_cache_extent(chunk_cache);
7870 while (chunk_item) {
7871 chunk_rec = container_of(chunk_item, struct chunk_record,
7872 cache);
7873 err = check_chunk_refs(chunk_rec, block_group_cache,
7874 dev_extent_cache, silent);
7875 if (err < 0)
7876 ret = err;
7877 if (err == 0 && good)
7878 list_add_tail(&chunk_rec->list, good);
7879 if (err > 0 && rebuild)
7880 list_add_tail(&chunk_rec->list, rebuild);
7881 if (err < 0 && bad)
7882 list_add_tail(&chunk_rec->list, bad);
7883 chunk_item = next_cache_extent(chunk_item);
7886 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7887 if (!silent)
7888 fprintf(stderr,
7889 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7890 bg_rec->objectid,
7891 bg_rec->offset,
7892 bg_rec->flags);
7893 if (!ret)
7894 ret = 1;
7897 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7898 chunk_list) {
7899 if (!silent)
7900 fprintf(stderr,
7901 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7902 dext_rec->objectid,
7903 dext_rec->offset,
7904 dext_rec->length);
7905 if (!ret)
7906 ret = 1;
7908 return ret;
7912 static int check_device_used(struct device_record *dev_rec,
7913 struct device_extent_tree *dext_cache)
7915 struct cache_extent *cache;
7916 struct device_extent_record *dev_extent_rec;
7917 u64 total_byte = 0;
7919 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7920 while (cache) {
7921 dev_extent_rec = container_of(cache,
7922 struct device_extent_record,
7923 cache);
7924 if (dev_extent_rec->objectid != dev_rec->devid)
7925 break;
7927 list_del_init(&dev_extent_rec->device_list);
7928 total_byte += dev_extent_rec->length;
7929 cache = next_cache_extent(cache);
7932 if (total_byte != dev_rec->byte_used) {
7933 fprintf(stderr,
7934 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7935 total_byte, dev_rec->byte_used, dev_rec->objectid,
7936 dev_rec->type, dev_rec->offset);
7937 return -1;
7938 } else {
7939 return 0;
7944 * Unlike device size alignment check above, some super total_bytes check
7945 * failure can lead to mount failure for newer kernel.
7947 * So this function will return the error for a fatal super total_bytes problem.
7949 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7951 struct btrfs_device *dev;
7952 struct list_head *dev_list = &fs_info->fs_devices->devices;
7953 u64 total_bytes = 0;
7954 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7956 list_for_each_entry(dev, dev_list, dev_list)
7957 total_bytes += dev->total_bytes;
7959 /* Important check, which can cause unmountable fs */
7960 if (super_bytes < total_bytes) {
7961 error("super total bytes %llu smaller than real device(s) size %llu",
7962 super_bytes, total_bytes);
7963 error("mounting this fs may fail for newer kernels");
7964 error("this can be fixed by 'btrfs rescue fix-device-size'");
7965 return false;
7969 * Optional check, just to make everything aligned and match with each
7970 * other.
7972 * For a btrfs-image restored fs, we don't need to check it anyway.
7974 if (btrfs_super_flags(fs_info->super_copy) &
7975 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7976 return true;
7977 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
7978 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
7979 super_bytes != total_bytes) {
7980 warning("minor unaligned/mismatch device size detected");
7981 warning(
7982 "recommended to use 'btrfs rescue fix-device-size' to fix it");
7984 return true;
7987 /* check btrfs_dev_item -> btrfs_dev_extent */
7988 static int check_devices(struct rb_root *dev_cache,
7989 struct device_extent_tree *dev_extent_cache)
7991 struct rb_node *dev_node;
7992 struct device_record *dev_rec;
7993 struct device_extent_record *dext_rec;
7994 int err;
7995 int ret = 0;
7997 dev_node = rb_first(dev_cache);
7998 while (dev_node) {
7999 dev_rec = container_of(dev_node, struct device_record, node);
8000 err = check_device_used(dev_rec, dev_extent_cache);
8001 if (err)
8002 ret = err;
8004 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
8005 global_info->sectorsize);
8006 dev_node = rb_next(dev_node);
8008 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8009 device_list) {
8010 fprintf(stderr,
8011 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8012 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8013 if (!ret)
8014 ret = 1;
8016 return ret;
8019 static int add_root_item_to_list(struct list_head *head,
8020 u64 objectid, u64 bytenr, u64 last_snapshot,
8021 u8 level, u8 drop_level,
8022 struct btrfs_key *drop_key)
8024 struct root_item_record *ri_rec;
8026 ri_rec = malloc(sizeof(*ri_rec));
8027 if (!ri_rec)
8028 return -ENOMEM;
8029 ri_rec->bytenr = bytenr;
8030 ri_rec->objectid = objectid;
8031 ri_rec->level = level;
8032 ri_rec->drop_level = drop_level;
8033 ri_rec->last_snapshot = last_snapshot;
8034 if (drop_key)
8035 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8036 list_add_tail(&ri_rec->list, head);
8038 return 0;
8041 static void free_root_item_list(struct list_head *list)
8043 struct root_item_record *ri_rec;
8045 while (!list_empty(list)) {
8046 ri_rec = list_first_entry(list, struct root_item_record,
8047 list);
8048 list_del_init(&ri_rec->list);
8049 free(ri_rec);
8053 static int deal_root_from_list(struct list_head *list,
8054 struct btrfs_root *root,
8055 struct block_info *bits,
8056 int bits_nr,
8057 struct cache_tree *pending,
8058 struct cache_tree *seen,
8059 struct cache_tree *reada,
8060 struct cache_tree *nodes,
8061 struct cache_tree *extent_cache,
8062 struct cache_tree *chunk_cache,
8063 struct rb_root *dev_cache,
8064 struct block_group_tree *block_group_cache,
8065 struct device_extent_tree *dev_extent_cache)
8067 int ret = 0;
8068 u64 last;
8070 while (!list_empty(list)) {
8071 struct root_item_record *rec;
8072 struct extent_buffer *buf;
8074 rec = list_entry(list->next,
8075 struct root_item_record, list);
8076 last = 0;
8077 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
8078 if (!extent_buffer_uptodate(buf)) {
8079 free_extent_buffer(buf);
8080 ret = -EIO;
8081 break;
8083 ret = add_root_to_pending(buf, extent_cache, pending,
8084 seen, nodes, rec->objectid);
8085 if (ret < 0)
8086 break;
8088 * To rebuild extent tree, we need deal with snapshot
8089 * one by one, otherwise we deal with node firstly which
8090 * can maximize readahead.
8092 while (1) {
8093 ret = run_next_block(root, bits, bits_nr, &last,
8094 pending, seen, reada, nodes,
8095 extent_cache, chunk_cache,
8096 dev_cache, block_group_cache,
8097 dev_extent_cache, rec);
8098 if (ret != 0)
8099 break;
8101 free_extent_buffer(buf);
8102 list_del(&rec->list);
8103 free(rec);
8104 if (ret < 0)
8105 break;
8107 while (ret >= 0) {
8108 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8109 reada, nodes, extent_cache, chunk_cache,
8110 dev_cache, block_group_cache,
8111 dev_extent_cache, NULL);
8112 if (ret != 0) {
8113 if (ret > 0)
8114 ret = 0;
8115 break;
8118 return ret;
8122 * parse_tree_roots - Go over all roots in the tree root and add each one to
8123 * a list.
8125 * @fs_info - pointer to fs_info struct of the file system.
8127 * @normal_trees - list to contains all roots which don't have a drop
8128 * operation in progress
8130 * @dropping_trees - list containing all roots which have a drop operation
8131 * pending
8133 * Returns 0 on success or a negative value indicating an error.
8135 static int parse_tree_roots(struct btrfs_fs_info *fs_info,
8136 struct list_head *normal_trees,
8137 struct list_head *dropping_trees)
8139 struct btrfs_path path;
8140 struct btrfs_key key;
8141 struct btrfs_key found_key;
8142 struct btrfs_root_item ri;
8143 struct extent_buffer *leaf;
8144 int slot;
8145 int ret = 0;
8147 btrfs_init_path(&path);
8148 key.offset = 0;
8149 key.objectid = 0;
8150 key.type = BTRFS_ROOT_ITEM_KEY;
8151 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
8152 if (ret < 0)
8153 goto out;
8154 while (1) {
8155 leaf = path.nodes[0];
8156 slot = path.slots[0];
8157 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8158 ret = btrfs_next_leaf(fs_info->tree_root, &path);
8159 if (ret != 0)
8160 break;
8161 leaf = path.nodes[0];
8162 slot = path.slots[0];
8164 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8165 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8166 unsigned long offset;
8167 u64 last_snapshot;
8168 u8 level;
8170 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8171 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8172 last_snapshot = btrfs_root_last_snapshot(&ri);
8173 level = btrfs_root_level(&ri);
8174 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8175 ret = add_root_item_to_list(normal_trees,
8176 found_key.objectid,
8177 btrfs_root_bytenr(&ri),
8178 last_snapshot, level,
8179 0, NULL);
8180 if (ret < 0)
8181 break;
8182 } else {
8183 u64 objectid = found_key.objectid;
8185 btrfs_disk_key_to_cpu(&found_key,
8186 &ri.drop_progress);
8187 ret = add_root_item_to_list(dropping_trees,
8188 objectid,
8189 btrfs_root_bytenr(&ri),
8190 last_snapshot, level,
8191 ri.drop_level, &found_key);
8192 if (ret < 0)
8193 break;
8196 path.slots[0]++;
8199 out:
8200 btrfs_release_path(&path);
8201 return ret;
8204 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8206 struct rb_root dev_cache;
8207 struct cache_tree chunk_cache;
8208 struct block_group_tree block_group_cache;
8209 struct device_extent_tree dev_extent_cache;
8210 struct cache_tree extent_cache;
8211 struct cache_tree seen;
8212 struct cache_tree pending;
8213 struct cache_tree reada;
8214 struct cache_tree nodes;
8215 struct extent_io_tree excluded_extents;
8216 struct cache_tree corrupt_blocks;
8217 int ret, err = 0;
8218 struct block_info *bits;
8219 int bits_nr;
8220 struct list_head dropping_trees;
8221 struct list_head normal_trees;
8222 struct btrfs_root *root1;
8223 struct btrfs_root *root;
8224 u8 level;
8226 root = fs_info->fs_root;
8227 dev_cache = RB_ROOT;
8228 cache_tree_init(&chunk_cache);
8229 block_group_tree_init(&block_group_cache);
8230 device_extent_tree_init(&dev_extent_cache);
8232 cache_tree_init(&extent_cache);
8233 cache_tree_init(&seen);
8234 cache_tree_init(&pending);
8235 cache_tree_init(&nodes);
8236 cache_tree_init(&reada);
8237 cache_tree_init(&corrupt_blocks);
8238 extent_io_tree_init(&excluded_extents);
8239 INIT_LIST_HEAD(&dropping_trees);
8240 INIT_LIST_HEAD(&normal_trees);
8242 if (repair) {
8243 fs_info->excluded_extents = &excluded_extents;
8244 fs_info->fsck_extent_cache = &extent_cache;
8245 fs_info->free_extent_hook = free_extent_hook;
8246 fs_info->corrupt_blocks = &corrupt_blocks;
8249 bits_nr = 1024;
8250 bits = malloc(bits_nr * sizeof(struct block_info));
8251 if (!bits) {
8252 perror("malloc");
8253 exit(1);
8256 if (ctx.progress_enabled) {
8257 ctx.tp = TASK_EXTENTS;
8258 task_start(ctx.info);
8261 again:
8262 root1 = fs_info->tree_root;
8263 level = btrfs_header_level(root1->node);
8264 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8265 root1->node->start, 0, level, 0, NULL);
8266 if (ret < 0)
8267 goto out;
8268 root1 = fs_info->chunk_root;
8269 level = btrfs_header_level(root1->node);
8270 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8271 root1->node->start, 0, level, 0, NULL);
8272 if (ret < 0)
8273 goto out;
8275 ret = parse_tree_roots(fs_info, &normal_trees, &dropping_trees);
8276 if (ret < 0)
8277 goto out;
8280 * check_block can return -EAGAIN if it fixes something, please keep
8281 * this in mind when dealing with return values from these functions, if
8282 * we get -EAGAIN we want to fall through and restart the loop.
8284 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8285 &seen, &reada, &nodes, &extent_cache,
8286 &chunk_cache, &dev_cache, &block_group_cache,
8287 &dev_extent_cache);
8288 if (ret < 0) {
8289 if (ret == -EAGAIN)
8290 goto loop;
8291 goto out;
8293 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8294 &pending, &seen, &reada, &nodes,
8295 &extent_cache, &chunk_cache, &dev_cache,
8296 &block_group_cache, &dev_extent_cache);
8297 if (ret < 0) {
8298 if (ret == -EAGAIN)
8299 goto loop;
8300 goto out;
8303 ret = check_chunks(&chunk_cache, &block_group_cache,
8304 &dev_extent_cache, NULL, NULL, NULL, 0);
8305 if (ret) {
8306 if (ret == -EAGAIN)
8307 goto loop;
8308 err = ret;
8311 ret = check_extent_refs(root, &extent_cache);
8312 if (ret < 0) {
8313 if (ret == -EAGAIN)
8314 goto loop;
8315 goto out;
8318 ret = check_devices(&dev_cache, &dev_extent_cache);
8319 if (ret && err)
8320 ret = err;
8322 out:
8323 task_stop(ctx.info);
8324 if (repair) {
8325 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8326 extent_io_tree_cleanup(&excluded_extents);
8327 fs_info->fsck_extent_cache = NULL;
8328 fs_info->free_extent_hook = NULL;
8329 fs_info->corrupt_blocks = NULL;
8330 fs_info->excluded_extents = NULL;
8332 free(bits);
8333 free_chunk_cache_tree(&chunk_cache);
8334 free_device_cache_tree(&dev_cache);
8335 free_block_group_tree(&block_group_cache);
8336 free_device_extent_tree(&dev_extent_cache);
8337 free_extent_cache_tree(&seen);
8338 free_extent_cache_tree(&pending);
8339 free_extent_cache_tree(&reada);
8340 free_extent_cache_tree(&nodes);
8341 free_root_item_list(&normal_trees);
8342 free_root_item_list(&dropping_trees);
8343 return ret;
8344 loop:
8345 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
8346 free_extent_cache_tree(&seen);
8347 free_extent_cache_tree(&pending);
8348 free_extent_cache_tree(&reada);
8349 free_extent_cache_tree(&nodes);
8350 free_chunk_cache_tree(&chunk_cache);
8351 free_block_group_tree(&block_group_cache);
8352 free_device_cache_tree(&dev_cache);
8353 free_device_extent_tree(&dev_extent_cache);
8354 free_extent_record_cache(&extent_cache);
8355 free_root_item_list(&normal_trees);
8356 free_root_item_list(&dropping_trees);
8357 extent_io_tree_cleanup(&excluded_extents);
8358 goto again;
8361 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8363 int ret;
8365 if (!ctx.progress_enabled)
8366 fprintf(stderr, "checking extents\n");
8367 if (check_mode == CHECK_MODE_LOWMEM)
8368 ret = check_chunks_and_extents_lowmem(fs_info);
8369 else
8370 ret = check_chunks_and_extents(fs_info);
8372 /* Also repair device size related problems */
8373 if (repair && !ret) {
8374 ret = btrfs_fix_device_and_super_size(fs_info);
8375 if (ret > 0)
8376 ret = 0;
8378 return ret;
8381 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8382 struct btrfs_root *root, int overwrite)
8384 struct extent_buffer *c;
8385 struct extent_buffer *old = root->node;
8386 int level;
8387 int ret;
8388 struct btrfs_disk_key disk_key = {0,0,0};
8390 level = 0;
8392 if (overwrite) {
8393 c = old;
8394 extent_buffer_get(c);
8395 goto init;
8397 c = btrfs_alloc_free_block(trans, root,
8398 root->fs_info->nodesize,
8399 root->root_key.objectid,
8400 &disk_key, level, 0, 0);
8401 if (IS_ERR(c)) {
8402 c = old;
8403 extent_buffer_get(c);
8404 overwrite = 1;
8406 init:
8407 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8408 btrfs_set_header_level(c, level);
8409 btrfs_set_header_bytenr(c, c->start);
8410 btrfs_set_header_generation(c, trans->transid);
8411 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8412 btrfs_set_header_owner(c, root->root_key.objectid);
8414 write_extent_buffer(c, root->fs_info->fsid,
8415 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8417 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8418 btrfs_header_chunk_tree_uuid(c),
8419 BTRFS_UUID_SIZE);
8421 btrfs_mark_buffer_dirty(c);
8423 * this case can happen in the following case:
8425 * 1.overwrite previous root.
8427 * 2.reinit reloc data root, this is because we skip pin
8428 * down reloc data tree before which means we can allocate
8429 * same block bytenr here.
8431 if (old->start == c->start) {
8432 btrfs_set_root_generation(&root->root_item,
8433 trans->transid);
8434 root->root_item.level = btrfs_header_level(root->node);
8435 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8436 &root->root_key, &root->root_item);
8437 if (ret) {
8438 free_extent_buffer(c);
8439 return ret;
8442 free_extent_buffer(old);
8443 root->node = c;
8444 add_root_to_dirty_list(root);
8445 return 0;
8448 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8450 struct btrfs_block_group_cache *cache;
8451 struct btrfs_path path;
8452 struct extent_buffer *leaf;
8453 struct btrfs_chunk *chunk;
8454 struct btrfs_key key;
8455 int ret;
8456 u64 start;
8458 btrfs_init_path(&path);
8459 key.objectid = 0;
8460 key.type = BTRFS_CHUNK_ITEM_KEY;
8461 key.offset = 0;
8462 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
8463 if (ret < 0) {
8464 btrfs_release_path(&path);
8465 return ret;
8469 * We do this in case the block groups were screwed up and had alloc
8470 * bits that aren't actually set on the chunks. This happens with
8471 * restored images every time and could happen in real life I guess.
8473 fs_info->avail_data_alloc_bits = 0;
8474 fs_info->avail_metadata_alloc_bits = 0;
8475 fs_info->avail_system_alloc_bits = 0;
8477 /* First we need to create the in-memory block groups */
8478 while (1) {
8479 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8480 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
8481 if (ret < 0) {
8482 btrfs_release_path(&path);
8483 return ret;
8485 if (ret) {
8486 ret = 0;
8487 break;
8490 leaf = path.nodes[0];
8491 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8492 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8493 path.slots[0]++;
8494 continue;
8497 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
8498 btrfs_add_block_group(fs_info, 0,
8499 btrfs_chunk_type(leaf, chunk), key.offset,
8500 btrfs_chunk_length(leaf, chunk));
8501 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8502 key.offset + btrfs_chunk_length(leaf, chunk));
8503 path.slots[0]++;
8505 start = 0;
8506 while (1) {
8507 cache = btrfs_lookup_first_block_group(fs_info, start);
8508 if (!cache)
8509 break;
8510 cache->cached = 1;
8511 start = cache->key.objectid + cache->key.offset;
8514 btrfs_release_path(&path);
8515 return 0;
8518 static int reset_balance(struct btrfs_trans_handle *trans,
8519 struct btrfs_fs_info *fs_info)
8521 struct btrfs_root *root = fs_info->tree_root;
8522 struct btrfs_path path;
8523 struct extent_buffer *leaf;
8524 struct btrfs_key key;
8525 int del_slot, del_nr = 0;
8526 int ret;
8527 int found = 0;
8529 btrfs_init_path(&path);
8530 key.objectid = BTRFS_BALANCE_OBJECTID;
8531 key.type = BTRFS_BALANCE_ITEM_KEY;
8532 key.offset = 0;
8533 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8534 if (ret) {
8535 if (ret > 0)
8536 ret = 0;
8537 if (!ret)
8538 goto reinit_data_reloc;
8539 else
8540 goto out;
8543 ret = btrfs_del_item(trans, root, &path);
8544 if (ret)
8545 goto out;
8546 btrfs_release_path(&path);
8548 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8549 key.type = BTRFS_ROOT_ITEM_KEY;
8550 key.offset = 0;
8551 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8552 if (ret < 0)
8553 goto out;
8554 while (1) {
8555 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8556 if (!found)
8557 break;
8559 if (del_nr) {
8560 ret = btrfs_del_items(trans, root, &path,
8561 del_slot, del_nr);
8562 del_nr = 0;
8563 if (ret)
8564 goto out;
8566 key.offset++;
8567 btrfs_release_path(&path);
8569 found = 0;
8570 ret = btrfs_search_slot(trans, root, &key, &path,
8571 -1, 1);
8572 if (ret < 0)
8573 goto out;
8574 continue;
8576 found = 1;
8577 leaf = path.nodes[0];
8578 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8579 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8580 break;
8581 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8582 path.slots[0]++;
8583 continue;
8585 if (!del_nr) {
8586 del_slot = path.slots[0];
8587 del_nr = 1;
8588 } else {
8589 del_nr++;
8591 path.slots[0]++;
8594 if (del_nr) {
8595 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
8596 if (ret)
8597 goto out;
8599 btrfs_release_path(&path);
8601 reinit_data_reloc:
8602 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8603 key.type = BTRFS_ROOT_ITEM_KEY;
8604 key.offset = (u64)-1;
8605 root = btrfs_read_fs_root(fs_info, &key);
8606 if (IS_ERR(root)) {
8607 fprintf(stderr, "Error reading data reloc tree\n");
8608 ret = PTR_ERR(root);
8609 goto out;
8611 record_root_in_trans(trans, root);
8612 ret = btrfs_fsck_reinit_root(trans, root, 0);
8613 if (ret)
8614 goto out;
8615 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8616 out:
8617 btrfs_release_path(&path);
8618 return ret;
8621 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8622 struct btrfs_fs_info *fs_info, bool pin)
8624 u64 start = 0;
8625 int ret;
8628 * The only reason we don't do this is because right now we're just
8629 * walking the trees we find and pinning down their bytes, we don't look
8630 * at any of the leaves. In order to do mixed groups we'd have to check
8631 * the leaves of any fs roots and pin down the bytes for any file
8632 * extents we find. Not hard but why do it if we don't have to?
8634 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8635 fprintf(stderr, "We don't support re-initing the extent tree "
8636 "for mixed block groups yet, please notify a btrfs "
8637 "developer you want to do this so they can add this "
8638 "functionality.\n");
8639 return -EINVAL;
8643 * first we need to walk all of the trees except the extent tree and pin
8644 * down/exclude the bytes that are in use so we don't overwrite any
8645 * existing metadata.
8646 * If pinnned, unpin will be done in the end of transaction.
8647 * If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
8649 again:
8650 if (pin) {
8651 ret = pin_metadata_blocks(fs_info);
8652 if (ret) {
8653 fprintf(stderr, "error pinning down used bytes\n");
8654 return ret;
8656 } else {
8657 ret = exclude_metadata_blocks(fs_info);
8658 if (ret) {
8659 fprintf(stderr, "error excluding used bytes\n");
8660 printf("try to pin down used bytes\n");
8661 pin = true;
8662 goto again;
8667 * Need to drop all the block groups since we're going to recreate all
8668 * of them again.
8670 btrfs_free_block_groups(fs_info);
8671 ret = reset_block_groups(fs_info);
8672 if (ret) {
8673 fprintf(stderr, "error resetting the block groups\n");
8674 return ret;
8677 /* Ok we can allocate now, reinit the extent root */
8678 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8679 if (ret) {
8680 fprintf(stderr, "extent root initialization failed\n");
8682 * When the transaction code is updated we should end the
8683 * transaction, but for now progs only knows about commit so
8684 * just return an error.
8686 return ret;
8690 * Now we have all the in-memory block groups setup so we can make
8691 * allocations properly, and the metadata we care about is safe since we
8692 * pinned all of it above.
8694 while (1) {
8695 struct btrfs_block_group_cache *cache;
8697 cache = btrfs_lookup_first_block_group(fs_info, start);
8698 if (!cache)
8699 break;
8700 start = cache->key.objectid + cache->key.offset;
8701 ret = btrfs_insert_item(trans, fs_info->extent_root,
8702 &cache->key, &cache->item,
8703 sizeof(cache->item));
8704 if (ret) {
8705 fprintf(stderr, "Error adding block group\n");
8706 return ret;
8708 btrfs_extent_post_op(trans);
8711 ret = reset_balance(trans, fs_info);
8712 if (ret)
8713 fprintf(stderr, "error resetting the pending balance\n");
8715 return ret;
8718 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8720 struct btrfs_path path;
8721 struct btrfs_trans_handle *trans;
8722 struct btrfs_key key;
8723 int ret;
8725 printf("Recowing metadata block %llu\n", eb->start);
8726 key.objectid = btrfs_header_owner(eb);
8727 key.type = BTRFS_ROOT_ITEM_KEY;
8728 key.offset = (u64)-1;
8730 root = btrfs_read_fs_root(root->fs_info, &key);
8731 if (IS_ERR(root)) {
8732 fprintf(stderr, "Couldn't find owner root %llu\n",
8733 key.objectid);
8734 return PTR_ERR(root);
8737 trans = btrfs_start_transaction(root, 1);
8738 if (IS_ERR(trans))
8739 return PTR_ERR(trans);
8741 btrfs_init_path(&path);
8742 path.lowest_level = btrfs_header_level(eb);
8743 if (path.lowest_level)
8744 btrfs_node_key_to_cpu(eb, &key, 0);
8745 else
8746 btrfs_item_key_to_cpu(eb, &key, 0);
8748 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8749 btrfs_commit_transaction(trans, root);
8750 btrfs_release_path(&path);
8751 return ret;
8754 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8756 struct btrfs_path path;
8757 struct btrfs_trans_handle *trans;
8758 struct btrfs_key key;
8759 int ret;
8761 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8762 bad->key.type, bad->key.offset);
8763 key.objectid = bad->root_id;
8764 key.type = BTRFS_ROOT_ITEM_KEY;
8765 key.offset = (u64)-1;
8767 root = btrfs_read_fs_root(root->fs_info, &key);
8768 if (IS_ERR(root)) {
8769 fprintf(stderr, "Couldn't find owner root %llu\n",
8770 key.objectid);
8771 return PTR_ERR(root);
8774 trans = btrfs_start_transaction(root, 1);
8775 if (IS_ERR(trans))
8776 return PTR_ERR(trans);
8778 btrfs_init_path(&path);
8779 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8780 if (ret) {
8781 if (ret > 0)
8782 ret = 0;
8783 goto out;
8785 ret = btrfs_del_item(trans, root, &path);
8786 out:
8787 btrfs_commit_transaction(trans, root);
8788 btrfs_release_path(&path);
8789 return ret;
8792 static int zero_log_tree(struct btrfs_root *root)
8794 struct btrfs_trans_handle *trans;
8795 int ret;
8797 trans = btrfs_start_transaction(root, 1);
8798 if (IS_ERR(trans)) {
8799 ret = PTR_ERR(trans);
8800 return ret;
8802 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8803 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8804 ret = btrfs_commit_transaction(trans, root);
8805 return ret;
8808 static int populate_csum(struct btrfs_trans_handle *trans,
8809 struct btrfs_root *csum_root, char *buf, u64 start,
8810 u64 len)
8812 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8813 u64 offset = 0;
8814 u64 sectorsize;
8815 int ret = 0;
8817 while (offset < len) {
8818 sectorsize = fs_info->sectorsize;
8819 ret = read_extent_data(fs_info, buf, start + offset,
8820 &sectorsize, 0);
8821 if (ret)
8822 break;
8823 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8824 start + offset, buf, sectorsize);
8825 if (ret)
8826 break;
8827 offset += sectorsize;
8829 return ret;
8832 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8833 struct btrfs_root *csum_root,
8834 struct btrfs_root *cur_root)
8836 struct btrfs_path path;
8837 struct btrfs_key key;
8838 struct extent_buffer *node;
8839 struct btrfs_file_extent_item *fi;
8840 char *buf = NULL;
8841 u64 start = 0;
8842 u64 len = 0;
8843 int slot = 0;
8844 int ret = 0;
8846 buf = malloc(cur_root->fs_info->sectorsize);
8847 if (!buf)
8848 return -ENOMEM;
8850 btrfs_init_path(&path);
8851 key.objectid = 0;
8852 key.offset = 0;
8853 key.type = 0;
8854 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
8855 if (ret < 0)
8856 goto out;
8857 /* Iterate all regular file extents and fill its csum */
8858 while (1) {
8859 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8861 if (key.type != BTRFS_EXTENT_DATA_KEY)
8862 goto next;
8863 node = path.nodes[0];
8864 slot = path.slots[0];
8865 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8866 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8867 goto next;
8868 start = btrfs_file_extent_disk_bytenr(node, fi);
8869 len = btrfs_file_extent_disk_num_bytes(node, fi);
8871 ret = populate_csum(trans, csum_root, buf, start, len);
8872 if (ret == -EEXIST)
8873 ret = 0;
8874 if (ret < 0)
8875 goto out;
8876 next:
8878 * TODO: if next leaf is corrupted, jump to nearest next valid
8879 * leaf.
8881 ret = btrfs_next_item(cur_root, &path);
8882 if (ret < 0)
8883 goto out;
8884 if (ret > 0) {
8885 ret = 0;
8886 goto out;
8890 out:
8891 btrfs_release_path(&path);
8892 free(buf);
8893 return ret;
8896 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8897 struct btrfs_root *csum_root)
8899 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8900 struct btrfs_path path;
8901 struct btrfs_root *tree_root = fs_info->tree_root;
8902 struct btrfs_root *cur_root;
8903 struct extent_buffer *node;
8904 struct btrfs_key key;
8905 int slot = 0;
8906 int ret = 0;
8908 btrfs_init_path(&path);
8909 key.objectid = BTRFS_FS_TREE_OBJECTID;
8910 key.offset = 0;
8911 key.type = BTRFS_ROOT_ITEM_KEY;
8912 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8913 if (ret < 0)
8914 goto out;
8915 if (ret > 0) {
8916 ret = -ENOENT;
8917 goto out;
8920 while (1) {
8921 node = path.nodes[0];
8922 slot = path.slots[0];
8923 btrfs_item_key_to_cpu(node, &key, slot);
8924 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8925 goto out;
8926 if (key.type != BTRFS_ROOT_ITEM_KEY)
8927 goto next;
8928 if (!is_fstree(key.objectid))
8929 goto next;
8930 key.offset = (u64)-1;
8932 cur_root = btrfs_read_fs_root(fs_info, &key);
8933 if (IS_ERR(cur_root) || !cur_root) {
8934 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8935 key.objectid);
8936 goto out;
8938 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8939 cur_root);
8940 if (ret < 0)
8941 goto out;
8942 next:
8943 ret = btrfs_next_item(tree_root, &path);
8944 if (ret > 0) {
8945 ret = 0;
8946 goto out;
8948 if (ret < 0)
8949 goto out;
8952 out:
8953 btrfs_release_path(&path);
8954 return ret;
8957 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8958 struct btrfs_root *csum_root)
8960 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8961 struct btrfs_path path;
8962 struct btrfs_extent_item *ei;
8963 struct extent_buffer *leaf;
8964 char *buf;
8965 struct btrfs_key key;
8966 int ret;
8968 btrfs_init_path(&path);
8969 key.objectid = 0;
8970 key.type = BTRFS_EXTENT_ITEM_KEY;
8971 key.offset = 0;
8972 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8973 if (ret < 0) {
8974 btrfs_release_path(&path);
8975 return ret;
8978 buf = malloc(csum_root->fs_info->sectorsize);
8979 if (!buf) {
8980 btrfs_release_path(&path);
8981 return -ENOMEM;
8984 while (1) {
8985 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8986 ret = btrfs_next_leaf(extent_root, &path);
8987 if (ret < 0)
8988 break;
8989 if (ret) {
8990 ret = 0;
8991 break;
8994 leaf = path.nodes[0];
8996 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8997 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8998 path.slots[0]++;
8999 continue;
9002 ei = btrfs_item_ptr(leaf, path.slots[0],
9003 struct btrfs_extent_item);
9004 if (!(btrfs_extent_flags(leaf, ei) &
9005 BTRFS_EXTENT_FLAG_DATA)) {
9006 path.slots[0]++;
9007 continue;
9010 ret = populate_csum(trans, csum_root, buf, key.objectid,
9011 key.offset);
9012 if (ret)
9013 break;
9014 path.slots[0]++;
9017 btrfs_release_path(&path);
9018 free(buf);
9019 return ret;
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9039 static void free_roots_info_cache(void)
9041 if (!roots_info_cache)
9042 return;
9044 while (!cache_tree_empty(roots_info_cache)) {
9045 struct cache_extent *entry;
9046 struct root_item_info *rii;
9048 entry = first_cache_extent(roots_info_cache);
9049 if (!entry)
9050 break;
9051 remove_cache_extent(roots_info_cache, entry);
9052 rii = container_of(entry, struct root_item_info, cache_extent);
9053 free(rii);
9056 free(roots_info_cache);
9057 roots_info_cache = NULL;
9060 static int build_roots_info_cache(struct btrfs_fs_info *info)
9062 int ret = 0;
9063 struct btrfs_key key;
9064 struct extent_buffer *leaf;
9065 struct btrfs_path path;
9067 if (!roots_info_cache) {
9068 roots_info_cache = malloc(sizeof(*roots_info_cache));
9069 if (!roots_info_cache)
9070 return -ENOMEM;
9071 cache_tree_init(roots_info_cache);
9074 btrfs_init_path(&path);
9075 key.objectid = 0;
9076 key.type = BTRFS_EXTENT_ITEM_KEY;
9077 key.offset = 0;
9078 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
9079 if (ret < 0)
9080 goto out;
9081 leaf = path.nodes[0];
9083 while (1) {
9084 struct btrfs_key found_key;
9085 struct btrfs_extent_item *ei;
9086 struct btrfs_extent_inline_ref *iref;
9087 unsigned long item_end;
9088 int slot = path.slots[0];
9089 int type;
9090 u64 flags;
9091 u64 root_id;
9092 u8 level;
9093 struct cache_extent *entry;
9094 struct root_item_info *rii;
9096 if (slot >= btrfs_header_nritems(leaf)) {
9097 ret = btrfs_next_leaf(info->extent_root, &path);
9098 if (ret < 0) {
9099 break;
9100 } else if (ret) {
9101 ret = 0;
9102 break;
9104 leaf = path.nodes[0];
9105 slot = path.slots[0];
9108 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9110 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9111 found_key.type != BTRFS_METADATA_ITEM_KEY)
9112 goto next;
9114 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9115 flags = btrfs_extent_flags(leaf, ei);
9116 item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);
9118 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9119 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9120 goto next;
9122 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9123 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9124 level = found_key.offset;
9125 } else {
9126 struct btrfs_tree_block_info *binfo;
9128 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9129 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9130 level = btrfs_tree_block_level(leaf, binfo);
9134 * It's a valid extent/metadata item that has no inline ref,
9135 * but SHARED_BLOCK_REF or other shared references.
9136 * So we need to do extra check to avoid reading beyond leaf
9137 * boudnary.
9139 if ((unsigned long)iref >= item_end)
9140 goto next;
9143 * For a root extent, it must be of the following type and the
9144 * first (and only one) iref in the item.
9146 type = btrfs_extent_inline_ref_type(leaf, iref);
9147 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9148 goto next;
9150 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9151 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9152 if (!entry) {
9153 rii = malloc(sizeof(struct root_item_info));
9154 if (!rii) {
9155 ret = -ENOMEM;
9156 goto out;
9158 rii->cache_extent.start = root_id;
9159 rii->cache_extent.size = 1;
9160 rii->level = (u8)-1;
9161 entry = &rii->cache_extent;
9162 ret = insert_cache_extent(roots_info_cache, entry);
9163 ASSERT(ret == 0);
9164 } else {
9165 rii = container_of(entry, struct root_item_info,
9166 cache_extent);
9169 ASSERT(rii->cache_extent.start == root_id);
9170 ASSERT(rii->cache_extent.size == 1);
9172 if (level > rii->level || rii->level == (u8)-1) {
9173 rii->level = level;
9174 rii->bytenr = found_key.objectid;
9175 rii->gen = btrfs_extent_generation(leaf, ei);
9176 rii->node_count = 1;
9177 } else if (level == rii->level) {
9178 rii->node_count++;
9180 next:
9181 path.slots[0]++;
9184 out:
9185 btrfs_release_path(&path);
9187 return ret;
9190 static int maybe_repair_root_item(struct btrfs_path *path,
9191 const struct btrfs_key *root_key,
9192 const int read_only_mode)
9194 const u64 root_id = root_key->objectid;
9195 struct cache_extent *entry;
9196 struct root_item_info *rii;
9197 struct btrfs_root_item ri;
9198 unsigned long offset;
9200 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9201 if (!entry) {
9202 fprintf(stderr,
9203 "Error: could not find extent items for root %llu\n",
9204 root_key->objectid);
9205 return -ENOENT;
9208 rii = container_of(entry, struct root_item_info, cache_extent);
9209 ASSERT(rii->cache_extent.start == root_id);
9210 ASSERT(rii->cache_extent.size == 1);
9212 if (rii->node_count != 1) {
9213 fprintf(stderr,
9214 "Error: could not find btree root extent for root %llu\n",
9215 root_id);
9216 return -ENOENT;
9219 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9220 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9222 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9223 btrfs_root_level(&ri) != rii->level ||
9224 btrfs_root_generation(&ri) != rii->gen) {
9227 * If we're in repair mode but our caller told us to not update
9228 * the root item, i.e. just check if it needs to be updated, don't
9229 * print this message, since the caller will call us again shortly
9230 * for the same root item without read only mode (the caller will
9231 * open a transaction first).
9233 if (!(read_only_mode && repair))
9234 fprintf(stderr,
9235 "%sroot item for root %llu,"
9236 " current bytenr %llu, current gen %llu, current level %u,"
9237 " new bytenr %llu, new gen %llu, new level %u\n",
9238 (read_only_mode ? "" : "fixing "),
9239 root_id,
9240 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9241 btrfs_root_level(&ri),
9242 rii->bytenr, rii->gen, rii->level);
9244 if (btrfs_root_generation(&ri) > rii->gen) {
9245 fprintf(stderr,
9246 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9247 root_id, btrfs_root_generation(&ri), rii->gen);
9248 return -EINVAL;
9251 if (!read_only_mode) {
9252 btrfs_set_root_bytenr(&ri, rii->bytenr);
9253 btrfs_set_root_level(&ri, rii->level);
9254 btrfs_set_root_generation(&ri, rii->gen);
9255 write_extent_buffer(path->nodes[0], &ri,
9256 offset, sizeof(ri));
9259 return 1;
9262 return 0;
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing it so, makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 */
/*
 * Scan every ROOT_ITEM in the root tree and fix items whose bytenr/level/gen
 * disagree with the actual root node found via the backref cache.
 *
 * Returns a negative errno on failure, otherwise the number of bad roots
 * found (0 when everything is consistent, or after all were repaired).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	btrfs_init_path(&path);

	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Search read-only unless a transaction is open (then we need COW). */
	ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path.nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			/* Remember the next key before dropping the path. */
			int no_more_keys = find_next_key(&path, &key);

			btrfs_release_path(&path);
			if (trans) {
				/*
				 * Commit per leaf so we never keep a
				 * transaction open across the whole scan.
				 */
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			need_trans = 0;
			if (no_more_keys)
				break;
			/* Re-search: the commit may have rotated tree blocks. */
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		/* Relocation roots are transient; don't touch them. */
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		/* Read-only probe first; rewrite only once a trans is open. */
		ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			if (!trans && repair) {
				/* Restart this key with a transaction open. */
				need_trans = 1;
				key = found_key;
				btrfs_release_path(&path);
				goto again;
			}
			bad_roots++;
		}
next:
		path.slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_release_path(&path);
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
9372 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9374 struct btrfs_trans_handle *trans;
9375 struct btrfs_block_group_cache *bg_cache;
9376 u64 current = 0;
9377 int ret = 0;
9379 /* Clear all free space cache inodes and its extent data */
9380 while (1) {
9381 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9382 if (!bg_cache)
9383 break;
9384 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9385 if (ret < 0)
9386 return ret;
9387 current = bg_cache->key.objectid + bg_cache->key.offset;
9390 /* Don't forget to set cache_generation to -1 */
9391 trans = btrfs_start_transaction(fs_info->tree_root, 0);
9392 if (IS_ERR(trans)) {
9393 error("failed to update super block cache generation");
9394 return PTR_ERR(trans);
9396 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9397 btrfs_commit_transaction(trans, fs_info->tree_root);
9399 return ret;
9402 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9403 int clear_version)
9405 int ret = 0;
9407 if (clear_version == 1) {
9408 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9409 error(
9410 "free space cache v2 detected, use --clear-space-cache v2");
9411 ret = 1;
9412 goto close_out;
9414 printf("Clearing free space cache\n");
9415 ret = clear_free_space_cache(fs_info);
9416 if (ret) {
9417 error("failed to clear free space cache");
9418 ret = 1;
9419 } else {
9420 printf("Free space cache cleared\n");
9422 } else if (clear_version == 2) {
9423 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9424 printf("no free space cache v2 to clear\n");
9425 ret = 0;
9426 goto close_out;
9428 printf("Clear free space cache v2\n");
9429 ret = btrfs_clear_free_space_tree(fs_info);
9430 if (ret) {
9431 error("failed to clear free space cache v2: %d", ret);
9432 ret = 1;
9433 } else {
9434 printf("free space cache v2 cleared\n");
9437 close_out:
9438 return ret;
/* Usage/help text for "btrfs check", consumed by usage(). */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--force skip mount checks, repair is not possible",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed (experimental)",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL	/* required terminator for usage() */
};
9473 int cmd_check(int argc, char **argv)
9475 struct cache_tree root_cache;
9476 struct btrfs_root *root;
9477 struct btrfs_fs_info *info;
9478 u64 bytenr = 0;
9479 u64 subvolid = 0;
9480 u64 tree_root_bytenr = 0;
9481 u64 chunk_root_bytenr = 0;
9482 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9483 int ret = 0;
9484 int err = 0;
9485 u64 num;
9486 int init_csum_tree = 0;
9487 int readonly = 0;
9488 int clear_space_cache = 0;
9489 int qgroup_report = 0;
9490 int qgroups_repaired = 0;
9491 int qgroup_report_ret;
9492 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
9493 int force = 0;
9495 while(1) {
9496 int c;
9497 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9498 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9499 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
9500 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
9501 GETOPT_VAL_FORCE };
9502 static const struct option long_options[] = {
9503 { "super", required_argument, NULL, 's' },
9504 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9505 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9506 { "init-csum-tree", no_argument, NULL,
9507 GETOPT_VAL_INIT_CSUM },
9508 { "init-extent-tree", no_argument, NULL,
9509 GETOPT_VAL_INIT_EXTENT },
9510 { "check-data-csum", no_argument, NULL,
9511 GETOPT_VAL_CHECK_CSUM },
9512 { "backup", no_argument, NULL, 'b' },
9513 { "subvol-extents", required_argument, NULL, 'E' },
9514 { "qgroup-report", no_argument, NULL, 'Q' },
9515 { "tree-root", required_argument, NULL, 'r' },
9516 { "chunk-root", required_argument, NULL,
9517 GETOPT_VAL_CHUNK_TREE },
9518 { "progress", no_argument, NULL, 'p' },
9519 { "mode", required_argument, NULL,
9520 GETOPT_VAL_MODE },
9521 { "clear-space-cache", required_argument, NULL,
9522 GETOPT_VAL_CLEAR_SPACE_CACHE},
9523 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
9524 { NULL, 0, NULL, 0}
9527 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
9528 if (c < 0)
9529 break;
9530 switch(c) {
9531 case 'a': /* ignored */ break;
9532 case 'b':
9533 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9534 break;
9535 case 's':
9536 num = arg_strtou64(optarg);
9537 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9538 error(
9539 "super mirror should be less than %d",
9540 BTRFS_SUPER_MIRROR_MAX);
9541 exit(1);
9543 bytenr = btrfs_sb_offset(((int)num));
9544 printf("using SB copy %llu, bytenr %llu\n", num,
9545 (unsigned long long)bytenr);
9546 break;
9547 case 'Q':
9548 qgroup_report = 1;
9549 break;
9550 case 'E':
9551 subvolid = arg_strtou64(optarg);
9552 break;
9553 case 'r':
9554 tree_root_bytenr = arg_strtou64(optarg);
9555 break;
9556 case GETOPT_VAL_CHUNK_TREE:
9557 chunk_root_bytenr = arg_strtou64(optarg);
9558 break;
9559 case 'p':
9560 ctx.progress_enabled = true;
9561 break;
9562 case '?':
9563 case 'h':
9564 usage(cmd_check_usage);
9565 case GETOPT_VAL_REPAIR:
9566 printf("enabling repair mode\n");
9567 repair = 1;
9568 ctree_flags |= OPEN_CTREE_WRITES;
9569 break;
9570 case GETOPT_VAL_READONLY:
9571 readonly = 1;
9572 break;
9573 case GETOPT_VAL_INIT_CSUM:
9574 printf("Creating a new CRC tree\n");
9575 init_csum_tree = 1;
9576 repair = 1;
9577 ctree_flags |= OPEN_CTREE_WRITES;
9578 break;
9579 case GETOPT_VAL_INIT_EXTENT:
9580 init_extent_tree = 1;
9581 ctree_flags |= (OPEN_CTREE_WRITES |
9582 OPEN_CTREE_NO_BLOCK_GROUPS);
9583 repair = 1;
9584 break;
9585 case GETOPT_VAL_CHECK_CSUM:
9586 check_data_csum = 1;
9587 break;
9588 case GETOPT_VAL_MODE:
9589 check_mode = parse_check_mode(optarg);
9590 if (check_mode == CHECK_MODE_UNKNOWN) {
9591 error("unknown mode: %s", optarg);
9592 exit(1);
9594 break;
9595 case GETOPT_VAL_CLEAR_SPACE_CACHE:
9596 if (strcmp(optarg, "v1") == 0) {
9597 clear_space_cache = 1;
9598 } else if (strcmp(optarg, "v2") == 0) {
9599 clear_space_cache = 2;
9600 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
9601 } else {
9602 error(
9603 "invalid argument to --clear-space-cache, must be v1 or v2");
9604 exit(1);
9606 ctree_flags |= OPEN_CTREE_WRITES;
9607 break;
9608 case GETOPT_VAL_FORCE:
9609 force = 1;
9610 break;
9614 if (check_argc_exact(argc - optind, 1))
9615 usage(cmd_check_usage);
9617 if (ctx.progress_enabled) {
9618 ctx.tp = TASK_NOTHING;
9619 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9622 /* This check is the only reason for --readonly to exist */
9623 if (readonly && repair) {
9624 error("repair options are not compatible with --readonly");
9625 exit(1);
9629 * experimental and dangerous
9631 if (repair && check_mode == CHECK_MODE_LOWMEM)
9632 warning("low-memory mode repair support is only partial");
9634 radix_tree_init();
9635 cache_tree_init(&root_cache);
9637 ret = check_mounted(argv[optind]);
9638 if (!force) {
9639 if (ret < 0) {
9640 error("could not check mount status: %s",
9641 strerror(-ret));
9642 err |= !!ret;
9643 goto err_out;
9644 } else if (ret) {
9645 error(
9646 "%s is currently mounted, use --force if you really intend to check the filesystem",
9647 argv[optind]);
9648 ret = -EBUSY;
9649 err |= !!ret;
9650 goto err_out;
9652 } else {
9653 if (repair) {
9654 error("repair and --force is not yet supported");
9655 ret = 1;
9656 err |= !!ret;
9657 goto err_out;
9659 if (ret < 0) {
9660 warning(
9661 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
9662 argv[optind]);
9663 } else if (ret) {
9664 warning(
9665 "filesystem mounted, continuing because of --force");
9667 /* A block device is mounted in exclusive mode by kernel */
9668 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
9671 /* only allow partial opening under repair mode */
9672 if (repair)
9673 ctree_flags |= OPEN_CTREE_PARTIAL;
9675 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9676 chunk_root_bytenr, ctree_flags);
9677 if (!info) {
9678 error("cannot open file system");
9679 ret = -EIO;
9680 err |= !!ret;
9681 goto err_out;
9684 global_info = info;
9685 root = info->fs_root;
9686 uuid_unparse(info->super_copy->fsid, uuidbuf);
9688 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9691 * Check the bare minimum before starting anything else that could rely
9692 * on it, namely the tree roots, any local consistency checks
9694 if (!extent_buffer_uptodate(info->tree_root->node) ||
9695 !extent_buffer_uptodate(info->dev_root->node) ||
9696 !extent_buffer_uptodate(info->chunk_root->node)) {
9697 error("critical roots corrupted, unable to check the filesystem");
9698 err |= !!ret;
9699 ret = -EIO;
9700 goto close_out;
9703 if (clear_space_cache) {
9704 ret = do_clear_free_space_cache(info, clear_space_cache);
9705 err |= !!ret;
9706 goto close_out;
9710 * repair mode will force us to commit transaction which
9711 * will make us fail to load log tree when mounting.
9713 if (repair && btrfs_super_log_root(info->super_copy)) {
9714 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
9715 if (!ret) {
9716 ret = 1;
9717 err |= !!ret;
9718 goto close_out;
9720 ret = zero_log_tree(root);
9721 err |= !!ret;
9722 if (ret) {
9723 error("failed to zero log tree: %d", ret);
9724 goto close_out;
9728 if (qgroup_report) {
9729 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9730 uuidbuf);
9731 ret = qgroup_verify_all(info);
9732 err |= !!ret;
9733 if (ret == 0)
9734 err |= !!report_qgroups(1);
9735 goto close_out;
9737 if (subvolid) {
9738 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9739 subvolid, argv[optind], uuidbuf);
9740 ret = print_extent_state(info, subvolid);
9741 err |= !!ret;
9742 goto close_out;
9745 if (init_extent_tree || init_csum_tree) {
9746 struct btrfs_trans_handle *trans;
9748 trans = btrfs_start_transaction(info->extent_root, 0);
9749 if (IS_ERR(trans)) {
9750 error("error starting transaction");
9751 ret = PTR_ERR(trans);
9752 err |= !!ret;
9753 goto close_out;
9756 if (init_extent_tree) {
9757 printf("Creating a new extent tree\n");
9758 ret = reinit_extent_tree(trans, info,
9759 check_mode == CHECK_MODE_ORIGINAL);
9760 err |= !!ret;
9761 if (ret)
9762 goto close_out;
9765 if (init_csum_tree) {
9766 printf("Reinitialize checksum tree\n");
9767 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9768 if (ret) {
9769 error("checksum tree initialization failed: %d",
9770 ret);
9771 ret = -EIO;
9772 err |= !!ret;
9773 goto close_out;
9776 ret = fill_csum_tree(trans, info->csum_root,
9777 init_extent_tree);
9778 err |= !!ret;
9779 if (ret) {
9780 error("checksum tree refilling failed: %d", ret);
9781 return -EIO;
9785 * Ok now we commit and run the normal fsck, which will add
9786 * extent entries for all of the items it finds.
9788 ret = btrfs_commit_transaction(trans, info->extent_root);
9789 err |= !!ret;
9790 if (ret)
9791 goto close_out;
9793 if (!extent_buffer_uptodate(info->extent_root->node)) {
9794 error("critical: extent_root, unable to check the filesystem");
9795 ret = -EIO;
9796 err |= !!ret;
9797 goto close_out;
9799 if (!extent_buffer_uptodate(info->csum_root->node)) {
9800 error("critical: csum_root, unable to check the filesystem");
9801 ret = -EIO;
9802 err |= !!ret;
9803 goto close_out;
9806 if (!init_extent_tree) {
9807 ret = repair_root_items(info);
9808 if (ret < 0) {
9809 err = !!ret;
9810 error("failed to repair root items: %s", strerror(-ret));
9811 goto close_out;
9813 if (repair) {
9814 fprintf(stderr, "Fixed %d roots.\n", ret);
9815 ret = 0;
9816 } else if (ret > 0) {
9817 fprintf(stderr,
9818 "Found %d roots with an outdated root item.\n",
9819 ret);
9820 fprintf(stderr,
9821 "Please run a filesystem check with the option --repair to fix them.\n");
9822 ret = 1;
9823 err |= ret;
9824 goto close_out;
9828 ret = do_check_chunks_and_extents(info);
9829 err |= !!ret;
9830 if (ret)
9831 error(
9832 "errors found in extent allocation tree or chunk allocation");
9834 /* Only re-check super size after we checked and repaired the fs */
9835 err |= !is_super_size_valid(info);
9837 if (!ctx.progress_enabled) {
9838 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9839 fprintf(stderr, "checking free space tree\n");
9840 else
9841 fprintf(stderr, "checking free space cache\n");
9843 ret = check_space_cache(root);
9844 err |= !!ret;
9845 if (ret) {
9846 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
9847 error("errors found in free space tree");
9848 else
9849 error("errors found in free space cache");
9850 goto out;
9854 * We used to have to have these hole extents in between our real
9855 * extents so if we don't have this flag set we need to make sure there
9856 * are no gaps in the file extents for inodes, otherwise we can just
9857 * ignore it when this happens.
9859 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
9860 ret = do_check_fs_roots(info, &root_cache);
9861 err |= !!ret;
9862 if (ret) {
9863 error("errors found in fs roots");
9864 goto out;
9867 if (check_data_csum)
9868 fprintf(stderr, "checking csums against data\n");
9869 else
9870 fprintf(stderr,
9871 "checking only csum items (without verifying data)\n");
9872 ret = check_csums(root);
9874 * Data csum error is not fatal, and it may indicate more serious
9875 * corruption, continue checking.
9877 if (ret)
9878 error("errors found in csum tree");
9879 err |= !!ret;
9881 fprintf(stderr, "checking root refs\n");
9882 /* For low memory mode, check_fs_roots_v2 handles root refs */
9883 if (check_mode != CHECK_MODE_LOWMEM) {
9884 ret = check_root_refs(root, &root_cache);
9885 err |= !!ret;
9886 if (ret) {
9887 error("errors found in root refs");
9888 goto out;
9892 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9893 struct extent_buffer *eb;
9895 eb = list_first_entry(&root->fs_info->recow_ebs,
9896 struct extent_buffer, recow);
9897 list_del_init(&eb->recow);
9898 ret = recow_extent_buffer(root, eb);
9899 err |= !!ret;
9900 if (ret) {
9901 error("fails to fix transid errors");
9902 break;
9906 while (!list_empty(&delete_items)) {
9907 struct bad_item *bad;
9909 bad = list_first_entry(&delete_items, struct bad_item, list);
9910 list_del_init(&bad->list);
9911 if (repair) {
9912 ret = delete_bad_item(root, bad);
9913 err |= !!ret;
9915 free(bad);
9918 if (info->quota_enabled) {
9919 fprintf(stderr, "checking quota groups\n");
9920 ret = qgroup_verify_all(info);
9921 err |= !!ret;
9922 if (ret) {
9923 error("failed to check quota groups");
9924 goto out;
9926 qgroup_report_ret = report_qgroups(0);
9927 ret = repair_qgroups(info, &qgroups_repaired);
9928 if (ret) {
9929 error("failed to repair quota groups");
9930 goto out;
9932 if (qgroup_report_ret && (!qgroups_repaired || ret))
9933 err |= qgroup_report_ret;
9934 ret = 0;
9937 if (!list_empty(&root->fs_info->recow_ebs)) {
9938 error("transid errors in file system");
9939 ret = 1;
9940 err |= !!ret;
9942 out:
9943 printf("found %llu bytes used, ",
9944 (unsigned long long)bytes_used);
9945 if (err)
9946 printf("error(s) found\n");
9947 else
9948 printf("no error found\n");
9949 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9950 printf("total tree bytes: %llu\n",
9951 (unsigned long long)total_btree_bytes);
9952 printf("total fs tree bytes: %llu\n",
9953 (unsigned long long)total_fs_tree_bytes);
9954 printf("total extent tree bytes: %llu\n",
9955 (unsigned long long)total_extent_tree_bytes);
9956 printf("btree space waste bytes: %llu\n",
9957 (unsigned long long)btree_space_waste);
9958 printf("file data blocks allocated: %llu\n referenced %llu\n",
9959 (unsigned long long)data_bytes_allocated,
9960 (unsigned long long)data_bytes_referenced);
9962 free_qgroup_counts();
9963 free_root_recs_tree(&root_cache);
9964 close_out:
9965 close_ctree(root);
9966 err_out:
9967 if (ctx.progress_enabled)
9968 task_deinit(ctx.info);
9970 return err;