Btrfs progs v4.17.1
[btrfs-progs-unstable/devel.git] / check / main.c
blobbc2ee22f7943af4b6ddb40baaa90dd4d93ca6041
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include <time.h>
29 #include "ctree.h"
30 #include "volumes.h"
31 #include "repair.h"
32 #include "disk-io.h"
33 #include "print-tree.h"
34 #include "task-utils.h"
35 #include "transaction.h"
36 #include "utils.h"
37 #include "commands.h"
38 #include "free-space-cache.h"
39 #include "free-space-tree.h"
40 #include "btrfsck.h"
41 #include "qgroup-verify.h"
42 #include "rbtree-utils.h"
43 #include "backref.h"
44 #include "kernel-shared/ulist.h"
45 #include "hash.h"
46 #include "help.h"
47 #include "check/mode-common.h"
48 #include "check/mode-original.h"
49 #include "check/mode-lowmem.h"
/* Running totals accumulated over the whole check run. */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
/* Extents seen more than once, and items queued for deletion during repair. */
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* Nonzero when hole file extents need not exist — presumably the NO_HOLES
 * incompat feature; confirm where this is set. */
int no_holes = 0;
/* Nonzero when the fs uses the free space tree rather than the v1 cache. */
static int is_free_space_tree = 0;
/* Command line switches: rebuild extent tree / verify data checksums. */
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Shared context for the background progress-printing task. */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;

/* Which checker implementation to run (original in-memory vs lowmem). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
78 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
80 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
81 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
82 struct data_backref *back1 = to_data_backref(ext1);
83 struct data_backref *back2 = to_data_backref(ext2);
85 WARN_ON(!ext1->is_data);
86 WARN_ON(!ext2->is_data);
88 /* parent and root are a union, so this covers both */
89 if (back1->parent > back2->parent)
90 return 1;
91 if (back1->parent < back2->parent)
92 return -1;
94 /* This is a full backref and the parents match. */
95 if (back1->node.full_backref)
96 return 0;
98 if (back1->owner > back2->owner)
99 return 1;
100 if (back1->owner < back2->owner)
101 return -1;
103 if (back1->offset > back2->offset)
104 return 1;
105 if (back1->offset < back2->offset)
106 return -1;
108 if (back1->found_ref && back2->found_ref) {
109 if (back1->disk_bytenr > back2->disk_bytenr)
110 return 1;
111 if (back1->disk_bytenr < back2->disk_bytenr)
112 return -1;
114 if (back1->bytes > back2->bytes)
115 return 1;
116 if (back1->bytes < back2->bytes)
117 return -1;
120 return 0;
123 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
125 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
126 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
127 struct tree_backref *back1 = to_tree_backref(ext1);
128 struct tree_backref *back2 = to_tree_backref(ext2);
130 WARN_ON(ext1->is_data);
131 WARN_ON(ext2->is_data);
133 /* parent and root are a union, so this covers both */
134 if (back1->parent > back2->parent)
135 return 1;
136 if (back1->parent < back2->parent)
137 return -1;
139 return 0;
142 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 if (ext1->is_data > ext2->is_data)
148 return 1;
150 if (ext1->is_data < ext2->is_data)
151 return -1;
153 if (ext1->full_backref > ext2->full_backref)
154 return 1;
155 if (ext1->full_backref < ext2->full_backref)
156 return -1;
158 if (ext1->is_data)
159 return compare_data_backref(node1, node2);
160 else
161 return compare_tree_backref(node1, node2);
164 static void print_status_check_line(void *p)
166 struct task_ctx *priv = p;
167 const char *task_position_string[] = {
168 "[1/7] checking root items ",
169 "[2/7] checking extents ",
170 is_free_space_tree ?
171 "[3/7] checking free space tree " :
172 "[3/7] checking free space cache ",
173 "[4/7] checking fs roots ",
174 check_data_csum ?
175 "[5/7] checking csums against data " :
176 "[5/7] checking csums (without verifying data) ",
177 "[6/7] checking root refs ",
178 "[7/7] checking quota groups ",
180 time_t elapsed;
181 int hours;
182 int minutes;
183 int seconds;
185 elapsed = time(NULL) - priv->start_time;
186 hours = elapsed / 3600;
187 elapsed -= hours * 3600;
188 minutes = elapsed / 60;
189 elapsed -= minutes * 60;
190 seconds = elapsed;
192 printf("%s (%d:%02d:%02d elapsed", task_position_string[priv->tp],
193 hours, minutes, seconds);
194 if (priv->item_count > 0)
195 printf(", %llu items checked)\r", priv->item_count);
196 else
197 printf(")\r");
198 fflush(stdout);
/*
 * Thread entry point for the periodic progress display.
 *
 * Prints one status line per period until the task framework cancels the
 * thread from outside; the loop itself never exits (the trailing return
 * only silences the compiler).
 */
static void *print_status_check(void *p)
{
	struct task_ctx *priv = p;

	/* 1 second */
	task_period_start(priv->info, 1000);

	/* Nothing in flight, nothing to report. */
	if (priv->tp == TASK_NOTHING)
		return NULL;

	while (1) {
		print_status_check_line(p);
		/* Sleeps until the next period tick. */
		task_period_wait(priv->info);
	}
	return NULL;
}
/*
 * Final status callback: print the last progress line, terminate it with a
 * newline instead of a carriage return, and flush. Always returns 0.
 */
static int print_status_return(void *p)
{
	print_status_check_line(p);
	putchar('\n');
	fflush(stdout);

	return 0;
}
227 static enum btrfs_check_mode parse_check_mode(const char *str)
229 if (strcmp(str, "lowmem") == 0)
230 return CHECK_MODE_LOWMEM;
231 if (strcmp(str, "orig") == 0)
232 return CHECK_MODE_ORIGINAL;
233 if (strcmp(str, "original") == 0)
234 return CHECK_MODE_ORIGINAL;
236 return CHECK_MODE_UNKNOWN;
239 /* Compatible function to allow reuse of old codes */
240 static u64 first_extent_gap(struct rb_root *holes)
242 struct file_extent_hole *hole;
244 if (RB_EMPTY_ROOT(holes))
245 return (u64)-1;
247 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
248 return hole->start;
251 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
253 struct file_extent_hole *hole1;
254 struct file_extent_hole *hole2;
256 hole1 = rb_entry(node1, struct file_extent_hole, node);
257 hole2 = rb_entry(node2, struct file_extent_hole, node);
259 if (hole1->start > hole2->start)
260 return -1;
261 if (hole1->start < hole2->start)
262 return 1;
263 /* Now hole1->start == hole2->start */
264 if (hole1->len >= hole2->len)
266 * Hole 1 will be merge center
267 * Same hole will be merged later
269 return -1;
270 /* Hole 2 will be merge center */
271 return 1;
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be continuous holes.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int add_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	if (!hole)
		return -ENOMEM;
	hole->start = start;
	hole->len = len;
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
				node);
	if (prev && prev->start + prev->len >= hole->start) {
		/*
		 * NOTE(review): the new length assumes prev never extends
		 * past the end of the new hole; presumably guaranteed by the
		 * descending sort order in compare_hole — confirm.
		 */
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);
		free(prev);
		prev = NULL;
	}

	/* iterate merge with next holes */
	while (1) {
		if (!rb_next(&hole->node))
			break;
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
				node);
		/* Touching or overlapping: absorb the next hole. */
		if (hole->start + hole->len >= next->start) {
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
					hole->start;
			rb_erase(&next->node, holes);
			free(next);
			next = NULL;
		} else
			break;
	}
	return 0;
}
326 static int compare_hole_range(struct rb_node *node, void *data)
328 struct file_extent_hole *hole;
329 u64 start;
331 hole = (struct file_extent_hole *)data;
332 start = hole->start;
334 hole = rb_entry(node, struct file_extent_hole, node);
335 if (start < hole->start)
336 return -1;
337 if (start >= hole->start && start < hole->start + hole->len)
338 return 0;
339 return 1;
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much restrict than add.
 *
 * The range [start, start+len) must fall entirely inside one recorded
 * hole; the hole is removed and the uncovered left/right remainders are
 * re-added. NOTE(review): -EEXIST is returned for "range not found /
 * extends past the hole" — -ENOENT would read more naturally; confirm
 * callers before changing.
 */
static int del_file_extent_hole(struct rb_root *holes,
				u64 start, u64 len)
{
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	u64 prev_start = 0;
	u64 prev_len = 0;
	u64 next_start = 0;
	u64 next_len = 0;
	struct rb_node *node;
	int have_prev = 0;
	int have_next = 0;
	int ret = 0;

	tmp.start = start;
	tmp.len = len;
	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	if (!node)
		return -EEXIST;
	hole = rb_entry(node, struct file_extent_hole, node);
	/* The range to delete must not run past the end of the hole. */
	if (start + len > hole->start + hole->len)
		return -EEXIST;

	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exists.
	 */
	if (start > hole->start) {
		prev_start = hole->start;
		prev_len = start - hole->start;
		have_prev = 1;
	}
	if (hole->start + hole->len > start + len) {
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
		have_next = 1;
	}
	rb_erase(node, holes);
	free(hole);
	if (have_prev) {
		ret = add_file_extent_hole(holes, prev_start, prev_len);
		if (ret < 0)
			return ret;
	}
	if (have_next) {
		ret = add_file_extent_hole(holes, next_start, next_len);
		if (ret < 0)
			return ret;
	}
	return 0;
}
399 static int copy_file_extent_holes(struct rb_root *dst,
400 struct rb_root *src)
402 struct file_extent_hole *hole;
403 struct rb_node *node;
404 int ret = 0;
406 node = rb_first(src);
407 while (node) {
408 hole = rb_entry(node, struct file_extent_hole, node);
409 ret = add_file_extent_hole(dst, hole->start, hole->len);
410 if (ret)
411 break;
412 node = rb_next(node);
414 return ret;
417 static void free_file_extent_holes(struct rb_root *holes)
419 struct rb_node *node;
420 struct file_extent_hole *hole;
422 node = rb_first(holes);
423 while (node) {
424 hole = rb_entry(node, struct file_extent_hole, node);
425 rb_erase(node, holes);
426 free(hole);
427 node = rb_first(holes);
/*
 * Mark a root as participating in the given transaction.
 *
 * On the first touch in this transaction, enable dirty tracking and pin
 * the current node as the commit root (taking an extra reference on the
 * extent buffer so it survives COW of root->node).
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
	}
}
442 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
444 struct device_record *rec1;
445 struct device_record *rec2;
447 rec1 = rb_entry(node1, struct device_record, node);
448 rec2 = rb_entry(node2, struct device_record, node);
449 if (rec1->devid > rec2->devid)
450 return -1;
451 else if (rec1->devid < rec2->devid)
452 return 1;
453 else
454 return 0;
/*
 * Deep-copy an inode record: the struct itself, its backref list, its
 * orphan extent list and its hole tree. The clone starts with refs == 1.
 *
 * Returns the new record or ERR_PTR(-ENOMEM) / ERR_PTR of the hole-copy
 * error; on failure every partially built piece is freed.
 */
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;
	struct rb_node *rb;
	size_t size;
	int ret;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return ERR_PTR(-ENOMEM);
	/* Start from a bitwise copy, then detach the owned containers. */
	memcpy(rec, orig_rec, sizeof(*rec));
	rec->refs = 1;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		/* Backrefs carry their name inline after the struct. */
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		if (!backref) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	}
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		if (!dst_orphan) {
			ret = -ENOMEM;
			goto cleanup;
		}
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	}
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
	if (ret < 0)
		goto cleanup_rb;

	return rec;

cleanup_rb:
	/* Free whatever holes were copied before the failure. */
	rb = rb_first(&rec->holes);
	while (rb) {
		struct file_extent_hole *hole;

		hole = rb_entry(rb, struct file_extent_hole, node);
		rb = rb_next(rb);
		free(hole);
	}

cleanup:
	/* Both lists hold list_head-first entries, so 'orig' works for each. */
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
			free(orig);
		}

	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
			free(orig);
		}

	free(rec);

	return ERR_PTR(ret);
}
531 static void print_orphan_data_extents(struct list_head *orphan_extents,
532 u64 objectid)
534 struct orphan_data_extent *orphan;
536 if (list_empty(orphan_extents))
537 return;
538 printf("The following data extent is lost in tree %llu:\n",
539 objectid);
540 list_for_each_entry(orphan, orphan_extents, list) {
541 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
542 orphan->objectid, orphan->offset, orphan->disk_bytenr,
543 orphan->disk_len);
547 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
549 u64 root_objectid = root->root_key.objectid;
550 int errors = rec->errors;
552 if (!errors)
553 return;
554 /* reloc root errors, we print its corresponding fs root objectid*/
555 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
556 root_objectid = root->root_key.offset;
557 fprintf(stderr, "reloc");
559 fprintf(stderr, "root %llu inode %llu errors %x",
560 (unsigned long long) root_objectid,
561 (unsigned long long) rec->ino, rec->errors);
563 if (errors & I_ERR_NO_INODE_ITEM)
564 fprintf(stderr, ", no inode item");
565 if (errors & I_ERR_NO_ORPHAN_ITEM)
566 fprintf(stderr, ", no orphan item");
567 if (errors & I_ERR_DUP_INODE_ITEM)
568 fprintf(stderr, ", dup inode item");
569 if (errors & I_ERR_DUP_DIR_INDEX)
570 fprintf(stderr, ", dup dir index");
571 if (errors & I_ERR_ODD_DIR_ITEM)
572 fprintf(stderr, ", odd dir item");
573 if (errors & I_ERR_ODD_FILE_EXTENT)
574 fprintf(stderr, ", odd file extent");
575 if (errors & I_ERR_BAD_FILE_EXTENT)
576 fprintf(stderr, ", bad file extent");
577 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
578 fprintf(stderr, ", file extent overlap");
579 if (errors & I_ERR_FILE_EXTENT_TOO_LARGE)
580 fprintf(stderr, ", inline file extent too large");
581 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
582 fprintf(stderr, ", file extent discount");
583 if (errors & I_ERR_DIR_ISIZE_WRONG)
584 fprintf(stderr, ", dir isize wrong");
585 if (errors & I_ERR_FILE_NBYTES_WRONG)
586 fprintf(stderr, ", nbytes wrong");
587 if (errors & I_ERR_ODD_CSUM_ITEM)
588 fprintf(stderr, ", odd csum item");
589 if (errors & I_ERR_SOME_CSUM_MISSING)
590 fprintf(stderr, ", some csum missing");
591 if (errors & I_ERR_LINK_COUNT_WRONG)
592 fprintf(stderr, ", link count wrong");
593 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
594 fprintf(stderr, ", orphan file extent");
595 if (errors & I_ERR_ODD_INODE_FLAGS)
596 fprintf(stderr, ", odd inode flags");
597 if (errors & I_ERR_INLINE_RAM_BYTES_WRONG)
598 fprintf(stderr, ", invalid inline ram bytes");
599 fprintf(stderr, "\n");
600 /* Print the orphan extents if needed */
601 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
602 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
604 /* Print the holes if needed */
605 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
606 struct file_extent_hole *hole;
607 struct rb_node *node;
608 int found = 0;
610 node = rb_first(&rec->holes);
611 fprintf(stderr, "Found file extent holes:\n");
612 while (node) {
613 found = 1;
614 hole = rb_entry(node, struct file_extent_hole, node);
615 fprintf(stderr, "\tstart: %llu, len: %llu\n",
616 hole->start, hole->len);
617 node = rb_next(node);
619 if (!found)
620 fprintf(stderr, "\tstart: 0, len: %llu\n",
621 round_up(rec->isize,
622 root->fs_info->sectorsize));
626 static void print_ref_error(int errors)
628 if (errors & REF_ERR_NO_DIR_ITEM)
629 fprintf(stderr, ", no dir item");
630 if (errors & REF_ERR_NO_DIR_INDEX)
631 fprintf(stderr, ", no dir index");
632 if (errors & REF_ERR_NO_INODE_REF)
633 fprintf(stderr, ", no inode ref");
634 if (errors & REF_ERR_DUP_DIR_ITEM)
635 fprintf(stderr, ", dup dir item");
636 if (errors & REF_ERR_DUP_DIR_INDEX)
637 fprintf(stderr, ", dup dir index");
638 if (errors & REF_ERR_DUP_INODE_REF)
639 fprintf(stderr, ", dup inode ref");
640 if (errors & REF_ERR_INDEX_UNMATCH)
641 fprintf(stderr, ", index mismatch");
642 if (errors & REF_ERR_FILETYPE_UNMATCH)
643 fprintf(stderr, ", filetype mismatch");
644 if (errors & REF_ERR_NAME_TOO_LONG)
645 fprintf(stderr, ", name too long");
646 if (errors & REF_ERR_NO_ROOT_REF)
647 fprintf(stderr, ", no root ref");
648 if (errors & REF_ERR_NO_ROOT_BACKREF)
649 fprintf(stderr, ", no root backref");
650 if (errors & REF_ERR_DUP_ROOT_REF)
651 fprintf(stderr, ", dup root ref");
652 if (errors & REF_ERR_DUP_ROOT_BACKREF)
653 fprintf(stderr, ", dup root backref");
654 fprintf(stderr, "\n");
/*
 * Look up the inode record for 'ino' in the cache.
 *
 * With mod == 0 this is a pure lookup (may return NULL). With mod != 0 a
 * shared record (refs > 1) is cloned copy-on-write before being returned,
 * and a missing record is created and inserted. Returns an ERR_PTR on
 * allocation/clone failure.
 */
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
					  u64 ino, int mod)
{
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;
	int ret;

	cache = lookup_cache_extent(inode_cache, ino, 1);
	if (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		/* COW: don't hand out a record still shared with others. */
		if (mod && rec->refs > 1) {
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
				return node->data;
			rec->refs--;
			rec = node->data;
		}
	} else if (mod) {
		rec = calloc(1, sizeof(*rec));
		if (!rec)
			return ERR_PTR(-ENOMEM);
		rec->ino = ino;
		/* (u64)-1 marks "no extent seen yet". */
		rec->extent_start = (u64)-1;
		rec->refs = 1;
		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
		if (!node) {
			free(rec);
			return ERR_PTR(-ENOMEM);
		}
		node->cache.start = ino;
		node->cache.size = 1;
		node->data = rec;

		/* The free-ino cache inode has no backref; fake one link. */
		if (ino == BTRFS_FREE_INO_OBJECTID)
			rec->found_link = 1;

		ret = insert_cache_extent(inode_cache, &node->cache);
		/* NOTE(review): node and rec leak on insert failure. */
		if (ret)
			return ERR_PTR(-EEXIST);
	}
	return rec;
}
706 static void free_orphan_data_extents(struct list_head *orphan_extents)
708 struct orphan_data_extent *orphan;
710 while (!list_empty(orphan_extents)) {
711 orphan = list_entry(orphan_extents->next,
712 struct orphan_data_extent, list);
713 list_del(&orphan->list);
714 free(orphan);
718 static void free_inode_rec(struct inode_record *rec)
720 struct inode_backref *backref;
722 if (--rec->refs > 0)
723 return;
725 while (!list_empty(&rec->backrefs)) {
726 backref = to_inode_backref(rec->backrefs.next);
727 list_del(&backref->list);
728 free(backref);
730 free_orphan_data_extents(&rec->orphan_extents);
731 free_file_extent_holes(&rec->holes);
732 free(rec);
735 static int can_free_inode_rec(struct inode_record *rec)
737 if (!rec->errors && rec->checked && rec->found_inode_item &&
738 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
739 return 1;
740 return 0;
/*
 * Run the deferred consistency checks on an inode record and release it
 * from the cache if everything is satisfied.
 *
 * First prunes fully-matched backrefs (flagging filetype mismatches),
 * then — once the record is checked and not mid-merge — validates
 * size/nbytes/extent coverage and csum expectations per file type.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
{
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;
	u8 filetype;

	/* Nothing to validate against until the inode item was seen. */
	if (!rec->found_inode_item)
		return;

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* Fully matched, clean backref: drop it. */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
				free(backref);
			}
		}
	}

	if (!rec->checked || rec->merging)
		return;

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* Gaps in extent coverage are errors unless NO_HOLES. */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
	}

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;
	}

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		free_inode_rec(rec);
	}
}
804 static int check_orphan_item(struct btrfs_root *root, u64 ino)
806 struct btrfs_path path;
807 struct btrfs_key key;
808 int ret;
810 key.objectid = BTRFS_ORPHAN_OBJECTID;
811 key.type = BTRFS_ORPHAN_ITEM_KEY;
812 key.offset = ino;
814 btrfs_init_path(&path);
815 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
816 btrfs_release_path(&path);
817 if (ret > 0)
818 ret = -ENOENT;
819 return ret;
/*
 * Record the contents of an INODE_ITEM into the currently active inode
 * record of the walk.
 *
 * Returns 1 (and flags a duplicate) if the record already holds an inode
 * item, 0 otherwise.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_inode_item *item;
	u64 flags;

	rec = active_node->current;
	/* The walker must have positioned 'current' on this objectid. */
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
		return 1;
	}
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
		rec->nodatasum = 1;
	rec->found_inode_item = 1;
	/* nlink == 0 requires an orphan item; assume missing until seen. */
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	flags = btrfs_inode_flags(eb, item);
	/* Symlinks must not carry immutable/append-only flags. */
	if (S_ISLNK(rec->imode) &&
	    flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND))
		rec->errors |= I_ERR_ODD_INODE_FLAGS;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
	return 0;
}
854 static struct inode_backref *get_inode_backref(struct inode_record *rec,
855 const char *name,
856 int namelen, u64 dir)
858 struct inode_backref *backref;
860 list_for_each_entry(backref, &rec->backrefs, list) {
861 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
862 break;
863 if (backref->dir != dir || backref->namelen != namelen)
864 continue;
865 if (memcmp(name, backref->name, namelen))
866 continue;
867 return backref;
870 backref = malloc(sizeof(*backref) + namelen + 1);
871 if (!backref)
872 return NULL;
873 memset(backref, 0, sizeof(*backref));
874 backref->dir = dir;
875 backref->namelen = namelen;
876 memcpy(backref->name, name, namelen);
877 backref->name[namelen] = '\0';
878 list_add_tail(&backref->list, &rec->backrefs);
879 return backref;
/*
 * Record one directory/inode linkage item into the backref of inode 'ino'.
 *
 * 'itemtype' selects which side is being recorded: DIR_INDEX, DIR_ITEM,
 * or INODE_REF/INODE_EXTREF. Cross-checks index and filetype against the
 * sides already seen and accumulates REF_ERR_* flags. Always returns 0.
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     u8 filetype, u8 itemtype, int errors)
{
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
	BUG_ON(!backref);
	if (errors)
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		/* The inode ref (if seen) must agree on the index. */
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		/* Each DIR_ITEM counts as one link to the inode. */
		rec->found_link++;
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		else
			backref->index = index;

		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	} else {
		/* Caller passed an unsupported item type. */
		BUG_ON(1);
	}

	maybe_free_inode_rec(inode_cache, rec);
	return 0;
}
/*
 * Fold the information gathered in 'src' into 'dst' (same inode seen via
 * different tree paths): replay backrefs, merge flags, sizes, extent
 * coverage and hole records, and combine error bits.
 *
 * dst->merging suppresses premature validation in maybe_free_inode_rec()
 * while the backrefs are replayed. Returns 0 or a negative errno from the
 * hole copy.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
{
	struct inode_backref *backref;
	u32 dir_count = 0;
	int ret = 0;

	dst->merging = 1;
	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					  backref->index, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_INDEX_KEY, backref->errors);
		}
		if (backref->found_dir_item) {
			dir_count++;
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, 0, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_ITEM_KEY, backref->errors);
		}
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, backref->index,
					  backref->name, backref->namelen, 0,
					  backref->ref_type, backref->errors);
		}
	}

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* Only pull src's holes in when src has the earlier first gap. */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
		if (ret < 0)
			return ret;
	}

	/* Links were re-counted by the DIR_ITEM replays above. */
	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		} else {
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				/*
				 * NOTE(review): this return code is never
				 * checked; an -ENOMEM here is silently
				 * dropped — confirm whether that's intended.
				 */
				ret = add_file_extent_hole(&dst->holes,
					dst->extent_end,
					src->extent_start - dst->extent_end);
			}
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;
		}
	}

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		} else {
			dst->errors |= I_ERR_DUP_INODE_ITEM;
		}
	}
	dst->merging = 0;

	return 0;
}
/*
 * Move (or copy) all inode records from a shared node into another.
 *
 * When this drops src_node's last reference, entries are spliced over
 * directly; otherwise each record is duplicated with its refcount bumped.
 * Records colliding in dst are merged. Both the root_cache and the
 * inode_cache are processed, and dst_node->current is advanced to track
 * the highest in-progress inode. Always returns 0.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
{
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;
	int splice = 0;
	int ret;

	/* Last reference gone: we can move entries instead of copying. */
	if (--src_node->refs == 0)
		splice = 1;
	if (src_node->current)
		current_ino = src_node->current->ino;

	/* First pass: root cache; second pass (via 'again'): inode cache. */
	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
again:
	cache = search_cache_extent(src, 0);
	while (cache) {
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		cache = next_cache_extent(cache);

		if (splice) {
			remove_cache_extent(src, &node->cache);
			ins = node;
		} else {
			ins = malloc(sizeof(*ins));
			BUG_ON(!ins);
			ins->cache.start = node->cache.start;
			ins->cache.size = node->cache.size;
			ins->data = rec;
			rec->refs++;
		}
		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			/* Same inode already in dst: merge and drop ours. */
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
			if (rec->checked) {
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			}
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);
			free(ins);
		} else {
			BUG_ON(ret);
		}
	}

	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
		goto again;
	}

	/* Keep dst's notion of the in-progress inode at the highest ino. */
	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		}
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
	}
	return 0;
}
1089 static void free_inode_ptr(struct cache_extent *cache)
1091 struct ptr_node *node;
1092 struct inode_record *rec;
1094 node = container_of(cache, struct ptr_node, cache);
1095 rec = node->data;
1096 free_inode_rec(rec);
1097 free(node);
1100 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1102 static struct shared_node *find_shared_node(struct cache_tree *shared,
1103 u64 bytenr)
1105 struct cache_extent *cache;
1106 struct shared_node *node;
1108 cache = lookup_cache_extent(shared, bytenr, 1);
1109 if (cache) {
1110 node = container_of(cache, struct shared_node, cache);
1111 return node;
1113 return NULL;
1116 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1118 int ret;
1119 struct shared_node *node;
1121 node = calloc(1, sizeof(*node));
1122 if (!node)
1123 return -ENOMEM;
1124 node->cache.start = bytenr;
1125 node->cache.size = 1;
1126 cache_tree_init(&node->root_cache);
1127 cache_tree_init(&node->inode_cache);
1128 node->refs = refs;
1130 ret = insert_cache_extent(shared, &node->cache);
1132 return ret;
/*
 * Handle descending into a tree block that has multiple references.
 *
 * Returns 0 when the walk should continue into the block (first visit:
 * a shared node is registered and becomes the active node), or 1 when
 * the block was already processed (its accumulated records are spliced
 * into the current active node, or discarded for dead roots).
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int ret;

	if (level == wc->active_node)
		return 0;

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
	if (!node) {
		/* First visit: remember this block and keep walking. */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		BUG_ON(ret);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;
		return 0;
	}

	/* Dead root (refs == 0): drop the cached records instead of merging. */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
			free(node);
		}
		return 1;
	}

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
		free(node);
	}
	return 1;
}
/*
 * Counterpart of enter_shared_node(): called when the walk pops back above
 * @level.  Hands the active node's accumulated records to the next tracked
 * shared node further up the path (or just drops a reference for a dead
 * root at its top level).
 *
 * Always returns 0.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
{
	struct shared_node *node;
	struct shared_node *dest;
	int i;

	if (level == wc->root_level)
		return 0;

	/* Find the nearest tracked shared node above @level */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
		if (wc->nodes[i])
			break;
	}
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		/* Live root: merge records upward */
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
	} else {
		/* Dead root: just drop one reference */
		BUG_ON(node->refs < 2);
		node->refs--;
	}
	return 0;
}
/*
 * Determine the parent relationship of two roots by inspecting the tree
 * root's ROOT_REF / ROOT_BACKREF items.
 *
 * Returns:
 * < 0 - on error
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
			 u64 child_root_id)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int has_parent = 0;
	int ret;

	btrfs_init_path(&path);

	/* Fast path: exact ROOT_REF (parent -> child) item */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(&path);
	if (!ret)
		return 1;

	/* Slow path: scan all ROOT_BACKREF items of the child */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
				0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			if (ret)
				break;
			leaf = path.nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)
			break;

		has_parent = 1;

		/* offset of a ROOT_BACKREF item is the parent root id */
		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);
			return 1;
		}

		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (ret < 0)
		return ret;
	return has_parent ? 0 : 2;
}
/*
 * Walk every btrfs_dir_item packed into the DIR_ITEM/DIR_INDEX item at
 * @slot, validate name length and (for DIR_ITEM) the name hash, and record
 * an inode backref for each entry into the active node's caches.
 *
 * Always returns 0; problems are accumulated into rec->errors and the
 * per-backref error flags.
 */
static int process_dir_item(struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
{
	u32 total;
	u32 cur = 0;
	u32 len;
	u32 name_len;
	u32 data_len;
	int error;
	int nritems = 0;
	u8 filetype;
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	/* Multiple dir items may share one item; walk them back to back */
	while (cur < total) {
		nritems++;
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (cur + sizeof(*di) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			error = REF_ERR_NAME_TOO_LONG;

			/* Header itself crosses the boundary: stop here */
			if (cur + sizeof(*di) > total)
				break;
			/* Still read out whatever fits in the item */
			len = min_t(u32, total - cur - sizeof(*di),
				    BTRFS_NAME_LEN);
		} else {
			len = name_len;
			error = 0;
		}

		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		/* DIR_ITEM keys are keyed by the name hash; cross-check it */
		if (key->type == BTRFS_DIR_ITEM_KEY &&
		    key->offset != btrfs_name_hash(namebuf, len)) {
			rec->errors |= I_ERR_ODD_DIR_ITEM;
			error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
			key->objectid, key->offset, namebuf, len, filetype,
			key->offset, btrfs_name_hash(namebuf, len));
		}

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			/* Entry points at a subvolume root */
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
					  key->type, error);
		} else {
			fprintf(stderr,
				"unknown location type %d in DIR_ITEM[%llu %llu]\n",
				location.type, key->objectid, key->offset);
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		}

		/* Advance past header + name + payload to the next entry */
		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;
	}
	/* A DIR_INDEX item must contain exactly one entry */
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

	return 0;
}
1361 static int process_inode_ref(struct extent_buffer *eb,
1362 int slot, struct btrfs_key *key,
1363 struct shared_node *active_node)
1365 u32 total;
1366 u32 cur = 0;
1367 u32 len;
1368 u32 name_len;
1369 u64 index;
1370 int error;
1371 struct cache_tree *inode_cache;
1372 struct btrfs_inode_ref *ref;
1373 char namebuf[BTRFS_NAME_LEN];
1375 inode_cache = &active_node->inode_cache;
1377 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1378 total = btrfs_item_size_nr(eb, slot);
1379 while (cur < total) {
1380 name_len = btrfs_inode_ref_name_len(eb, ref);
1381 index = btrfs_inode_ref_index(eb, ref);
1383 /* inode_ref + namelen should not cross item boundary */
1384 if (cur + sizeof(*ref) + name_len > total ||
1385 name_len > BTRFS_NAME_LEN) {
1386 if (total < cur + sizeof(*ref))
1387 break;
1389 /* Still try to read out the remaining part */
1390 len = min_t(u32, total - cur - sizeof(*ref),
1391 BTRFS_NAME_LEN);
1392 error = REF_ERR_NAME_TOO_LONG;
1393 } else {
1394 len = name_len;
1395 error = 0;
1398 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1399 add_inode_backref(inode_cache, key->objectid, key->offset,
1400 index, namebuf, len, 0, key->type, error);
1402 len = sizeof(*ref) + name_len;
1403 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1404 cur += len;
1406 return 0;
1409 static int process_inode_extref(struct extent_buffer *eb,
1410 int slot, struct btrfs_key *key,
1411 struct shared_node *active_node)
1413 u32 total;
1414 u32 cur = 0;
1415 u32 len;
1416 u32 name_len;
1417 u64 index;
1418 u64 parent;
1419 int error;
1420 struct cache_tree *inode_cache;
1421 struct btrfs_inode_extref *extref;
1422 char namebuf[BTRFS_NAME_LEN];
1424 inode_cache = &active_node->inode_cache;
1426 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1427 total = btrfs_item_size_nr(eb, slot);
1428 while (cur < total) {
1429 name_len = btrfs_inode_extref_name_len(eb, extref);
1430 index = btrfs_inode_extref_index(eb, extref);
1431 parent = btrfs_inode_extref_parent(eb, extref);
1432 if (name_len <= BTRFS_NAME_LEN) {
1433 len = name_len;
1434 error = 0;
1435 } else {
1436 len = BTRFS_NAME_LEN;
1437 error = REF_ERR_NAME_TOO_LONG;
1439 read_extent_buffer(eb, namebuf,
1440 (unsigned long)(extref + 1), len);
1441 add_inode_backref(inode_cache, key->objectid, parent,
1442 index, namebuf, len, 0, key->type, error);
1444 len = sizeof(*extref) + name_len;
1445 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1446 cur += len;
1448 return 0;
/*
 * Validate one EXTENT_DATA item of the currently tracked inode: check for
 * overlaps/holes against the previous extent, sanity-check inline and
 * regular/prealloc extents, and verify checksum coverage.
 *
 * Returns 0 on success or a negative errno from the hole/csum helpers;
 * structural problems are accumulated into rec->errors instead.
 */
static int process_file_extent(struct btrfs_root *root,
				struct extent_buffer *eb,
				int slot, struct btrfs_key *key,
				struct shared_node *active_node)
{
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes = 0;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->fs_info->sectorsize - 1;
	u32 max_inline_size = min_t(u32, mask,
				BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info));
	int extent_type;
	int ret;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	/* First extent of this inode: start tracking its coverage */
	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;
	}

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		/* Gap between extents: record it as a hole */
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);
		if (ret < 0)
			return ret;
	}

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		u8 compression = btrfs_file_extent_compression(eb, fi);
		struct btrfs_item *item = btrfs_item_nr(slot);

		num_bytes = btrfs_file_extent_ram_bytes(eb, fi);
		if (num_bytes == 0)
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (compression) {
			/* Compressed inline: on-disk and decompressed limits */
			if (btrfs_file_extent_inline_item_len(eb, item) >
			    max_inline_size ||
			    num_bytes > root->fs_info->sectorsize)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
		} else {
			if (num_bytes > max_inline_size)
				rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE;
			/* Uncompressed inline data length must match ram_bytes */
			if (btrfs_file_extent_inline_item_len(eb, item) !=
			    num_bytes)
				rec->errors |= I_ERR_INLINE_RAM_BYTES_WRONG;
		}
		rec->found_size += num_bytes;
		/* Round coverage up to the sector boundary */
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		/* Regular extents must be non-empty and sector aligned */
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents can't be compressed/encrypted/encoded */
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 means a hole extent: no size accounted */
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
	} else {
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	}
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		u64 found;
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
		else
			disk_bytenr += extent_offset;

		ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
				       &found);
		if (ret < 0)
			return ret;
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
			if (found > 0)
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* Prealloc with csums must have been fully written */
			if (found > 0) {
				ret = check_prealloc_extent_written(root->fs_info,
								    disk_bytenr,
								    num_bytes);
				if (ret < 0)
					return ret;
				if (ret == 0)
					rec->errors |= I_ERR_ODD_CSUM_ITEM;
			}
		}
	}
	return 0;
}
/*
 * Scan one fs-tree leaf and dispatch each item to the matching
 * process_*() handler, maintaining active_node->current as the inode
 * record the items belong to (leaf items are sorted by objectid).
 *
 * Returns the result of the last handler invoked (0 if none).
 */
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
{
	struct btrfs_key key;
	u32 nritems;
	int i;
	int ret = 0;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	/* Nothing to collect for a dead root at its top level */
	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)
		return 0;

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
			continue;
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
			continue;

		/* Moved on to a new inode: finish the previous record */
		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			}
			active_node->current = get_inode_rec(inode_cache,
							     key.objectid, 1);
			BUG_ON(IS_ERR(active_node->current));
		}
		switch (key.type) {
		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
			break;
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
			break;
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,
						  active_node);
			break;
		default:
			break;
		}
	}
	return ret;
}
/*
 * Descend from path->nodes[*level] towards the leaves, validating each
 * child block (generation, leaf/node structure) and processing leaves via
 * process_one_leaf().  Shared blocks (extent refs > 1) are handled through
 * enter_shared_node() so each is walked only once; @nrefs caches the last
 * extent-ref lookup per level to avoid repeated extent tree searches.
 *
 * On return path->slots[*level] is pushed past the end of the current node
 * so the caller's walk_up_tree() pops correctly.  Returns 0 or a negative
 * error (-EIO for unreadable/corrupt blocks).
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
{
	enum btrfs_tree_block_status status;
	u64 bytenr;
	u64 ptr_gen;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	int ret, err = 0;
	u64 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* Use the cached ref count for the starting block if still valid */
	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
		ret = 0;
	} else {
		ret = btrfs_lookup_extent_info(NULL, fs_info,
					       path->nodes[*level]->start,
					       *level, 1, &refs, NULL);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;
	}

	if (refs > 1) {
		ret = enter_shared_node(root, path->nodes[*level]->start,
					refs, wc, *level);
		if (ret > 0) {
			/* Shared subtree already accounted: skip it */
			err = ret;
			goto out;
		}
	}

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = process_one_leaf(root, cur, wc);
			if (ret < 0)
				err = ret;
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

		/* Same per-level ref caching for the child block */
		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
		} else {
			ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
					*level - 1, 1, &refs, NULL);
			if (ret < 0) {
				refs = 0;
			} else {
				nrefs->bytenr[*level - 1] = bytenr;
				nrefs->refs[*level - 1] = refs;
			}
		}

		if (refs > 1) {
			ret = enter_shared_node(root, bytenr, refs,
						wc, *level - 1);
			if (ret > 0) {
				/* Child already walked elsewhere */
				path->slots[*level]++;
				continue;
			}
		}

		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			/* Prefetch siblings before the synchronous read */
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, ptr_gen);
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      &node_key,
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						&node_key,
						path->nodes[*level]->start,
						root->fs_info->nodesize,
						*level);
				err = -EIO;
				goto out;
			}
		}

		ret = check_child_node(cur, path->slots[*level], next);
		if (ret) {
			free_extent_buffer(next);
			err = ret;
			goto out;
		}

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
		else
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);
			err = -EIO;
			goto out;
		}

		/* Step down one level into the validated child */
		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
out:
	/* Force walk_up_tree() to pop above this level */
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
	return err;
}
1762 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1763 struct walk_control *wc, int *level)
1765 int i;
1766 struct extent_buffer *leaf;
1768 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1769 leaf = path->nodes[i];
1770 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1771 path->slots[i]++;
1772 *level = i;
1773 return 0;
1775 free_extent_buffer(path->nodes[*level]);
1776 path->nodes[*level] = NULL;
1777 BUG_ON(*level > wc->active_node);
1778 if (*level == wc->active_node)
1779 leave_shared_node(root, wc, *level);
1780 *level = i + 1;
1782 return 1;
1785 static int check_root_dir(struct inode_record *rec)
1787 struct inode_backref *backref;
1788 int ret = -1;
1790 if (!rec->found_inode_item || rec->errors)
1791 goto out;
1792 if (rec->nlink != 1 || rec->found_link != 0)
1793 goto out;
1794 if (list_empty(&rec->backrefs))
1795 goto out;
1796 backref = to_inode_backref(rec->backrefs.next);
1797 if (!backref->found_inode_ref)
1798 goto out;
1799 if (backref->index != 0 || backref->namelen != 2 ||
1800 memcmp(backref->name, "..", 2))
1801 goto out;
1802 if (backref->found_dir_index || backref->found_dir_item)
1803 goto out;
1804 ret = 0;
1805 out:
1806 return ret;
/*
 * Rewrite the on-disk isize of the directory inode in @rec to the size
 * computed from its dir entries (rec->found_size) and clear
 * I_ERR_DIR_ISIZE_WRONG.
 *
 * Returns 0 on success, -ENOENT when the inode item cannot be found, or a
 * negative errno from the tree search.
 */
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
{
	struct btrfs_inode_item *ei;
	struct btrfs_key key;
	int ret;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	/* Search with offset -1 then step back to land on the inode item */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret) {
		if (!path->slots[0]) {
			ret = -ENOENT;
			goto out;
		}
		path->slots[0]--;
		ret = 0;
	}
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {
		ret = -ENOENT;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %llu root %llu\n", rec->ino,
	       root->root_key.objectid);
out:
	btrfs_release_path(path);
	return ret;
}
1850 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1851 struct btrfs_root *root,
1852 struct btrfs_path *path,
1853 struct inode_record *rec)
1855 int ret;
1857 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1858 btrfs_release_path(path);
1859 if (!ret)
1860 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1861 return ret;
1864 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1865 struct btrfs_root *root,
1866 struct btrfs_path *path,
1867 struct inode_record *rec)
1869 struct btrfs_inode_item *ei;
1870 struct btrfs_key key;
1871 int ret = 0;
1873 key.objectid = rec->ino;
1874 key.type = BTRFS_INODE_ITEM_KEY;
1875 key.offset = 0;
1877 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1878 if (ret) {
1879 if (ret > 0)
1880 ret = -ENOENT;
1881 goto out;
1884 /* Since ret == 0, no need to check anything */
1885 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1886 struct btrfs_inode_item);
1887 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
1888 btrfs_mark_buffer_dirty(path->nodes[0]);
1889 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
1890 printf("reset nbytes for ino %llu root %llu\n",
1891 rec->ino, root->root_key.objectid);
1892 out:
1893 btrfs_release_path(path);
1894 return ret;
/*
 * Insert the DIR_INDEX item that @backref says is missing for @rec, inside
 * its own transaction, then update the parent directory's accounting
 * (found_size and the isize error flag).
 *
 * Returns 0 on success or a negative errno when the transaction cannot be
 * started; insertion failure is fatal (BUG_ON).
 */
static int add_missing_dir_index(struct btrfs_root *root,
				 struct cache_tree *inode_cache,
				 struct inode_record *rec,
				 struct inode_backref *backref)
{
	struct btrfs_path path;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *dir_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct inode_record *dir_rec;
	unsigned long name_ptr;
	/* dir item header plus the entry name, no payload */
	u32 data_size = sizeof(*dir_item) + backref->namelen;
	int ret;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
		(unsigned long long)rec->ino);

	btrfs_init_path(&path);
	key.objectid = backref->dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = backref->index;
	ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
	BUG_ON(ret);

	leaf = path.nodes[0];
	dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);

	/* The new entry points at the child's inode item */
	disk_key.objectid = cpu_to_le64(rec->ino);
	disk_key.type = BTRFS_INODE_ITEM_KEY;
	disk_key.offset = 0;

	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
	btrfs_set_dir_data_len(leaf, dir_item, 0);
	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
	name_ptr = (unsigned long)(dir_item + 1);
	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);

	backref->found_dir_index = 1;
	/* Update the parent dir's record if we are tracking it */
	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
	BUG_ON(IS_ERR(dir_rec));
	if (!dir_rec)
		return 0;
	dir_rec->found_size += backref->namelen;
	if (dir_rec->found_size == dir_rec->isize &&
	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	if (dir_rec->found_size != dir_rec->isize)
		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;

	return 0;
}
/*
 * Delete the bad DIR_INDEX entry described by @backref, in its own
 * transaction.  When only the name within a multi-entry item is bad, just
 * that name is removed; a missing entry (-ENOENT) is treated as success.
 *
 * Returns 0 on success or a negative errno; unexpected deletion failure is
 * fatal (BUG_ON).
 */
static int delete_dir_index(struct btrfs_root *root,
			    struct inode_backref *backref)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path path;
	int ret = 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
		(unsigned long long)backref->dir,
		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
		(unsigned long long)root->objectid);

	btrfs_init_path(&path);
	di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
				    backref->name, backref->namelen,
				    backref->index, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		btrfs_release_path(&path);
		btrfs_commit_transaction(trans, root);
		/* Already gone: nothing to repair */
		if (ret == -ENOENT)
			return 0;
		return ret;
	}

	/* NULL di means the name didn't match: drop the whole item */
	if (!di)
		ret = btrfs_del_item(trans, root, &path);
	else
		ret = btrfs_delete_one_dir_name(trans, root, &path, di);
	BUG_ON(ret);
	btrfs_release_path(&path);
	btrfs_commit_transaction(trans, root);
	return ret;
}
1999 static int create_inode_item(struct btrfs_root *root,
2000 struct inode_record *rec, int root_dir)
2002 struct btrfs_trans_handle *trans;
2003 u64 nlink = 0;
2004 u32 mode = 0;
2005 u64 size = 0;
2006 int ret;
2008 trans = btrfs_start_transaction(root, 1);
2009 if (IS_ERR(trans)) {
2010 ret = PTR_ERR(trans);
2011 return ret;
2014 nlink = root_dir ? 1 : rec->found_link;
2015 if (rec->found_dir_item) {
2016 if (rec->found_file_extent)
2017 fprintf(stderr, "root %llu inode %llu has both a dir "
2018 "item and extents, unsure if it is a dir or a "
2019 "regular file so setting it as a directory\n",
2020 (unsigned long long)root->objectid,
2021 (unsigned long long)rec->ino);
2022 mode = S_IFDIR | 0755;
2023 size = rec->found_size;
2024 } else if (!rec->found_dir_item) {
2025 size = rec->extent_end;
2026 mode = S_IFREG | 0755;
2029 ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2030 nlink, mode);
2031 btrfs_commit_transaction(trans, root);
2032 return 0;
/*
 * Repair the backrefs of @rec in two passes driven by @delete: when set,
 * remove dir indexes that have no (or mismatched) inode refs; when clear,
 * recreate whatever half of the link is missing (dir index, dir
 * index/item pair, or the inode item itself).
 *
 * Returns a negative errno on failure, otherwise the number of repairs
 * performed.
 */
static int repair_inode_backrefs(struct btrfs_root *root,
				 struct inode_record *rec,
				 struct cache_tree *inode_cache,
				 int delete)
{
	struct inode_backref *tmp, *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	int ret = 0;
	int repaired = 0;

	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		/* The root dir inode itself may need to be recreated */
		if (!delete && rec->ino == root_dirid) {
			if (!rec->found_inode_item) {
				ret = create_inode_item(root, rec, 1);
				if (ret)
					break;
				repaired++;
			}
		}

		/* Index 0 for root dir's are special, don't mess with it */
		if (rec->ino == root_dirid && backref->index == 0)
			continue;

		/*
		 * Delete pass: a dir index without an inode ref, or with a
		 * mismatched one, is bogus and gets removed.
		 */
		if (delete &&
		    ((backref->found_dir_index && !backref->found_inode_ref) ||
		     (backref->found_dir_index && backref->found_inode_ref &&
		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
			ret = delete_dir_index(root, backref);
			if (ret)
				break;
			repaired++;
			list_del(&backref->list);
			free(backref);
			continue;
		}

		/* Have dir item + inode ref but no dir index: add it */
		if (!delete && !backref->found_dir_index &&
		    backref->found_dir_item && backref->found_inode_ref) {
			ret = add_missing_dir_index(root, inode_cache, rec,
						    backref);
			if (ret)
				break;
			repaired++;
			if (backref->found_dir_item &&
			    backref->found_dir_index) {
				/* Fully consistent now: drop the backref */
				if (!backref->errors &&
				    backref->found_inode_ref) {
					list_del(&backref->list);
					free(backref);
					continue;
				}
			}
		}

		/* Only an inode ref exists: recreate the dir entry pair */
		if (!delete && (!backref->found_dir_index &&
				!backref->found_dir_item &&
				backref->found_inode_ref)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_key location;

			ret = check_dir_conflict(root, backref->name,
						 backref->namelen,
						 backref->dir,
						 backref->index);
			if (ret) {
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
				ret = 0;
				break;
			}
			location.objectid = rec->ino;
			location.type = BTRFS_INODE_ITEM_KEY;
			location.offset = 0;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			fprintf(stderr, "adding missing dir index/item pair "
				"for inode %llu\n",
				(unsigned long long)rec->ino);
			ret = btrfs_insert_dir_item(trans, root, backref->name,
						    backref->namelen,
						    backref->dir, &location,
						    imode_to_type(rec->imode),
						    backref->index);
			BUG_ON(ret);
			btrfs_commit_transaction(trans, root);
			repaired++;
		}

		/* Fully linked but the inode item itself is missing */
		if (!delete && (backref->found_inode_ref &&
				backref->found_dir_index &&
				backref->found_dir_item &&
				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
				!rec->found_inode_item)) {
			ret = create_inode_item(root, rec, 0);
			if (ret)
				break;
			repaired++;
		}

	}
	return ret ? ret : repaired;
}
2146 * To determine the file type for nlink/inode_item repair
2148 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2149 * Return -ENOENT if file type is not found.
2151 static int find_file_type(struct inode_record *rec, u8 *type)
2153 struct inode_backref *backref;
2155 /* For inode item recovered case */
2156 if (rec->found_inode_item) {
2157 *type = imode_to_type(rec->imode);
2158 return 0;
2161 list_for_each_entry(backref, &rec->backrefs, list) {
2162 if (backref->found_dir_index || backref->found_dir_item) {
2163 *type = backref->filetype;
2164 return 0;
2167 return -ENOENT;
2171 * To determine the file name for nlink repair
2173 * Return 0 if file name is found, set name and namelen.
2174 * Return -ENOENT if file name is not found.
2176 static int find_file_name(struct inode_record *rec,
2177 char *name, int *namelen)
2179 struct inode_backref *backref;
2181 list_for_each_entry(backref, &rec->backrefs, list) {
2182 if (backref->found_dir_index || backref->found_dir_item ||
2183 backref->found_inode_ref) {
2184 memcpy(name, backref->name, backref->namelen);
2185 *namelen = backref->namelen;
2186 return 0;
2189 return -ENOENT;
/*
 * Reset the nlink of the inode to the correct one.
 *
 * Strategy: unlink every recorded backref (dropping the ones that are not
 * fully consistent), zero the on-disk nlink, then re-add only the valid
 * links so btrfs_add_link() rebuilds the count from scratch.
 *
 * Returns 0 on success or a negative errno.
 */
static int reset_nlink(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct btrfs_path *path,
		       struct inode_record *rec)
{
	struct inode_backref *backref;
	struct inode_backref *tmp;
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	int ret = 0;

	/* We don't believe this either, reset it and iterate backref */
	rec->found_link = 0;

	/* Remove all backref including the valid ones */
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
				   backref->index, backref->name,
				   backref->namelen, 0);
		if (ret < 0)
			goto out;

		/* remove invalid backref, so it won't be added back */
		if (!(backref->found_dir_index &&
		      backref->found_dir_item &&
		      backref->found_inode_ref)) {
			list_del(&backref->list);
			free(backref);
		} else {
			rec->found_link++;
		}
	}

	/* Set nlink to 0 */
	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(path);

	/*
	 * Add back valid inode_ref/dir_item/dir_index,
	 * add_link() will handle the nlink inc, so new nlink must be correct
	 */
	list_for_each_entry(backref, &rec->backrefs, list) {
		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
				     backref->name, backref->namelen,
				     backref->filetype, &backref->index, 1, 0);
		if (ret < 0)
			goto out;
	}
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Repair a wrong link count: recover the file's name and type (falling
 * back to the decimal ino as the name and FILE as the type), rebuild the
 * nlink via reset_nlink(), and link orphaned inodes into lost+found.
 *
 * Always clears I_ERR_LINK_COUNT_WRONG to guarantee forward progress;
 * returns 0 on success or a negative errno.
 */
static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
{
	char namebuf[BTRFS_NAME_LEN] = {0};
	u8 type = 0;
	int namelen = 0;
	int name_recovered = 0;
	int type_recovered = 0;
	int ret = 0;

	/*
	 * Get file name and type first before these invalid inode ref
	 * are deleted by remove_all_invalid_backref()
	 */
	name_recovered = !find_file_name(rec, namebuf, &namelen);
	type_recovered = !find_file_type(rec, &type);

	if (!name_recovered) {
		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
		       rec->ino, rec->ino);
		namelen = count_digits(rec->ino);
		sprintf(namebuf, "%llu", rec->ino);
		name_recovered = 1;
	}
	if (!type_recovered) {
		printf("Can't get file type for inode %llu, using FILE as fallback\n",
		       rec->ino);
		type = BTRFS_FT_REG_FILE;
		type_recovered = 1;
	}

	ret = reset_nlink(trans, root, path, rec);
	if (ret < 0) {
		fprintf(stderr,
			"Failed to reset nlink for inode %llu: %s\n",
			rec->ino, strerror(-ret));
		goto out;
	}

	/* No valid link left at all: park the inode in lost+found */
	if (rec->found_link == 0) {
		ret = link_inode_to_lostfound(trans, root, path, rec->ino,
					      namebuf, namelen, type,
					      (u64 *)&rec->found_link);
		if (ret)
			goto out;
	}
	printf("Fixed the nlink of inode %llu\n", rec->ino);
out:
	/*
	 * Clear the flag anyway, or we will loop forever for the same inode
	 * as it will not be removed from the bad inode list and the dead loop
	 * happens.
	 */
	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
	btrfs_release_path(path);
	return ret;
}
2320 * Check if there is any normal(reg or prealloc) file extent for given
2321 * ino.
2322 * This is used to determine the file type when neither its dir_index/item or
2323 * inode_item exists.
2325 * This will *NOT* report error, if any error happens, just consider it does
2326 * not have any normal file extent.
2328 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2330 struct btrfs_path path;
2331 struct btrfs_key key;
2332 struct btrfs_key found_key;
2333 struct btrfs_file_extent_item *fi;
2334 u8 type;
2335 int ret = 0;
2337 btrfs_init_path(&path);
2338 key.objectid = ino;
2339 key.type = BTRFS_EXTENT_DATA_KEY;
2340 key.offset = 0;
2342 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2343 if (ret < 0) {
2344 ret = 0;
2345 goto out;
2347 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2348 ret = btrfs_next_leaf(root, &path);
2349 if (ret) {
2350 ret = 0;
2351 goto out;
2354 while (1) {
2355 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2356 path.slots[0]);
2357 if (found_key.objectid != ino ||
2358 found_key.type != BTRFS_EXTENT_DATA_KEY)
2359 break;
2360 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2361 struct btrfs_file_extent_item);
2362 type = btrfs_file_extent_type(path.nodes[0], fi);
2363 if (type != BTRFS_FILE_EXTENT_INLINE) {
2364 ret = 1;
2365 goto out;
2368 out:
2369 btrfs_release_path(&path);
2370 return ret;
2373 static u32 btrfs_type_to_imode(u8 type)
2375 static u32 imode_by_btrfs_type[] = {
2376 [BTRFS_FT_REG_FILE] = S_IFREG,
2377 [BTRFS_FT_DIR] = S_IFDIR,
2378 [BTRFS_FT_CHRDEV] = S_IFCHR,
2379 [BTRFS_FT_BLKDEV] = S_IFBLK,
2380 [BTRFS_FT_FIFO] = S_IFIFO,
2381 [BTRFS_FT_SOCK] = S_IFSOCK,
2382 [BTRFS_FT_SYMLINK] = S_IFLNK,
2385 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec->ino.
 *
 * Only the inode item itself is recreated (mode 0700 plus a best-effort
 * recovered file type); re-linking the inode is deliberately left to the
 * nlink repair pass, which is forced by setting I_ERR_LINK_COUNT_WRONG.
 *
 * Returns 0 on success, negative errno from btrfs_new_inode() on failure.
 */
static int repair_inode_no_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct inode_record *rec)
{
	u8 filetype;
	u32 mode = 0700;
	int type_recovered = 0;
	int ret = 0;

	printf("Trying to rebuild inode:%llu\n", rec->ino);

	type_recovered = !find_file_type(rec, &filetype);

	/*
	 * Try to determine inode type if type not found.
	 *
	 * For found regular file extent, it must be FILE.
	 * For found dir_item/index, it must be DIR.
	 *
	 * For undetermined one, use FILE as fallback.
	 *
	 * TODO:
	 * 1. If found backref(inode_index/item is already handled) to it,
	 *    it must be DIR.
	 *    Need new inode-inode ref structure to allow search for that.
	 */
	if (!type_recovered) {
		if (rec->found_file_extent &&
		    find_normal_file_extent(root, rec->ino)) {
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		} else if (rec->found_dir_item) {
			type_recovered = 1;
			filetype = BTRFS_FT_DIR;
		} else if (!list_empty(&rec->orphan_extents)) {
			/* Orphan data extents imply file data, so a regular file */
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		} else{
			printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
			       rec->ino);
			type_recovered = 1;
			filetype = BTRFS_FT_REG_FILE;
		}
	}

	ret = btrfs_new_inode(trans, root, rec->ino,
			      mode | btrfs_type_to_imode(filetype));
	if (ret < 0)
		goto out;

	/*
	 * Here inode rebuild is done, we only rebuild the inode item,
	 * don't repair the nlink(like move to lost+found).
	 * That is the job of nlink repair.
	 *
	 * We just fill the record and return
	 */
	rec->found_dir_item = 1;
	rec->imode = mode | btrfs_type_to_imode(filetype);
	rec->nlink = 0;
	rec->errors &= ~I_ERR_NO_INODE_ITEM;
	/* Ensure the inode_nlinks repair function will be called */
	rec->errors |= I_ERR_LINK_COUNT_WRONG;
out:
	return ret;
}
/*
 * Re-attach orphan data extents recorded in @rec->orphan_extents as real
 * file extent items of the inode.
 *
 * Conflicting existing extents are freed first; each successfully inserted
 * extent updates the record's size/hole bookkeeping and is removed from the
 * orphan list.  On full success I_ERR_FILE_EXTENT_ORPHAN is cleared.
 *
 * Returns 0 on success, negative errno on the first failure (remaining
 * orphans are left on the list).
 */
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct inode_record *rec)
{
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	int ret = 0;

	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents
		 *
		 * Here we don't know whether the extents is compressed or not,
		 * so we can only assume it not compressed nor data offset,
		 * and use its disk_len as extent length.
		 */
		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
				       orphan->offset, orphan->disk_len, 0);
		btrfs_release_path(path);
		if (ret < 0)
			goto out;
		if (!ret) {
			/* ret == 0: an extent already covers this range */
			fprintf(stderr,
				"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
				orphan->disk_bytenr, orphan->disk_len);
			ret = btrfs_free_extent(trans,
					root->fs_info->extent_root,
					orphan->disk_bytenr, orphan->disk_len,
					0, root->objectid, orphan->objectid,
					orphan->offset);
			if (ret < 0)
				goto out;
		}
		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
				orphan->offset, orphan->disk_bytenr,
				orphan->disk_len, orphan->disk_len);
		if (ret < 0)
			goto out;

		/* Update file size info */
		rec->found_size += orphan->disk_len;
		if (rec->found_size == rec->nbytes)
			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;

		/* Update the file extent hole info too */
		ret = del_file_extent_hole(&rec->holes, orphan->offset,
					   orphan->disk_len);
		if (ret < 0)
			goto out;
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;

		list_del(&orphan->list);
		free(orphan);
	}
	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
out:
	return ret;
}
/*
 * Fix missing file extents ("discount" extents) by punching explicit holes
 * over every gap recorded in @rec->holes.
 *
 * Needed for non-NO_HOLES filesystems where every byte of a file must be
 * covered by some extent item.  If the inode has no recorded holes at all
 * (it lost every file extent) a single hole covering [0, round_up(isize))
 * is punched instead.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct inode_record *rec)
{
	struct rb_node *node;
	struct file_extent_hole *hole;
	int found = 0;
	int ret = 0;

	node = rb_first(&rec->holes);

	while (node) {
		found = 1;
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = btrfs_punch_hole(trans, root, rec->ino,
				       hole->start, hole->len);
		if (ret < 0)
			goto out;
		/* Drop the hole from the tree; re-read rb_first as it mutates */
		ret = del_file_extent_hole(&rec->holes, hole->start,
					   hole->len);
		if (ret < 0)
			goto out;
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
		node = rb_first(&rec->holes);
	}
	/* special case for a file losing all its file extent */
	if (!found) {
		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
				       round_up(rec->isize,
						root->fs_info->sectorsize));
		if (ret < 0)
			goto out;
	}
	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
	       rec->ino, root->objectid);
out:
	return ret;
}
/*
 * Repair a wrong ram_bytes field of an inline file extent by resetting it
 * to the on-disk inline item length.
 *
 * Looks up the EXTENT_DATA item at offset 0 of @rec->ino; -ENOENT is
 * returned if the item does not exist.  Clears
 * I_ERR_INLINE_RAM_BYTES_WRONG on success.  @path is released before
 * returning.
 */
static int repair_inline_ram_bytes(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct inode_record *rec)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_item *i;
	u64 on_disk_item_len;
	int ret;

	key.objectid = rec->ino;
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;	/* exact key not found */
	if (ret < 0)
		goto out;

	/* ram_bytes of an uncompressed inline extent equals its item length */
	i = btrfs_item_nr(path->slots[0]);
	on_disk_item_len = btrfs_file_extent_inline_item_len(path->nodes[0], i);
	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_item_len);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	printf("Repaired inline ram_bytes for root %llu ino %llu\n",
	       root->objectid, rec->ino);
	rec->errors &= ~I_ERR_INLINE_RAM_BYTES_WRONG;
out:
	btrfs_release_path(path);
	return ret;
}
/*
 * Dispatch all repairable inode errors of @rec to the individual repair
 * helpers, inside a single transaction.
 *
 * If none of the repairable error bits are set, the raw error mask is
 * returned (non-zero means "not repaired").  Otherwise returns 0 on
 * success or the first helper's negative errno; later helpers are skipped
 * once one fails.  Note the order matters: a missing inode item must be
 * rebuilt before anything else can be fixed.
 */
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path path;
	int ret = 0;

	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT |
			     I_ERR_FILE_NBYTES_WRONG |
			     I_ERR_INLINE_RAM_BYTES_WRONG)))
		return rec->errors;

	/*
	 * For nlink repair, it may create a dir and add link, so
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_init_path(&path);
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, &path, rec);
	if (!ret && rec->errors & I_ERR_INLINE_RAM_BYTES_WRONG)
		ret = repair_inline_ram_bytes(trans, root, &path, rec);
	/* NOTE(review): commit return value is ignored — confirm intentional */
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
/*
 * Verify (and with --repair, fix) every inode record collected for @root.
 *
 * Phase 1: a three-stage backref repair loop (delete invalid backrefs,
 * add correct ones, then rescan) — see the big comment below.
 * Phase 2: validate the root directory, recreating it when repairing.
 * Phase 3: drain the cache, re-deriving error bits per inode, attempting
 * try_repair_inode() when repairing, and printing unresolved refs.
 *
 * Returns 0 if clean, -1 if unrepaired errors remain, negative errno or
 * -EAGAIN (caller must rescan) from the repair paths.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int stage = 0;
	int ret = 0;
	int err = 0;
	u64 error = 0;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* Deleted subvolume: nothing should have been collected */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);
		return 0;
	}

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second.  We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	while (stage < 3) {
		stage++;
		if (stage == 3 && !err)
			break;

		cache = search_cache_extent(inode_cache, 0);
		while (repair && cache) {
			node = container_of(cache, struct ptr_node, cache);
			rec = node->data;
			cache = next_cache_extent(cache);

			/* Need to free everything up and rescan */
			if (stage == 3) {
				remove_cache_extent(inode_cache, &node->cache);
				free(node);
				free_inode_rec(rec);
				continue;
			}

			if (list_empty(&rec->backrefs))
				continue;

			ret = repair_inode_backrefs(root, rec, inode_cache,
						    stage == 1);
			if (ret < 0) {
				err = ret;
				stage = 2;
				break;
			/*
			 * NOTE(review): no "else" before this if — both
			 * branches can run on the same iteration; looks like
			 * "} else if" was intended, confirm upstream.
			 */
			} if (ret > 0) {
				err = -EAGAIN;
			}
		}
	}
	if (err)
		return err;

	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	if (rec) {
		ret = check_root_dir(rec);
		if (ret) {
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			error++;
		}
	} else {
		if (repair) {
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				return err;
			}

			fprintf(stderr,
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);

			ret = btrfs_make_root_dir(trans, root, root_dirid);
			BUG_ON(ret);

			btrfs_commit_transaction(trans, root);
			/* Root dir recreated: caller must rescan this root */
			return -EAGAIN;
		}

		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	}

	/* Drain the cache, validating/repairing each remaining inode */
	while (1) {
		cache = search_cache_extent(inode_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(inode_cache, &node->cache);
		free(node);
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
			continue;
		}

		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
			if (ret == 0)
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
		if (repair) {
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
				continue;
			}
		}

		if (!(repair && ret == 0))
			error++;
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		}
		free_inode_rec(rec);
	}
	return (error > 0) ? -1 : 0;
}
2808 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2809 u64 objectid)
2811 struct cache_extent *cache;
2812 struct root_record *rec = NULL;
2813 int ret;
2815 cache = lookup_cache_extent(root_cache, objectid, 1);
2816 if (cache) {
2817 rec = container_of(cache, struct root_record, cache);
2818 } else {
2819 rec = calloc(1, sizeof(*rec));
2820 if (!rec)
2821 return ERR_PTR(-ENOMEM);
2822 rec->objectid = objectid;
2823 INIT_LIST_HEAD(&rec->backrefs);
2824 rec->cache.start = objectid;
2825 rec->cache.size = 1;
2827 ret = insert_cache_extent(root_cache, &rec->cache);
2828 if (ret)
2829 return ERR_PTR(-EEXIST);
2831 return rec;
2834 static struct root_backref *get_root_backref(struct root_record *rec,
2835 u64 ref_root, u64 dir, u64 index,
2836 const char *name, int namelen)
2838 struct root_backref *backref;
2840 list_for_each_entry(backref, &rec->backrefs, list) {
2841 if (backref->ref_root != ref_root || backref->dir != dir ||
2842 backref->namelen != namelen)
2843 continue;
2844 if (memcmp(name, backref->name, namelen))
2845 continue;
2846 return backref;
2849 backref = calloc(1, sizeof(*backref) + namelen + 1);
2850 if (!backref)
2851 return NULL;
2852 backref->ref_root = ref_root;
2853 backref->dir = dir;
2854 backref->index = index;
2855 backref->namelen = namelen;
2856 memcpy(backref->name, name, namelen);
2857 backref->name[namelen] = '\0';
2858 list_add_tail(&backref->list, &rec->backrefs);
2859 return backref;
2862 static void free_root_record(struct cache_extent *cache)
2864 struct root_record *rec;
2865 struct root_backref *backref;
2867 rec = container_of(cache, struct root_record, cache);
2868 while (!list_empty(&rec->backrefs)) {
2869 backref = to_root_backref(rec->backrefs.next);
2870 list_del(&backref->list);
2871 free(backref);
2874 free(rec);
2877 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (@item_type: DIR_ITEM, DIR_INDEX, ROOT_REF
 * or ROOT_BACKREF key) about the link from @ref_root's dir @dir to subvolume
 * @root_id, accumulating it into the root_record/backref caches.
 *
 * A backref becomes "reachable" once both a forward ROOT_REF and a dir item
 * have been seen, and found_ref counts such fully-confirmed links.
 * Duplicate or mismatching evidence sets REF_ERR_* bits instead.
 *
 * Always returns 0; allocation failures BUG().
 */
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
{
	struct root_record *rec;
	struct root_backref *backref;

	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	BUG_ON(!backref);

	backref->errors |= errors;

	/* DIR_ITEM keys carry no index; everything else must agree on it */
	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
		} else {
			backref->index = index;
		}
	}

	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
			rec->found_ref++;
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
			rec->found_ref++;
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;
	} else {
		/* Callers only pass the four key types above */
		BUG_ON(1);
	}

	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
	return 0;
}
/*
 * Move the per-tree inode records of @src_cache that actually describe
 * child subvolume roots into the global root backref cache @dst_cache.
 *
 * Reloc trees are skipped entirely (their records are just freed).  For
 * every record confirmed by is_child_root(), its dir_item/dir_index
 * backrefs are replayed into @dst_cache via add_root_backref(); all
 * records are freed either way.
 *
 * Returns 0 on success or the negative errno from is_child_root().
 */
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
{
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	int ret = 0;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);
		return 0;
	}

	while (1) {
		cache = search_cache_extent(src_cache, 0);
		if (!cache)
			break;
		node = container_of(cache, struct ptr_node, cache);
		rec = node->data;
		remove_cache_extent(src_cache, &node->cache);
		free(node);

		ret = is_child_root(root, root->objectid, rec->ino);
		if (ret < 0)
			break;
		else if (ret == 0)
			goto skip;

		list_for_each_entry(backref, &rec->backrefs, list) {
			/* Subvolume roots never have inode refs in the parent */
			BUG_ON(backref->found_inode_ref);
			if (backref->found_dir_item)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_ITEM_KEY,
					backref->errors);
			if (backref->found_dir_index)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_INDEX_KEY,
					backref->errors);
		}
skip:
		free_inode_rec(rec);
	}
	if (ret < 0)
		return ret;
	return 0;
}
/*
 * Cross-check all collected root records and their backrefs.
 *
 * First runs a fixpoint loop that propagates unreachability: a backref
 * whose referencing root itself has found_ref == 0 is cleared, repeating
 * until stable (the top-level FS tree is seeded as referenced).  Then every
 * record is validated: unreferenced fs trees without an orphan item, roots
 * with refs but no root item, and backrefs missing any of their four
 * expected pieces are reported.
 *
 * Returns 1 if any error was found, 0 otherwise.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
{
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;
	int loop = 1;
	int ret;
	int error;
	int errors = 0;

	/* The top-level tree is always reachable */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));
	rec->found_ref = 1;

	/* fixme: this can not detect circular references */
	while (loop) {
		loop = 0;
		cache = search_cache_extent(root_cache, 0);
		while (1) {
			ctx.item_count++;
			if (!cache)
				break;
			rec = container_of(cache, struct root_record, cache);
			cache = next_cache_extent(cache);

			if (rec->found_ref == 0)
				continue;

			list_for_each_entry(backref, &rec->backrefs, list) {
				if (!backref->reachable)
					continue;

				ref_root = get_root_rec(root_cache,
							backref->ref_root);
				BUG_ON(IS_ERR(ref_root));
				if (ref_root->found_ref > 0)
					continue;

				/* Referencing root is unreachable: drop ref */
				backref->reachable = 0;
				rec->found_ref--;
				if (rec->found_ref == 0)
					loop = 1;	/* re-propagate */
			}
		}
	}

	cache = search_cache_extent(root_cache, 0);
	while (1) {
		if (!cache)
			break;
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);

		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			/* An orphan item legitimizes an unreferenced tree */
			ret = check_orphan_item(root->fs_info->tree_root,
						rec->objectid);
			if (ret == 0)
				continue;

			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
				continue;
			errors++;
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);
		}

		error = 0;
		if (rec->found_ref > 0 && !rec->found_root_item)
			error = 1;
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
				error = 1;
		}
		if (!error)
			continue;

		errors++;
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");

		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
				continue;
			if (!backref->errors && rec->found_root_item)
				continue;
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->errors);
			print_ref_error(backref->errors);
		}
	}
	return errors > 0 ? 1 : 0;
}
/*
 * Feed one ROOT_REF/ROOT_BACKREF item from the tree root into the root
 * backref cache.
 *
 * For ROOT_REF the key is (parent, REF, child); for ROOT_BACKREF it is
 * (child, BACKREF, parent) — hence the swapped objectid/offset in the two
 * add_root_backref() calls.  Over-long names are truncated to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * Always returns 0.
 */
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
{
	u64 dirid;
	u64 index;
	u32 len;
	u32 name_len;
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];
	int error;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);

	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);

	if (name_len <= BTRFS_NAME_LEN) {
		len = name_len;
		error = 0;
	} else {
		len = BTRFS_NAME_LEN;
		error = REF_ERR_NAME_TOO_LONG;
	}
	/* Name is stored immediately after the btrfs_root_ref struct */
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);

	if (key->type == BTRFS_ROOT_REF_KEY) {
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
	} else {
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
	}
	return 0;
}
3135 static void free_corrupt_block(struct cache_extent *cache)
3137 struct btrfs_corrupt_block *corrupt;
3139 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3140 free(corrupt);
3143 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node key in corrupt_blocks cache_tree.
 * and rebalance the tree.
 * After the fix, the btree should be writeable.
 *
 * Pass 1 deletes the node pointer to each corrupt block (with
 * lowest_level set so we land on its parent) and frees the block's extent.
 * Pass 2 re-searches each deleted key with ins_len = -1 purely to trigger
 * btrfs_search_slot()'s balancing.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;
	u64 offset;
	int level;
	int ret = 0;

	if (cache_tree_empty(corrupt_blocks))
		return 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
			strerror(-ret));
		return ret;
	}
	btrfs_init_path(&path);
	cache = first_cache_extent(corrupt_blocks);
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		level = corrupt->level;
		path.lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;

		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with corrupted brother leaf/node,
		 * so ins_len set to 0 here.
		 * Balance will be done after all corrupt node/leaf is deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
		if (ret < 0)
			goto out;
		offset = btrfs_node_blockptr(path.nodes[level],
					     path.slots[level]);

		/* Remove the ptr */
		ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
		if (ret < 0)
			goto out;
		/*
		 * Remove the corresponding extent
		 * return value is not concerned.
		 */
		btrfs_release_path(&path);
		ret = btrfs_free_extent(trans, root, offset,
					root->fs_info->nodesize, 0,
					root->root_key.objectid, level - 1, 0);
		cache = next_cache_extent(cache);
	}

	/* Balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
	while (cache) {
		corrupt = container_of(cache, struct btrfs_corrupt_block,
				       cache);
		memcpy(&key, &corrupt->key, sizeof(key));
		ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
		if (ret < 0)
			goto out;
		/* return will always >0 since it won't find the item */
		ret = 0;
		btrfs_release_path(&path);
		cache = next_cache_extent(cache);
	}
out:
	btrfs_commit_transaction(trans, root);
	btrfs_release_path(&path);
	return ret;
}
/*
 * Check one fs/subvolume tree: walk it with the shared walk_control,
 * collect inode and root records, report/repair corrupted tree blocks,
 * and validate the collected inode records.
 *
 * @root:	the fs/subvolume root to check
 * @root_cache:	global cache receiving discovered child-root backrefs
 * @wc:		walk control shared across all roots (per-level state)
 *
 * Returns 0 if clean, -EIO on an unrecoverable root node, or the first
 * error from the walk / record checks (check_inode_recs() may yield
 * -EAGAIN to request a full rescan).
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
{
	int ret = 0;
	int err = 0;
	int wret;
	int level;
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;
	struct node_refs nrefs;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree block
	 *
	 * Unlike the usage in extent tree check, here we do it in a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	}

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);
	memset(&nrefs, 0, sizeof(nrefs));

	/* Move the orphan extent record to corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
				      1);
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	}

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
	else
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)
		return -EIO;

	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* Live root: walk from the very beginning */
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
	} else {
		/* Half-dropped root: resume the walk at drop_progress */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		if (level > btrfs_header_level(root->node) ||
		    level >= BTRFS_MAX_LEVEL) {
			error("ignoring invalid drop level: %u", level);
			goto skip_walking;
		}
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		if (wret < 0)
			goto skip_walking;
		btrfs_node_key(path.nodes[level], &found_key,
			       path.slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}

	while (1) {
		ctx.item_count++;
		wret = walk_down_tree(root, &path, wc, &level, &nrefs);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;

		wret = walk_up_tree(root, &path, wc, &level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}
skip_walking:
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
		while (cache) {
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
					       cache);
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		}
		if (repair) {
			printf("Try to repair the btree for root %llu\n",
			       root->root_key.objectid);
			ret = repair_btree(root, &corrupt_blocks);
			if (ret < 0)
				fprintf(stderr, "Failed to repair btree: %s\n",
					strerror(-ret));
			if (!ret)
				printf("Btree for root %llu is fixed\n",
				       root->root_key.objectid);
		}
	}

	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (err < 0)
		ret = err;

	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
				     root_node.current);
	}

	err = check_inode_recs(root, &root_node.inode_cache);
	if (!ret)
		ret = err;

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
	return ret;
}
/*
 * Iterate every ROOT_ITEM in the tree root and check each fs/subvolume
 * tree via check_fs_root(); ROOT_REF/ROOT_BACKREF items are fed into
 * @root_cache on the way.
 *
 * If a repair modifies the tree root (detected by comparing the cached
 * tree root node, or via an explicit -EAGAIN), the accumulated root
 * records are discarded and the scan restarts from "again:".  skip_root
 * remembers a root that repeatedly fails to repair so the restart does not
 * loop forever on it.
 *
 * Returns 0 if all roots are clean, 1 otherwise.
 */
static int check_fs_roots(struct btrfs_fs_info *fs_info,
			  struct cache_tree *root_cache)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	u64 skip_root = 0;
	int ret;
	int err = 0;

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	if (repair)
		reset_cached_block_groups(fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

again:
	key.offset = 0;
	if (skip_root)
		key.objectid = skip_root + 1;
	else
		key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	if (ret < 0) {
		err = 1;
		goto out;
	}
	tree_node = tree_root->node;
	while (1) {
		/* Tree root was COWed by a repair: restart the whole scan */
		if (tree_node != tree_root->node) {
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
			goto again;
		}
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			if (ret) {
				if (ret < 0)
					err = 1;
				break;
			}
			leaf = path.nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				/* Reloc roots are not cached; read privately */
				tmp_root = btrfs_read_fs_root_no_cache(
						fs_info, &key);
			} else {
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						fs_info, &key);
			}
			if (IS_ERR(tmp_root)) {
				err = 1;
				goto next;
			}
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
				goto again;
			}
			if (ret) {
				err = 1;

				/*
				 * We failed to repair this root but modified
				 * tree root, after again: label we will still
				 * hit this root and fail to repair, so we must
				 * skip it to avoid infinite loop.
				 */
				if (repair)
					skip_root = key.objectid;
			}
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
					 root_cache);
		}
next:
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	if (err)
		free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);

	return err;
}
3497 static struct tree_backref *find_tree_backref(struct extent_record *rec,
3498 u64 parent, u64 root)
3500 struct rb_node *node;
3501 struct tree_backref *back = NULL;
3502 struct tree_backref match = {
3503 .node = {
3504 .is_data = 0,
3508 if (parent) {
3509 match.parent = parent;
3510 match.node.full_backref = 1;
3511 } else {
3512 match.root = root;
3515 node = rb_search(&rec->backref_tree, &match.node.node,
3516 (rb_compare_keys)compare_extent_backref, NULL);
3517 if (node)
3518 back = to_tree_backref(rb_node_to_extent_backref(node));
3520 return back;
3523 static struct data_backref *find_data_backref(struct extent_record *rec,
3524 u64 parent, u64 root,
3525 u64 owner, u64 offset,
3526 int found_ref,
3527 u64 disk_bytenr, u64 bytes)
3529 struct rb_node *node;
3530 struct data_backref *back = NULL;
3531 struct data_backref match = {
3532 .node = {
3533 .is_data = 1,
3535 .owner = owner,
3536 .offset = offset,
3537 .bytes = bytes,
3538 .found_ref = found_ref,
3539 .disk_bytenr = disk_bytenr,
3542 if (parent) {
3543 match.parent = parent;
3544 match.node.full_backref = 1;
3545 } else {
3546 match.root = root;
3549 node = rb_search(&rec->backref_tree, &match.node.node,
3550 (rb_compare_keys)compare_extent_backref, NULL);
3551 if (node)
3552 back = to_data_backref(rb_node_to_extent_backref(node));
3554 return back;
3557 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
3558 struct cache_tree *root_cache)
3560 int ret;
3562 if (check_mode == CHECK_MODE_LOWMEM)
3563 ret = check_fs_roots_lowmem(fs_info);
3564 else
3565 ret = check_fs_roots(fs_info, root_cache);
3567 return ret;
/*
 * Cross-check every backref recorded for @rec: each must have been seen
 * in the extent tree, tree backrefs must have been referenced by a real
 * tree block, data backref counts/bytenr/size must match the extent
 * record, and the total number of found refs must equal rec->refs.
 *
 * Returns 0 when everything is consistent, 1 otherwise.  With
 * @print_errs == 0 the function bails out silently at the first problem.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct extent_backref *back, *tmp;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* Backref never located in the extent tree itself */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr,
"data backref %llu %s %llu owner %llu offset %llu num_refs %lu not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = to_tree_backref(back);
				fprintf(stderr,
"tree backref %llu parent %llu root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* Tree backref present in the extent tree but no block refers back */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = to_tree_backref(back);
			fprintf(stderr,
				"backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = to_data_backref(back);
			/* refs seen from file extent items vs. extent tree */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"incorrect local backref count on %llu %s %llu owner %llu offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent :
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs,
					back);
			}
			/* The file extent must point at this extent record */
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref disk bytenr does not match extent record, bytenr=%llu, ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr,
"backref bytes do not match extent backref, bytenr=%llu, ref bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/* Tree backrefs count once; data backrefs count found_ref times */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = to_data_backref(back);
			found += dback->found_ref;
		}
	}
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr,
	"incorrect global backref count on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
/* rb_free_nodes() callback: release a single backref node. */
static void __free_one_backref(struct rb_node *node)
{
	struct extent_backref *backref = rb_node_to_extent_backref(node);

	free(backref);
}
/* Free every backref attached to @rec, emptying its backref rb-tree. */
static void free_all_extent_backrefs(struct extent_record *rec)
{
	rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
3692 static void free_extent_record_cache(struct cache_tree *extent_cache)
3694 struct cache_extent *cache;
3695 struct extent_record *rec;
3697 while (1) {
3698 cache = first_cache_extent(extent_cache);
3699 if (!cache)
3700 break;
3701 rec = container_of(cache, struct extent_record, cache);
3702 remove_cache_extent(extent_cache, cache);
3703 free_all_extent_backrefs(rec);
3704 free(rec);
/*
 * Drop @rec from @extent_cache once it is fully verified: content and
 * owner checked, refcounts in agreement, no duplicates and no structural
 * problems (bad full backref, stripe crossing, wrong chunk type).
 * Pruning verified records keeps memory usage bounded during the scan.
 *
 * Always returns 0.
 */
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
{
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
		free(rec);
	}
	return 0;
}
3724 static int check_owner_ref(struct btrfs_root *root,
3725 struct extent_record *rec,
3726 struct extent_buffer *buf)
3728 struct extent_backref *node, *tmp;
3729 struct tree_backref *back;
3730 struct btrfs_root *ref_root;
3731 struct btrfs_key key;
3732 struct btrfs_path path;
3733 struct extent_buffer *parent;
3734 int level;
3735 int found = 0;
3736 int ret;
3738 rbtree_postorder_for_each_entry_safe(node, tmp,
3739 &rec->backref_tree, node) {
3740 if (node->is_data)
3741 continue;
3742 if (!node->found_ref)
3743 continue;
3744 if (node->full_backref)
3745 continue;
3746 back = to_tree_backref(node);
3747 if (btrfs_header_owner(buf) == back->root)
3748 return 0;
3751 * Some unexpected root item referring to this one, return 1 to
3752 * indicate owner not found
3754 if (rec->is_root)
3755 return 1;
3757 /* try to find the block by search corresponding fs tree */
3758 key.objectid = btrfs_header_owner(buf);
3759 key.type = BTRFS_ROOT_ITEM_KEY;
3760 key.offset = (u64)-1;
3762 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3763 if (IS_ERR(ref_root))
3764 return 1;
3766 level = btrfs_header_level(buf);
3767 if (level == 0)
3768 btrfs_item_key_to_cpu(buf, &key, 0);
3769 else
3770 btrfs_node_key_to_cpu(buf, &key, 0);
3772 btrfs_init_path(&path);
3773 path.lowest_level = level + 1;
3774 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3775 if (ret < 0)
3776 return 0;
3778 parent = path.nodes[level + 1];
3779 if (parent && buf->start == btrfs_node_blockptr(parent,
3780 path.slots[level + 1]))
3781 found = 1;
3783 btrfs_release_path(&path);
3784 return found ? 0 : 1;
3787 static int is_extent_tree_record(struct extent_record *rec)
3789 struct extent_backref *node, *tmp;
3790 struct tree_backref *back;
3791 int is_extent = 0;
3793 rbtree_postorder_for_each_entry_safe(node, tmp,
3794 &rec->backref_tree, node) {
3795 if (node->is_data)
3796 return 0;
3797 back = to_tree_backref(node);
3798 if (node->full_backref)
3799 return 0;
3800 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3801 is_extent = 1;
3803 return is_extent;
/*
 * Record an I/O failure on [start, start+len).  If the range belongs to
 * a block of the extent tree itself, remember it as a corrupt extent so
 * later repair passes can handle it; other blocks are ignored here.
 *
 * Returns 0 when nothing needed recording, otherwise the result of
 * btrfs_add_corrupt_extent_record().
 */
static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
			       u64 start, u64 len)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;

	cache = lookup_cache_extent(extent_cache, start, len);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	/* Only extent-tree blocks are tracked as corrupt records */
	if (!is_extent_tree_record(rec))
		return 0;

	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
}
/*
 * Swap the entries at @slot and @slot+1 inside @buf to restore key order.
 *
 * For internal nodes the two key pointers are exchanged directly.  For
 * leaves both the item headers and the item data payloads must be
 * exchanged, and the cached keys in the path fixed up afterwards.  If
 * slot 0 changes, the key in the parent node is updated too.
 *
 * Returns 0 on success, -ENOMEM if temporary buffers can't be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			/* First key changed: propagate it to the parent */
			struct btrfs_disk_key key;

			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		/* Cross-copy the payloads into each other's data area */
		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Swap the headers' offset/size so they track the payloads */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
/*
 * Repair out-of-order keys in the block at path->lowest_level by
 * bubble-swapping adjacent misordered pairs.  After each swap the scan
 * index is reset so the pass restarts from the front of the block.
 *
 * Returns 0 once a swap succeeded, the swap_values() error on failure,
 * or -EIO if no swap was ever performed (nothing fixable found).
 */
static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int i;
	int level = path->lowest_level;
	int ret = -EIO;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		if (level) {
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
		} else {
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		}
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
			continue;
		ret = swap_values(root, path, buf, i);
		if (ret)
			break;
		btrfs_mark_buffer_dirty(buf);
		/* Restart the scan; the swap may have upset earlier pairs */
		i = 0;
	}
	return ret;
}
/*
 * Remove the item at @slot from leaf @buf when its key type is one we
 * can safely lose (it will be rebuilt or tolerated later).  The item
 * headers after @slot are shifted down and nritems decremented; if slot
 * 0 was removed the parent's key is fixed up.
 *
 * Returns 0 on success, -1 when the key type cannot be deleted.
 */
static int delete_bogus_item(struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Close the gap in the item header array; data area is left as-is */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair leaf items whose data offsets are inconsistent: the first
 * item's data must end at the leaf data size, and every following
 * item's data must end exactly where the previous item's data begins.
 *
 * Items extending past their allowed end are deleted (if possible);
 * items leaving a gap are shifted up to close it.  The loop restarts
 * from the top after every deletion because slots renumber.
 *
 * Returns 0 on success or -EIO when an overlap/overflow can't be fixed
 * (currently BUG()s on error until transaction abort support exists).
 */
static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
				/* Data runs past the leaf: item is bogus */
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr,
				"item is off the end of the leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				/* Data overlaps the previous item's data */
				ret = delete_bogus_item(root, path, buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out. Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
4025 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4026 * then just return -EIO.
4028 static int try_to_fix_bad_block(struct btrfs_root *root,
4029 struct extent_buffer *buf,
4030 enum btrfs_tree_block_status status)
4032 struct btrfs_trans_handle *trans;
4033 struct ulist *roots;
4034 struct ulist_node *node;
4035 struct btrfs_root *search_root;
4036 struct btrfs_path path;
4037 struct ulist_iterator iter;
4038 struct btrfs_key root_key, key;
4039 int ret;
4041 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4042 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4043 return -EIO;
4045 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4046 if (ret)
4047 return -EIO;
4049 btrfs_init_path(&path);
4050 ULIST_ITER_INIT(&iter);
4051 while ((node = ulist_next(roots, &iter))) {
4052 root_key.objectid = node->val;
4053 root_key.type = BTRFS_ROOT_ITEM_KEY;
4054 root_key.offset = (u64)-1;
4056 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4057 if (IS_ERR(root)) {
4058 ret = -EIO;
4059 break;
4063 trans = btrfs_start_transaction(search_root, 0);
4064 if (IS_ERR(trans)) {
4065 ret = PTR_ERR(trans);
4066 break;
4069 path.lowest_level = btrfs_header_level(buf);
4070 path.skip_check_block = 1;
4071 if (path.lowest_level)
4072 btrfs_node_key_to_cpu(buf, &key, 0);
4073 else
4074 btrfs_item_key_to_cpu(buf, &key, 0);
4075 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4076 if (ret) {
4077 ret = -EIO;
4078 btrfs_commit_transaction(trans, search_root);
4079 break;
4081 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4082 ret = fix_key_order(search_root, &path);
4083 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4084 ret = fix_item_offset(search_root, &path);
4085 if (ret) {
4086 btrfs_commit_transaction(trans, search_root);
4087 break;
4089 btrfs_release_path(&path);
4090 btrfs_commit_transaction(trans, search_root);
4092 ulist_free(roots);
4093 btrfs_release_path(&path);
4094 return ret;
/*
 * Validate the tree block @buf against its extent record: record its
 * generation and first key info, run the leaf/node structure checks,
 * optionally attempt a repair, and verify the owner backref when the
 * block is not a full backref.
 *
 * Returns 0 when the block is clean, 1 when no extent record covers it,
 * -EIO for an unfixable bad block, or -EAGAIN when a repair COWed
 * blocks and the caller must restart its scan.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		/* Remember the block's first key for later reporting */
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cowed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
/*
 * Dead code: list-based predecessor of the rb-tree find_tree_backref()
 * above, kept under #if 0 for reference only.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
					      u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
4190 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4191 u64 parent, u64 root)
4193 struct tree_backref *ref = malloc(sizeof(*ref));
4195 if (!ref)
4196 return NULL;
4197 memset(&ref->node, 0, sizeof(ref->node));
4198 if (parent > 0) {
4199 ref->parent = parent;
4200 ref->node.full_backref = 1;
4201 } else {
4202 ref->root = root;
4203 ref->node.full_backref = 0;
4206 return ref;
/*
 * Dead code: list-based predecessor of the rb-tree find_data_backref()
 * above, kept under #if 0 for reference only.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
					      u64 parent, u64 root,
					      u64 owner, u64 offset,
					      int found_ref,
					      u64 disk_bytenr, u64 bytes)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct data_backref *back;

	while (cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root && back->owner == owner &&
			    back->offset == offset) {
				if (found_ref && node->found_ref &&
				    (back->bytes != bytes ||
				    back->disk_bytenr != disk_bytenr))
					continue;
				return back;
			}
		}
	}
	return NULL;
}
#endif
4248 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4249 u64 parent, u64 root,
4250 u64 owner, u64 offset,
4251 u64 max_size)
4253 struct data_backref *ref = malloc(sizeof(*ref));
4255 if (!ref)
4256 return NULL;
4257 memset(&ref->node, 0, sizeof(ref->node));
4258 ref->node.is_data = 1;
4260 if (parent > 0) {
4261 ref->parent = parent;
4262 ref->owner = 0;
4263 ref->offset = 0;
4264 ref->node.full_backref = 1;
4265 } else {
4266 ref->root = root;
4267 ref->owner = owner;
4268 ref->offset = offset;
4269 ref->node.full_backref = 0;
4271 ref->bytes = max_size;
4272 ref->found_ref = 0;
4273 ref->num_refs = 0;
4274 if (max_size > rec->max_size)
4275 rec->max_size = max_size;
4276 return ref;
/*
 * Check if the type of extent matches with its chunk: data extents must
 * live in DATA block groups, metadata in METADATA or SYSTEM groups.
 * SYSTEM vs METADATA can only be distinguished by looking at the first
 * tree backref's owning root.  Sets rec->wrong_chunk_type on mismatch.
 */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly*/
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extent, as it's also marked as metadata, we can only
	 * make sure it's a SYSTEM extent by its backref
	 */
	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
4329 * Allocate a new extent record, fill default values from @tmpl and insert int
4330 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4331 * the cache, otherwise it fails.
4333 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4334 struct extent_record *tmpl)
4336 struct extent_record *rec;
4337 int ret = 0;
4339 BUG_ON(tmpl->max_size == 0);
4340 rec = malloc(sizeof(*rec));
4341 if (!rec)
4342 return -ENOMEM;
4343 rec->start = tmpl->start;
4344 rec->max_size = tmpl->max_size;
4345 rec->nr = max(tmpl->nr, tmpl->max_size);
4346 rec->found_rec = tmpl->found_rec;
4347 rec->content_checked = tmpl->content_checked;
4348 rec->owner_ref_checked = tmpl->owner_ref_checked;
4349 rec->num_duplicates = 0;
4350 rec->metadata = tmpl->metadata;
4351 rec->flag_block_full_backref = FLAG_UNSET;
4352 rec->bad_full_backref = 0;
4353 rec->crossing_stripes = 0;
4354 rec->wrong_chunk_type = 0;
4355 rec->is_root = tmpl->is_root;
4356 rec->refs = tmpl->refs;
4357 rec->extent_item_refs = tmpl->extent_item_refs;
4358 rec->parent_generation = tmpl->parent_generation;
4359 INIT_LIST_HEAD(&rec->backrefs);
4360 INIT_LIST_HEAD(&rec->dups);
4361 INIT_LIST_HEAD(&rec->list);
4362 rec->backref_tree = RB_ROOT;
4363 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4364 rec->cache.start = tmpl->start;
4365 rec->cache.size = tmpl->nr;
4366 ret = insert_cache_extent(extent_cache, &rec->cache);
4367 if (ret) {
4368 free(rec);
4369 return ret;
4371 bytes_used += rec->nr;
4373 if (tmpl->metadata)
4374 rec->crossing_stripes = check_crossing_stripes(global_info,
4375 rec->start, global_info->nodesize);
4376 check_extent_type(rec);
4377 return ret;
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are hints:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct extent_record *tmpl)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		if (tmpl->refs)
			rec->refs++;
		/* Stub records (nr == 1) get their real size on first hit */
		if (rec->nr == 1)
			rec->nr = max(tmpl->nr, tmpl->max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backrefs.
		 */
		if (tmpl->found_rec) {
			if (tmpl->start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				tmp->start = tmpl->start;
				tmp->max_size = tmpl->max_size;
				tmp->nr = tmpl->nr;
				tmp->found_rec = 1;
				tmp->metadata = tmpl->metadata;
				tmp->extent_item_refs = tmpl->extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = tmpl->nr;
				rec->found_rec = 1;
			}
		}

		if (tmpl->extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr,
			"block %llu rec extent_item_refs %llu, passed %llu\n",
					(unsigned long long)tmpl->start,
					(unsigned long long)
							rec->extent_item_refs,
					(unsigned long long)
							tmpl->extent_item_refs);
			}
			rec->extent_item_refs = tmpl->extent_item_refs;
		}
		if (tmpl->is_root)
			rec->is_root = 1;
		if (tmpl->content_checked)
			rec->content_checked = 1;
		if (tmpl->owner_ref_checked)
			rec->owner_ref_checked = 1;
		memcpy(&rec->parent_key, &tmpl->parent_key,
		       sizeof(tmpl->parent_key));
		if (tmpl->parent_generation)
			rec->parent_generation = tmpl->parent_generation;
		if (rec->max_size < tmpl->max_size)
			rec->max_size = tmpl->max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (tmpl->metadata)
			rec->crossing_stripes = check_crossing_stripes(
					global_info, rec->start,
					global_info->nodesize);
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	ret = add_extent_rec_nolookup(extent_cache, tmpl);

	return ret;
}
/*
 * Record a tree-block backref for @bytenr, creating a stub extent
 * record first if none exists yet.
 *
 * @parent:    parent bytenr for a full backref, 0 for a keyed backref
 * @root:      owning root id (used when @parent is 0)
 * @found_ref: nonzero when the ref was seen from the referencing block,
 *             zero when it was found in the extent tree
 *
 * Returns 0 on success, negative errno on failure.
 */
static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, int found_ref)
{
	struct extent_record *rec;
	struct tree_backref *back;
	struct cache_extent *cache;
	int ret;
	bool insert = false;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		/* Create a size-1 stub; the real size arrives later */
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.metadata = 1;
		tmpl.max_size = 1;

		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
		if (ret)
			return ret;

		/* really a bug in cache_extent implement now */
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			return -ENOENT;
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->start != bytenr) {
		/*
		 * Several cause, from unaligned bytenr to over lapping extents
		 */
		return -EEXIST;
	}

	back = find_tree_backref(rec, parent, root);
	if (!back) {
		back = alloc_tree_backref(rec, parent, root);
		if (!back)
			return -ENOMEM;
		insert = true;
	}

	if (found_ref) {
		if (back->node.found_ref) {
			fprintf(stderr,
	"Extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_ref = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr,
	"extent back ref already exists for %llu parent %llu root %llu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_extent_tree = 1;
	}
	if (insert)
		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
			compare_extent_backref));
	check_extent_type(rec);
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
4561 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4562 u64 parent, u64 root, u64 owner, u64 offset,
4563 u32 num_refs, int found_ref, u64 max_size)
4565 struct extent_record *rec;
4566 struct data_backref *back;
4567 struct cache_extent *cache;
4568 int ret;
4569 bool insert = false;
4571 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4572 if (!cache) {
4573 struct extent_record tmpl;
4575 memset(&tmpl, 0, sizeof(tmpl));
4576 tmpl.start = bytenr;
4577 tmpl.nr = 1;
4578 tmpl.max_size = max_size;
4580 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4581 if (ret)
4582 return ret;
4584 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4585 if (!cache)
4586 abort();
4589 rec = container_of(cache, struct extent_record, cache);
4590 if (rec->max_size < max_size)
4591 rec->max_size = max_size;
4594 * If found_ref is set then max_size is the real size and must match the
4595 * existing refs. So if we have already found a ref then we need to
4596 * make sure that this ref matches the existing one, otherwise we need
4597 * to add a new backref so we can notice that the backrefs don't match
4598 * and we need to figure out who is telling the truth. This is to
4599 * account for that awful fsync bug I introduced where we'd end up with
4600 * a btrfs_file_extent_item that would have its length include multiple
4601 * prealloc extents or point inside of a prealloc extent.
4603 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4604 bytenr, max_size);
4605 if (!back) {
4606 back = alloc_data_backref(rec, parent, root, owner, offset,
4607 max_size);
4608 BUG_ON(!back);
4609 insert = true;
4612 if (found_ref) {
4613 BUG_ON(num_refs != 1);
4614 if (back->node.found_ref)
4615 BUG_ON(back->bytes != max_size);
4616 back->node.found_ref = 1;
4617 back->found_ref += 1;
4618 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
4619 back->bytes = max_size;
4620 back->disk_bytenr = bytenr;
4622 /* Need to reinsert if not already in the tree */
4623 if (!insert) {
4624 rb_erase(&back->node.node, &rec->backref_tree);
4625 insert = true;
4628 rec->refs += 1;
4629 rec->content_checked = 1;
4630 rec->owner_ref_checked = 1;
4631 } else {
4632 if (back->node.found_extent_tree) {
4633 fprintf(stderr,
4634 "Extent back ref already exists for %llu parent %llu root %llu owner %llu offset %llu num_refs %lu\n",
4635 (unsigned long long)bytenr,
4636 (unsigned long long)parent,
4637 (unsigned long long)root,
4638 (unsigned long long)owner,
4639 (unsigned long long)offset,
4640 (unsigned long)num_refs);
4642 back->num_refs = num_refs;
4643 back->node.found_extent_tree = 1;
4645 if (insert)
4646 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
4647 compare_extent_backref));
4649 maybe_free_extent_rec(extent_cache, rec);
4650 return 0;
4653 static int add_pending(struct cache_tree *pending,
4654 struct cache_tree *seen, u64 bytenr, u32 size)
4656 int ret;
4658 ret = add_cache_extent(seen, bytenr, size);
4659 if (ret)
4660 return ret;
4661 add_cache_extent(pending, bytenr, size);
4662 return 0;
4665 static int pick_next_pending(struct cache_tree *pending,
4666 struct cache_tree *reada,
4667 struct cache_tree *nodes,
4668 u64 last, struct block_info *bits, int bits_nr,
4669 int *reada_bits)
4671 unsigned long node_start = last;
4672 struct cache_extent *cache;
4673 int ret;
4675 cache = search_cache_extent(reada, 0);
4676 if (cache) {
4677 bits[0].start = cache->start;
4678 bits[0].size = cache->size;
4679 *reada_bits = 1;
4680 return 1;
4682 *reada_bits = 0;
4683 if (node_start > 32768)
4684 node_start -= 32768;
4686 cache = search_cache_extent(nodes, node_start);
4687 if (!cache)
4688 cache = search_cache_extent(nodes, 0);
4690 if (!cache) {
4691 cache = search_cache_extent(pending, 0);
4692 if (!cache)
4693 return 0;
4694 ret = 0;
4695 do {
4696 bits[ret].start = cache->start;
4697 bits[ret].size = cache->size;
4698 cache = next_cache_extent(cache);
4699 ret++;
4700 } while (cache && ret < bits_nr);
4701 return ret;
4704 ret = 0;
4705 do {
4706 bits[ret].start = cache->start;
4707 bits[ret].size = cache->size;
4708 cache = next_cache_extent(cache);
4709 ret++;
4710 } while (cache && ret < bits_nr);
4712 if (bits_nr - ret > 8) {
4713 u64 lookup = bits[0].start + bits[0].size;
4714 struct cache_extent *next;
4716 next = search_cache_extent(pending, lookup);
4717 while (next) {
4718 if (next->start - lookup > 32768)
4719 break;
4720 bits[ret].start = next->start;
4721 bits[ret].size = next->size;
4722 lookup = next->start + next->size;
4723 ret++;
4724 if (ret == bits_nr)
4725 break;
4726 next = next_cache_extent(next);
4727 if (!next)
4728 break;
4731 return ret;
4734 static void free_chunk_record(struct cache_extent *cache)
4736 struct chunk_record *rec;
4738 rec = container_of(cache, struct chunk_record, cache);
4739 list_del_init(&rec->list);
4740 list_del_init(&rec->dextents);
4741 free(rec);
/* Free every chunk record held in @chunk_cache and empty the tree. */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
	cache_tree_free_extents(chunk_cache, free_chunk_record);
}
4749 static void free_device_record(struct rb_node *node)
4751 struct device_record *rec;
4753 rec = container_of(node, struct device_record, node);
4754 free(rec);
/* Generates free_device_cache(): tears down the rb-tree of device records. */
FREE_RB_BASED_TREE(device_cache, free_device_record);
4759 int insert_block_group_record(struct block_group_tree *tree,
4760 struct block_group_record *bg_rec)
4762 int ret;
4764 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4765 if (ret)
4766 return ret;
4768 list_add_tail(&bg_rec->list, &tree->block_groups);
4769 return 0;
4772 static void free_block_group_record(struct cache_extent *cache)
4774 struct block_group_record *rec;
4776 rec = container_of(cache, struct block_group_record, cache);
4777 list_del_init(&rec->list);
4778 free(rec);
/* Free every block group record held in @tree and empty it. */
void free_block_group_tree(struct block_group_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_block_group_record);
}
4786 int insert_device_extent_record(struct device_extent_tree *tree,
4787 struct device_extent_record *de_rec)
4789 int ret;
4792 * Device extent is a bit different from the other extents, because
4793 * the extents which belong to the different devices may have the
4794 * same start and size, so we need use the special extent cache
4795 * search/insert functions.
4797 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4798 if (ret)
4799 return ret;
4801 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4802 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4803 return 0;
4806 static void free_device_extent_record(struct cache_extent *cache)
4808 struct device_extent_record *rec;
4810 rec = container_of(cache, struct device_extent_record, cache);
4811 if (!list_empty(&rec->chunk_list))
4812 list_del_init(&rec->chunk_list);
4813 if (!list_empty(&rec->device_list))
4814 list_del_init(&rec->device_list);
4815 free(rec);
/* Free every device extent record held in @tree and empty it. */
void free_device_extent_tree(struct device_extent_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a v0 extent backref found at @slot in @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID belong to tree
 * blocks; everything else is treated as a data backref.
 * Returns the result of the add_*_backref() helper.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
4844 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4845 struct btrfs_key *key,
4846 int slot)
4848 struct btrfs_chunk *ptr;
4849 struct chunk_record *rec;
4850 int num_stripes, i;
4852 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4853 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4855 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4856 if (!rec) {
4857 fprintf(stderr, "memory allocation failed\n");
4858 exit(-1);
4861 INIT_LIST_HEAD(&rec->list);
4862 INIT_LIST_HEAD(&rec->dextents);
4863 rec->bg_rec = NULL;
4865 rec->cache.start = key->offset;
4866 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4868 rec->generation = btrfs_header_generation(leaf);
4870 rec->objectid = key->objectid;
4871 rec->type = key->type;
4872 rec->offset = key->offset;
4874 rec->length = rec->cache.size;
4875 rec->owner = btrfs_chunk_owner(leaf, ptr);
4876 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4877 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4878 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4879 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4880 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4881 rec->num_stripes = num_stripes;
4882 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4884 for (i = 0; i < rec->num_stripes; ++i) {
4885 rec->stripes[i].devid =
4886 btrfs_stripe_devid_nr(leaf, ptr, i);
4887 rec->stripes[i].offset =
4888 btrfs_stripe_offset_nr(leaf, ptr, i);
4889 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4890 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4891 BTRFS_UUID_SIZE);
4894 return rec;
/*
 * Validate the CHUNK_ITEM at @slot and add it to @chunk_cache.
 *
 * Invalid chunks are reported and skipped (returns 0) so the rest of
 * the check can continue.  Returns non-zero if an overlapping chunk
 * already exists in the cache.
 */
static int process_chunk_item(struct cache_tree *chunk_cache,
			      struct btrfs_key *key, struct extent_buffer *eb,
			      int slot)
{
	struct chunk_record *rec;
	struct btrfs_chunk *chunk;
	int ret = 0;

	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
	/*
	 * Do extra check for this chunk item,
	 *
	 * It's still possible one can craft a leaf with CHUNK_ITEM, with
	 * wrong owner(3) out of chunk tree, to pass both chunk tree check
	 * and owner<->key_type check.
	 */
	ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
				      key->offset);
	if (ret < 0) {
		error("chunk(%llu, %llu) is not valid, ignore it",
		      key->offset, btrfs_chunk_length(eb, chunk));
		return 0;
	}
	rec = btrfs_new_chunk_record(eb, key, slot);
	ret = insert_cache_extent(chunk_cache, &rec->cache);
	if (ret) {
		fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
			rec->offset, rec->length);
		free(rec);
	}

	return ret;
}
4931 static int process_device_item(struct rb_root *dev_cache,
4932 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4934 struct btrfs_dev_item *ptr;
4935 struct device_record *rec;
4936 int ret = 0;
4938 ptr = btrfs_item_ptr(eb,
4939 slot, struct btrfs_dev_item);
4941 rec = malloc(sizeof(*rec));
4942 if (!rec) {
4943 fprintf(stderr, "memory allocation failed\n");
4944 return -ENOMEM;
4947 rec->devid = key->offset;
4948 rec->generation = btrfs_header_generation(eb);
4950 rec->objectid = key->objectid;
4951 rec->type = key->type;
4952 rec->offset = key->offset;
4954 rec->devid = btrfs_device_id(eb, ptr);
4955 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4956 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4958 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4959 if (ret) {
4960 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4961 free(rec);
4964 return ret;
4967 struct block_group_record *
4968 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4969 int slot)
4971 struct btrfs_block_group_item *ptr;
4972 struct block_group_record *rec;
4974 rec = calloc(1, sizeof(*rec));
4975 if (!rec) {
4976 fprintf(stderr, "memory allocation failed\n");
4977 exit(-1);
4980 rec->cache.start = key->objectid;
4981 rec->cache.size = key->offset;
4983 rec->generation = btrfs_header_generation(leaf);
4985 rec->objectid = key->objectid;
4986 rec->type = key->type;
4987 rec->offset = key->offset;
4989 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
4990 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
4992 INIT_LIST_HEAD(&rec->list);
4994 return rec;
4997 static int process_block_group_item(struct block_group_tree *block_group_cache,
4998 struct btrfs_key *key,
4999 struct extent_buffer *eb, int slot)
5001 struct block_group_record *rec;
5002 int ret = 0;
5004 rec = btrfs_new_block_group_record(eb, key, slot);
5005 ret = insert_block_group_record(block_group_cache, rec);
5006 if (ret) {
5007 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5008 rec->objectid, rec->offset);
5009 free(rec);
5012 return ret;
5015 struct device_extent_record *
5016 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5017 struct btrfs_key *key, int slot)
5019 struct device_extent_record *rec;
5020 struct btrfs_dev_extent *ptr;
5022 rec = calloc(1, sizeof(*rec));
5023 if (!rec) {
5024 fprintf(stderr, "memory allocation failed\n");
5025 exit(-1);
5028 rec->cache.objectid = key->objectid;
5029 rec->cache.start = key->offset;
5031 rec->generation = btrfs_header_generation(leaf);
5033 rec->objectid = key->objectid;
5034 rec->type = key->type;
5035 rec->offset = key->offset;
5037 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5038 rec->chunk_objecteid =
5039 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5040 rec->chunk_offset =
5041 btrfs_dev_extent_chunk_offset(leaf, ptr);
5042 rec->length = btrfs_dev_extent_length(leaf, ptr);
5043 rec->cache.size = rec->length;
5045 INIT_LIST_HEAD(&rec->chunk_list);
5046 INIT_LIST_HEAD(&rec->device_list);
5048 return rec;
5051 static int
5052 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5053 struct btrfs_key *key, struct extent_buffer *eb,
5054 int slot)
5056 struct device_extent_record *rec;
5057 int ret;
5059 rec = btrfs_new_device_extent_record(eb, key, slot);
5060 ret = insert_device_extent_record(dev_extent_cache, rec);
5061 if (ret) {
5062 fprintf(stderr,
5063 "Device extent[%llu, %llu, %llu] existed.\n",
5064 rec->objectid, rec->offset, rec->length);
5065 free(rec);
5068 return ret;
/*
 * Record an EXTENT_ITEM/METADATA_ITEM and all of its inline backrefs in
 * @extent_cache.
 *
 * Invalid extents (unaligned bytenr, bad length) are reported and
 * rejected with -EIO.  Returns 0 once the item and its inline refs have
 * been processed.
 */
static int process_extent_item(struct btrfs_root *root,
			       struct cache_tree *extent_cache,
			       struct extent_buffer *eb, int slot)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct btrfs_key key;
	struct extent_record tmpl;
	unsigned long end;
	unsigned long ptr;
	int ret;
	int type;
	u32 item_size = btrfs_item_size_nr(eb, slot);
	u64 refs = 0;
	u64 offset;
	u64 num_bytes;
	int metadata = 0;

	btrfs_item_key_to_cpu(eb, &key, slot);

	/* METADATA_ITEMs encode the tree level in the offset, not a length */
	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		metadata = 1;
		num_bytes = root->fs_info->nodesize;
	} else {
		num_bytes = key.offset;
	}

	if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
		error("ignoring invalid extent, bytenr %llu is not aligned to %u",
		      key.objectid, root->fs_info->sectorsize);
		return -EIO;
	}
	if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* v0 extent items carry only a refcount, no flags/backrefs */
		struct btrfs_extent_item_v0 *ei0;

		if (item_size != sizeof(*ei0)) {
			error(
"invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
				key.objectid, key.type, key.offset,
				btrfs_header_bytenr(eb), slot);
			BUG();
		}
		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
		refs = btrfs_extent_refs_v0(eb, ei0);
#else
		BUG();
#endif
		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = key.objectid;
		tmpl.nr = num_bytes;
		tmpl.extent_item_refs = refs;
		tmpl.metadata = metadata;
		tmpl.found_rec = 1;
		tmpl.max_size = num_bytes;

		return add_extent_rec(extent_cache, &tmpl);
	}

	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
	refs = btrfs_extent_refs(eb, ei);
	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		metadata = 1;
	else
		metadata = 0;
	if (metadata && num_bytes != root->fs_info->nodesize) {
		error("ignore invalid metadata extent, length %llu does not equal to %u",
		      num_bytes, root->fs_info->nodesize);
		return -EIO;
	}
	if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
		error("ignore invalid data extent, length %llu is not aligned to %u",
		      num_bytes, root->fs_info->sectorsize);
		return -EIO;
	}

	memset(&tmpl, 0, sizeof(tmpl));
	tmpl.start = key.objectid;
	tmpl.nr = num_bytes;
	tmpl.extent_item_refs = refs;
	tmpl.metadata = metadata;
	tmpl.found_rec = 1;
	tmpl.max_size = num_bytes;
	add_extent_rec(extent_cache, &tmpl);

	/* walk the inline backrefs that follow the extent item */
	ptr = (unsigned long)(ei + 1);
	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
	    key.type == BTRFS_EXTENT_ITEM_KEY)
		ptr += sizeof(struct btrfs_tree_block_info);

	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(eb, iref);
		offset = btrfs_extent_inline_ref_offset(eb, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_backref(extent_cache, key.objectid,
					0, offset, 0);
			if (ret < 0)
				error(
			"add_tree_backref failed (extent items tree block): %s",
				      strerror(-ret));
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_backref(extent_cache, key.objectid,
					offset, 0, 0);
			if (ret < 0)
				error(
			"add_tree_backref failed (extent items shared block): %s",
				      strerror(-ret));
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			add_data_backref(extent_cache, key.objectid, 0,
					btrfs_extent_data_ref_root(eb, dref),
					btrfs_extent_data_ref_objectid(eb,
								       dref),
					btrfs_extent_data_ref_offset(eb, dref),
					btrfs_extent_data_ref_count(eb, dref),
					0, num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			add_data_backref(extent_cache, key.objectid, offset,
					0, 0, 0,
					btrfs_shared_data_ref_count(eb, sref),
					0, num_bytes);
			break;
		default:
			fprintf(stderr,
				"corrupt extent record: key [%llu,%u,%llu]\n",
				key.objectid, key.type, num_bytes);
			goto out;
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	WARN_ON(ptr > end);
out:
	return 0;
}
/*
 * Verify that [@offset, @offset + @bytes) inside block group @cache is
 * covered by exactly one free space entry, after carving out any
 * superblock mirrors that land inside the range.  A matching entry is
 * removed from the free space ctl so leftovers can be detected later.
 *
 * Returns 0 on success, -EINVAL on mismatch, or an error from
 * btrfs_rmap_block().
 */
static int check_cache_range(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache,
			     u64 offset, u64 bytes)
{
	struct btrfs_free_space *entry;
	u64 *logical;
	u64 bytenr;
	int stripe_len;
	int i, nr, ret;

	/* superblock copies are never free space: trim them off the range */
	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(root->fs_info,
				       cache->key.objectid, bytenr,
				       &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			/* stripe entirely outside the checked range? */
			if (logical[nr] + stripe_len <= offset)
				continue;
			if (offset + bytes <= logical[nr])
				continue;
			if (logical[nr] == offset) {
				if (stripe_len >= bytes) {
					free(logical);
					return 0;
				}
				bytes -= stripe_len;
				offset += stripe_len;
			} else if (logical[nr] < offset) {
				if (logical[nr] + stripe_len >=
				    offset + bytes) {
					free(logical);
					return 0;
				}
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			} else {
				/*
				 * Could be tricky, the super may land in the
				 * middle of the area we're checking.  First
				 * check the easiest case, it's at the end.
				 */
				if (logical[nr] + stripe_len >=
				    bytes + offset) {
					bytes = logical[nr] - offset;
					continue;
				}

				/* Check the left side */
				ret = check_cache_range(root, cache,
							offset,
							logical[nr] - offset);
				if (ret) {
					free(logical);
					return ret;
				}

				/* Now we continue with the right side */
				bytes = (offset + bytes) -
					(logical[nr] + stripe_len);
				offset = logical[nr] + stripe_len;
			}
		}

		free(logical);
	}

	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
	if (!entry) {
		fprintf(stderr, "there is no free space entry for %llu-%llu\n",
			offset, offset+bytes);
		return -EINVAL;
	}

	if (entry->offset != offset) {
		fprintf(stderr, "wanted offset %llu, found %llu\n", offset,
			entry->offset);
		return -EINVAL;
	}

	if (entry->bytes != bytes) {
		fprintf(stderr, "wanted bytes %llu, found %llu for off %llu\n",
			bytes, entry->bytes, offset);
		return -EINVAL;
	}

	/* consume the entry so verify_space_cache() can detect leftovers */
	unlink_free_space(cache->free_space_ctl, entry);
	free(entry);
	return 0;
}
/*
 * Cross-check a block group's free space cache against the extent tree:
 * every gap between allocated extents must be present in the cache.
 * Matching entries are consumed as they are found; anything left in the
 * cache afterwards is space that is actually allocated.
 *
 * Returns 0 when the cache matches, negative on mismatch or error.
 */
static int verify_space_cache(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 last;
	int ret = 0;

	root = root->fs_info->extent_root;

	/* never look below the first superblock copy */
	last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	btrfs_init_path(&path);
	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	ret = 0;
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		/* past the end of this block group */
		if (key.objectid >= cache->key.offset + cache->key.objectid)
			break;
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}

		if (last == key.objectid) {
			/* contiguous with the previous extent: no gap */
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				last = key.objectid + key.offset;
			else
				last = key.objectid + root->fs_info->nodesize;
			path.slots[0]++;
			continue;
		}

		/* [last, key.objectid) is unallocated: must be in the cache */
		ret = check_cache_range(root, cache, last,
					key.objectid - last);
		if (ret)
			break;
		if (key.type == BTRFS_EXTENT_ITEM_KEY)
			last = key.objectid + key.offset;
		else
			last = key.objectid + root->fs_info->nodesize;
		path.slots[0]++;
	}

	/* trailing gap up to the end of the block group */
	if (last < cache->key.objectid + cache->key.offset)
		ret = check_cache_range(root, cache, last,
					cache->key.objectid +
					cache->key.offset - last);

out:
	btrfs_release_path(&path);

	if (!ret &&
	    !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
		fprintf(stderr, "There are still entries left in the space "
			"cache\n");
		ret = -EINVAL;
	}

	return ret;
}
5388 static int check_space_cache(struct btrfs_root *root)
5390 struct btrfs_block_group_cache *cache;
5391 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5392 int ret;
5393 int error = 0;
5395 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5396 btrfs_super_generation(root->fs_info->super_copy) !=
5397 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5398 printf("cache and super generation don't match, space cache "
5399 "will be invalidated\n");
5400 return 0;
5403 while (1) {
5404 ctx.item_count++;
5405 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5406 if (!cache)
5407 break;
5409 start = cache->key.objectid + cache->key.offset;
5410 if (!cache->free_space_ctl) {
5411 if (btrfs_init_free_space_ctl(cache,
5412 root->fs_info->sectorsize)) {
5413 ret = -ENOMEM;
5414 break;
5416 } else {
5417 btrfs_remove_free_space_cache(cache);
5420 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
5421 ret = exclude_super_stripes(root, cache);
5422 if (ret) {
5423 fprintf(stderr, "could not exclude super stripes: %s\n",
5424 strerror(-ret));
5425 error++;
5426 continue;
5428 ret = load_free_space_tree(root->fs_info, cache);
5429 free_excluded_extents(root, cache);
5430 if (ret < 0) {
5431 fprintf(stderr, "could not load free space tree: %s\n",
5432 strerror(-ret));
5433 error++;
5434 continue;
5436 error += ret;
5437 } else {
5438 ret = load_free_space_cache(root->fs_info, cache);
5439 if (ret < 0)
5440 error++;
5441 if (ret <= 0)
5442 continue;
5445 ret = verify_space_cache(root, cache);
5446 if (ret) {
5447 fprintf(stderr, "cache appears valid but isn't %llu\n",
5448 cache->key.objectid);
5449 error++;
5453 return error ? -EINVAL : 0;
/*
 * Check data checksum for [@bytenr, @bytenr + @num_bytes).
 *
 * Return <0 for fatal error (fails to read checksum/data or allocate memory).
 * Return >0 for csum mismatch for any copy.
 * Return 0 if everything is OK.
 */
static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, unsigned long leaf_offset,
			      struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 offset = 0;
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
	char *data;
	unsigned long csum_offset;
	u32 csum;
	u32 csum_expected;
	u64 read_len;
	u64 data_checked = 0;
	u64 tmp;
	int ret = 0;
	int mirror;
	int num_copies;
	bool csum_mismatch = false;

	/* csums always cover whole sectors */
	if (num_bytes % fs_info->sectorsize)
		return -EINVAL;

	data = malloc(num_bytes);
	if (!data)
		return -ENOMEM;

	num_copies = btrfs_num_copies(root->fs_info, bytenr, num_bytes);
	while (offset < num_bytes) {
		/*
		 * Mirror 0 means 'read from any valid copy', so it's skipped.
		 * The indexes 1-N represent the n-th copy for levels with
		 * redundancy.
		 */
		for (mirror = 1; mirror <= num_copies; mirror++) {
			read_len = num_bytes - offset;
			/* read as much space once a time */
			ret = read_extent_data(fs_info, data + offset,
					bytenr + offset, &read_len, mirror);
			if (ret)
				goto out;

			data_checked = 0;
			/* verify every 4k data's checksum */
			while (data_checked < read_len) {
				csum = ~(u32)0;
				tmp = offset + data_checked;

				csum = btrfs_csum_data((char *)data + tmp,
						csum, fs_info->sectorsize);
				btrfs_csum_final(csum, (u8 *)&csum);

				/* expected csum lives in the csum tree leaf */
				csum_offset = leaf_offset +
					 tmp / fs_info->sectorsize * csum_size;
				read_extent_buffer(eb, (char *)&csum_expected,
						   csum_offset, csum_size);
				if (csum != csum_expected) {
					csum_mismatch = true;
					fprintf(stderr,
			"mirror %d bytenr %llu csum %u expected csum %u\n",
						mirror, bytenr + tmp,
						csum, csum_expected);
				}
				data_checked += fs_info->sectorsize;
			}
		}
		offset += read_len;
	}
out:
	free(data);
	/* mismatches surface as a positive return, fatal errors as <0 */
	if (!ret && csum_mismatch)
		ret = 1;
	return ret;
}
/*
 * Verify that every byte of [@bytenr, @bytenr + @num_bytes) is covered
 * by at least one EXTENT_ITEM in the extent tree.
 *
 * Returns 0 when fully covered, 1 when part of the range has no extent
 * record, negative on search errors.
 */
static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	btrfs_init_path(&path);
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

again:
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
				0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up extent record %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	} else if (ret) {
		/* no exact hit: step back to the item preceding @bytenr */
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
	}

	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

	/*
	 * Block group items come before extent items if they have the same
	 * bytenr, so walk back one more just in case.  Dear future traveller,
	 * first congrats on mastering time travel.  Now if it's not too much
	 * trouble could you go back to 2006 and tell Chris to make the
	 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
	 * EXTENT_ITEM_KEY please?
	 */
	while (key.type > BTRFS_EXTENT_ITEM_KEY) {
		if (path.slots[0] > 0) {
			path.slots[0]--;
		} else {
			ret = btrfs_prev_leaf(root, &path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;
			}
		}
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	}

	/* walk forward, trimming the range as extent items cover it */
	while (num_bytes) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				btrfs_release_path(&path);
				return ret;
			} else if (ret) {
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid + key.offset < bytenr) {
			path.slots[0]++;
			continue;
		}
		if (key.objectid > bytenr + num_bytes)
			break;

		if (key.objectid == bytenr) {
			if (key.offset >= num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes -= key.offset;
			bytenr += key.offset;
		} else if (key.objectid < bytenr) {
			if (key.objectid + key.offset >= bytenr + num_bytes) {
				num_bytes = 0;
				break;
			}
			num_bytes = (bytenr + num_bytes) -
				(key.objectid + key.offset);
			bytenr = key.objectid + key.offset;
		} else {
			if (key.objectid + key.offset < bytenr + num_bytes) {
				u64 new_start = key.objectid + key.offset;
				u64 new_bytes = bytenr + num_bytes - new_start;

				/*
				 * Weird case, the extent is in the middle of
				 * our range, we'll have to search one side
				 * and then the other.  Not sure if this happens
				 * in real life, but no harm in coding it up
				 * anyway just in case.
				 */
				btrfs_release_path(&path);
				ret = check_extent_exists(root, new_start,
							  new_bytes);
				if (ret) {
					fprintf(stderr, "Right section didn't "
						"have a record\n");
					break;
				}
				num_bytes = key.objectid - bytenr;
				goto again;
			}
			num_bytes = key.objectid - bytenr;
		}
		path.slots[0]++;
	}
	ret = 0;

out:
	if (num_bytes && !ret) {
		fprintf(stderr,
			"there are no extents for csum range %llu-%llu\n",
			bytenr, bytenr+num_bytes);
		ret = 1;
	}

	btrfs_release_path(&path);
	return ret;
}
/*
 * Walk the whole csum tree: optionally verify each data checksum against
 * the on-disk data, and make sure every csum range is backed by an
 * extent record.
 *
 * Returns the number of problems found, negative on fatal errors.
 */
static int check_csums(struct btrfs_root *root)
{
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 offset = 0, num_bytes = 0;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
	int errors = 0;
	int ret;
	u64 data_len;
	unsigned long leaf_offset;
	bool verify_csum = !!check_data_csum;

	root = root->fs_info->csum_root;
	if (!extent_buffer_uptodate(root->node)) {
		fprintf(stderr, "No valid csum tree found\n");
		return -ENOENT;
	}

	btrfs_init_path(&path);
	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error searching csum tree %d\n", ret);
		btrfs_release_path(&path);
		return ret;
	}

	if (ret > 0 && path.slots[0])
		path.slots[0]--;
	ret = 0;

	/*
	 * For metadata dump (btrfs-image) all data is wiped so verifying data
	 * csum is meaningless and will always report csum error.
	 */
	if (check_data_csum && (btrfs_super_flags(root->fs_info->super_copy) &
	    (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))) {
		printf("skip data csum verification for metadata dump\n");
		verify_csum = false;
	}

	while (1) {
		ctx.item_count++;
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0) {
				fprintf(stderr, "Error going to next leaf "
					"%d\n", ret);
				break;
			}
			if (ret)
				break;
		}
		leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
			path.slots[0]++;
			continue;
		}

		/* each csum_size entry in the item covers one data sector */
		data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
			      csum_size) * root->fs_info->sectorsize;
		if (!verify_csum)
			goto skip_csum_check;
		leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
		ret = check_extent_csums(root, key.offset, data_len,
					 leaf_offset, leaf);
		/*
		 * Only break for fatal errors, if mismatch is found, continue
		 * checking until all extents are checked.
		 */
		if (ret < 0)
			break;
		if (ret > 0)
			errors++;
skip_csum_check:
		/* accumulate contiguous csum ranges; check on discontinuity */
		if (!num_bytes) {
			offset = key.offset;
		} else if (key.offset != offset + num_bytes) {
			ret = check_extent_exists(root, offset, num_bytes);
			if (ret) {
				fprintf(stderr,
		"csum exists for %llu-%llu but there is no extent record\n",
					offset, offset+num_bytes);
				errors++;
			}
			offset = key.offset;
			num_bytes = 0;
		}
		num_bytes += data_len;
		path.slots[0]++;
	}

	btrfs_release_path(&path);
	return errors;
}
5778 static int is_dropped_key(struct btrfs_key *key,
5779 struct btrfs_key *drop_key)
5781 if (key->objectid < drop_key->objectid)
5782 return 1;
5783 else if (key->objectid == drop_key->objectid) {
5784 if (key->type < drop_key->type)
5785 return 1;
5786 else if (key->type == drop_key->type) {
5787 if (key->offset < drop_key->offset)
5788 return 1;
5791 return 0;
/*
 * Here are the rules for FULL_BACKREF.
 *
 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 *    FULL_BACKREF set.
 * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
 *    if it happened after the relocation occurred since we'll have dropped the
 *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
 *    have no real way to know for sure.
 *
 * We process the blocks one root at a time, and we start from the lowest root
 * objectid and go to the highest.  So we can just lookup the owner backref for
 * the record and if we don't find it then we know it doesn't exist and we have
 * a FULL BACKREF.
 *
 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
 * assumption and simply indicate that we _think_ that the FULL BACKREF needs
 * to be set or not and then we can check later once we've gathered all the
 * refs.
 */
static int calc_extent_flag(struct cache_tree *extent_cache,
			    struct extent_buffer *buf,
			    struct root_item_record *ri,
			    u64 *flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct tree_backref *tback;
	u64 owner = 0;

	cache = lookup_cache_extent(extent_cache, buf->start, 1);
	/* we have added this extent before */
	if (!cache)
		return -ENOENT;

	rec = container_of(cache, struct extent_record, cache);

	/*
	 * Except file/reloc tree, we can not have
	 * FULL BACKREF MODE
	 */
	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
		goto normal;
	/*
	 * root node
	 */
	if (buf->start == ri->bytenr)
		goto normal;

	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
		goto full_backref;

	owner = btrfs_header_owner(buf);
	if (owner == ri->objectid)
		goto normal;

	/* no owner backref recorded -> only full backrefs reference it */
	tback = find_tree_backref(rec, 0, owner);
	if (!tback)
		goto full_backref;
normal:
	*flags = 0;
	/* flag disagrees with what the extent tree recorded earlier */
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 0)
		rec->bad_full_backref = 1;
	return 0;
full_backref:
	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	if (rec->flag_block_full_backref != FLAG_UNSET &&
	    rec->flag_block_full_backref != 1)
		rec->bad_full_backref = 1;
	return 0;
}
5867 static void report_mismatch_key_root(u8 key_type, u64 rootid)
5869 fprintf(stderr, "Invalid key type(");
5870 print_key_type(stderr, 0, key_type);
5871 fprintf(stderr, ") found in root(");
5872 print_objectid(stderr, rootid, 0);
5873 fprintf(stderr, ")\n");
5877 * Check if the key is valid with its extent buffer.
5879 * This is a early check in case invalid key exists in a extent buffer
5880 * This is not comprehensive yet, but should prevent wrong key/item passed
5881 * further
5883 static int check_type_with_root(u64 rootid, u8 key_type)
5885 switch (key_type) {
5886 /* Only valid in chunk tree */
5887 case BTRFS_DEV_ITEM_KEY:
5888 case BTRFS_CHUNK_ITEM_KEY:
5889 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
5890 goto err;
5891 break;
5892 /* valid in csum and log tree */
5893 case BTRFS_CSUM_TREE_OBJECTID:
5894 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
5895 is_fstree(rootid)))
5896 goto err;
5897 break;
5898 case BTRFS_EXTENT_ITEM_KEY:
5899 case BTRFS_METADATA_ITEM_KEY:
5900 case BTRFS_BLOCK_GROUP_ITEM_KEY:
5901 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
5902 goto err;
5903 break;
5904 case BTRFS_ROOT_ITEM_KEY:
5905 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
5906 goto err;
5907 break;
5908 case BTRFS_DEV_EXTENT_KEY:
5909 if (rootid != BTRFS_DEV_TREE_OBJECTID)
5910 goto err;
5911 break;
5913 return 0;
5914 err:
5915 report_mismatch_key_root(key_type, rootid);
5916 return -EINVAL;
/*
 * Pop the next batch of queued tree blocks, read the first one and record
 * everything it references: chunk/dev/block-group items go into their
 * dedicated caches, tree and data backref items into @extent_cache, and
 * child node pointers are pushed back onto @nodes/@pending for later
 * iterations.  Also accumulates the global byte-usage statistics.
 *
 * Returns 1 when nothing is left to process, 0 on success, negative errno
 * on error.  The extent buffer is always released before returning.
 */
static int run_next_block(struct btrfs_root *root,
			  struct block_info *bits,
			  int bits_nr,
			  u64 *last,
			  struct cache_tree *pending,
			  struct cache_tree *seen,
			  struct cache_tree *reada,
			  struct cache_tree *nodes,
			  struct cache_tree *extent_cache,
			  struct cache_tree *chunk_cache,
			  struct rb_root *dev_cache,
			  struct block_group_tree *block_group_cache,
			  struct device_extent_tree *dev_extent_cache,
			  struct root_item_record *ri)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;
	struct extent_record *rec = NULL;
	u64 bytenr;
	u32 size;
	u64 parent;
	u64 owner;
	u64 flags;
	u64 ptr;
	u64 gen = 0;
	int ret = 0;
	int i;
	int nritems;
	struct btrfs_key key;
	struct cache_extent *cache;
	int reada_bits;

	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
				    bits_nr, &reada_bits);
	if (nritems == 0)
		return 1;

	/* Kick off readahead for every block in the batch we haven't seen. */
	if (!reada_bits) {
		for (i = 0; i < nritems; i++) {
			ret = add_cache_extent(reada, bits[i].start,
					       bits[i].size);
			if (ret == -EEXIST)
				continue;

			/* fixme, get the parent transid */
			readahead_tree_block(fs_info, bits[i].start, 0);
		}
	}
	*last = bits[0].start;
	bytenr = bits[0].start;
	size = bits[0].size;

	/* Only the first block of the batch is processed this call. */
	cache = lookup_cache_extent(pending, bytenr, size);
	if (cache) {
		remove_cache_extent(pending, cache);
		free(cache);
	}
	cache = lookup_cache_extent(reada, bytenr, size);
	if (cache) {
		remove_cache_extent(reada, cache);
		free(cache);
	}
	cache = lookup_cache_extent(nodes, bytenr, size);
	if (cache) {
		remove_cache_extent(nodes, cache);
		free(cache);
	}
	cache = lookup_cache_extent(extent_cache, bytenr, size);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		gen = rec->parent_generation;
	}

	/* fixme, get the real parent transid */
	buf = read_tree_block(root->fs_info, bytenr, gen);
	if (!extent_buffer_uptodate(buf)) {
		record_bad_block_io(root->fs_info,
				    extent_cache, bytenr, size);
		goto out;
	}

	nritems = btrfs_header_nritems(buf);

	/*
	 * Determine the block's backref mode: from the extent tree when it
	 * is trusted, otherwise computed from the header/backref state.
	 */
	flags = 0;
	if (!init_extent_tree) {
		ret = btrfs_lookup_extent_info(NULL, fs_info, bytenr,
				       btrfs_header_level(buf), 1, NULL,
				       &flags);
		if (ret < 0) {
			ret = calc_extent_flag(extent_cache, buf, ri, &flags);
			if (ret < 0) {
				fprintf(stderr, "Couldn't calc extent flags\n");
				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			}
		}
	} else {
		flags = 0;
		ret = calc_extent_flag(extent_cache, buf, ri, &flags);
		if (ret < 0) {
			fprintf(stderr, "Couldn't calc extent flags\n");
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		}
	}

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		if (ri != NULL &&
		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    ri->objectid == btrfs_header_owner(buf)) {
			/*
			 * Ok we got to this block from it's original owner and
			 * we have FULL_BACKREF set.  Relocation can leave
			 * converted blocks over so this is altogether possible,
			 * however it's not possible if the generation > the
			 * last snapshot, so check for this case.
			 */
			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
			    btrfs_header_generation(buf) > ri->last_snapshot) {
				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
				rec->bad_full_backref = 1;
			}
		}
	} else {
		/* Reloc trees / reloc-flagged headers imply FULL_BACKREF. */
		if (ri != NULL &&
		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
			rec->bad_full_backref = 1;
		}
	}

	/* Full backref blocks are keyed by parent bytenr, others by owner. */
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
		rec->flag_block_full_backref = 1;
		parent = bytenr;
		owner = 0;
	} else {
		rec->flag_block_full_backref = 0;
		parent = 0;
		owner = btrfs_header_owner(buf);
	}

	ret = check_block(root, extent_cache, buf, flags);
	if (ret)
		goto out;

	if (btrfs_is_leaf(buf)) {
		btree_space_waste += btrfs_leaf_free_space(buf);
		for (i = 0; i < nritems; i++) {
			struct btrfs_file_extent_item *fi;

			btrfs_item_key_to_cpu(buf, &key, i);
			/*
			 * Check key type against the leaf owner.
			 * Could filter quite a lot of early error if
			 * owner is correct
			 */
			if (check_type_with_root(btrfs_header_owner(buf),
						 key.type)) {
				fprintf(stderr, "ignoring invalid key\n");
				continue;
			}
			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY) {
				process_extent_item(root, extent_cache, buf,
						    i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
				total_csum_bytes +=
					btrfs_item_size_nr(buf, i);
				continue;
			}
			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
				process_chunk_item(chunk_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_ITEM_KEY) {
				process_device_item(dev_cache, &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
				process_block_group_item(block_group_cache,
							 &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_DEV_EXTENT_KEY) {
				process_device_extent_item(dev_extent_cache,
							   &key, buf, i);
				continue;
			}
			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
				process_extent_ref_v0(extent_cache, buf, i);
#else
				BUG();
#endif
				continue;
			}

			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
						key.objectid, 0, key.offset, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf tree block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ret = add_tree_backref(extent_cache,
						key.objectid, key.offset, 0, 0);
				if (ret < 0)
					error(
				"add_tree_backref failed (leaf shared block): %s",
					      strerror(-ret));
				continue;
			}
			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
				struct btrfs_extent_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_extent_data_ref);
				add_data_backref(extent_cache,
					key.objectid, 0,
					btrfs_extent_data_ref_root(buf, ref),
					btrfs_extent_data_ref_objectid(buf,
								       ref),
					btrfs_extent_data_ref_offset(buf, ref),
					btrfs_extent_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
				struct btrfs_shared_data_ref *ref;

				ref = btrfs_item_ptr(buf, i,
						struct btrfs_shared_data_ref);
				add_data_backref(extent_cache,
					key.objectid, key.offset, 0, 0, 0,
					btrfs_shared_data_ref_count(buf, ref),
					0, root->fs_info->sectorsize);
				continue;
			}
			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
				struct bad_item *bad;

				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
					continue;
				if (!owner)
					continue;
				/* Remember orphans for later deletion. */
				bad = malloc(sizeof(struct bad_item));
				if (!bad)
					continue;
				INIT_LIST_HEAD(&bad->list);
				memcpy(&bad->key, &key,
				       sizeof(struct btrfs_key));
				bad->root_id = owner;
				list_add_tail(&bad->list, &delete_items);
				continue;
			}
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
				continue;

			data_bytes_allocated +=
				btrfs_file_extent_disk_num_bytes(buf, fi);
			/* Catch accounting underflow/overflow wrap. */
			if (data_bytes_allocated < root->fs_info->sectorsize)
				abort();

			data_bytes_referenced +=
				btrfs_file_extent_num_bytes(buf, fi);
			add_data_backref(extent_cache,
				btrfs_file_extent_disk_bytenr(buf, fi),
				parent, owner, key.objectid, key.offset -
				btrfs_file_extent_offset(buf, fi), 1, 1,
				btrfs_file_extent_disk_num_bytes(buf, fi));
		}
	} else {
		int level;

		level = btrfs_header_level(buf);
		for (i = 0; i < nritems; i++) {
			struct extent_record tmpl;

			ptr = btrfs_node_blockptr(buf, i);
			size = root->fs_info->nodesize;
			btrfs_node_key_to_cpu(buf, &key, i);
			/* Skip pointers below a snapshot's drop progress. */
			if (ri != NULL) {
				if ((level == ri->drop_level)
				    && is_dropped_key(&key, &ri->drop_key)) {
					continue;
				}
			}

			memset(&tmpl, 0, sizeof(tmpl));
			btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
			tmpl.parent_generation =
				btrfs_node_ptr_generation(buf, i);
			tmpl.start = ptr;
			tmpl.nr = size;
			tmpl.refs = 1;
			tmpl.metadata = 1;
			tmpl.max_size = size;
			ret = add_extent_rec(extent_cache, &tmpl);
			if (ret < 0)
				goto out;

			ret = add_tree_backref(extent_cache, ptr, parent,
					       owner, 1);
			if (ret < 0) {
				error(
				"add_tree_backref failed (non-leaf block): %s",
				      strerror(-ret));
				continue;
			}

			/* Level-1 children are leaves, queue accordingly. */
			if (level > 1)
				add_pending(nodes, seen, ptr, size);
			else
				add_pending(pending, seen, ptr, size);
		}
		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
				      nritems) * sizeof(struct btrfs_key_ptr);
	}
	total_btree_bytes += buf->len;
	if (fs_root_objectid(btrfs_header_owner(buf)))
		total_fs_tree_bytes += buf->len;
	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += buf->len;
out:
	free_extent_buffer(buf);
	return ret;
}
6263 static int add_root_to_pending(struct extent_buffer *buf,
6264 struct cache_tree *extent_cache,
6265 struct cache_tree *pending,
6266 struct cache_tree *seen,
6267 struct cache_tree *nodes,
6268 u64 objectid)
6270 struct extent_record tmpl;
6271 int ret;
6273 if (btrfs_header_level(buf) > 0)
6274 add_pending(nodes, seen, buf->start, buf->len);
6275 else
6276 add_pending(pending, seen, buf->start, buf->len);
6278 memset(&tmpl, 0, sizeof(tmpl));
6279 tmpl.start = buf->start;
6280 tmpl.nr = buf->len;
6281 tmpl.is_root = 1;
6282 tmpl.refs = 1;
6283 tmpl.metadata = 1;
6284 tmpl.max_size = buf->len;
6285 add_extent_rec(extent_cache, &tmpl);
6287 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6288 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6289 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6290 0, 1);
6291 else
6292 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6294 return ret;
/* as we fix the tree, we might be deleting blocks that
 * we're tracking for repair.  This hook makes sure we
 * remove any backrefs for blocks as we are fixing them.
 *
 * Called with the coordinates of a ref being dropped; decrements the
 * matching cached backref/extent counters and frees bookkeeping that is
 * no longer needed.  Always returns 0 (missing cache entries are not an
 * error here).
 */
static int free_extent_hook(struct btrfs_fs_info *fs_info,
			    u64 bytenr, u64 num_bytes, u64 parent,
			    u64 root_objectid, u64 owner, u64 offset,
			    int refs_to_drop)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int is_data;
	struct cache_tree *extent_cache = fs_info->fsck_extent_cache;

	/* Data extents are owned by inodes, i.e. objectids >= FIRST_FREE. */
	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (is_data) {
		struct data_backref *back;

		back = find_data_backref(rec, parent, root_objectid, owner,
					 offset, 1, bytenr, num_bytes);
		if (!back)
			goto out;
		/* Drop the fs-tree side refs we counted for this backref. */
		if (back->node.found_ref) {
			back->found_ref -= refs_to_drop;
			if (rec->refs)
				rec->refs -= refs_to_drop;
		}
		/* Drop the extent-tree side refs. */
		if (back->node.found_extent_tree) {
			back->num_refs -= refs_to_drop;
			if (rec->extent_item_refs)
				rec->extent_item_refs -= refs_to_drop;
		}
		if (back->found_ref == 0)
			back->node.found_ref = 0;
		if (back->num_refs == 0)
			back->node.found_extent_tree = 0;

		/*
		 * NOTE(review): this frees the backref when the extent-tree
		 * ref is gone but an fs-tree ref remains — the condition
		 * looks inverted (one would expect !found_ref here, matching
		 * the "fully dropped" state).  Confirm intent upstream.
		 */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	} else {
		struct tree_backref *back;

		back = find_tree_backref(rec, parent, root_objectid);
		if (!back)
			goto out;
		/* Tree blocks carry at most one ref per backref. */
		if (back->node.found_ref) {
			if (rec->refs)
				rec->refs--;
			back->node.found_ref = 0;
		}
		if (back->node.found_extent_tree) {
			if (rec->extent_item_refs)
				rec->extent_item_refs--;
			back->node.found_extent_tree = 0;
		}
		/* NOTE(review): same suspicious condition as the data case. */
		if (!back->node.found_extent_tree && back->node.found_ref) {
			rb_erase(&back->node.node, &rec->backref_tree);
			free(back);
		}
	}
	maybe_free_extent_rec(extent_cache, rec);
out:
	return 0;
}
/*
 * Delete every extent-tree item keyed by @bytenr — the extent/metadata
 * item itself plus all backref item types — walking the key space down
 * from (bytenr, max type, max offset).  Block group accounting is updated
 * when an EXTENT_ITEM or METADATA_ITEM is removed.
 *
 * Returns 0 on success or the first btrfs error encountered; the path is
 * always released.
 */
static int delete_extent_records(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;
	int slot;

	key.objectid = bytenr;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(trans, fs_info->extent_root, &key,
					path, 0, 1);
		if (ret < 0)
			break;

		/* Search lands past the last item <= key; step back to it. */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		ret = 0;

		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
			/* Not a deletable type: skip below it and retry. */
			btrfs_release_path(path);
			if (found_key.type == 0) {
				if (found_key.offset == 0)
					break;
				key.offset = found_key.offset - 1;
				key.type = found_key.type;
			}
			key.type = found_key.type - 1;
			key.offset = (u64)-1;
			continue;
		}

		fprintf(stderr,
			"repair deleting extent record: key [%llu,%u,%llu]\n",
			found_key.objectid, found_key.type, found_key.offset);

		ret = btrfs_del_item(trans, fs_info->extent_root, path);
		if (ret)
			break;
		btrfs_release_path(path);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
			/* EXTENT_ITEM offset is the byte length; metadata
			 * items are always one node in size. */
			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
				found_key.offset : fs_info->nodesize;

			ret = btrfs_update_block_group(fs_info->extent_root,
						       bytenr, bytes, 0, 0);
			if (ret)
				break;
		}
	}

	btrfs_release_path(path);
	return ret;
}
/*
 * for a single backref, this will allocate a new extent
 * and add the backref to it.
 *
 * @allocated: non-zero when the extent item already exists, in which case
 *             only the ref increments are performed.
 * @flags:     extent flags to store for tree blocks (e.g. FULL_BACKREF).
 *
 * Returns 0 on success, btrfs error code otherwise; @path is released.
 */
static int record_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *info,
			 struct btrfs_path *path,
			 struct extent_record *rec,
			 struct extent_backref *back,
			 int allocated, u64 flags)
{
	int ret = 0;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_key ins_key;
	struct btrfs_extent_item *ei;
	struct data_backref *dback;
	struct btrfs_tree_block_info *bi;

	/* Tree blocks occupy at least one node. */
	if (!back->is_data)
		rec->max_size = max_t(u64, rec->max_size,
				    info->nodesize);

	if (!allocated) {
		/* Insert a fresh extent item; tree blocks carry extra info. */
		u32 item_size = sizeof(*ei);

		if (!back->is_data)
			item_size += sizeof(*bi);

		ins_key.objectid = rec->start;
		ins_key.offset = rec->max_size;
		ins_key.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_insert_empty_item(trans, extent_root, path,
					      &ins_key, item_size);
		if (ret)
			goto fail;

		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);

		btrfs_set_extent_refs(leaf, ei, 0);
		btrfs_set_extent_generation(leaf, ei, rec->generation);

		if (back->is_data) {
			btrfs_set_extent_flags(leaf, ei,
					       BTRFS_EXTENT_FLAG_DATA);
		} else {
			struct btrfs_disk_key copy_key;

			/* Fill the tree_block_info that follows the item. */
			bi = (struct btrfs_tree_block_info *)(ei + 1);
			memset_extent_buffer(leaf, 0, (unsigned long)bi,
					     sizeof(*bi));

			btrfs_set_disk_key_objectid(&copy_key,
						    rec->info_objectid);
			btrfs_set_disk_key_type(&copy_key, 0);
			btrfs_set_disk_key_offset(&copy_key, 0);

			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
			btrfs_set_tree_block_key(leaf, bi, &copy_key);

			btrfs_set_extent_flags(leaf, ei,
					flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
		}

		btrfs_mark_buffer_dirty(leaf);
		ret = btrfs_update_block_group(extent_root, rec->start,
					       rec->max_size, 1, 0);
		if (ret)
			goto fail;
		btrfs_release_path(path);
	}

	if (back->is_data) {
		u64 parent;
		int i;

		dback = to_data_backref(back);
		if (back->full_backref)
			parent = dback->parent;
		else
			parent = 0;

		/* Add one ref per fs-tree reference we actually found. */
		for (i = 0; i < dback->found_ref; i++) {
			/* if parent != 0, we're doing a full backref
			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
			 * just makes the backref allocator create a data
			 * backref
			 */
			ret = btrfs_inc_extent_ref(trans, info->extent_root,
						   rec->start, rec->max_size,
						   parent,
						   dback->root,
						   parent ?
						   BTRFS_FIRST_FREE_OBJECTID :
						   dback->owner,
						   dback->offset);
			if (ret)
				break;
		}
		fprintf(stderr,
"adding new data backref on %llu %s %llu owner %llu offset %llu found %d\n",
			(unsigned long long)rec->start,
			back->full_backref ? "parent" : "root",
			back->full_backref ? (unsigned long long)parent :
					     (unsigned long long)dback->root,
			(unsigned long long)dback->owner,
			(unsigned long long)dback->offset, dback->found_ref);
	} else {
		u64 parent;
		struct tree_backref *tback;

		tback = to_tree_backref(back);
		if (back->full_backref)
			parent = tback->parent;
		else
			parent = 0;

		ret = btrfs_inc_extent_ref(trans, info->extent_root,
					   rec->start, rec->max_size,
					   parent, tback->root, 0, 0);
		fprintf(stderr,
"adding new tree backref on start %llu len %llu parent %llu root %llu\n",
			rec->start, rec->max_size, parent, tback->root);
	}
fail:
	btrfs_release_path(path);
	return ret;
}
6582 static struct extent_entry *find_entry(struct list_head *entries,
6583 u64 bytenr, u64 bytes)
6585 struct extent_entry *entry = NULL;
6587 list_for_each_entry(entry, entries, list) {
6588 if (entry->bytenr == bytenr && entry->bytes == bytes)
6589 return entry;
6592 return NULL;
/*
 * Pick the entry with the strictly highest vote count from @entries,
 * ignoring entries whose votes are all broken.  Returns NULL when the
 * trustworthy entries are tied (no clear winner).
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
	struct extent_entry *entry, *best = NULL, *prev = NULL;

	list_for_each_entry(entry, entries, list) {
		/*
		 * If there are as many broken entries as entries then we know
		 * not to trust this particular entry.
		 */
		if (entry->broken == entry->count)
			continue;

		/*
		 * Special case, when there are only two entries and 'best' is
		 * the first one
		 */
		if (!prev) {
			best = entry;
			prev = entry;
			continue;
		}

		/*
		 * If our current entry == best then we can't be sure our best
		 * is really the best, so we need to keep searching.
		 */
		if (best && best->count == entry->count) {
			prev = entry;
			best = NULL;
			continue;
		}

		/* Prev == entry, not good enough, have to keep searching */
		if (!prev->broken && prev->count == entry->count)
			continue;

		/* Either recover a best from prev/entry or promote entry. */
		if (!best)
			best = (prev->count > entry->count) ? prev : entry;
		else if (best->count < entry->count)
			best = entry;
		prev = entry;
	}

	return best;
}
/*
 * Rewrite one file extent item so it points at the agreed-upon extent
 * @entry instead of the stale coordinates recorded in @dback: find the
 * owning fs root, locate the exact file extent, then adjust disk_bytenr,
 * offset and disk_num_bytes inside a transaction.
 *
 * Returns 0 on success, a negative error if the ref cannot be found or
 * cannot be repaired safely (compressed / out-of-range cases).
 */
static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
		      struct data_backref *dback, struct extent_entry *entry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret, err;

	key.objectid = dback->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find root for our ref\n");
		return -EINVAL;
	}

	/*
	 * The backref points to the original offset of the extent if it was
	 * split, so we need to search down to the offset we have and then walk
	 * forward until we find the backref we're looking for.
	 */
	key.objectid = dback->owner;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = dback->offset;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		fprintf(stderr, "Error looking up ref %d\n", ret);
		return ret;
	}

	while (1) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				fprintf(stderr, "Couldn't find our ref, next\n");
				return -EINVAL;
			}
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != dback->owner ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			fprintf(stderr, "Couldn't find our ref, search\n");
			return -EINVAL;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);

		/* Stop at the file extent that matches the stale backref. */
		if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
			break;
		path->slots[0]++;
	}

	btrfs_release_path(path);

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Ok we have the key of the file extent we want to fix, now we can cow
	 * down to the thing and fix it.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		fprintf(stderr, "error cowing down to ref [%llu,%u,%llu]: %d\n",
			key.objectid, key.type, key.offset, ret);
		goto out;
	}
	if (ret > 0) {
		fprintf(stderr,
		"well that's odd, we just found this key [%llu,%u,%llu]\n",
			key.objectid, key.type, key.offset);
		ret = -EINVAL;
		goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	/* Compressed extents can't have their offset shifted safely. */
	if (btrfs_file_extent_compression(leaf, fi) &&
	    dback->disk_bytenr != entry->bytenr) {
		fprintf(stderr,
"ref doesn't match the record start and is compressed, please take a btrfs-image of this file system and send it to a btrfs developer so they can complete this functionality for bytenr %llu\n",
			dback->disk_bytenr);
		ret = -EINVAL;
		goto out;
	}

	if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
	} else if (dback->disk_bytenr > entry->bytenr) {
		/* Ref starts inside the entry: shift the offset forward. */
		u64 off_diff, offset;

		off_diff = dback->disk_bytenr - entry->bytenr;
		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset +
		    btrfs_file_extent_num_bytes(leaf, fi) >
		    entry->bytenr + entry->bytes) {
			fprintf(stderr,
"ref is past the entry end, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}
		offset += off_diff;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	} else if (dback->disk_bytenr < entry->bytenr) {
		/* Ref starts before the entry: shrink the offset. */
		u64 offset;

		offset = btrfs_file_extent_offset(leaf, fi);
		if (dback->disk_bytenr + offset < entry->bytenr) {
			fprintf(stderr,
"ref is before the entry start, please take a btrfs-image of this file system and send it to a btrfs developer, ref %llu\n",
				dback->disk_bytenr);
			ret = -EINVAL;
			goto out;
		}

		offset += dback->disk_bytenr;
		offset -= entry->bytenr;
		btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
		btrfs_set_file_extent_offset(leaf, fi, offset);
	}

	btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);

	/*
	 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
	 * only do this if we aren't using compression, otherwise it's a
	 * trickier case.
	 */
	if (!btrfs_file_extent_compression(leaf, fi))
		btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
	else
		printf("ram bytes may be wrong?\n");
	btrfs_mark_buffer_dirty(leaf);
out:
	err = btrfs_commit_transaction(trans, root);
	btrfs_release_path(path);
	return ret ? ret : err;
}
/*
 * Cross-check all data backrefs of @rec against each other and against
 * the extent record, elect the (bytenr, bytes) pair most of them agree
 * on, and rewrite the disagreeing file extents via repair_ref().
 *
 * Returns 0 when nothing needed fixing, -EAGAIN after any repair (caller
 * must drop its cache and rescan), or a negative error.
 */
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
			   struct extent_record *rec)
{
	struct extent_backref *back, *tmp;
	struct data_backref *dback;
	struct extent_entry *entry, *best = NULL;
	LIST_HEAD(entries);
	int nr_entries = 0;
	int broken_entries = 0;
	int ret = 0;
	short mismatch = 0;

	/*
	 * Metadata is easy and the backrefs should always agree on bytenr and
	 * size, if not we've got bigger issues.
	 */
	if (rec->metadata)
		return 0;

	/* First pass: tally a vote per distinct (bytenr, bytes) pair. */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * We only pay attention to backrefs that we found a real
		 * backref for.
		 */
		if (dback->found_ref == 0)
			continue;

		/*
		 * For now we only catch when the bytes don't match, not the
		 * bytenr.  We can easily do this at the same time, but I want
		 * to have a fs image to test on before we just add repair
		 * functionality willy-nilly so we know we won't screw up the
		 * repair.
		 */

		entry = find_entry(&entries, dback->disk_bytenr,
				   dback->bytes);
		if (!entry) {
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = dback->disk_bytenr;
			entry->bytes = dback->bytes;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}

		/*
		 * If we only have on entry we may think the entries agree when
		 * in reality they don't so we have to do some extra checking.
		 */
		if (dback->disk_bytenr != rec->start ||
		    dback->bytes != rec->nr || back->broken)
			mismatch = 1;

		if (back->broken) {
			entry->broken++;
			broken_entries++;
		}

		entry->count++;
	}

	/* Yay all the backrefs agree, carry on good sir */
	if (nr_entries <= 1 && !mismatch)
		goto out;

	fprintf(stderr,
		"attempting to repair backref discrepency for bytenr %llu\n",
		rec->start);

	/*
	 * First we want to see if the backrefs can agree amongst themselves who
	 * is right, so figure out which one of the entries has the highest
	 * count.
	 */
	best = find_most_right_entry(&entries);

	/*
	 * Ok so we may have an even split between what the backrefs think, so
	 * this is where we use the extent ref to see what it thinks.
	 */
	if (!best) {
		entry = find_entry(&entries, rec->start, rec->nr);
		if (!entry && (!broken_entries || !rec->found_rec)) {
			fprintf(stderr,
"backrefs don't agree with each other and extent record doesn't agree with anybody, so we can't fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		} else if (!entry) {
			/*
			 * Ok our backrefs were broken, we'll assume this is the
			 * correct value and add an entry for this range.
			 */
			entry = malloc(sizeof(struct extent_entry));
			if (!entry) {
				ret = -ENOMEM;
				goto out;
			}
			memset(entry, 0, sizeof(*entry));
			entry->bytenr = rec->start;
			entry->bytes = rec->nr;
			list_add_tail(&entry->list, &entries);
			nr_entries++;
		}
		/* The extent record itself gets a tie-breaking vote. */
		entry->count++;
		best = find_most_right_entry(&entries);
		if (!best) {
			fprintf(stderr,
"backrefs and extent record evenly split on who is right, this is going to require user input to fix bytenr %llu bytes %llu\n",
				rec->start, rec->nr);
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * I don't think this can happen currently as we'll abort() if we catch
	 * this case higher up, but in case somebody removes that we still can't
	 * deal with it properly here yet, so just bail out of that's the case.
	 */
	if (best->bytenr != rec->start) {
		fprintf(stderr,
"extent start and backref starts don't match, please use btrfs-image on this file system and send it to a btrfs developer so they can make fsck fix this particular case.  bytenr is %llu, bytes is %llu\n",
			rec->start, rec->nr);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Ok great we all agreed on an extent record, let's go find the real
	 * references and fix up the ones that don't match.
	 */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/*
		 * Still ignoring backrefs that don't have a real ref attached
		 * to them.
		 */
		if (dback->found_ref == 0)
			continue;

		if (dback->bytes == best->bytes &&
		    dback->disk_bytenr == best->bytenr)
			continue;

		ret = repair_ref(info, path, dback, best);
		if (ret)
			goto out;
	}

	/*
	 * Ok we messed with the actual refs, which means we need to drop our
	 * entire cache and go back and rescan.  I know this is a huge pain and
	 * adds a lot of extra work, but it's the only way to be safe.  Once all
	 * the backrefs agree we may not need to do anything to the extent
	 * record itself.
	 */
	ret = -EAGAIN;
out:
	/* Free every vote entry we allocated above. */
	while (!list_empty(&entries)) {
		entry = list_entry(entries.next, struct extent_entry, list);
		list_del_init(&entry->list);
		free(entry);
	}
	return ret;
}
/*
 * Collapse a duplicated extent record: promote the single duplicate found
 * in the extent tree to be the canonical record, fold any overlapping
 * records into it, and re-insert it into @extent_cache.
 *
 * Returns 1 when the duplicates were fully resolved, 0 when duplicates
 * remain (queued on the global duplicate_extents list) or when there was
 * nothing to do.
 */
static int process_duplicates(struct cache_tree *extent_cache,
			      struct extent_record *rec)
{
	struct extent_record *good, *tmp;
	struct cache_extent *cache;
	int ret;

	/*
	 * If we found a extent record for this extent then return, or if we
	 * have more than one duplicate we are likely going to need to delete
	 * something.
	 */
	if (rec->found_rec || rec->num_duplicates > 1)
		return 0;

	/* Shouldn't happen but just in case */
	BUG_ON(!rec->num_duplicates);

	/*
	 * So this happens if we end up with a backref that doesn't match the
	 * actual extent entry.  So either the backref is bad or the extent
	 * entry is bad.  Either way we want to have the extent_record actually
	 * reflect what we found in the extent_tree, so we need to take the
	 * duplicate out and use that as the extent_record since the only way we
	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
	 */
	remove_cache_extent(extent_cache, &rec->cache);

	/* The duplicate becomes the canonical record, inheriting rec's refs. */
	good = to_extent_record(rec->dups.next);
	list_del_init(&good->list);
	INIT_LIST_HEAD(&good->backrefs);
	INIT_LIST_HEAD(&good->dups);
	good->cache.start = good->start;
	good->cache.size = good->nr;
	good->content_checked = 0;
	good->owner_ref_checked = 0;
	good->num_duplicates = 0;
	good->refs = rec->refs;
	list_splice_init(&rec->backrefs, &good->backrefs);
	/* Absorb every record still overlapping the promoted range. */
	while (1) {
		cache = lookup_cache_extent(extent_cache, good->start,
					    good->nr);
		if (!cache)
			break;
		tmp = container_of(cache, struct extent_record, cache);

		/*
		 * If we find another overlapping extent and it's found_rec is
		 * set then it's a duplicate and we need to try and delete
		 * something.
		 */
		if (tmp->found_rec || tmp->num_duplicates > 0) {
			if (list_empty(&good->list))
				list_add_tail(&good->list,
					      &duplicate_extents);
			good->num_duplicates += tmp->num_duplicates + 1;
			list_splice_init(&tmp->dups, &good->dups);
			list_del_init(&tmp->list);
			list_add_tail(&tmp->list, &good->dups);
			remove_cache_extent(extent_cache, &tmp->cache);
			continue;
		}

		/*
		 * Ok we have another non extent item backed extent rec, so lets
		 * just add it to this extent and carry on like we did above.
		 */
		good->refs += tmp->refs;
		list_splice_init(&tmp->backrefs, &good->backrefs);
		remove_cache_extent(extent_cache, &tmp->cache);
		free(tmp);
	}
	ret = insert_cache_extent(extent_cache, &good->cache);
	BUG_ON(ret);
	free(rec);
	return good->num_duplicates ? 0 : 1;
}
7052 static int delete_duplicate_records(struct btrfs_root *root,
7053 struct extent_record *rec)
7055 struct btrfs_trans_handle *trans;
7056 LIST_HEAD(delete_list);
7057 struct btrfs_path path;
7058 struct extent_record *tmp, *good, *n;
7059 int nr_del = 0;
7060 int ret = 0, err;
7061 struct btrfs_key key;
7063 btrfs_init_path(&path);
7065 good = rec;
7066 /* Find the record that covers all of the duplicates. */
7067 list_for_each_entry(tmp, &rec->dups, list) {
7068 if (good->start < tmp->start)
7069 continue;
7070 if (good->nr > tmp->nr)
7071 continue;
7073 if (tmp->start + tmp->nr < good->start + good->nr) {
7074 fprintf(stderr,
7075 "Ok we have overlapping extents that aren't completely covered by each other, this is going to require more careful thought. The extents are [%llu-%llu] and [%llu-%llu]\n",
7076 tmp->start, tmp->nr, good->start, good->nr);
7077 abort();
7079 good = tmp;
7082 if (good != rec)
7083 list_add_tail(&rec->list, &delete_list);
7085 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7086 if (tmp == good)
7087 continue;
7088 list_move_tail(&tmp->list, &delete_list);
7091 root = root->fs_info->extent_root;
7092 trans = btrfs_start_transaction(root, 1);
7093 if (IS_ERR(trans)) {
7094 ret = PTR_ERR(trans);
7095 goto out;
7098 list_for_each_entry(tmp, &delete_list, list) {
7099 if (tmp->found_rec == 0)
7100 continue;
7101 key.objectid = tmp->start;
7102 key.type = BTRFS_EXTENT_ITEM_KEY;
7103 key.offset = tmp->nr;
7105 /* Shouldn't happen but just in case */
7106 if (tmp->metadata) {
7107 fprintf(stderr,
7108 "well this shouldn't happen, extent record overlaps but is metadata? [%llu, %llu]\n",
7109 tmp->start, tmp->nr);
7110 abort();
7113 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7114 if (ret) {
7115 if (ret > 0)
7116 ret = -EINVAL;
7117 break;
7119 ret = btrfs_del_item(trans, root, &path);
7120 if (ret)
7121 break;
7122 btrfs_release_path(&path);
7123 nr_del++;
7125 err = btrfs_commit_transaction(trans, root);
7126 if (err && !ret)
7127 ret = err;
7128 out:
7129 while (!list_empty(&delete_list)) {
7130 tmp = to_extent_record(delete_list.next);
7131 list_del_init(&tmp->list);
7132 if (tmp == rec)
7133 continue;
7134 free(tmp);
7137 while (!list_empty(&rec->dups)) {
7138 tmp = to_extent_record(rec->dups.next);
7139 list_del_init(&tmp->list);
7140 free(tmp);
7143 btrfs_release_path(&path);
7145 if (!ret && !nr_del)
7146 rec->num_duplicates = 0;
7148 return ret ? ret : nr_del;
/*
 * For each data backref of @rec that was never matched against a file
 * extent, look up the file extent it claims to describe and, when the
 * referenced bytenr has no extent item of its own, adopt the on-disk
 * bytenr/bytes into the backref so verify_backrefs() can reconcile it.
 *
 * Returns 0 on success (including when nothing was found), or a
 * negative errno on lookup failure.
 */
static int find_possible_backrefs(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  struct cache_tree *extent_cache,
				  struct extent_record *rec)
{
	struct btrfs_root *root;
	struct extent_backref *back, *tmp;
	struct data_backref *dback;
	struct cache_extent *cache;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 bytenr, bytes;
	int ret;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* Don't care about full backrefs (poor unloved backrefs) */
		if (back->full_backref || !back->is_data)
			continue;

		dback = to_data_backref(back);

		/* We found this one, we don't need to do a lookup */
		if (dback->found_ref)
			continue;

		key.objectid = dback->root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		root = btrfs_read_fs_root(info, &key);

		/* No root, definitely a bad ref, skip */
		if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
			continue;
		/* Other err, exit */
		if (IS_ERR(root))
			return PTR_ERR(root);

		key.objectid = dback->owner;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = dback->offset;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret) {
			btrfs_release_path(path);
			if (ret < 0)
				return ret;
			/* Didn't find it, we can carry on */
			ret = 0;
			continue;
		}

		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
		bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
		btrfs_release_path(path);
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (cache) {
			/* NOTE: this tmp deliberately shadows the loop cursor */
			struct extent_record *tmp;

			tmp = container_of(cache, struct extent_record, cache);

			/*
			 * If we found an extent record for the bytenr for this
			 * particular backref then we can't add it to our
			 * current extent record.  We only want to add backrefs
			 * that don't have a corresponding extent item in the
			 * extent tree since they likely belong to this record
			 * and we need to fix it if it doesn't match bytenrs.
			 */
			if (tmp->found_rec)
				continue;
		}

		dback->found_ref += 1;
		dback->disk_bytenr = bytenr;
		dback->bytes = bytes;

		/*
		 * Set this so the verify backref code knows not to trust the
		 * values in this backref.
		 */
		back->broken = 1;
	}

	return 0;
}
/*
 * Record orphan data ref into corresponding root.
 *
 * Return 0 if the extent item contains data ref and recorded.
 * Return 1 if the extent item contains no useful data ref
 *     On that case, it may contains only shared_dataref or metadata backref
 *     or the file extent exists(this should be handled by the extent bytenr
 *     recovery routine)
 * Return <0 if something goes wrong.
 */
static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
				      struct extent_record *rec)
{
	struct btrfs_key key;
	struct btrfs_root *dest_root;
	struct extent_backref *back, *tmp;
	struct data_backref *dback;
	struct orphan_data_extent *orphan;
	struct btrfs_path path;
	int recorded_data_ref = 0;
	int ret = 0;

	/* Metadata extents cannot carry orphan data refs. */
	if (rec->metadata)
		return 1;
	btrfs_init_path(&path);
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* Only data backrefs that exist in the extent tree matter. */
		if (back->full_backref || !back->is_data ||
		    !back->found_extent_tree)
			continue;
		dback = to_data_backref(back);
		/* A matching file extent was already seen; nothing orphaned. */
		if (dback->found_ref)
			continue;
		key.objectid = dback->root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		dest_root = btrfs_read_fs_root(fs_info, &key);

		/* For non-exist root we just skip it */
		if (IS_ERR(dest_root) || !dest_root)
			continue;

		key.objectid = dback->owner;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = dback->offset;

		ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
		btrfs_release_path(&path);
		/*
		 * For ret < 0, it's OK since the fs-tree may be corrupted,
		 * we need to record it for inode/file extent rebuild.
		 * For ret > 0, we record it only for file extent rebuild.
		 * For ret == 0, the file extent exists but only bytenr
		 * mismatch, let the original bytenr fix routine to handle,
		 * don't record it.
		 */
		if (ret == 0)
			continue;
		ret = 0;
		orphan = malloc(sizeof(*orphan));
		if (!orphan) {
			ret = -ENOMEM;
			goto out;
		}
		INIT_LIST_HEAD(&orphan->list);
		orphan->root = dback->root;
		orphan->objectid = dback->owner;
		orphan->offset = dback->offset;
		orphan->disk_bytenr = rec->cache.start;
		orphan->disk_len = rec->cache.size;
		/* Queue on the destination root so inode repair can find it. */
		list_add(&dest_root->orphan_data_extents, &orphan->list);
		recorded_data_ref = 1;
	}
out:
	btrfs_release_path(&path);
	if (!ret)
		return !recorded_data_ref;
	else
		return ret;
}
/*
 * when an incorrect extent item is found, this will delete
 * all of the existing entries for it and recreate them
 * based on what the tree scan found.
 *
 * Returns 0 on success; on failure the transaction is still committed
 * (partial work is kept) and the first error is returned.
 */
static int fixup_extent_refs(struct btrfs_fs_info *info,
			     struct cache_tree *extent_cache,
			     struct extent_record *rec)
{
	struct btrfs_trans_handle *trans = NULL;
	int ret;
	struct btrfs_path path;
	struct cache_extent *cache;
	struct extent_backref *back, *tmp;
	int allocated = 0;
	u64 flags = 0;

	if (rec->flag_block_full_backref)
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;

	btrfs_init_path(&path);
	if (rec->refs != rec->extent_item_refs && !rec->metadata) {
		/*
		 * Sometimes the backrefs themselves are so broken they don't
		 * get attached to any meaningful rec, so first go back and
		 * check any of our backrefs that we couldn't find and throw
		 * them into the list if we find the backref so that
		 * verify_backrefs can figure out what to do.
		 */
		ret = find_possible_backrefs(info, &path, extent_cache, rec);
		if (ret < 0)
			goto out;
	}

	/* step one, make sure all of the backrefs agree */
	ret = verify_backrefs(info, &path, rec);
	if (ret < 0)
		goto out;

	trans = btrfs_start_transaction(info->extent_root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* step two, delete all the existing records */
	ret = delete_extent_records(trans, &path, rec->start);

	if (ret < 0)
		goto out;

	/* was this block corrupt?  If so, don't add references to it */
	cache = lookup_cache_extent(info->corrupt_blocks,
				    rec->start, rec->max_size);
	if (cache) {
		ret = 0;
		goto out;
	}

	/* step three, recreate all the refs we did find */
	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/*
		 * if we didn't find any references, don't create a
		 * new extent record
		 */
		if (!back->found_ref)
			continue;

		rec->bad_full_backref = 0;
		/* @allocated: only the first ref inserts the extent item. */
		ret = record_extent(trans, info, &path, rec, back, allocated,
				    flags);
		allocated = 1;

		if (ret)
			goto out;
	}
out:
	if (trans) {
		int err = btrfs_commit_transaction(trans, info->extent_root);

		if (!ret)
			ret = err;
	}

	if (!ret)
		fprintf(stderr, "Repaired extent references for %llu\n",
			(unsigned long long)rec->start);

	btrfs_release_path(&path);
	return ret;
}
7415 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7416 struct extent_record *rec)
7418 struct btrfs_trans_handle *trans;
7419 struct btrfs_root *root = fs_info->extent_root;
7420 struct btrfs_path path;
7421 struct btrfs_extent_item *ei;
7422 struct btrfs_key key;
7423 u64 flags;
7424 int ret = 0;
7426 key.objectid = rec->start;
7427 if (rec->metadata) {
7428 key.type = BTRFS_METADATA_ITEM_KEY;
7429 key.offset = rec->info_level;
7430 } else {
7431 key.type = BTRFS_EXTENT_ITEM_KEY;
7432 key.offset = rec->max_size;
7435 trans = btrfs_start_transaction(root, 0);
7436 if (IS_ERR(trans))
7437 return PTR_ERR(trans);
7439 btrfs_init_path(&path);
7440 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7441 if (ret < 0) {
7442 btrfs_release_path(&path);
7443 btrfs_commit_transaction(trans, root);
7444 return ret;
7445 } else if (ret) {
7446 fprintf(stderr, "Didn't find extent for %llu\n",
7447 (unsigned long long)rec->start);
7448 btrfs_release_path(&path);
7449 btrfs_commit_transaction(trans, root);
7450 return -ENOENT;
7453 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7454 struct btrfs_extent_item);
7455 flags = btrfs_extent_flags(path.nodes[0], ei);
7456 if (rec->flag_block_full_backref) {
7457 fprintf(stderr, "setting full backref on %llu\n",
7458 (unsigned long long)key.objectid);
7459 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7460 } else {
7461 fprintf(stderr, "clearing full backref on %llu\n",
7462 (unsigned long long)key.objectid);
7463 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7465 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7466 btrfs_mark_buffer_dirty(path.nodes[0]);
7467 btrfs_release_path(&path);
7468 ret = btrfs_commit_transaction(trans, root);
7469 if (!ret)
7470 fprintf(stderr, "Repaired extent flags for %llu\n",
7471 (unsigned long long)rec->start);
7473 return ret;
/* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer that references one corrupt block.  Searches
 * upward (level by level) until the parent holding the pointer is found,
 * then deletes that key/pointer slot.
 *
 * Returns 0 on success, -ENOENT if no parent pointer could be located,
 * or a negative errno from the search.
 */
static int prune_one_block(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *info,
			   struct btrfs_corrupt_block *corrupt)
{
	int ret;
	struct btrfs_path path;
	struct extent_buffer *eb;
	u64 found;
	int slot;
	int nritems;
	int level = corrupt->level + 1;

	btrfs_init_path(&path);
again:
	/* we want to stop at the parent to our busted block */
	path.lowest_level = level;

	ret = btrfs_search_slot(trans, info->extent_root,
				&corrupt->key, &path, -1, 1);

	if (ret < 0)
		goto out;

	eb = path.nodes[level];
	if (!eb) {
		ret = -ENOENT;
		goto out;
	}

	/*
	 * hopefully the search gave us the block we want to prune,
	 * lets try that first
	 */
	slot = path.slots[level];
	found = btrfs_node_blockptr(eb, slot);
	if (found == corrupt->cache.start)
		goto del_ptr;

	nritems = btrfs_header_nritems(eb);

	/* the search failed, lets scan this node and hope we find it */
	for (slot = 0; slot < nritems; slot++) {
		found = btrfs_node_blockptr(eb, slot);
		if (found == corrupt->cache.start)
			goto del_ptr;
	}
	/*
	 * We couldn't find the bad block.
	 * TODO: search all the nodes for pointers to this block
	 */
	if (eb == info->extent_root->node) {
		/* Reached the root: nothing references the corrupt block. */
		ret = -ENOENT;
		goto out;
	} else {
		/* Try again one level higher up the tree. */
		level++;
		btrfs_release_path(&path);
		goto again;
	}

del_ptr:
	printk("deleting pointer to block %llu\n", corrupt->cache.start);
	ret = btrfs_del_ptr(info->extent_root, &path, level, slot);

out:
	btrfs_release_path(&path);
	return ret;
}
7545 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7547 struct btrfs_trans_handle *trans = NULL;
7548 struct cache_extent *cache;
7549 struct btrfs_corrupt_block *corrupt;
7551 while (1) {
7552 cache = search_cache_extent(info->corrupt_blocks, 0);
7553 if (!cache)
7554 break;
7555 if (!trans) {
7556 trans = btrfs_start_transaction(info->extent_root, 1);
7557 if (IS_ERR(trans))
7558 return PTR_ERR(trans);
7560 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7561 prune_one_block(trans, info, corrupt);
7562 remove_cache_extent(info->corrupt_blocks, cache);
7564 if (trans)
7565 return btrfs_commit_transaction(trans, info->extent_root);
7566 return 0;
/*
 * Cross-check every extent record gathered by the scan against the
 * extent tree and, in repair mode, fix mismatched refs/flags and delete
 * duplicate extent items.
 *
 * Returns 0 when everything agreed, -EAGAIN when duplicates were deleted
 * and the caller must rescan, -EIO when unrepairable problems were seen,
 * or a negative errno.  In repair mode an unrecoverable repair failure
 * exits the program.
 */
static int check_extent_refs(struct btrfs_root *root,
			     struct cache_tree *extent_cache)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int had_dups = 0;
	int err = 0;

	if (repair) {
		/*
		 * if we're doing a repair, we have to make sure
		 * we don't allocate from the problem extents.
		 * In the worst case, this will be all the
		 * extents in the FS
		 */
		cache = search_cache_extent(extent_cache, 0);
		while (cache) {
			rec = container_of(cache, struct extent_record, cache);
			set_extent_dirty(root->fs_info->excluded_extents,
					 rec->start,
					 rec->start + rec->max_size - 1);
			cache = next_cache_extent(cache);
		}

		/* pin down all the corrupted blocks too */
		cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
		while (cache) {
			set_extent_dirty(root->fs_info->excluded_extents,
					 cache->start,
					 cache->start + cache->size - 1);
			cache = next_cache_extent(cache);
		}
		prune_corrupt_blocks(root->fs_info);
		reset_cached_block_groups(root->fs_info);
	}

	reset_cached_block_groups(root->fs_info);

	/*
	 * We need to delete any duplicate entries we find first otherwise we
	 * could mess up the extent tree when we have backrefs that actually
	 * belong to a different extent item and not the weird duplicate one.
	 */
	while (repair && !list_empty(&duplicate_extents)) {
		rec = to_extent_record(duplicate_extents.next);
		list_del_init(&rec->list);

		/* Sometimes we can find a backref before we find an actual
		 * extent, so we need to process it a little bit to see if there
		 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
		 * if this is a backref screwup.  If we need to delete stuff
		 * process_duplicates() will return 0, otherwise it will return
		 * 1 and we can just carry on.
		 */
		if (process_duplicates(extent_cache, rec))
			continue;
		ret = delete_duplicate_records(root, rec);
		if (ret < 0)
			return ret;
		/*
		 * delete_duplicate_records will return the number of entries
		 * deleted, so if it's greater than 0 then we know we actually
		 * did something and we need to remove.
		 */
		if (ret)
			had_dups = 1;
	}

	/* Deleting items invalidated the cached scan; force a rescan. */
	if (had_dups)
		return -EAGAIN;

	/* Main pass: drain the extent cache, checking each record. */
	while (1) {
		int cur_err = 0;
		int fix = 0;

		cache = search_cache_extent(extent_cache, 0);
		if (!cache)
			break;
		rec = container_of(cache, struct extent_record, cache);
		if (rec->num_duplicates) {
			fprintf(stderr,
				"extent item %llu has multiple extent items\n",
				(unsigned long long)rec->start);
			cur_err = 1;
		}

		if (rec->refs != rec->extent_item_refs) {
			fprintf(stderr, "ref mismatch on [%llu %llu] ",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			fprintf(stderr, "extent item %llu, found %llu\n",
				(unsigned long long)rec->extent_item_refs,
				(unsigned long long)rec->refs);
			ret = record_orphan_data_extents(root->fs_info, rec);
			if (ret < 0)
				goto repair_abort;
			/* 0 means orphan refs recorded -> fixable later. */
			fix = ret;
			cur_err = 1;
		}
		if (all_backpointers_checked(rec, 1)) {
			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			fix = 1;
			cur_err = 1;
		}
		if (!rec->owner_ref_checked) {
			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
				(unsigned long long)rec->start,
				(unsigned long long)rec->nr);
			fix = 1;
			cur_err = 1;
		}

		if (repair && fix) {
			ret = fixup_extent_refs(root->fs_info, extent_cache,
						rec);
			if (ret)
				goto repair_abort;
		}


		if (rec->bad_full_backref) {
			fprintf(stderr, "bad full backref, on [%llu]\n",
				(unsigned long long)rec->start);
			if (repair) {
				ret = fixup_extent_flags(root->fs_info, rec);
				if (ret)
					goto repair_abort;
				fix = 1;
			}
			cur_err = 1;
		}
		/*
		 * Although it's not a extent ref's problem, we reuse this
		 * routine for error reporting.
		 * No repair function yet.
		 */
		if (rec->crossing_stripes) {
			fprintf(stderr,
				"bad metadata [%llu, %llu) crossing stripe boundary\n",
				rec->start, rec->start + rec->max_size);
			cur_err = 1;
		}

		if (rec->wrong_chunk_type) {
			fprintf(stderr,
				"bad extent [%llu, %llu), type mismatch with chunk\n",
				rec->start, rec->start + rec->max_size);
			cur_err = 1;
		}

		/*
		 * NOTE(review): err is overwritten each iteration, so it
		 * reflects only the last record examined — confirm whether
		 * it should be sticky (err |= cur_err) instead.
		 */
		err = cur_err;
		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
		/* Allow allocation from ranges that are clean or repaired. */
		if (!init_extent_tree && repair && (!cur_err || fix))
			clear_extent_dirty(root->fs_info->excluded_extents,
					   rec->start,
					   rec->start + rec->max_size - 1);
		free(rec);
	}
	/* The clean path falls through here too; ret is 0 in that case. */
repair_abort:
	if (repair) {
		if (ret && ret != -EAGAIN) {
			fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
			exit(1);
		} else if (!ret) {
			struct btrfs_trans_handle *trans;

			root = root->fs_info->extent_root;
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto repair_abort;
			}

			ret = btrfs_fix_block_accounting(trans);
			if (ret)
				goto repair_abort;
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				goto repair_abort;
		}
		return ret;
	}

	if (err)
		err = -EIO;
	return err;
}
/*
 * Check the chunk with its block group/dev list ref:
 * Return 0 if all refs seems valid.
 * Return 1 if part of refs seems valid, need later check for rebuild ref
 * like missing block group and needs to search extent tree to rebuild them.
 * Return -1 if essential refs are missing and unable to rebuild.
 */
static int check_chunk_refs(struct chunk_record *chunk_rec,
			    struct block_group_tree *block_group_cache,
			    struct device_extent_tree *dev_extent_cache,
			    int silent)
{
	struct cache_extent *block_group_item;
	struct block_group_record *block_group_rec;
	struct cache_extent *dev_extent_item;
	struct device_extent_record *dev_extent_rec;
	u64 devid;
	u64 offset;
	u64 length;
	/* Always 0 here; kept so the metadump-v2 relaxations stay visible. */
	int metadump_v2 = 0;
	int i;
	int ret = 0;

	/* First, the chunk must match exactly one block group item. */
	block_group_item = lookup_cache_extent(&block_group_cache->tree,
					       chunk_rec->offset,
					       chunk_rec->length);
	if (block_group_item) {
		block_group_rec = container_of(block_group_item,
					       struct block_group_record,
					       cache);
		if (chunk_rec->length != block_group_rec->offset ||
		    chunk_rec->offset != block_group_rec->objectid ||
		    (!metadump_v2 &&
		     chunk_rec->type_flags != block_group_rec->flags)) {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->length,
					chunk_rec->offset,
					chunk_rec->type_flags,
					block_group_rec->objectid,
					block_group_rec->type,
					block_group_rec->offset,
					block_group_rec->offset,
					block_group_rec->objectid,
					block_group_rec->flags);
			ret = -1;
		} else {
			/* Match: claim the block group for this chunk. */
			list_del_init(&block_group_rec->list);
			chunk_rec->bg_rec = block_group_rec;
		}
	} else {
		if (!silent)
			fprintf(stderr,
				"Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
				chunk_rec->objectid,
				chunk_rec->type,
				chunk_rec->offset,
				chunk_rec->length,
				chunk_rec->offset,
				chunk_rec->type_flags);
		/* Missing block group is rebuildable -> "partial" result. */
		ret = 1;
	}

	if (metadump_v2)
		return ret;

	/* Second, every stripe must have a matching dev extent. */
	length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
				    chunk_rec->num_stripes);
	for (i = 0; i < chunk_rec->num_stripes; ++i) {
		devid = chunk_rec->stripes[i].devid;
		offset = chunk_rec->stripes[i].offset;
		dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
						       devid, offset, length);
		if (dev_extent_item) {
			dev_extent_rec = container_of(dev_extent_item,
						struct device_extent_record,
						cache);
			if (dev_extent_rec->objectid != devid ||
			    dev_extent_rec->offset != offset ||
			    dev_extent_rec->chunk_offset != chunk_rec->offset ||
			    dev_extent_rec->length != length) {
				if (!silent)
					fprintf(stderr,
						"Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
						chunk_rec->objectid,
						chunk_rec->type,
						chunk_rec->offset,
						chunk_rec->stripes[i].devid,
						chunk_rec->stripes[i].offset,
						dev_extent_rec->objectid,
						dev_extent_rec->offset,
						dev_extent_rec->length);
				ret = -1;
			} else {
				/* Match: move dev extent under this chunk. */
				list_move(&dev_extent_rec->chunk_list,
					  &chunk_rec->dextents);
			}
		} else {
			if (!silent)
				fprintf(stderr,
					"Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
					chunk_rec->objectid,
					chunk_rec->type,
					chunk_rec->offset,
					chunk_rec->stripes[i].devid,
					chunk_rec->stripes[i].offset);
			/* Missing dev extent is essential -> unrecoverable. */
			ret = -1;
		}
	}
	return ret;
}
7877 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7878 int check_chunks(struct cache_tree *chunk_cache,
7879 struct block_group_tree *block_group_cache,
7880 struct device_extent_tree *dev_extent_cache,
7881 struct list_head *good, struct list_head *bad,
7882 struct list_head *rebuild, int silent)
7884 struct cache_extent *chunk_item;
7885 struct chunk_record *chunk_rec;
7886 struct block_group_record *bg_rec;
7887 struct device_extent_record *dext_rec;
7888 int err;
7889 int ret = 0;
7891 chunk_item = first_cache_extent(chunk_cache);
7892 while (chunk_item) {
7893 chunk_rec = container_of(chunk_item, struct chunk_record,
7894 cache);
7895 err = check_chunk_refs(chunk_rec, block_group_cache,
7896 dev_extent_cache, silent);
7897 if (err < 0)
7898 ret = err;
7899 if (err == 0 && good)
7900 list_add_tail(&chunk_rec->list, good);
7901 if (err > 0 && rebuild)
7902 list_add_tail(&chunk_rec->list, rebuild);
7903 if (err < 0 && bad)
7904 list_add_tail(&chunk_rec->list, bad);
7905 chunk_item = next_cache_extent(chunk_item);
7908 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7909 if (!silent)
7910 fprintf(stderr,
7911 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7912 bg_rec->objectid,
7913 bg_rec->offset,
7914 bg_rec->flags);
7915 if (!ret)
7916 ret = 1;
7919 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7920 chunk_list) {
7921 if (!silent)
7922 fprintf(stderr,
7923 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7924 dext_rec->objectid,
7925 dext_rec->offset,
7926 dext_rec->length);
7927 if (!ret)
7928 ret = 1;
7930 return ret;
7934 static int check_device_used(struct device_record *dev_rec,
7935 struct device_extent_tree *dext_cache)
7937 struct cache_extent *cache;
7938 struct device_extent_record *dev_extent_rec;
7939 u64 total_byte = 0;
7941 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7942 while (cache) {
7943 dev_extent_rec = container_of(cache,
7944 struct device_extent_record,
7945 cache);
7946 if (dev_extent_rec->objectid != dev_rec->devid)
7947 break;
7949 list_del_init(&dev_extent_rec->device_list);
7950 total_byte += dev_extent_rec->length;
7951 cache = next_cache_extent(cache);
7954 if (total_byte != dev_rec->byte_used) {
7955 fprintf(stderr,
7956 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7957 total_byte, dev_rec->byte_used, dev_rec->objectid,
7958 dev_rec->type, dev_rec->offset);
7959 return -1;
7960 } else {
7961 return 0;
7966 * Unlike device size alignment check above, some super total_bytes check
7967 * failure can lead to mount failure for newer kernel.
7969 * So this function will return the error for a fatal super total_bytes problem.
7971 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
7973 struct btrfs_device *dev;
7974 struct list_head *dev_list = &fs_info->fs_devices->devices;
7975 u64 total_bytes = 0;
7976 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7978 list_for_each_entry(dev, dev_list, dev_list)
7979 total_bytes += dev->total_bytes;
7981 /* Important check, which can cause unmountable fs */
7982 if (super_bytes < total_bytes) {
7983 error("super total bytes %llu smaller than real device(s) size %llu",
7984 super_bytes, total_bytes);
7985 error("mounting this fs may fail for newer kernels");
7986 error("this can be fixed by 'btrfs rescue fix-device-size'");
7987 return false;
7991 * Optional check, just to make everything aligned and match with each
7992 * other.
7994 * For a btrfs-image restored fs, we don't need to check it anyway.
7996 if (btrfs_super_flags(fs_info->super_copy) &
7997 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
7998 return true;
7999 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
8000 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
8001 super_bytes != total_bytes) {
8002 warning("minor unaligned/mismatch device size detected");
8003 warning(
8004 "recommended to use 'btrfs rescue fix-device-size' to fix it");
8006 return true;
8009 /* check btrfs_dev_item -> btrfs_dev_extent */
8010 static int check_devices(struct rb_root *dev_cache,
8011 struct device_extent_tree *dev_extent_cache)
8013 struct rb_node *dev_node;
8014 struct device_record *dev_rec;
8015 struct device_extent_record *dext_rec;
8016 int err;
8017 int ret = 0;
8019 dev_node = rb_first(dev_cache);
8020 while (dev_node) {
8021 dev_rec = container_of(dev_node, struct device_record, node);
8022 err = check_device_used(dev_rec, dev_extent_cache);
8023 if (err)
8024 ret = err;
8026 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
8027 global_info->sectorsize);
8028 dev_node = rb_next(dev_node);
8030 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8031 device_list) {
8032 fprintf(stderr,
8033 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8034 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8035 if (!ret)
8036 ret = 1;
8038 return ret;
8041 static int add_root_item_to_list(struct list_head *head,
8042 u64 objectid, u64 bytenr, u64 last_snapshot,
8043 u8 level, u8 drop_level,
8044 struct btrfs_key *drop_key)
8046 struct root_item_record *ri_rec;
8048 ri_rec = malloc(sizeof(*ri_rec));
8049 if (!ri_rec)
8050 return -ENOMEM;
8051 ri_rec->bytenr = bytenr;
8052 ri_rec->objectid = objectid;
8053 ri_rec->level = level;
8054 ri_rec->drop_level = drop_level;
8055 ri_rec->last_snapshot = last_snapshot;
8056 if (drop_key)
8057 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8058 list_add_tail(&ri_rec->list, head);
8060 return 0;
8063 static void free_root_item_list(struct list_head *list)
8065 struct root_item_record *ri_rec;
8067 while (!list_empty(list)) {
8068 ri_rec = list_first_entry(list, struct root_item_record,
8069 list);
8070 list_del_init(&ri_rec->list);
8071 free(ri_rec);
/*
 * Scan all tree roots queued on @list, feeding every reachable block
 * through run_next_block() to populate the extent/chunk/device caches.
 * Each queued root is processed to completion before the next (see the
 * inner loop comment), then any remaining pending blocks are drained.
 *
 * Returns 0 on success or a negative errno (-EIO on unreadable roots).
 */
static int deal_root_from_list(struct list_head *list,
			       struct btrfs_root *root,
			       struct block_info *bits,
			       int bits_nr,
			       struct cache_tree *pending,
			       struct cache_tree *seen,
			       struct cache_tree *reada,
			       struct cache_tree *nodes,
			       struct cache_tree *extent_cache,
			       struct cache_tree *chunk_cache,
			       struct rb_root *dev_cache,
			       struct block_group_tree *block_group_cache,
			       struct device_extent_tree *dev_extent_cache)
{
	int ret = 0;
	u64 last;

	while (!list_empty(list)) {
		struct root_item_record *rec;
		struct extent_buffer *buf;

		rec = list_entry(list->next,
				 struct root_item_record, list);
		last = 0;
		buf = read_tree_block(root->fs_info, rec->bytenr, 0);
		if (!extent_buffer_uptodate(buf)) {
			free_extent_buffer(buf);
			ret = -EIO;
			break;
		}
		ret = add_root_to_pending(buf, extent_cache, pending,
					  seen, nodes, rec->objectid);
		if (ret < 0)
			break;
		/*
		 * To rebuild extent tree, we need deal with snapshot
		 * one by one, otherwise we deal with node firstly which
		 * can maximize readahead.
		 */
		while (1) {
			ctx.item_count++;
			/* ret > 0 means this root's pending queue is empty. */
			ret = run_next_block(root, bits, bits_nr, &last,
					     pending, seen, reada, nodes,
					     extent_cache, chunk_cache,
					     dev_cache, block_group_cache,
					     dev_extent_cache, rec);
			if (ret != 0)
				break;
		}
		free_extent_buffer(buf);
		list_del(&rec->list);
		free(rec);
		if (ret < 0)
			break;
	}
	/* Drain whatever is still pending; ret > 0 here means "done". */
	while (ret >= 0) {
		ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
				     reada, nodes, extent_cache, chunk_cache,
				     dev_cache, block_group_cache,
				     dev_extent_cache, NULL);
		if (ret != 0) {
			if (ret > 0)
				ret = 0;
			break;
		}
	}
	return ret;
}
8145 * parse_tree_roots - Go over all roots in the tree root and add each one to
8146 * a list.
8148 * @fs_info - pointer to fs_info struct of the file system.
 * @normal_trees   - list that contains all roots which don't have a drop
8151 * operation in progress
8153 * @dropping_trees - list containing all roots which have a drop operation
8154 * pending
8156 * Returns 0 on success or a negative value indicating an error.
static int parse_tree_roots(struct btrfs_fs_info *fs_info,
			    struct list_head *normal_trees,
			    struct list_head *dropping_trees)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root_item ri;
	struct extent_buffer *leaf;
	int slot;
	int ret = 0;

	btrfs_init_path(&path);
	/* Start from the very first ROOT_ITEM in the tree root */
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	while (1) {
		leaf = path.nodes[0];
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(path.nodes[0])) {
			/*
			 * NOTE(review): btrfs_next_leaf() returning > 0 (no
			 * more leaves) breaks out with ret > 0, which callers
			 * treat as success since they only test ret < 0.
			 */
			ret = btrfs_next_leaf(fs_info->tree_root, &path);
			if (ret != 0)
				break;
			leaf = path.nodes[0];
			slot = path.slots[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
		if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
			unsigned long offset;
			u64 last_snapshot;
			u8 level;

			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
			last_snapshot = btrfs_root_last_snapshot(&ri);
			level = btrfs_root_level(&ri);
			/*
			 * drop_progress.objectid == 0 means no snapshot drop
			 * is in progress for this root: queue it as normal.
			 */
			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
				ret = add_root_item_to_list(normal_trees,
						found_key.objectid,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						0, NULL);
				if (ret < 0)
					break;
			} else {
				u64 objectid = found_key.objectid;

				/* Pass the drop progress key along */
				btrfs_disk_key_to_cpu(&found_key,
						      &ri.drop_progress);
				ret = add_root_item_to_list(dropping_trees,
						objectid,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						ri.drop_level, &found_key);
				if (ret < 0)
					break;
			}
		}
		path.slots[0]++;
	}
out:
	btrfs_release_path(&path);
	return ret;
}
/*
 * Original-mode check of chunks, extents, devices and block groups.
 *
 * Builds in-memory caches by walking every tree (normal roots and roots
 * with a pending drop), then cross-checks chunks vs block groups vs device
 * extents, extent refs, and device items.  If repair fixed something a
 * callee returns -EAGAIN and the whole scan restarts via the 'loop' label.
 *
 * Returns 0 on success, negative errno otherwise.
 */
static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
{
	struct rb_root dev_cache;
	struct cache_tree chunk_cache;
	struct block_group_tree block_group_cache;
	struct device_extent_tree dev_extent_cache;
	struct cache_tree extent_cache;
	struct cache_tree seen;
	struct cache_tree pending;
	struct cache_tree reada;
	struct cache_tree nodes;
	struct extent_io_tree excluded_extents;
	struct cache_tree corrupt_blocks;
	int ret, err = 0;
	struct block_info *bits;
	int bits_nr;
	struct list_head dropping_trees;
	struct list_head normal_trees;
	struct btrfs_root *root1;
	struct btrfs_root *root;
	u8 level;

	root = fs_info->fs_root;
	dev_cache = RB_ROOT;
	cache_tree_init(&chunk_cache);
	block_group_tree_init(&block_group_cache);
	device_extent_tree_init(&dev_extent_cache);

	cache_tree_init(&extent_cache);
	cache_tree_init(&seen);
	cache_tree_init(&pending);
	cache_tree_init(&nodes);
	cache_tree_init(&reada);
	cache_tree_init(&corrupt_blocks);
	extent_io_tree_init(&excluded_extents);
	INIT_LIST_HEAD(&dropping_trees);
	INIT_LIST_HEAD(&normal_trees);

	/* Hook our caches into fs_info so the repair paths can use them */
	if (repair) {
		fs_info->excluded_extents = &excluded_extents;
		fs_info->fsck_extent_cache = &extent_cache;
		fs_info->free_extent_hook = free_extent_hook;
		fs_info->corrupt_blocks = &corrupt_blocks;
	}

	bits_nr = 1024;
	bits = malloc(bits_nr * sizeof(struct block_info));
	if (!bits) {
		perror("malloc");
		exit(1);
	}

again:
	/* Tree root and chunk root are always scanned as normal trees */
	root1 = fs_info->tree_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0, NULL);
	if (ret < 0)
		goto out;
	root1 = fs_info->chunk_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0, NULL);
	if (ret < 0)
		goto out;

	ret = parse_tree_roots(fs_info, &normal_trees, &dropping_trees);
	if (ret < 0)
		goto out;

	/*
	 * check_block can return -EAGAIN if it fixes something, please keep
	 * this in mind when dealing with return values from these functions, if
	 * we get -EAGAIN we want to fall through and restart the loop.
	 */
	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
				  &seen, &reada, &nodes, &extent_cache,
				  &chunk_cache, &dev_cache, &block_group_cache,
				  &dev_extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}
	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
				  &pending, &seen, &reada, &nodes,
				  &extent_cache, &chunk_cache, &dev_cache,
				  &block_group_cache, &dev_extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}

	/* check_chunks failure is remembered in err but not fatal yet */
	ret = check_chunks(&chunk_cache, &block_group_cache,
			   &dev_extent_cache, NULL, NULL, NULL, 0);
	if (ret) {
		if (ret == -EAGAIN)
			goto loop;
		err = ret;
	}

	ret = check_extent_refs(root, &extent_cache);
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto loop;
		goto out;
	}

	ret = check_devices(&dev_cache, &dev_extent_cache);
	/* Prefer reporting the earlier chunk error if both failed */
	if (ret && err)
		ret = err;

out:
	if (repair) {
		free_corrupt_blocks_tree(fs_info->corrupt_blocks);
		extent_io_tree_cleanup(&excluded_extents);
		fs_info->fsck_extent_cache = NULL;
		fs_info->free_extent_hook = NULL;
		fs_info->corrupt_blocks = NULL;
		fs_info->excluded_extents = NULL;
	}
	free(bits);
	free_chunk_cache_tree(&chunk_cache);
	free_device_cache_tree(&dev_cache);
	free_block_group_tree(&block_group_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	free_root_item_list(&normal_trees);
	free_root_item_list(&dropping_trees);
	return ret;
loop:
	/* A repair changed the fs: throw away all caches and rescan */
	free_corrupt_blocks_tree(fs_info->corrupt_blocks);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	free_chunk_cache_tree(&chunk_cache);
	free_block_group_tree(&block_group_cache);
	free_device_cache_tree(&dev_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_record_cache(&extent_cache);
	free_root_item_list(&normal_trees);
	free_root_item_list(&dropping_trees);
	extent_io_tree_cleanup(&excluded_extents);
	goto again;
}
8378 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
8380 int ret;
8382 if (check_mode == CHECK_MODE_LOWMEM)
8383 ret = check_chunks_and_extents_lowmem(fs_info);
8384 else
8385 ret = check_chunks_and_extents(fs_info);
8387 /* Also repair device size related problems */
8388 if (repair && !ret) {
8389 ret = btrfs_fix_device_and_super_size(fs_info);
8390 if (ret > 0)
8391 ret = 0;
8393 return ret;
/*
 * Replace @root's node with a brand new empty leaf, effectively wiping the
 * tree's contents.  The old node is freed and the root is queued on the
 * dirty list so the change is committed.
 *
 * Returns 0 on success, negative errno on allocation or update failure.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	int level;
	int ret;
	struct btrfs_disk_key disk_key = {0,0,0};

	level = 0;

	c = btrfs_alloc_free_block(trans, root,
				   root->fs_info->nodesize,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	if (IS_ERR(c))
		return PTR_ERR(c);

	/* Initialize the new (empty, level 0) root node header */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 *
	 * reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
					  trans->transid);
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
		if (ret) {
			free_extent_buffer(c);
			return ret;
		}
	}
	free_extent_buffer(old);
	root->node = c;
	add_root_to_dirty_list(root);
	return 0;
}
/*
 * Rebuild the in-memory block groups from the chunk tree and mark them all
 * as cached, in preparation for re-initializing the extent tree.
 *
 * Returns 0 on success, negative errno on search failure.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	int ret;
	u64 start;

	btrfs_init_path(&path);
	key.objectid = 0;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
	if (ret < 0) {
		btrfs_release_path(&path);
		return ret;
	}

	/*
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks. This happens with
	 * restored images every time and could happen in real life I guess.
	 */
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;

	/* First we need to create the in-memory block groups */
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(fs_info->chunk_root, &path);
			if (ret < 0) {
				btrfs_release_path(&path);
				return ret;
			}
			if (ret) {
				/* End of chunk tree reached */
				ret = 0;
				break;
			}
		}
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}

		chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
		btrfs_add_block_group(fs_info, 0,
				      btrfs_chunk_type(leaf, chunk), key.offset,
				      btrfs_chunk_length(leaf, chunk));
		set_extent_dirty(&fs_info->free_space_cache, key.offset,
				 key.offset + btrfs_chunk_length(leaf, chunk));
		path.slots[0]++;
	}
	/* Then mark every block group we just created as cached */
	start = 0;
	while (1) {
		cache = btrfs_lookup_first_block_group(fs_info, start);
		if (!cache)
			break;
		cache->cached = 1;
		start = cache->key.objectid + cache->key.offset;
	}

	btrfs_release_path(&path);
	return 0;
}
/*
 * Remove any pending balance state: delete the balance item, delete all
 * tree-reloc root items, and re-initialize the data reloc tree.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;
	int ret;
	int found = 0;

	btrfs_init_path(&path);
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
	if (ret) {
		/* No balance item: skip straight to the data reloc reinit */
		if (ret > 0)
			ret = 0;
		if (!ret)
			goto reinit_data_reloc;
		else
			goto out;
	}

	ret = btrfs_del_item(trans, root, &path);
	if (ret)
		goto out;
	btrfs_release_path(&path);

	/* Now remove every TREE_RELOC root item, batching deletions per leaf */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
	if (ret < 0)
		goto out;
	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			if (!found)
				break;

			/* Flush the batched deletions for this leaf */
			if (del_nr) {
				ret = btrfs_del_items(trans, root, &path,
						      del_slot, del_nr);
				del_nr = 0;
				if (ret)
					goto out;
			}
			key.offset++;
			btrfs_release_path(&path);

			found = 0;
			ret = btrfs_search_slot(trans, root, &key, &path,
						-1, 1);
			if (ret < 0)
				goto out;
			continue;
		}
		found = 1;
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
			break;
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			path.slots[0]++;
			continue;
		}
		/* Grow (or start) the contiguous run of slots to delete */
		if (!del_nr) {
			del_slot = path.slots[0];
			del_nr = 1;
		} else {
			del_nr++;
		}
		path.slots[0]++;
	}

	if (del_nr) {
		ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
		if (ret)
			goto out;
	}
	btrfs_release_path(&path);

reinit_data_reloc:
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
		goto out;
	}
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root);
	if (ret)
		goto out;
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
out:
	btrfs_release_path(&path);
	return ret;
}
/*
 * Wipe and rebuild the extent tree from scratch.
 *
 * @pin: pin all in-use metadata blocks first; otherwise try to exclude them
 *       (falling back to pinning if exclusion fails).
 *
 * Returns 0 on success, negative errno on failure.  Not supported for
 * mixed block group filesystems.
 */
static int reinit_extent_tree(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, bool pin)
{
	u64 start = 0;
	int ret;

	/*
	 * The only reason we don't do this is because right now we're just
	 * walking the trees we find and pinning down their bytes, we don't look
	 * at any of the leaves. In order to do mixed groups we'd have to check
	 * the leaves of any fs roots and pin down the bytes for any file
	 * extents we find. Not hard but why do it if we don't have to?
	 */
	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
		fprintf(stderr, "We don't support re-initing the extent tree "
			"for mixed block groups yet, please notify a btrfs "
			"developer you want to do this so they can add this "
			"functionality.\n");
		return -EINVAL;
	}

	/*
	 * first we need to walk all of the trees except the extent tree and pin
	 * down/exclude the bytes that are in use so we don't overwrite any
	 * existing metadata.
	 * If pinned, unpin will be done in the end of transaction.
	 * If excluded, cleanup will be done in check_chunks_and_extents_lowmem.
	 */
again:
	if (pin) {
		ret = pin_metadata_blocks(fs_info);
		if (ret) {
			fprintf(stderr, "error pinning down used bytes\n");
			return ret;
		}
	} else {
		ret = exclude_metadata_blocks(fs_info);
		if (ret) {
			/* Exclusion failed: retry the whole step with pinning */
			fprintf(stderr, "error excluding used bytes\n");
			printf("try to pin down used bytes\n");
			pin = true;
			goto again;
		}
	}

	/*
	 * Need to drop all the block groups since we're going to recreate all
	 * of them again.
	 */
	btrfs_free_block_groups(fs_info);
	ret = reset_block_groups(fs_info);
	if (ret) {
		fprintf(stderr, "error resetting the block groups\n");
		return ret;
	}

	/* Ok we can allocate now, reinit the extent root */
	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
	if (ret) {
		fprintf(stderr, "extent root initialization failed\n");
		/*
		 * When the transaction code is updated we should end the
		 * transaction, but for now progs only knows about commit so
		 * just return an error.
		 */
		return ret;
	}

	/*
	 * Now we have all the in-memory block groups setup so we can make
	 * allocations properly, and the metadata we care about is safe since we
	 * pinned all of it above.
	 */
	while (1) {
		struct btrfs_block_group_cache *cache;

		cache = btrfs_lookup_first_block_group(fs_info, start);
		if (!cache)
			break;
		start = cache->key.objectid + cache->key.offset;
		ret = btrfs_insert_item(trans, fs_info->extent_root,
					&cache->key, &cache->item,
					sizeof(cache->item));
		if (ret) {
			fprintf(stderr, "Error adding block group\n");
			return ret;
		}
		btrfs_extent_post_op(trans);
	}

	ret = reset_balance(trans, fs_info);
	if (ret)
		fprintf(stderr, "error resetting the pending balance\n");

	return ret;
}
8723 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8725 struct btrfs_path path;
8726 struct btrfs_trans_handle *trans;
8727 struct btrfs_key key;
8728 int ret;
8730 printf("Recowing metadata block %llu\n", eb->start);
8731 key.objectid = btrfs_header_owner(eb);
8732 key.type = BTRFS_ROOT_ITEM_KEY;
8733 key.offset = (u64)-1;
8735 root = btrfs_read_fs_root(root->fs_info, &key);
8736 if (IS_ERR(root)) {
8737 fprintf(stderr, "Couldn't find owner root %llu\n",
8738 key.objectid);
8739 return PTR_ERR(root);
8742 trans = btrfs_start_transaction(root, 1);
8743 if (IS_ERR(trans))
8744 return PTR_ERR(trans);
8746 btrfs_init_path(&path);
8747 path.lowest_level = btrfs_header_level(eb);
8748 if (path.lowest_level)
8749 btrfs_node_key_to_cpu(eb, &key, 0);
8750 else
8751 btrfs_item_key_to_cpu(eb, &key, 0);
8753 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8754 btrfs_commit_transaction(trans, root);
8755 btrfs_release_path(&path);
8756 return ret;
8759 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8761 struct btrfs_path path;
8762 struct btrfs_trans_handle *trans;
8763 struct btrfs_key key;
8764 int ret;
8766 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8767 bad->key.type, bad->key.offset);
8768 key.objectid = bad->root_id;
8769 key.type = BTRFS_ROOT_ITEM_KEY;
8770 key.offset = (u64)-1;
8772 root = btrfs_read_fs_root(root->fs_info, &key);
8773 if (IS_ERR(root)) {
8774 fprintf(stderr, "Couldn't find owner root %llu\n",
8775 key.objectid);
8776 return PTR_ERR(root);
8779 trans = btrfs_start_transaction(root, 1);
8780 if (IS_ERR(trans))
8781 return PTR_ERR(trans);
8783 btrfs_init_path(&path);
8784 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
8785 if (ret) {
8786 if (ret > 0)
8787 ret = 0;
8788 goto out;
8790 ret = btrfs_del_item(trans, root, &path);
8791 out:
8792 btrfs_commit_transaction(trans, root);
8793 btrfs_release_path(&path);
8794 return ret;
8797 static int zero_log_tree(struct btrfs_root *root)
8799 struct btrfs_trans_handle *trans;
8800 int ret;
8802 trans = btrfs_start_transaction(root, 1);
8803 if (IS_ERR(trans)) {
8804 ret = PTR_ERR(trans);
8805 return ret;
8807 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8808 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8809 ret = btrfs_commit_transaction(trans, root);
8810 return ret;
8813 static int populate_csum(struct btrfs_trans_handle *trans,
8814 struct btrfs_root *csum_root, char *buf, u64 start,
8815 u64 len)
8817 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8818 u64 offset = 0;
8819 u64 sectorsize;
8820 int ret = 0;
8822 while (offset < len) {
8823 sectorsize = fs_info->sectorsize;
8824 ret = read_extent_data(fs_info, buf, start + offset,
8825 &sectorsize, 0);
8826 if (ret)
8827 break;
8828 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8829 start + offset, buf, sectorsize);
8830 if (ret)
8831 break;
8832 offset += sectorsize;
8834 return ret;
/*
 * Walk every item of @cur_root and, for each regular (non-inline,
 * non-prealloc) file extent, compute and insert its data checksums into
 * @csum_root via populate_csum().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root,
				      struct btrfs_root *cur_root)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;
	char *buf = NULL;
	u64 start = 0;
	u64 len = 0;
	int slot = 0;
	int ret = 0;

	/* One sector worth of scratch space for populate_csum() */
	buf = malloc(cur_root->fs_info->sectorsize);
	if (!buf)
		return -ENOMEM;

	btrfs_init_path(&path);
	/* Start from the very first item of the subvolume tree */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;
	ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	/* Iterate all regular file extents and fill its csum */
	while (1) {
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			goto next;
		node = path.nodes[0];
		slot = path.slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
			goto next;
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);

		ret = populate_csum(trans, csum_root, buf, start, len);
		/* A csum that already exists is fine (shared extents) */
		if (ret == -EEXIST)
			ret = 0;
		if (ret < 0)
			goto out;
next:
		/*
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		 * leaf.
		 */
		ret = btrfs_next_item(cur_root, &path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			/* End of tree reached */
			ret = 0;
			goto out;
		}
	}

out:
	btrfs_release_path(&path);
	free(buf);
	return ret;
}
8901 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8902 struct btrfs_root *csum_root)
8904 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8905 struct btrfs_path path;
8906 struct btrfs_root *tree_root = fs_info->tree_root;
8907 struct btrfs_root *cur_root;
8908 struct extent_buffer *node;
8909 struct btrfs_key key;
8910 int slot = 0;
8911 int ret = 0;
8913 btrfs_init_path(&path);
8914 key.objectid = BTRFS_FS_TREE_OBJECTID;
8915 key.offset = 0;
8916 key.type = BTRFS_ROOT_ITEM_KEY;
8917 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
8918 if (ret < 0)
8919 goto out;
8920 if (ret > 0) {
8921 ret = -ENOENT;
8922 goto out;
8925 while (1) {
8926 node = path.nodes[0];
8927 slot = path.slots[0];
8928 btrfs_item_key_to_cpu(node, &key, slot);
8929 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8930 goto out;
8931 if (key.type != BTRFS_ROOT_ITEM_KEY)
8932 goto next;
8933 if (!is_fstree(key.objectid))
8934 goto next;
8935 key.offset = (u64)-1;
8937 cur_root = btrfs_read_fs_root(fs_info, &key);
8938 if (IS_ERR(cur_root) || !cur_root) {
8939 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8940 key.objectid);
8941 goto out;
8943 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8944 cur_root);
8945 if (ret < 0)
8946 goto out;
8947 next:
8948 ret = btrfs_next_item(tree_root, &path);
8949 if (ret > 0) {
8950 ret = 0;
8951 goto out;
8953 if (ret < 0)
8954 goto out;
8957 out:
8958 btrfs_release_path(&path);
8959 return ret;
/*
 * Rebuild the csum tree by walking the extent tree: every EXTENT_ITEM with
 * the DATA flag gets its range checksummed via populate_csum().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
{
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	char *buf;
	struct btrfs_key key;
	int ret;

	btrfs_init_path(&path);
	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
	if (ret < 0) {
		btrfs_release_path(&path);
		return ret;
	}

	/* One sector worth of scratch space for populate_csum() */
	buf = malloc(csum_root->fs_info->sectorsize);
	if (!buf) {
		btrfs_release_path(&path);
		return -ENOMEM;
	}

	while (1) {
		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(extent_root, &path);
			if (ret < 0)
				break;
			if (ret) {
				/* End of extent tree reached */
				ret = 0;
				break;
			}
		}
		leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
			path.slots[0]++;
			continue;
		}

		/* Only data extents carry checksums */
		ei = btrfs_item_ptr(leaf, path.slots[0],
				    struct btrfs_extent_item);
		if (!(btrfs_extent_flags(leaf, ei) &
		      BTRFS_EXTENT_FLAG_DATA)) {
			path.slots[0]++;
			continue;
		}

		/* For EXTENT_ITEM, key.objectid/offset are bytenr/length */
		ret = populate_csum(trans, csum_root, buf, key.objectid,
				    key.offset);
		if (ret)
			break;
		path.slots[0]++;
	}

	btrfs_release_path(&path);
	free(buf);
	return ret;
}
9028 * Recalculate the csum and put it into the csum tree.
9030 * Extent tree init will wipe out all the extent info, so in that case, we
9031 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9032 * will use fs/subvol trees to init the csum tree.
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	/* Source the data ranges from fs trees or from the extent tree */
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
9044 static void free_roots_info_cache(void)
9046 if (!roots_info_cache)
9047 return;
9049 while (!cache_tree_empty(roots_info_cache)) {
9050 struct cache_extent *entry;
9051 struct root_item_info *rii;
9053 entry = first_cache_extent(roots_info_cache);
9054 if (!entry)
9055 break;
9056 remove_cache_extent(roots_info_cache, entry);
9057 rii = container_of(entry, struct root_item_info, cache_extent);
9058 free(rii);
9061 free(roots_info_cache);
9062 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every root, the highest-level tree
 * block extent found (bytenr, generation, level) plus how many blocks exist
 * at that level.  The result is stored in the global roots_info_cache and
 * later consumed by maybe_repair_root_item().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
{
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path path;

	/* Lazily allocate the global cache on first use */
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
			return -ENOMEM;
		cache_tree_init(roots_info_cache);
	}

	btrfs_init_path(&path);
	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
	if (ret < 0)
		goto out;
	leaf = path.nodes[0];

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		unsigned long item_end;
		int slot = path.slots[0];
		int type;
		u64 flags;
		u64 root_id;
		u8 level;
		struct cache_extent *entry;
		struct root_item_info *rii;

		ctx.item_count++;
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, &path);
			if (ret < 0) {
				break;
			} else if (ret) {
				/* End of extent tree reached */
				ret = 0;
				break;
			}
			leaf = path.nodes[0];
			slot = path.slots[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)
			goto next;

		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);
		item_end = (unsigned long)ei + btrfs_item_size_nr(leaf, slot);

		/* Only tree blocks matter; skip plain data extents */
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
			goto next;

		/*
		 * METADATA_ITEM encodes the level in the key offset; the
		 * older EXTENT_ITEM form stores it in a tree_block_info
		 * that precedes the inline refs.
		 */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
		} else {
			struct btrfs_tree_block_info *binfo;

			binfo = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
			level = btrfs_tree_block_level(leaf, binfo);
		}

		/*
		 * It's a valid extent/metadata item that has no inline ref,
		 * but SHARED_BLOCK_REF or other shared references.
		 * So we need to do extra check to avoid reading beyond leaf
		 * boundary.
		 */
		if ((unsigned long)iref >= item_end)
			goto next;

		/*
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		 */
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)
			goto next;

		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		if (!entry) {
			/* First time we see this root: create its record */
			rii = malloc(sizeof(struct root_item_info));
			if (!rii) {
				ret = -ENOMEM;
				goto out;
			}
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			ASSERT(ret == 0);
		} else {
			rii = container_of(entry, struct root_item_info,
					   cache_extent);
		}

		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);

		/* Track the highest level seen and count blocks at it */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->level = level;
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {
			rii->node_count++;
		}
next:
		path.slots[0]++;
	}

out:
	btrfs_release_path(&path);

	return ret;
}
/*
 * Compare the root item at @path against the data collected by
 * build_roots_info_cache() and, unless @read_only_mode, rewrite the item's
 * bytenr/level/generation to match the actual root node found on disk.
 *
 * Returns 0 if the item was fine, 1 if it was (or needs to be) fixed,
 * negative errno on error.
 */
static int maybe_repair_root_item(struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
{
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;

	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
	if (!entry) {
		fprintf(stderr,
			"Error: could not find extent items for root %llu\n",
			root_key->objectid);
		return -ENOENT;
	}

	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);

	/* More than one candidate at the top level: can't pick a root */
	if (rii->node_count != 1) {
		fprintf(stderr,
			"Error: could not find btree root extent for root %llu\n",
			root_id);
		return -ENOENT;
	}

	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));

	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {

		/*
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		 */
		if (!(read_only_mode && repair))
			fprintf(stderr,
				"%sroot item for root %llu,"
				" current bytenr %llu, current gen %llu, current level %u,"
				" new bytenr %llu, new gen %llu, new level %u\n",
				(read_only_mode ? "" : "fixing "),
				root_id,
				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
				btrfs_root_level(&ri),
				rii->bytenr, rii->gen, rii->level);

		/* Never downgrade a root item to an older generation */
		if (btrfs_root_generation(&ri) > rii->gen) {
			fprintf(stderr,
				"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);
			return -EINVAL;
		}

		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
		}

		return 1;
	}

	return 0;
}
9272 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9273 * caused read-only snapshots to be corrupted if they were created at a moment
9274 * when the source subvolume/snapshot had orphan items. The issue was that the
9275 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9276 * node instead of the post orphan cleanup root node.
9277 * So this function, and its callees, just detects and fixes those cases. Even
9278 * though the regression was for read-only snapshots, this function applies to
9279 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing so makes other
9281 * repair code delete or modify backrefs in the extent tree for example, which
9282 * will result in an inconsistent fs after repairing the root items.
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	btrfs_init_path(&path);

	/* Collect actual root node locations from the extent tree */
	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Search read-only unless a transaction (write mode) is open */
	ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path.nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			int no_more_keys = find_next_key(&path, &key);

			btrfs_release_path(&path);
			/* Commit per-leaf so fixes land before moving on */
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		/* Reloc roots are transient; don't touch them */
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			/*
			 * Found a fixable item while read-only: restart at
			 * this key with a transaction open so it can be
			 * rewritten.
			 */
			if (!trans && repair) {
				need_trans = 1;
				key = found_key;
				btrfs_release_path(&path);
				goto again;
			}
			bad_roots++;
		}
next:
		path.slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_release_path(&path);
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
9378 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
9380 struct btrfs_trans_handle *trans;
9381 struct btrfs_block_group_cache *bg_cache;
9382 u64 current = 0;
9383 int ret = 0;
9385 /* Clear all free space cache inodes and its extent data */
9386 while (1) {
9387 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
9388 if (!bg_cache)
9389 break;
9390 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
9391 if (ret < 0)
9392 return ret;
9393 current = bg_cache->key.objectid + bg_cache->key.offset;
9396 /* Don't forget to set cache_generation to -1 */
9397 trans = btrfs_start_transaction(fs_info->tree_root, 0);
9398 if (IS_ERR(trans)) {
9399 error("failed to update super block cache generation");
9400 return PTR_ERR(trans);
9402 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
9403 btrfs_commit_transaction(trans, fs_info->tree_root);
9405 return ret;
9408 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
9409 int clear_version)
9411 int ret = 0;
9413 if (clear_version == 1) {
9414 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9415 error(
9416 "free space cache v2 detected, use --clear-space-cache v2");
9417 ret = 1;
9418 goto close_out;
9420 printf("Clearing free space cache\n");
9421 ret = clear_free_space_cache(fs_info);
9422 if (ret) {
9423 error("failed to clear free space cache");
9424 ret = 1;
9425 } else {
9426 printf("Free space cache cleared\n");
9428 } else if (clear_version == 2) {
9429 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
9430 printf("no free space cache v2 to clear\n");
9431 ret = 0;
9432 goto close_out;
9434 printf("Clear free space cache v2\n");
9435 ret = btrfs_clear_free_space_tree(fs_info);
9436 if (ret) {
9437 error("failed to clear free space cache v2: %d", ret);
9438 ret = 1;
9439 } else {
9440 printf("free space cache v2 cleared\n");
9443 close_out:
9444 return ret;
/*
 * Usage text for "btrfs check"; printed one element per line by usage().
 * NULL-terminated as required by the usage helpers.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous and should not be used",
	/*
	 * Fix: "flesystem" -> "filesystem", and restore the missing comma so
	 * this string no longer concatenates with the blank separator line.
	 */
	"         without prior analysis of problems found on the filesystem.",
	"",
	"Options:",
	"  starting point selection:",
	"       -s|--super <superblock>     use this superblock copy",
	"       -b|--backup                 use the first valid backup root copy",
	"       -r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"       --chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"  operation modes:",
	"       --readonly                  run in read-only mode (default)",
	"       --repair                    try to repair the filesystem",
	"       --force                     skip mount checks, repair is not possible",
	"       --mode <MODE>               allows choice of memory/IO trade-offs",
	"                                   where MODE is one of:",
	"                                   original - read inodes and extents to memory (requires",
	"                                              more memory, does less IO)",
	"                                   lowmem   - try to use less memory but read blocks again",
	"                                              when needed (experimental)",
	"  repair options:",
	"       --init-csum-tree            create a new CRC tree",
	"       --init-extent-tree          create a new extent tree",
	"       --clear-space-cache v1|v2   clear space cache for v1 or v2",
	"  check and reporting options:",
	"       --check-data-csum           verify checksums of data blocks",
	"       -Q|--qgroup-report          print a report on qgroup consistency",
	"       -E|--subvol-extents <subvolid>",
	"                                   print subvolume extents and sharing state",
	"       -p|--progress               indicate progress",
	NULL
};
/*
 * Entry point for "btrfs check": parse options, open the filesystem, then run
 * the 7 check phases (root items, extents, free space, fs roots, csums, root
 * refs, qgroups), optionally repairing along the way.
 *
 * Returns the accumulated error indicator 'err' (0 means no errors found).
 */
int cmd_check(int argc, char **argv)
{
	struct cache_tree root_cache;
	struct btrfs_root *root;
	struct btrfs_fs_info *info;
	u64 bytenr = 0;			/* superblock copy offset (0 = primary) */
	u64 subvolid = 0;		/* -E: subvolume to dump extent state for */
	u64 tree_root_bytenr = 0;	/* -r: override tree root location */
	u64 chunk_root_bytenr = 0;	/* --chunk-root override */
	char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
	int ret = 0;			/* per-step result */
	int err = 0;			/* accumulated exit status */
	u64 num;
	int init_csum_tree = 0;
	int readonly = 0;
	int clear_space_cache = 0;	/* 0 = off, 1 = v1, 2 = v2 */
	int qgroup_report = 0;
	int qgroups_repaired = 0;
	int qgroup_report_ret;
	unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
	int force = 0;

	/* --- option parsing --- */
	while(1) {
		int c;
		enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
			GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
			GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
			GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
			GETOPT_VAL_FORCE };
		static const struct option long_options[] = {
			{ "super", required_argument, NULL, 's' },
			{ "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
			{ "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
			{ "init-csum-tree", no_argument, NULL,
				GETOPT_VAL_INIT_CSUM },
			{ "init-extent-tree", no_argument, NULL,
				GETOPT_VAL_INIT_EXTENT },
			{ "check-data-csum", no_argument, NULL,
				GETOPT_VAL_CHECK_CSUM },
			{ "backup", no_argument, NULL, 'b' },
			{ "subvol-extents", required_argument, NULL, 'E' },
			{ "qgroup-report", no_argument, NULL, 'Q' },
			{ "tree-root", required_argument, NULL, 'r' },
			{ "chunk-root", required_argument, NULL,
				GETOPT_VAL_CHUNK_TREE },
			{ "progress", no_argument, NULL, 'p' },
			{ "mode", required_argument, NULL,
				GETOPT_VAL_MODE },
			{ "clear-space-cache", required_argument, NULL,
				GETOPT_VAL_CLEAR_SPACE_CACHE},
			{ "force", no_argument, NULL, GETOPT_VAL_FORCE },
			{ NULL, 0, NULL, 0}
		};

		c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
		if (c < 0)
			break;
		switch(c) {
			case 'a': /* ignored */ break;
			case 'b':
				ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
				break;
			case 's':
				num = arg_strtou64(optarg);
				if (num >= BTRFS_SUPER_MIRROR_MAX) {
					error(
					"super mirror should be less than %d",
						BTRFS_SUPER_MIRROR_MAX);
					exit(1);
				}
				bytenr = btrfs_sb_offset(((int)num));
				printf("using SB copy %llu, bytenr %llu\n", num,
				       (unsigned long long)bytenr);
				break;
			case 'Q':
				qgroup_report = 1;
				break;
			case 'E':
				subvolid = arg_strtou64(optarg);
				break;
			case 'r':
				tree_root_bytenr = arg_strtou64(optarg);
				break;
			case GETOPT_VAL_CHUNK_TREE:
				chunk_root_bytenr = arg_strtou64(optarg);
				break;
			case 'p':
				ctx.progress_enabled = true;
				break;
			case '?':
			case 'h':
				/* usage() presumably exits - no break needed */
				usage(cmd_check_usage);
			case GETOPT_VAL_REPAIR:
				printf("enabling repair mode\n");
				repair = 1;
				ctree_flags |= OPEN_CTREE_WRITES;
				break;
			case GETOPT_VAL_READONLY:
				readonly = 1;
				break;
			case GETOPT_VAL_INIT_CSUM:
				/* implies repair mode and write access */
				printf("Creating a new CRC tree\n");
				init_csum_tree = 1;
				repair = 1;
				ctree_flags |= OPEN_CTREE_WRITES;
				break;
			case GETOPT_VAL_INIT_EXTENT:
				/* skip block group loading; extents rebuilt */
				init_extent_tree = 1;
				ctree_flags |= (OPEN_CTREE_WRITES |
						OPEN_CTREE_NO_BLOCK_GROUPS);
				repair = 1;
				break;
			case GETOPT_VAL_CHECK_CSUM:
				check_data_csum = 1;
				break;
			case GETOPT_VAL_MODE:
				check_mode = parse_check_mode(optarg);
				if (check_mode == CHECK_MODE_UNKNOWN) {
					error("unknown mode: %s", optarg);
					exit(1);
				}
				break;
			case GETOPT_VAL_CLEAR_SPACE_CACHE:
				if (strcmp(optarg, "v1") == 0) {
					clear_space_cache = 1;
				} else if (strcmp(optarg, "v2") == 0) {
					clear_space_cache = 2;
					ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
				} else {
					error(
		"invalid argument to --clear-space-cache, must be v1 or v2");
					exit(1);
				}
				ctree_flags |= OPEN_CTREE_WRITES;
				break;
			case GETOPT_VAL_FORCE:
				force = 1;
				break;
		}
	}

	if (check_argc_exact(argc - optind, 1))
		usage(cmd_check_usage);

	if (ctx.progress_enabled) {
		ctx.tp = TASK_NOTHING;
		ctx.info = task_init(print_status_check, print_status_return, &ctx);
	}

	/* This check is the only reason for --readonly to exist */
	if (readonly && repair) {
		error("repair options are not compatible with --readonly");
		exit(1);
	}

	/*
	 * experimental and dangerous
	 */
	if (repair && check_mode == CHECK_MODE_LOWMEM)
		warning("low-memory mode repair support is only partial");

	printf("Opening filesystem to check...\n");

	radix_tree_init();
	cache_tree_init(&root_cache);

	/* --- mount-state checks; --force relaxes them (but not with repair) --- */
	ret = check_mounted(argv[optind]);
	if (!force) {
		if (ret < 0) {
			error("could not check mount status: %s",
					strerror(-ret));
			err |= !!ret;
			goto err_out;
		} else if (ret) {
			error(
"%s is currently mounted, use --force if you really intend to check the filesystem",
				argv[optind]);
			ret = -EBUSY;
			err |= !!ret;
			goto err_out;
		}
	} else {
		if (repair) {
			error("repair and --force is not yet supported");
			ret = 1;
			err |= !!ret;
			goto err_out;
		}
		if (ret < 0) {
			warning(
"cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
				argv[optind]);
		} else if (ret) {
			warning(
			"filesystem mounted, continuing because of --force");
		}
		/* A block device is mounted in exclusive mode by kernel */
		ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
	}

	/* only allow partial opening under repair mode */
	if (repair)
		ctree_flags |= OPEN_CTREE_PARTIAL;

	info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
				  chunk_root_bytenr, ctree_flags);
	if (!info) {
		error("cannot open file system");
		ret = -EIO;
		err |= !!ret;
		goto err_out;
	}

	global_info = info;
	root = info->fs_root;
	uuid_unparse(info->super_copy->fsid, uuidbuf);

	printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);

	/*
	 * Check the bare minimum before starting anything else that could rely
	 * on it, namely the tree roots, any local consistency checks
	 */
	if (!extent_buffer_uptodate(info->tree_root->node) ||
	    !extent_buffer_uptodate(info->dev_root->node) ||
	    !extent_buffer_uptodate(info->chunk_root->node)) {
		error("critical roots corrupted, unable to check the filesystem");
		/* NOTE(review): err is or-ed with the stale ret from above,
		 * before ret is set to -EIO - looks like the two lines are
		 * in the wrong order; confirm against upstream */
		err |= !!ret;
		ret = -EIO;
		goto close_out;
	}

	/* --clear-space-cache is a standalone operation: do it and leave */
	if (clear_space_cache) {
		ret = do_clear_free_space_cache(info, clear_space_cache);
		err |= !!ret;
		goto close_out;
	}

	/*
	 * repair mode will force us to commit transaction which
	 * will make us fail to load log tree when mounting.
	 */
	if (repair && btrfs_super_log_root(info->super_copy)) {
		ret = ask_user("repair mode will force to clear out log tree, are you sure?");
		if (!ret) {
			ret = 1;
			err |= !!ret;
			goto close_out;
		}
		ret = zero_log_tree(root);
		err |= !!ret;
		if (ret) {
			error("failed to zero log tree: %d", ret);
			goto close_out;
		}
	}

	/* -Q: qgroup report only, no full check */
	if (qgroup_report) {
		printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
		       uuidbuf);
		ret = qgroup_verify_all(info);
		err |= !!ret;
		if (ret == 0)
			err |= !!report_qgroups(1);
		goto close_out;
	}
	/* -E: dump extent sharing state for one subvolume, no full check */
	if (subvolid) {
		printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
		       subvolid, argv[optind], uuidbuf);
		ret = print_extent_state(info, subvolid);
		err |= !!ret;
		goto close_out;
	}

	/* --- optional destructive reinit of extent and/or csum trees --- */
	if (init_extent_tree || init_csum_tree) {
		struct btrfs_trans_handle *trans;

		trans = btrfs_start_transaction(info->extent_root, 0);
		if (IS_ERR(trans)) {
			error("error starting transaction");
			ret = PTR_ERR(trans);
			err |= !!ret;
			goto close_out;
		}

		if (init_extent_tree) {
			printf("Creating a new extent tree\n");
			ret = reinit_extent_tree(trans, info,
					 check_mode == CHECK_MODE_ORIGINAL);
			err |= !!ret;
			if (ret)
				goto close_out;
		}

		if (init_csum_tree) {
			printf("Reinitialize checksum tree\n");
			ret = btrfs_fsck_reinit_root(trans, info->csum_root);
			if (ret) {
				error("checksum tree initialization failed: %d",
						ret);
				ret = -EIO;
				err |= !!ret;
				goto close_out;
			}

			ret = fill_csum_tree(trans, info->csum_root,
					     init_extent_tree);
			err |= !!ret;
			if (ret) {
				error("checksum tree refilling failed: %d", ret);
				/* NOTE(review): bare return skips close_ctree
				 * and task_deinit cleanup - should probably be
				 * a goto close_out; confirm */
				return -EIO;
			}
		}
		/*
		 * Ok now we commit and run the normal fsck, which will add
		 * extent entries for all of the items it finds.
		 */
		ret = btrfs_commit_transaction(trans, info->extent_root);
		err |= !!ret;
		if (ret)
			goto close_out;
	}
	if (!extent_buffer_uptodate(info->extent_root->node)) {
		error("critical: extent_root, unable to check the filesystem");
		ret = -EIO;
		err |= !!ret;
		goto close_out;
	}
	if (!extent_buffer_uptodate(info->csum_root->node)) {
		error("critical: csum_root, unable to check the filesystem");
		ret = -EIO;
		err |= !!ret;
		goto close_out;
	}

	/* --- phase 1/7: root items (skipped when extent tree was rebuilt) --- */
	if (!init_extent_tree) {
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[1/7] checking root items\n");
		} else {
			ctx.tp = TASK_ROOT_ITEMS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}
		ret = repair_root_items(info);
		task_stop(ctx.info);
		if (ret < 0) {
			/* NOTE(review): plain '=' clobbers any error already
			 * accumulated in err; every other site uses '|=' */
			err = !!ret;
			error("failed to repair root items: %s", strerror(-ret));
			goto close_out;
		}
		if (repair) {
			fprintf(stderr, "Fixed %d roots.\n", ret);
			ret = 0;
		} else if (ret > 0) {
			fprintf(stderr,
				"Found %d roots with an outdated root item.\n",
				ret);
			fprintf(stderr,
"Please run a filesystem check with the option --repair to fix them.\n");
			ret = 1;
			err |= ret;
			goto close_out;
		}
	} else {
		fprintf(stderr, "[1/7] checking root items... skipped\n");
	}

	/* --- phase 2/7: extent and chunk trees --- */
	if (!ctx.progress_enabled) {
		fprintf(stderr, "[2/7] checking extents\n");
	} else {
		ctx.tp = TASK_EXTENTS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}
	ret = do_check_chunks_and_extents(info);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret)
		error(
		"errors found in extent allocation tree or chunk allocation");

	/* Only re-check super size after we checked and repaired the fs */
	err |= !is_super_size_valid(info);

	is_free_space_tree = btrfs_fs_compat_ro(info, FREE_SPACE_TREE);

	/* --- phase 3/7: free space cache (v1) or free space tree (v2) --- */
	if (!ctx.progress_enabled) {
		if (is_free_space_tree)
			fprintf(stderr, "[3/7] checking free space tree\n");
		else
			fprintf(stderr, "[3/7] checking free space cache\n");
	} else {
		ctx.tp = TASK_FREE_SPACE;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = check_space_cache(root);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret) {
		if (is_free_space_tree)
			error("errors found in free space tree");
		else
			error("errors found in free space cache");
		goto out;
	}

	/*
	 * We used to have to have these hole extents in between our real
	 * extents so if we don't have this flag set we need to make sure there
	 * are no gaps in the file extents for inodes, otherwise we can just
	 * ignore it when this happens.
	 */
	no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
	/* --- phase 4/7: fs roots (inodes, dir items, file extents) --- */
	if (!ctx.progress_enabled) {
		fprintf(stderr, "[4/7] checking fs roots\n");
	} else {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = do_check_fs_roots(info, &root_cache);
	task_stop(ctx.info);
	err |= !!ret;
	if (ret) {
		error("errors found in fs roots");
		goto out;
	}

	/* --- phase 5/7: csum items (data reads only with --check-data-csum) --- */
	if (!ctx.progress_enabled) {
		if (check_data_csum)
			fprintf(stderr, "[5/7] checking csums against data\n");
		else
			fprintf(stderr,
		"[5/7] checking only csums items (without verifying data)\n");
	} else {
		ctx.tp = TASK_CSUMS;
		task_start(ctx.info, &ctx.start_time, &ctx.item_count);
	}

	ret = check_csums(root);
	task_stop(ctx.info);
	/*
	 * Data csum error is not fatal, and it may indicate more serious
	 * corruption, continue checking.
	 */
	if (ret)
		error("errors found in csum tree");
	err |= !!ret;

	/* For low memory mode, check_fs_roots_v2 handles root refs */
	if (check_mode != CHECK_MODE_LOWMEM) {
		/* --- phase 6/7: root refs --- */
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[6/7] checking root refs\n");
		} else {
			ctx.tp = TASK_ROOT_REFS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}

		ret = check_root_refs(root, &root_cache);
		task_stop(ctx.info);
		err |= !!ret;
		if (ret) {
			error("errors found in root refs");
			goto out;
		}
	} else {
		fprintf(stderr,
	"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
	}

	/* Re-COW any buffers queued with transid problems (repair mode only) */
	while (repair && !list_empty(&root->fs_info->recow_ebs)) {
		struct extent_buffer *eb;

		eb = list_first_entry(&root->fs_info->recow_ebs,
				      struct extent_buffer, recow);
		list_del_init(&eb->recow);
		ret = recow_extent_buffer(root, eb);
		err |= !!ret;
		if (ret) {
			error("fails to fix transid errors");
			break;
		}
	}

	/* Drain the bad-item queue; only actually delete when repairing */
	while (!list_empty(&delete_items)) {
		struct bad_item *bad;

		bad = list_first_entry(&delete_items, struct bad_item, list);
		list_del_init(&bad->list);
		if (repair) {
			ret = delete_bad_item(root, bad);
			err |= !!ret;
		}
		free(bad);
	}

	/* --- phase 7/7: quota groups (only when qgroups are enabled) --- */
	if (info->quota_enabled) {
		qgroup_set_item_count_ptr(&ctx.item_count);
		if (!ctx.progress_enabled) {
			fprintf(stderr, "[7/7] checking quota groups\n");
		} else {
			ctx.tp = TASK_QGROUPS;
			task_start(ctx.info, &ctx.start_time, &ctx.item_count);
		}
		ret = qgroup_verify_all(info);
		task_stop(ctx.info);
		err |= !!ret;
		if (ret) {
			error("failed to check quota groups");
			goto out;
		}
		qgroup_report_ret = report_qgroups(0);
		ret = repair_qgroups(info, &qgroups_repaired);
		if (ret) {
			error("failed to repair quota groups");
			goto out;
		}
		/* NOTE(review): ret is always 0 here (checked just above),
		 * so the '|| ret' term is dead - confirm intent */
		if (qgroup_report_ret && (!qgroups_repaired || ret))
			err |= qgroup_report_ret;
		ret = 0;
	} else {
		fprintf(stderr,
		"[7/7] checking quota groups skipped (not enabled on this FS)\n");
	}

	/* Anything still queued means transid errors we could not fix */
	if (!list_empty(&root->fs_info->recow_ebs)) {
		error("transid errors in file system");
		ret = 1;
		err |= !!ret;
	}
out:
	/* --- summary statistics, printed even when a phase bailed out --- */
	printf("found %llu bytes used, ",
	       (unsigned long long)bytes_used);
	if (err)
		printf("error(s) found\n");
	else
		printf("no error found\n");
	printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
	printf("total tree bytes: %llu\n",
	       (unsigned long long)total_btree_bytes);
	printf("total fs tree bytes: %llu\n",
	       (unsigned long long)total_fs_tree_bytes);
	printf("total extent tree bytes: %llu\n",
	       (unsigned long long)total_extent_tree_bytes);
	printf("btree space waste bytes: %llu\n",
	       (unsigned long long)btree_space_waste);
	printf("file data blocks allocated: %llu\n referenced %llu\n",
	       (unsigned long long)data_bytes_allocated,
	       (unsigned long long)data_bytes_referenced);

	free_qgroup_counts();
	free_root_recs_tree(&root_cache);
close_out:
	close_ctree(root);
err_out:
	if (ctx.progress_enabled)
		task_deinit(ctx.info);

	return err;
}