btrfs-progs: use on-stack buffer for btrfs_scan_one_device
[btrfs-progs-unstable/devel.git] / cmds-check.c
blob6a0b50a3c248c6cad2da251bcf60f18878cf634f
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "btrfsck.h"
39 #include "qgroup-verify.h"
40 #include "rbtree-utils.h"
41 #include "backref.h"
42 #include "ulist.h"
/*
 * Which phase the progress indicator is reporting.  The values index the
 * message table in print_status_check(), so TASK_NOTHING must stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
51 struct task_ctx {
52 int progress_enabled;
53 enum task_position tp;
55 struct task_info *info;
58 static u64 bytes_used = 0;
59 static u64 total_csum_bytes = 0;
60 static u64 total_btree_bytes = 0;
61 static u64 total_fs_tree_bytes = 0;
62 static u64 total_extent_tree_bytes = 0;
63 static u64 btree_space_waste = 0;
64 static u64 data_bytes_allocated = 0;
65 static u64 data_bytes_referenced = 0;
66 static int found_old_backref = 0;
67 static LIST_HEAD(duplicate_extents);
68 static LIST_HEAD(delete_items);
69 static int repair = 0;
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
76 static void *print_status_check(void *p)
78 struct task_ctx *priv = p;
79 const char work_indicator[] = { '.', 'o', 'O', 'o' };
80 uint32_t count = 0;
81 static char *task_position_string[] = {
82 "checking extents",
83 "checking free space cache",
84 "checking fs roots",
87 task_period_start(priv->info, 1000 /* 1s */);
89 if (priv->tp == TASK_NOTHING)
90 return NULL;
92 while (1) {
93 printf("%s [%c]\r", task_position_string[priv->tp],
94 work_indicator[count % 4]);
95 count++;
96 fflush(stdout);
97 task_period_wait(priv->info);
99 return NULL;
/*
 * Progress callback counterpart of print_status_check(): emits a newline
 * on stdout and flushes it.  The argument is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
110 struct extent_backref {
111 struct list_head list;
112 unsigned int is_data:1;
113 unsigned int found_extent_tree:1;
114 unsigned int full_backref:1;
115 unsigned int found_ref:1;
116 unsigned int broken:1;
119 struct data_backref {
120 struct extent_backref node;
121 union {
122 u64 parent;
123 u64 root;
125 u64 owner;
126 u64 offset;
127 u64 disk_bytenr;
128 u64 bytes;
129 u64 ram_bytes;
130 u32 num_refs;
131 u32 found_ref;
135 * Much like data_backref, just removed the undetermined members
136 * and change it to use list_head.
137 * During extent scan, it is stored in root->orphan_data_extent.
138 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
140 struct orphan_data_extent {
141 struct list_head list;
142 u64 root;
143 u64 objectid;
144 u64 offset;
145 u64 disk_bytenr;
146 u64 disk_len;
149 struct tree_backref {
150 struct extent_backref node;
151 union {
152 u64 parent;
153 u64 root;
157 struct extent_record {
158 struct list_head backrefs;
159 struct list_head dups;
160 struct list_head list;
161 struct cache_extent cache;
162 struct btrfs_disk_key parent_key;
163 u64 start;
164 u64 max_size;
165 u64 nr;
166 u64 refs;
167 u64 extent_item_refs;
168 u64 generation;
169 u64 parent_generation;
170 u64 info_objectid;
171 u32 num_duplicates;
172 u8 info_level;
173 int flag_block_full_backref;
174 unsigned int found_rec:1;
175 unsigned int content_checked:1;
176 unsigned int owner_ref_checked:1;
177 unsigned int is_root:1;
178 unsigned int metadata:1;
179 unsigned int bad_full_backref:1;
180 unsigned int crossing_stripes:1;
181 unsigned int wrong_chunk_type:1;
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
189 unsigned int filetype:8;
190 int errors;
191 unsigned int ref_type;
192 u64 dir;
193 u64 index;
194 u16 namelen;
195 char name[0];
198 struct root_item_record {
199 struct list_head list;
200 u64 objectid;
201 u64 bytenr;
202 u64 last_snapshot;
203 u8 level;
204 u8 drop_level;
205 int level_size;
206 struct btrfs_key drop_key;
/* Error bits stored in the "errors" field of inode and root backrefs. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
223 struct file_extent_hole {
224 struct rb_node node;
225 u64 start;
226 u64 len;
229 /* Compatible function to allow reuse of old codes */
230 static u64 first_extent_gap(struct rb_root *holes)
232 struct file_extent_hole *hole;
234 if (RB_EMPTY_ROOT(holes))
235 return (u64)-1;
237 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
238 return hole->start;
241 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
243 struct file_extent_hole *hole1;
244 struct file_extent_hole *hole2;
246 hole1 = rb_entry(node1, struct file_extent_hole, node);
247 hole2 = rb_entry(node2, struct file_extent_hole, node);
249 if (hole1->start > hole2->start)
250 return -1;
251 if (hole1->start < hole2->start)
252 return 1;
253 /* Now hole1->start == hole2->start */
254 if (hole1->len >= hole2->len)
256 * Hole 1 will be merge center
257 * Same hole will be merged later
259 return -1;
260 /* Hole 2 will be merge center */
261 return 1;
265 * Add a hole to the record
267 * This will do hole merge for copy_file_extent_holes(),
268 * which will ensure there won't be continuous holes.
270 static int add_file_extent_hole(struct rb_root *holes,
271 u64 start, u64 len)
273 struct file_extent_hole *hole;
274 struct file_extent_hole *prev = NULL;
275 struct file_extent_hole *next = NULL;
277 hole = malloc(sizeof(*hole));
278 if (!hole)
279 return -ENOMEM;
280 hole->start = start;
281 hole->len = len;
282 /* Since compare will not return 0, no -EEXIST will happen */
283 rb_insert(holes, &hole->node, compare_hole);
285 /* simple merge with previous hole */
286 if (rb_prev(&hole->node))
287 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
288 node);
289 if (prev && prev->start + prev->len >= hole->start) {
290 hole->len = hole->start + hole->len - prev->start;
291 hole->start = prev->start;
292 rb_erase(&prev->node, holes);
293 free(prev);
294 prev = NULL;
297 /* iterate merge with next holes */
298 while (1) {
299 if (!rb_next(&hole->node))
300 break;
301 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
302 node);
303 if (hole->start + hole->len >= next->start) {
304 if (hole->start + hole->len <= next->start + next->len)
305 hole->len = next->start + next->len -
306 hole->start;
307 rb_erase(&next->node, holes);
308 free(next);
309 next = NULL;
310 } else
311 break;
313 return 0;
316 static int compare_hole_range(struct rb_node *node, void *data)
318 struct file_extent_hole *hole;
319 u64 start;
321 hole = (struct file_extent_hole *)data;
322 start = hole->start;
324 hole = rb_entry(node, struct file_extent_hole, node);
325 if (start < hole->start)
326 return -1;
327 if (start >= hole->start && start < hole->start + hole->len)
328 return 0;
329 return 1;
333 * Delete a hole in the record
335 * This will do the hole split and is much restrict than add.
337 static int del_file_extent_hole(struct rb_root *holes,
338 u64 start, u64 len)
340 struct file_extent_hole *hole;
341 struct file_extent_hole tmp;
342 u64 prev_start = 0;
343 u64 prev_len = 0;
344 u64 next_start = 0;
345 u64 next_len = 0;
346 struct rb_node *node;
347 int have_prev = 0;
348 int have_next = 0;
349 int ret = 0;
351 tmp.start = start;
352 tmp.len = len;
353 node = rb_search(holes, &tmp, compare_hole_range, NULL);
354 if (!node)
355 return -EEXIST;
356 hole = rb_entry(node, struct file_extent_hole, node);
357 if (start + len > hole->start + hole->len)
358 return -EEXIST;
361 * Now there will be no overflap, delete the hole and re-add the
362 * split(s) if they exists.
364 if (start > hole->start) {
365 prev_start = hole->start;
366 prev_len = start - hole->start;
367 have_prev = 1;
369 if (hole->start + hole->len > start + len) {
370 next_start = start + len;
371 next_len = hole->start + hole->len - start - len;
372 have_next = 1;
374 rb_erase(node, holes);
375 free(hole);
376 if (have_prev) {
377 ret = add_file_extent_hole(holes, prev_start, prev_len);
378 if (ret < 0)
379 return ret;
381 if (have_next) {
382 ret = add_file_extent_hole(holes, next_start, next_len);
383 if (ret < 0)
384 return ret;
386 return 0;
389 static int copy_file_extent_holes(struct rb_root *dst,
390 struct rb_root *src)
392 struct file_extent_hole *hole;
393 struct rb_node *node;
394 int ret = 0;
396 node = rb_first(src);
397 while (node) {
398 hole = rb_entry(node, struct file_extent_hole, node);
399 ret = add_file_extent_hole(dst, hole->start, hole->len);
400 if (ret)
401 break;
402 node = rb_next(node);
404 return ret;
407 static void free_file_extent_holes(struct rb_root *holes)
409 struct rb_node *node;
410 struct file_extent_hole *hole;
412 node = rb_first(holes);
413 while (node) {
414 hole = rb_entry(node, struct file_extent_hole, node);
415 rb_erase(node, holes);
416 free(hole);
417 node = rb_first(holes);
421 struct inode_record {
422 struct list_head backrefs;
423 unsigned int checked:1;
424 unsigned int merging:1;
425 unsigned int found_inode_item:1;
426 unsigned int found_dir_item:1;
427 unsigned int found_file_extent:1;
428 unsigned int found_csum_item:1;
429 unsigned int some_csum_missing:1;
430 unsigned int nodatasum:1;
431 int errors;
433 u64 ino;
434 u32 nlink;
435 u32 imode;
436 u64 isize;
437 u64 nbytes;
439 u32 found_link;
440 u64 found_size;
441 u64 extent_start;
442 u64 extent_end;
443 struct rb_root holes;
444 struct list_head orphan_extents;
446 u32 refs;
/* Error bits stored in inode_record->errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
465 struct root_backref {
466 struct list_head list;
467 unsigned int found_dir_item:1;
468 unsigned int found_dir_index:1;
469 unsigned int found_back_ref:1;
470 unsigned int found_forward_ref:1;
471 unsigned int reachable:1;
472 int errors;
473 u64 ref_root;
474 u64 dir;
475 u64 index;
476 u16 namelen;
477 char name[0];
480 struct root_record {
481 struct list_head backrefs;
482 struct cache_extent cache;
483 unsigned int found_root_item:1;
484 u64 objectid;
485 u32 found_ref;
488 struct ptr_node {
489 struct cache_extent cache;
490 void *data;
493 struct shared_node {
494 struct cache_extent cache;
495 struct cache_tree root_cache;
496 struct cache_tree inode_cache;
497 struct inode_record *current;
498 u32 refs;
501 struct block_info {
502 u64 start;
503 u32 size;
506 struct walk_control {
507 struct cache_tree shared;
508 struct shared_node *nodes[BTRFS_MAX_LEVEL];
509 int active_node;
510 int root_level;
513 struct bad_item {
514 struct btrfs_key key;
515 u64 root_id;
516 struct list_head list;
519 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
521 static void record_root_in_trans(struct btrfs_trans_handle *trans,
522 struct btrfs_root *root)
524 if (root->last_trans != trans->transid) {
525 root->track_dirty = 1;
526 root->last_trans = trans->transid;
527 root->commit_root = root->node;
528 extent_buffer_get(root->node);
532 static u8 imode_to_type(u32 imode)
534 #define S_SHIFT 12
535 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
536 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
537 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
538 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
539 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
540 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
541 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
542 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
545 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
546 #undef S_SHIFT
549 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
551 struct device_record *rec1;
552 struct device_record *rec2;
554 rec1 = rb_entry(node1, struct device_record, node);
555 rec2 = rb_entry(node2, struct device_record, node);
556 if (rec1->devid > rec2->devid)
557 return -1;
558 else if (rec1->devid < rec2->devid)
559 return 1;
560 else
561 return 0;
564 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
566 struct inode_record *rec;
567 struct inode_backref *backref;
568 struct inode_backref *orig;
569 struct orphan_data_extent *src_orphan;
570 struct orphan_data_extent *dst_orphan;
571 size_t size;
572 int ret;
574 rec = malloc(sizeof(*rec));
575 memcpy(rec, orig_rec, sizeof(*rec));
576 rec->refs = 1;
577 INIT_LIST_HEAD(&rec->backrefs);
578 INIT_LIST_HEAD(&rec->orphan_extents);
579 rec->holes = RB_ROOT;
581 list_for_each_entry(orig, &orig_rec->backrefs, list) {
582 size = sizeof(*orig) + orig->namelen + 1;
583 backref = malloc(size);
584 memcpy(backref, orig, size);
585 list_add_tail(&backref->list, &rec->backrefs);
587 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
588 dst_orphan = malloc(sizeof(*dst_orphan));
589 /* TODO: Fix all the HELL of un-catched -ENOMEM case */
590 BUG_ON(!dst_orphan);
591 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
592 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
594 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
595 BUG_ON(ret < 0);
597 return rec;
600 static void print_orphan_data_extents(struct list_head *orphan_extents,
601 u64 objectid)
603 struct orphan_data_extent *orphan;
605 if (list_empty(orphan_extents))
606 return;
607 printf("The following data extent is lost in tree %llu:\n",
608 objectid);
609 list_for_each_entry(orphan, orphan_extents, list) {
610 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
611 orphan->objectid, orphan->offset, orphan->disk_bytenr,
612 orphan->disk_len);
616 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
618 u64 root_objectid = root->root_key.objectid;
619 int errors = rec->errors;
621 if (!errors)
622 return;
623 /* reloc root errors, we print its corresponding fs root objectid*/
624 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
625 root_objectid = root->root_key.offset;
626 fprintf(stderr, "reloc");
628 fprintf(stderr, "root %llu inode %llu errors %x",
629 (unsigned long long) root_objectid,
630 (unsigned long long) rec->ino, rec->errors);
632 if (errors & I_ERR_NO_INODE_ITEM)
633 fprintf(stderr, ", no inode item");
634 if (errors & I_ERR_NO_ORPHAN_ITEM)
635 fprintf(stderr, ", no orphan item");
636 if (errors & I_ERR_DUP_INODE_ITEM)
637 fprintf(stderr, ", dup inode item");
638 if (errors & I_ERR_DUP_DIR_INDEX)
639 fprintf(stderr, ", dup dir index");
640 if (errors & I_ERR_ODD_DIR_ITEM)
641 fprintf(stderr, ", odd dir item");
642 if (errors & I_ERR_ODD_FILE_EXTENT)
643 fprintf(stderr, ", odd file extent");
644 if (errors & I_ERR_BAD_FILE_EXTENT)
645 fprintf(stderr, ", bad file extent");
646 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
647 fprintf(stderr, ", file extent overlap");
648 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
649 fprintf(stderr, ", file extent discount");
650 if (errors & I_ERR_DIR_ISIZE_WRONG)
651 fprintf(stderr, ", dir isize wrong");
652 if (errors & I_ERR_FILE_NBYTES_WRONG)
653 fprintf(stderr, ", nbytes wrong");
654 if (errors & I_ERR_ODD_CSUM_ITEM)
655 fprintf(stderr, ", odd csum item");
656 if (errors & I_ERR_SOME_CSUM_MISSING)
657 fprintf(stderr, ", some csum missing");
658 if (errors & I_ERR_LINK_COUNT_WRONG)
659 fprintf(stderr, ", link count wrong");
660 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
661 fprintf(stderr, ", orphan file extent");
662 fprintf(stderr, "\n");
663 /* Print the orphan extents if needed */
664 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
665 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
667 /* Print the holes if needed */
668 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
669 struct file_extent_hole *hole;
670 struct rb_node *node;
671 int found = 0;
673 node = rb_first(&rec->holes);
674 fprintf(stderr, "Found file extent holes:\n");
675 while (node) {
676 found = 1;
677 hole = rb_entry(node, struct file_extent_hole, node);
678 fprintf(stderr, "\tstart: %llu, len: %llu\n",
679 hole->start, hole->len);
680 node = rb_next(node);
682 if (!found)
683 fprintf(stderr, "\tstart: 0, len: %llu\n",
684 round_up(rec->isize, root->sectorsize));
688 static void print_ref_error(int errors)
690 if (errors & REF_ERR_NO_DIR_ITEM)
691 fprintf(stderr, ", no dir item");
692 if (errors & REF_ERR_NO_DIR_INDEX)
693 fprintf(stderr, ", no dir index");
694 if (errors & REF_ERR_NO_INODE_REF)
695 fprintf(stderr, ", no inode ref");
696 if (errors & REF_ERR_DUP_DIR_ITEM)
697 fprintf(stderr, ", dup dir item");
698 if (errors & REF_ERR_DUP_DIR_INDEX)
699 fprintf(stderr, ", dup dir index");
700 if (errors & REF_ERR_DUP_INODE_REF)
701 fprintf(stderr, ", dup inode ref");
702 if (errors & REF_ERR_INDEX_UNMATCH)
703 fprintf(stderr, ", index unmatch");
704 if (errors & REF_ERR_FILETYPE_UNMATCH)
705 fprintf(stderr, ", filetype unmatch");
706 if (errors & REF_ERR_NAME_TOO_LONG)
707 fprintf(stderr, ", name too long");
708 if (errors & REF_ERR_NO_ROOT_REF)
709 fprintf(stderr, ", no root ref");
710 if (errors & REF_ERR_NO_ROOT_BACKREF)
711 fprintf(stderr, ", no root backref");
712 if (errors & REF_ERR_DUP_ROOT_REF)
713 fprintf(stderr, ", dup root ref");
714 if (errors & REF_ERR_DUP_ROOT_BACKREF)
715 fprintf(stderr, ", dup root backref");
716 fprintf(stderr, "\n");
719 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
720 u64 ino, int mod)
722 struct ptr_node *node;
723 struct cache_extent *cache;
724 struct inode_record *rec = NULL;
725 int ret;
727 cache = lookup_cache_extent(inode_cache, ino, 1);
728 if (cache) {
729 node = container_of(cache, struct ptr_node, cache);
730 rec = node->data;
731 if (mod && rec->refs > 1) {
732 node->data = clone_inode_rec(rec);
733 rec->refs--;
734 rec = node->data;
736 } else if (mod) {
737 rec = calloc(1, sizeof(*rec));
738 rec->ino = ino;
739 rec->extent_start = (u64)-1;
740 rec->refs = 1;
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 node = malloc(sizeof(*node));
746 node->cache.start = ino;
747 node->cache.size = 1;
748 node->data = rec;
750 if (ino == BTRFS_FREE_INO_OBJECTID)
751 rec->found_link = 1;
753 ret = insert_cache_extent(inode_cache, &node->cache);
754 BUG_ON(ret);
756 return rec;
759 static void free_orphan_data_extents(struct list_head *orphan_extents)
761 struct orphan_data_extent *orphan;
763 while (!list_empty(orphan_extents)) {
764 orphan = list_entry(orphan_extents->next,
765 struct orphan_data_extent, list);
766 list_del(&orphan->list);
767 free(orphan);
771 static void free_inode_rec(struct inode_record *rec)
773 struct inode_backref *backref;
775 if (--rec->refs > 0)
776 return;
778 while (!list_empty(&rec->backrefs)) {
779 backref = list_entry(rec->backrefs.next,
780 struct inode_backref, list);
781 list_del(&backref->list);
782 free(backref);
784 free_orphan_data_extents(&rec->orphan_extents);
785 free_file_extent_holes(&rec->holes);
786 free(rec);
789 static int can_free_inode_rec(struct inode_record *rec)
791 if (!rec->errors && rec->checked && rec->found_inode_item &&
792 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
793 return 1;
794 return 0;
797 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
798 struct inode_record *rec)
800 struct cache_extent *cache;
801 struct inode_backref *tmp, *backref;
802 struct ptr_node *node;
803 unsigned char filetype;
805 if (!rec->found_inode_item)
806 return;
808 filetype = imode_to_type(rec->imode);
809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
810 if (backref->found_dir_item && backref->found_dir_index) {
811 if (backref->filetype != filetype)
812 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
813 if (!backref->errors && backref->found_inode_ref) {
814 list_del(&backref->list);
815 free(backref);
820 if (!rec->checked || rec->merging)
821 return;
823 if (S_ISDIR(rec->imode)) {
824 if (rec->found_size != rec->isize)
825 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
826 if (rec->found_file_extent)
827 rec->errors |= I_ERR_ODD_FILE_EXTENT;
828 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
829 if (rec->found_dir_item)
830 rec->errors |= I_ERR_ODD_DIR_ITEM;
831 if (rec->found_size != rec->nbytes)
832 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
833 if (rec->nlink > 0 && !no_holes &&
834 (rec->extent_end < rec->isize ||
835 first_extent_gap(&rec->holes) < rec->isize))
836 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
839 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
840 if (rec->found_csum_item && rec->nodatasum)
841 rec->errors |= I_ERR_ODD_CSUM_ITEM;
842 if (rec->some_csum_missing && !rec->nodatasum)
843 rec->errors |= I_ERR_SOME_CSUM_MISSING;
846 BUG_ON(rec->refs != 1);
847 if (can_free_inode_rec(rec)) {
848 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
849 node = container_of(cache, struct ptr_node, cache);
850 BUG_ON(node->data != rec);
851 remove_cache_extent(inode_cache, &node->cache);
852 free(node);
853 free_inode_rec(rec);
857 static int check_orphan_item(struct btrfs_root *root, u64 ino)
859 struct btrfs_path path;
860 struct btrfs_key key;
861 int ret;
863 key.objectid = BTRFS_ORPHAN_OBJECTID;
864 key.type = BTRFS_ORPHAN_ITEM_KEY;
865 key.offset = ino;
867 btrfs_init_path(&path);
868 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
869 btrfs_release_path(&path);
870 if (ret > 0)
871 ret = -ENOENT;
872 return ret;
875 static int process_inode_item(struct extent_buffer *eb,
876 int slot, struct btrfs_key *key,
877 struct shared_node *active_node)
879 struct inode_record *rec;
880 struct btrfs_inode_item *item;
882 rec = active_node->current;
883 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
884 if (rec->found_inode_item) {
885 rec->errors |= I_ERR_DUP_INODE_ITEM;
886 return 1;
888 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
889 rec->nlink = btrfs_inode_nlink(eb, item);
890 rec->isize = btrfs_inode_size(eb, item);
891 rec->nbytes = btrfs_inode_nbytes(eb, item);
892 rec->imode = btrfs_inode_mode(eb, item);
893 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
894 rec->nodatasum = 1;
895 rec->found_inode_item = 1;
896 if (rec->nlink == 0)
897 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
898 maybe_free_inode_rec(&active_node->inode_cache, rec);
899 return 0;
902 static struct inode_backref *get_inode_backref(struct inode_record *rec,
903 const char *name,
904 int namelen, u64 dir)
906 struct inode_backref *backref;
908 list_for_each_entry(backref, &rec->backrefs, list) {
909 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
910 break;
911 if (backref->dir != dir || backref->namelen != namelen)
912 continue;
913 if (memcmp(name, backref->name, namelen))
914 continue;
915 return backref;
918 backref = malloc(sizeof(*backref) + namelen + 1);
919 memset(backref, 0, sizeof(*backref));
920 backref->dir = dir;
921 backref->namelen = namelen;
922 memcpy(backref->name, name, namelen);
923 backref->name[namelen] = '\0';
924 list_add_tail(&backref->list, &rec->backrefs);
925 return backref;
928 static int add_inode_backref(struct cache_tree *inode_cache,
929 u64 ino, u64 dir, u64 index,
930 const char *name, int namelen,
931 int filetype, int itemtype, int errors)
933 struct inode_record *rec;
934 struct inode_backref *backref;
936 rec = get_inode_rec(inode_cache, ino, 1);
937 backref = get_inode_backref(rec, name, namelen, dir);
938 if (errors)
939 backref->errors |= errors;
940 if (itemtype == BTRFS_DIR_INDEX_KEY) {
941 if (backref->found_dir_index)
942 backref->errors |= REF_ERR_DUP_DIR_INDEX;
943 if (backref->found_inode_ref && backref->index != index)
944 backref->errors |= REF_ERR_INDEX_UNMATCH;
945 if (backref->found_dir_item && backref->filetype != filetype)
946 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
948 backref->index = index;
949 backref->filetype = filetype;
950 backref->found_dir_index = 1;
951 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
952 rec->found_link++;
953 if (backref->found_dir_item)
954 backref->errors |= REF_ERR_DUP_DIR_ITEM;
955 if (backref->found_dir_index && backref->filetype != filetype)
956 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
958 backref->filetype = filetype;
959 backref->found_dir_item = 1;
960 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
961 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
962 if (backref->found_inode_ref)
963 backref->errors |= REF_ERR_DUP_INODE_REF;
964 if (backref->found_dir_index && backref->index != index)
965 backref->errors |= REF_ERR_INDEX_UNMATCH;
966 else
967 backref->index = index;
969 backref->ref_type = itemtype;
970 backref->found_inode_ref = 1;
971 } else {
972 BUG_ON(1);
975 maybe_free_inode_rec(inode_cache, rec);
976 return 0;
979 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
980 struct cache_tree *dst_cache)
982 struct inode_backref *backref;
983 u32 dir_count = 0;
984 int ret = 0;
986 dst->merging = 1;
987 list_for_each_entry(backref, &src->backrefs, list) {
988 if (backref->found_dir_index) {
989 add_inode_backref(dst_cache, dst->ino, backref->dir,
990 backref->index, backref->name,
991 backref->namelen, backref->filetype,
992 BTRFS_DIR_INDEX_KEY, backref->errors);
994 if (backref->found_dir_item) {
995 dir_count++;
996 add_inode_backref(dst_cache, dst->ino,
997 backref->dir, 0, backref->name,
998 backref->namelen, backref->filetype,
999 BTRFS_DIR_ITEM_KEY, backref->errors);
1001 if (backref->found_inode_ref) {
1002 add_inode_backref(dst_cache, dst->ino,
1003 backref->dir, backref->index,
1004 backref->name, backref->namelen, 0,
1005 backref->ref_type, backref->errors);
1009 if (src->found_dir_item)
1010 dst->found_dir_item = 1;
1011 if (src->found_file_extent)
1012 dst->found_file_extent = 1;
1013 if (src->found_csum_item)
1014 dst->found_csum_item = 1;
1015 if (src->some_csum_missing)
1016 dst->some_csum_missing = 1;
1017 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1018 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1019 if (ret < 0)
1020 return ret;
1023 BUG_ON(src->found_link < dir_count);
1024 dst->found_link += src->found_link - dir_count;
1025 dst->found_size += src->found_size;
1026 if (src->extent_start != (u64)-1) {
1027 if (dst->extent_start == (u64)-1) {
1028 dst->extent_start = src->extent_start;
1029 dst->extent_end = src->extent_end;
1030 } else {
1031 if (dst->extent_end > src->extent_start)
1032 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1033 else if (dst->extent_end < src->extent_start) {
1034 ret = add_file_extent_hole(&dst->holes,
1035 dst->extent_end,
1036 src->extent_start - dst->extent_end);
1038 if (dst->extent_end < src->extent_end)
1039 dst->extent_end = src->extent_end;
1043 dst->errors |= src->errors;
1044 if (src->found_inode_item) {
1045 if (!dst->found_inode_item) {
1046 dst->nlink = src->nlink;
1047 dst->isize = src->isize;
1048 dst->nbytes = src->nbytes;
1049 dst->imode = src->imode;
1050 dst->nodatasum = src->nodatasum;
1051 dst->found_inode_item = 1;
1052 } else {
1053 dst->errors |= I_ERR_DUP_INODE_ITEM;
1056 dst->merging = 0;
1058 return 0;
1061 static int splice_shared_node(struct shared_node *src_node,
1062 struct shared_node *dst_node)
1064 struct cache_extent *cache;
1065 struct ptr_node *node, *ins;
1066 struct cache_tree *src, *dst;
1067 struct inode_record *rec, *conflict;
1068 u64 current_ino = 0;
1069 int splice = 0;
1070 int ret;
1072 if (--src_node->refs == 0)
1073 splice = 1;
1074 if (src_node->current)
1075 current_ino = src_node->current->ino;
1077 src = &src_node->root_cache;
1078 dst = &dst_node->root_cache;
1079 again:
1080 cache = search_cache_extent(src, 0);
1081 while (cache) {
1082 node = container_of(cache, struct ptr_node, cache);
1083 rec = node->data;
1084 cache = next_cache_extent(cache);
1086 if (splice) {
1087 remove_cache_extent(src, &node->cache);
1088 ins = node;
1089 } else {
1090 ins = malloc(sizeof(*ins));
1091 ins->cache.start = node->cache.start;
1092 ins->cache.size = node->cache.size;
1093 ins->data = rec;
1094 rec->refs++;
1096 ret = insert_cache_extent(dst, &ins->cache);
1097 if (ret == -EEXIST) {
1098 conflict = get_inode_rec(dst, rec->ino, 1);
1099 merge_inode_recs(rec, conflict, dst);
1100 if (rec->checked) {
1101 conflict->checked = 1;
1102 if (dst_node->current == conflict)
1103 dst_node->current = NULL;
1105 maybe_free_inode_rec(dst, conflict);
1106 free_inode_rec(rec);
1107 free(ins);
1108 } else {
1109 BUG_ON(ret);
1113 if (src == &src_node->root_cache) {
1114 src = &src_node->inode_cache;
1115 dst = &dst_node->inode_cache;
1116 goto again;
1119 if (current_ino > 0 && (!dst_node->current ||
1120 current_ino > dst_node->current->ino)) {
1121 if (dst_node->current) {
1122 dst_node->current->checked = 1;
1123 maybe_free_inode_rec(dst, dst_node->current);
1125 dst_node->current = get_inode_rec(dst, current_ino, 1);
1127 return 0;
1130 static void free_inode_ptr(struct cache_extent *cache)
1132 struct ptr_node *node;
1133 struct inode_record *rec;
1135 node = container_of(cache, struct ptr_node, cache);
1136 rec = node->data;
1137 free_inode_rec(rec);
1138 free(node);
1141 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1143 static struct shared_node *find_shared_node(struct cache_tree *shared,
1144 u64 bytenr)
1146 struct cache_extent *cache;
1147 struct shared_node *node;
1149 cache = lookup_cache_extent(shared, bytenr, 1);
1150 if (cache) {
1151 node = container_of(cache, struct shared_node, cache);
1152 return node;
1154 return NULL;
1157 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1159 int ret;
1160 struct shared_node *node;
1162 node = calloc(1, sizeof(*node));
1163 node->cache.start = bytenr;
1164 node->cache.size = 1;
1165 cache_tree_init(&node->root_cache);
1166 cache_tree_init(&node->inode_cache);
1167 node->refs = refs;
1169 ret = insert_cache_extent(shared, &node->cache);
1170 BUG_ON(ret);
1171 return 0;
1174 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1175 struct walk_control *wc, int level)
1177 struct shared_node *node;
1178 struct shared_node *dest;
1180 if (level == wc->active_node)
1181 return 0;
1183 BUG_ON(wc->active_node <= level);
1184 node = find_shared_node(&wc->shared, bytenr);
1185 if (!node) {
1186 add_shared_node(&wc->shared, bytenr, refs);
1187 node = find_shared_node(&wc->shared, bytenr);
1188 wc->nodes[level] = node;
1189 wc->active_node = level;
1190 return 0;
1193 if (wc->root_level == wc->active_node &&
1194 btrfs_root_refs(&root->root_item) == 0) {
1195 if (--node->refs == 0) {
1196 free_inode_recs_tree(&node->root_cache);
1197 free_inode_recs_tree(&node->inode_cache);
1198 remove_cache_extent(&wc->shared, &node->cache);
1199 free(node);
1201 return 1;
1204 dest = wc->nodes[wc->active_node];
1205 splice_shared_node(node, dest);
1206 if (node->refs == 0) {
1207 remove_cache_extent(&wc->shared, &node->cache);
1208 free(node);
1210 return 1;
1213 static int leave_shared_node(struct btrfs_root *root,
1214 struct walk_control *wc, int level)
1216 struct shared_node *node;
1217 struct shared_node *dest;
1218 int i;
1220 if (level == wc->root_level)
1221 return 0;
1223 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1224 if (wc->nodes[i])
1225 break;
1227 BUG_ON(i >= BTRFS_MAX_LEVEL);
1229 node = wc->nodes[wc->active_node];
1230 wc->nodes[wc->active_node] = NULL;
1231 wc->active_node = i;
1233 dest = wc->nodes[wc->active_node];
1234 if (wc->active_node < wc->root_level ||
1235 btrfs_root_refs(&root->root_item) > 0) {
1236 BUG_ON(node->refs <= 1);
1237 splice_shared_node(node, dest);
1238 } else {
1239 BUG_ON(node->refs < 2);
1240 node->refs--;
1242 return 0;
1246 * Returns:
1247 * < 0 - on error
1248 * 1 - if the root with id child_root_id is a child of root parent_root_id
1249 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1250 * has other root(s) as parent(s)
1251 * 2 - if the root child_root_id doesn't have any parent roots
1253 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1254 u64 child_root_id)
1256 struct btrfs_path path;
1257 struct btrfs_key key;
1258 struct extent_buffer *leaf;
1259 int has_parent = 0;
1260 int ret;
1262 btrfs_init_path(&path);
1264 key.objectid = parent_root_id;
1265 key.type = BTRFS_ROOT_REF_KEY;
1266 key.offset = child_root_id;
1267 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1268 0, 0);
1269 if (ret < 0)
1270 return ret;
1271 btrfs_release_path(&path);
1272 if (!ret)
1273 return 1;
1275 key.objectid = child_root_id;
1276 key.type = BTRFS_ROOT_BACKREF_KEY;
1277 key.offset = 0;
1278 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1279 0, 0);
1280 if (ret < 0)
1281 goto out;
1283 while (1) {
1284 leaf = path.nodes[0];
1285 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1286 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1287 if (ret)
1288 break;
1289 leaf = path.nodes[0];
1292 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1293 if (key.objectid != child_root_id ||
1294 key.type != BTRFS_ROOT_BACKREF_KEY)
1295 break;
1297 has_parent = 1;
1299 if (key.offset == parent_root_id) {
1300 btrfs_release_path(&path);
1301 return 1;
1304 path.slots[0]++;
1306 out:
1307 btrfs_release_path(&path);
1308 if (ret < 0)
1309 return ret;
1310 return has_parent ? 0 : 2;
1313 static int process_dir_item(struct btrfs_root *root,
1314 struct extent_buffer *eb,
1315 int slot, struct btrfs_key *key,
1316 struct shared_node *active_node)
1318 u32 total;
1319 u32 cur = 0;
1320 u32 len;
1321 u32 name_len;
1322 u32 data_len;
1323 int error;
1324 int nritems = 0;
1325 int filetype;
1326 struct btrfs_dir_item *di;
1327 struct inode_record *rec;
1328 struct cache_tree *root_cache;
1329 struct cache_tree *inode_cache;
1330 struct btrfs_key location;
1331 char namebuf[BTRFS_NAME_LEN];
1333 root_cache = &active_node->root_cache;
1334 inode_cache = &active_node->inode_cache;
1335 rec = active_node->current;
1336 rec->found_dir_item = 1;
1338 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1339 total = btrfs_item_size_nr(eb, slot);
1340 while (cur < total) {
1341 nritems++;
1342 btrfs_dir_item_key_to_cpu(eb, di, &location);
1343 name_len = btrfs_dir_name_len(eb, di);
1344 data_len = btrfs_dir_data_len(eb, di);
1345 filetype = btrfs_dir_type(eb, di);
1347 rec->found_size += name_len;
1348 if (name_len <= BTRFS_NAME_LEN) {
1349 len = name_len;
1350 error = 0;
1351 } else {
1352 len = BTRFS_NAME_LEN;
1353 error = REF_ERR_NAME_TOO_LONG;
1355 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1357 if (location.type == BTRFS_INODE_ITEM_KEY) {
1358 add_inode_backref(inode_cache, location.objectid,
1359 key->objectid, key->offset, namebuf,
1360 len, filetype, key->type, error);
1361 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1362 add_inode_backref(root_cache, location.objectid,
1363 key->objectid, key->offset,
1364 namebuf, len, filetype,
1365 key->type, error);
1366 } else {
1367 fprintf(stderr, "invalid location in dir item %u\n",
1368 location.type);
1369 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1370 key->objectid, key->offset, namebuf,
1371 len, filetype, key->type, error);
1374 len = sizeof(*di) + name_len + data_len;
1375 di = (struct btrfs_dir_item *)((char *)di + len);
1376 cur += len;
1378 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1379 rec->errors |= I_ERR_DUP_DIR_INDEX;
1381 return 0;
1384 static int process_inode_ref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1388 u32 total;
1389 u32 cur = 0;
1390 u32 len;
1391 u32 name_len;
1392 u64 index;
1393 int error;
1394 struct cache_tree *inode_cache;
1395 struct btrfs_inode_ref *ref;
1396 char namebuf[BTRFS_NAME_LEN];
1398 inode_cache = &active_node->inode_cache;
1400 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1401 total = btrfs_item_size_nr(eb, slot);
1402 while (cur < total) {
1403 name_len = btrfs_inode_ref_name_len(eb, ref);
1404 index = btrfs_inode_ref_index(eb, ref);
1405 if (name_len <= BTRFS_NAME_LEN) {
1406 len = name_len;
1407 error = 0;
1408 } else {
1409 len = BTRFS_NAME_LEN;
1410 error = REF_ERR_NAME_TOO_LONG;
1412 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1413 add_inode_backref(inode_cache, key->objectid, key->offset,
1414 index, namebuf, len, 0, key->type, error);
1416 len = sizeof(*ref) + name_len;
1417 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1418 cur += len;
1420 return 0;
1423 static int process_inode_extref(struct extent_buffer *eb,
1424 int slot, struct btrfs_key *key,
1425 struct shared_node *active_node)
1427 u32 total;
1428 u32 cur = 0;
1429 u32 len;
1430 u32 name_len;
1431 u64 index;
1432 u64 parent;
1433 int error;
1434 struct cache_tree *inode_cache;
1435 struct btrfs_inode_extref *extref;
1436 char namebuf[BTRFS_NAME_LEN];
1438 inode_cache = &active_node->inode_cache;
1440 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1441 total = btrfs_item_size_nr(eb, slot);
1442 while (cur < total) {
1443 name_len = btrfs_inode_extref_name_len(eb, extref);
1444 index = btrfs_inode_extref_index(eb, extref);
1445 parent = btrfs_inode_extref_parent(eb, extref);
1446 if (name_len <= BTRFS_NAME_LEN) {
1447 len = name_len;
1448 error = 0;
1449 } else {
1450 len = BTRFS_NAME_LEN;
1451 error = REF_ERR_NAME_TOO_LONG;
1453 read_extent_buffer(eb, namebuf,
1454 (unsigned long)(extref + 1), len);
1455 add_inode_backref(inode_cache, key->objectid, parent,
1456 index, namebuf, len, 0, key->type, error);
1458 len = sizeof(*extref) + name_len;
1459 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1460 cur += len;
1462 return 0;
1466 static int count_csum_range(struct btrfs_root *root, u64 start,
1467 u64 len, u64 *found)
1469 struct btrfs_key key;
1470 struct btrfs_path path;
1471 struct extent_buffer *leaf;
1472 int ret;
1473 size_t size;
1474 *found = 0;
1475 u64 csum_end;
1476 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1478 btrfs_init_path(&path);
1480 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1481 key.offset = start;
1482 key.type = BTRFS_EXTENT_CSUM_KEY;
1484 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1485 &key, &path, 0, 0);
1486 if (ret < 0)
1487 goto out;
1488 if (ret > 0 && path.slots[0] > 0) {
1489 leaf = path.nodes[0];
1490 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1491 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1492 key.type == BTRFS_EXTENT_CSUM_KEY)
1493 path.slots[0]--;
1496 while (len > 0) {
1497 leaf = path.nodes[0];
1498 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1499 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1500 if (ret > 0)
1501 break;
1502 else if (ret < 0)
1503 goto out;
1504 leaf = path.nodes[0];
1507 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1508 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1509 key.type != BTRFS_EXTENT_CSUM_KEY)
1510 break;
1512 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1513 if (key.offset >= start + len)
1514 break;
1516 if (key.offset > start)
1517 start = key.offset;
1519 size = btrfs_item_size_nr(leaf, path.slots[0]);
1520 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1521 if (csum_end > start) {
1522 size = min(csum_end - start, len);
1523 len -= size;
1524 start += size;
1525 *found += size;
1528 path.slots[0]++;
1530 out:
1531 btrfs_release_path(&path);
1532 if (ret < 0)
1533 return ret;
1534 return 0;
/*
 * Account one EXTENT_DATA item (slot of eb) against the inode record that is
 * current on active_node: track the covered range, record holes/overlaps,
 * validate the extent fields for its type and, for extents with a non-zero
 * disk_bytenr outside the data reloc tree, compare against the csum tree.
 *
 * Returns 0, or the negative value returned by add_file_extent_hole() or
 * count_csum_range().
 */
1537 static int process_file_extent(struct btrfs_root *root,
1538 struct extent_buffer *eb,
1539 int slot, struct btrfs_key *key,
1540 struct shared_node *active_node)
1542 struct inode_record *rec;
1543 struct btrfs_file_extent_item *fi;
1544 u64 num_bytes = 0;
1545 u64 disk_bytenr = 0;
1546 u64 extent_offset = 0;
/* Used below to test and round num_bytes against the sector size. */
1547 u64 mask = root->sectorsize - 1;
1548 int extent_type;
1549 int ret;
1551 rec = active_node->current;
1552 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1553 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this key offset. */
1555 if (rec->extent_start == (u64)-1) {
1556 rec->extent_start = key->offset;
1557 rec->extent_end = key->offset;
/* Previous extent ends past this offset: overlap; before it: record a hole. */
1560 if (rec->extent_end > key->offset)
1561 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1562 else if (rec->extent_end < key->offset) {
1563 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1564 key->offset - rec->extent_end);
1565 if (ret < 0)
1566 return ret;
1569 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1570 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline: count the raw length into found_size, then round num_bytes up. */
1572 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1573 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1574 if (num_bytes == 0)
1575 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1576 rec->found_size += num_bytes;
1577 num_bytes = (num_bytes + mask) & ~mask;
/* Regular/prealloc: length must be non-zero, sector aligned, within ram_bytes. */
1578 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1579 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1580 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1581 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1582 extent_offset = btrfs_file_extent_offset(eb, fi);
1583 if (num_bytes == 0 || (num_bytes & mask))
1584 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1585 if (num_bytes + extent_offset >
1586 btrfs_file_extent_ram_bytes(eb, fi))
1587 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A prealloc extent with any compression/encryption/encoding set is flagged. */
1588 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1589 (btrfs_file_extent_compression(eb, fi) ||
1590 btrfs_file_extent_encryption(eb, fi) ||
1591 btrfs_file_extent_other_encoding(eb, fi)))
1592 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Extents with disk_bytenr == 0 do not add to found_size. */
1593 if (disk_bytenr > 0)
1594 rec->found_size += num_bytes;
1595 } else {
1596 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1598 rec->extent_end = key->offset + num_bytes;
1601 * The data reloc tree will copy full extents into its inode and then
1602 * copy the corresponding csums. Because the extent it copied could be
1603 * a preallocated extent that hasn't been written to yet there may be no
1604 * csums to copy, ergo we won't have csums for our file extent. This is
1605 * ok so just don't bother checking csums if the inode belongs to the
1606 * data reloc tree.
1608 if (disk_bytenr > 0 &&
1609 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1610 u64 found;
/* Compressed: look up the whole on-disk extent; otherwise start at the offset. */
1611 if (btrfs_file_extent_compression(eb, fi))
1612 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1613 else
1614 disk_bytenr += extent_offset;
1616 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1617 if (ret < 0)
1618 return ret;
/* Regular extents note present/missing csums; csums on prealloc are an error. */
1619 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1620 if (found > 0)
1621 rec->found_csum_item = 1;
1622 if (found < num_bytes)
1623 rec->some_csum_missing = 1;
1624 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1625 if (found > 0)
1626 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1629 return 0;
1632 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1633 struct walk_control *wc)
1635 struct btrfs_key key;
1636 u32 nritems;
1637 int i;
1638 int ret = 0;
1639 struct cache_tree *inode_cache;
1640 struct shared_node *active_node;
1642 if (wc->root_level == wc->active_node &&
1643 btrfs_root_refs(&root->root_item) == 0)
1644 return 0;
1646 active_node = wc->nodes[wc->active_node];
1647 inode_cache = &active_node->inode_cache;
1648 nritems = btrfs_header_nritems(eb);
1649 for (i = 0; i < nritems; i++) {
1650 btrfs_item_key_to_cpu(eb, &key, i);
1652 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1653 continue;
1654 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1655 continue;
1657 if (active_node->current == NULL ||
1658 active_node->current->ino < key.objectid) {
1659 if (active_node->current) {
1660 active_node->current->checked = 1;
1661 maybe_free_inode_rec(inode_cache,
1662 active_node->current);
1664 active_node->current = get_inode_rec(inode_cache,
1665 key.objectid, 1);
1667 switch (key.type) {
1668 case BTRFS_DIR_ITEM_KEY:
1669 case BTRFS_DIR_INDEX_KEY:
1670 ret = process_dir_item(root, eb, i, &key, active_node);
1671 break;
1672 case BTRFS_INODE_REF_KEY:
1673 ret = process_inode_ref(eb, i, &key, active_node);
1674 break;
1675 case BTRFS_INODE_EXTREF_KEY:
1676 ret = process_inode_extref(eb, i, &key, active_node);
1677 break;
1678 case BTRFS_INODE_ITEM_KEY:
1679 ret = process_inode_item(eb, i, &key, active_node);
1680 break;
1681 case BTRFS_EXTENT_DATA_KEY:
1682 ret = process_file_extent(root, eb, i, &key,
1683 active_node);
1684 break;
1685 default:
1686 break;
1689 return ret;
1692 static void reada_walk_down(struct btrfs_root *root,
1693 struct extent_buffer *node, int slot)
1695 u64 bytenr;
1696 u64 ptr_gen;
1697 u32 nritems;
1698 u32 blocksize;
1699 int i;
1700 int level;
1702 level = btrfs_header_level(node);
1703 if (level != 1)
1704 return;
1706 nritems = btrfs_header_nritems(node);
1707 blocksize = btrfs_level_size(root, level - 1);
1708 for (i = slot; i < nritems; i++) {
1709 bytenr = btrfs_node_blockptr(node, i);
1710 ptr_gen = btrfs_node_ptr_generation(node, i);
1711 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1716 * Check the child node/leaf by the following condition:
1717 * 1. the first item key of the node/leaf should be the same with the one
1718 * in parent.
1719 * 2. block in parent node should match the child node/leaf.
1720 * 3. generation of parent node and child's header should be consistent.
1722 * Or the child node/leaf pointed by the key in parent is not valid.
1724 * We hope to check leaf owner too, but since subvol may share leaves,
1725 * which makes leaf owner check not so strong, key check should be
1726 * sufficient enough for that case.
1728 static int check_child_node(struct btrfs_root *root,
1729 struct extent_buffer *parent, int slot,
1730 struct extent_buffer *child)
1732 struct btrfs_key parent_key;
1733 struct btrfs_key child_key;
1734 int ret = 0;
1736 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1737 if (btrfs_header_level(child) == 0)
1738 btrfs_item_key_to_cpu(child, &child_key, 0);
1739 else
1740 btrfs_node_key_to_cpu(child, &child_key, 0);
1742 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1743 ret = -EINVAL;
1744 fprintf(stderr,
1745 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1746 parent_key.objectid, parent_key.type, parent_key.offset,
1747 child_key.objectid, child_key.type, child_key.offset);
1749 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1750 ret = -EINVAL;
1751 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1752 btrfs_node_blockptr(parent, slot),
1753 btrfs_header_bytenr(child));
1755 if (btrfs_node_ptr_generation(parent, slot) !=
1756 btrfs_header_generation(child)) {
1757 ret = -EINVAL;
1758 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1759 btrfs_header_generation(child),
1760 btrfs_node_ptr_generation(parent, slot));
1762 return ret;
1765 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1766 struct walk_control *wc, int *level)
1768 enum btrfs_tree_block_status status;
1769 u64 bytenr;
1770 u64 ptr_gen;
1771 struct extent_buffer *next;
1772 struct extent_buffer *cur;
1773 u32 blocksize;
1774 int ret, err = 0;
1775 u64 refs;
1777 WARN_ON(*level < 0);
1778 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1779 ret = btrfs_lookup_extent_info(NULL, root,
1780 path->nodes[*level]->start,
1781 *level, 1, &refs, NULL);
1782 if (ret < 0) {
1783 err = ret;
1784 goto out;
1787 if (refs > 1) {
1788 ret = enter_shared_node(root, path->nodes[*level]->start,
1789 refs, wc, *level);
1790 if (ret > 0) {
1791 err = ret;
1792 goto out;
1796 while (*level >= 0) {
1797 WARN_ON(*level < 0);
1798 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1799 cur = path->nodes[*level];
1801 if (btrfs_header_level(cur) != *level)
1802 WARN_ON(1);
1804 if (path->slots[*level] >= btrfs_header_nritems(cur))
1805 break;
1806 if (*level == 0) {
1807 ret = process_one_leaf(root, cur, wc);
1808 if (ret < 0)
1809 err = ret;
1810 break;
1812 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1813 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1814 blocksize = btrfs_level_size(root, *level - 1);
1815 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1816 1, &refs, NULL);
1817 if (ret < 0)
1818 refs = 0;
1820 if (refs > 1) {
1821 ret = enter_shared_node(root, bytenr, refs,
1822 wc, *level - 1);
1823 if (ret > 0) {
1824 path->slots[*level]++;
1825 continue;
1829 next = btrfs_find_tree_block(root, bytenr, blocksize);
1830 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1831 free_extent_buffer(next);
1832 reada_walk_down(root, cur, path->slots[*level]);
1833 next = read_tree_block(root, bytenr, blocksize,
1834 ptr_gen);
1835 if (!extent_buffer_uptodate(next)) {
1836 struct btrfs_key node_key;
1838 btrfs_node_key_to_cpu(path->nodes[*level],
1839 &node_key,
1840 path->slots[*level]);
1841 btrfs_add_corrupt_extent_record(root->fs_info,
1842 &node_key,
1843 path->nodes[*level]->start,
1844 root->leafsize, *level);
1845 err = -EIO;
1846 goto out;
1850 ret = check_child_node(root, cur, path->slots[*level], next);
1851 if (ret) {
1852 err = ret;
1853 goto out;
1856 if (btrfs_is_leaf(next))
1857 status = btrfs_check_leaf(root, NULL, next);
1858 else
1859 status = btrfs_check_node(root, NULL, next);
1860 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1861 free_extent_buffer(next);
1862 err = -EIO;
1863 goto out;
1866 *level = *level - 1;
1867 free_extent_buffer(path->nodes[*level]);
1868 path->nodes[*level] = next;
1869 path->slots[*level] = 0;
1871 out:
1872 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1873 return err;
/*
 * Climb from *level towards the root until a level is found whose node still
 * has an unvisited slot; advance that slot and store the level in *level.
 *
 * Returns 0 when such a level was found (the caller can walk down again) and
 * 1 when the loop ends without finding one.
 */
1876 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1877 struct walk_control *wc, int *level)
1879 int i;
1880 struct extent_buffer *leaf;
1882 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1883 leaf = path->nodes[i];
/* More slots left at this level: step to the next one and stop climbing. */
1884 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1885 path->slots[i]++;
1886 *level = i;
1887 return 0;
1888 } else {
/*
 * This level is exhausted.  *level equals i here: it starts equal and is
 * set to i + 1 at the end of every pass, so the node released below is the
 * one just examined.
 */
1889 free_extent_buffer(path->nodes[*level]);
1890 path->nodes[*level] = NULL;
1891 BUG_ON(*level > wc->active_node);
/* Leaving the active shared node: hand its records to the level above. */
1892 if (*level == wc->active_node)
1893 leave_shared_node(root, wc, *level);
1894 *level = i + 1;
1897 return 1;
1900 static int check_root_dir(struct inode_record *rec)
1902 struct inode_backref *backref;
1903 int ret = -1;
1905 if (!rec->found_inode_item || rec->errors)
1906 goto out;
1907 if (rec->nlink != 1 || rec->found_link != 0)
1908 goto out;
1909 if (list_empty(&rec->backrefs))
1910 goto out;
1911 backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1912 if (!backref->found_inode_ref)
1913 goto out;
1914 if (backref->index != 0 || backref->namelen != 2 ||
1915 memcmp(backref->name, "..", 2))
1916 goto out;
1917 if (backref->found_dir_index || backref->found_dir_item)
1918 goto out;
1919 ret = 0;
1920 out:
1921 return ret;
1924 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1925 struct btrfs_root *root, struct btrfs_path *path,
1926 struct inode_record *rec)
1928 struct btrfs_inode_item *ei;
1929 struct btrfs_key key;
1930 int ret;
1932 key.objectid = rec->ino;
1933 key.type = BTRFS_INODE_ITEM_KEY;
1934 key.offset = (u64)-1;
1936 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1937 if (ret < 0)
1938 goto out;
1939 if (ret) {
1940 if (!path->slots[0]) {
1941 ret = -ENOENT;
1942 goto out;
1944 path->slots[0]--;
1945 ret = 0;
1947 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1948 if (key.objectid != rec->ino) {
1949 ret = -ENOENT;
1950 goto out;
1953 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1954 struct btrfs_inode_item);
1955 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
1956 btrfs_mark_buffer_dirty(path->nodes[0]);
1957 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
1958 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
1959 root->root_key.objectid);
1960 out:
1961 btrfs_release_path(path);
1962 return ret;
1965 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
1966 struct btrfs_root *root,
1967 struct btrfs_path *path,
1968 struct inode_record *rec)
1970 int ret;
1972 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
1973 btrfs_release_path(path);
1974 if (!ret)
1975 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
1976 return ret;
1979 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
1980 struct btrfs_root *root,
1981 struct btrfs_path *path,
1982 struct inode_record *rec)
1984 struct btrfs_inode_item *ei;
1985 struct btrfs_key key;
1986 int ret = 0;
1988 key.objectid = rec->ino;
1989 key.type = BTRFS_INODE_ITEM_KEY;
1990 key.offset = 0;
1992 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1993 if (ret) {
1994 if (ret > 0)
1995 ret = -ENOENT;
1996 goto out;
1999 /* Since ret == 0, no need to check anything */
2000 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2001 struct btrfs_inode_item);
2002 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2003 btrfs_mark_buffer_dirty(path->nodes[0]);
2004 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2005 printf("reset nbytes for ino %llu root %llu\n",
2006 rec->ino, root->root_key.objectid);
2007 out:
2008 btrfs_release_path(path);
2009 return ret;
2012 static int add_missing_dir_index(struct btrfs_root *root,
2013 struct cache_tree *inode_cache,
2014 struct inode_record *rec,
2015 struct inode_backref *backref)
2017 struct btrfs_path *path;
2018 struct btrfs_trans_handle *trans;
2019 struct btrfs_dir_item *dir_item;
2020 struct extent_buffer *leaf;
2021 struct btrfs_key key;
2022 struct btrfs_disk_key disk_key;
2023 struct inode_record *dir_rec;
2024 unsigned long name_ptr;
2025 u32 data_size = sizeof(*dir_item) + backref->namelen;
2026 int ret;
2028 path = btrfs_alloc_path();
2029 if (!path)
2030 return -ENOMEM;
2032 trans = btrfs_start_transaction(root, 1);
2033 if (IS_ERR(trans)) {
2034 btrfs_free_path(path);
2035 return PTR_ERR(trans);
2038 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2039 (unsigned long long)rec->ino);
2040 key.objectid = backref->dir;
2041 key.type = BTRFS_DIR_INDEX_KEY;
2042 key.offset = backref->index;
2044 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2045 BUG_ON(ret);
2047 leaf = path->nodes[0];
2048 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2050 disk_key.objectid = cpu_to_le64(rec->ino);
2051 disk_key.type = BTRFS_INODE_ITEM_KEY;
2052 disk_key.offset = 0;
2054 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2055 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2056 btrfs_set_dir_data_len(leaf, dir_item, 0);
2057 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2058 name_ptr = (unsigned long)(dir_item + 1);
2059 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2060 btrfs_mark_buffer_dirty(leaf);
2061 btrfs_free_path(path);
2062 btrfs_commit_transaction(trans, root);
2064 backref->found_dir_index = 1;
2065 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2066 if (!dir_rec)
2067 return 0;
2068 dir_rec->found_size += backref->namelen;
2069 if (dir_rec->found_size == dir_rec->isize &&
2070 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2071 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2072 if (dir_rec->found_size != dir_rec->isize)
2073 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2075 return 0;
2078 static int delete_dir_index(struct btrfs_root *root,
2079 struct cache_tree *inode_cache,
2080 struct inode_record *rec,
2081 struct inode_backref *backref)
2083 struct btrfs_trans_handle *trans;
2084 struct btrfs_dir_item *di;
2085 struct btrfs_path *path;
2086 int ret = 0;
2088 path = btrfs_alloc_path();
2089 if (!path)
2090 return -ENOMEM;
2092 trans = btrfs_start_transaction(root, 1);
2093 if (IS_ERR(trans)) {
2094 btrfs_free_path(path);
2095 return PTR_ERR(trans);
2099 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2100 (unsigned long long)backref->dir,
2101 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2102 (unsigned long long)root->objectid);
2104 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2105 backref->name, backref->namelen,
2106 backref->index, -1);
2107 if (IS_ERR(di)) {
2108 ret = PTR_ERR(di);
2109 btrfs_free_path(path);
2110 btrfs_commit_transaction(trans, root);
2111 if (ret == -ENOENT)
2112 return 0;
2113 return ret;
2116 if (!di)
2117 ret = btrfs_del_item(trans, root, path);
2118 else
2119 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2120 BUG_ON(ret);
2121 btrfs_free_path(path);
2122 btrfs_commit_transaction(trans, root);
2123 return ret;
2126 static int create_inode_item(struct btrfs_root *root,
2127 struct inode_record *rec,
2128 struct inode_backref *backref, int root_dir)
2130 struct btrfs_trans_handle *trans;
2131 struct btrfs_inode_item inode_item;
2132 time_t now = time(NULL);
2133 int ret;
2135 trans = btrfs_start_transaction(root, 1);
2136 if (IS_ERR(trans)) {
2137 ret = PTR_ERR(trans);
2138 return ret;
2141 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2142 "be incomplete, please check permissions and content after "
2143 "the fsck completes.\n", (unsigned long long)root->objectid,
2144 (unsigned long long)rec->ino);
2146 memset(&inode_item, 0, sizeof(inode_item));
2147 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2148 if (root_dir)
2149 btrfs_set_stack_inode_nlink(&inode_item, 1);
2150 else
2151 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2152 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2153 if (rec->found_dir_item) {
2154 if (rec->found_file_extent)
2155 fprintf(stderr, "root %llu inode %llu has both a dir "
2156 "item and extents, unsure if it is a dir or a "
2157 "regular file so setting it as a directory\n",
2158 (unsigned long long)root->objectid,
2159 (unsigned long long)rec->ino);
2160 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2161 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2162 } else if (!rec->found_dir_item) {
2163 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2164 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2166 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2167 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2168 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2169 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2170 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2171 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2172 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2173 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2175 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2176 BUG_ON(ret);
2177 btrfs_commit_transaction(trans, root);
2178 return 0;
2181 static int repair_inode_backrefs(struct btrfs_root *root,
2182 struct inode_record *rec,
2183 struct cache_tree *inode_cache,
2184 int delete)
2186 struct inode_backref *tmp, *backref;
2187 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2188 int ret = 0;
2189 int repaired = 0;
2191 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2192 if (!delete && rec->ino == root_dirid) {
2193 if (!rec->found_inode_item) {
2194 ret = create_inode_item(root, rec, backref, 1);
2195 if (ret)
2196 break;
2197 repaired++;
2201 /* Index 0 for root dir's are special, don't mess with it */
2202 if (rec->ino == root_dirid && backref->index == 0)
2203 continue;
2205 if (delete &&
2206 ((backref->found_dir_index && !backref->found_inode_ref) ||
2207 (backref->found_dir_index && backref->found_inode_ref &&
2208 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2209 ret = delete_dir_index(root, inode_cache, rec, backref);
2210 if (ret)
2211 break;
2212 repaired++;
2213 list_del(&backref->list);
2214 free(backref);
2217 if (!delete && !backref->found_dir_index &&
2218 backref->found_dir_item && backref->found_inode_ref) {
2219 ret = add_missing_dir_index(root, inode_cache, rec,
2220 backref);
2221 if (ret)
2222 break;
2223 repaired++;
2224 if (backref->found_dir_item &&
2225 backref->found_dir_index &&
2226 backref->found_dir_index) {
2227 if (!backref->errors &&
2228 backref->found_inode_ref) {
2229 list_del(&backref->list);
2230 free(backref);
2235 if (!delete && (!backref->found_dir_index &&
2236 !backref->found_dir_item &&
2237 backref->found_inode_ref)) {
2238 struct btrfs_trans_handle *trans;
2239 struct btrfs_key location;
2241 ret = check_dir_conflict(root, backref->name,
2242 backref->namelen,
2243 backref->dir,
2244 backref->index);
2245 if (ret) {
2247 * let nlink fixing routine to handle it,
2248 * which can do it better.
2250 ret = 0;
2251 break;
2253 location.objectid = rec->ino;
2254 location.type = BTRFS_INODE_ITEM_KEY;
2255 location.offset = 0;
2257 trans = btrfs_start_transaction(root, 1);
2258 if (IS_ERR(trans)) {
2259 ret = PTR_ERR(trans);
2260 break;
2262 fprintf(stderr, "adding missing dir index/item pair "
2263 "for inode %llu\n",
2264 (unsigned long long)rec->ino);
2265 ret = btrfs_insert_dir_item(trans, root, backref->name,
2266 backref->namelen,
2267 backref->dir, &location,
2268 imode_to_type(rec->imode),
2269 backref->index);
2270 BUG_ON(ret);
2271 btrfs_commit_transaction(trans, root);
2272 repaired++;
2275 if (!delete && (backref->found_inode_ref &&
2276 backref->found_dir_index &&
2277 backref->found_dir_item &&
2278 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2279 !rec->found_inode_item)) {
2280 ret = create_inode_item(root, rec, backref, 0);
2281 if (ret)
2282 break;
2283 repaired++;
2287 return ret ? ret : repaired;
2291 * To determine the file type for nlink/inode_item repair
2293 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2294 * Return -ENOENT if file type is not found.
2296 static int find_file_type(struct inode_record *rec, u8 *type)
2298 struct inode_backref *backref;
2300 /* For inode item recovered case */
2301 if (rec->found_inode_item) {
2302 *type = imode_to_type(rec->imode);
2303 return 0;
2306 list_for_each_entry(backref, &rec->backrefs, list) {
2307 if (backref->found_dir_index || backref->found_dir_item) {
2308 *type = backref->filetype;
2309 return 0;
2312 return -ENOENT;
2316 * To determine the file name for nlink repair
2318 * Return 0 if file name is found, set name and namelen.
2319 * Return -ENOENT if file name is not found.
2321 static int find_file_name(struct inode_record *rec,
2322 char *name, int *namelen)
2324 struct inode_backref *backref;
2326 list_for_each_entry(backref, &rec->backrefs, list) {
2327 if (backref->found_dir_index || backref->found_dir_item ||
2328 backref->found_inode_ref) {
2329 memcpy(name, backref->name, backref->namelen);
2330 *namelen = backref->namelen;
2331 return 0;
2334 return -ENOENT;
2337 /* Reset the nlink of the inode to the correct one */
2338 static int reset_nlink(struct btrfs_trans_handle *trans,
2339 struct btrfs_root *root,
2340 struct btrfs_path *path,
2341 struct inode_record *rec)
2343 struct inode_backref *backref;
2344 struct inode_backref *tmp;
2345 struct btrfs_key key;
2346 struct btrfs_inode_item *inode_item;
2347 int ret = 0;
2349 /* We don't believe this either, reset it and iterate backref */
2350 rec->found_link = 0;
2352 /* Remove all backref including the valid ones */
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2355 backref->index, backref->name,
2356 backref->namelen, 0);
2357 if (ret < 0)
2358 goto out;
2360 /* remove invalid backref, so it won't be added back */
2361 if (!(backref->found_dir_index &&
2362 backref->found_dir_item &&
2363 backref->found_inode_ref)) {
2364 list_del(&backref->list);
2365 free(backref);
2366 } else {
2367 rec->found_link++;
2371 /* Set nlink to 0 */
2372 key.objectid = rec->ino;
2373 key.type = BTRFS_INODE_ITEM_KEY;
2374 key.offset = 0;
2375 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2376 if (ret < 0)
2377 goto out;
2378 if (ret > 0) {
2379 ret = -ENOENT;
2380 goto out;
2382 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2383 struct btrfs_inode_item);
2384 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2385 btrfs_mark_buffer_dirty(path->nodes[0]);
2386 btrfs_release_path(path);
2389 * Add back valid inode_ref/dir_item/dir_index,
2390 * add_link() will handle the nlink inc, so new nlink must be correct
2392 list_for_each_entry(backref, &rec->backrefs, list) {
2393 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2394 backref->name, backref->namelen,
2395 backref->ref_type, &backref->index, 1);
2396 if (ret < 0)
2397 goto out;
2399 out:
2400 btrfs_release_path(path);
2401 return ret;
2404 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2405 struct btrfs_root *root,
2406 struct btrfs_path *path,
2407 struct inode_record *rec)
2409 char *dir_name = "lost+found";
2410 char namebuf[BTRFS_NAME_LEN] = {0};
2411 u64 lost_found_ino;
2412 u32 mode = 0700;
2413 u8 type = 0;
2414 int namelen = 0;
2415 int name_recovered = 0;
2416 int type_recovered = 0;
2417 int ret = 0;
2420 * Get file name and type first before these invalid inode ref
2421 * are deleted by remove_all_invalid_backref()
2423 name_recovered = !find_file_name(rec, namebuf, &namelen);
2424 type_recovered = !find_file_type(rec, &type);
2426 if (!name_recovered) {
2427 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2428 rec->ino, rec->ino);
2429 namelen = count_digits(rec->ino);
2430 sprintf(namebuf, "%llu", rec->ino);
2431 name_recovered = 1;
2433 if (!type_recovered) {
2434 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2435 rec->ino);
2436 type = BTRFS_FT_REG_FILE;
2437 type_recovered = 1;
2440 ret = reset_nlink(trans, root, path, rec);
2441 if (ret < 0) {
2442 fprintf(stderr,
2443 "Failed to reset nlink for inode %llu: %s\n",
2444 rec->ino, strerror(-ret));
2445 goto out;
2448 if (rec->found_link == 0) {
2449 lost_found_ino = root->highest_inode;
2450 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2451 ret = -EOVERFLOW;
2452 goto out;
2454 lost_found_ino++;
2455 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2456 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2457 mode);
2458 if (ret < 0) {
2459 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2460 dir_name, strerror(-ret));
2461 goto out;
2463 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2464 namebuf, namelen, type, NULL, 1);
2466 * Add ".INO" suffix several times to handle case where
2467 * "FILENAME.INO" is already taken by another file.
2469 while (ret == -EEXIST) {
2471 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2473 if (namelen + count_digits(rec->ino) + 1 >
2474 BTRFS_NAME_LEN) {
2475 ret = -EFBIG;
2476 goto out;
2478 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2479 ".%llu", rec->ino);
2480 namelen += count_digits(rec->ino) + 1;
2481 ret = btrfs_add_link(trans, root, rec->ino,
2482 lost_found_ino, namebuf,
2483 namelen, type, NULL, 1);
2485 if (ret < 0) {
2486 fprintf(stderr,
2487 "Failed to link the inode %llu to %s dir: %s\n",
2488 rec->ino, dir_name, strerror(-ret));
2489 goto out;
2492 * Just increase the found_link, don't actually add the
2493 * backref. This will make things easier and this inode
2494 * record will be freed after the repair is done.
2495 * So fsck will not report problem about this inode.
2497 rec->found_link++;
2498 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2499 namelen, namebuf, dir_name);
2501 printf("Fixed the nlink of inode %llu\n", rec->ino);
2502 out:
2504 * Clear the flag anyway, or we will loop forever for the same inode
2505 * as it will not be removed from the bad inode list and the dead loop
2506 * happens.
2508 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2509 btrfs_release_path(path);
2510 return ret;
2514 * Check if there is any normal(reg or prealloc) file extent for given
2515 * ino.
2516 * This is used to determine the file type when neither its dir_index/item or
2517 * inode_item exists.
2519 * This will *NOT* report error, if any error happens, just consider it does
2520 * not have any normal file extent.
2522 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2524 struct btrfs_path *path;
2525 struct btrfs_key key;
2526 struct btrfs_key found_key;
2527 struct btrfs_file_extent_item *fi;
2528 u8 type;
2529 int ret = 0;
2531 path = btrfs_alloc_path();
2532 if (!path)
2533 goto out;
2534 key.objectid = ino;
2535 key.type = BTRFS_EXTENT_DATA_KEY;
2536 key.offset = 0;
2538 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2539 if (ret < 0) {
2540 ret = 0;
2541 goto out;
2543 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2544 ret = btrfs_next_leaf(root, path);
2545 if (ret) {
2546 ret = 0;
2547 goto out;
2550 while (1) {
2551 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2552 path->slots[0]);
2553 if (found_key.objectid != ino ||
2554 found_key.type != BTRFS_EXTENT_DATA_KEY)
2555 break;
2556 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2557 struct btrfs_file_extent_item);
2558 type = btrfs_file_extent_type(path->nodes[0], fi);
2559 if (type != BTRFS_FILE_EXTENT_INLINE) {
2560 ret = 1;
2561 goto out;
2564 out:
2565 btrfs_free_path(path);
2566 return ret;
2569 static u32 btrfs_type_to_imode(u8 type)
2571 static u32 imode_by_btrfs_type[] = {
2572 [BTRFS_FT_REG_FILE] = S_IFREG,
2573 [BTRFS_FT_DIR] = S_IFDIR,
2574 [BTRFS_FT_CHRDEV] = S_IFCHR,
2575 [BTRFS_FT_BLKDEV] = S_IFBLK,
2576 [BTRFS_FT_FIFO] = S_IFIFO,
2577 [BTRFS_FT_SOCK] = S_IFSOCK,
2578 [BTRFS_FT_SYMLINK] = S_IFLNK,
2581 return imode_by_btrfs_type[(type)];
2584 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2585 struct btrfs_root *root,
2586 struct btrfs_path *path,
2587 struct inode_record *rec)
2589 u8 filetype;
2590 u32 mode = 0700;
2591 int type_recovered = 0;
2592 int ret = 0;
2594 printf("Trying to rebuild inode:%llu\n", rec->ino);
2596 type_recovered = !find_file_type(rec, &filetype);
2599 * Try to determine inode type if type not found.
2601 * For found regular file extent, it must be FILE.
2602 * For found dir_item/index, it must be DIR.
2604 * For undetermined one, use FILE as fallback.
2606 * TODO:
2607 * 1. If found backref(inode_index/item is already handled) to it,
2608 * it must be DIR.
2609 * Need new inode-inode ref structure to allow search for that.
2611 if (!type_recovered) {
2612 if (rec->found_file_extent &&
2613 find_normal_file_extent(root, rec->ino)) {
2614 type_recovered = 1;
2615 filetype = BTRFS_FT_REG_FILE;
2616 } else if (rec->found_dir_item) {
2617 type_recovered = 1;
2618 filetype = BTRFS_FT_DIR;
2619 } else if (!list_empty(&rec->orphan_extents)) {
2620 type_recovered = 1;
2621 filetype = BTRFS_FT_REG_FILE;
2622 } else{
2623 printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2624 rec->ino);
2625 type_recovered = 1;
2626 filetype = BTRFS_FT_REG_FILE;
2630 ret = btrfs_new_inode(trans, root, rec->ino,
2631 mode | btrfs_type_to_imode(filetype));
2632 if (ret < 0)
2633 goto out;
2636 * Here inode rebuild is done, we only rebuild the inode item,
2637 * don't repair the nlink(like move to lost+found).
2638 * That is the job of nlink repair.
2640 * We just fill the record and return
2642 rec->found_dir_item = 1;
2643 rec->imode = mode | btrfs_type_to_imode(filetype);
2644 rec->nlink = 0;
2645 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2646 /* Ensure the inode_nlinks repair function will be called */
2647 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2648 out:
2649 return ret;
2652 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2653 struct btrfs_root *root,
2654 struct btrfs_path *path,
2655 struct inode_record *rec)
2657 struct orphan_data_extent *orphan;
2658 struct orphan_data_extent *tmp;
2659 int ret = 0;
2661 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2663 * Check for conflicting file extents
2665 * Here we don't know whether the extents is compressed or not,
2666 * so we can only assume it not compressed nor data offset,
2667 * and use its disk_len as extent length.
2669 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2670 orphan->offset, orphan->disk_len, 0);
2671 btrfs_release_path(path);
2672 if (ret < 0)
2673 goto out;
2674 if (!ret) {
2675 fprintf(stderr,
2676 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2677 orphan->disk_bytenr, orphan->disk_len);
2678 ret = btrfs_free_extent(trans,
2679 root->fs_info->extent_root,
2680 orphan->disk_bytenr, orphan->disk_len,
2681 0, root->objectid, orphan->objectid,
2682 orphan->offset);
2683 if (ret < 0)
2684 goto out;
2686 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2687 orphan->offset, orphan->disk_bytenr,
2688 orphan->disk_len, orphan->disk_len);
2689 if (ret < 0)
2690 goto out;
2692 /* Update file size info */
2693 rec->found_size += orphan->disk_len;
2694 if (rec->found_size == rec->nbytes)
2695 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2697 /* Update the file extent hole info too */
2698 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2699 orphan->disk_len);
2700 if (ret < 0)
2701 goto out;
2702 if (RB_EMPTY_ROOT(&rec->holes))
2703 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2705 list_del(&orphan->list);
2706 free(orphan);
2708 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2709 out:
2710 return ret;
2713 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2714 struct btrfs_root *root,
2715 struct btrfs_path *path,
2716 struct inode_record *rec)
2718 struct rb_node *node;
2719 struct file_extent_hole *hole;
2720 int found = 0;
2721 int ret = 0;
2723 node = rb_first(&rec->holes);
2725 while (node) {
2726 found = 1;
2727 hole = rb_entry(node, struct file_extent_hole, node);
2728 ret = btrfs_punch_hole(trans, root, rec->ino,
2729 hole->start, hole->len);
2730 if (ret < 0)
2731 goto out;
2732 ret = del_file_extent_hole(&rec->holes, hole->start,
2733 hole->len);
2734 if (ret < 0)
2735 goto out;
2736 if (RB_EMPTY_ROOT(&rec->holes))
2737 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2738 node = rb_first(&rec->holes);
2740 /* special case for a file losing all its file extent */
2741 if (!found) {
2742 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2743 round_up(rec->isize, root->sectorsize));
2744 if (ret < 0)
2745 goto out;
2747 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2748 rec->ino, root->objectid);
2749 out:
2750 return ret;
2753 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2755 struct btrfs_trans_handle *trans;
2756 struct btrfs_path *path;
2757 int ret = 0;
2759 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2760 I_ERR_NO_ORPHAN_ITEM |
2761 I_ERR_LINK_COUNT_WRONG |
2762 I_ERR_NO_INODE_ITEM |
2763 I_ERR_FILE_EXTENT_ORPHAN |
2764 I_ERR_FILE_EXTENT_DISCOUNT|
2765 I_ERR_FILE_NBYTES_WRONG)))
2766 return rec->errors;
2768 path = btrfs_alloc_path();
2769 if (!path)
2770 return -ENOMEM;
2773 * For nlink repair, it may create a dir and add link, so
2774 * 2 for parent(256)'s dir_index and dir_item
2775 * 2 for lost+found dir's inode_item and inode_ref
2776 * 1 for the new inode_ref of the file
2777 * 2 for lost+found dir's dir_index and dir_item for the file
2779 trans = btrfs_start_transaction(root, 7);
2780 if (IS_ERR(trans)) {
2781 btrfs_free_path(path);
2782 return PTR_ERR(trans);
2785 if (rec->errors & I_ERR_NO_INODE_ITEM)
2786 ret = repair_inode_no_item(trans, root, path, rec);
2787 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2788 ret = repair_inode_orphan_extent(trans, root, path, rec);
2789 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2790 ret = repair_inode_discount_extent(trans, root, path, rec);
2791 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2792 ret = repair_inode_isize(trans, root, path, rec);
2793 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2794 ret = repair_inode_orphan_item(trans, root, path, rec);
2795 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2796 ret = repair_inode_nlinks(trans, root, path, rec);
2797 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2798 ret = repair_inode_nbytes(trans, root, path, rec);
2799 btrfs_commit_transaction(trans, root);
2800 btrfs_free_path(path);
2801 return ret;
2804 static int check_inode_recs(struct btrfs_root *root,
2805 struct cache_tree *inode_cache)
2807 struct cache_extent *cache;
2808 struct ptr_node *node;
2809 struct inode_record *rec;
2810 struct inode_backref *backref;
2811 int stage = 0;
2812 int ret = 0;
2813 int err = 0;
2814 u64 error = 0;
2815 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2817 if (btrfs_root_refs(&root->root_item) == 0) {
2818 if (!cache_tree_empty(inode_cache))
2819 fprintf(stderr, "warning line %d\n", __LINE__);
2820 return 0;
2824 * We need to record the highest inode number for later 'lost+found'
2825 * dir creation.
2826 * We must select a ino not used/refered by any existing inode, or
2827 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2828 * this may cause 'lost+found' dir has wrong nlinks.
2830 cache = last_cache_extent(inode_cache);
2831 if (cache) {
2832 node = container_of(cache, struct ptr_node, cache);
2833 rec = node->data;
2834 if (rec->ino > root->highest_inode)
2835 root->highest_inode = rec->ino;
2839 * We need to repair backrefs first because we could change some of the
2840 * errors in the inode recs.
2842 * We also need to go through and delete invalid backrefs first and then
2843 * add the correct ones second. We do this because we may get EEXIST
2844 * when adding back the correct index because we hadn't yet deleted the
2845 * invalid index.
2847 * For example, if we were missing a dir index then the directories
2848 * isize would be wrong, so if we fixed the isize to what we thought it
2849 * would be and then fixed the backref we'd still have a invalid fs, so
2850 * we need to add back the dir index and then check to see if the isize
2851 * is still wrong.
2853 while (stage < 3) {
2854 stage++;
2855 if (stage == 3 && !err)
2856 break;
2858 cache = search_cache_extent(inode_cache, 0);
2859 while (repair && cache) {
2860 node = container_of(cache, struct ptr_node, cache);
2861 rec = node->data;
2862 cache = next_cache_extent(cache);
2864 /* Need to free everything up and rescan */
2865 if (stage == 3) {
2866 remove_cache_extent(inode_cache, &node->cache);
2867 free(node);
2868 free_inode_rec(rec);
2869 continue;
2872 if (list_empty(&rec->backrefs))
2873 continue;
2875 ret = repair_inode_backrefs(root, rec, inode_cache,
2876 stage == 1);
2877 if (ret < 0) {
2878 err = ret;
2879 stage = 2;
2880 break;
2881 } if (ret > 0) {
2882 err = -EAGAIN;
2886 if (err)
2887 return err;
2889 rec = get_inode_rec(inode_cache, root_dirid, 0);
2890 if (rec) {
2891 ret = check_root_dir(rec);
2892 if (ret) {
2893 fprintf(stderr, "root %llu root dir %llu error\n",
2894 (unsigned long long)root->root_key.objectid,
2895 (unsigned long long)root_dirid);
2896 print_inode_error(root, rec);
2897 error++;
2899 } else {
2900 if (repair) {
2901 struct btrfs_trans_handle *trans;
2903 trans = btrfs_start_transaction(root, 1);
2904 if (IS_ERR(trans)) {
2905 err = PTR_ERR(trans);
2906 return err;
2909 fprintf(stderr,
2910 "root %llu missing its root dir, recreating\n",
2911 (unsigned long long)root->objectid);
2913 ret = btrfs_make_root_dir(trans, root, root_dirid);
2914 BUG_ON(ret);
2916 btrfs_commit_transaction(trans, root);
2917 return -EAGAIN;
2920 fprintf(stderr, "root %llu root dir %llu not found\n",
2921 (unsigned long long)root->root_key.objectid,
2922 (unsigned long long)root_dirid);
2925 while (1) {
2926 cache = search_cache_extent(inode_cache, 0);
2927 if (!cache)
2928 break;
2929 node = container_of(cache, struct ptr_node, cache);
2930 rec = node->data;
2931 remove_cache_extent(inode_cache, &node->cache);
2932 free(node);
2933 if (rec->ino == root_dirid ||
2934 rec->ino == BTRFS_ORPHAN_OBJECTID) {
2935 free_inode_rec(rec);
2936 continue;
2939 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
2940 ret = check_orphan_item(root, rec->ino);
2941 if (ret == 0)
2942 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2943 if (can_free_inode_rec(rec)) {
2944 free_inode_rec(rec);
2945 continue;
2949 if (!rec->found_inode_item)
2950 rec->errors |= I_ERR_NO_INODE_ITEM;
2951 if (rec->found_link != rec->nlink)
2952 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2953 if (repair) {
2954 ret = try_repair_inode(root, rec);
2955 if (ret == 0 && can_free_inode_rec(rec)) {
2956 free_inode_rec(rec);
2957 continue;
2959 ret = 0;
2962 if (!(repair && ret == 0))
2963 error++;
2964 print_inode_error(root, rec);
2965 list_for_each_entry(backref, &rec->backrefs, list) {
2966 if (!backref->found_dir_item)
2967 backref->errors |= REF_ERR_NO_DIR_ITEM;
2968 if (!backref->found_dir_index)
2969 backref->errors |= REF_ERR_NO_DIR_INDEX;
2970 if (!backref->found_inode_ref)
2971 backref->errors |= REF_ERR_NO_INODE_REF;
2972 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
2973 " namelen %u name %s filetype %d errors %x",
2974 (unsigned long long)backref->dir,
2975 (unsigned long long)backref->index,
2976 backref->namelen, backref->name,
2977 backref->filetype, backref->errors);
2978 print_ref_error(backref->errors);
2980 free_inode_rec(rec);
2982 return (error > 0) ? -1 : 0;
2985 static struct root_record *get_root_rec(struct cache_tree *root_cache,
2986 u64 objectid)
2988 struct cache_extent *cache;
2989 struct root_record *rec = NULL;
2990 int ret;
2992 cache = lookup_cache_extent(root_cache, objectid, 1);
2993 if (cache) {
2994 rec = container_of(cache, struct root_record, cache);
2995 } else {
2996 rec = calloc(1, sizeof(*rec));
2997 rec->objectid = objectid;
2998 INIT_LIST_HEAD(&rec->backrefs);
2999 rec->cache.start = objectid;
3000 rec->cache.size = 1;
3002 ret = insert_cache_extent(root_cache, &rec->cache);
3003 BUG_ON(ret);
3005 return rec;
3008 static struct root_backref *get_root_backref(struct root_record *rec,
3009 u64 ref_root, u64 dir, u64 index,
3010 const char *name, int namelen)
3012 struct root_backref *backref;
3014 list_for_each_entry(backref, &rec->backrefs, list) {
3015 if (backref->ref_root != ref_root || backref->dir != dir ||
3016 backref->namelen != namelen)
3017 continue;
3018 if (memcmp(name, backref->name, namelen))
3019 continue;
3020 return backref;
3023 backref = calloc(1, sizeof(*backref) + namelen + 1);
3024 backref->ref_root = ref_root;
3025 backref->dir = dir;
3026 backref->index = index;
3027 backref->namelen = namelen;
3028 memcpy(backref->name, name, namelen);
3029 backref->name[namelen] = '\0';
3030 list_add_tail(&backref->list, &rec->backrefs);
3031 return backref;
3034 static void free_root_record(struct cache_extent *cache)
3036 struct root_record *rec;
3037 struct root_backref *backref;
3039 rec = container_of(cache, struct root_record, cache);
3040 while (!list_empty(&rec->backrefs)) {
3041 backref = list_entry(rec->backrefs.next,
3042 struct root_backref, list);
3043 list_del(&backref->list);
3044 free(backref);
3047 kfree(rec);
3050 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3052 static int add_root_backref(struct cache_tree *root_cache,
3053 u64 root_id, u64 ref_root, u64 dir, u64 index,
3054 const char *name, int namelen,
3055 int item_type, int errors)
3057 struct root_record *rec;
3058 struct root_backref *backref;
3060 rec = get_root_rec(root_cache, root_id);
3061 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3063 backref->errors |= errors;
3065 if (item_type != BTRFS_DIR_ITEM_KEY) {
3066 if (backref->found_dir_index || backref->found_back_ref ||
3067 backref->found_forward_ref) {
3068 if (backref->index != index)
3069 backref->errors |= REF_ERR_INDEX_UNMATCH;
3070 } else {
3071 backref->index = index;
3075 if (item_type == BTRFS_DIR_ITEM_KEY) {
3076 if (backref->found_forward_ref)
3077 rec->found_ref++;
3078 backref->found_dir_item = 1;
3079 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3080 backref->found_dir_index = 1;
3081 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3082 if (backref->found_forward_ref)
3083 backref->errors |= REF_ERR_DUP_ROOT_REF;
3084 else if (backref->found_dir_item)
3085 rec->found_ref++;
3086 backref->found_forward_ref = 1;
3087 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3088 if (backref->found_back_ref)
3089 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3090 backref->found_back_ref = 1;
3091 } else {
3092 BUG_ON(1);
3095 if (backref->found_forward_ref && backref->found_dir_item)
3096 backref->reachable = 1;
3097 return 0;
3100 static int merge_root_recs(struct btrfs_root *root,
3101 struct cache_tree *src_cache,
3102 struct cache_tree *dst_cache)
3104 struct cache_extent *cache;
3105 struct ptr_node *node;
3106 struct inode_record *rec;
3107 struct inode_backref *backref;
3108 int ret = 0;
3110 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3111 free_inode_recs_tree(src_cache);
3112 return 0;
3115 while (1) {
3116 cache = search_cache_extent(src_cache, 0);
3117 if (!cache)
3118 break;
3119 node = container_of(cache, struct ptr_node, cache);
3120 rec = node->data;
3121 remove_cache_extent(src_cache, &node->cache);
3122 free(node);
3124 ret = is_child_root(root, root->objectid, rec->ino);
3125 if (ret < 0)
3126 break;
3127 else if (ret == 0)
3128 goto skip;
3130 list_for_each_entry(backref, &rec->backrefs, list) {
3131 BUG_ON(backref->found_inode_ref);
3132 if (backref->found_dir_item)
3133 add_root_backref(dst_cache, rec->ino,
3134 root->root_key.objectid, backref->dir,
3135 backref->index, backref->name,
3136 backref->namelen, BTRFS_DIR_ITEM_KEY,
3137 backref->errors);
3138 if (backref->found_dir_index)
3139 add_root_backref(dst_cache, rec->ino,
3140 root->root_key.objectid, backref->dir,
3141 backref->index, backref->name,
3142 backref->namelen, BTRFS_DIR_INDEX_KEY,
3143 backref->errors);
3145 skip:
3146 free_inode_rec(rec);
3148 if (ret < 0)
3149 return ret;
3150 return 0;
3153 static int check_root_refs(struct btrfs_root *root,
3154 struct cache_tree *root_cache)
3156 struct root_record *rec;
3157 struct root_record *ref_root;
3158 struct root_backref *backref;
3159 struct cache_extent *cache;
3160 int loop = 1;
3161 int ret;
3162 int error;
3163 int errors = 0;
3165 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3166 rec->found_ref = 1;
3168 /* fixme: this can not detect circular references */
3169 while (loop) {
3170 loop = 0;
3171 cache = search_cache_extent(root_cache, 0);
3172 while (1) {
3173 if (!cache)
3174 break;
3175 rec = container_of(cache, struct root_record, cache);
3176 cache = next_cache_extent(cache);
3178 if (rec->found_ref == 0)
3179 continue;
3181 list_for_each_entry(backref, &rec->backrefs, list) {
3182 if (!backref->reachable)
3183 continue;
3185 ref_root = get_root_rec(root_cache,
3186 backref->ref_root);
3187 if (ref_root->found_ref > 0)
3188 continue;
3190 backref->reachable = 0;
3191 rec->found_ref--;
3192 if (rec->found_ref == 0)
3193 loop = 1;
3198 cache = search_cache_extent(root_cache, 0);
3199 while (1) {
3200 if (!cache)
3201 break;
3202 rec = container_of(cache, struct root_record, cache);
3203 cache = next_cache_extent(cache);
3205 if (rec->found_ref == 0 &&
3206 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3207 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3208 ret = check_orphan_item(root->fs_info->tree_root,
3209 rec->objectid);
3210 if (ret == 0)
3211 continue;
3214 * If we don't have a root item then we likely just have
3215 * a dir item in a snapshot for this root but no actual
3216 * ref key or anything so it's meaningless.
3218 if (!rec->found_root_item)
3219 continue;
3220 errors++;
3221 fprintf(stderr, "fs tree %llu not referenced\n",
3222 (unsigned long long)rec->objectid);
3225 error = 0;
3226 if (rec->found_ref > 0 && !rec->found_root_item)
3227 error = 1;
3228 list_for_each_entry(backref, &rec->backrefs, list) {
3229 if (!backref->found_dir_item)
3230 backref->errors |= REF_ERR_NO_DIR_ITEM;
3231 if (!backref->found_dir_index)
3232 backref->errors |= REF_ERR_NO_DIR_INDEX;
3233 if (!backref->found_back_ref)
3234 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3235 if (!backref->found_forward_ref)
3236 backref->errors |= REF_ERR_NO_ROOT_REF;
3237 if (backref->reachable && backref->errors)
3238 error = 1;
3240 if (!error)
3241 continue;
3243 errors++;
3244 fprintf(stderr, "fs tree %llu refs %u %s\n",
3245 (unsigned long long)rec->objectid, rec->found_ref,
3246 rec->found_root_item ? "" : "not found");
3248 list_for_each_entry(backref, &rec->backrefs, list) {
3249 if (!backref->reachable)
3250 continue;
3251 if (!backref->errors && rec->found_root_item)
3252 continue;
3253 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3254 " index %llu namelen %u name %s errors %x\n",
3255 (unsigned long long)backref->ref_root,
3256 (unsigned long long)backref->dir,
3257 (unsigned long long)backref->index,
3258 backref->namelen, backref->name,
3259 backref->errors);
3260 print_ref_error(backref->errors);
3263 return errors > 0 ? 1 : 0;
3266 static int process_root_ref(struct extent_buffer *eb, int slot,
3267 struct btrfs_key *key,
3268 struct cache_tree *root_cache)
3270 u64 dirid;
3271 u64 index;
3272 u32 len;
3273 u32 name_len;
3274 struct btrfs_root_ref *ref;
3275 char namebuf[BTRFS_NAME_LEN];
3276 int error;
3278 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3280 dirid = btrfs_root_ref_dirid(eb, ref);
3281 index = btrfs_root_ref_sequence(eb, ref);
3282 name_len = btrfs_root_ref_name_len(eb, ref);
3284 if (name_len <= BTRFS_NAME_LEN) {
3285 len = name_len;
3286 error = 0;
3287 } else {
3288 len = BTRFS_NAME_LEN;
3289 error = REF_ERR_NAME_TOO_LONG;
3291 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3293 if (key->type == BTRFS_ROOT_REF_KEY) {
3294 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3295 index, namebuf, len, key->type, error);
3296 } else {
3297 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3298 index, namebuf, len, key->type, error);
3300 return 0;
3303 static void free_corrupt_block(struct cache_extent *cache)
3305 struct btrfs_corrupt_block *corrupt;
3307 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3308 free(corrupt);
3311 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3314 * Repair the btree of the given root.
3316 * The fix is to remove the node key in corrupt_blocks cache_tree.
3317 * and rebalance the tree.
3318 * After the fix, the btree should be writeable.
3320 static int repair_btree(struct btrfs_root *root,
3321 struct cache_tree *corrupt_blocks)
3323 struct btrfs_trans_handle *trans;
3324 struct btrfs_path *path;
3325 struct btrfs_corrupt_block *corrupt;
3326 struct cache_extent *cache;
3327 struct btrfs_key key;
3328 u64 offset;
3329 int level;
3330 int ret = 0;
3332 if (cache_tree_empty(corrupt_blocks))
3333 return 0;
3335 path = btrfs_alloc_path();
3336 if (!path)
3337 return -ENOMEM;
3339 trans = btrfs_start_transaction(root, 1);
3340 if (IS_ERR(trans)) {
3341 ret = PTR_ERR(trans);
3342 fprintf(stderr, "Error starting transaction: %s\n",
3343 strerror(-ret));
3344 goto out_free_path;
3346 cache = first_cache_extent(corrupt_blocks);
3347 while (cache) {
3348 corrupt = container_of(cache, struct btrfs_corrupt_block,
3349 cache);
3350 level = corrupt->level;
3351 path->lowest_level = level;
3352 key.objectid = corrupt->key.objectid;
3353 key.type = corrupt->key.type;
3354 key.offset = corrupt->key.offset;
3357 * Here we don't want to do any tree balance, since it may
3358 * cause a balance with corrupted brother leaf/node,
3359 * so ins_len set to 0 here.
3360 * Balance will be done after all corrupt node/leaf is deleted.
3362 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3363 if (ret < 0)
3364 goto out;
3365 offset = btrfs_node_blockptr(path->nodes[level],
3366 path->slots[level]);
3368 /* Remove the ptr */
3369 ret = btrfs_del_ptr(trans, root, path, level,
3370 path->slots[level]);
3371 if (ret < 0)
3372 goto out;
3374 * Remove the corresponding extent
3375 * return value is not concerned.
3377 btrfs_release_path(path);
3378 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3379 0, root->root_key.objectid,
3380 level - 1, 0);
3381 cache = next_cache_extent(cache);
3384 /* Balance the btree using btrfs_search_slot() */
3385 cache = first_cache_extent(corrupt_blocks);
3386 while (cache) {
3387 corrupt = container_of(cache, struct btrfs_corrupt_block,
3388 cache);
3389 memcpy(&key, &corrupt->key, sizeof(key));
3390 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3391 if (ret < 0)
3392 goto out;
3393 /* return will always >0 since it won't find the item */
3394 ret = 0;
3395 btrfs_release_path(path);
3396 cache = next_cache_extent(cache);
3398 out:
3399 btrfs_commit_transaction(trans, root);
3400 out_free_path:
3401 btrfs_free_path(path);
3402 return ret;
3405 static int check_fs_root(struct btrfs_root *root,
3406 struct cache_tree *root_cache,
3407 struct walk_control *wc)
3409 int ret = 0;
3410 int err = 0;
3411 int wret;
3412 int level;
3413 struct btrfs_path path;
3414 struct shared_node root_node;
3415 struct root_record *rec;
3416 struct btrfs_root_item *root_item = &root->root_item;
3417 struct cache_tree corrupt_blocks;
3418 struct orphan_data_extent *orphan;
3419 struct orphan_data_extent *tmp;
3420 enum btrfs_tree_block_status status;
3423 * Reuse the corrupt_block cache tree to record corrupted tree block
3425 * Unlike the usage in extent tree check, here we do it in a per
3426 * fs/subvol tree base.
3428 cache_tree_init(&corrupt_blocks);
3429 root->fs_info->corrupt_blocks = &corrupt_blocks;
3431 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3432 rec = get_root_rec(root_cache, root->root_key.objectid);
3433 if (btrfs_root_refs(root_item) > 0)
3434 rec->found_root_item = 1;
3437 btrfs_init_path(&path);
3438 memset(&root_node, 0, sizeof(root_node));
3439 cache_tree_init(&root_node.root_cache);
3440 cache_tree_init(&root_node.inode_cache);
3442 /* Move the orphan extent record to corresponding inode_record */
3443 list_for_each_entry_safe(orphan, tmp,
3444 &root->orphan_data_extents, list) {
3445 struct inode_record *inode;
3447 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3449 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3450 list_move(&orphan->list, &inode->orphan_extents);
3453 level = btrfs_header_level(root->node);
3454 memset(wc->nodes, 0, sizeof(wc->nodes));
3455 wc->nodes[level] = &root_node;
3456 wc->active_node = level;
3457 wc->root_level = level;
3459 /* We may not have checked the root block, lets do that now */
3460 if (btrfs_is_leaf(root->node))
3461 status = btrfs_check_leaf(root, NULL, root->node);
3462 else
3463 status = btrfs_check_node(root, NULL, root->node);
3464 if (status != BTRFS_TREE_BLOCK_CLEAN)
3465 return -EIO;
3467 if (btrfs_root_refs(root_item) > 0 ||
3468 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3469 path.nodes[level] = root->node;
3470 extent_buffer_get(root->node);
3471 path.slots[level] = 0;
3472 } else {
3473 struct btrfs_key key;
3474 struct btrfs_disk_key found_key;
3476 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3477 level = root_item->drop_level;
3478 path.lowest_level = level;
3479 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3480 if (wret < 0)
3481 goto skip_walking;
3482 btrfs_node_key(path.nodes[level], &found_key,
3483 path.slots[level]);
3484 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3485 sizeof(found_key)));
3488 while (1) {
3489 wret = walk_down_tree(root, &path, wc, &level);
3490 if (wret < 0)
3491 ret = wret;
3492 if (wret != 0)
3493 break;
3495 wret = walk_up_tree(root, &path, wc, &level);
3496 if (wret < 0)
3497 ret = wret;
3498 if (wret != 0)
3499 break;
3501 skip_walking:
3502 btrfs_release_path(&path);
3504 if (!cache_tree_empty(&corrupt_blocks)) {
3505 struct cache_extent *cache;
3506 struct btrfs_corrupt_block *corrupt;
3508 printf("The following tree block(s) is corrupted in tree %llu:\n",
3509 root->root_key.objectid);
3510 cache = first_cache_extent(&corrupt_blocks);
3511 while (cache) {
3512 corrupt = container_of(cache,
3513 struct btrfs_corrupt_block,
3514 cache);
3515 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3516 cache->start, corrupt->level,
3517 corrupt->key.objectid, corrupt->key.type,
3518 corrupt->key.offset);
3519 cache = next_cache_extent(cache);
3521 if (repair) {
3522 printf("Try to repair the btree for root %llu\n",
3523 root->root_key.objectid);
3524 ret = repair_btree(root, &corrupt_blocks);
3525 if (ret < 0)
3526 fprintf(stderr, "Failed to repair btree: %s\n",
3527 strerror(-ret));
3528 if (!ret)
3529 printf("Btree for root %llu is fixed\n",
3530 root->root_key.objectid);
3534 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3535 if (err < 0)
3536 ret = err;
3538 if (root_node.current) {
3539 root_node.current->checked = 1;
3540 maybe_free_inode_rec(&root_node.inode_cache,
3541 root_node.current);
3544 err = check_inode_recs(root, &root_node.inode_cache);
3545 if (!ret)
3546 ret = err;
3548 free_corrupt_blocks_tree(&corrupt_blocks);
3549 root->fs_info->corrupt_blocks = NULL;
3550 free_orphan_data_extents(&root->orphan_data_extents);
3551 return ret;
3554 static int fs_root_objectid(u64 objectid)
3556 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3557 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3558 return 1;
3559 return is_fstree(objectid);
3562 static int check_fs_roots(struct btrfs_root *root,
3563 struct cache_tree *root_cache)
3565 struct btrfs_path path;
3566 struct btrfs_key key;
3567 struct walk_control wc;
3568 struct extent_buffer *leaf, *tree_node;
3569 struct btrfs_root *tmp_root;
3570 struct btrfs_root *tree_root = root->fs_info->tree_root;
3571 int ret;
3572 int err = 0;
3574 if (ctx.progress_enabled) {
3575 ctx.tp = TASK_FS_ROOTS;
3576 task_start(ctx.info);
3580 * Just in case we made any changes to the extent tree that weren't
3581 * reflected into the free space cache yet.
3583 if (repair)
3584 reset_cached_block_groups(root->fs_info);
3585 memset(&wc, 0, sizeof(wc));
3586 cache_tree_init(&wc.shared);
3587 btrfs_init_path(&path);
3589 again:
3590 key.offset = 0;
3591 key.objectid = 0;
3592 key.type = BTRFS_ROOT_ITEM_KEY;
3593 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3594 if (ret < 0) {
3595 err = 1;
3596 goto out;
3598 tree_node = tree_root->node;
3599 while (1) {
3600 if (tree_node != tree_root->node) {
3601 free_root_recs_tree(root_cache);
3602 btrfs_release_path(&path);
3603 goto again;
3605 leaf = path.nodes[0];
3606 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3607 ret = btrfs_next_leaf(tree_root, &path);
3608 if (ret) {
3609 if (ret < 0)
3610 err = 1;
3611 break;
3613 leaf = path.nodes[0];
3615 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3616 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3617 fs_root_objectid(key.objectid)) {
3618 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3619 tmp_root = btrfs_read_fs_root_no_cache(
3620 root->fs_info, &key);
3621 } else {
3622 key.offset = (u64)-1;
3623 tmp_root = btrfs_read_fs_root(
3624 root->fs_info, &key);
3626 if (IS_ERR(tmp_root)) {
3627 err = 1;
3628 goto next;
3630 ret = check_fs_root(tmp_root, root_cache, &wc);
3631 if (ret == -EAGAIN) {
3632 free_root_recs_tree(root_cache);
3633 btrfs_release_path(&path);
3634 goto again;
3636 if (ret)
3637 err = 1;
3638 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3639 btrfs_free_fs_root(tmp_root);
3640 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3641 key.type == BTRFS_ROOT_BACKREF_KEY) {
3642 process_root_ref(leaf, path.slots[0], &key,
3643 root_cache);
3645 next:
3646 path.slots[0]++;
3648 out:
3649 btrfs_release_path(&path);
3650 if (err)
3651 free_extent_cache_tree(&wc.shared);
3652 if (!cache_tree_empty(&wc.shared))
3653 fprintf(stderr, "warning line %d\n", __LINE__);
3655 task_stop(ctx.info);
3657 return err;
3660 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3662 struct list_head *cur = rec->backrefs.next;
3663 struct extent_backref *back;
3664 struct tree_backref *tback;
3665 struct data_backref *dback;
3666 u64 found = 0;
3667 int err = 0;
3669 while(cur != &rec->backrefs) {
3670 back = list_entry(cur, struct extent_backref, list);
3671 cur = cur->next;
3672 if (!back->found_extent_tree) {
3673 err = 1;
3674 if (!print_errs)
3675 goto out;
3676 if (back->is_data) {
3677 dback = (struct data_backref *)back;
3678 fprintf(stderr, "Backref %llu %s %llu"
3679 " owner %llu offset %llu num_refs %lu"
3680 " not found in extent tree\n",
3681 (unsigned long long)rec->start,
3682 back->full_backref ?
3683 "parent" : "root",
3684 back->full_backref ?
3685 (unsigned long long)dback->parent:
3686 (unsigned long long)dback->root,
3687 (unsigned long long)dback->owner,
3688 (unsigned long long)dback->offset,
3689 (unsigned long)dback->num_refs);
3690 } else {
3691 tback = (struct tree_backref *)back;
3692 fprintf(stderr, "Backref %llu parent %llu"
3693 " root %llu not found in extent tree\n",
3694 (unsigned long long)rec->start,
3695 (unsigned long long)tback->parent,
3696 (unsigned long long)tback->root);
3699 if (!back->is_data && !back->found_ref) {
3700 err = 1;
3701 if (!print_errs)
3702 goto out;
3703 tback = (struct tree_backref *)back;
3704 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3705 (unsigned long long)rec->start,
3706 back->full_backref ? "parent" : "root",
3707 back->full_backref ?
3708 (unsigned long long)tback->parent :
3709 (unsigned long long)tback->root, back);
3711 if (back->is_data) {
3712 dback = (struct data_backref *)back;
3713 if (dback->found_ref != dback->num_refs) {
3714 err = 1;
3715 if (!print_errs)
3716 goto out;
3717 fprintf(stderr, "Incorrect local backref count"
3718 " on %llu %s %llu owner %llu"
3719 " offset %llu found %u wanted %u back %p\n",
3720 (unsigned long long)rec->start,
3721 back->full_backref ?
3722 "parent" : "root",
3723 back->full_backref ?
3724 (unsigned long long)dback->parent:
3725 (unsigned long long)dback->root,
3726 (unsigned long long)dback->owner,
3727 (unsigned long long)dback->offset,
3728 dback->found_ref, dback->num_refs, back);
3730 if (dback->disk_bytenr != rec->start) {
3731 err = 1;
3732 if (!print_errs)
3733 goto out;
3734 fprintf(stderr, "Backref disk bytenr does not"
3735 " match extent record, bytenr=%llu, "
3736 "ref bytenr=%llu\n",
3737 (unsigned long long)rec->start,
3738 (unsigned long long)dback->disk_bytenr);
3741 if (dback->bytes != rec->nr) {
3742 err = 1;
3743 if (!print_errs)
3744 goto out;
3745 fprintf(stderr, "Backref bytes do not match "
3746 "extent backref, bytenr=%llu, ref "
3747 "bytes=%llu, backref bytes=%llu\n",
3748 (unsigned long long)rec->start,
3749 (unsigned long long)rec->nr,
3750 (unsigned long long)dback->bytes);
3753 if (!back->is_data) {
3754 found += 1;
3755 } else {
3756 dback = (struct data_backref *)back;
3757 found += dback->found_ref;
3760 if (found != rec->refs) {
3761 err = 1;
3762 if (!print_errs)
3763 goto out;
3764 fprintf(stderr, "Incorrect global backref count "
3765 "on %llu found %llu wanted %llu\n",
3766 (unsigned long long)rec->start,
3767 (unsigned long long)found,
3768 (unsigned long long)rec->refs);
3770 out:
3771 return err;
3774 static int free_all_extent_backrefs(struct extent_record *rec)
3776 struct extent_backref *back;
3777 struct list_head *cur;
3778 while (!list_empty(&rec->backrefs)) {
3779 cur = rec->backrefs.next;
3780 back = list_entry(cur, struct extent_backref, list);
3781 list_del(cur);
3782 free(back);
3784 return 0;
3787 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3788 struct cache_tree *extent_cache)
3790 struct cache_extent *cache;
3791 struct extent_record *rec;
3793 while (1) {
3794 cache = first_cache_extent(extent_cache);
3795 if (!cache)
3796 break;
3797 rec = container_of(cache, struct extent_record, cache);
3798 remove_cache_extent(extent_cache, cache);
3799 free_all_extent_backrefs(rec);
3800 free(rec);
3804 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3805 struct extent_record *rec)
3807 if (rec->content_checked && rec->owner_ref_checked &&
3808 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3809 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3810 !rec->bad_full_backref && !rec->crossing_stripes &&
3811 !rec->wrong_chunk_type) {
3812 remove_cache_extent(extent_cache, &rec->cache);
3813 free_all_extent_backrefs(rec);
3814 list_del_init(&rec->list);
3815 free(rec);
3817 return 0;
3820 static int check_owner_ref(struct btrfs_root *root,
3821 struct extent_record *rec,
3822 struct extent_buffer *buf)
3824 struct extent_backref *node;
3825 struct tree_backref *back;
3826 struct btrfs_root *ref_root;
3827 struct btrfs_key key;
3828 struct btrfs_path path;
3829 struct extent_buffer *parent;
3830 int level;
3831 int found = 0;
3832 int ret;
3834 list_for_each_entry(node, &rec->backrefs, list) {
3835 if (node->is_data)
3836 continue;
3837 if (!node->found_ref)
3838 continue;
3839 if (node->full_backref)
3840 continue;
3841 back = (struct tree_backref *)node;
3842 if (btrfs_header_owner(buf) == back->root)
3843 return 0;
3845 BUG_ON(rec->is_root);
3847 /* try to find the block by search corresponding fs tree */
3848 key.objectid = btrfs_header_owner(buf);
3849 key.type = BTRFS_ROOT_ITEM_KEY;
3850 key.offset = (u64)-1;
3852 ref_root = btrfs_read_fs_root(root->fs_info, &key);
3853 if (IS_ERR(ref_root))
3854 return 1;
3856 level = btrfs_header_level(buf);
3857 if (level == 0)
3858 btrfs_item_key_to_cpu(buf, &key, 0);
3859 else
3860 btrfs_node_key_to_cpu(buf, &key, 0);
3862 btrfs_init_path(&path);
3863 path.lowest_level = level + 1;
3864 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3865 if (ret < 0)
3866 return 0;
3868 parent = path.nodes[level + 1];
3869 if (parent && buf->start == btrfs_node_blockptr(parent,
3870 path.slots[level + 1]))
3871 found = 1;
3873 btrfs_release_path(&path);
3874 return found ? 0 : 1;
3877 static int is_extent_tree_record(struct extent_record *rec)
3879 struct list_head *cur = rec->backrefs.next;
3880 struct extent_backref *node;
3881 struct tree_backref *back;
3882 int is_extent = 0;
3884 while(cur != &rec->backrefs) {
3885 node = list_entry(cur, struct extent_backref, list);
3886 cur = cur->next;
3887 if (node->is_data)
3888 return 0;
3889 back = (struct tree_backref *)node;
3890 if (node->full_backref)
3891 return 0;
3892 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3893 is_extent = 1;
3895 return is_extent;
3899 static int record_bad_block_io(struct btrfs_fs_info *info,
3900 struct cache_tree *extent_cache,
3901 u64 start, u64 len)
3903 struct extent_record *rec;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3907 cache = lookup_cache_extent(extent_cache, start, len);
3908 if (!cache)
3909 return 0;
3911 rec = container_of(cache, struct extent_record, cache);
3912 if (!is_extent_tree_record(rec))
3913 return 0;
3915 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3916 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
3920 struct extent_buffer *buf, int slot)
3922 if (btrfs_header_level(buf)) {
3923 struct btrfs_key_ptr ptr1, ptr2;
3925 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
3926 sizeof(struct btrfs_key_ptr));
3927 read_extent_buffer(buf, &ptr2,
3928 btrfs_node_key_ptr_offset(slot + 1),
3929 sizeof(struct btrfs_key_ptr));
3930 write_extent_buffer(buf, &ptr1,
3931 btrfs_node_key_ptr_offset(slot + 1),
3932 sizeof(struct btrfs_key_ptr));
3933 write_extent_buffer(buf, &ptr2,
3934 btrfs_node_key_ptr_offset(slot),
3935 sizeof(struct btrfs_key_ptr));
3936 if (slot == 0) {
3937 struct btrfs_disk_key key;
3938 btrfs_node_key(buf, &key, 0);
3939 btrfs_fixup_low_keys(root, path, &key,
3940 btrfs_header_level(buf) + 1);
3942 } else {
3943 struct btrfs_item *item1, *item2;
3944 struct btrfs_key k1, k2;
3945 char *item1_data, *item2_data;
3946 u32 item1_offset, item2_offset, item1_size, item2_size;
3948 item1 = btrfs_item_nr(slot);
3949 item2 = btrfs_item_nr(slot + 1);
3950 btrfs_item_key_to_cpu(buf, &k1, slot);
3951 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
3952 item1_offset = btrfs_item_offset(buf, item1);
3953 item2_offset = btrfs_item_offset(buf, item2);
3954 item1_size = btrfs_item_size(buf, item1);
3955 item2_size = btrfs_item_size(buf, item2);
3957 item1_data = malloc(item1_size);
3958 if (!item1_data)
3959 return -ENOMEM;
3960 item2_data = malloc(item2_size);
3961 if (!item2_data) {
3962 free(item1_data);
3963 return -ENOMEM;
3966 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
3967 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
3969 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
3970 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
3971 free(item1_data);
3972 free(item2_data);
3974 btrfs_set_item_offset(buf, item1, item2_offset);
3975 btrfs_set_item_offset(buf, item2, item1_offset);
3976 btrfs_set_item_size(buf, item1, item2_size);
3977 btrfs_set_item_size(buf, item2, item1_size);
3979 path->slots[0] = slot;
3980 btrfs_set_item_key_unsafe(root, path, &k2);
3981 path->slots[0] = slot + 1;
3982 btrfs_set_item_key_unsafe(root, path, &k1);
3984 return 0;
3987 static int fix_key_order(struct btrfs_trans_handle *trans,
3988 struct btrfs_root *root,
3989 struct btrfs_path *path)
3991 struct extent_buffer *buf;
3992 struct btrfs_key k1, k2;
3993 int i;
3994 int level = path->lowest_level;
3995 int ret = -EIO;
3997 buf = path->nodes[level];
3998 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
3999 if (level) {
4000 btrfs_node_key_to_cpu(buf, &k1, i);
4001 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4002 } else {
4003 btrfs_item_key_to_cpu(buf, &k1, i);
4004 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4006 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4007 continue;
4008 ret = swap_values(root, path, buf, i);
4009 if (ret)
4010 break;
4011 btrfs_mark_buffer_dirty(buf);
4012 i = 0;
4014 return ret;
4017 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4018 struct btrfs_root *root,
4019 struct btrfs_path *path,
4020 struct extent_buffer *buf, int slot)
4022 struct btrfs_key key;
4023 int nritems = btrfs_header_nritems(buf);
4025 btrfs_item_key_to_cpu(buf, &key, slot);
4027 /* These are all the keys we can deal with missing. */
4028 if (key.type != BTRFS_DIR_INDEX_KEY &&
4029 key.type != BTRFS_EXTENT_ITEM_KEY &&
4030 key.type != BTRFS_METADATA_ITEM_KEY &&
4031 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4032 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4033 return -1;
4035 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4036 (unsigned long long)key.objectid, key.type,
4037 (unsigned long long)key.offset, slot, buf->start);
4038 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4039 btrfs_item_nr_offset(slot + 1),
4040 sizeof(struct btrfs_item) *
4041 (nritems - slot - 1));
4042 btrfs_set_header_nritems(buf, nritems - 1);
4043 if (slot == 0) {
4044 struct btrfs_disk_key disk_key;
4046 btrfs_item_key(buf, &disk_key, 0);
4047 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4049 btrfs_mark_buffer_dirty(buf);
4050 return 0;
4053 static int fix_item_offset(struct btrfs_trans_handle *trans,
4054 struct btrfs_root *root,
4055 struct btrfs_path *path)
4057 struct extent_buffer *buf;
4058 int i;
4059 int ret = 0;
4061 /* We should only get this for leaves */
4062 BUG_ON(path->lowest_level);
4063 buf = path->nodes[0];
4064 again:
4065 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4066 unsigned int shift = 0, offset;
4068 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4069 BTRFS_LEAF_DATA_SIZE(root)) {
4070 if (btrfs_item_end_nr(buf, i) >
4071 BTRFS_LEAF_DATA_SIZE(root)) {
4072 ret = delete_bogus_item(trans, root, path,
4073 buf, i);
4074 if (!ret)
4075 goto again;
4076 fprintf(stderr, "item is off the end of the "
4077 "leaf, can't fix\n");
4078 ret = -EIO;
4079 break;
4081 shift = BTRFS_LEAF_DATA_SIZE(root) -
4082 btrfs_item_end_nr(buf, i);
4083 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4084 btrfs_item_offset_nr(buf, i - 1)) {
4085 if (btrfs_item_end_nr(buf, i) >
4086 btrfs_item_offset_nr(buf, i - 1)) {
4087 ret = delete_bogus_item(trans, root, path,
4088 buf, i);
4089 if (!ret)
4090 goto again;
4091 fprintf(stderr, "items overlap, can't fix\n");
4092 ret = -EIO;
4093 break;
4095 shift = btrfs_item_offset_nr(buf, i - 1) -
4096 btrfs_item_end_nr(buf, i);
4098 if (!shift)
4099 continue;
4101 printf("Shifting item nr %d by %u bytes in block %llu\n",
4102 i, shift, (unsigned long long)buf->start);
4103 offset = btrfs_item_offset_nr(buf, i);
4104 memmove_extent_buffer(buf,
4105 btrfs_leaf_data(buf) + offset + shift,
4106 btrfs_leaf_data(buf) + offset,
4107 btrfs_item_size_nr(buf, i));
4108 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4109 offset + shift);
4110 btrfs_mark_buffer_dirty(buf);
4114 * We may have moved things, in which case we want to exit so we don't
4115 * write those changes out. Once we have proper abort functionality in
4116 * progs this can be changed to something nicer.
4118 BUG_ON(ret);
4119 return ret;
4123 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4124 * then just return -EIO.
4126 static int try_to_fix_bad_block(struct btrfs_root *root,
4127 struct extent_buffer *buf,
4128 enum btrfs_tree_block_status status)
4130 struct btrfs_trans_handle *trans;
4131 struct ulist *roots;
4132 struct ulist_node *node;
4133 struct btrfs_root *search_root;
4134 struct btrfs_path *path;
4135 struct ulist_iterator iter;
4136 struct btrfs_key root_key, key;
4137 int ret;
4139 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4140 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4141 return -EIO;
4143 path = btrfs_alloc_path();
4144 if (!path)
4145 return -EIO;
4147 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4148 0, &roots);
4149 if (ret) {
4150 btrfs_free_path(path);
4151 return -EIO;
4154 ULIST_ITER_INIT(&iter);
4155 while ((node = ulist_next(roots, &iter))) {
4156 root_key.objectid = node->val;
4157 root_key.type = BTRFS_ROOT_ITEM_KEY;
4158 root_key.offset = (u64)-1;
4160 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4161 if (IS_ERR(root)) {
4162 ret = -EIO;
4163 break;
4167 trans = btrfs_start_transaction(search_root, 0);
4168 if (IS_ERR(trans)) {
4169 ret = PTR_ERR(trans);
4170 break;
4173 path->lowest_level = btrfs_header_level(buf);
4174 path->skip_check_block = 1;
4175 if (path->lowest_level)
4176 btrfs_node_key_to_cpu(buf, &key, 0);
4177 else
4178 btrfs_item_key_to_cpu(buf, &key, 0);
4179 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4180 if (ret) {
4181 ret = -EIO;
4182 btrfs_commit_transaction(trans, search_root);
4183 break;
4185 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4186 ret = fix_key_order(trans, search_root, path);
4187 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4188 ret = fix_item_offset(trans, search_root, path);
4189 if (ret) {
4190 btrfs_commit_transaction(trans, search_root);
4191 break;
4193 btrfs_release_path(path);
4194 btrfs_commit_transaction(trans, search_root);
4196 ulist_free(roots);
4197 btrfs_free_path(path);
4198 return ret;
4201 static int check_block(struct btrfs_root *root,
4202 struct cache_tree *extent_cache,
4203 struct extent_buffer *buf, u64 flags)
4205 struct extent_record *rec;
4206 struct cache_extent *cache;
4207 struct btrfs_key key;
4208 enum btrfs_tree_block_status status;
4209 int ret = 0;
4210 int level;
4212 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4213 if (!cache)
4214 return 1;
4215 rec = container_of(cache, struct extent_record, cache);
4216 rec->generation = btrfs_header_generation(buf);
4218 level = btrfs_header_level(buf);
4219 if (btrfs_header_nritems(buf) > 0) {
4221 if (level == 0)
4222 btrfs_item_key_to_cpu(buf, &key, 0);
4223 else
4224 btrfs_node_key_to_cpu(buf, &key, 0);
4226 rec->info_objectid = key.objectid;
4228 rec->info_level = level;
4230 if (btrfs_is_leaf(buf))
4231 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4232 else
4233 status = btrfs_check_node(root, &rec->parent_key, buf);
4235 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4236 if (repair)
4237 status = try_to_fix_bad_block(root, buf, status);
4238 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4239 ret = -EIO;
4240 fprintf(stderr, "bad block %llu\n",
4241 (unsigned long long)buf->start);
4242 } else {
4244 * Signal to callers we need to start the scan over
4245 * again since we'll have cow'ed blocks.
4247 ret = -EAGAIN;
4249 } else {
4250 rec->content_checked = 1;
4251 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4252 rec->owner_ref_checked = 1;
4253 else {
4254 ret = check_owner_ref(root, rec, buf);
4255 if (!ret)
4256 rec->owner_ref_checked = 1;
4259 if (!ret)
4260 maybe_free_extent_rec(extent_cache, rec);
4261 return ret;
4264 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4265 u64 parent, u64 root)
4267 struct list_head *cur = rec->backrefs.next;
4268 struct extent_backref *node;
4269 struct tree_backref *back;
4271 while(cur != &rec->backrefs) {
4272 node = list_entry(cur, struct extent_backref, list);
4273 cur = cur->next;
4274 if (node->is_data)
4275 continue;
4276 back = (struct tree_backref *)node;
4277 if (parent > 0) {
4278 if (!node->full_backref)
4279 continue;
4280 if (parent == back->parent)
4281 return back;
4282 } else {
4283 if (node->full_backref)
4284 continue;
4285 if (back->root == root)
4286 return back;
4289 return NULL;
4292 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4293 u64 parent, u64 root)
4295 struct tree_backref *ref = malloc(sizeof(*ref));
4296 memset(&ref->node, 0, sizeof(ref->node));
4297 if (parent > 0) {
4298 ref->parent = parent;
4299 ref->node.full_backref = 1;
4300 } else {
4301 ref->root = root;
4302 ref->node.full_backref = 0;
4304 list_add_tail(&ref->node.list, &rec->backrefs);
4306 return ref;
4309 static struct data_backref *find_data_backref(struct extent_record *rec,
4310 u64 parent, u64 root,
4311 u64 owner, u64 offset,
4312 int found_ref,
4313 u64 disk_bytenr, u64 bytes)
4315 struct list_head *cur = rec->backrefs.next;
4316 struct extent_backref *node;
4317 struct data_backref *back;
4319 while(cur != &rec->backrefs) {
4320 node = list_entry(cur, struct extent_backref, list);
4321 cur = cur->next;
4322 if (!node->is_data)
4323 continue;
4324 back = (struct data_backref *)node;
4325 if (parent > 0) {
4326 if (!node->full_backref)
4327 continue;
4328 if (parent == back->parent)
4329 return back;
4330 } else {
4331 if (node->full_backref)
4332 continue;
4333 if (back->root == root && back->owner == owner &&
4334 back->offset == offset) {
4335 if (found_ref && node->found_ref &&
4336 (back->bytes != bytes ||
4337 back->disk_bytenr != disk_bytenr))
4338 continue;
4339 return back;
4343 return NULL;
4346 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4347 u64 parent, u64 root,
4348 u64 owner, u64 offset,
4349 u64 max_size)
4351 struct data_backref *ref = malloc(sizeof(*ref));
4352 memset(&ref->node, 0, sizeof(ref->node));
4353 ref->node.is_data = 1;
4355 if (parent > 0) {
4356 ref->parent = parent;
4357 ref->owner = 0;
4358 ref->offset = 0;
4359 ref->node.full_backref = 1;
4360 } else {
4361 ref->root = root;
4362 ref->owner = owner;
4363 ref->offset = offset;
4364 ref->node.full_backref = 0;
4366 ref->bytes = max_size;
4367 ref->found_ref = 0;
4368 ref->num_refs = 0;
4369 list_add_tail(&ref->node.list, &rec->backrefs);
4370 if (max_size > rec->max_size)
4371 rec->max_size = max_size;
4372 return ref;
4375 /* Check if the type of extent matches with its chunk */
4376 static void check_extent_type(struct extent_record *rec)
4378 struct btrfs_block_group_cache *bg_cache;
4380 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4381 if (!bg_cache)
4382 return;
4384 /* data extent, check chunk directly*/
4385 if (!rec->metadata) {
4386 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4387 rec->wrong_chunk_type = 1;
4388 return;
4391 /* metadata extent, check the obvious case first */
4392 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4393 BTRFS_BLOCK_GROUP_METADATA))) {
4394 rec->wrong_chunk_type = 1;
4395 return;
4399 * Check SYSTEM extent, as it's also marked as metadata, we can only
4400 * make sure it's a SYSTEM extent by its backref
4402 if (!list_empty(&rec->backrefs)) {
4403 struct extent_backref *node;
4404 struct tree_backref *tback;
4405 u64 bg_type;
4407 node = list_entry(rec->backrefs.next, struct extent_backref,
4408 list);
4409 if (node->is_data) {
4410 /* tree block shouldn't have data backref */
4411 rec->wrong_chunk_type = 1;
4412 return;
4414 tback = container_of(node, struct tree_backref, node);
4416 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4417 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4418 else
4419 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4420 if (!(bg_cache->flags & bg_type))
4421 rec->wrong_chunk_type = 1;
4425 static int add_extent_rec(struct cache_tree *extent_cache,
4426 struct btrfs_key *parent_key, u64 parent_gen,
4427 u64 start, u64 nr, u64 extent_item_refs,
4428 int is_root, int inc_ref, int set_checked,
4429 int metadata, int extent_rec, u64 max_size)
4431 struct extent_record *rec;
4432 struct cache_extent *cache;
4433 int ret = 0;
4434 int dup = 0;
4436 cache = lookup_cache_extent(extent_cache, start, nr);
4437 if (cache) {
4438 rec = container_of(cache, struct extent_record, cache);
4439 if (inc_ref)
4440 rec->refs++;
4441 if (rec->nr == 1)
4442 rec->nr = max(nr, max_size);
4445 * We need to make sure to reset nr to whatever the extent
4446 * record says was the real size, this way we can compare it to
4447 * the backrefs.
4449 if (extent_rec) {
4450 if (start != rec->start || rec->found_rec) {
4451 struct extent_record *tmp;
4453 dup = 1;
4454 if (list_empty(&rec->list))
4455 list_add_tail(&rec->list,
4456 &duplicate_extents);
4459 * We have to do this song and dance in case we
4460 * find an extent record that falls inside of
4461 * our current extent record but does not have
4462 * the same objectid.
4464 tmp = malloc(sizeof(*tmp));
4465 if (!tmp)
4466 return -ENOMEM;
4467 tmp->start = start;
4468 tmp->max_size = max_size;
4469 tmp->nr = nr;
4470 tmp->found_rec = 1;
4471 tmp->metadata = metadata;
4472 tmp->extent_item_refs = extent_item_refs;
4473 INIT_LIST_HEAD(&tmp->list);
4474 list_add_tail(&tmp->list, &rec->dups);
4475 rec->num_duplicates++;
4476 } else {
4477 rec->nr = nr;
4478 rec->found_rec = 1;
4482 if (extent_item_refs && !dup) {
4483 if (rec->extent_item_refs) {
4484 fprintf(stderr, "block %llu rec "
4485 "extent_item_refs %llu, passed %llu\n",
4486 (unsigned long long)start,
4487 (unsigned long long)
4488 rec->extent_item_refs,
4489 (unsigned long long)extent_item_refs);
4491 rec->extent_item_refs = extent_item_refs;
4493 if (is_root)
4494 rec->is_root = 1;
4495 if (set_checked) {
4496 rec->content_checked = 1;
4497 rec->owner_ref_checked = 1;
4500 if (parent_key)
4501 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4502 if (parent_gen)
4503 rec->parent_generation = parent_gen;
4505 if (rec->max_size < max_size)
4506 rec->max_size = max_size;
4509 * A metadata extent can't cross stripe_len boundary, otherwise
4510 * kernel scrub won't be able to handle it.
4511 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4512 * it.
4514 if (metadata && check_crossing_stripes(rec->start,
4515 rec->max_size))
4516 rec->crossing_stripes = 1;
4517 check_extent_type(rec);
4518 maybe_free_extent_rec(extent_cache, rec);
4519 return ret;
4521 rec = malloc(sizeof(*rec));
4522 rec->start = start;
4523 rec->max_size = max_size;
4524 rec->nr = max(nr, max_size);
4525 rec->found_rec = !!extent_rec;
4526 rec->content_checked = 0;
4527 rec->owner_ref_checked = 0;
4528 rec->num_duplicates = 0;
4529 rec->metadata = metadata;
4530 rec->flag_block_full_backref = -1;
4531 rec->bad_full_backref = 0;
4532 rec->crossing_stripes = 0;
4533 rec->wrong_chunk_type = 0;
4534 INIT_LIST_HEAD(&rec->backrefs);
4535 INIT_LIST_HEAD(&rec->dups);
4536 INIT_LIST_HEAD(&rec->list);
4538 if (is_root)
4539 rec->is_root = 1;
4540 else
4541 rec->is_root = 0;
4543 if (inc_ref)
4544 rec->refs = 1;
4545 else
4546 rec->refs = 0;
4548 if (extent_item_refs)
4549 rec->extent_item_refs = extent_item_refs;
4550 else
4551 rec->extent_item_refs = 0;
4553 if (parent_key)
4554 btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4555 else
4556 memset(&rec->parent_key, 0, sizeof(*parent_key));
4558 if (parent_gen)
4559 rec->parent_generation = parent_gen;
4560 else
4561 rec->parent_generation = 0;
4563 rec->cache.start = start;
4564 rec->cache.size = nr;
4565 ret = insert_cache_extent(extent_cache, &rec->cache);
4566 BUG_ON(ret);
4567 bytes_used += nr;
4568 if (set_checked) {
4569 rec->content_checked = 1;
4570 rec->owner_ref_checked = 1;
4573 if (metadata)
4574 if (check_crossing_stripes(rec->start, rec->max_size))
4575 rec->crossing_stripes = 1;
4576 check_extent_type(rec);
4577 return ret;
4580 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4581 u64 parent, u64 root, int found_ref)
4583 struct extent_record *rec;
4584 struct tree_backref *back;
4585 struct cache_extent *cache;
4587 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4588 if (!cache) {
4589 add_extent_rec(extent_cache, NULL, 0, bytenr,
4590 1, 0, 0, 0, 0, 1, 0, 0);
4591 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4592 if (!cache)
4593 abort();
4596 rec = container_of(cache, struct extent_record, cache);
4597 if (rec->start != bytenr) {
4598 abort();
4601 back = find_tree_backref(rec, parent, root);
4602 if (!back)
4603 back = alloc_tree_backref(rec, parent, root);
4605 if (found_ref) {
4606 if (back->node.found_ref) {
4607 fprintf(stderr, "Extent back ref already exists "
4608 "for %llu parent %llu root %llu \n",
4609 (unsigned long long)bytenr,
4610 (unsigned long long)parent,
4611 (unsigned long long)root);
4613 back->node.found_ref = 1;
4614 } else {
4615 if (back->node.found_extent_tree) {
4616 fprintf(stderr, "Extent back ref already exists "
4617 "for %llu parent %llu root %llu \n",
4618 (unsigned long long)bytenr,
4619 (unsigned long long)parent,
4620 (unsigned long long)root);
4622 back->node.found_extent_tree = 1;
4624 check_extent_type(rec);
4625 maybe_free_extent_rec(extent_cache, rec);
4626 return 0;
4629 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4630 u64 parent, u64 root, u64 owner, u64 offset,
4631 u32 num_refs, int found_ref, u64 max_size)
4633 struct extent_record *rec;
4634 struct data_backref *back;
4635 struct cache_extent *cache;
4637 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4638 if (!cache) {
4639 add_extent_rec(extent_cache, NULL, 0, bytenr, 1, 0, 0, 0, 0,
4640 0, 0, max_size);
4641 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4642 if (!cache)
4643 abort();
4646 rec = container_of(cache, struct extent_record, cache);
4647 if (rec->max_size < max_size)
4648 rec->max_size = max_size;
4651 * If found_ref is set then max_size is the real size and must match the
4652 * existing refs. So if we have already found a ref then we need to
4653 * make sure that this ref matches the existing one, otherwise we need
4654 * to add a new backref so we can notice that the backrefs don't match
4655 * and we need to figure out who is telling the truth. This is to
4656 * account for that awful fsync bug I introduced where we'd end up with
4657 * a btrfs_file_extent_item that would have its length include multiple
4658 * prealloc extents or point inside of a prealloc extent.
4660 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4661 bytenr, max_size);
4662 if (!back)
4663 back = alloc_data_backref(rec, parent, root, owner, offset,
4664 max_size);
4666 if (found_ref) {
4667 BUG_ON(num_refs != 1);
4668 if (back->node.found_ref)
4669 BUG_ON(back->bytes != max_size);
4670 back->node.found_ref = 1;
4671 back->found_ref += 1;
4672 back->bytes = max_size;
4673 back->disk_bytenr = bytenr;
4674 rec->refs += 1;
4675 rec->content_checked = 1;
4676 rec->owner_ref_checked = 1;
4677 } else {
4678 if (back->node.found_extent_tree) {
4679 fprintf(stderr, "Extent back ref already exists "
4680 "for %llu parent %llu root %llu "
4681 "owner %llu offset %llu num_refs %lu\n",
4682 (unsigned long long)bytenr,
4683 (unsigned long long)parent,
4684 (unsigned long long)root,
4685 (unsigned long long)owner,
4686 (unsigned long long)offset,
4687 (unsigned long)num_refs);
4689 back->num_refs = num_refs;
4690 back->node.found_extent_tree = 1;
4692 maybe_free_extent_rec(extent_cache, rec);
4693 return 0;
4696 static int add_pending(struct cache_tree *pending,
4697 struct cache_tree *seen, u64 bytenr, u32 size)
4699 int ret;
4700 ret = add_cache_extent(seen, bytenr, size);
4701 if (ret)
4702 return ret;
4703 add_cache_extent(pending, bytenr, size);
4704 return 0;
4707 static int pick_next_pending(struct cache_tree *pending,
4708 struct cache_tree *reada,
4709 struct cache_tree *nodes,
4710 u64 last, struct block_info *bits, int bits_nr,
4711 int *reada_bits)
4713 unsigned long node_start = last;
4714 struct cache_extent *cache;
4715 int ret;
4717 cache = search_cache_extent(reada, 0);
4718 if (cache) {
4719 bits[0].start = cache->start;
4720 bits[0].size = cache->size;
4721 *reada_bits = 1;
4722 return 1;
4724 *reada_bits = 0;
4725 if (node_start > 32768)
4726 node_start -= 32768;
4728 cache = search_cache_extent(nodes, node_start);
4729 if (!cache)
4730 cache = search_cache_extent(nodes, 0);
4732 if (!cache) {
4733 cache = search_cache_extent(pending, 0);
4734 if (!cache)
4735 return 0;
4736 ret = 0;
4737 do {
4738 bits[ret].start = cache->start;
4739 bits[ret].size = cache->size;
4740 cache = next_cache_extent(cache);
4741 ret++;
4742 } while (cache && ret < bits_nr);
4743 return ret;
4746 ret = 0;
4747 do {
4748 bits[ret].start = cache->start;
4749 bits[ret].size = cache->size;
4750 cache = next_cache_extent(cache);
4751 ret++;
4752 } while (cache && ret < bits_nr);
4754 if (bits_nr - ret > 8) {
4755 u64 lookup = bits[0].start + bits[0].size;
4756 struct cache_extent *next;
4757 next = search_cache_extent(pending, lookup);
4758 while(next) {
4759 if (next->start - lookup > 32768)
4760 break;
4761 bits[ret].start = next->start;
4762 bits[ret].size = next->size;
4763 lookup = next->start + next->size;
4764 ret++;
4765 if (ret == bits_nr)
4766 break;
4767 next = next_cache_extent(next);
4768 if (!next)
4769 break;
4772 return ret;
4775 static void free_chunk_record(struct cache_extent *cache)
4777 struct chunk_record *rec;
4779 rec = container_of(cache, struct chunk_record, cache);
4780 list_del_init(&rec->list);
4781 list_del_init(&rec->dextents);
4782 free(rec);
4785 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4787 cache_tree_free_extents(chunk_cache, free_chunk_record);
4790 static void free_device_record(struct rb_node *node)
4792 struct device_record *rec;
4794 rec = container_of(node, struct device_record, node);
4795 free(rec);
4798 FREE_RB_BASED_TREE(device_cache, free_device_record);
4800 int insert_block_group_record(struct block_group_tree *tree,
4801 struct block_group_record *bg_rec)
4803 int ret;
4805 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4806 if (ret)
4807 return ret;
4809 list_add_tail(&bg_rec->list, &tree->block_groups);
4810 return 0;
4813 static void free_block_group_record(struct cache_extent *cache)
4815 struct block_group_record *rec;
4817 rec = container_of(cache, struct block_group_record, cache);
4818 list_del_init(&rec->list);
4819 free(rec);
4822 void free_block_group_tree(struct block_group_tree *tree)
4824 cache_tree_free_extents(&tree->tree, free_block_group_record);
4827 int insert_device_extent_record(struct device_extent_tree *tree,
4828 struct device_extent_record *de_rec)
4830 int ret;
4833 * Device extent is a bit different from the other extents, because
4834 * the extents which belong to the different devices may have the
4835 * same start and size, so we need use the special extent cache
4836 * search/insert functions.
4838 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4839 if (ret)
4840 return ret;
4842 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4843 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4844 return 0;
4847 static void free_device_extent_record(struct cache_extent *cache)
4849 struct device_extent_record *rec;
4851 rec = container_of(cache, struct device_extent_record, cache);
4852 if (!list_empty(&rec->chunk_list))
4853 list_del_init(&rec->chunk_list);
4854 if (!list_empty(&rec->device_list))
4855 list_del_init(&rec->device_list);
4856 free(rec);
4859 void free_device_extent_tree(struct device_extent_tree *tree)
4861 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref entry.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs; all others are recorded as data backrefs carrying the
 * item's ref count.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_key key;
	struct btrfs_extent_ref_v0 *ref0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
		return 0;
	}

	add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	return 0;
}
#endif
4883 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4884 struct btrfs_key *key,
4885 int slot)
4887 struct btrfs_chunk *ptr;
4888 struct chunk_record *rec;
4889 int num_stripes, i;
4891 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4892 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
4894 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
4895 if (!rec) {
4896 fprintf(stderr, "memory allocation failed\n");
4897 exit(-1);
4900 INIT_LIST_HEAD(&rec->list);
4901 INIT_LIST_HEAD(&rec->dextents);
4902 rec->bg_rec = NULL;
4904 rec->cache.start = key->offset;
4905 rec->cache.size = btrfs_chunk_length(leaf, ptr);
4907 rec->generation = btrfs_header_generation(leaf);
4909 rec->objectid = key->objectid;
4910 rec->type = key->type;
4911 rec->offset = key->offset;
4913 rec->length = rec->cache.size;
4914 rec->owner = btrfs_chunk_owner(leaf, ptr);
4915 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
4916 rec->type_flags = btrfs_chunk_type(leaf, ptr);
4917 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
4918 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
4919 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
4920 rec->num_stripes = num_stripes;
4921 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
4923 for (i = 0; i < rec->num_stripes; ++i) {
4924 rec->stripes[i].devid =
4925 btrfs_stripe_devid_nr(leaf, ptr, i);
4926 rec->stripes[i].offset =
4927 btrfs_stripe_offset_nr(leaf, ptr, i);
4928 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
4929 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
4930 BTRFS_UUID_SIZE);
4933 return rec;
4936 static int process_chunk_item(struct cache_tree *chunk_cache,
4937 struct btrfs_key *key, struct extent_buffer *eb,
4938 int slot)
4940 struct chunk_record *rec;
4941 int ret = 0;
4943 rec = btrfs_new_chunk_record(eb, key, slot);
4944 ret = insert_cache_extent(chunk_cache, &rec->cache);
4945 if (ret) {
4946 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
4947 rec->offset, rec->length);
4948 free(rec);
4951 return ret;
4954 static int process_device_item(struct rb_root *dev_cache,
4955 struct btrfs_key *key, struct extent_buffer *eb, int slot)
4957 struct btrfs_dev_item *ptr;
4958 struct device_record *rec;
4959 int ret = 0;
4961 ptr = btrfs_item_ptr(eb,
4962 slot, struct btrfs_dev_item);
4964 rec = malloc(sizeof(*rec));
4965 if (!rec) {
4966 fprintf(stderr, "memory allocation failed\n");
4967 return -ENOMEM;
4970 rec->devid = key->offset;
4971 rec->generation = btrfs_header_generation(eb);
4973 rec->objectid = key->objectid;
4974 rec->type = key->type;
4975 rec->offset = key->offset;
4977 rec->devid = btrfs_device_id(eb, ptr);
4978 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
4979 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
4981 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
4982 if (ret) {
4983 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
4984 free(rec);
4987 return ret;
4990 struct block_group_record *
4991 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
4992 int slot)
4994 struct btrfs_block_group_item *ptr;
4995 struct block_group_record *rec;
4997 rec = calloc(1, sizeof(*rec));
4998 if (!rec) {
4999 fprintf(stderr, "memory allocation failed\n");
5000 exit(-1);
5003 rec->cache.start = key->objectid;
5004 rec->cache.size = key->offset;
5006 rec->generation = btrfs_header_generation(leaf);
5008 rec->objectid = key->objectid;
5009 rec->type = key->type;
5010 rec->offset = key->offset;
5012 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5013 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5015 INIT_LIST_HEAD(&rec->list);
5017 return rec;
5020 static int process_block_group_item(struct block_group_tree *block_group_cache,
5021 struct btrfs_key *key,
5022 struct extent_buffer *eb, int slot)
5024 struct block_group_record *rec;
5025 int ret = 0;
5027 rec = btrfs_new_block_group_record(eb, key, slot);
5028 ret = insert_block_group_record(block_group_cache, rec);
5029 if (ret) {
5030 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5031 rec->objectid, rec->offset);
5032 free(rec);
5035 return ret;
5038 struct device_extent_record *
5039 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5040 struct btrfs_key *key, int slot)
5042 struct device_extent_record *rec;
5043 struct btrfs_dev_extent *ptr;
5045 rec = calloc(1, sizeof(*rec));
5046 if (!rec) {
5047 fprintf(stderr, "memory allocation failed\n");
5048 exit(-1);
5051 rec->cache.objectid = key->objectid;
5052 rec->cache.start = key->offset;
5054 rec->generation = btrfs_header_generation(leaf);
5056 rec->objectid = key->objectid;
5057 rec->type = key->type;
5058 rec->offset = key->offset;
5060 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5061 rec->chunk_objecteid =
5062 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5063 rec->chunk_offset =
5064 btrfs_dev_extent_chunk_offset(leaf, ptr);
5065 rec->length = btrfs_dev_extent_length(leaf, ptr);
5066 rec->cache.size = rec->length;
5068 INIT_LIST_HEAD(&rec->chunk_list);
5069 INIT_LIST_HEAD(&rec->device_list);
5071 return rec;
5074 static int
5075 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5076 struct btrfs_key *key, struct extent_buffer *eb,
5077 int slot)
5079 struct device_extent_record *rec;
5080 int ret;
5082 rec = btrfs_new_device_extent_record(eb, key, slot);
5083 ret = insert_device_extent_record(dev_extent_cache, rec);
5084 if (ret) {
5085 fprintf(stderr,
5086 "Device extent[%llu, %llu, %llu] existed.\n",
5087 rec->objectid, rec->offset, rec->length);
5088 free(rec);
5091 return ret;
5094 static int process_extent_item(struct btrfs_root *root,
5095 struct cache_tree *extent_cache,
5096 struct extent_buffer *eb, int slot)
5098 struct btrfs_extent_item *ei;
5099 struct btrfs_extent_inline_ref *iref;
5100 struct btrfs_extent_data_ref *dref;
5101 struct btrfs_shared_data_ref *sref;
5102 struct btrfs_key key;
5103 unsigned long end;
5104 unsigned long ptr;
5105 int type;
5106 u32 item_size = btrfs_item_size_nr(eb, slot);
5107 u64 refs = 0;
5108 u64 offset;
5109 u64 num_bytes;
5110 int metadata = 0;
5112 btrfs_item_key_to_cpu(eb, &key, slot);
5114 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5115 metadata = 1;
5116 num_bytes = root->leafsize;
5117 } else {
5118 num_bytes = key.offset;
5121 if (item_size < sizeof(*ei)) {
5122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5123 struct btrfs_extent_item_v0 *ei0;
5124 BUG_ON(item_size != sizeof(*ei0));
5125 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5126 refs = btrfs_extent_refs_v0(eb, ei0);
5127 #else
5128 BUG();
5129 #endif
5130 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5131 num_bytes, refs, 0, 0, 0, metadata, 1,
5132 num_bytes);
5135 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5136 refs = btrfs_extent_refs(eb, ei);
5138 add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5139 refs, 0, 0, 0, metadata, 1, num_bytes);
5141 ptr = (unsigned long)(ei + 1);
5142 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5143 key.type == BTRFS_EXTENT_ITEM_KEY)
5144 ptr += sizeof(struct btrfs_tree_block_info);
5146 end = (unsigned long)ei + item_size;
5147 while (ptr < end) {
5148 iref = (struct btrfs_extent_inline_ref *)ptr;
5149 type = btrfs_extent_inline_ref_type(eb, iref);
5150 offset = btrfs_extent_inline_ref_offset(eb, iref);
5151 switch (type) {
5152 case BTRFS_TREE_BLOCK_REF_KEY:
5153 add_tree_backref(extent_cache, key.objectid,
5154 0, offset, 0);
5155 break;
5156 case BTRFS_SHARED_BLOCK_REF_KEY:
5157 add_tree_backref(extent_cache, key.objectid,
5158 offset, 0, 0);
5159 break;
5160 case BTRFS_EXTENT_DATA_REF_KEY:
5161 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5162 add_data_backref(extent_cache, key.objectid, 0,
5163 btrfs_extent_data_ref_root(eb, dref),
5164 btrfs_extent_data_ref_objectid(eb,
5165 dref),
5166 btrfs_extent_data_ref_offset(eb, dref),
5167 btrfs_extent_data_ref_count(eb, dref),
5168 0, num_bytes);
5169 break;
5170 case BTRFS_SHARED_DATA_REF_KEY:
5171 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5172 add_data_backref(extent_cache, key.objectid, offset,
5173 0, 0, 0,
5174 btrfs_shared_data_ref_count(eb, sref),
5175 0, num_bytes);
5176 break;
5177 default:
5178 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5179 key.objectid, key.type, num_bytes);
5180 goto out;
5182 ptr += btrfs_extent_inline_ref_size(type);
5184 WARN_ON(ptr > end);
5185 out:
5186 return 0;
5189 static int check_cache_range(struct btrfs_root *root,
5190 struct btrfs_block_group_cache *cache,
5191 u64 offset, u64 bytes)
5193 struct btrfs_free_space *entry;
5194 u64 *logical;
5195 u64 bytenr;
5196 int stripe_len;
5197 int i, nr, ret;
5199 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5200 bytenr = btrfs_sb_offset(i);
5201 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5202 cache->key.objectid, bytenr, 0,
5203 &logical, &nr, &stripe_len);
5204 if (ret)
5205 return ret;
5207 while (nr--) {
5208 if (logical[nr] + stripe_len <= offset)
5209 continue;
5210 if (offset + bytes <= logical[nr])
5211 continue;
5212 if (logical[nr] == offset) {
5213 if (stripe_len >= bytes) {
5214 kfree(logical);
5215 return 0;
5217 bytes -= stripe_len;
5218 offset += stripe_len;
5219 } else if (logical[nr] < offset) {
5220 if (logical[nr] + stripe_len >=
5221 offset + bytes) {
5222 kfree(logical);
5223 return 0;
5225 bytes = (offset + bytes) -
5226 (logical[nr] + stripe_len);
5227 offset = logical[nr] + stripe_len;
5228 } else {
5230 * Could be tricky, the super may land in the
5231 * middle of the area we're checking. First
5232 * check the easiest case, it's at the end.
5234 if (logical[nr] + stripe_len >=
5235 bytes + offset) {
5236 bytes = logical[nr] - offset;
5237 continue;
5240 /* Check the left side */
5241 ret = check_cache_range(root, cache,
5242 offset,
5243 logical[nr] - offset);
5244 if (ret) {
5245 kfree(logical);
5246 return ret;
5249 /* Now we continue with the right side */
5250 bytes = (offset + bytes) -
5251 (logical[nr] + stripe_len);
5252 offset = logical[nr] + stripe_len;
5256 kfree(logical);
5259 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5260 if (!entry) {
5261 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5262 offset, offset+bytes);
5263 return -EINVAL;
5266 if (entry->offset != offset) {
5267 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5268 entry->offset);
5269 return -EINVAL;
5272 if (entry->bytes != bytes) {
5273 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5274 bytes, entry->bytes, offset);
5275 return -EINVAL;
5278 unlink_free_space(cache->free_space_ctl, entry);
5279 free(entry);
5280 return 0;
5283 static int verify_space_cache(struct btrfs_root *root,
5284 struct btrfs_block_group_cache *cache)
5286 struct btrfs_path *path;
5287 struct extent_buffer *leaf;
5288 struct btrfs_key key;
5289 u64 last;
5290 int ret = 0;
5292 path = btrfs_alloc_path();
5293 if (!path)
5294 return -ENOMEM;
5296 root = root->fs_info->extent_root;
5298 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5300 key.objectid = last;
5301 key.offset = 0;
5302 key.type = BTRFS_EXTENT_ITEM_KEY;
5304 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5305 if (ret < 0)
5306 goto out;
5307 ret = 0;
5308 while (1) {
5309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5310 ret = btrfs_next_leaf(root, path);
5311 if (ret < 0)
5312 goto out;
5313 if (ret > 0) {
5314 ret = 0;
5315 break;
5318 leaf = path->nodes[0];
5319 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5320 if (key.objectid >= cache->key.offset + cache->key.objectid)
5321 break;
5322 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5323 key.type != BTRFS_METADATA_ITEM_KEY) {
5324 path->slots[0]++;
5325 continue;
5328 if (last == key.objectid) {
5329 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5330 last = key.objectid + key.offset;
5331 else
5332 last = key.objectid + root->leafsize;
5333 path->slots[0]++;
5334 continue;
5337 ret = check_cache_range(root, cache, last,
5338 key.objectid - last);
5339 if (ret)
5340 break;
5341 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5342 last = key.objectid + key.offset;
5343 else
5344 last = key.objectid + root->leafsize;
5345 path->slots[0]++;
5348 if (last < cache->key.objectid + cache->key.offset)
5349 ret = check_cache_range(root, cache, last,
5350 cache->key.objectid +
5351 cache->key.offset - last);
5353 out:
5354 btrfs_free_path(path);
5356 if (!ret &&
5357 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5358 fprintf(stderr, "There are still entries left in the space "
5359 "cache\n");
5360 ret = -EINVAL;
5363 return ret;
5366 static int check_space_cache(struct btrfs_root *root)
5368 struct btrfs_block_group_cache *cache;
5369 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5370 int ret;
5371 int error = 0;
5373 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5374 btrfs_super_generation(root->fs_info->super_copy) !=
5375 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5376 printf("cache and super generation don't match, space cache "
5377 "will be invalidated\n");
5378 return 0;
5381 if (ctx.progress_enabled) {
5382 ctx.tp = TASK_FREE_SPACE;
5383 task_start(ctx.info);
5386 while (1) {
5387 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5388 if (!cache)
5389 break;
5391 start = cache->key.objectid + cache->key.offset;
5392 if (!cache->free_space_ctl) {
5393 if (btrfs_init_free_space_ctl(cache,
5394 root->sectorsize)) {
5395 ret = -ENOMEM;
5396 break;
5398 } else {
5399 btrfs_remove_free_space_cache(cache);
5402 ret = load_free_space_cache(root->fs_info, cache);
5403 if (!ret)
5404 continue;
5406 ret = verify_space_cache(root, cache);
5407 if (ret) {
5408 fprintf(stderr, "cache appears valid but isnt %Lu\n",
5409 cache->key.objectid);
5410 error++;
5414 task_stop(ctx.info);
5416 return error ? -EINVAL : 0;
5419 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5420 u64 num_bytes, unsigned long leaf_offset,
5421 struct extent_buffer *eb) {
5423 u64 offset = 0;
5424 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5425 char *data;
5426 unsigned long csum_offset;
5427 u32 csum;
5428 u32 csum_expected;
5429 u64 read_len;
5430 u64 data_checked = 0;
5431 u64 tmp;
5432 int ret = 0;
5433 int mirror;
5434 int num_copies;
5436 if (num_bytes % root->sectorsize)
5437 return -EINVAL;
5439 data = malloc(num_bytes);
5440 if (!data)
5441 return -ENOMEM;
5443 while (offset < num_bytes) {
5444 mirror = 0;
5445 again:
5446 read_len = num_bytes - offset;
5447 /* read as much space once a time */
5448 ret = read_extent_data(root, data + offset,
5449 bytenr + offset, &read_len, mirror);
5450 if (ret)
5451 goto out;
5452 data_checked = 0;
5453 /* verify every 4k data's checksum */
5454 while (data_checked < read_len) {
5455 csum = ~(u32)0;
5456 tmp = offset + data_checked;
5458 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5459 csum, root->sectorsize);
5460 btrfs_csum_final(csum, (char *)&csum);
5462 csum_offset = leaf_offset +
5463 tmp / root->sectorsize * csum_size;
5464 read_extent_buffer(eb, (char *)&csum_expected,
5465 csum_offset, csum_size);
5466 /* try another mirror */
5467 if (csum != csum_expected) {
5468 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5469 mirror, bytenr + tmp,
5470 csum, csum_expected);
5471 num_copies = btrfs_num_copies(
5472 &root->fs_info->mapping_tree,
5473 bytenr, num_bytes);
5474 if (mirror < num_copies - 1) {
5475 mirror += 1;
5476 goto again;
5479 data_checked += root->sectorsize;
5481 offset += read_len;
5483 out:
5484 free(data);
5485 return ret;
5488 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5489 u64 num_bytes)
5491 struct btrfs_path *path;
5492 struct extent_buffer *leaf;
5493 struct btrfs_key key;
5494 int ret;
5496 path = btrfs_alloc_path();
5497 if (!path) {
5498 fprintf(stderr, "Error allocing path\n");
5499 return -ENOMEM;
5502 key.objectid = bytenr;
5503 key.type = BTRFS_EXTENT_ITEM_KEY;
5504 key.offset = (u64)-1;
5506 again:
5507 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5508 0, 0);
5509 if (ret < 0) {
5510 fprintf(stderr, "Error looking up extent record %d\n", ret);
5511 btrfs_free_path(path);
5512 return ret;
5513 } else if (ret) {
5514 if (path->slots[0] > 0) {
5515 path->slots[0]--;
5516 } else {
5517 ret = btrfs_prev_leaf(root, path);
5518 if (ret < 0) {
5519 goto out;
5520 } else if (ret > 0) {
5521 ret = 0;
5522 goto out;
5527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5530 * Block group items come before extent items if they have the same
5531 * bytenr, so walk back one more just in case. Dear future traveler,
5532 * first congrats on mastering time travel. Now if it's not too much
5533 * trouble could you go back to 2006 and tell Chris to make the
5534 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5535 * EXTENT_ITEM_KEY please?
5537 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5538 if (path->slots[0] > 0) {
5539 path->slots[0]--;
5540 } else {
5541 ret = btrfs_prev_leaf(root, path);
5542 if (ret < 0) {
5543 goto out;
5544 } else if (ret > 0) {
5545 ret = 0;
5546 goto out;
5549 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5552 while (num_bytes) {
5553 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5554 ret = btrfs_next_leaf(root, path);
5555 if (ret < 0) {
5556 fprintf(stderr, "Error going to next leaf "
5557 "%d\n", ret);
5558 btrfs_free_path(path);
5559 return ret;
5560 } else if (ret) {
5561 break;
5564 leaf = path->nodes[0];
5565 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5566 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5567 path->slots[0]++;
5568 continue;
5570 if (key.objectid + key.offset < bytenr) {
5571 path->slots[0]++;
5572 continue;
5574 if (key.objectid > bytenr + num_bytes)
5575 break;
5577 if (key.objectid == bytenr) {
5578 if (key.offset >= num_bytes) {
5579 num_bytes = 0;
5580 break;
5582 num_bytes -= key.offset;
5583 bytenr += key.offset;
5584 } else if (key.objectid < bytenr) {
5585 if (key.objectid + key.offset >= bytenr + num_bytes) {
5586 num_bytes = 0;
5587 break;
5589 num_bytes = (bytenr + num_bytes) -
5590 (key.objectid + key.offset);
5591 bytenr = key.objectid + key.offset;
5592 } else {
5593 if (key.objectid + key.offset < bytenr + num_bytes) {
5594 u64 new_start = key.objectid + key.offset;
5595 u64 new_bytes = bytenr + num_bytes - new_start;
5598 * Weird case, the extent is in the middle of
5599 * our range, we'll have to search one side
5600 * and then the other. Not sure if this happens
5601 * in real life, but no harm in coding it up
5602 * anyway just in case.
5604 btrfs_release_path(path);
5605 ret = check_extent_exists(root, new_start,
5606 new_bytes);
5607 if (ret) {
5608 fprintf(stderr, "Right section didn't "
5609 "have a record\n");
5610 break;
5612 num_bytes = key.objectid - bytenr;
5613 goto again;
5615 num_bytes = key.objectid - bytenr;
5617 path->slots[0]++;
5619 ret = 0;
5621 out:
5622 if (num_bytes && !ret) {
5623 fprintf(stderr, "There are no extents for csum range "
5624 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5625 ret = 1;
5628 btrfs_free_path(path);
5629 return ret;
5632 static int check_csums(struct btrfs_root *root)
5634 struct btrfs_path *path;
5635 struct extent_buffer *leaf;
5636 struct btrfs_key key;
5637 u64 offset = 0, num_bytes = 0;
5638 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5639 int errors = 0;
5640 int ret;
5641 u64 data_len;
5642 unsigned long leaf_offset;
5644 root = root->fs_info->csum_root;
5645 if (!extent_buffer_uptodate(root->node)) {
5646 fprintf(stderr, "No valid csum tree found\n");
5647 return -ENOENT;
5650 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5651 key.type = BTRFS_EXTENT_CSUM_KEY;
5652 key.offset = 0;
5654 path = btrfs_alloc_path();
5655 if (!path)
5656 return -ENOMEM;
5658 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5659 if (ret < 0) {
5660 fprintf(stderr, "Error searching csum tree %d\n", ret);
5661 btrfs_free_path(path);
5662 return ret;
5665 if (ret > 0 && path->slots[0])
5666 path->slots[0]--;
5667 ret = 0;
5669 while (1) {
5670 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5671 ret = btrfs_next_leaf(root, path);
5672 if (ret < 0) {
5673 fprintf(stderr, "Error going to next leaf "
5674 "%d\n", ret);
5675 break;
5677 if (ret)
5678 break;
5680 leaf = path->nodes[0];
5682 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5683 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5684 path->slots[0]++;
5685 continue;
5688 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5689 csum_size) * root->sectorsize;
5690 if (!check_data_csum)
5691 goto skip_csum_check;
5692 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5693 ret = check_extent_csums(root, key.offset, data_len,
5694 leaf_offset, leaf);
5695 if (ret)
5696 break;
5697 skip_csum_check:
5698 if (!num_bytes) {
5699 offset = key.offset;
5700 } else if (key.offset != offset + num_bytes) {
5701 ret = check_extent_exists(root, offset, num_bytes);
5702 if (ret) {
5703 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5704 "there is no extent record\n",
5705 offset, offset+num_bytes);
5706 errors++;
5708 offset = key.offset;
5709 num_bytes = 0;
5711 num_bytes += data_len;
5712 path->slots[0]++;
5715 btrfs_free_path(path);
5716 return errors;
5719 static int is_dropped_key(struct btrfs_key *key,
5720 struct btrfs_key *drop_key) {
5721 if (key->objectid < drop_key->objectid)
5722 return 1;
5723 else if (key->objectid == drop_key->objectid) {
5724 if (key->type < drop_key->type)
5725 return 1;
5726 else if (key->type == drop_key->type) {
5727 if (key->offset < drop_key->offset)
5728 return 1;
5731 return 0;
5735 * Here are the rules for FULL_BACKREF.
5737 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5738 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5739 * FULL_BACKREF set.
5740 * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell
5741 * if it happened after the relocation occurred since we'll have dropped the
5742 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5743 * have no real way to know for sure.
5745 * We process the blocks one root at a time, and we start from the lowest root
5746 * objectid and go to the highest. So we can just lookup the owner backref for
5747 * the record and if we don't find it then we know it doesn't exist and we have
5748 * a FULL BACKREF.
5750 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5751 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5752 * be set or not and then we can check later once we've gathered all the refs.
5754 static int calc_extent_flag(struct btrfs_root *root,
5755 struct cache_tree *extent_cache,
5756 struct extent_buffer *buf,
5757 struct root_item_record *ri,
5758 u64 *flags)
5760 struct extent_record *rec;
5761 struct cache_extent *cache;
5762 struct tree_backref *tback;
5763 u64 owner = 0;
5765 cache = lookup_cache_extent(extent_cache, buf->start, 1);
5766 /* we have added this extent before */
5767 BUG_ON(!cache);
5768 rec = container_of(cache, struct extent_record, cache);
5771 * Except file/reloc tree, we can not have
5772 * FULL BACKREF MODE
5774 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5775 goto normal;
5777 * root node
5779 if (buf->start == ri->bytenr)
5780 goto normal;
5782 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5783 goto full_backref;
5785 owner = btrfs_header_owner(buf);
5786 if (owner == ri->objectid)
5787 goto normal;
5789 tback = find_tree_backref(rec, 0, owner);
5790 if (!tback)
5791 goto full_backref;
5792 normal:
5793 *flags = 0;
5794 if (rec->flag_block_full_backref != -1 &&
5795 rec->flag_block_full_backref != 0)
5796 rec->bad_full_backref = 1;
5797 return 0;
5798 full_backref:
5799 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5800 if (rec->flag_block_full_backref != -1 &&
5801 rec->flag_block_full_backref != 1)
5802 rec->bad_full_backref = 1;
5803 return 0;
5806 static int run_next_block(struct btrfs_root *root,
5807 struct block_info *bits,
5808 int bits_nr,
5809 u64 *last,
5810 struct cache_tree *pending,
5811 struct cache_tree *seen,
5812 struct cache_tree *reada,
5813 struct cache_tree *nodes,
5814 struct cache_tree *extent_cache,
5815 struct cache_tree *chunk_cache,
5816 struct rb_root *dev_cache,
5817 struct block_group_tree *block_group_cache,
5818 struct device_extent_tree *dev_extent_cache,
5819 struct root_item_record *ri)
5821 struct extent_buffer *buf;
5822 struct extent_record *rec = NULL;
5823 u64 bytenr;
5824 u32 size;
5825 u64 parent;
5826 u64 owner;
5827 u64 flags;
5828 u64 ptr;
5829 u64 gen = 0;
5830 int ret = 0;
5831 int i;
5832 int nritems;
5833 struct btrfs_key key;
5834 struct cache_extent *cache;
5835 int reada_bits;
5837 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5838 bits_nr, &reada_bits);
5839 if (nritems == 0)
5840 return 1;
5842 if (!reada_bits) {
5843 for(i = 0; i < nritems; i++) {
5844 ret = add_cache_extent(reada, bits[i].start,
5845 bits[i].size);
5846 if (ret == -EEXIST)
5847 continue;
5849 /* fixme, get the parent transid */
5850 readahead_tree_block(root, bits[i].start,
5851 bits[i].size, 0);
5854 *last = bits[0].start;
5855 bytenr = bits[0].start;
5856 size = bits[0].size;
5858 cache = lookup_cache_extent(pending, bytenr, size);
5859 if (cache) {
5860 remove_cache_extent(pending, cache);
5861 free(cache);
5863 cache = lookup_cache_extent(reada, bytenr, size);
5864 if (cache) {
5865 remove_cache_extent(reada, cache);
5866 free(cache);
5868 cache = lookup_cache_extent(nodes, bytenr, size);
5869 if (cache) {
5870 remove_cache_extent(nodes, cache);
5871 free(cache);
5873 cache = lookup_cache_extent(extent_cache, bytenr, size);
5874 if (cache) {
5875 rec = container_of(cache, struct extent_record, cache);
5876 gen = rec->parent_generation;
5879 /* fixme, get the real parent transid */
5880 buf = read_tree_block(root, bytenr, size, gen);
5881 if (!extent_buffer_uptodate(buf)) {
5882 record_bad_block_io(root->fs_info,
5883 extent_cache, bytenr, size);
5884 goto out;
5887 nritems = btrfs_header_nritems(buf);
5889 flags = 0;
5890 if (!init_extent_tree) {
5891 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
5892 btrfs_header_level(buf), 1, NULL,
5893 &flags);
5894 if (ret < 0) {
5895 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5896 if (ret < 0) {
5897 fprintf(stderr, "Couldn't calc extent flags\n");
5898 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5901 } else {
5902 flags = 0;
5903 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
5904 if (ret < 0) {
5905 fprintf(stderr, "Couldn't calc extent flags\n");
5906 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5910 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5911 if (ri != NULL &&
5912 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
5913 ri->objectid == btrfs_header_owner(buf)) {
5915 * Ok we got to this block from it's original owner and
5916 * we have FULL_BACKREF set. Relocation can leave
5917 * converted blocks over so this is altogether possible,
5918 * however it's not possible if the generation > the
5919 * last snapshot, so check for this case.
5921 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
5922 btrfs_header_generation(buf) > ri->last_snapshot) {
5923 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
5924 rec->bad_full_backref = 1;
5927 } else {
5928 if (ri != NULL &&
5929 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
5930 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
5931 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5932 rec->bad_full_backref = 1;
5936 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5937 rec->flag_block_full_backref = 1;
5938 parent = bytenr;
5939 owner = 0;
5940 } else {
5941 rec->flag_block_full_backref = 0;
5942 parent = 0;
5943 owner = btrfs_header_owner(buf);
5946 ret = check_block(root, extent_cache, buf, flags);
5947 if (ret)
5948 goto out;
5950 if (btrfs_is_leaf(buf)) {
5951 btree_space_waste += btrfs_leaf_free_space(root, buf);
5952 for (i = 0; i < nritems; i++) {
5953 struct btrfs_file_extent_item *fi;
5954 btrfs_item_key_to_cpu(buf, &key, i);
5955 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
5956 process_extent_item(root, extent_cache, buf,
5958 continue;
5960 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5961 process_extent_item(root, extent_cache, buf,
5963 continue;
5965 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
5966 total_csum_bytes +=
5967 btrfs_item_size_nr(buf, i);
5968 continue;
5970 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
5971 process_chunk_item(chunk_cache, &key, buf, i);
5972 continue;
5974 if (key.type == BTRFS_DEV_ITEM_KEY) {
5975 process_device_item(dev_cache, &key, buf, i);
5976 continue;
5978 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5979 process_block_group_item(block_group_cache,
5980 &key, buf, i);
5981 continue;
5983 if (key.type == BTRFS_DEV_EXTENT_KEY) {
5984 process_device_extent_item(dev_extent_cache,
5985 &key, buf, i);
5986 continue;
5989 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
5990 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5991 process_extent_ref_v0(extent_cache, buf, i);
5992 #else
5993 BUG();
5994 #endif
5995 continue;
5998 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
5999 add_tree_backref(extent_cache, key.objectid, 0,
6000 key.offset, 0);
6001 continue;
6003 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6004 add_tree_backref(extent_cache, key.objectid,
6005 key.offset, 0, 0);
6006 continue;
6008 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6009 struct btrfs_extent_data_ref *ref;
6010 ref = btrfs_item_ptr(buf, i,
6011 struct btrfs_extent_data_ref);
6012 add_data_backref(extent_cache,
6013 key.objectid, 0,
6014 btrfs_extent_data_ref_root(buf, ref),
6015 btrfs_extent_data_ref_objectid(buf,
6016 ref),
6017 btrfs_extent_data_ref_offset(buf, ref),
6018 btrfs_extent_data_ref_count(buf, ref),
6019 0, root->sectorsize);
6020 continue;
6022 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6023 struct btrfs_shared_data_ref *ref;
6024 ref = btrfs_item_ptr(buf, i,
6025 struct btrfs_shared_data_ref);
6026 add_data_backref(extent_cache,
6027 key.objectid, key.offset, 0, 0, 0,
6028 btrfs_shared_data_ref_count(buf, ref),
6029 0, root->sectorsize);
6030 continue;
6032 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6033 struct bad_item *bad;
6035 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6036 continue;
6037 if (!owner)
6038 continue;
6039 bad = malloc(sizeof(struct bad_item));
6040 if (!bad)
6041 continue;
6042 INIT_LIST_HEAD(&bad->list);
6043 memcpy(&bad->key, &key,
6044 sizeof(struct btrfs_key));
6045 bad->root_id = owner;
6046 list_add_tail(&bad->list, &delete_items);
6047 continue;
6049 if (key.type != BTRFS_EXTENT_DATA_KEY)
6050 continue;
6051 fi = btrfs_item_ptr(buf, i,
6052 struct btrfs_file_extent_item);
6053 if (btrfs_file_extent_type(buf, fi) ==
6054 BTRFS_FILE_EXTENT_INLINE)
6055 continue;
6056 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6057 continue;
6059 data_bytes_allocated +=
6060 btrfs_file_extent_disk_num_bytes(buf, fi);
6061 if (data_bytes_allocated < root->sectorsize) {
6062 abort();
6064 data_bytes_referenced +=
6065 btrfs_file_extent_num_bytes(buf, fi);
6066 add_data_backref(extent_cache,
6067 btrfs_file_extent_disk_bytenr(buf, fi),
6068 parent, owner, key.objectid, key.offset -
6069 btrfs_file_extent_offset(buf, fi), 1, 1,
6070 btrfs_file_extent_disk_num_bytes(buf, fi));
6072 } else {
6073 int level;
6074 struct btrfs_key first_key;
6076 first_key.objectid = 0;
6078 if (nritems > 0)
6079 btrfs_item_key_to_cpu(buf, &first_key, 0);
6080 level = btrfs_header_level(buf);
6081 for (i = 0; i < nritems; i++) {
6082 ptr = btrfs_node_blockptr(buf, i);
6083 size = btrfs_level_size(root, level - 1);
6084 btrfs_node_key_to_cpu(buf, &key, i);
6085 if (ri != NULL) {
6086 if ((level == ri->drop_level)
6087 && is_dropped_key(&key, &ri->drop_key)) {
6088 continue;
6091 ret = add_extent_rec(extent_cache, &key,
6092 btrfs_node_ptr_generation(buf, i),
6093 ptr, size, 0, 0, 1, 0, 1, 0,
6094 size);
6095 BUG_ON(ret);
6097 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6099 if (level > 1) {
6100 add_pending(nodes, seen, ptr, size);
6101 } else {
6102 add_pending(pending, seen, ptr, size);
6105 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6106 nritems) * sizeof(struct btrfs_key_ptr);
6108 total_btree_bytes += buf->len;
6109 if (fs_root_objectid(btrfs_header_owner(buf)))
6110 total_fs_tree_bytes += buf->len;
6111 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6112 total_extent_tree_bytes += buf->len;
6113 if (!found_old_backref &&
6114 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6115 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6116 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6117 found_old_backref = 1;
6118 out:
6119 free_extent_buffer(buf);
6120 return ret;
6123 static int add_root_to_pending(struct extent_buffer *buf,
6124 struct cache_tree *extent_cache,
6125 struct cache_tree *pending,
6126 struct cache_tree *seen,
6127 struct cache_tree *nodes,
6128 u64 objectid)
6130 if (btrfs_header_level(buf) > 0)
6131 add_pending(nodes, seen, buf->start, buf->len);
6132 else
6133 add_pending(pending, seen, buf->start, buf->len);
6134 add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6135 0, 1, 1, 0, 1, 0, buf->len);
6137 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6138 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6139 add_tree_backref(extent_cache, buf->start, buf->start,
6140 0, 1);
6141 else
6142 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6143 return 0;
6146 /* as we fix the tree, we might be deleting blocks that
6147 * we're tracking for repair. This hook makes sure we
6148 * remove any backrefs for blocks as we are fixing them.
6150 static int free_extent_hook(struct btrfs_trans_handle *trans,
6151 struct btrfs_root *root,
6152 u64 bytenr, u64 num_bytes, u64 parent,
6153 u64 root_objectid, u64 owner, u64 offset,
6154 int refs_to_drop)
6156 struct extent_record *rec;
6157 struct cache_extent *cache;
6158 int is_data;
6159 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6161 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6162 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6163 if (!cache)
6164 return 0;
6166 rec = container_of(cache, struct extent_record, cache);
6167 if (is_data) {
6168 struct data_backref *back;
6169 back = find_data_backref(rec, parent, root_objectid, owner,
6170 offset, 1, bytenr, num_bytes);
6171 if (!back)
6172 goto out;
6173 if (back->node.found_ref) {
6174 back->found_ref -= refs_to_drop;
6175 if (rec->refs)
6176 rec->refs -= refs_to_drop;
6178 if (back->node.found_extent_tree) {
6179 back->num_refs -= refs_to_drop;
6180 if (rec->extent_item_refs)
6181 rec->extent_item_refs -= refs_to_drop;
6183 if (back->found_ref == 0)
6184 back->node.found_ref = 0;
6185 if (back->num_refs == 0)
6186 back->node.found_extent_tree = 0;
6188 if (!back->node.found_extent_tree && back->node.found_ref) {
6189 list_del(&back->node.list);
6190 free(back);
6192 } else {
6193 struct tree_backref *back;
6194 back = find_tree_backref(rec, parent, root_objectid);
6195 if (!back)
6196 goto out;
6197 if (back->node.found_ref) {
6198 if (rec->refs)
6199 rec->refs--;
6200 back->node.found_ref = 0;
6202 if (back->node.found_extent_tree) {
6203 if (rec->extent_item_refs)
6204 rec->extent_item_refs--;
6205 back->node.found_extent_tree = 0;
6207 if (!back->node.found_extent_tree && back->node.found_ref) {
6208 list_del(&back->node.list);
6209 free(back);
6212 maybe_free_extent_rec(extent_cache, rec);
6213 out:
6214 return 0;
6217 static int delete_extent_records(struct btrfs_trans_handle *trans,
6218 struct btrfs_root *root,
6219 struct btrfs_path *path,
6220 u64 bytenr, u64 new_len)
6222 struct btrfs_key key;
6223 struct btrfs_key found_key;
6224 struct extent_buffer *leaf;
6225 int ret;
6226 int slot;
6229 key.objectid = bytenr;
6230 key.type = (u8)-1;
6231 key.offset = (u64)-1;
6233 while(1) {
6234 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6235 &key, path, 0, 1);
6236 if (ret < 0)
6237 break;
6239 if (ret > 0) {
6240 ret = 0;
6241 if (path->slots[0] == 0)
6242 break;
6243 path->slots[0]--;
6245 ret = 0;
6247 leaf = path->nodes[0];
6248 slot = path->slots[0];
6250 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6251 if (found_key.objectid != bytenr)
6252 break;
6254 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6255 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6256 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6257 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6258 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6259 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6260 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6261 btrfs_release_path(path);
6262 if (found_key.type == 0) {
6263 if (found_key.offset == 0)
6264 break;
6265 key.offset = found_key.offset - 1;
6266 key.type = found_key.type;
6268 key.type = found_key.type - 1;
6269 key.offset = (u64)-1;
6270 continue;
6273 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6274 found_key.objectid, found_key.type, found_key.offset);
6276 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6277 if (ret)
6278 break;
6279 btrfs_release_path(path);
6281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6282 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6283 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6284 found_key.offset : root->leafsize;
6286 ret = btrfs_update_block_group(trans, root, bytenr,
6287 bytes, 0, 0);
6288 if (ret)
6289 break;
6293 btrfs_release_path(path);
6294 return ret;
6298 * for a single backref, this will allocate a new extent
6299 * and add the backref to it.
6301 static int record_extent(struct btrfs_trans_handle *trans,
6302 struct btrfs_fs_info *info,
6303 struct btrfs_path *path,
6304 struct extent_record *rec,
6305 struct extent_backref *back,
6306 int allocated, u64 flags)
6308 int ret;
6309 struct btrfs_root *extent_root = info->extent_root;
6310 struct extent_buffer *leaf;
6311 struct btrfs_key ins_key;
6312 struct btrfs_extent_item *ei;
6313 struct tree_backref *tback;
6314 struct data_backref *dback;
6315 struct btrfs_tree_block_info *bi;
6317 if (!back->is_data)
6318 rec->max_size = max_t(u64, rec->max_size,
6319 info->extent_root->leafsize);
6321 if (!allocated) {
6322 u32 item_size = sizeof(*ei);
6324 if (!back->is_data)
6325 item_size += sizeof(*bi);
6327 ins_key.objectid = rec->start;
6328 ins_key.offset = rec->max_size;
6329 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6331 ret = btrfs_insert_empty_item(trans, extent_root, path,
6332 &ins_key, item_size);
6333 if (ret)
6334 goto fail;
6336 leaf = path->nodes[0];
6337 ei = btrfs_item_ptr(leaf, path->slots[0],
6338 struct btrfs_extent_item);
6340 btrfs_set_extent_refs(leaf, ei, 0);
6341 btrfs_set_extent_generation(leaf, ei, rec->generation);
6343 if (back->is_data) {
6344 btrfs_set_extent_flags(leaf, ei,
6345 BTRFS_EXTENT_FLAG_DATA);
6346 } else {
6347 struct btrfs_disk_key copy_key;;
6349 tback = (struct tree_backref *)back;
6350 bi = (struct btrfs_tree_block_info *)(ei + 1);
6351 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6352 sizeof(*bi));
6354 btrfs_set_disk_key_objectid(&copy_key,
6355 rec->info_objectid);
6356 btrfs_set_disk_key_type(&copy_key, 0);
6357 btrfs_set_disk_key_offset(&copy_key, 0);
6359 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6360 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6362 btrfs_set_extent_flags(leaf, ei,
6363 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6366 btrfs_mark_buffer_dirty(leaf);
6367 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6368 rec->max_size, 1, 0);
6369 if (ret)
6370 goto fail;
6371 btrfs_release_path(path);
6374 if (back->is_data) {
6375 u64 parent;
6376 int i;
6378 dback = (struct data_backref *)back;
6379 if (back->full_backref)
6380 parent = dback->parent;
6381 else
6382 parent = 0;
6384 for (i = 0; i < dback->found_ref; i++) {
6385 /* if parent != 0, we're doing a full backref
6386 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6387 * just makes the backref allocator create a data
6388 * backref
6390 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6391 rec->start, rec->max_size,
6392 parent,
6393 dback->root,
6394 parent ?
6395 BTRFS_FIRST_FREE_OBJECTID :
6396 dback->owner,
6397 dback->offset);
6398 if (ret)
6399 break;
6401 fprintf(stderr, "adding new data backref"
6402 " on %llu %s %llu owner %llu"
6403 " offset %llu found %d\n",
6404 (unsigned long long)rec->start,
6405 back->full_backref ?
6406 "parent" : "root",
6407 back->full_backref ?
6408 (unsigned long long)parent :
6409 (unsigned long long)dback->root,
6410 (unsigned long long)dback->owner,
6411 (unsigned long long)dback->offset,
6412 dback->found_ref);
6413 } else {
6414 u64 parent;
6416 tback = (struct tree_backref *)back;
6417 if (back->full_backref)
6418 parent = tback->parent;
6419 else
6420 parent = 0;
6422 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6423 rec->start, rec->max_size,
6424 parent, tback->root, 0, 0);
6425 fprintf(stderr, "adding new tree backref on "
6426 "start %llu len %llu parent %llu root %llu\n",
6427 rec->start, rec->max_size, parent, tback->root);
6429 fail:
6430 btrfs_release_path(path);
6431 return ret;
6434 struct extent_entry {
6435 u64 bytenr;
6436 u64 bytes;
6437 int count;
6438 int broken;
6439 struct list_head list;
6442 static struct extent_entry *find_entry(struct list_head *entries,
6443 u64 bytenr, u64 bytes)
6445 struct extent_entry *entry = NULL;
6447 list_for_each_entry(entry, entries, list) {
6448 if (entry->bytenr == bytenr && entry->bytes == bytes)
6449 return entry;
6452 return NULL;
6455 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6457 struct extent_entry *entry, *best = NULL, *prev = NULL;
6459 list_for_each_entry(entry, entries, list) {
6460 if (!prev) {
6461 prev = entry;
6462 continue;
6466 * If there are as many broken entries as entries then we know
6467 * not to trust this particular entry.
6469 if (entry->broken == entry->count)
6470 continue;
6473 * If our current entry == best then we can't be sure our best
6474 * is really the best, so we need to keep searching.
6476 if (best && best->count == entry->count) {
6477 prev = entry;
6478 best = NULL;
6479 continue;
6482 /* Prev == entry, not good enough, have to keep searching */
6483 if (!prev->broken && prev->count == entry->count)
6484 continue;
6486 if (!best)
6487 best = (prev->count > entry->count) ? prev : entry;
6488 else if (best->count < entry->count)
6489 best = entry;
6490 prev = entry;
6493 return best;
6496 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6497 struct data_backref *dback, struct extent_entry *entry)
6499 struct btrfs_trans_handle *trans;
6500 struct btrfs_root *root;
6501 struct btrfs_file_extent_item *fi;
6502 struct extent_buffer *leaf;
6503 struct btrfs_key key;
6504 u64 bytenr, bytes;
6505 int ret, err;
6507 key.objectid = dback->root;
6508 key.type = BTRFS_ROOT_ITEM_KEY;
6509 key.offset = (u64)-1;
6510 root = btrfs_read_fs_root(info, &key);
6511 if (IS_ERR(root)) {
6512 fprintf(stderr, "Couldn't find root for our ref\n");
6513 return -EINVAL;
6517 * The backref points to the original offset of the extent if it was
6518 * split, so we need to search down to the offset we have and then walk
6519 * forward until we find the backref we're looking for.
6521 key.objectid = dback->owner;
6522 key.type = BTRFS_EXTENT_DATA_KEY;
6523 key.offset = dback->offset;
6524 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6525 if (ret < 0) {
6526 fprintf(stderr, "Error looking up ref %d\n", ret);
6527 return ret;
6530 while (1) {
6531 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6532 ret = btrfs_next_leaf(root, path);
6533 if (ret) {
6534 fprintf(stderr, "Couldn't find our ref, next\n");
6535 return -EINVAL;
6538 leaf = path->nodes[0];
6539 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6540 if (key.objectid != dback->owner ||
6541 key.type != BTRFS_EXTENT_DATA_KEY) {
6542 fprintf(stderr, "Couldn't find our ref, search\n");
6543 return -EINVAL;
6545 fi = btrfs_item_ptr(leaf, path->slots[0],
6546 struct btrfs_file_extent_item);
6547 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6548 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6550 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6551 break;
6552 path->slots[0]++;
6555 btrfs_release_path(path);
6557 trans = btrfs_start_transaction(root, 1);
6558 if (IS_ERR(trans))
6559 return PTR_ERR(trans);
6562 * Ok we have the key of the file extent we want to fix, now we can cow
6563 * down to the thing and fix it.
6565 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6566 if (ret < 0) {
6567 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6568 key.objectid, key.type, key.offset, ret);
6569 goto out;
6571 if (ret > 0) {
6572 fprintf(stderr, "Well that's odd, we just found this key "
6573 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6574 key.offset);
6575 ret = -EINVAL;
6576 goto out;
6578 leaf = path->nodes[0];
6579 fi = btrfs_item_ptr(leaf, path->slots[0],
6580 struct btrfs_file_extent_item);
6582 if (btrfs_file_extent_compression(leaf, fi) &&
6583 dback->disk_bytenr != entry->bytenr) {
6584 fprintf(stderr, "Ref doesn't match the record start and is "
6585 "compressed, please take a btrfs-image of this file "
6586 "system and send it to a btrfs developer so they can "
6587 "complete this functionality for bytenr %Lu\n",
6588 dback->disk_bytenr);
6589 ret = -EINVAL;
6590 goto out;
6593 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6594 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6595 } else if (dback->disk_bytenr > entry->bytenr) {
6596 u64 off_diff, offset;
6598 off_diff = dback->disk_bytenr - entry->bytenr;
6599 offset = btrfs_file_extent_offset(leaf, fi);
6600 if (dback->disk_bytenr + offset +
6601 btrfs_file_extent_num_bytes(leaf, fi) >
6602 entry->bytenr + entry->bytes) {
6603 fprintf(stderr, "Ref is past the entry end, please "
6604 "take a btrfs-image of this file system and "
6605 "send it to a btrfs developer, ref %Lu\n",
6606 dback->disk_bytenr);
6607 ret = -EINVAL;
6608 goto out;
6610 offset += off_diff;
6611 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6612 btrfs_set_file_extent_offset(leaf, fi, offset);
6613 } else if (dback->disk_bytenr < entry->bytenr) {
6614 u64 offset;
6616 offset = btrfs_file_extent_offset(leaf, fi);
6617 if (dback->disk_bytenr + offset < entry->bytenr) {
6618 fprintf(stderr, "Ref is before the entry start, please"
6619 " take a btrfs-image of this file system and "
6620 "send it to a btrfs developer, ref %Lu\n",
6621 dback->disk_bytenr);
6622 ret = -EINVAL;
6623 goto out;
6626 offset += dback->disk_bytenr;
6627 offset -= entry->bytenr;
6628 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6629 btrfs_set_file_extent_offset(leaf, fi, offset);
6632 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6635 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6636 * only do this if we aren't using compression, otherwise it's a
6637 * trickier case.
6639 if (!btrfs_file_extent_compression(leaf, fi))
6640 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6641 else
6642 printf("ram bytes may be wrong?\n");
6643 btrfs_mark_buffer_dirty(leaf);
6644 out:
6645 err = btrfs_commit_transaction(trans, root);
6646 btrfs_release_path(path);
6647 return ret ? ret : err;
6650 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6651 struct extent_record *rec)
6653 struct extent_backref *back;
6654 struct data_backref *dback;
6655 struct extent_entry *entry, *best = NULL;
6656 LIST_HEAD(entries);
6657 int nr_entries = 0;
6658 int broken_entries = 0;
6659 int ret = 0;
6660 short mismatch = 0;
6663 * Metadata is easy and the backrefs should always agree on bytenr and
6664 * size, if not we've got bigger issues.
6666 if (rec->metadata)
6667 return 0;
6669 list_for_each_entry(back, &rec->backrefs, list) {
6670 if (back->full_backref || !back->is_data)
6671 continue;
6673 dback = (struct data_backref *)back;
6676 * We only pay attention to backrefs that we found a real
6677 * backref for.
6679 if (dback->found_ref == 0)
6680 continue;
6683 * For now we only catch when the bytes don't match, not the
6684 * bytenr. We can easily do this at the same time, but I want
6685 * to have a fs image to test on before we just add repair
6686 * functionality willy-nilly so we know we won't screw up the
6687 * repair.
6690 entry = find_entry(&entries, dback->disk_bytenr,
6691 dback->bytes);
6692 if (!entry) {
6693 entry = malloc(sizeof(struct extent_entry));
6694 if (!entry) {
6695 ret = -ENOMEM;
6696 goto out;
6698 memset(entry, 0, sizeof(*entry));
6699 entry->bytenr = dback->disk_bytenr;
6700 entry->bytes = dback->bytes;
6701 list_add_tail(&entry->list, &entries);
6702 nr_entries++;
6706 * If we only have on entry we may think the entries agree when
6707 * in reality they don't so we have to do some extra checking.
6709 if (dback->disk_bytenr != rec->start ||
6710 dback->bytes != rec->nr || back->broken)
6711 mismatch = 1;
6713 if (back->broken) {
6714 entry->broken++;
6715 broken_entries++;
6718 entry->count++;
6721 /* Yay all the backrefs agree, carry on good sir */
6722 if (nr_entries <= 1 && !mismatch)
6723 goto out;
6725 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6726 "%Lu\n", rec->start);
6729 * First we want to see if the backrefs can agree amongst themselves who
6730 * is right, so figure out which one of the entries has the highest
6731 * count.
6733 best = find_most_right_entry(&entries);
6736 * Ok so we may have an even split between what the backrefs think, so
6737 * this is where we use the extent ref to see what it thinks.
6739 if (!best) {
6740 entry = find_entry(&entries, rec->start, rec->nr);
6741 if (!entry && (!broken_entries || !rec->found_rec)) {
6742 fprintf(stderr, "Backrefs don't agree with each other "
6743 "and extent record doesn't agree with anybody,"
6744 " so we can't fix bytenr %Lu bytes %Lu\n",
6745 rec->start, rec->nr);
6746 ret = -EINVAL;
6747 goto out;
6748 } else if (!entry) {
6750 * Ok our backrefs were broken, we'll assume this is the
6751 * correct value and add an entry for this range.
6753 entry = malloc(sizeof(struct extent_entry));
6754 if (!entry) {
6755 ret = -ENOMEM;
6756 goto out;
6758 memset(entry, 0, sizeof(*entry));
6759 entry->bytenr = rec->start;
6760 entry->bytes = rec->nr;
6761 list_add_tail(&entry->list, &entries);
6762 nr_entries++;
6764 entry->count++;
6765 best = find_most_right_entry(&entries);
6766 if (!best) {
6767 fprintf(stderr, "Backrefs and extent record evenly "
6768 "split on who is right, this is going to "
6769 "require user input to fix bytenr %Lu bytes "
6770 "%Lu\n", rec->start, rec->nr);
6771 ret = -EINVAL;
6772 goto out;
6777 * I don't think this can happen currently as we'll abort() if we catch
6778 * this case higher up, but in case somebody removes that we still can't
6779 * deal with it properly here yet, so just bail out of that's the case.
6781 if (best->bytenr != rec->start) {
6782 fprintf(stderr, "Extent start and backref starts don't match, "
6783 "please use btrfs-image on this file system and send "
6784 "it to a btrfs developer so they can make fsck fix "
6785 "this particular case. bytenr is %Lu, bytes is %Lu\n",
6786 rec->start, rec->nr);
6787 ret = -EINVAL;
6788 goto out;
6792 * Ok great we all agreed on an extent record, let's go find the real
6793 * references and fix up the ones that don't match.
6795 list_for_each_entry(back, &rec->backrefs, list) {
6796 if (back->full_backref || !back->is_data)
6797 continue;
6799 dback = (struct data_backref *)back;
6802 * Still ignoring backrefs that don't have a real ref attached
6803 * to them.
6805 if (dback->found_ref == 0)
6806 continue;
6808 if (dback->bytes == best->bytes &&
6809 dback->disk_bytenr == best->bytenr)
6810 continue;
6812 ret = repair_ref(info, path, dback, best);
6813 if (ret)
6814 goto out;
6818 * Ok we messed with the actual refs, which means we need to drop our
6819 * entire cache and go back and rescan. I know this is a huge pain and
6820 * adds a lot of extra work, but it's the only way to be safe. Once all
6821 * the backrefs agree we may not need to do anything to the extent
6822 * record itself.
6824 ret = -EAGAIN;
6825 out:
6826 while (!list_empty(&entries)) {
6827 entry = list_entry(entries.next, struct extent_entry, list);
6828 list_del_init(&entry->list);
6829 free(entry);
6831 return ret;
6834 static int process_duplicates(struct btrfs_root *root,
6835 struct cache_tree *extent_cache,
6836 struct extent_record *rec)
6838 struct extent_record *good, *tmp;
6839 struct cache_extent *cache;
6840 int ret;
6843 * If we found a extent record for this extent then return, or if we
6844 * have more than one duplicate we are likely going to need to delete
6845 * something.
6847 if (rec->found_rec || rec->num_duplicates > 1)
6848 return 0;
6850 /* Shouldn't happen but just in case */
6851 BUG_ON(!rec->num_duplicates);
6854 * So this happens if we end up with a backref that doesn't match the
6855 * actual extent entry. So either the backref is bad or the extent
6856 * entry is bad. Either way we want to have the extent_record actually
6857 * reflect what we found in the extent_tree, so we need to take the
6858 * duplicate out and use that as the extent_record since the only way we
6859 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6861 remove_cache_extent(extent_cache, &rec->cache);
6863 good = list_entry(rec->dups.next, struct extent_record, list);
6864 list_del_init(&good->list);
6865 INIT_LIST_HEAD(&good->backrefs);
6866 INIT_LIST_HEAD(&good->dups);
6867 good->cache.start = good->start;
6868 good->cache.size = good->nr;
6869 good->content_checked = 0;
6870 good->owner_ref_checked = 0;
6871 good->num_duplicates = 0;
6872 good->refs = rec->refs;
6873 list_splice_init(&rec->backrefs, &good->backrefs);
6874 while (1) {
6875 cache = lookup_cache_extent(extent_cache, good->start,
6876 good->nr);
6877 if (!cache)
6878 break;
6879 tmp = container_of(cache, struct extent_record, cache);
6882 * If we find another overlapping extent and it's found_rec is
6883 * set then it's a duplicate and we need to try and delete
6884 * something.
6886 if (tmp->found_rec || tmp->num_duplicates > 0) {
6887 if (list_empty(&good->list))
6888 list_add_tail(&good->list,
6889 &duplicate_extents);
6890 good->num_duplicates += tmp->num_duplicates + 1;
6891 list_splice_init(&tmp->dups, &good->dups);
6892 list_del_init(&tmp->list);
6893 list_add_tail(&tmp->list, &good->dups);
6894 remove_cache_extent(extent_cache, &tmp->cache);
6895 continue;
6899 * Ok we have another non extent item backed extent rec, so lets
6900 * just add it to this extent and carry on like we did above.
6902 good->refs += tmp->refs;
6903 list_splice_init(&tmp->backrefs, &good->backrefs);
6904 remove_cache_extent(extent_cache, &tmp->cache);
6905 free(tmp);
6907 ret = insert_cache_extent(extent_cache, &good->cache);
6908 BUG_ON(ret);
6909 free(rec);
6910 return good->num_duplicates ? 0 : 1;
6913 static int delete_duplicate_records(struct btrfs_root *root,
6914 struct extent_record *rec)
6916 struct btrfs_trans_handle *trans;
6917 LIST_HEAD(delete_list);
6918 struct btrfs_path *path;
6919 struct extent_record *tmp, *good, *n;
6920 int nr_del = 0;
6921 int ret = 0, err;
6922 struct btrfs_key key;
6924 path = btrfs_alloc_path();
6925 if (!path) {
6926 ret = -ENOMEM;
6927 goto out;
6930 good = rec;
6931 /* Find the record that covers all of the duplicates. */
6932 list_for_each_entry(tmp, &rec->dups, list) {
6933 if (good->start < tmp->start)
6934 continue;
6935 if (good->nr > tmp->nr)
6936 continue;
6938 if (tmp->start + tmp->nr < good->start + good->nr) {
6939 fprintf(stderr, "Ok we have overlapping extents that "
6940 "aren't completely covered by eachother, this "
6941 "is going to require more careful thought. "
6942 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
6943 tmp->start, tmp->nr, good->start, good->nr);
6944 abort();
6946 good = tmp;
6949 if (good != rec)
6950 list_add_tail(&rec->list, &delete_list);
6952 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
6953 if (tmp == good)
6954 continue;
6955 list_move_tail(&tmp->list, &delete_list);
6958 root = root->fs_info->extent_root;
6959 trans = btrfs_start_transaction(root, 1);
6960 if (IS_ERR(trans)) {
6961 ret = PTR_ERR(trans);
6962 goto out;
6965 list_for_each_entry(tmp, &delete_list, list) {
6966 if (tmp->found_rec == 0)
6967 continue;
6968 key.objectid = tmp->start;
6969 key.type = BTRFS_EXTENT_ITEM_KEY;
6970 key.offset = tmp->nr;
6972 /* Shouldn't happen but just in case */
6973 if (tmp->metadata) {
6974 fprintf(stderr, "Well this shouldn't happen, extent "
6975 "record overlaps but is metadata? "
6976 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
6977 abort();
6980 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6981 if (ret) {
6982 if (ret > 0)
6983 ret = -EINVAL;
6984 break;
6986 ret = btrfs_del_item(trans, root, path);
6987 if (ret)
6988 break;
6989 btrfs_release_path(path);
6990 nr_del++;
6992 err = btrfs_commit_transaction(trans, root);
6993 if (err && !ret)
6994 ret = err;
6995 out:
6996 while (!list_empty(&delete_list)) {
6997 tmp = list_entry(delete_list.next, struct extent_record, list);
6998 list_del_init(&tmp->list);
6999 if (tmp == rec)
7000 continue;
7001 free(tmp);
7004 while (!list_empty(&rec->dups)) {
7005 tmp = list_entry(rec->dups.next, struct extent_record, list);
7006 list_del_init(&tmp->list);
7007 free(tmp);
7010 btrfs_free_path(path);
7012 if (!ret && !nr_del)
7013 rec->num_duplicates = 0;
7015 return ret ? ret : nr_del;
7018 static int find_possible_backrefs(struct btrfs_fs_info *info,
7019 struct btrfs_path *path,
7020 struct cache_tree *extent_cache,
7021 struct extent_record *rec)
7023 struct btrfs_root *root;
7024 struct extent_backref *back;
7025 struct data_backref *dback;
7026 struct cache_extent *cache;
7027 struct btrfs_file_extent_item *fi;
7028 struct btrfs_key key;
7029 u64 bytenr, bytes;
7030 int ret;
7032 list_for_each_entry(back, &rec->backrefs, list) {
7033 /* Don't care about full backrefs (poor unloved backrefs) */
7034 if (back->full_backref || !back->is_data)
7035 continue;
7037 dback = (struct data_backref *)back;
7039 /* We found this one, we don't need to do a lookup */
7040 if (dback->found_ref)
7041 continue;
7043 key.objectid = dback->root;
7044 key.type = BTRFS_ROOT_ITEM_KEY;
7045 key.offset = (u64)-1;
7047 root = btrfs_read_fs_root(info, &key);
7049 /* No root, definitely a bad ref, skip */
7050 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7051 continue;
7052 /* Other err, exit */
7053 if (IS_ERR(root))
7054 return PTR_ERR(root);
7056 key.objectid = dback->owner;
7057 key.type = BTRFS_EXTENT_DATA_KEY;
7058 key.offset = dback->offset;
7059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7060 if (ret) {
7061 btrfs_release_path(path);
7062 if (ret < 0)
7063 return ret;
7064 /* Didn't find it, we can carry on */
7065 ret = 0;
7066 continue;
7069 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7070 struct btrfs_file_extent_item);
7071 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7072 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7073 btrfs_release_path(path);
7074 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7075 if (cache) {
7076 struct extent_record *tmp;
7077 tmp = container_of(cache, struct extent_record, cache);
7080 * If we found an extent record for the bytenr for this
7081 * particular backref then we can't add it to our
7082 * current extent record. We only want to add backrefs
7083 * that don't have a corresponding extent item in the
7084 * extent tree since they likely belong to this record
7085 * and we need to fix it if it doesn't match bytenrs.
7087 if (tmp->found_rec)
7088 continue;
7091 dback->found_ref += 1;
7092 dback->disk_bytenr = bytenr;
7093 dback->bytes = bytes;
7096 * Set this so the verify backref code knows not to trust the
7097 * values in this backref.
7099 back->broken = 1;
7102 return 0;
7106 * Record orphan data ref into corresponding root.
7108 * Return 0 if the extent item contains data ref and recorded.
7109 * Return 1 if the extent item contains no useful data ref
7110 * On that case, it may contains only shared_dataref or metadata backref
7111 * or the file extent exists(this should be handled by the extent bytenr
7112 * recovery routine)
7113 * Return <0 if something goes wrong.
7115 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7116 struct extent_record *rec)
7118 struct btrfs_key key;
7119 struct btrfs_root *dest_root;
7120 struct extent_backref *back;
7121 struct data_backref *dback;
7122 struct orphan_data_extent *orphan;
7123 struct btrfs_path *path;
7124 int recorded_data_ref = 0;
7125 int ret = 0;
7127 if (rec->metadata)
7128 return 1;
7129 path = btrfs_alloc_path();
7130 if (!path)
7131 return -ENOMEM;
7132 list_for_each_entry(back, &rec->backrefs, list) {
7133 if (back->full_backref || !back->is_data ||
7134 !back->found_extent_tree)
7135 continue;
7136 dback = (struct data_backref *)back;
7137 if (dback->found_ref)
7138 continue;
7139 key.objectid = dback->root;
7140 key.type = BTRFS_ROOT_ITEM_KEY;
7141 key.offset = (u64)-1;
7143 dest_root = btrfs_read_fs_root(fs_info, &key);
7145 /* For non-exist root we just skip it */
7146 if (IS_ERR(dest_root) || !dest_root)
7147 continue;
7149 key.objectid = dback->owner;
7150 key.type = BTRFS_EXTENT_DATA_KEY;
7151 key.offset = dback->offset;
7153 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7155 * For ret < 0, it's OK since the fs-tree may be corrupted,
7156 * we need to record it for inode/file extent rebuild.
7157 * For ret > 0, we record it only for file extent rebuild.
7158 * For ret == 0, the file extent exists but only bytenr
7159 * mismatch, let the original bytenr fix routine to handle,
7160 * don't record it.
7162 if (ret == 0)
7163 continue;
7164 ret = 0;
7165 orphan = malloc(sizeof(*orphan));
7166 if (!orphan) {
7167 ret = -ENOMEM;
7168 goto out;
7170 INIT_LIST_HEAD(&orphan->list);
7171 orphan->root = dback->root;
7172 orphan->objectid = dback->owner;
7173 orphan->offset = dback->offset;
7174 orphan->disk_bytenr = rec->cache.start;
7175 orphan->disk_len = rec->cache.size;
7176 list_add(&dest_root->orphan_data_extents, &orphan->list);
7177 recorded_data_ref = 1;
7179 out:
7180 btrfs_free_path(path);
7181 if (!ret)
7182 return !recorded_data_ref;
7183 else
7184 return ret;
7188 * when an incorrect extent item is found, this will delete
7189 * all of the existing entries for it and recreate them
7190 * based on what the tree scan found.
7192 static int fixup_extent_refs(struct btrfs_fs_info *info,
7193 struct cache_tree *extent_cache,
7194 struct extent_record *rec)
7196 struct btrfs_trans_handle *trans = NULL;
7197 int ret;
7198 struct btrfs_path *path;
7199 struct list_head *cur = rec->backrefs.next;
7200 struct cache_extent *cache;
7201 struct extent_backref *back;
7202 int allocated = 0;
7203 u64 flags = 0;
7205 if (rec->flag_block_full_backref)
7206 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7208 path = btrfs_alloc_path();
7209 if (!path)
7210 return -ENOMEM;
7212 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7214 * Sometimes the backrefs themselves are so broken they don't
7215 * get attached to any meaningful rec, so first go back and
7216 * check any of our backrefs that we couldn't find and throw
7217 * them into the list if we find the backref so that
7218 * verify_backrefs can figure out what to do.
7220 ret = find_possible_backrefs(info, path, extent_cache, rec);
7221 if (ret < 0)
7222 goto out;
7225 /* step one, make sure all of the backrefs agree */
7226 ret = verify_backrefs(info, path, rec);
7227 if (ret < 0)
7228 goto out;
7230 trans = btrfs_start_transaction(info->extent_root, 1);
7231 if (IS_ERR(trans)) {
7232 ret = PTR_ERR(trans);
7233 goto out;
7236 /* step two, delete all the existing records */
7237 ret = delete_extent_records(trans, info->extent_root, path,
7238 rec->start, rec->max_size);
7240 if (ret < 0)
7241 goto out;
7243 /* was this block corrupt? If so, don't add references to it */
7244 cache = lookup_cache_extent(info->corrupt_blocks,
7245 rec->start, rec->max_size);
7246 if (cache) {
7247 ret = 0;
7248 goto out;
7251 /* step three, recreate all the refs we did find */
7252 while(cur != &rec->backrefs) {
7253 back = list_entry(cur, struct extent_backref, list);
7254 cur = cur->next;
7257 * if we didn't find any references, don't create a
7258 * new extent record
7260 if (!back->found_ref)
7261 continue;
7263 rec->bad_full_backref = 0;
7264 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7265 allocated = 1;
7267 if (ret)
7268 goto out;
7270 out:
7271 if (trans) {
7272 int err = btrfs_commit_transaction(trans, info->extent_root);
7273 if (!ret)
7274 ret = err;
7277 btrfs_free_path(path);
7278 return ret;
7281 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7282 struct extent_record *rec)
7284 struct btrfs_trans_handle *trans;
7285 struct btrfs_root *root = fs_info->extent_root;
7286 struct btrfs_path *path;
7287 struct btrfs_extent_item *ei;
7288 struct btrfs_key key;
7289 u64 flags;
7290 int ret = 0;
7292 key.objectid = rec->start;
7293 if (rec->metadata) {
7294 key.type = BTRFS_METADATA_ITEM_KEY;
7295 key.offset = rec->info_level;
7296 } else {
7297 key.type = BTRFS_EXTENT_ITEM_KEY;
7298 key.offset = rec->max_size;
7301 path = btrfs_alloc_path();
7302 if (!path)
7303 return -ENOMEM;
7305 trans = btrfs_start_transaction(root, 0);
7306 if (IS_ERR(trans)) {
7307 btrfs_free_path(path);
7308 return PTR_ERR(trans);
7311 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7312 if (ret < 0) {
7313 btrfs_free_path(path);
7314 btrfs_commit_transaction(trans, root);
7315 return ret;
7316 } else if (ret) {
7317 fprintf(stderr, "Didn't find extent for %llu\n",
7318 (unsigned long long)rec->start);
7319 btrfs_free_path(path);
7320 btrfs_commit_transaction(trans, root);
7321 return -ENOENT;
7324 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7325 struct btrfs_extent_item);
7326 flags = btrfs_extent_flags(path->nodes[0], ei);
7327 if (rec->flag_block_full_backref) {
7328 fprintf(stderr, "setting full backref on %llu\n",
7329 (unsigned long long)key.objectid);
7330 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7331 } else {
7332 fprintf(stderr, "clearing full backref on %llu\n",
7333 (unsigned long long)key.objectid);
7334 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7336 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7337 btrfs_mark_buffer_dirty(path->nodes[0]);
7338 btrfs_free_path(path);
7339 return btrfs_commit_transaction(trans, root);
7342 /* right now we only prune from the extent allocation tree */
7343 static int prune_one_block(struct btrfs_trans_handle *trans,
7344 struct btrfs_fs_info *info,
7345 struct btrfs_corrupt_block *corrupt)
7347 int ret;
7348 struct btrfs_path path;
7349 struct extent_buffer *eb;
7350 u64 found;
7351 int slot;
7352 int nritems;
7353 int level = corrupt->level + 1;
7355 btrfs_init_path(&path);
7356 again:
7357 /* we want to stop at the parent to our busted block */
7358 path.lowest_level = level;
7360 ret = btrfs_search_slot(trans, info->extent_root,
7361 &corrupt->key, &path, -1, 1);
7363 if (ret < 0)
7364 goto out;
7366 eb = path.nodes[level];
7367 if (!eb) {
7368 ret = -ENOENT;
7369 goto out;
7373 * hopefully the search gave us the block we want to prune,
7374 * lets try that first
7376 slot = path.slots[level];
7377 found = btrfs_node_blockptr(eb, slot);
7378 if (found == corrupt->cache.start)
7379 goto del_ptr;
7381 nritems = btrfs_header_nritems(eb);
7383 /* the search failed, lets scan this node and hope we find it */
7384 for (slot = 0; slot < nritems; slot++) {
7385 found = btrfs_node_blockptr(eb, slot);
7386 if (found == corrupt->cache.start)
7387 goto del_ptr;
7390 * we couldn't find the bad block. TODO, search all the nodes for pointers
7391 * to this block
7393 if (eb == info->extent_root->node) {
7394 ret = -ENOENT;
7395 goto out;
7396 } else {
7397 level++;
7398 btrfs_release_path(&path);
7399 goto again;
7402 del_ptr:
7403 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7404 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7406 out:
7407 btrfs_release_path(&path);
7408 return ret;
7411 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7413 struct btrfs_trans_handle *trans = NULL;
7414 struct cache_extent *cache;
7415 struct btrfs_corrupt_block *corrupt;
7417 while (1) {
7418 cache = search_cache_extent(info->corrupt_blocks, 0);
7419 if (!cache)
7420 break;
7421 if (!trans) {
7422 trans = btrfs_start_transaction(info->extent_root, 1);
7423 if (IS_ERR(trans))
7424 return PTR_ERR(trans);
7426 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7427 prune_one_block(trans, info, corrupt);
7428 remove_cache_extent(info->corrupt_blocks, cache);
7430 if (trans)
7431 return btrfs_commit_transaction(trans, info->extent_root);
7432 return 0;
7435 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7437 struct btrfs_block_group_cache *cache;
7438 u64 start, end;
7439 int ret;
7441 while (1) {
7442 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7443 &start, &end, EXTENT_DIRTY);
7444 if (ret)
7445 break;
7446 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7447 GFP_NOFS);
7450 start = 0;
7451 while (1) {
7452 cache = btrfs_lookup_first_block_group(fs_info, start);
7453 if (!cache)
7454 break;
7455 if (cache->cached)
7456 cache->cached = 0;
7457 start = cache->key.objectid + cache->key.offset;
7461 static int check_extent_refs(struct btrfs_root *root,
7462 struct cache_tree *extent_cache)
7464 struct extent_record *rec;
7465 struct cache_extent *cache;
7466 int err = 0;
7467 int ret = 0;
7468 int fixed = 0;
7469 int had_dups = 0;
7470 int recorded = 0;
7472 if (repair) {
7474 * if we're doing a repair, we have to make sure
7475 * we don't allocate from the problem extents.
7476 * In the worst case, this will be all the
7477 * extents in the FS
7479 cache = search_cache_extent(extent_cache, 0);
7480 while(cache) {
7481 rec = container_of(cache, struct extent_record, cache);
7482 set_extent_dirty(root->fs_info->excluded_extents,
7483 rec->start,
7484 rec->start + rec->max_size - 1,
7485 GFP_NOFS);
7486 cache = next_cache_extent(cache);
7489 /* pin down all the corrupted blocks too */
7490 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7491 while(cache) {
7492 set_extent_dirty(root->fs_info->excluded_extents,
7493 cache->start,
7494 cache->start + cache->size - 1,
7495 GFP_NOFS);
7496 cache = next_cache_extent(cache);
7498 prune_corrupt_blocks(root->fs_info);
7499 reset_cached_block_groups(root->fs_info);
7502 reset_cached_block_groups(root->fs_info);
7505 * We need to delete any duplicate entries we find first otherwise we
7506 * could mess up the extent tree when we have backrefs that actually
7507 * belong to a different extent item and not the weird duplicate one.
7509 while (repair && !list_empty(&duplicate_extents)) {
7510 rec = list_entry(duplicate_extents.next, struct extent_record,
7511 list);
7512 list_del_init(&rec->list);
7514 /* Sometimes we can find a backref before we find an actual
7515 * extent, so we need to process it a little bit to see if there
7516 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7517 * if this is a backref screwup. If we need to delete stuff
7518 * process_duplicates() will return 0, otherwise it will return
7519 * 1 and we
7521 if (process_duplicates(root, extent_cache, rec))
7522 continue;
7523 ret = delete_duplicate_records(root, rec);
7524 if (ret < 0)
7525 return ret;
7527 * delete_duplicate_records will return the number of entries
7528 * deleted, so if it's greater than 0 then we know we actually
7529 * did something and we need to remove.
7531 if (ret)
7532 had_dups = 1;
7535 if (had_dups)
7536 return -EAGAIN;
7538 while(1) {
7539 int cur_err = 0;
7541 fixed = 0;
7542 recorded = 0;
7543 cache = search_cache_extent(extent_cache, 0);
7544 if (!cache)
7545 break;
7546 rec = container_of(cache, struct extent_record, cache);
7547 if (rec->num_duplicates) {
7548 fprintf(stderr, "extent item %llu has multiple extent "
7549 "items\n", (unsigned long long)rec->start);
7550 err = 1;
7551 cur_err = 1;
7554 if (rec->refs != rec->extent_item_refs) {
7555 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7556 (unsigned long long)rec->start,
7557 (unsigned long long)rec->nr);
7558 fprintf(stderr, "extent item %llu, found %llu\n",
7559 (unsigned long long)rec->extent_item_refs,
7560 (unsigned long long)rec->refs);
7561 ret = record_orphan_data_extents(root->fs_info, rec);
7562 if (ret < 0)
7563 goto repair_abort;
7564 if (ret == 0) {
7565 recorded = 1;
7566 } else {
7568 * we can't use the extent to repair file
7569 * extent, let the fallback method handle it.
7571 if (!fixed && repair) {
7572 ret = fixup_extent_refs(
7573 root->fs_info,
7574 extent_cache, rec);
7575 if (ret)
7576 goto repair_abort;
7577 fixed = 1;
7580 err = 1;
7581 cur_err = 1;
7583 if (all_backpointers_checked(rec, 1)) {
7584 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7585 (unsigned long long)rec->start,
7586 (unsigned long long)rec->nr);
7588 if (!fixed && !recorded && repair) {
7589 ret = fixup_extent_refs(root->fs_info,
7590 extent_cache, rec);
7591 if (ret)
7592 goto repair_abort;
7593 fixed = 1;
7595 cur_err = 1;
7596 err = 1;
7598 if (!rec->owner_ref_checked) {
7599 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7600 (unsigned long long)rec->start,
7601 (unsigned long long)rec->nr);
7602 if (!fixed && !recorded && repair) {
7603 ret = fixup_extent_refs(root->fs_info,
7604 extent_cache, rec);
7605 if (ret)
7606 goto repair_abort;
7607 fixed = 1;
7609 err = 1;
7610 cur_err = 1;
7612 if (rec->bad_full_backref) {
7613 fprintf(stderr, "bad full backref, on [%llu]\n",
7614 (unsigned long long)rec->start);
7615 if (repair) {
7616 ret = fixup_extent_flags(root->fs_info, rec);
7617 if (ret)
7618 goto repair_abort;
7619 fixed = 1;
7621 err = 1;
7622 cur_err = 1;
7625 * Although it's not a extent ref's problem, we reuse this
7626 * routine for error reporting.
7627 * No repair function yet.
7629 if (rec->crossing_stripes) {
7630 fprintf(stderr,
7631 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7632 rec->start, rec->start + rec->max_size);
7633 err = 1;
7634 cur_err = 1;
7637 if (rec->wrong_chunk_type) {
7638 fprintf(stderr,
7639 "bad extent [%llu, %llu), type mismatch with chunk\n",
7640 rec->start, rec->start + rec->max_size);
7641 err = 1;
7642 cur_err = 1;
7645 remove_cache_extent(extent_cache, cache);
7646 free_all_extent_backrefs(rec);
7647 if (!init_extent_tree && repair && (!cur_err || fixed))
7648 clear_extent_dirty(root->fs_info->excluded_extents,
7649 rec->start,
7650 rec->start + rec->max_size - 1,
7651 GFP_NOFS);
7652 free(rec);
7654 repair_abort:
7655 if (repair) {
7656 if (ret && ret != -EAGAIN) {
7657 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7658 exit(1);
7659 } else if (!ret) {
7660 struct btrfs_trans_handle *trans;
7662 root = root->fs_info->extent_root;
7663 trans = btrfs_start_transaction(root, 1);
7664 if (IS_ERR(trans)) {
7665 ret = PTR_ERR(trans);
7666 goto repair_abort;
7669 btrfs_fix_block_accounting(trans, root);
7670 ret = btrfs_commit_transaction(trans, root);
7671 if (ret)
7672 goto repair_abort;
7674 if (err)
7675 fprintf(stderr, "repaired damaged extent references\n");
7676 return ret;
7678 return err;
7681 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7683 u64 stripe_size;
7685 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7686 stripe_size = length;
7687 stripe_size /= num_stripes;
7688 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7689 stripe_size = length * 2;
7690 stripe_size /= num_stripes;
7691 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7692 stripe_size = length;
7693 stripe_size /= (num_stripes - 1);
7694 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7695 stripe_size = length;
7696 stripe_size /= (num_stripes - 2);
7697 } else {
7698 stripe_size = length;
7700 return stripe_size;
7704 * Check the chunk with its block group/dev list ref:
7705 * Return 0 if all refs seems valid.
7706 * Return 1 if part of refs seems valid, need later check for rebuild ref
7707 * like missing block group and needs to search extent tree to rebuild them.
7708 * Return -1 if essential refs are missing and unable to rebuild.
7710 static int check_chunk_refs(struct chunk_record *chunk_rec,
7711 struct block_group_tree *block_group_cache,
7712 struct device_extent_tree *dev_extent_cache,
7713 int silent)
7715 struct cache_extent *block_group_item;
7716 struct block_group_record *block_group_rec;
7717 struct cache_extent *dev_extent_item;
7718 struct device_extent_record *dev_extent_rec;
7719 u64 devid;
7720 u64 offset;
7721 u64 length;
7722 int metadump_v2 = 0;
7723 int i;
7724 int ret = 0;
7726 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7727 chunk_rec->offset,
7728 chunk_rec->length);
7729 if (block_group_item) {
7730 block_group_rec = container_of(block_group_item,
7731 struct block_group_record,
7732 cache);
7733 if (chunk_rec->length != block_group_rec->offset ||
7734 chunk_rec->offset != block_group_rec->objectid ||
7735 (!metadump_v2 &&
7736 chunk_rec->type_flags != block_group_rec->flags)) {
7737 if (!silent)
7738 fprintf(stderr,
7739 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7740 chunk_rec->objectid,
7741 chunk_rec->type,
7742 chunk_rec->offset,
7743 chunk_rec->length,
7744 chunk_rec->offset,
7745 chunk_rec->type_flags,
7746 block_group_rec->objectid,
7747 block_group_rec->type,
7748 block_group_rec->offset,
7749 block_group_rec->offset,
7750 block_group_rec->objectid,
7751 block_group_rec->flags);
7752 ret = -1;
7753 } else {
7754 list_del_init(&block_group_rec->list);
7755 chunk_rec->bg_rec = block_group_rec;
7757 } else {
7758 if (!silent)
7759 fprintf(stderr,
7760 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7761 chunk_rec->objectid,
7762 chunk_rec->type,
7763 chunk_rec->offset,
7764 chunk_rec->length,
7765 chunk_rec->offset,
7766 chunk_rec->type_flags);
7767 ret = 1;
7770 if (metadump_v2)
7771 return ret;
7773 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7774 chunk_rec->num_stripes);
7775 for (i = 0; i < chunk_rec->num_stripes; ++i) {
7776 devid = chunk_rec->stripes[i].devid;
7777 offset = chunk_rec->stripes[i].offset;
7778 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7779 devid, offset, length);
7780 if (dev_extent_item) {
7781 dev_extent_rec = container_of(dev_extent_item,
7782 struct device_extent_record,
7783 cache);
7784 if (dev_extent_rec->objectid != devid ||
7785 dev_extent_rec->offset != offset ||
7786 dev_extent_rec->chunk_offset != chunk_rec->offset ||
7787 dev_extent_rec->length != length) {
7788 if (!silent)
7789 fprintf(stderr,
7790 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7791 chunk_rec->objectid,
7792 chunk_rec->type,
7793 chunk_rec->offset,
7794 chunk_rec->stripes[i].devid,
7795 chunk_rec->stripes[i].offset,
7796 dev_extent_rec->objectid,
7797 dev_extent_rec->offset,
7798 dev_extent_rec->length);
7799 ret = -1;
7800 } else {
7801 list_move(&dev_extent_rec->chunk_list,
7802 &chunk_rec->dextents);
7804 } else {
7805 if (!silent)
7806 fprintf(stderr,
7807 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7808 chunk_rec->objectid,
7809 chunk_rec->type,
7810 chunk_rec->offset,
7811 chunk_rec->stripes[i].devid,
7812 chunk_rec->stripes[i].offset);
7813 ret = -1;
7816 return ret;
7819 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7820 int check_chunks(struct cache_tree *chunk_cache,
7821 struct block_group_tree *block_group_cache,
7822 struct device_extent_tree *dev_extent_cache,
7823 struct list_head *good, struct list_head *bad,
7824 struct list_head *rebuild, int silent)
7826 struct cache_extent *chunk_item;
7827 struct chunk_record *chunk_rec;
7828 struct block_group_record *bg_rec;
7829 struct device_extent_record *dext_rec;
7830 int err;
7831 int ret = 0;
7833 chunk_item = first_cache_extent(chunk_cache);
7834 while (chunk_item) {
7835 chunk_rec = container_of(chunk_item, struct chunk_record,
7836 cache);
7837 err = check_chunk_refs(chunk_rec, block_group_cache,
7838 dev_extent_cache, silent);
7839 if (err < 0)
7840 ret = err;
7841 if (err == 0 && good)
7842 list_add_tail(&chunk_rec->list, good);
7843 if (err > 0 && rebuild)
7844 list_add_tail(&chunk_rec->list, rebuild);
7845 if (err < 0 && bad)
7846 list_add_tail(&chunk_rec->list, bad);
7847 chunk_item = next_cache_extent(chunk_item);
7850 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7851 if (!silent)
7852 fprintf(stderr,
7853 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7854 bg_rec->objectid,
7855 bg_rec->offset,
7856 bg_rec->flags);
7857 if (!ret)
7858 ret = 1;
7861 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7862 chunk_list) {
7863 if (!silent)
7864 fprintf(stderr,
7865 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7866 dext_rec->objectid,
7867 dext_rec->offset,
7868 dext_rec->length);
7869 if (!ret)
7870 ret = 1;
7872 return ret;
7876 static int check_device_used(struct device_record *dev_rec,
7877 struct device_extent_tree *dext_cache)
7879 struct cache_extent *cache;
7880 struct device_extent_record *dev_extent_rec;
7881 u64 total_byte = 0;
7883 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
7884 while (cache) {
7885 dev_extent_rec = container_of(cache,
7886 struct device_extent_record,
7887 cache);
7888 if (dev_extent_rec->objectid != dev_rec->devid)
7889 break;
7891 list_del_init(&dev_extent_rec->device_list);
7892 total_byte += dev_extent_rec->length;
7893 cache = next_cache_extent(cache);
7896 if (total_byte != dev_rec->byte_used) {
7897 fprintf(stderr,
7898 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
7899 total_byte, dev_rec->byte_used, dev_rec->objectid,
7900 dev_rec->type, dev_rec->offset);
7901 return -1;
7902 } else {
7903 return 0;
7907 /* check btrfs_dev_item -> btrfs_dev_extent */
7908 static int check_devices(struct rb_root *dev_cache,
7909 struct device_extent_tree *dev_extent_cache)
7911 struct rb_node *dev_node;
7912 struct device_record *dev_rec;
7913 struct device_extent_record *dext_rec;
7914 int err;
7915 int ret = 0;
7917 dev_node = rb_first(dev_cache);
7918 while (dev_node) {
7919 dev_rec = container_of(dev_node, struct device_record, node);
7920 err = check_device_used(dev_rec, dev_extent_cache);
7921 if (err)
7922 ret = err;
7924 dev_node = rb_next(dev_node);
7926 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
7927 device_list) {
7928 fprintf(stderr,
7929 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
7930 dext_rec->objectid, dext_rec->offset, dext_rec->length);
7931 if (!ret)
7932 ret = 1;
7934 return ret;
7937 static int add_root_item_to_list(struct list_head *head,
7938 u64 objectid, u64 bytenr, u64 last_snapshot,
7939 u8 level, u8 drop_level,
7940 int level_size, struct btrfs_key *drop_key)
7943 struct root_item_record *ri_rec;
7944 ri_rec = malloc(sizeof(*ri_rec));
7945 if (!ri_rec)
7946 return -ENOMEM;
7947 ri_rec->bytenr = bytenr;
7948 ri_rec->objectid = objectid;
7949 ri_rec->level = level;
7950 ri_rec->level_size = level_size;
7951 ri_rec->drop_level = drop_level;
7952 ri_rec->last_snapshot = last_snapshot;
7953 if (drop_key)
7954 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
7955 list_add_tail(&ri_rec->list, head);
7957 return 0;
7960 static void free_root_item_list(struct list_head *list)
7962 struct root_item_record *ri_rec;
7964 while (!list_empty(list)) {
7965 ri_rec = list_first_entry(list, struct root_item_record,
7966 list);
7967 list_del_init(&ri_rec->list);
7968 free(ri_rec);
7972 static int deal_root_from_list(struct list_head *list,
7973 struct btrfs_root *root,
7974 struct block_info *bits,
7975 int bits_nr,
7976 struct cache_tree *pending,
7977 struct cache_tree *seen,
7978 struct cache_tree *reada,
7979 struct cache_tree *nodes,
7980 struct cache_tree *extent_cache,
7981 struct cache_tree *chunk_cache,
7982 struct rb_root *dev_cache,
7983 struct block_group_tree *block_group_cache,
7984 struct device_extent_tree *dev_extent_cache)
7986 int ret = 0;
7987 u64 last;
7989 while (!list_empty(list)) {
7990 struct root_item_record *rec;
7991 struct extent_buffer *buf;
7992 rec = list_entry(list->next,
7993 struct root_item_record, list);
7994 last = 0;
7995 buf = read_tree_block(root->fs_info->tree_root,
7996 rec->bytenr, rec->level_size, 0);
7997 if (!extent_buffer_uptodate(buf)) {
7998 free_extent_buffer(buf);
7999 ret = -EIO;
8000 break;
8002 add_root_to_pending(buf, extent_cache, pending,
8003 seen, nodes, rec->objectid);
8005 * To rebuild extent tree, we need deal with snapshot
8006 * one by one, otherwise we deal with node firstly which
8007 * can maximize readahead.
8009 while (1) {
8010 ret = run_next_block(root, bits, bits_nr, &last,
8011 pending, seen, reada, nodes,
8012 extent_cache, chunk_cache,
8013 dev_cache, block_group_cache,
8014 dev_extent_cache, rec);
8015 if (ret != 0)
8016 break;
8018 free_extent_buffer(buf);
8019 list_del(&rec->list);
8020 free(rec);
8021 if (ret < 0)
8022 break;
8024 while (ret >= 0) {
8025 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8026 reada, nodes, extent_cache, chunk_cache,
8027 dev_cache, block_group_cache,
8028 dev_extent_cache, NULL);
8029 if (ret != 0) {
8030 if (ret > 0)
8031 ret = 0;
8032 break;
8035 return ret;
8038 static int check_chunks_and_extents(struct btrfs_root *root)
8040 struct rb_root dev_cache;
8041 struct cache_tree chunk_cache;
8042 struct block_group_tree block_group_cache;
8043 struct device_extent_tree dev_extent_cache;
8044 struct cache_tree extent_cache;
8045 struct cache_tree seen;
8046 struct cache_tree pending;
8047 struct cache_tree reada;
8048 struct cache_tree nodes;
8049 struct extent_io_tree excluded_extents;
8050 struct cache_tree corrupt_blocks;
8051 struct btrfs_path path;
8052 struct btrfs_key key;
8053 struct btrfs_key found_key;
8054 int ret, err = 0;
8055 struct block_info *bits;
8056 int bits_nr;
8057 struct extent_buffer *leaf;
8058 int slot;
8059 struct btrfs_root_item ri;
8060 struct list_head dropping_trees;
8061 struct list_head normal_trees;
8062 struct btrfs_root *root1;
8063 u64 objectid;
8064 u32 level_size;
8065 u8 level;
8067 dev_cache = RB_ROOT;
8068 cache_tree_init(&chunk_cache);
8069 block_group_tree_init(&block_group_cache);
8070 device_extent_tree_init(&dev_extent_cache);
8072 cache_tree_init(&extent_cache);
8073 cache_tree_init(&seen);
8074 cache_tree_init(&pending);
8075 cache_tree_init(&nodes);
8076 cache_tree_init(&reada);
8077 cache_tree_init(&corrupt_blocks);
8078 extent_io_tree_init(&excluded_extents);
8079 INIT_LIST_HEAD(&dropping_trees);
8080 INIT_LIST_HEAD(&normal_trees);
8082 if (repair) {
8083 root->fs_info->excluded_extents = &excluded_extents;
8084 root->fs_info->fsck_extent_cache = &extent_cache;
8085 root->fs_info->free_extent_hook = free_extent_hook;
8086 root->fs_info->corrupt_blocks = &corrupt_blocks;
8089 bits_nr = 1024;
8090 bits = malloc(bits_nr * sizeof(struct block_info));
8091 if (!bits) {
8092 perror("malloc");
8093 exit(1);
8096 if (ctx.progress_enabled) {
8097 ctx.tp = TASK_EXTENTS;
8098 task_start(ctx.info);
8101 again:
8102 root1 = root->fs_info->tree_root;
8103 level = btrfs_header_level(root1->node);
8104 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8105 root1->node->start, 0, level, 0,
8106 btrfs_level_size(root1, level), NULL);
8107 if (ret < 0)
8108 goto out;
8109 root1 = root->fs_info->chunk_root;
8110 level = btrfs_header_level(root1->node);
8111 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8112 root1->node->start, 0, level, 0,
8113 btrfs_level_size(root1, level), NULL);
8114 if (ret < 0)
8115 goto out;
8116 btrfs_init_path(&path);
8117 key.offset = 0;
8118 key.objectid = 0;
8119 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8120 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8121 &key, &path, 0, 0);
8122 if (ret < 0)
8123 goto out;
8124 while(1) {
8125 leaf = path.nodes[0];
8126 slot = path.slots[0];
8127 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8128 ret = btrfs_next_leaf(root, &path);
8129 if (ret != 0)
8130 break;
8131 leaf = path.nodes[0];
8132 slot = path.slots[0];
8134 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8135 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8136 unsigned long offset;
8137 u64 last_snapshot;
8139 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8140 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8141 last_snapshot = btrfs_root_last_snapshot(&ri);
8142 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8143 level = btrfs_root_level(&ri);
8144 level_size = btrfs_level_size(root, level);
8145 ret = add_root_item_to_list(&normal_trees,
8146 found_key.objectid,
8147 btrfs_root_bytenr(&ri),
8148 last_snapshot, level,
8149 0, level_size, NULL);
8150 if (ret < 0)
8151 goto out;
8152 } else {
8153 level = btrfs_root_level(&ri);
8154 level_size = btrfs_level_size(root, level);
8155 objectid = found_key.objectid;
8156 btrfs_disk_key_to_cpu(&found_key,
8157 &ri.drop_progress);
8158 ret = add_root_item_to_list(&dropping_trees,
8159 objectid,
8160 btrfs_root_bytenr(&ri),
8161 last_snapshot, level,
8162 ri.drop_level,
8163 level_size, &found_key);
8164 if (ret < 0)
8165 goto out;
8168 path.slots[0]++;
8170 btrfs_release_path(&path);
8173 * check_block can return -EAGAIN if it fixes something, please keep
8174 * this in mind when dealing with return values from these functions, if
8175 * we get -EAGAIN we want to fall through and restart the loop.
8177 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8178 &seen, &reada, &nodes, &extent_cache,
8179 &chunk_cache, &dev_cache, &block_group_cache,
8180 &dev_extent_cache);
8181 if (ret < 0) {
8182 if (ret == -EAGAIN)
8183 goto loop;
8184 goto out;
8186 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8187 &pending, &seen, &reada, &nodes,
8188 &extent_cache, &chunk_cache, &dev_cache,
8189 &block_group_cache, &dev_extent_cache);
8190 if (ret < 0) {
8191 if (ret == -EAGAIN)
8192 goto loop;
8193 goto out;
8196 ret = check_chunks(&chunk_cache, &block_group_cache,
8197 &dev_extent_cache, NULL, NULL, NULL, 0);
8198 if (ret) {
8199 if (ret == -EAGAIN)
8200 goto loop;
8201 err = ret;
8204 ret = check_extent_refs(root, &extent_cache);
8205 if (ret < 0) {
8206 if (ret == -EAGAIN)
8207 goto loop;
8208 goto out;
8211 ret = check_devices(&dev_cache, &dev_extent_cache);
8212 if (ret && err)
8213 ret = err;
8215 out:
8216 task_stop(ctx.info);
8217 if (repair) {
8218 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8219 extent_io_tree_cleanup(&excluded_extents);
8220 root->fs_info->fsck_extent_cache = NULL;
8221 root->fs_info->free_extent_hook = NULL;
8222 root->fs_info->corrupt_blocks = NULL;
8223 root->fs_info->excluded_extents = NULL;
8225 free(bits);
8226 free_chunk_cache_tree(&chunk_cache);
8227 free_device_cache_tree(&dev_cache);
8228 free_block_group_tree(&block_group_cache);
8229 free_device_extent_tree(&dev_extent_cache);
8230 free_extent_cache_tree(&seen);
8231 free_extent_cache_tree(&pending);
8232 free_extent_cache_tree(&reada);
8233 free_extent_cache_tree(&nodes);
8234 return ret;
8235 loop:
8236 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8237 free_extent_cache_tree(&seen);
8238 free_extent_cache_tree(&pending);
8239 free_extent_cache_tree(&reada);
8240 free_extent_cache_tree(&nodes);
8241 free_chunk_cache_tree(&chunk_cache);
8242 free_block_group_tree(&block_group_cache);
8243 free_device_cache_tree(&dev_cache);
8244 free_device_extent_tree(&dev_extent_cache);
8245 free_extent_record_cache(root->fs_info, &extent_cache);
8246 free_root_item_list(&normal_trees);
8247 free_root_item_list(&dropping_trees);
8248 extent_io_tree_cleanup(&excluded_extents);
8249 goto again;
8252 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8253 struct btrfs_root *root, int overwrite)
8255 struct extent_buffer *c;
8256 struct extent_buffer *old = root->node;
8257 int level;
8258 int ret;
8259 struct btrfs_disk_key disk_key = {0,0,0};
8261 level = 0;
8263 if (overwrite) {
8264 c = old;
8265 extent_buffer_get(c);
8266 goto init;
8268 c = btrfs_alloc_free_block(trans, root,
8269 btrfs_level_size(root, 0),
8270 root->root_key.objectid,
8271 &disk_key, level, 0, 0);
8272 if (IS_ERR(c)) {
8273 c = old;
8274 extent_buffer_get(c);
8275 overwrite = 1;
8277 init:
8278 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8279 btrfs_set_header_level(c, level);
8280 btrfs_set_header_bytenr(c, c->start);
8281 btrfs_set_header_generation(c, trans->transid);
8282 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8283 btrfs_set_header_owner(c, root->root_key.objectid);
8285 write_extent_buffer(c, root->fs_info->fsid,
8286 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8288 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8289 btrfs_header_chunk_tree_uuid(c),
8290 BTRFS_UUID_SIZE);
8292 btrfs_mark_buffer_dirty(c);
8294 * this case can happen in the following case:
8296 * 1.overwrite previous root.
8298 * 2.reinit reloc data root, this is because we skip pin
8299 * down reloc data tree before which means we can allocate
8300 * same block bytenr here.
8302 if (old->start == c->start) {
8303 btrfs_set_root_generation(&root->root_item,
8304 trans->transid);
8305 root->root_item.level = btrfs_header_level(root->node);
8306 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8307 &root->root_key, &root->root_item);
8308 if (ret) {
8309 free_extent_buffer(c);
8310 return ret;
8313 free_extent_buffer(old);
8314 root->node = c;
8315 add_root_to_dirty_list(root);
8316 return 0;
8319 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8320 struct extent_buffer *eb, int tree_root)
8322 struct extent_buffer *tmp;
8323 struct btrfs_root_item *ri;
8324 struct btrfs_key key;
8325 u64 bytenr;
8326 u32 leafsize;
8327 int level = btrfs_header_level(eb);
8328 int nritems;
8329 int ret;
8330 int i;
8333 * If we have pinned this block before, don't pin it again.
8334 * This can not only avoid forever loop with broken filesystem
8335 * but also give us some speedups.
8337 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8338 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8339 return 0;
8341 btrfs_pin_extent(fs_info, eb->start, eb->len);
8343 leafsize = btrfs_super_leafsize(fs_info->super_copy);
8344 nritems = btrfs_header_nritems(eb);
8345 for (i = 0; i < nritems; i++) {
8346 if (level == 0) {
8347 btrfs_item_key_to_cpu(eb, &key, i);
8348 if (key.type != BTRFS_ROOT_ITEM_KEY)
8349 continue;
8350 /* Skip the extent root and reloc roots */
8351 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8352 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8353 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8354 continue;
8355 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8356 bytenr = btrfs_disk_root_bytenr(eb, ri);
8359 * If at any point we start needing the real root we
8360 * will have to build a stump root for the root we are
8361 * in, but for now this doesn't actually use the root so
8362 * just pass in extent_root.
8364 tmp = read_tree_block(fs_info->extent_root, bytenr,
8365 leafsize, 0);
8366 if (!extent_buffer_uptodate(tmp)) {
8367 fprintf(stderr, "Error reading root block\n");
8368 return -EIO;
8370 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8371 free_extent_buffer(tmp);
8372 if (ret)
8373 return ret;
8374 } else {
8375 bytenr = btrfs_node_blockptr(eb, i);
8377 /* If we aren't the tree root don't read the block */
8378 if (level == 1 && !tree_root) {
8379 btrfs_pin_extent(fs_info, bytenr, leafsize);
8380 continue;
8383 tmp = read_tree_block(fs_info->extent_root, bytenr,
8384 leafsize, 0);
8385 if (!extent_buffer_uptodate(tmp)) {
8386 fprintf(stderr, "Error reading tree block\n");
8387 return -EIO;
8389 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8390 free_extent_buffer(tmp);
8391 if (ret)
8392 return ret;
8396 return 0;
8399 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8401 int ret;
8403 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8404 if (ret)
8405 return ret;
8407 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8410 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8412 struct btrfs_block_group_cache *cache;
8413 struct btrfs_path *path;
8414 struct extent_buffer *leaf;
8415 struct btrfs_chunk *chunk;
8416 struct btrfs_key key;
8417 int ret;
8418 u64 start;
8420 path = btrfs_alloc_path();
8421 if (!path)
8422 return -ENOMEM;
8424 key.objectid = 0;
8425 key.type = BTRFS_CHUNK_ITEM_KEY;
8426 key.offset = 0;
8428 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8429 if (ret < 0) {
8430 btrfs_free_path(path);
8431 return ret;
8435 * We do this in case the block groups were screwed up and had alloc
8436 * bits that aren't actually set on the chunks. This happens with
8437 * restored images every time and could happen in real life I guess.
8439 fs_info->avail_data_alloc_bits = 0;
8440 fs_info->avail_metadata_alloc_bits = 0;
8441 fs_info->avail_system_alloc_bits = 0;
8443 /* First we need to create the in-memory block groups */
8444 while (1) {
8445 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8446 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8447 if (ret < 0) {
8448 btrfs_free_path(path);
8449 return ret;
8451 if (ret) {
8452 ret = 0;
8453 break;
8456 leaf = path->nodes[0];
8457 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8458 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8459 path->slots[0]++;
8460 continue;
8463 chunk = btrfs_item_ptr(leaf, path->slots[0],
8464 struct btrfs_chunk);
8465 btrfs_add_block_group(fs_info, 0,
8466 btrfs_chunk_type(leaf, chunk),
8467 key.objectid, key.offset,
8468 btrfs_chunk_length(leaf, chunk));
8469 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8470 key.offset + btrfs_chunk_length(leaf, chunk),
8471 GFP_NOFS);
8472 path->slots[0]++;
8474 start = 0;
8475 while (1) {
8476 cache = btrfs_lookup_first_block_group(fs_info, start);
8477 if (!cache)
8478 break;
8479 cache->cached = 1;
8480 start = cache->key.objectid + cache->key.offset;
8483 btrfs_free_path(path);
8484 return 0;
8487 static int reset_balance(struct btrfs_trans_handle *trans,
8488 struct btrfs_fs_info *fs_info)
8490 struct btrfs_root *root = fs_info->tree_root;
8491 struct btrfs_path *path;
8492 struct extent_buffer *leaf;
8493 struct btrfs_key key;
8494 int del_slot, del_nr = 0;
8495 int ret;
8496 int found = 0;
8498 path = btrfs_alloc_path();
8499 if (!path)
8500 return -ENOMEM;
8502 key.objectid = BTRFS_BALANCE_OBJECTID;
8503 key.type = BTRFS_BALANCE_ITEM_KEY;
8504 key.offset = 0;
8506 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8507 if (ret) {
8508 if (ret > 0)
8509 ret = 0;
8510 if (!ret)
8511 goto reinit_data_reloc;
8512 else
8513 goto out;
8516 ret = btrfs_del_item(trans, root, path);
8517 if (ret)
8518 goto out;
8519 btrfs_release_path(path);
8521 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8522 key.type = BTRFS_ROOT_ITEM_KEY;
8523 key.offset = 0;
8525 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8526 if (ret < 0)
8527 goto out;
8528 while (1) {
8529 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8530 if (!found)
8531 break;
8533 if (del_nr) {
8534 ret = btrfs_del_items(trans, root, path,
8535 del_slot, del_nr);
8536 del_nr = 0;
8537 if (ret)
8538 goto out;
8540 key.offset++;
8541 btrfs_release_path(path);
8543 found = 0;
8544 ret = btrfs_search_slot(trans, root, &key, path,
8545 -1, 1);
8546 if (ret < 0)
8547 goto out;
8548 continue;
8550 found = 1;
8551 leaf = path->nodes[0];
8552 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8553 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8554 break;
8555 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8556 path->slots[0]++;
8557 continue;
8559 if (!del_nr) {
8560 del_slot = path->slots[0];
8561 del_nr = 1;
8562 } else {
8563 del_nr++;
8565 path->slots[0]++;
8568 if (del_nr) {
8569 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8570 if (ret)
8571 goto out;
8573 btrfs_release_path(path);
8575 reinit_data_reloc:
8576 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8577 key.type = BTRFS_ROOT_ITEM_KEY;
8578 key.offset = (u64)-1;
8579 root = btrfs_read_fs_root(fs_info, &key);
8580 if (IS_ERR(root)) {
8581 fprintf(stderr, "Error reading data reloc tree\n");
8582 ret = PTR_ERR(root);
8583 goto out;
8585 record_root_in_trans(trans, root);
8586 ret = btrfs_fsck_reinit_root(trans, root, 0);
8587 if (ret)
8588 goto out;
8589 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8590 out:
8591 btrfs_free_path(path);
8592 return ret;
8595 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8596 struct btrfs_fs_info *fs_info)
8598 u64 start = 0;
8599 int ret;
8602 * The only reason we don't do this is because right now we're just
8603 * walking the trees we find and pinning down their bytes, we don't look
8604 * at any of the leaves. In order to do mixed groups we'd have to check
8605 * the leaves of any fs roots and pin down the bytes for any file
8606 * extents we find. Not hard but why do it if we don't have to?
8608 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8609 fprintf(stderr, "We don't support re-initing the extent tree "
8610 "for mixed block groups yet, please notify a btrfs "
8611 "developer you want to do this so they can add this "
8612 "functionality.\n");
8613 return -EINVAL;
8617 * first we need to walk all of the trees except the extent tree and pin
8618 * down the bytes that are in use so we don't overwrite any existing
8619 * metadata.
8621 ret = pin_metadata_blocks(fs_info);
8622 if (ret) {
8623 fprintf(stderr, "error pinning down used bytes\n");
8624 return ret;
8628 * Need to drop all the block groups since we're going to recreate all
8629 * of them again.
8631 btrfs_free_block_groups(fs_info);
8632 ret = reset_block_groups(fs_info);
8633 if (ret) {
8634 fprintf(stderr, "error resetting the block groups\n");
8635 return ret;
8638 /* Ok we can allocate now, reinit the extent root */
8639 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8640 if (ret) {
8641 fprintf(stderr, "extent root initialization failed\n");
8643 * When the transaction code is updated we should end the
8644 * transaction, but for now progs only knows about commit so
8645 * just return an error.
8647 return ret;
8651 * Now we have all the in-memory block groups setup so we can make
8652 * allocations properly, and the metadata we care about is safe since we
8653 * pinned all of it above.
8655 while (1) {
8656 struct btrfs_block_group_cache *cache;
8658 cache = btrfs_lookup_first_block_group(fs_info, start);
8659 if (!cache)
8660 break;
8661 start = cache->key.objectid + cache->key.offset;
8662 ret = btrfs_insert_item(trans, fs_info->extent_root,
8663 &cache->key, &cache->item,
8664 sizeof(cache->item));
8665 if (ret) {
8666 fprintf(stderr, "Error adding block group\n");
8667 return ret;
8669 btrfs_extent_post_op(trans, fs_info->extent_root);
8672 ret = reset_balance(trans, fs_info);
8673 if (ret)
8674 fprintf(stderr, "error reseting the pending balance\n");
8676 return ret;
8679 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8681 struct btrfs_path *path;
8682 struct btrfs_trans_handle *trans;
8683 struct btrfs_key key;
8684 int ret;
8686 printf("Recowing metadata block %llu\n", eb->start);
8687 key.objectid = btrfs_header_owner(eb);
8688 key.type = BTRFS_ROOT_ITEM_KEY;
8689 key.offset = (u64)-1;
8691 root = btrfs_read_fs_root(root->fs_info, &key);
8692 if (IS_ERR(root)) {
8693 fprintf(stderr, "Couldn't find owner root %llu\n",
8694 key.objectid);
8695 return PTR_ERR(root);
8698 path = btrfs_alloc_path();
8699 if (!path)
8700 return -ENOMEM;
8702 trans = btrfs_start_transaction(root, 1);
8703 if (IS_ERR(trans)) {
8704 btrfs_free_path(path);
8705 return PTR_ERR(trans);
8708 path->lowest_level = btrfs_header_level(eb);
8709 if (path->lowest_level)
8710 btrfs_node_key_to_cpu(eb, &key, 0);
8711 else
8712 btrfs_item_key_to_cpu(eb, &key, 0);
8714 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8715 btrfs_commit_transaction(trans, root);
8716 btrfs_free_path(path);
8717 return ret;
8720 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8722 struct btrfs_path *path;
8723 struct btrfs_trans_handle *trans;
8724 struct btrfs_key key;
8725 int ret;
8727 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8728 bad->key.type, bad->key.offset);
8729 key.objectid = bad->root_id;
8730 key.type = BTRFS_ROOT_ITEM_KEY;
8731 key.offset = (u64)-1;
8733 root = btrfs_read_fs_root(root->fs_info, &key);
8734 if (IS_ERR(root)) {
8735 fprintf(stderr, "Couldn't find owner root %llu\n",
8736 key.objectid);
8737 return PTR_ERR(root);
8740 path = btrfs_alloc_path();
8741 if (!path)
8742 return -ENOMEM;
8744 trans = btrfs_start_transaction(root, 1);
8745 if (IS_ERR(trans)) {
8746 btrfs_free_path(path);
8747 return PTR_ERR(trans);
8750 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8751 if (ret) {
8752 if (ret > 0)
8753 ret = 0;
8754 goto out;
8756 ret = btrfs_del_item(trans, root, path);
8757 out:
8758 btrfs_commit_transaction(trans, root);
8759 btrfs_free_path(path);
8760 return ret;
8763 static int zero_log_tree(struct btrfs_root *root)
8765 struct btrfs_trans_handle *trans;
8766 int ret;
8768 trans = btrfs_start_transaction(root, 1);
8769 if (IS_ERR(trans)) {
8770 ret = PTR_ERR(trans);
8771 return ret;
8773 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8774 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8775 ret = btrfs_commit_transaction(trans, root);
8776 return ret;
8779 static int populate_csum(struct btrfs_trans_handle *trans,
8780 struct btrfs_root *csum_root, char *buf, u64 start,
8781 u64 len)
8783 u64 offset = 0;
8784 u64 sectorsize;
8785 int ret = 0;
8787 while (offset < len) {
8788 sectorsize = csum_root->sectorsize;
8789 ret = read_extent_data(csum_root, buf, start + offset,
8790 &sectorsize, 0);
8791 if (ret)
8792 break;
8793 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8794 start + offset, buf, sectorsize);
8795 if (ret)
8796 break;
8797 offset += sectorsize;
8799 return ret;
8802 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8803 struct btrfs_root *csum_root,
8804 struct btrfs_root *cur_root)
8806 struct btrfs_path *path;
8807 struct btrfs_key key;
8808 struct extent_buffer *node;
8809 struct btrfs_file_extent_item *fi;
8810 char *buf = NULL;
8811 u64 start = 0;
8812 u64 len = 0;
8813 int slot = 0;
8814 int ret = 0;
8816 path = btrfs_alloc_path();
8817 if (!path)
8818 return -ENOMEM;
8819 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8820 if (!buf) {
8821 ret = -ENOMEM;
8822 goto out;
8825 key.objectid = 0;
8826 key.offset = 0;
8827 key.type = 0;
8829 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8830 if (ret < 0)
8831 goto out;
8832 /* Iterate all regular file extents and fill its csum */
8833 while (1) {
8834 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8836 if (key.type != BTRFS_EXTENT_DATA_KEY)
8837 goto next;
8838 node = path->nodes[0];
8839 slot = path->slots[0];
8840 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8841 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8842 goto next;
8843 start = btrfs_file_extent_disk_bytenr(node, fi);
8844 len = btrfs_file_extent_disk_num_bytes(node, fi);
8846 ret = populate_csum(trans, csum_root, buf, start, len);
8847 if (ret == -EEXIST)
8848 ret = 0;
8849 if (ret < 0)
8850 goto out;
8851 next:
8853 * TODO: if next leaf is corrupted, jump to nearest next valid
8854 * leaf.
8856 ret = btrfs_next_item(cur_root, path);
8857 if (ret < 0)
8858 goto out;
8859 if (ret > 0) {
8860 ret = 0;
8861 goto out;
8865 out:
8866 btrfs_free_path(path);
8867 free(buf);
8868 return ret;
8871 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
8872 struct btrfs_root *csum_root)
8874 struct btrfs_fs_info *fs_info = csum_root->fs_info;
8875 struct btrfs_path *path;
8876 struct btrfs_root *tree_root = fs_info->tree_root;
8877 struct btrfs_root *cur_root;
8878 struct extent_buffer *node;
8879 struct btrfs_key key;
8880 int slot = 0;
8881 int ret = 0;
8883 path = btrfs_alloc_path();
8884 if (!path)
8885 return -ENOMEM;
8887 key.objectid = BTRFS_FS_TREE_OBJECTID;
8888 key.offset = 0;
8889 key.type = BTRFS_ROOT_ITEM_KEY;
8891 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
8892 if (ret < 0)
8893 goto out;
8894 if (ret > 0) {
8895 ret = -ENOENT;
8896 goto out;
8899 while (1) {
8900 node = path->nodes[0];
8901 slot = path->slots[0];
8902 btrfs_item_key_to_cpu(node, &key, slot);
8903 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
8904 goto out;
8905 if (key.type != BTRFS_ROOT_ITEM_KEY)
8906 goto next;
8907 if (!is_fstree(key.objectid))
8908 goto next;
8909 key.offset = (u64)-1;
8911 cur_root = btrfs_read_fs_root(fs_info, &key);
8912 if (IS_ERR(cur_root) || !cur_root) {
8913 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
8914 key.objectid);
8915 goto out;
8917 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
8918 cur_root);
8919 if (ret < 0)
8920 goto out;
8921 next:
8922 ret = btrfs_next_item(tree_root, path);
8923 if (ret > 0) {
8924 ret = 0;
8925 goto out;
8927 if (ret < 0)
8928 goto out;
8931 out:
8932 btrfs_free_path(path);
8933 return ret;
8936 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
8937 struct btrfs_root *csum_root)
8939 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
8940 struct btrfs_path *path;
8941 struct btrfs_extent_item *ei;
8942 struct extent_buffer *leaf;
8943 char *buf;
8944 struct btrfs_key key;
8945 int ret;
8947 path = btrfs_alloc_path();
8948 if (!path)
8949 return -ENOMEM;
8951 key.objectid = 0;
8952 key.type = BTRFS_EXTENT_ITEM_KEY;
8953 key.offset = 0;
8955 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
8956 if (ret < 0) {
8957 btrfs_free_path(path);
8958 return ret;
8961 buf = malloc(csum_root->sectorsize);
8962 if (!buf) {
8963 btrfs_free_path(path);
8964 return -ENOMEM;
8967 while (1) {
8968 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8969 ret = btrfs_next_leaf(extent_root, path);
8970 if (ret < 0)
8971 break;
8972 if (ret) {
8973 ret = 0;
8974 break;
8977 leaf = path->nodes[0];
8979 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8980 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8981 path->slots[0]++;
8982 continue;
8985 ei = btrfs_item_ptr(leaf, path->slots[0],
8986 struct btrfs_extent_item);
8987 if (!(btrfs_extent_flags(leaf, ei) &
8988 BTRFS_EXTENT_FLAG_DATA)) {
8989 path->slots[0]++;
8990 continue;
8993 ret = populate_csum(trans, csum_root, buf, key.objectid,
8994 key.offset);
8995 if (ret)
8996 break;
8997 path->slots[0]++;
9000 btrfs_free_path(path);
9001 free(buf);
9002 return ret;
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Re-initialising the extent tree wipes out all extent information, so in
 * that case the extent tree cannot be used as the source and the fs trees
 * have to be used instead.  If @search_fs_tree is set, the fs/subvolume
 * trees are walked; otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
9022 struct root_item_info {
9023 /* level of the root */
9024 u8 level;
9025 /* number of nodes at this level, must be 1 for a root */
9026 int node_count;
9027 u64 bytenr;
9028 u64 gen;
9029 struct cache_extent cache_extent;
9032 static struct cache_tree *roots_info_cache = NULL;
9034 static void free_roots_info_cache(void)
9036 if (!roots_info_cache)
9037 return;
9039 while (!cache_tree_empty(roots_info_cache)) {
9040 struct cache_extent *entry;
9041 struct root_item_info *rii;
9043 entry = first_cache_extent(roots_info_cache);
9044 if (!entry)
9045 break;
9046 remove_cache_extent(roots_info_cache, entry);
9047 rii = container_of(entry, struct root_item_info, cache_extent);
9048 free(rii);
9051 free(roots_info_cache);
9052 roots_info_cache = NULL;
9055 static int build_roots_info_cache(struct btrfs_fs_info *info)
9057 int ret = 0;
9058 struct btrfs_key key;
9059 struct extent_buffer *leaf;
9060 struct btrfs_path *path;
9062 if (!roots_info_cache) {
9063 roots_info_cache = malloc(sizeof(*roots_info_cache));
9064 if (!roots_info_cache)
9065 return -ENOMEM;
9066 cache_tree_init(roots_info_cache);
9069 path = btrfs_alloc_path();
9070 if (!path)
9071 return -ENOMEM;
9073 key.objectid = 0;
9074 key.type = BTRFS_EXTENT_ITEM_KEY;
9075 key.offset = 0;
9077 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9078 if (ret < 0)
9079 goto out;
9080 leaf = path->nodes[0];
9082 while (1) {
9083 struct btrfs_key found_key;
9084 struct btrfs_extent_item *ei;
9085 struct btrfs_extent_inline_ref *iref;
9086 int slot = path->slots[0];
9087 int type;
9088 u64 flags;
9089 u64 root_id;
9090 u8 level;
9091 struct cache_extent *entry;
9092 struct root_item_info *rii;
9094 if (slot >= btrfs_header_nritems(leaf)) {
9095 ret = btrfs_next_leaf(info->extent_root, path);
9096 if (ret < 0) {
9097 break;
9098 } else if (ret) {
9099 ret = 0;
9100 break;
9102 leaf = path->nodes[0];
9103 slot = path->slots[0];
9106 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9108 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9109 found_key.type != BTRFS_METADATA_ITEM_KEY)
9110 goto next;
9112 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9113 flags = btrfs_extent_flags(leaf, ei);
9115 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9116 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9117 goto next;
9119 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9120 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9121 level = found_key.offset;
9122 } else {
9123 struct btrfs_tree_block_info *info;
9125 info = (struct btrfs_tree_block_info *)(ei + 1);
9126 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9127 level = btrfs_tree_block_level(leaf, info);
9131 * For a root extent, it must be of the following type and the
9132 * first (and only one) iref in the item.
9134 type = btrfs_extent_inline_ref_type(leaf, iref);
9135 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9136 goto next;
9138 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9139 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9140 if (!entry) {
9141 rii = malloc(sizeof(struct root_item_info));
9142 if (!rii) {
9143 ret = -ENOMEM;
9144 goto out;
9146 rii->cache_extent.start = root_id;
9147 rii->cache_extent.size = 1;
9148 rii->level = (u8)-1;
9149 entry = &rii->cache_extent;
9150 ret = insert_cache_extent(roots_info_cache, entry);
9151 ASSERT(ret == 0);
9152 } else {
9153 rii = container_of(entry, struct root_item_info,
9154 cache_extent);
9157 ASSERT(rii->cache_extent.start == root_id);
9158 ASSERT(rii->cache_extent.size == 1);
9160 if (level > rii->level || rii->level == (u8)-1) {
9161 rii->level = level;
9162 rii->bytenr = found_key.objectid;
9163 rii->gen = btrfs_extent_generation(leaf, ei);
9164 rii->node_count = 1;
9165 } else if (level == rii->level) {
9166 rii->node_count++;
9168 next:
9169 path->slots[0]++;
9172 out:
9173 btrfs_free_path(path);
9175 return ret;
9178 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9179 struct btrfs_path *path,
9180 const struct btrfs_key *root_key,
9181 const int read_only_mode)
9183 const u64 root_id = root_key->objectid;
9184 struct cache_extent *entry;
9185 struct root_item_info *rii;
9186 struct btrfs_root_item ri;
9187 unsigned long offset;
9189 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9190 if (!entry) {
9191 fprintf(stderr,
9192 "Error: could not find extent items for root %llu\n",
9193 root_key->objectid);
9194 return -ENOENT;
9197 rii = container_of(entry, struct root_item_info, cache_extent);
9198 ASSERT(rii->cache_extent.start == root_id);
9199 ASSERT(rii->cache_extent.size == 1);
9201 if (rii->node_count != 1) {
9202 fprintf(stderr,
9203 "Error: could not find btree root extent for root %llu\n",
9204 root_id);
9205 return -ENOENT;
9208 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9209 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9211 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9212 btrfs_root_level(&ri) != rii->level ||
9213 btrfs_root_generation(&ri) != rii->gen) {
9216 * If we're in repair mode but our caller told us to not update
9217 * the root item, i.e. just check if it needs to be updated, don't
9218 * print this message, since the caller will call us again shortly
9219 * for the same root item without read only mode (the caller will
9220 * open a transaction first).
9222 if (!(read_only_mode && repair))
9223 fprintf(stderr,
9224 "%sroot item for root %llu,"
9225 " current bytenr %llu, current gen %llu, current level %u,"
9226 " new bytenr %llu, new gen %llu, new level %u\n",
9227 (read_only_mode ? "" : "fixing "),
9228 root_id,
9229 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9230 btrfs_root_level(&ri),
9231 rii->bytenr, rii->gen, rii->level);
9233 if (btrfs_root_generation(&ri) > rii->gen) {
9234 fprintf(stderr,
9235 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9236 root_id, btrfs_root_generation(&ri), rii->gen);
9237 return -EINVAL;
9240 if (!read_only_mode) {
9241 btrfs_set_root_bytenr(&ri, rii->bytenr);
9242 btrfs_set_root_level(&ri, rii->level);
9243 btrfs_set_root_generation(&ri, rii->gen);
9244 write_extent_buffer(path->nodes[0], &ri,
9245 offset, sizeof(ri));
9248 return 1;
9251 return 0;
9255 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9256 * caused read-only snapshots to be corrupted if they were created at a moment
9257 * when the source subvolume/snapshot had orphan items. The issue was that the
9258 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9259 * node instead of the post orphan cleanup root node.
9260 * So this function, and its callees, just detects and fixes those cases. Even
9261 * though the regression was for read-only snapshots, this function applies to
9262 * any snapshot/subvolume root.
9263 * This must be run before any other repair code - not doing it so, makes other
9264 * repair code delete or modify backrefs in the extent tree for example, which
9265 * will result in an inconsistent fs after repairing the root items.
9267 static int repair_root_items(struct btrfs_fs_info *info)
9269 struct btrfs_path *path = NULL;
9270 struct btrfs_key key;
9271 struct extent_buffer *leaf;
9272 struct btrfs_trans_handle *trans = NULL;
9273 int ret = 0;
9274 int bad_roots = 0;
9275 int need_trans = 0;
9277 ret = build_roots_info_cache(info);
9278 if (ret)
9279 goto out;
9281 path = btrfs_alloc_path();
9282 if (!path) {
9283 ret = -ENOMEM;
9284 goto out;
9287 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9288 key.type = BTRFS_ROOT_ITEM_KEY;
9289 key.offset = 0;
9291 again:
9293 * Avoid opening and committing transactions if a leaf doesn't have
9294 * any root items that need to be fixed, so that we avoid rotating
9295 * backup roots unnecessarily.
9297 if (need_trans) {
9298 trans = btrfs_start_transaction(info->tree_root, 1);
9299 if (IS_ERR(trans)) {
9300 ret = PTR_ERR(trans);
9301 goto out;
9305 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9306 0, trans ? 1 : 0);
9307 if (ret < 0)
9308 goto out;
9309 leaf = path->nodes[0];
9311 while (1) {
9312 struct btrfs_key found_key;
9314 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9315 int no_more_keys = find_next_key(path, &key);
9317 btrfs_release_path(path);
9318 if (trans) {
9319 ret = btrfs_commit_transaction(trans,
9320 info->tree_root);
9321 trans = NULL;
9322 if (ret < 0)
9323 goto out;
9325 need_trans = 0;
9326 if (no_more_keys)
9327 break;
9328 goto again;
9331 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9333 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9334 goto next;
9335 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9336 goto next;
9338 ret = maybe_repair_root_item(info, path, &found_key,
9339 trans ? 0 : 1);
9340 if (ret < 0)
9341 goto out;
9342 if (ret) {
9343 if (!trans && repair) {
9344 need_trans = 1;
9345 key = found_key;
9346 btrfs_release_path(path);
9347 goto again;
9349 bad_roots++;
9351 next:
9352 path->slots[0]++;
9354 ret = 0;
9355 out:
9356 free_roots_info_cache();
9357 btrfs_free_path(path);
9358 if (trans)
9359 btrfs_commit_transaction(trans, info->tree_root);
9360 if (ret < 0)
9361 return ret;
9363 return bad_roots;
/*
 * Help text for "btrfs check": synopsis first, then the short and long
 * descriptions, then one entry per option. The array is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	/* NOTE(review): empty separator before the option list - confirm against the file */
	"",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"-p|--progress indicate progress",
	NULL
};
9389 int cmd_check(int argc, char **argv)
9391 struct cache_tree root_cache;
9392 struct btrfs_root *root;
9393 struct btrfs_fs_info *info;
9394 u64 bytenr = 0;
9395 u64 subvolid = 0;
9396 u64 tree_root_bytenr = 0;
9397 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9398 int ret;
9399 u64 num;
9400 int init_csum_tree = 0;
9401 int readonly = 0;
9402 int qgroup_report = 0;
9403 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9405 while(1) {
9406 int c;
9407 enum { OPT_REPAIR = 257, OPT_INIT_CSUM, OPT_INIT_EXTENT,
9408 OPT_CHECK_CSUM, OPT_READONLY };
9409 static const struct option long_options[] = {
9410 { "super", required_argument, NULL, 's' },
9411 { "repair", no_argument, NULL, OPT_REPAIR },
9412 { "readonly", no_argument, NULL, OPT_READONLY },
9413 { "init-csum-tree", no_argument, NULL, OPT_INIT_CSUM },
9414 { "init-extent-tree", no_argument, NULL, OPT_INIT_EXTENT },
9415 { "check-data-csum", no_argument, NULL, OPT_CHECK_CSUM },
9416 { "backup", no_argument, NULL, 'b' },
9417 { "subvol-extents", required_argument, NULL, 'E' },
9418 { "qgroup-report", no_argument, NULL, 'Q' },
9419 { "tree-root", required_argument, NULL, 'r' },
9420 { "progress", no_argument, NULL, 'p' },
9421 { NULL, 0, NULL, 0}
9424 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9425 if (c < 0)
9426 break;
9427 switch(c) {
9428 case 'a': /* ignored */ break;
9429 case 'b':
9430 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9431 break;
9432 case 's':
9433 num = arg_strtou64(optarg);
9434 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9435 fprintf(stderr,
9436 "ERROR: super mirror should be less than: %d\n",
9437 BTRFS_SUPER_MIRROR_MAX);
9438 exit(1);
9440 bytenr = btrfs_sb_offset(((int)num));
9441 printf("using SB copy %llu, bytenr %llu\n", num,
9442 (unsigned long long)bytenr);
9443 break;
9444 case 'Q':
9445 qgroup_report = 1;
9446 break;
9447 case 'E':
9448 subvolid = arg_strtou64(optarg);
9449 break;
9450 case 'r':
9451 tree_root_bytenr = arg_strtou64(optarg);
9452 break;
9453 case 'p':
9454 ctx.progress_enabled = true;
9455 break;
9456 case '?':
9457 case 'h':
9458 usage(cmd_check_usage);
9459 case OPT_REPAIR:
9460 printf("enabling repair mode\n");
9461 repair = 1;
9462 ctree_flags |= OPEN_CTREE_WRITES;
9463 break;
9464 case OPT_READONLY:
9465 readonly = 1;
9466 break;
9467 case OPT_INIT_CSUM:
9468 printf("Creating a new CRC tree\n");
9469 init_csum_tree = 1;
9470 repair = 1;
9471 ctree_flags |= OPEN_CTREE_WRITES;
9472 break;
9473 case OPT_INIT_EXTENT:
9474 init_extent_tree = 1;
9475 ctree_flags |= (OPEN_CTREE_WRITES |
9476 OPEN_CTREE_NO_BLOCK_GROUPS);
9477 repair = 1;
9478 break;
9479 case OPT_CHECK_CSUM:
9480 check_data_csum = 1;
9481 break;
9484 argc = argc - optind;
9486 if (check_argc_exact(argc, 1))
9487 usage(cmd_check_usage);
9489 if (ctx.progress_enabled) {
9490 ctx.tp = TASK_NOTHING;
9491 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9494 /* This check is the only reason for --readonly to exist */
9495 if (readonly && repair) {
9496 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9497 exit(1);
9500 radix_tree_init();
9501 cache_tree_init(&root_cache);
9503 if((ret = check_mounted(argv[optind])) < 0) {
9504 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9505 goto err_out;
9506 } else if(ret) {
9507 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9508 ret = -EBUSY;
9509 goto err_out;
9512 /* only allow partial opening under repair mode */
9513 if (repair)
9514 ctree_flags |= OPEN_CTREE_PARTIAL;
9516 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9517 ctree_flags);
9518 if (!info) {
9519 fprintf(stderr, "Couldn't open file system\n");
9520 ret = -EIO;
9521 goto err_out;
9524 global_info = info;
9525 root = info->fs_root;
9528 * repair mode will force us to commit transaction which
9529 * will make us fail to load log tree when mounting.
9531 if (repair && btrfs_super_log_root(info->super_copy)) {
9532 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9533 if (!ret) {
9534 ret = 1;
9535 goto close_out;
9537 ret = zero_log_tree(root);
9538 if (ret) {
9539 fprintf(stderr, "fail to zero log tree\n");
9540 goto close_out;
9544 uuid_unparse(info->super_copy->fsid, uuidbuf);
9545 if (qgroup_report) {
9546 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9547 uuidbuf);
9548 ret = qgroup_verify_all(info);
9549 if (ret == 0)
9550 print_qgroup_report(1);
9551 goto close_out;
9553 if (subvolid) {
9554 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9555 subvolid, argv[optind], uuidbuf);
9556 ret = print_extent_state(info, subvolid);
9557 goto close_out;
9559 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9561 if (!extent_buffer_uptodate(info->tree_root->node) ||
9562 !extent_buffer_uptodate(info->dev_root->node) ||
9563 !extent_buffer_uptodate(info->chunk_root->node)) {
9564 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9565 ret = -EIO;
9566 goto close_out;
9569 if (init_extent_tree || init_csum_tree) {
9570 struct btrfs_trans_handle *trans;
9572 trans = btrfs_start_transaction(info->extent_root, 0);
9573 if (IS_ERR(trans)) {
9574 fprintf(stderr, "Error starting transaction\n");
9575 ret = PTR_ERR(trans);
9576 goto close_out;
9579 if (init_extent_tree) {
9580 printf("Creating a new extent tree\n");
9581 ret = reinit_extent_tree(trans, info);
9582 if (ret)
9583 goto close_out;
9586 if (init_csum_tree) {
9587 fprintf(stderr, "Reinit crc root\n");
9588 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9589 if (ret) {
9590 fprintf(stderr, "crc root initialization failed\n");
9591 ret = -EIO;
9592 goto close_out;
9595 ret = fill_csum_tree(trans, info->csum_root,
9596 init_extent_tree);
9597 if (ret) {
9598 fprintf(stderr, "crc refilling failed\n");
9599 return -EIO;
9603 * Ok now we commit and run the normal fsck, which will add
9604 * extent entries for all of the items it finds.
9606 ret = btrfs_commit_transaction(trans, info->extent_root);
9607 if (ret)
9608 goto close_out;
9610 if (!extent_buffer_uptodate(info->extent_root->node)) {
9611 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9612 ret = -EIO;
9613 goto close_out;
9615 if (!extent_buffer_uptodate(info->csum_root->node)) {
9616 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9617 ret = -EIO;
9618 goto close_out;
9621 if (!ctx.progress_enabled)
9622 fprintf(stderr, "checking extents\n");
9623 ret = check_chunks_and_extents(root);
9624 if (ret)
9625 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9627 ret = repair_root_items(info);
9628 if (ret < 0)
9629 goto close_out;
9630 if (repair) {
9631 fprintf(stderr, "Fixed %d roots.\n", ret);
9632 ret = 0;
9633 } else if (ret > 0) {
9634 fprintf(stderr,
9635 "Found %d roots with an outdated root item.\n",
9636 ret);
9637 fprintf(stderr,
9638 "Please run a filesystem check with the option --repair to fix them.\n");
9639 ret = 1;
9640 goto close_out;
9643 if (!ctx.progress_enabled)
9644 fprintf(stderr, "checking free space cache\n");
9645 ret = check_space_cache(root);
9646 if (ret)
9647 goto out;
9650 * We used to have to have these hole extents in between our real
9651 * extents so if we don't have this flag set we need to make sure there
9652 * are no gaps in the file extents for inodes, otherwise we can just
9653 * ignore it when this happens.
9655 no_holes = btrfs_fs_incompat(root->fs_info,
9656 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9657 if (!ctx.progress_enabled)
9658 fprintf(stderr, "checking fs roots\n");
9659 ret = check_fs_roots(root, &root_cache);
9660 if (ret)
9661 goto out;
9663 fprintf(stderr, "checking csums\n");
9664 ret = check_csums(root);
9665 if (ret)
9666 goto out;
9668 fprintf(stderr, "checking root refs\n");
9669 ret = check_root_refs(root, &root_cache);
9670 if (ret)
9671 goto out;
9673 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9674 struct extent_buffer *eb;
9676 eb = list_first_entry(&root->fs_info->recow_ebs,
9677 struct extent_buffer, recow);
9678 list_del_init(&eb->recow);
9679 ret = recow_extent_buffer(root, eb);
9680 if (ret)
9681 break;
9684 while (!list_empty(&delete_items)) {
9685 struct bad_item *bad;
9687 bad = list_first_entry(&delete_items, struct bad_item, list);
9688 list_del_init(&bad->list);
9689 if (repair)
9690 ret = delete_bad_item(root, bad);
9691 free(bad);
9694 if (info->quota_enabled) {
9695 int err;
9696 fprintf(stderr, "checking quota groups\n");
9697 err = qgroup_verify_all(info);
9698 if (err)
9699 goto out;
9702 if (!list_empty(&root->fs_info->recow_ebs)) {
9703 fprintf(stderr, "Transid errors in file system\n");
9704 ret = 1;
9706 out:
9707 print_qgroup_report(0);
9708 if (found_old_backref) { /*
9709 * there was a disk format change when mixed
9710 * backref was in testing tree. The old format
9711 * existed about one week.
9713 printf("\n * Found old mixed backref format. "
9714 "The old format is not supported! *"
9715 "\n * Please mount the FS in readonly mode, "
9716 "backup data and re-format the FS. *\n\n");
9717 ret = 1;
9719 printf("found %llu bytes used err is %d\n",
9720 (unsigned long long)bytes_used, ret);
9721 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9722 printf("total tree bytes: %llu\n",
9723 (unsigned long long)total_btree_bytes);
9724 printf("total fs tree bytes: %llu\n",
9725 (unsigned long long)total_fs_tree_bytes);
9726 printf("total extent tree bytes: %llu\n",
9727 (unsigned long long)total_extent_tree_bytes);
9728 printf("btree space waste bytes: %llu\n",
9729 (unsigned long long)btree_space_waste);
9730 printf("file data blocks allocated: %llu\n referenced %llu\n",
9731 (unsigned long long)data_bytes_allocated,
9732 (unsigned long long)data_bytes_referenced);
9734 free_root_recs_tree(&root_cache);
9735 close_out:
9736 close_ctree(root);
9737 err_out:
9738 if (ctx.progress_enabled)
9739 task_deinit(ctx.info);
9741 return ret;