btrfs-progs: docs: add missing short option for qroup-report
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobf6320b21665263dd42a52cd7196c415fd369d228
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/*
 * Phase currently reported by the progress task.  The value is used as an
 * index into the status strings in print_status_check(), except for
 * TASK_NOTHING, which makes that function return without printing.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
/* Context handed to the progress-reporting callbacks. */
struct task_ctx {
	int progress_enabled;
	/* Phase whose status string is printed. */
	enum task_position tp;

	/* Handle passed to task_period_start()/task_period_wait(). */
	struct task_info *info;
};
/* File-scope counters, work lists and option flags of the checker. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* When set, missing file extents are not reported as discontinuities. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header of a back reference; embedded as 'node' in struct
 * data_backref and struct tree_backref.
 */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};

/* Map a list entry back to the extent_backref that embeds it. */
static inline struct extent_backref* to_extent_backref(struct list_head *entry)
{
	return list_entry(entry, struct extent_backref, list);
}
/* Back reference of a data extent; 'node' is the common header. */
struct data_backref {
	struct extent_backref node;
	/* parent and root share storage, only one of them is meaningful. */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	u32 found_ref;
};
/* Error bits for inode, directory and root item checks (one bit each). */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
/* Convert the common backref header to the data_backref that embeds it. */
static inline struct data_backref* to_data_backref(struct extent_backref *back)
{
	return container_of(back, struct data_backref, node);
}
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
/* Back reference of a tree block; 'node' is the common header. */
struct tree_backref {
	struct extent_backref node;
	/* parent and root share storage, only one of them is meaningful. */
	union {
		u64 parent;
		u64 root;
	};
};

/* Convert the common backref header to the tree_backref that embeds it. */
static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
{
	return container_of(back, struct tree_backref, node);
}
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

/* Per-extent bookkeeping record, indexed through the embedded 'cache'. */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* Two bits wide so that it can also hold FLAG_UNSET (2). */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
/* Map an entry of extent_record::list back to its extent_record. */
static inline struct extent_record* to_extent_record(struct list_head *entry)
{
	return container_of(entry, struct extent_record, list);
}
/*
 * One (directory, name) reference to an inode, linked on
 * inode_record::backrefs.  The found_* bits record which item kinds have
 * been seen for it; 'errors' holds REF_ERR_* bits.
 */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	u8 filetype;
	u8 ref_type;
	int errors;
	u64 dir;
	u64 index;
	u16 namelen;
	/* Trailing storage; allocated with namelen + 1 bytes for the name. */
	char name[0];
};

/* Map a list entry back to the inode_backref that embeds it. */
static inline struct inode_backref* to_inode_backref(struct list_head *entry)
{
	return list_entry(entry, struct inode_backref, list);
}
/* Values recorded for one root item. */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};
/*
 * Error bits stored in inode_backref::errors and root_backref::errors;
 * decoded by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
/* A hole [start, start + len) kept in an rb-tree ordered by start. */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
/*
 * Everything collected about one inode.  nlink/imode/isize/nbytes are
 * copied from the inode item, the found_* members are accumulated from the
 * other items, and 'errors' holds I_ERR_* bits.
 */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	/* (u64)-1 while no extent has been recorded, see get_inode_rec(). */
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole. */
	struct rb_root holes;
	/* List of struct orphan_data_extent. */
	struct list_head orphan_extents;

	/* Reference count; the record is freed when it drops to zero. */
	u32 refs;
};
/* Error bits stored in inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
/* One reference to a subvolume root, linked on root_record::backrefs. */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	/* REF_ERR_* bits. */
	int errors;
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	/* Trailing storage for the name. */
	char name[0];
};

/* Map a list entry back to the root_backref that embeds it. */
static inline struct root_backref* to_root_backref(struct list_head *entry)
{
	return list_entry(entry, struct root_backref, list);
}
/* Per-root record holding the list of root_backref entries. */
struct root_record {
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
/*
 * Cache-tree entry that points at an out-of-line payload; the inode cache
 * stores a struct inode_record in 'data' (see get_inode_rec()).
 */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};

/* Per-node state used while walking a tree that shares blocks. */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	/* Inode record currently being filled in (see process_inode_item()). */
	struct inode_record *current;
	u32 refs;
};

struct block_info {
	u64 start;
	u32 size;
};
/* State of one tree walk: one shared_node slot per tree level. */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};

struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};

struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY both use
 * bit 4, so the two conditions cannot be told apart in a combined error
 * value - confirm whether this is intended before relying on either bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
395 uint32_t count = 0;
396 static char *task_position_string[] = {
397 "checking extents",
398 "checking free space cache",
399 "checking fs roots",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
405 return NULL;
407 while (1) {
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
410 count++;
411 fflush(stdout);
412 task_period_wait(priv->info);
414 return NULL;
/*
 * Progress task epilogue: terminate the status line that
 * print_status_check() keeps rewriting.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
443 return (u64)-1;
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446 return hole->start;
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
458 return -1;
459 if (hole1->start < hole2->start)
460 return 1;
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
467 return -1;
468 /* Hole 2 will be merge center */
469 return 1;
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
479 u64 start, u64 len)
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
486 if (!hole)
487 return -ENOMEM;
488 hole->start = start;
489 hole->len = len;
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496 node);
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
501 free(prev);
502 prev = NULL;
505 /* iterate merge with next holes */
506 while (1) {
507 if (!rb_next(&hole->node))
508 break;
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510 node);
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
514 hole->start;
515 rb_erase(&next->node, holes);
516 free(next);
517 next = NULL;
518 } else
519 break;
521 return 0;
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
527 u64 start;
529 hole = (struct file_extent_hole *)data;
530 start = hole->start;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
534 return -1;
535 if (start >= hole->start && start < hole->start + hole->len)
536 return 0;
537 return 1;
541 * Delete a hole in the record
543 * This will do the hole split and is much restrict than add.
545 static int del_file_extent_hole(struct rb_root *holes,
546 u64 start, u64 len)
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
550 u64 prev_start = 0;
551 u64 prev_len = 0;
552 u64 next_start = 0;
553 u64 next_len = 0;
554 struct rb_node *node;
555 int have_prev = 0;
556 int have_next = 0;
557 int ret = 0;
559 tmp.start = start;
560 tmp.len = len;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 if (!node)
563 return -EEXIST;
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
566 return -EEXIST;
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
575 have_prev = 1;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
580 have_next = 1;
582 rb_erase(node, holes);
583 free(hole);
584 if (have_prev) {
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
586 if (ret < 0)
587 return ret;
589 if (have_next) {
590 ret = add_file_extent_hole(holes, next_start, next_len);
591 if (ret < 0)
592 return ret;
594 return 0;
597 static int copy_file_extent_holes(struct rb_root *dst,
598 struct rb_root *src)
600 struct file_extent_hole *hole;
601 struct rb_node *node;
602 int ret = 0;
604 node = rb_first(src);
605 while (node) {
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 if (ret)
609 break;
610 node = rb_next(node);
612 return ret;
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
621 while (node) {
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
624 free(hole);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
644 #define S_SHIFT 12
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
667 return -1;
668 else if (rec1->devid < rec2->devid)
669 return 1;
670 else
671 return 0;
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
682 struct rb_node *rb;
683 size_t size;
684 int ret;
686 rec = malloc(sizeof(*rec));
687 if (!rec)
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
690 rec->refs = 1;
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
698 if (!backref) {
699 ret = -ENOMEM;
700 goto cleanup;
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
707 if (!dst_orphan) {
708 ret = -ENOMEM;
709 goto cleanup;
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715 if (ret < 0)
716 goto cleanup_rb;
718 return rec;
720 cleanup_rb:
721 rb = rb_first(&rec->holes);
722 while (rb) {
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
726 rb = rb_next(rb);
727 free(hole);
730 cleanup:
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
734 free(orig);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
740 free(orig);
743 free(rec);
745 return ERR_PTR(ret);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 u64 objectid)
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
754 return;
755 printf("The following data extent is lost in tree %llu:\n",
756 objectid);
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
760 orphan->disk_len);
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
769 if (!errors)
770 return;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
819 int found = 0;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
823 while (node) {
824 found = 1;
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
830 if (!found)
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize,
833 root->fs_info->sectorsize));
837 static void print_ref_error(int errors)
839 if (errors & REF_ERR_NO_DIR_ITEM)
840 fprintf(stderr, ", no dir item");
841 if (errors & REF_ERR_NO_DIR_INDEX)
842 fprintf(stderr, ", no dir index");
843 if (errors & REF_ERR_NO_INODE_REF)
844 fprintf(stderr, ", no inode ref");
845 if (errors & REF_ERR_DUP_DIR_ITEM)
846 fprintf(stderr, ", dup dir item");
847 if (errors & REF_ERR_DUP_DIR_INDEX)
848 fprintf(stderr, ", dup dir index");
849 if (errors & REF_ERR_DUP_INODE_REF)
850 fprintf(stderr, ", dup inode ref");
851 if (errors & REF_ERR_INDEX_UNMATCH)
852 fprintf(stderr, ", index mismatch");
853 if (errors & REF_ERR_FILETYPE_UNMATCH)
854 fprintf(stderr, ", filetype mismatch");
855 if (errors & REF_ERR_NAME_TOO_LONG)
856 fprintf(stderr, ", name too long");
857 if (errors & REF_ERR_NO_ROOT_REF)
858 fprintf(stderr, ", no root ref");
859 if (errors & REF_ERR_NO_ROOT_BACKREF)
860 fprintf(stderr, ", no root backref");
861 if (errors & REF_ERR_DUP_ROOT_REF)
862 fprintf(stderr, ", dup root ref");
863 if (errors & REF_ERR_DUP_ROOT_BACKREF)
864 fprintf(stderr, ", dup root backref");
865 fprintf(stderr, "\n");
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869 u64 ino, int mod)
871 struct ptr_node *node;
872 struct cache_extent *cache;
873 struct inode_record *rec = NULL;
874 int ret;
876 cache = lookup_cache_extent(inode_cache, ino, 1);
877 if (cache) {
878 node = container_of(cache, struct ptr_node, cache);
879 rec = node->data;
880 if (mod && rec->refs > 1) {
881 node->data = clone_inode_rec(rec);
882 if (IS_ERR(node->data))
883 return node->data;
884 rec->refs--;
885 rec = node->data;
887 } else if (mod) {
888 rec = calloc(1, sizeof(*rec));
889 if (!rec)
890 return ERR_PTR(-ENOMEM);
891 rec->ino = ino;
892 rec->extent_start = (u64)-1;
893 rec->refs = 1;
894 INIT_LIST_HEAD(&rec->backrefs);
895 INIT_LIST_HEAD(&rec->orphan_extents);
896 rec->holes = RB_ROOT;
898 node = malloc(sizeof(*node));
899 if (!node) {
900 free(rec);
901 return ERR_PTR(-ENOMEM);
903 node->cache.start = ino;
904 node->cache.size = 1;
905 node->data = rec;
907 if (ino == BTRFS_FREE_INO_OBJECTID)
908 rec->found_link = 1;
910 ret = insert_cache_extent(inode_cache, &node->cache);
911 if (ret)
912 return ERR_PTR(-EEXIST);
914 return rec;
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
919 struct orphan_data_extent *orphan;
921 while (!list_empty(orphan_extents)) {
922 orphan = list_entry(orphan_extents->next,
923 struct orphan_data_extent, list);
924 list_del(&orphan->list);
925 free(orphan);
929 static void free_inode_rec(struct inode_record *rec)
931 struct inode_backref *backref;
933 if (--rec->refs > 0)
934 return;
936 while (!list_empty(&rec->backrefs)) {
937 backref = to_inode_backref(rec->backrefs.next);
938 list_del(&backref->list);
939 free(backref);
941 free_orphan_data_extents(&rec->orphan_extents);
942 free_file_extent_holes(&rec->holes);
943 free(rec);
946 static int can_free_inode_rec(struct inode_record *rec)
948 if (!rec->errors && rec->checked && rec->found_inode_item &&
949 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950 return 1;
951 return 0;
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955 struct inode_record *rec)
957 struct cache_extent *cache;
958 struct inode_backref *tmp, *backref;
959 struct ptr_node *node;
960 u8 filetype;
962 if (!rec->found_inode_item)
963 return;
965 filetype = imode_to_type(rec->imode);
966 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967 if (backref->found_dir_item && backref->found_dir_index) {
968 if (backref->filetype != filetype)
969 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970 if (!backref->errors && backref->found_inode_ref &&
971 rec->nlink == rec->found_link) {
972 list_del(&backref->list);
973 free(backref);
978 if (!rec->checked || rec->merging)
979 return;
981 if (S_ISDIR(rec->imode)) {
982 if (rec->found_size != rec->isize)
983 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984 if (rec->found_file_extent)
985 rec->errors |= I_ERR_ODD_FILE_EXTENT;
986 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987 if (rec->found_dir_item)
988 rec->errors |= I_ERR_ODD_DIR_ITEM;
989 if (rec->found_size != rec->nbytes)
990 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991 if (rec->nlink > 0 && !no_holes &&
992 (rec->extent_end < rec->isize ||
993 first_extent_gap(&rec->holes) < rec->isize))
994 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
997 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998 if (rec->found_csum_item && rec->nodatasum)
999 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000 if (rec->some_csum_missing && !rec->nodatasum)
1001 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1004 BUG_ON(rec->refs != 1);
1005 if (can_free_inode_rec(rec)) {
1006 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007 node = container_of(cache, struct ptr_node, cache);
1008 BUG_ON(node->data != rec);
1009 remove_cache_extent(inode_cache, &node->cache);
1010 free(node);
1011 free_inode_rec(rec);
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1017 struct btrfs_path path;
1018 struct btrfs_key key;
1019 int ret;
1021 key.objectid = BTRFS_ORPHAN_OBJECTID;
1022 key.type = BTRFS_ORPHAN_ITEM_KEY;
1023 key.offset = ino;
1025 btrfs_init_path(&path);
1026 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027 btrfs_release_path(&path);
1028 if (ret > 0)
1029 ret = -ENOENT;
1030 return ret;
1033 static int process_inode_item(struct extent_buffer *eb,
1034 int slot, struct btrfs_key *key,
1035 struct shared_node *active_node)
1037 struct inode_record *rec;
1038 struct btrfs_inode_item *item;
1040 rec = active_node->current;
1041 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042 if (rec->found_inode_item) {
1043 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044 return 1;
1046 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047 rec->nlink = btrfs_inode_nlink(eb, item);
1048 rec->isize = btrfs_inode_size(eb, item);
1049 rec->nbytes = btrfs_inode_nbytes(eb, item);
1050 rec->imode = btrfs_inode_mode(eb, item);
1051 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->nodatasum = 1;
1053 rec->found_inode_item = 1;
1054 if (rec->nlink == 0)
1055 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056 maybe_free_inode_rec(&active_node->inode_cache, rec);
1057 return 0;
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 const char *name,
1062 int namelen, u64 dir)
1064 struct inode_backref *backref;
1066 list_for_each_entry(backref, &rec->backrefs, list) {
1067 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 break;
1069 if (backref->dir != dir || backref->namelen != namelen)
1070 continue;
1071 if (memcmp(name, backref->name, namelen))
1072 continue;
1073 return backref;
1076 backref = malloc(sizeof(*backref) + namelen + 1);
1077 if (!backref)
1078 return NULL;
1079 memset(backref, 0, sizeof(*backref));
1080 backref->dir = dir;
1081 backref->namelen = namelen;
1082 memcpy(backref->name, name, namelen);
1083 backref->name[namelen] = '\0';
1084 list_add_tail(&backref->list, &rec->backrefs);
1085 return backref;
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089 u64 ino, u64 dir, u64 index,
1090 const char *name, int namelen,
1091 u8 filetype, u8 itemtype, int errors)
1093 struct inode_record *rec;
1094 struct inode_backref *backref;
1096 rec = get_inode_rec(inode_cache, ino, 1);
1097 BUG_ON(IS_ERR(rec));
1098 backref = get_inode_backref(rec, name, namelen, dir);
1099 BUG_ON(!backref);
1100 if (errors)
1101 backref->errors |= errors;
1102 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103 if (backref->found_dir_index)
1104 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105 if (backref->found_inode_ref && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1107 if (backref->found_dir_item && backref->filetype != filetype)
1108 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1110 backref->index = index;
1111 backref->filetype = filetype;
1112 backref->found_dir_index = 1;
1113 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 rec->found_link++;
1115 if (backref->found_dir_item)
1116 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117 if (backref->found_dir_index && backref->filetype != filetype)
1118 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1120 backref->filetype = filetype;
1121 backref->found_dir_item = 1;
1122 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124 if (backref->found_inode_ref)
1125 backref->errors |= REF_ERR_DUP_INODE_REF;
1126 if (backref->found_dir_index && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 else
1129 backref->index = index;
1131 backref->ref_type = itemtype;
1132 backref->found_inode_ref = 1;
1133 } else {
1134 BUG_ON(1);
1137 maybe_free_inode_rec(inode_cache, rec);
1138 return 0;
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142 struct cache_tree *dst_cache)
1144 struct inode_backref *backref;
1145 u32 dir_count = 0;
1146 int ret = 0;
1148 dst->merging = 1;
1149 list_for_each_entry(backref, &src->backrefs, list) {
1150 if (backref->found_dir_index) {
1151 add_inode_backref(dst_cache, dst->ino, backref->dir,
1152 backref->index, backref->name,
1153 backref->namelen, backref->filetype,
1154 BTRFS_DIR_INDEX_KEY, backref->errors);
1156 if (backref->found_dir_item) {
1157 dir_count++;
1158 add_inode_backref(dst_cache, dst->ino,
1159 backref->dir, 0, backref->name,
1160 backref->namelen, backref->filetype,
1161 BTRFS_DIR_ITEM_KEY, backref->errors);
1163 if (backref->found_inode_ref) {
1164 add_inode_backref(dst_cache, dst->ino,
1165 backref->dir, backref->index,
1166 backref->name, backref->namelen, 0,
1167 backref->ref_type, backref->errors);
1171 if (src->found_dir_item)
1172 dst->found_dir_item = 1;
1173 if (src->found_file_extent)
1174 dst->found_file_extent = 1;
1175 if (src->found_csum_item)
1176 dst->found_csum_item = 1;
1177 if (src->some_csum_missing)
1178 dst->some_csum_missing = 1;
1179 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181 if (ret < 0)
1182 return ret;
1185 BUG_ON(src->found_link < dir_count);
1186 dst->found_link += src->found_link - dir_count;
1187 dst->found_size += src->found_size;
1188 if (src->extent_start != (u64)-1) {
1189 if (dst->extent_start == (u64)-1) {
1190 dst->extent_start = src->extent_start;
1191 dst->extent_end = src->extent_end;
1192 } else {
1193 if (dst->extent_end > src->extent_start)
1194 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195 else if (dst->extent_end < src->extent_start) {
1196 ret = add_file_extent_hole(&dst->holes,
1197 dst->extent_end,
1198 src->extent_start - dst->extent_end);
1200 if (dst->extent_end < src->extent_end)
1201 dst->extent_end = src->extent_end;
1205 dst->errors |= src->errors;
1206 if (src->found_inode_item) {
1207 if (!dst->found_inode_item) {
1208 dst->nlink = src->nlink;
1209 dst->isize = src->isize;
1210 dst->nbytes = src->nbytes;
1211 dst->imode = src->imode;
1212 dst->nodatasum = src->nodatasum;
1213 dst->found_inode_item = 1;
1214 } else {
1215 dst->errors |= I_ERR_DUP_INODE_ITEM;
1218 dst->merging = 0;
1220 return 0;
1223 static int splice_shared_node(struct shared_node *src_node,
1224 struct shared_node *dst_node)
1226 struct cache_extent *cache;
1227 struct ptr_node *node, *ins;
1228 struct cache_tree *src, *dst;
1229 struct inode_record *rec, *conflict;
1230 u64 current_ino = 0;
1231 int splice = 0;
1232 int ret;
1234 if (--src_node->refs == 0)
1235 splice = 1;
1236 if (src_node->current)
1237 current_ino = src_node->current->ino;
1239 src = &src_node->root_cache;
1240 dst = &dst_node->root_cache;
1241 again:
1242 cache = search_cache_extent(src, 0);
1243 while (cache) {
1244 node = container_of(cache, struct ptr_node, cache);
1245 rec = node->data;
1246 cache = next_cache_extent(cache);
1248 if (splice) {
1249 remove_cache_extent(src, &node->cache);
1250 ins = node;
1251 } else {
1252 ins = malloc(sizeof(*ins));
1253 BUG_ON(!ins);
1254 ins->cache.start = node->cache.start;
1255 ins->cache.size = node->cache.size;
1256 ins->data = rec;
1257 rec->refs++;
1259 ret = insert_cache_extent(dst, &ins->cache);
1260 if (ret == -EEXIST) {
1261 conflict = get_inode_rec(dst, rec->ino, 1);
1262 BUG_ON(IS_ERR(conflict));
1263 merge_inode_recs(rec, conflict, dst);
1264 if (rec->checked) {
1265 conflict->checked = 1;
1266 if (dst_node->current == conflict)
1267 dst_node->current = NULL;
1269 maybe_free_inode_rec(dst, conflict);
1270 free_inode_rec(rec);
1271 free(ins);
1272 } else {
1273 BUG_ON(ret);
1277 if (src == &src_node->root_cache) {
1278 src = &src_node->inode_cache;
1279 dst = &dst_node->inode_cache;
1280 goto again;
1283 if (current_ino > 0 && (!dst_node->current ||
1284 current_ino > dst_node->current->ino)) {
1285 if (dst_node->current) {
1286 dst_node->current->checked = 1;
1287 maybe_free_inode_rec(dst, dst_node->current);
1289 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290 BUG_ON(IS_ERR(dst_node->current));
1292 return 0;
1295 static void free_inode_ptr(struct cache_extent *cache)
1297 struct ptr_node *node;
1298 struct inode_record *rec;
1300 node = container_of(cache, struct ptr_node, cache);
1301 rec = node->data;
1302 free_inode_rec(rec);
1303 free(node);
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309 u64 bytenr)
1311 struct cache_extent *cache;
1312 struct shared_node *node;
1314 cache = lookup_cache_extent(shared, bytenr, 1);
1315 if (cache) {
1316 node = container_of(cache, struct shared_node, cache);
1317 return node;
1319 return NULL;
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 int ret;
1325 struct shared_node *node;
1327 node = calloc(1, sizeof(*node));
1328 if (!node)
1329 return -ENOMEM;
1330 node->cache.start = bytenr;
1331 node->cache.size = 1;
1332 cache_tree_init(&node->root_cache);
1333 cache_tree_init(&node->inode_cache);
1334 node->refs = refs;
1336 ret = insert_cache_extent(shared, &node->cache);
1338 return ret;
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342 struct walk_control *wc, int level)
1344 struct shared_node *node;
1345 struct shared_node *dest;
1346 int ret;
1348 if (level == wc->active_node)
1349 return 0;
1351 BUG_ON(wc->active_node <= level);
1352 node = find_shared_node(&wc->shared, bytenr);
1353 if (!node) {
1354 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 BUG_ON(ret);
1356 node = find_shared_node(&wc->shared, bytenr);
1357 wc->nodes[level] = node;
1358 wc->active_node = level;
1359 return 0;
1362 if (wc->root_level == wc->active_node &&
1363 btrfs_root_refs(&root->root_item) == 0) {
1364 if (--node->refs == 0) {
1365 free_inode_recs_tree(&node->root_cache);
1366 free_inode_recs_tree(&node->inode_cache);
1367 remove_cache_extent(&wc->shared, &node->cache);
1368 free(node);
1370 return 1;
1373 dest = wc->nodes[wc->active_node];
1374 splice_shared_node(node, dest);
1375 if (node->refs == 0) {
1376 remove_cache_extent(&wc->shared, &node->cache);
1377 free(node);
1379 return 1;
1382 static int leave_shared_node(struct btrfs_root *root,
1383 struct walk_control *wc, int level)
1385 struct shared_node *node;
1386 struct shared_node *dest;
1387 int i;
1389 if (level == wc->root_level)
1390 return 0;
1392 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393 if (wc->nodes[i])
1394 break;
1396 BUG_ON(i >= BTRFS_MAX_LEVEL);
1398 node = wc->nodes[wc->active_node];
1399 wc->nodes[wc->active_node] = NULL;
1400 wc->active_node = i;
1402 dest = wc->nodes[wc->active_node];
1403 if (wc->active_node < wc->root_level ||
1404 btrfs_root_refs(&root->root_item) > 0) {
1405 BUG_ON(node->refs <= 1);
1406 splice_shared_node(node, dest);
1407 } else {
1408 BUG_ON(node->refs < 2);
1409 node->refs--;
1411 return 0;
1415 * Returns:
1416 * < 0 - on error
1417 * 1 - if the root with id child_root_id is a child of root parent_root_id
1418 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1419 * has other root(s) as parent(s)
1420 * 2 - if the root child_root_id doesn't have any parent roots
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423 u64 child_root_id)
1425 struct btrfs_path path;
1426 struct btrfs_key key;
1427 struct extent_buffer *leaf;
1428 int has_parent = 0;
1429 int ret;
1431 btrfs_init_path(&path);
1433 key.objectid = parent_root_id;
1434 key.type = BTRFS_ROOT_REF_KEY;
1435 key.offset = child_root_id;
1436 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437 0, 0);
1438 if (ret < 0)
1439 return ret;
1440 btrfs_release_path(&path);
1441 if (!ret)
1442 return 1;
1444 key.objectid = child_root_id;
1445 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 key.offset = 0;
1447 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448 0, 0);
1449 if (ret < 0)
1450 goto out;
1452 while (1) {
1453 leaf = path.nodes[0];
1454 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456 if (ret)
1457 break;
1458 leaf = path.nodes[0];
1461 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462 if (key.objectid != child_root_id ||
1463 key.type != BTRFS_ROOT_BACKREF_KEY)
1464 break;
1466 has_parent = 1;
1468 if (key.offset == parent_root_id) {
1469 btrfs_release_path(&path);
1470 return 1;
1473 path.slots[0]++;
1475 out:
1476 btrfs_release_path(&path);
1477 if (ret < 0)
1478 return ret;
1479 return has_parent ? 0 : 2;
1482 static int process_dir_item(struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1486 u32 total;
1487 u32 cur = 0;
1488 u32 len;
1489 u32 name_len;
1490 u32 data_len;
1491 int error;
1492 int nritems = 0;
1493 u8 filetype;
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
1508 while (cur < total) {
1509 nritems++;
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
1515 rec->found_size += name_len;
1516 if (cur + sizeof(*di) + name_len > total ||
1517 name_len > BTRFS_NAME_LEN) {
1518 error = REF_ERR_NAME_TOO_LONG;
1520 if (cur + sizeof(*di) > total)
1521 break;
1522 len = min_t(u32, total - cur - sizeof(*di),
1523 BTRFS_NAME_LEN);
1524 } else {
1525 len = name_len;
1526 error = 0;
1529 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1531 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532 key->offset != btrfs_name_hash(namebuf, len)) {
1533 rec->errors |= I_ERR_ODD_DIR_ITEM;
1534 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535 key->objectid, key->offset, namebuf, len, filetype,
1536 key->offset, btrfs_name_hash(namebuf, len));
1539 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540 add_inode_backref(inode_cache, location.objectid,
1541 key->objectid, key->offset, namebuf,
1542 len, filetype, key->type, error);
1543 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544 add_inode_backref(root_cache, location.objectid,
1545 key->objectid, key->offset,
1546 namebuf, len, filetype,
1547 key->type, error);
1548 } else {
1549 fprintf(stderr, "invalid location in dir item %u\n",
1550 location.type);
1551 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552 key->objectid, key->offset, namebuf,
1553 len, filetype, key->type, error);
1556 len = sizeof(*di) + name_len + data_len;
1557 di = (struct btrfs_dir_item *)((char *)di + len);
1558 cur += len;
1560 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561 rec->errors |= I_ERR_DUP_DIR_INDEX;
1563 return 0;
1566 static int process_inode_ref(struct extent_buffer *eb,
1567 int slot, struct btrfs_key *key,
1568 struct shared_node *active_node)
1570 u32 total;
1571 u32 cur = 0;
1572 u32 len;
1573 u32 name_len;
1574 u64 index;
1575 int error;
1576 struct cache_tree *inode_cache;
1577 struct btrfs_inode_ref *ref;
1578 char namebuf[BTRFS_NAME_LEN];
1580 inode_cache = &active_node->inode_cache;
1582 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583 total = btrfs_item_size_nr(eb, slot);
1584 while (cur < total) {
1585 name_len = btrfs_inode_ref_name_len(eb, ref);
1586 index = btrfs_inode_ref_index(eb, ref);
1588 /* inode_ref + namelen should not cross item boundary */
1589 if (cur + sizeof(*ref) + name_len > total ||
1590 name_len > BTRFS_NAME_LEN) {
1591 if (total < cur + sizeof(*ref))
1592 break;
1594 /* Still try to read out the remaining part */
1595 len = min_t(u32, total - cur - sizeof(*ref),
1596 BTRFS_NAME_LEN);
1597 error = REF_ERR_NAME_TOO_LONG;
1598 } else {
1599 len = name_len;
1600 error = 0;
1603 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, key->offset,
1605 index, namebuf, len, 0, key->type, error);
1607 len = sizeof(*ref) + name_len;
1608 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1609 cur += len;
1611 return 0;
1614 static int process_inode_extref(struct extent_buffer *eb,
1615 int slot, struct btrfs_key *key,
1616 struct shared_node *active_node)
1618 u32 total;
1619 u32 cur = 0;
1620 u32 len;
1621 u32 name_len;
1622 u64 index;
1623 u64 parent;
1624 int error;
1625 struct cache_tree *inode_cache;
1626 struct btrfs_inode_extref *extref;
1627 char namebuf[BTRFS_NAME_LEN];
1629 inode_cache = &active_node->inode_cache;
1631 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632 total = btrfs_item_size_nr(eb, slot);
1633 while (cur < total) {
1634 name_len = btrfs_inode_extref_name_len(eb, extref);
1635 index = btrfs_inode_extref_index(eb, extref);
1636 parent = btrfs_inode_extref_parent(eb, extref);
1637 if (name_len <= BTRFS_NAME_LEN) {
1638 len = name_len;
1639 error = 0;
1640 } else {
1641 len = BTRFS_NAME_LEN;
1642 error = REF_ERR_NAME_TOO_LONG;
1644 read_extent_buffer(eb, namebuf,
1645 (unsigned long)(extref + 1), len);
1646 add_inode_backref(inode_cache, key->objectid, parent,
1647 index, namebuf, len, 0, key->type, error);
1649 len = sizeof(*extref) + name_len;
1650 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1651 cur += len;
1653 return 0;
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658 u64 len, u64 *found)
1660 struct btrfs_key key;
1661 struct btrfs_path path;
1662 struct extent_buffer *leaf;
1663 int ret;
1664 size_t size;
1665 *found = 0;
1666 u64 csum_end;
1667 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1669 btrfs_init_path(&path);
1671 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672 key.offset = start;
1673 key.type = BTRFS_EXTENT_CSUM_KEY;
1675 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1676 &key, &path, 0, 0);
1677 if (ret < 0)
1678 goto out;
1679 if (ret > 0 && path.slots[0] > 0) {
1680 leaf = path.nodes[0];
1681 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683 key.type == BTRFS_EXTENT_CSUM_KEY)
1684 path.slots[0]--;
1687 while (len > 0) {
1688 leaf = path.nodes[0];
1689 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1691 if (ret > 0)
1692 break;
1693 else if (ret < 0)
1694 goto out;
1695 leaf = path.nodes[0];
1698 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700 key.type != BTRFS_EXTENT_CSUM_KEY)
1701 break;
1703 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704 if (key.offset >= start + len)
1705 break;
1707 if (key.offset > start)
1708 start = key.offset;
1710 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711 csum_end = key.offset + (size / csum_size) *
1712 root->fs_info->sectorsize;
1713 if (csum_end > start) {
1714 size = min(csum_end - start, len);
1715 len -= size;
1716 start += size;
1717 *found += size;
1720 path.slots[0]++;
1722 out:
1723 btrfs_release_path(&path);
1724 if (ret < 0)
1725 return ret;
1726 return 0;
1729 static int process_file_extent(struct btrfs_root *root,
1730 struct extent_buffer *eb,
1731 int slot, struct btrfs_key *key,
1732 struct shared_node *active_node)
1734 struct inode_record *rec;
1735 struct btrfs_file_extent_item *fi;
1736 u64 num_bytes = 0;
1737 u64 disk_bytenr = 0;
1738 u64 extent_offset = 0;
1739 u64 mask = root->fs_info->sectorsize - 1;
1740 int extent_type;
1741 int ret;
1743 rec = active_node->current;
1744 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745 rec->found_file_extent = 1;
1747 if (rec->extent_start == (u64)-1) {
1748 rec->extent_start = key->offset;
1749 rec->extent_end = key->offset;
1752 if (rec->extent_end > key->offset)
1753 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754 else if (rec->extent_end < key->offset) {
1755 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756 key->offset - rec->extent_end);
1757 if (ret < 0)
1758 return ret;
1761 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762 extent_type = btrfs_file_extent_type(eb, fi);
1764 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766 if (num_bytes == 0)
1767 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1768 rec->found_size += num_bytes;
1769 num_bytes = (num_bytes + mask) & ~mask;
1770 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774 extent_offset = btrfs_file_extent_offset(eb, fi);
1775 if (num_bytes == 0 || (num_bytes & mask))
1776 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777 if (num_bytes + extent_offset >
1778 btrfs_file_extent_ram_bytes(eb, fi))
1779 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781 (btrfs_file_extent_compression(eb, fi) ||
1782 btrfs_file_extent_encryption(eb, fi) ||
1783 btrfs_file_extent_other_encoding(eb, fi)))
1784 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785 if (disk_bytenr > 0)
1786 rec->found_size += num_bytes;
1787 } else {
1788 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790 rec->extent_end = key->offset + num_bytes;
1793 * The data reloc tree will copy full extents into its inode and then
1794 * copy the corresponding csums. Because the extent it copied could be
1795 * a preallocated extent that hasn't been written to yet there may be no
1796 * csums to copy, ergo we won't have csums for our file extent. This is
1797 * ok so just don't bother checking csums if the inode belongs to the
1798 * data reloc tree.
1800 if (disk_bytenr > 0 &&
1801 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1802 u64 found;
1803 if (btrfs_file_extent_compression(eb, fi))
1804 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805 else
1806 disk_bytenr += extent_offset;
1808 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809 if (ret < 0)
1810 return ret;
1811 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812 if (found > 0)
1813 rec->found_csum_item = 1;
1814 if (found < num_bytes)
1815 rec->some_csum_missing = 1;
1816 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817 if (found > 0)
1818 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1821 return 0;
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825 struct walk_control *wc)
1827 struct btrfs_key key;
1828 u32 nritems;
1829 int i;
1830 int ret = 0;
1831 struct cache_tree *inode_cache;
1832 struct shared_node *active_node;
1834 if (wc->root_level == wc->active_node &&
1835 btrfs_root_refs(&root->root_item) == 0)
1836 return 0;
1838 active_node = wc->nodes[wc->active_node];
1839 inode_cache = &active_node->inode_cache;
1840 nritems = btrfs_header_nritems(eb);
1841 for (i = 0; i < nritems; i++) {
1842 btrfs_item_key_to_cpu(eb, &key, i);
1844 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845 continue;
1846 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847 continue;
1849 if (active_node->current == NULL ||
1850 active_node->current->ino < key.objectid) {
1851 if (active_node->current) {
1852 active_node->current->checked = 1;
1853 maybe_free_inode_rec(inode_cache,
1854 active_node->current);
1856 active_node->current = get_inode_rec(inode_cache,
1857 key.objectid, 1);
1858 BUG_ON(IS_ERR(active_node->current));
1860 switch (key.type) {
1861 case BTRFS_DIR_ITEM_KEY:
1862 case BTRFS_DIR_INDEX_KEY:
1863 ret = process_dir_item(eb, i, &key, active_node);
1864 break;
1865 case BTRFS_INODE_REF_KEY:
1866 ret = process_inode_ref(eb, i, &key, active_node);
1867 break;
1868 case BTRFS_INODE_EXTREF_KEY:
1869 ret = process_inode_extref(eb, i, &key, active_node);
1870 break;
1871 case BTRFS_INODE_ITEM_KEY:
1872 ret = process_inode_item(eb, i, &key, active_node);
1873 break;
1874 case BTRFS_EXTENT_DATA_KEY:
1875 ret = process_file_extent(root, eb, i, &key,
1876 active_node);
1877 break;
1878 default:
1879 break;
1882 return ret;
1885 struct node_refs {
1886 u64 bytenr[BTRFS_MAX_LEVEL];
1887 u64 refs[BTRFS_MAX_LEVEL];
1888 int need_check[BTRFS_MAX_LEVEL];
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892 struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894 unsigned int ext_ref);
1897 * Returns >0 Found error, not fatal, should continue
1898 * Returns <0 Fatal error, must exit the whole check
1899 * Returns 0 No errors found
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902 struct node_refs *nrefs, int *level, int ext_ref)
1904 struct extent_buffer *cur = path->nodes[0];
1905 struct btrfs_key key;
1906 u64 cur_bytenr;
1907 u32 nritems;
1908 u64 first_ino = 0;
1909 int root_level = btrfs_header_level(root->node);
1910 int i;
1911 int ret = 0; /* Final return value */
1912 int err = 0; /* Positive error bitmap */
1914 cur_bytenr = cur->start;
1916 /* skip to first inode item or the first inode number change */
1917 nritems = btrfs_header_nritems(cur);
1918 for (i = 0; i < nritems; i++) {
1919 btrfs_item_key_to_cpu(cur, &key, i);
1920 if (i == 0)
1921 first_ino = key.objectid;
1922 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923 (first_ino && first_ino != key.objectid))
1924 break;
1926 if (i == nritems) {
1927 path->slots[0] = nritems;
1928 return 0;
1930 path->slots[0] = i;
1932 again:
1933 err |= check_inode_item(root, path, ext_ref);
1935 if (err & LAST_ITEM)
1936 goto out;
1938 /* still have inode items in thie leaf */
1939 if (cur->start == cur_bytenr)
1940 goto again;
1943 * we have switched to another leaf, above nodes may
1944 * have changed, here walk down the path, if a node
1945 * or leaf is shared, check whether we can skip this
1946 * node or leaf.
1948 for (i = root_level; i >= 0; i--) {
1949 if (path->nodes[i]->start == nrefs->bytenr[i])
1950 continue;
1952 ret = update_nodes_refs(root,
1953 path->nodes[i]->start,
1954 nrefs, i);
1955 if (ret)
1956 goto out;
1958 if (!nrefs->need_check[i]) {
1959 *level += 1;
1960 break;
1964 for (i = 0; i < *level; i++) {
1965 free_extent_buffer(path->nodes[i]);
1966 path->nodes[i] = NULL;
1968 out:
1969 err &= ~LAST_ITEM;
1970 if (err && !ret)
1971 ret = err;
1972 return ret;
1975 static void reada_walk_down(struct btrfs_root *root,
1976 struct extent_buffer *node, int slot)
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1979 u64 bytenr;
1980 u64 ptr_gen;
1981 u32 nritems;
1982 u32 blocksize;
1983 int i;
1984 int level;
1986 level = btrfs_header_level(node);
1987 if (level != 1)
1988 return;
1990 nritems = btrfs_header_nritems(node);
1991 blocksize = fs_info->nodesize;
1992 for (i = slot; i < nritems; i++) {
1993 bytenr = btrfs_node_blockptr(node, i);
1994 ptr_gen = btrfs_node_ptr_generation(node, i);
1995 readahead_tree_block(fs_info, bytenr, blocksize, ptr_gen);
2000 * Check the child node/leaf by the following condition:
2001 * 1. the first item key of the node/leaf should be the same with the one
2002 * in parent.
2003 * 2. block in parent node should match the child node/leaf.
2004 * 3. generation of parent node and child's header should be consistent.
2006 * Or the child node/leaf pointed by the key in parent is not valid.
2008 * We hope to check leaf owner too, but since subvol may share leaves,
2009 * which makes leaf owner check not so strong, key check should be
2010 * sufficient enough for that case.
2012 static int check_child_node(struct extent_buffer *parent, int slot,
2013 struct extent_buffer *child)
2015 struct btrfs_key parent_key;
2016 struct btrfs_key child_key;
2017 int ret = 0;
2019 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2020 if (btrfs_header_level(child) == 0)
2021 btrfs_item_key_to_cpu(child, &child_key, 0);
2022 else
2023 btrfs_node_key_to_cpu(child, &child_key, 0);
2025 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2026 ret = -EINVAL;
2027 fprintf(stderr,
2028 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2029 parent_key.objectid, parent_key.type, parent_key.offset,
2030 child_key.objectid, child_key.type, child_key.offset);
2032 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2033 ret = -EINVAL;
2034 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2035 btrfs_node_blockptr(parent, slot),
2036 btrfs_header_bytenr(child));
2038 if (btrfs_node_ptr_generation(parent, slot) !=
2039 btrfs_header_generation(child)) {
2040 ret = -EINVAL;
2041 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2042 btrfs_header_generation(child),
2043 btrfs_node_ptr_generation(parent, slot));
2045 return ret;
2049 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2050 * in every fs or file tree check. Here we find its all root ids, and only check
2051 * it in the fs or file tree which has the smallest root id.
2053 static int need_check(struct btrfs_root *root, struct ulist *roots)
2055 struct rb_node *node;
2056 struct ulist_node *u;
2058 if (roots->nnodes == 1)
2059 return 1;
2061 node = rb_first(&roots->root);
2062 u = rb_entry(node, struct ulist_node, rb_node);
2064 * current root id is not smallest, we skip it and let it be checked
2065 * in the fs or file tree who hash the smallest root id.
2067 if (root->objectid != u->val)
2068 return 0;
2070 return 1;
2074 * for a tree node or leaf, we record its reference count, so later if we still
2075 * process this node or leaf, don't need to compute its reference count again.
2077 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2078 struct node_refs *nrefs, u64 level)
2080 int check, ret;
2081 u64 refs;
2082 struct ulist *roots;
2084 if (nrefs->bytenr[level] != bytenr) {
2085 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2086 level, 1, &refs, NULL);
2087 if (ret < 0)
2088 return ret;
2090 nrefs->bytenr[level] = bytenr;
2091 nrefs->refs[level] = refs;
2092 if (refs > 1) {
2093 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094 0, &roots);
2095 if (ret)
2096 return -EIO;
2098 check = need_check(root, roots);
2099 ulist_free(roots);
2100 nrefs->need_check[level] = check;
2101 } else {
2102 nrefs->need_check[level] = 1;
2106 return 0;
2109 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2110 struct walk_control *wc, int *level,
2111 struct node_refs *nrefs)
2113 enum btrfs_tree_block_status status;
2114 u64 bytenr;
2115 u64 ptr_gen;
2116 struct btrfs_fs_info *fs_info = root->fs_info;
2117 struct extent_buffer *next;
2118 struct extent_buffer *cur;
2119 u32 blocksize;
2120 int ret, err = 0;
2121 u64 refs;
2123 WARN_ON(*level < 0);
2124 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2127 refs = nrefs->refs[*level];
2128 ret = 0;
2129 } else {
2130 ret = btrfs_lookup_extent_info(NULL, root,
2131 path->nodes[*level]->start,
2132 *level, 1, &refs, NULL);
2133 if (ret < 0) {
2134 err = ret;
2135 goto out;
2137 nrefs->bytenr[*level] = path->nodes[*level]->start;
2138 nrefs->refs[*level] = refs;
2141 if (refs > 1) {
2142 ret = enter_shared_node(root, path->nodes[*level]->start,
2143 refs, wc, *level);
2144 if (ret > 0) {
2145 err = ret;
2146 goto out;
2150 while (*level >= 0) {
2151 WARN_ON(*level < 0);
2152 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2153 cur = path->nodes[*level];
2155 if (btrfs_header_level(cur) != *level)
2156 WARN_ON(1);
2158 if (path->slots[*level] >= btrfs_header_nritems(cur))
2159 break;
2160 if (*level == 0) {
2161 ret = process_one_leaf(root, cur, wc);
2162 if (ret < 0)
2163 err = ret;
2164 break;
2166 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2167 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2168 blocksize = fs_info->nodesize;
2170 if (bytenr == nrefs->bytenr[*level - 1]) {
2171 refs = nrefs->refs[*level - 1];
2172 } else {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 *level - 1, 1, &refs, NULL);
2175 if (ret < 0) {
2176 refs = 0;
2177 } else {
2178 nrefs->bytenr[*level - 1] = bytenr;
2179 nrefs->refs[*level - 1] = refs;
2183 if (refs > 1) {
2184 ret = enter_shared_node(root, bytenr, refs,
2185 wc, *level - 1);
2186 if (ret > 0) {
2187 path->slots[*level]++;
2188 continue;
2192 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2193 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2194 free_extent_buffer(next);
2195 reada_walk_down(root, cur, path->slots[*level]);
2196 next = read_tree_block(root->fs_info, bytenr, blocksize,
2197 ptr_gen);
2198 if (!extent_buffer_uptodate(next)) {
2199 struct btrfs_key node_key;
2201 btrfs_node_key_to_cpu(path->nodes[*level],
2202 &node_key,
2203 path->slots[*level]);
2204 btrfs_add_corrupt_extent_record(root->fs_info,
2205 &node_key,
2206 path->nodes[*level]->start,
2207 root->fs_info->nodesize,
2208 *level);
2209 err = -EIO;
2210 goto out;
2214 ret = check_child_node(cur, path->slots[*level], next);
2215 if (ret) {
2216 free_extent_buffer(next);
2217 err = ret;
2218 goto out;
2221 if (btrfs_is_leaf(next))
2222 status = btrfs_check_leaf(root, NULL, next);
2223 else
2224 status = btrfs_check_node(root, NULL, next);
2225 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2226 free_extent_buffer(next);
2227 err = -EIO;
2228 goto out;
2231 *level = *level - 1;
2232 free_extent_buffer(path->nodes[*level]);
2233 path->nodes[*level] = next;
2234 path->slots[*level] = 0;
2236 out:
2237 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2238 return err;
2241 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2242 unsigned int ext_ref);
2245 * Returns >0 Found error, should continue
2246 * Returns <0 Fatal error, must exit the whole check
2247 * Returns 0 No errors found
2249 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2250 int *level, struct node_refs *nrefs, int ext_ref)
2252 enum btrfs_tree_block_status status;
2253 u64 bytenr;
2254 u64 ptr_gen;
2255 struct btrfs_fs_info *fs_info = root->fs_info;
2256 struct extent_buffer *next;
2257 struct extent_buffer *cur;
2258 u32 blocksize;
2259 int ret;
2261 WARN_ON(*level < 0);
2262 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264 ret = update_nodes_refs(root, path->nodes[*level]->start,
2265 nrefs, *level);
2266 if (ret < 0)
2267 return ret;
2269 while (*level >= 0) {
2270 WARN_ON(*level < 0);
2271 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2272 cur = path->nodes[*level];
2274 if (btrfs_header_level(cur) != *level)
2275 WARN_ON(1);
2277 if (path->slots[*level] >= btrfs_header_nritems(cur))
2278 break;
2279 /* Don't forgot to check leaf/node validation */
2280 if (*level == 0) {
2281 ret = btrfs_check_leaf(root, NULL, cur);
2282 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2283 ret = -EIO;
2284 break;
2286 ret = process_one_leaf_v2(root, path, nrefs,
2287 level, ext_ref);
2288 break;
2289 } else {
2290 ret = btrfs_check_node(root, NULL, cur);
2291 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2292 ret = -EIO;
2293 break;
2296 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2297 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2298 blocksize = fs_info->nodesize;
2300 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2301 if (ret)
2302 break;
2303 if (!nrefs->need_check[*level - 1]) {
2304 path->slots[*level]++;
2305 continue;
2308 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2309 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2310 free_extent_buffer(next);
2311 reada_walk_down(root, cur, path->slots[*level]);
2312 next = read_tree_block(fs_info, bytenr, blocksize,
2313 ptr_gen);
2314 if (!extent_buffer_uptodate(next)) {
2315 struct btrfs_key node_key;
2317 btrfs_node_key_to_cpu(path->nodes[*level],
2318 &node_key,
2319 path->slots[*level]);
2320 btrfs_add_corrupt_extent_record(fs_info,
2321 &node_key,
2322 path->nodes[*level]->start,
2323 fs_info->nodesize,
2324 *level);
2325 ret = -EIO;
2326 break;
2330 ret = check_child_node(cur, path->slots[*level], next);
2331 if (ret < 0)
2332 break;
2334 if (btrfs_is_leaf(next))
2335 status = btrfs_check_leaf(root, NULL, next);
2336 else
2337 status = btrfs_check_node(root, NULL, next);
2338 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2339 free_extent_buffer(next);
2340 ret = -EIO;
2341 break;
2344 *level = *level - 1;
2345 free_extent_buffer(path->nodes[*level]);
2346 path->nodes[*level] = next;
2347 path->slots[*level] = 0;
2349 return ret;
2352 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2353 struct walk_control *wc, int *level)
2355 int i;
2356 struct extent_buffer *leaf;
2358 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2359 leaf = path->nodes[i];
2360 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2361 path->slots[i]++;
2362 *level = i;
2363 return 0;
2364 } else {
2365 free_extent_buffer(path->nodes[*level]);
2366 path->nodes[*level] = NULL;
2367 BUG_ON(*level > wc->active_node);
2368 if (*level == wc->active_node)
2369 leave_shared_node(root, wc, *level);
2370 *level = i + 1;
2373 return 1;
2376 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2377 int *level)
2379 int i;
2380 struct extent_buffer *leaf;
2382 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2383 leaf = path->nodes[i];
2384 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2385 path->slots[i]++;
2386 *level = i;
2387 return 0;
2388 } else {
2389 free_extent_buffer(path->nodes[*level]);
2390 path->nodes[*level] = NULL;
2391 *level = i + 1;
2394 return 1;
2397 static int check_root_dir(struct inode_record *rec)
2399 struct inode_backref *backref;
2400 int ret = -1;
2402 if (!rec->found_inode_item || rec->errors)
2403 goto out;
2404 if (rec->nlink != 1 || rec->found_link != 0)
2405 goto out;
2406 if (list_empty(&rec->backrefs))
2407 goto out;
2408 backref = to_inode_backref(rec->backrefs.next);
2409 if (!backref->found_inode_ref)
2410 goto out;
2411 if (backref->index != 0 || backref->namelen != 2 ||
2412 memcmp(backref->name, "..", 2))
2413 goto out;
2414 if (backref->found_dir_index || backref->found_dir_item)
2415 goto out;
2416 ret = 0;
2417 out:
2418 return ret;
2421 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2422 struct btrfs_root *root, struct btrfs_path *path,
2423 struct inode_record *rec)
2425 struct btrfs_inode_item *ei;
2426 struct btrfs_key key;
2427 int ret;
2429 key.objectid = rec->ino;
2430 key.type = BTRFS_INODE_ITEM_KEY;
2431 key.offset = (u64)-1;
2433 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2434 if (ret < 0)
2435 goto out;
2436 if (ret) {
2437 if (!path->slots[0]) {
2438 ret = -ENOENT;
2439 goto out;
2441 path->slots[0]--;
2442 ret = 0;
2444 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2445 if (key.objectid != rec->ino) {
2446 ret = -ENOENT;
2447 goto out;
2450 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2451 struct btrfs_inode_item);
2452 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2453 btrfs_mark_buffer_dirty(path->nodes[0]);
2454 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2455 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2456 root->root_key.objectid);
2457 out:
2458 btrfs_release_path(path);
2459 return ret;
2462 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2463 struct btrfs_root *root,
2464 struct btrfs_path *path,
2465 struct inode_record *rec)
2467 int ret;
2469 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2470 btrfs_release_path(path);
2471 if (!ret)
2472 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2473 return ret;
2476 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2477 struct btrfs_root *root,
2478 struct btrfs_path *path,
2479 struct inode_record *rec)
2481 struct btrfs_inode_item *ei;
2482 struct btrfs_key key;
2483 int ret = 0;
2485 key.objectid = rec->ino;
2486 key.type = BTRFS_INODE_ITEM_KEY;
2487 key.offset = 0;
2489 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2490 if (ret) {
2491 if (ret > 0)
2492 ret = -ENOENT;
2493 goto out;
2496 /* Since ret == 0, no need to check anything */
2497 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2498 struct btrfs_inode_item);
2499 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2500 btrfs_mark_buffer_dirty(path->nodes[0]);
2501 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2502 printf("reset nbytes for ino %llu root %llu\n",
2503 rec->ino, root->root_key.objectid);
2504 out:
2505 btrfs_release_path(path);
2506 return ret;
2509 static int add_missing_dir_index(struct btrfs_root *root,
2510 struct cache_tree *inode_cache,
2511 struct inode_record *rec,
2512 struct inode_backref *backref)
2514 struct btrfs_path path;
2515 struct btrfs_trans_handle *trans;
2516 struct btrfs_dir_item *dir_item;
2517 struct extent_buffer *leaf;
2518 struct btrfs_key key;
2519 struct btrfs_disk_key disk_key;
2520 struct inode_record *dir_rec;
2521 unsigned long name_ptr;
2522 u32 data_size = sizeof(*dir_item) + backref->namelen;
2523 int ret;
2525 trans = btrfs_start_transaction(root, 1);
2526 if (IS_ERR(trans))
2527 return PTR_ERR(trans);
2529 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2530 (unsigned long long)rec->ino);
2532 btrfs_init_path(&path);
2533 key.objectid = backref->dir;
2534 key.type = BTRFS_DIR_INDEX_KEY;
2535 key.offset = backref->index;
2536 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2537 BUG_ON(ret);
2539 leaf = path.nodes[0];
2540 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2542 disk_key.objectid = cpu_to_le64(rec->ino);
2543 disk_key.type = BTRFS_INODE_ITEM_KEY;
2544 disk_key.offset = 0;
2546 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2547 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2548 btrfs_set_dir_data_len(leaf, dir_item, 0);
2549 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2550 name_ptr = (unsigned long)(dir_item + 1);
2551 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2552 btrfs_mark_buffer_dirty(leaf);
2553 btrfs_release_path(&path);
2554 btrfs_commit_transaction(trans, root);
2556 backref->found_dir_index = 1;
2557 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2558 BUG_ON(IS_ERR(dir_rec));
2559 if (!dir_rec)
2560 return 0;
2561 dir_rec->found_size += backref->namelen;
2562 if (dir_rec->found_size == dir_rec->isize &&
2563 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2564 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2565 if (dir_rec->found_size != dir_rec->isize)
2566 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2568 return 0;
2571 static int delete_dir_index(struct btrfs_root *root,
2572 struct inode_backref *backref)
2574 struct btrfs_trans_handle *trans;
2575 struct btrfs_dir_item *di;
2576 struct btrfs_path path;
2577 int ret = 0;
2579 trans = btrfs_start_transaction(root, 1);
2580 if (IS_ERR(trans))
2581 return PTR_ERR(trans);
2583 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2584 (unsigned long long)backref->dir,
2585 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2586 (unsigned long long)root->objectid);
2588 btrfs_init_path(&path);
2589 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2590 backref->name, backref->namelen,
2591 backref->index, -1);
2592 if (IS_ERR(di)) {
2593 ret = PTR_ERR(di);
2594 btrfs_release_path(&path);
2595 btrfs_commit_transaction(trans, root);
2596 if (ret == -ENOENT)
2597 return 0;
2598 return ret;
2601 if (!di)
2602 ret = btrfs_del_item(trans, root, &path);
2603 else
2604 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2605 BUG_ON(ret);
2606 btrfs_release_path(&path);
2607 btrfs_commit_transaction(trans, root);
2608 return ret;
2611 static int create_inode_item(struct btrfs_root *root,
2612 struct inode_record *rec,
2613 int root_dir)
2615 struct btrfs_trans_handle *trans;
2616 struct btrfs_inode_item inode_item;
2617 time_t now = time(NULL);
2618 int ret;
2620 trans = btrfs_start_transaction(root, 1);
2621 if (IS_ERR(trans)) {
2622 ret = PTR_ERR(trans);
2623 return ret;
2626 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2627 "be incomplete, please check permissions and content after "
2628 "the fsck completes.\n", (unsigned long long)root->objectid,
2629 (unsigned long long)rec->ino);
2631 memset(&inode_item, 0, sizeof(inode_item));
2632 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2633 if (root_dir)
2634 btrfs_set_stack_inode_nlink(&inode_item, 1);
2635 else
2636 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2637 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2638 if (rec->found_dir_item) {
2639 if (rec->found_file_extent)
2640 fprintf(stderr, "root %llu inode %llu has both a dir "
2641 "item and extents, unsure if it is a dir or a "
2642 "regular file so setting it as a directory\n",
2643 (unsigned long long)root->objectid,
2644 (unsigned long long)rec->ino);
2645 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2646 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2647 } else if (!rec->found_dir_item) {
2648 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2649 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2651 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2652 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2653 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2654 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2655 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2656 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2657 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2658 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2660 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2661 BUG_ON(ret);
2662 btrfs_commit_transaction(trans, root);
2663 return 0;
2666 static int repair_inode_backrefs(struct btrfs_root *root,
2667 struct inode_record *rec,
2668 struct cache_tree *inode_cache,
2669 int delete)
2671 struct inode_backref *tmp, *backref;
2672 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2673 int ret = 0;
2674 int repaired = 0;
2676 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2677 if (!delete && rec->ino == root_dirid) {
2678 if (!rec->found_inode_item) {
2679 ret = create_inode_item(root, rec, 1);
2680 if (ret)
2681 break;
2682 repaired++;
2686 /* Index 0 for root dir's are special, don't mess with it */
2687 if (rec->ino == root_dirid && backref->index == 0)
2688 continue;
2690 if (delete &&
2691 ((backref->found_dir_index && !backref->found_inode_ref) ||
2692 (backref->found_dir_index && backref->found_inode_ref &&
2693 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2694 ret = delete_dir_index(root, backref);
2695 if (ret)
2696 break;
2697 repaired++;
2698 list_del(&backref->list);
2699 free(backref);
2700 continue;
2703 if (!delete && !backref->found_dir_index &&
2704 backref->found_dir_item && backref->found_inode_ref) {
2705 ret = add_missing_dir_index(root, inode_cache, rec,
2706 backref);
2707 if (ret)
2708 break;
2709 repaired++;
2710 if (backref->found_dir_item &&
2711 backref->found_dir_index) {
2712 if (!backref->errors &&
2713 backref->found_inode_ref) {
2714 list_del(&backref->list);
2715 free(backref);
2716 continue;
2721 if (!delete && (!backref->found_dir_index &&
2722 !backref->found_dir_item &&
2723 backref->found_inode_ref)) {
2724 struct btrfs_trans_handle *trans;
2725 struct btrfs_key location;
2727 ret = check_dir_conflict(root, backref->name,
2728 backref->namelen,
2729 backref->dir,
2730 backref->index);
2731 if (ret) {
2733 * let nlink fixing routine to handle it,
2734 * which can do it better.
2736 ret = 0;
2737 break;
2739 location.objectid = rec->ino;
2740 location.type = BTRFS_INODE_ITEM_KEY;
2741 location.offset = 0;
2743 trans = btrfs_start_transaction(root, 1);
2744 if (IS_ERR(trans)) {
2745 ret = PTR_ERR(trans);
2746 break;
2748 fprintf(stderr, "adding missing dir index/item pair "
2749 "for inode %llu\n",
2750 (unsigned long long)rec->ino);
2751 ret = btrfs_insert_dir_item(trans, root, backref->name,
2752 backref->namelen,
2753 backref->dir, &location,
2754 imode_to_type(rec->imode),
2755 backref->index);
2756 BUG_ON(ret);
2757 btrfs_commit_transaction(trans, root);
2758 repaired++;
2761 if (!delete && (backref->found_inode_ref &&
2762 backref->found_dir_index &&
2763 backref->found_dir_item &&
2764 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2765 !rec->found_inode_item)) {
2766 ret = create_inode_item(root, rec, 0);
2767 if (ret)
2768 break;
2769 repaired++;
2773 return ret ? ret : repaired;
2777 * To determine the file type for nlink/inode_item repair
2779 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2780 * Return -ENOENT if file type is not found.
2782 static int find_file_type(struct inode_record *rec, u8 *type)
2784 struct inode_backref *backref;
2786 /* For inode item recovered case */
2787 if (rec->found_inode_item) {
2788 *type = imode_to_type(rec->imode);
2789 return 0;
2792 list_for_each_entry(backref, &rec->backrefs, list) {
2793 if (backref->found_dir_index || backref->found_dir_item) {
2794 *type = backref->filetype;
2795 return 0;
2798 return -ENOENT;
2802 * To determine the file name for nlink repair
2804 * Return 0 if file name is found, set name and namelen.
2805 * Return -ENOENT if file name is not found.
2807 static int find_file_name(struct inode_record *rec,
2808 char *name, int *namelen)
2810 struct inode_backref *backref;
2812 list_for_each_entry(backref, &rec->backrefs, list) {
2813 if (backref->found_dir_index || backref->found_dir_item ||
2814 backref->found_inode_ref) {
2815 memcpy(name, backref->name, backref->namelen);
2816 *namelen = backref->namelen;
2817 return 0;
2820 return -ENOENT;
2823 /* Reset the nlink of the inode to the correct one */
2824 static int reset_nlink(struct btrfs_trans_handle *trans,
2825 struct btrfs_root *root,
2826 struct btrfs_path *path,
2827 struct inode_record *rec)
2829 struct inode_backref *backref;
2830 struct inode_backref *tmp;
2831 struct btrfs_key key;
2832 struct btrfs_inode_item *inode_item;
2833 int ret = 0;
2835 /* We don't believe this either, reset it and iterate backref */
2836 rec->found_link = 0;
2838 /* Remove all backref including the valid ones */
2839 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2840 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2841 backref->index, backref->name,
2842 backref->namelen, 0);
2843 if (ret < 0)
2844 goto out;
2846 /* remove invalid backref, so it won't be added back */
2847 if (!(backref->found_dir_index &&
2848 backref->found_dir_item &&
2849 backref->found_inode_ref)) {
2850 list_del(&backref->list);
2851 free(backref);
2852 } else {
2853 rec->found_link++;
2857 /* Set nlink to 0 */
2858 key.objectid = rec->ino;
2859 key.type = BTRFS_INODE_ITEM_KEY;
2860 key.offset = 0;
2861 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2862 if (ret < 0)
2863 goto out;
2864 if (ret > 0) {
2865 ret = -ENOENT;
2866 goto out;
2868 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2869 struct btrfs_inode_item);
2870 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2871 btrfs_mark_buffer_dirty(path->nodes[0]);
2872 btrfs_release_path(path);
2875 * Add back valid inode_ref/dir_item/dir_index,
2876 * add_link() will handle the nlink inc, so new nlink must be correct
2878 list_for_each_entry(backref, &rec->backrefs, list) {
2879 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2880 backref->name, backref->namelen,
2881 backref->filetype, &backref->index, 1);
2882 if (ret < 0)
2883 goto out;
2885 out:
2886 btrfs_release_path(path);
2887 return ret;
2890 static int get_highest_inode(struct btrfs_trans_handle *trans,
2891 struct btrfs_root *root,
2892 struct btrfs_path *path,
2893 u64 *highest_ino)
2895 struct btrfs_key key, found_key;
2896 int ret;
2898 btrfs_init_path(path);
2899 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2900 key.offset = -1;
2901 key.type = BTRFS_INODE_ITEM_KEY;
2902 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2903 if (ret == 1) {
2904 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2905 path->slots[0] - 1);
2906 *highest_ino = found_key.objectid;
2907 ret = 0;
2909 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2910 ret = -EOVERFLOW;
2911 btrfs_release_path(path);
2912 return ret;
2915 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2916 struct btrfs_root *root,
2917 struct btrfs_path *path,
2918 struct inode_record *rec)
2920 char *dir_name = "lost+found";
2921 char namebuf[BTRFS_NAME_LEN] = {0};
2922 u64 lost_found_ino;
2923 u32 mode = 0700;
2924 u8 type = 0;
2925 int namelen = 0;
2926 int name_recovered = 0;
2927 int type_recovered = 0;
2928 int ret = 0;
2931 * Get file name and type first before these invalid inode ref
2932 * are deleted by remove_all_invalid_backref()
2934 name_recovered = !find_file_name(rec, namebuf, &namelen);
2935 type_recovered = !find_file_type(rec, &type);
2937 if (!name_recovered) {
2938 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2939 rec->ino, rec->ino);
2940 namelen = count_digits(rec->ino);
2941 sprintf(namebuf, "%llu", rec->ino);
2942 name_recovered = 1;
2944 if (!type_recovered) {
2945 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2946 rec->ino);
2947 type = BTRFS_FT_REG_FILE;
2948 type_recovered = 1;
2951 ret = reset_nlink(trans, root, path, rec);
2952 if (ret < 0) {
2953 fprintf(stderr,
2954 "Failed to reset nlink for inode %llu: %s\n",
2955 rec->ino, strerror(-ret));
2956 goto out;
2959 if (rec->found_link == 0) {
2960 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2961 if (ret < 0)
2962 goto out;
2963 lost_found_ino++;
2964 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2965 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2966 mode);
2967 if (ret < 0) {
2968 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2969 dir_name, strerror(-ret));
2970 goto out;
2972 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2973 namebuf, namelen, type, NULL, 1);
2975 * Add ".INO" suffix several times to handle case where
2976 * "FILENAME.INO" is already taken by another file.
2978 while (ret == -EEXIST) {
2980 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2982 if (namelen + count_digits(rec->ino) + 1 >
2983 BTRFS_NAME_LEN) {
2984 ret = -EFBIG;
2985 goto out;
2987 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2988 ".%llu", rec->ino);
2989 namelen += count_digits(rec->ino) + 1;
2990 ret = btrfs_add_link(trans, root, rec->ino,
2991 lost_found_ino, namebuf,
2992 namelen, type, NULL, 1);
2994 if (ret < 0) {
2995 fprintf(stderr,
2996 "Failed to link the inode %llu to %s dir: %s\n",
2997 rec->ino, dir_name, strerror(-ret));
2998 goto out;
3001 * Just increase the found_link, don't actually add the
3002 * backref. This will make things easier and this inode
3003 * record will be freed after the repair is done.
3004 * So fsck will not report problem about this inode.
3006 rec->found_link++;
3007 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3008 namelen, namebuf, dir_name);
3010 printf("Fixed the nlink of inode %llu\n", rec->ino);
3011 out:
3013 * Clear the flag anyway, or we will loop forever for the same inode
3014 * as it will not be removed from the bad inode list and the dead loop
3015 * happens.
3017 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3018 btrfs_release_path(path);
3019 return ret;
3023 * Check if there is any normal(reg or prealloc) file extent for given
3024 * ino.
3025 * This is used to determine the file type when neither its dir_index/item or
3026 * inode_item exists.
3028 * This will *NOT* report error, if any error happens, just consider it does
3029 * not have any normal file extent.
3031 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3033 struct btrfs_path path;
3034 struct btrfs_key key;
3035 struct btrfs_key found_key;
3036 struct btrfs_file_extent_item *fi;
3037 u8 type;
3038 int ret = 0;
3040 btrfs_init_path(&path);
3041 key.objectid = ino;
3042 key.type = BTRFS_EXTENT_DATA_KEY;
3043 key.offset = 0;
3045 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3046 if (ret < 0) {
3047 ret = 0;
3048 goto out;
3050 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3051 ret = btrfs_next_leaf(root, &path);
3052 if (ret) {
3053 ret = 0;
3054 goto out;
3057 while (1) {
3058 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3059 path.slots[0]);
3060 if (found_key.objectid != ino ||
3061 found_key.type != BTRFS_EXTENT_DATA_KEY)
3062 break;
3063 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3064 struct btrfs_file_extent_item);
3065 type = btrfs_file_extent_type(path.nodes[0], fi);
3066 if (type != BTRFS_FILE_EXTENT_INLINE) {
3067 ret = 1;
3068 goto out;
3071 out:
3072 btrfs_release_path(&path);
3073 return ret;
3076 static u32 btrfs_type_to_imode(u8 type)
3078 static u32 imode_by_btrfs_type[] = {
3079 [BTRFS_FT_REG_FILE] = S_IFREG,
3080 [BTRFS_FT_DIR] = S_IFDIR,
3081 [BTRFS_FT_CHRDEV] = S_IFCHR,
3082 [BTRFS_FT_BLKDEV] = S_IFBLK,
3083 [BTRFS_FT_FIFO] = S_IFIFO,
3084 [BTRFS_FT_SOCK] = S_IFSOCK,
3085 [BTRFS_FT_SYMLINK] = S_IFLNK,
3088 return imode_by_btrfs_type[(type)];
3091 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3092 struct btrfs_root *root,
3093 struct btrfs_path *path,
3094 struct inode_record *rec)
3096 u8 filetype;
3097 u32 mode = 0700;
3098 int type_recovered = 0;
3099 int ret = 0;
3101 printf("Trying to rebuild inode:%llu\n", rec->ino);
3103 type_recovered = !find_file_type(rec, &filetype);
3106 * Try to determine inode type if type not found.
3108 * For found regular file extent, it must be FILE.
3109 * For found dir_item/index, it must be DIR.
3111 * For undetermined one, use FILE as fallback.
3113 * TODO:
3114 * 1. If found backref(inode_index/item is already handled) to it,
3115 * it must be DIR.
3116 * Need new inode-inode ref structure to allow search for that.
3118 if (!type_recovered) {
3119 if (rec->found_file_extent &&
3120 find_normal_file_extent(root, rec->ino)) {
3121 type_recovered = 1;
3122 filetype = BTRFS_FT_REG_FILE;
3123 } else if (rec->found_dir_item) {
3124 type_recovered = 1;
3125 filetype = BTRFS_FT_DIR;
3126 } else if (!list_empty(&rec->orphan_extents)) {
3127 type_recovered = 1;
3128 filetype = BTRFS_FT_REG_FILE;
3129 } else{
3130 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3131 rec->ino);
3132 type_recovered = 1;
3133 filetype = BTRFS_FT_REG_FILE;
3137 ret = btrfs_new_inode(trans, root, rec->ino,
3138 mode | btrfs_type_to_imode(filetype));
3139 if (ret < 0)
3140 goto out;
3143 * Here inode rebuild is done, we only rebuild the inode item,
3144 * don't repair the nlink(like move to lost+found).
3145 * That is the job of nlink repair.
3147 * We just fill the record and return
3149 rec->found_dir_item = 1;
3150 rec->imode = mode | btrfs_type_to_imode(filetype);
3151 rec->nlink = 0;
3152 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3153 /* Ensure the inode_nlinks repair function will be called */
3154 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3155 out:
3156 return ret;
3159 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3160 struct btrfs_root *root,
3161 struct btrfs_path *path,
3162 struct inode_record *rec)
3164 struct orphan_data_extent *orphan;
3165 struct orphan_data_extent *tmp;
3166 int ret = 0;
3168 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3170 * Check for conflicting file extents
3172 * Here we don't know whether the extents is compressed or not,
3173 * so we can only assume it not compressed nor data offset,
3174 * and use its disk_len as extent length.
3176 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3177 orphan->offset, orphan->disk_len, 0);
3178 btrfs_release_path(path);
3179 if (ret < 0)
3180 goto out;
3181 if (!ret) {
3182 fprintf(stderr,
3183 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3184 orphan->disk_bytenr, orphan->disk_len);
3185 ret = btrfs_free_extent(trans,
3186 root->fs_info->extent_root,
3187 orphan->disk_bytenr, orphan->disk_len,
3188 0, root->objectid, orphan->objectid,
3189 orphan->offset);
3190 if (ret < 0)
3191 goto out;
3193 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3194 orphan->offset, orphan->disk_bytenr,
3195 orphan->disk_len, orphan->disk_len);
3196 if (ret < 0)
3197 goto out;
3199 /* Update file size info */
3200 rec->found_size += orphan->disk_len;
3201 if (rec->found_size == rec->nbytes)
3202 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3204 /* Update the file extent hole info too */
3205 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3206 orphan->disk_len);
3207 if (ret < 0)
3208 goto out;
3209 if (RB_EMPTY_ROOT(&rec->holes))
3210 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3212 list_del(&orphan->list);
3213 free(orphan);
3215 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3216 out:
3217 return ret;
3220 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3221 struct btrfs_root *root,
3222 struct btrfs_path *path,
3223 struct inode_record *rec)
3225 struct rb_node *node;
3226 struct file_extent_hole *hole;
3227 int found = 0;
3228 int ret = 0;
3230 node = rb_first(&rec->holes);
3232 while (node) {
3233 found = 1;
3234 hole = rb_entry(node, struct file_extent_hole, node);
3235 ret = btrfs_punch_hole(trans, root, rec->ino,
3236 hole->start, hole->len);
3237 if (ret < 0)
3238 goto out;
3239 ret = del_file_extent_hole(&rec->holes, hole->start,
3240 hole->len);
3241 if (ret < 0)
3242 goto out;
3243 if (RB_EMPTY_ROOT(&rec->holes))
3244 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3245 node = rb_first(&rec->holes);
3247 /* special case for a file losing all its file extent */
3248 if (!found) {
3249 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3250 round_up(rec->isize,
3251 root->fs_info->sectorsize));
3252 if (ret < 0)
3253 goto out;
3255 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3256 rec->ino, root->objectid);
3257 out:
3258 return ret;
3261 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3263 struct btrfs_trans_handle *trans;
3264 struct btrfs_path path;
3265 int ret = 0;
3267 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3268 I_ERR_NO_ORPHAN_ITEM |
3269 I_ERR_LINK_COUNT_WRONG |
3270 I_ERR_NO_INODE_ITEM |
3271 I_ERR_FILE_EXTENT_ORPHAN |
3272 I_ERR_FILE_EXTENT_DISCOUNT|
3273 I_ERR_FILE_NBYTES_WRONG)))
3274 return rec->errors;
3277 * For nlink repair, it may create a dir and add link, so
3278 * 2 for parent(256)'s dir_index and dir_item
3279 * 2 for lost+found dir's inode_item and inode_ref
3280 * 1 for the new inode_ref of the file
3281 * 2 for lost+found dir's dir_index and dir_item for the file
3283 trans = btrfs_start_transaction(root, 7);
3284 if (IS_ERR(trans))
3285 return PTR_ERR(trans);
3287 btrfs_init_path(&path);
3288 if (rec->errors & I_ERR_NO_INODE_ITEM)
3289 ret = repair_inode_no_item(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3291 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3292 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3293 ret = repair_inode_discount_extent(trans, root, &path, rec);
3294 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3295 ret = repair_inode_isize(trans, root, &path, rec);
3296 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3297 ret = repair_inode_orphan_item(trans, root, &path, rec);
3298 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3299 ret = repair_inode_nlinks(trans, root, &path, rec);
3300 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3301 ret = repair_inode_nbytes(trans, root, &path, rec);
3302 btrfs_commit_transaction(trans, root);
3303 btrfs_release_path(&path);
3304 return ret;
3307 static int check_inode_recs(struct btrfs_root *root,
3308 struct cache_tree *inode_cache)
3310 struct cache_extent *cache;
3311 struct ptr_node *node;
3312 struct inode_record *rec;
3313 struct inode_backref *backref;
3314 int stage = 0;
3315 int ret = 0;
3316 int err = 0;
3317 u64 error = 0;
3318 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3320 if (btrfs_root_refs(&root->root_item) == 0) {
3321 if (!cache_tree_empty(inode_cache))
3322 fprintf(stderr, "warning line %d\n", __LINE__);
3323 return 0;
3327 * We need to repair backrefs first because we could change some of the
3328 * errors in the inode recs.
3330 * We also need to go through and delete invalid backrefs first and then
3331 * add the correct ones second. We do this because we may get EEXIST
3332 * when adding back the correct index because we hadn't yet deleted the
3333 * invalid index.
3335 * For example, if we were missing a dir index then the directories
3336 * isize would be wrong, so if we fixed the isize to what we thought it
3337 * would be and then fixed the backref we'd still have a invalid fs, so
3338 * we need to add back the dir index and then check to see if the isize
3339 * is still wrong.
3341 while (stage < 3) {
3342 stage++;
3343 if (stage == 3 && !err)
3344 break;
3346 cache = search_cache_extent(inode_cache, 0);
3347 while (repair && cache) {
3348 node = container_of(cache, struct ptr_node, cache);
3349 rec = node->data;
3350 cache = next_cache_extent(cache);
3352 /* Need to free everything up and rescan */
3353 if (stage == 3) {
3354 remove_cache_extent(inode_cache, &node->cache);
3355 free(node);
3356 free_inode_rec(rec);
3357 continue;
3360 if (list_empty(&rec->backrefs))
3361 continue;
3363 ret = repair_inode_backrefs(root, rec, inode_cache,
3364 stage == 1);
3365 if (ret < 0) {
3366 err = ret;
3367 stage = 2;
3368 break;
3369 } if (ret > 0) {
3370 err = -EAGAIN;
3374 if (err)
3375 return err;
3377 rec = get_inode_rec(inode_cache, root_dirid, 0);
3378 BUG_ON(IS_ERR(rec));
3379 if (rec) {
3380 ret = check_root_dir(rec);
3381 if (ret) {
3382 fprintf(stderr, "root %llu root dir %llu error\n",
3383 (unsigned long long)root->root_key.objectid,
3384 (unsigned long long)root_dirid);
3385 print_inode_error(root, rec);
3386 error++;
3388 } else {
3389 if (repair) {
3390 struct btrfs_trans_handle *trans;
3392 trans = btrfs_start_transaction(root, 1);
3393 if (IS_ERR(trans)) {
3394 err = PTR_ERR(trans);
3395 return err;
3398 fprintf(stderr,
3399 "root %llu missing its root dir, recreating\n",
3400 (unsigned long long)root->objectid);
3402 ret = btrfs_make_root_dir(trans, root, root_dirid);
3403 BUG_ON(ret);
3405 btrfs_commit_transaction(trans, root);
3406 return -EAGAIN;
3409 fprintf(stderr, "root %llu root dir %llu not found\n",
3410 (unsigned long long)root->root_key.objectid,
3411 (unsigned long long)root_dirid);
3414 while (1) {
3415 cache = search_cache_extent(inode_cache, 0);
3416 if (!cache)
3417 break;
3418 node = container_of(cache, struct ptr_node, cache);
3419 rec = node->data;
3420 remove_cache_extent(inode_cache, &node->cache);
3421 free(node);
3422 if (rec->ino == root_dirid ||
3423 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3424 free_inode_rec(rec);
3425 continue;
3428 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3429 ret = check_orphan_item(root, rec->ino);
3430 if (ret == 0)
3431 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3432 if (can_free_inode_rec(rec)) {
3433 free_inode_rec(rec);
3434 continue;
3438 if (!rec->found_inode_item)
3439 rec->errors |= I_ERR_NO_INODE_ITEM;
3440 if (rec->found_link != rec->nlink)
3441 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3442 if (repair) {
3443 ret = try_repair_inode(root, rec);
3444 if (ret == 0 && can_free_inode_rec(rec)) {
3445 free_inode_rec(rec);
3446 continue;
3448 ret = 0;
3451 if (!(repair && ret == 0))
3452 error++;
3453 print_inode_error(root, rec);
3454 list_for_each_entry(backref, &rec->backrefs, list) {
3455 if (!backref->found_dir_item)
3456 backref->errors |= REF_ERR_NO_DIR_ITEM;
3457 if (!backref->found_dir_index)
3458 backref->errors |= REF_ERR_NO_DIR_INDEX;
3459 if (!backref->found_inode_ref)
3460 backref->errors |= REF_ERR_NO_INODE_REF;
3461 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3462 " namelen %u name %s filetype %d errors %x",
3463 (unsigned long long)backref->dir,
3464 (unsigned long long)backref->index,
3465 backref->namelen, backref->name,
3466 backref->filetype, backref->errors);
3467 print_ref_error(backref->errors);
3469 free_inode_rec(rec);
3471 return (error > 0) ? -1 : 0;
3474 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3475 u64 objectid)
3477 struct cache_extent *cache;
3478 struct root_record *rec = NULL;
3479 int ret;
3481 cache = lookup_cache_extent(root_cache, objectid, 1);
3482 if (cache) {
3483 rec = container_of(cache, struct root_record, cache);
3484 } else {
3485 rec = calloc(1, sizeof(*rec));
3486 if (!rec)
3487 return ERR_PTR(-ENOMEM);
3488 rec->objectid = objectid;
3489 INIT_LIST_HEAD(&rec->backrefs);
3490 rec->cache.start = objectid;
3491 rec->cache.size = 1;
3493 ret = insert_cache_extent(root_cache, &rec->cache);
3494 if (ret)
3495 return ERR_PTR(-EEXIST);
3497 return rec;
3500 static struct root_backref *get_root_backref(struct root_record *rec,
3501 u64 ref_root, u64 dir, u64 index,
3502 const char *name, int namelen)
3504 struct root_backref *backref;
3506 list_for_each_entry(backref, &rec->backrefs, list) {
3507 if (backref->ref_root != ref_root || backref->dir != dir ||
3508 backref->namelen != namelen)
3509 continue;
3510 if (memcmp(name, backref->name, namelen))
3511 continue;
3512 return backref;
3515 backref = calloc(1, sizeof(*backref) + namelen + 1);
3516 if (!backref)
3517 return NULL;
3518 backref->ref_root = ref_root;
3519 backref->dir = dir;
3520 backref->index = index;
3521 backref->namelen = namelen;
3522 memcpy(backref->name, name, namelen);
3523 backref->name[namelen] = '\0';
3524 list_add_tail(&backref->list, &rec->backrefs);
3525 return backref;
3528 static void free_root_record(struct cache_extent *cache)
3530 struct root_record *rec;
3531 struct root_backref *backref;
3533 rec = container_of(cache, struct root_record, cache);
3534 while (!list_empty(&rec->backrefs)) {
3535 backref = to_root_backref(rec->backrefs.next);
3536 list_del(&backref->list);
3537 free(backref);
3540 free(rec);
3543 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3545 static int add_root_backref(struct cache_tree *root_cache,
3546 u64 root_id, u64 ref_root, u64 dir, u64 index,
3547 const char *name, int namelen,
3548 int item_type, int errors)
3550 struct root_record *rec;
3551 struct root_backref *backref;
3553 rec = get_root_rec(root_cache, root_id);
3554 BUG_ON(IS_ERR(rec));
3555 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3556 BUG_ON(!backref);
3558 backref->errors |= errors;
3560 if (item_type != BTRFS_DIR_ITEM_KEY) {
3561 if (backref->found_dir_index || backref->found_back_ref ||
3562 backref->found_forward_ref) {
3563 if (backref->index != index)
3564 backref->errors |= REF_ERR_INDEX_UNMATCH;
3565 } else {
3566 backref->index = index;
3570 if (item_type == BTRFS_DIR_ITEM_KEY) {
3571 if (backref->found_forward_ref)
3572 rec->found_ref++;
3573 backref->found_dir_item = 1;
3574 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3575 backref->found_dir_index = 1;
3576 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3577 if (backref->found_forward_ref)
3578 backref->errors |= REF_ERR_DUP_ROOT_REF;
3579 else if (backref->found_dir_item)
3580 rec->found_ref++;
3581 backref->found_forward_ref = 1;
3582 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3583 if (backref->found_back_ref)
3584 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3585 backref->found_back_ref = 1;
3586 } else {
3587 BUG_ON(1);
3590 if (backref->found_forward_ref && backref->found_dir_item)
3591 backref->reachable = 1;
3592 return 0;
3595 static int merge_root_recs(struct btrfs_root *root,
3596 struct cache_tree *src_cache,
3597 struct cache_tree *dst_cache)
3599 struct cache_extent *cache;
3600 struct ptr_node *node;
3601 struct inode_record *rec;
3602 struct inode_backref *backref;
3603 int ret = 0;
3605 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3606 free_inode_recs_tree(src_cache);
3607 return 0;
3610 while (1) {
3611 cache = search_cache_extent(src_cache, 0);
3612 if (!cache)
3613 break;
3614 node = container_of(cache, struct ptr_node, cache);
3615 rec = node->data;
3616 remove_cache_extent(src_cache, &node->cache);
3617 free(node);
3619 ret = is_child_root(root, root->objectid, rec->ino);
3620 if (ret < 0)
3621 break;
3622 else if (ret == 0)
3623 goto skip;
3625 list_for_each_entry(backref, &rec->backrefs, list) {
3626 BUG_ON(backref->found_inode_ref);
3627 if (backref->found_dir_item)
3628 add_root_backref(dst_cache, rec->ino,
3629 root->root_key.objectid, backref->dir,
3630 backref->index, backref->name,
3631 backref->namelen, BTRFS_DIR_ITEM_KEY,
3632 backref->errors);
3633 if (backref->found_dir_index)
3634 add_root_backref(dst_cache, rec->ino,
3635 root->root_key.objectid, backref->dir,
3636 backref->index, backref->name,
3637 backref->namelen, BTRFS_DIR_INDEX_KEY,
3638 backref->errors);
3640 skip:
3641 free_inode_rec(rec);
3643 if (ret < 0)
3644 return ret;
3645 return 0;
3648 static int check_root_refs(struct btrfs_root *root,
3649 struct cache_tree *root_cache)
3651 struct root_record *rec;
3652 struct root_record *ref_root;
3653 struct root_backref *backref;
3654 struct cache_extent *cache;
3655 int loop = 1;
3656 int ret;
3657 int error;
3658 int errors = 0;
3660 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3661 BUG_ON(IS_ERR(rec));
3662 rec->found_ref = 1;
3664 /* fixme: this can not detect circular references */
3665 while (loop) {
3666 loop = 0;
3667 cache = search_cache_extent(root_cache, 0);
3668 while (1) {
3669 if (!cache)
3670 break;
3671 rec = container_of(cache, struct root_record, cache);
3672 cache = next_cache_extent(cache);
3674 if (rec->found_ref == 0)
3675 continue;
3677 list_for_each_entry(backref, &rec->backrefs, list) {
3678 if (!backref->reachable)
3679 continue;
3681 ref_root = get_root_rec(root_cache,
3682 backref->ref_root);
3683 BUG_ON(IS_ERR(ref_root));
3684 if (ref_root->found_ref > 0)
3685 continue;
3687 backref->reachable = 0;
3688 rec->found_ref--;
3689 if (rec->found_ref == 0)
3690 loop = 1;
3695 cache = search_cache_extent(root_cache, 0);
3696 while (1) {
3697 if (!cache)
3698 break;
3699 rec = container_of(cache, struct root_record, cache);
3700 cache = next_cache_extent(cache);
3702 if (rec->found_ref == 0 &&
3703 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3704 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3705 ret = check_orphan_item(root->fs_info->tree_root,
3706 rec->objectid);
3707 if (ret == 0)
3708 continue;
3711 * If we don't have a root item then we likely just have
3712 * a dir item in a snapshot for this root but no actual
3713 * ref key or anything so it's meaningless.
3715 if (!rec->found_root_item)
3716 continue;
3717 errors++;
3718 fprintf(stderr, "fs tree %llu not referenced\n",
3719 (unsigned long long)rec->objectid);
3722 error = 0;
3723 if (rec->found_ref > 0 && !rec->found_root_item)
3724 error = 1;
3725 list_for_each_entry(backref, &rec->backrefs, list) {
3726 if (!backref->found_dir_item)
3727 backref->errors |= REF_ERR_NO_DIR_ITEM;
3728 if (!backref->found_dir_index)
3729 backref->errors |= REF_ERR_NO_DIR_INDEX;
3730 if (!backref->found_back_ref)
3731 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3732 if (!backref->found_forward_ref)
3733 backref->errors |= REF_ERR_NO_ROOT_REF;
3734 if (backref->reachable && backref->errors)
3735 error = 1;
3737 if (!error)
3738 continue;
3740 errors++;
3741 fprintf(stderr, "fs tree %llu refs %u %s\n",
3742 (unsigned long long)rec->objectid, rec->found_ref,
3743 rec->found_root_item ? "" : "not found");
3745 list_for_each_entry(backref, &rec->backrefs, list) {
3746 if (!backref->reachable)
3747 continue;
3748 if (!backref->errors && rec->found_root_item)
3749 continue;
3750 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3751 " index %llu namelen %u name %s errors %x\n",
3752 (unsigned long long)backref->ref_root,
3753 (unsigned long long)backref->dir,
3754 (unsigned long long)backref->index,
3755 backref->namelen, backref->name,
3756 backref->errors);
3757 print_ref_error(backref->errors);
3760 return errors > 0 ? 1 : 0;
3763 static int process_root_ref(struct extent_buffer *eb, int slot,
3764 struct btrfs_key *key,
3765 struct cache_tree *root_cache)
3767 u64 dirid;
3768 u64 index;
3769 u32 len;
3770 u32 name_len;
3771 struct btrfs_root_ref *ref;
3772 char namebuf[BTRFS_NAME_LEN];
3773 int error;
3775 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3777 dirid = btrfs_root_ref_dirid(eb, ref);
3778 index = btrfs_root_ref_sequence(eb, ref);
3779 name_len = btrfs_root_ref_name_len(eb, ref);
3781 if (name_len <= BTRFS_NAME_LEN) {
3782 len = name_len;
3783 error = 0;
3784 } else {
3785 len = BTRFS_NAME_LEN;
3786 error = REF_ERR_NAME_TOO_LONG;
3788 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3790 if (key->type == BTRFS_ROOT_REF_KEY) {
3791 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3792 index, namebuf, len, key->type, error);
3793 } else {
3794 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3795 index, namebuf, len, key->type, error);
3797 return 0;
3800 static void free_corrupt_block(struct cache_extent *cache)
3802 struct btrfs_corrupt_block *corrupt;
3804 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3805 free(corrupt);
3808 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3811 * Repair the btree of the given root.
3813 * The fix is to remove the node key in corrupt_blocks cache_tree.
3814 * and rebalance the tree.
3815 * After the fix, the btree should be writeable.
3817 static int repair_btree(struct btrfs_root *root,
3818 struct cache_tree *corrupt_blocks)
3820 struct btrfs_trans_handle *trans;
3821 struct btrfs_path path;
3822 struct btrfs_corrupt_block *corrupt;
3823 struct cache_extent *cache;
3824 struct btrfs_key key;
3825 u64 offset;
3826 int level;
3827 int ret = 0;
3829 if (cache_tree_empty(corrupt_blocks))
3830 return 0;
3832 trans = btrfs_start_transaction(root, 1);
3833 if (IS_ERR(trans)) {
3834 ret = PTR_ERR(trans);
3835 fprintf(stderr, "Error starting transaction: %s\n",
3836 strerror(-ret));
3837 return ret;
3839 btrfs_init_path(&path);
3840 cache = first_cache_extent(corrupt_blocks);
3841 while (cache) {
3842 corrupt = container_of(cache, struct btrfs_corrupt_block,
3843 cache);
3844 level = corrupt->level;
3845 path.lowest_level = level;
3846 key.objectid = corrupt->key.objectid;
3847 key.type = corrupt->key.type;
3848 key.offset = corrupt->key.offset;
3851 * Here we don't want to do any tree balance, since it may
3852 * cause a balance with corrupted brother leaf/node,
3853 * so ins_len set to 0 here.
3854 * Balance will be done after all corrupt node/leaf is deleted.
3856 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3857 if (ret < 0)
3858 goto out;
3859 offset = btrfs_node_blockptr(path.nodes[level],
3860 path.slots[level]);
3862 /* Remove the ptr */
3863 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3864 if (ret < 0)
3865 goto out;
3867 * Remove the corresponding extent
3868 * return value is not concerned.
3870 btrfs_release_path(&path);
3871 ret = btrfs_free_extent(trans, root, offset,
3872 root->fs_info->nodesize, 0,
3873 root->root_key.objectid, level - 1, 0);
3874 cache = next_cache_extent(cache);
3877 /* Balance the btree using btrfs_search_slot() */
3878 cache = first_cache_extent(corrupt_blocks);
3879 while (cache) {
3880 corrupt = container_of(cache, struct btrfs_corrupt_block,
3881 cache);
3882 memcpy(&key, &corrupt->key, sizeof(key));
3883 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3884 if (ret < 0)
3885 goto out;
3886 /* return will always >0 since it won't find the item */
3887 ret = 0;
3888 btrfs_release_path(&path);
3889 cache = next_cache_extent(cache);
3891 out:
3892 btrfs_commit_transaction(trans, root);
3893 btrfs_release_path(&path);
3894 return ret;
3897 static int check_fs_root(struct btrfs_root *root,
3898 struct cache_tree *root_cache,
3899 struct walk_control *wc)
3901 int ret = 0;
3902 int err = 0;
3903 int wret;
3904 int level;
3905 struct btrfs_path path;
3906 struct shared_node root_node;
3907 struct root_record *rec;
3908 struct btrfs_root_item *root_item = &root->root_item;
3909 struct cache_tree corrupt_blocks;
3910 struct orphan_data_extent *orphan;
3911 struct orphan_data_extent *tmp;
3912 enum btrfs_tree_block_status status;
3913 struct node_refs nrefs;
3916 * Reuse the corrupt_block cache tree to record corrupted tree block
3918 * Unlike the usage in extent tree check, here we do it in a per
3919 * fs/subvol tree base.
3921 cache_tree_init(&corrupt_blocks);
3922 root->fs_info->corrupt_blocks = &corrupt_blocks;
3924 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3925 rec = get_root_rec(root_cache, root->root_key.objectid);
3926 BUG_ON(IS_ERR(rec));
3927 if (btrfs_root_refs(root_item) > 0)
3928 rec->found_root_item = 1;
3931 btrfs_init_path(&path);
3932 memset(&root_node, 0, sizeof(root_node));
3933 cache_tree_init(&root_node.root_cache);
3934 cache_tree_init(&root_node.inode_cache);
3935 memset(&nrefs, 0, sizeof(nrefs));
3937 /* Move the orphan extent record to corresponding inode_record */
3938 list_for_each_entry_safe(orphan, tmp,
3939 &root->orphan_data_extents, list) {
3940 struct inode_record *inode;
3942 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3944 BUG_ON(IS_ERR(inode));
3945 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3946 list_move(&orphan->list, &inode->orphan_extents);
3949 level = btrfs_header_level(root->node);
3950 memset(wc->nodes, 0, sizeof(wc->nodes));
3951 wc->nodes[level] = &root_node;
3952 wc->active_node = level;
3953 wc->root_level = level;
3955 /* We may not have checked the root block, lets do that now */
3956 if (btrfs_is_leaf(root->node))
3957 status = btrfs_check_leaf(root, NULL, root->node);
3958 else
3959 status = btrfs_check_node(root, NULL, root->node);
3960 if (status != BTRFS_TREE_BLOCK_CLEAN)
3961 return -EIO;
3963 if (btrfs_root_refs(root_item) > 0 ||
3964 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3965 path.nodes[level] = root->node;
3966 extent_buffer_get(root->node);
3967 path.slots[level] = 0;
3968 } else {
3969 struct btrfs_key key;
3970 struct btrfs_disk_key found_key;
3972 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3973 level = root_item->drop_level;
3974 path.lowest_level = level;
3975 if (level > btrfs_header_level(root->node) ||
3976 level >= BTRFS_MAX_LEVEL) {
3977 error("ignoring invalid drop level: %u", level);
3978 goto skip_walking;
3980 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3981 if (wret < 0)
3982 goto skip_walking;
3983 btrfs_node_key(path.nodes[level], &found_key,
3984 path.slots[level]);
3985 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3986 sizeof(found_key)));
3989 while (1) {
3990 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3991 if (wret < 0)
3992 ret = wret;
3993 if (wret != 0)
3994 break;
3996 wret = walk_up_tree(root, &path, wc, &level);
3997 if (wret < 0)
3998 ret = wret;
3999 if (wret != 0)
4000 break;
4002 skip_walking:
4003 btrfs_release_path(&path);
4005 if (!cache_tree_empty(&corrupt_blocks)) {
4006 struct cache_extent *cache;
4007 struct btrfs_corrupt_block *corrupt;
4009 printf("The following tree block(s) is corrupted in tree %llu:\n",
4010 root->root_key.objectid);
4011 cache = first_cache_extent(&corrupt_blocks);
4012 while (cache) {
4013 corrupt = container_of(cache,
4014 struct btrfs_corrupt_block,
4015 cache);
4016 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4017 cache->start, corrupt->level,
4018 corrupt->key.objectid, corrupt->key.type,
4019 corrupt->key.offset);
4020 cache = next_cache_extent(cache);
4022 if (repair) {
4023 printf("Try to repair the btree for root %llu\n",
4024 root->root_key.objectid);
4025 ret = repair_btree(root, &corrupt_blocks);
4026 if (ret < 0)
4027 fprintf(stderr, "Failed to repair btree: %s\n",
4028 strerror(-ret));
4029 if (!ret)
4030 printf("Btree for root %llu is fixed\n",
4031 root->root_key.objectid);
4035 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4036 if (err < 0)
4037 ret = err;
4039 if (root_node.current) {
4040 root_node.current->checked = 1;
4041 maybe_free_inode_rec(&root_node.inode_cache,
4042 root_node.current);
4045 err = check_inode_recs(root, &root_node.inode_cache);
4046 if (!ret)
4047 ret = err;
4049 free_corrupt_blocks_tree(&corrupt_blocks);
4050 root->fs_info->corrupt_blocks = NULL;
4051 free_orphan_data_extents(&root->orphan_data_extents);
4052 return ret;
4055 static int fs_root_objectid(u64 objectid)
4057 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4058 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4059 return 1;
4060 return is_fstree(objectid);
4063 static int check_fs_roots(struct btrfs_root *root,
4064 struct cache_tree *root_cache)
4066 struct btrfs_path path;
4067 struct btrfs_key key;
4068 struct walk_control wc;
4069 struct extent_buffer *leaf, *tree_node;
4070 struct btrfs_root *tmp_root;
4071 struct btrfs_root *tree_root = root->fs_info->tree_root;
4072 int ret;
4073 int err = 0;
4075 if (ctx.progress_enabled) {
4076 ctx.tp = TASK_FS_ROOTS;
4077 task_start(ctx.info);
4081 * Just in case we made any changes to the extent tree that weren't
4082 * reflected into the free space cache yet.
4084 if (repair)
4085 reset_cached_block_groups(root->fs_info);
4086 memset(&wc, 0, sizeof(wc));
4087 cache_tree_init(&wc.shared);
4088 btrfs_init_path(&path);
4090 again:
4091 key.offset = 0;
4092 key.objectid = 0;
4093 key.type = BTRFS_ROOT_ITEM_KEY;
4094 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4095 if (ret < 0) {
4096 err = 1;
4097 goto out;
4099 tree_node = tree_root->node;
4100 while (1) {
4101 if (tree_node != tree_root->node) {
4102 free_root_recs_tree(root_cache);
4103 btrfs_release_path(&path);
4104 goto again;
4106 leaf = path.nodes[0];
4107 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4108 ret = btrfs_next_leaf(tree_root, &path);
4109 if (ret) {
4110 if (ret < 0)
4111 err = 1;
4112 break;
4114 leaf = path.nodes[0];
4116 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4117 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4118 fs_root_objectid(key.objectid)) {
4119 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4120 tmp_root = btrfs_read_fs_root_no_cache(
4121 root->fs_info, &key);
4122 } else {
4123 key.offset = (u64)-1;
4124 tmp_root = btrfs_read_fs_root(
4125 root->fs_info, &key);
4127 if (IS_ERR(tmp_root)) {
4128 err = 1;
4129 goto next;
4131 ret = check_fs_root(tmp_root, root_cache, &wc);
4132 if (ret == -EAGAIN) {
4133 free_root_recs_tree(root_cache);
4134 btrfs_release_path(&path);
4135 goto again;
4137 if (ret)
4138 err = 1;
4139 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4140 btrfs_free_fs_root(tmp_root);
4141 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4142 key.type == BTRFS_ROOT_BACKREF_KEY) {
4143 process_root_ref(leaf, path.slots[0], &key,
4144 root_cache);
4146 next:
4147 path.slots[0]++;
4149 out:
4150 btrfs_release_path(&path);
4151 if (err)
4152 free_extent_cache_tree(&wc.shared);
4153 if (!cache_tree_empty(&wc.shared))
4154 fprintf(stderr, "warning line %d\n", __LINE__);
4156 task_stop(ctx.info);
4158 return err;
4162 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4163 * INODE_REF/INODE_EXTREF match.
4165 * @root: the root of the fs/file tree
4166 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4167 * @key: the key of the DIR_ITEM/DIR_INDEX
4168 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4169 * distinguish root_dir between normal dir/file
4170 * @name: the name in the INODE_REF/INODE_EXTREF
4171 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4172 * @mode: the st_mode of INODE_ITEM
4174 * Return 0 if no error occurred.
4175 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4176 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4177 * dir/file.
4178 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4179 * not match for normal dir/file.
4181 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4182 struct btrfs_key *key, u64 index, char *name,
4183 u32 namelen, u32 mode)
4185 struct btrfs_path path;
4186 struct extent_buffer *node;
4187 struct btrfs_dir_item *di;
4188 struct btrfs_key location;
4189 char namebuf[BTRFS_NAME_LEN] = {0};
4190 u32 total;
4191 u32 cur = 0;
4192 u32 len;
4193 u32 name_len;
4194 u32 data_len;
4195 u8 filetype;
4196 int slot;
4197 int ret;
4199 btrfs_init_path(&path);
4200 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4201 if (ret < 0) {
4202 ret = DIR_ITEM_MISSING;
4203 goto out;
4206 /* Process root dir and goto out*/
4207 if (index == 0) {
4208 if (ret == 0) {
4209 ret = ROOT_DIR_ERROR;
4210 error(
4211 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4212 root->objectid,
4213 ref_key->type == BTRFS_INODE_REF_KEY ?
4214 "REF" : "EXTREF",
4215 ref_key->objectid, ref_key->offset,
4216 key->type == BTRFS_DIR_ITEM_KEY ?
4217 "DIR_ITEM" : "DIR_INDEX");
4218 } else {
4219 ret = 0;
4222 goto out;
4225 /* Process normal file/dir */
4226 if (ret > 0) {
4227 ret = DIR_ITEM_MISSING;
4228 error(
4229 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4230 root->objectid,
4231 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4232 ref_key->objectid, ref_key->offset,
4233 key->type == BTRFS_DIR_ITEM_KEY ?
4234 "DIR_ITEM" : "DIR_INDEX",
4235 key->objectid, key->offset, namelen, name,
4236 imode_to_type(mode));
4237 goto out;
4240 /* Check whether inode_id/filetype/name match */
4241 node = path.nodes[0];
4242 slot = path.slots[0];
4243 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4244 total = btrfs_item_size_nr(node, slot);
4245 while (cur < total) {
4246 ret = DIR_ITEM_MISMATCH;
4247 name_len = btrfs_dir_name_len(node, di);
4248 data_len = btrfs_dir_data_len(node, di);
4250 btrfs_dir_item_key_to_cpu(node, di, &location);
4251 if (location.objectid != ref_key->objectid ||
4252 location.type != BTRFS_INODE_ITEM_KEY ||
4253 location.offset != 0)
4254 goto next;
4256 filetype = btrfs_dir_type(node, di);
4257 if (imode_to_type(mode) != filetype)
4258 goto next;
4260 if (cur + sizeof(*di) + name_len > total ||
4261 name_len > BTRFS_NAME_LEN) {
4262 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4263 root->objectid,
4264 key->type == BTRFS_DIR_ITEM_KEY ?
4265 "DIR_ITEM" : "DIR_INDEX",
4266 key->objectid, key->offset, name_len);
4268 if (cur + sizeof(*di) > total)
4269 break;
4270 len = min_t(u32, total - cur - sizeof(*di),
4271 BTRFS_NAME_LEN);
4272 } else {
4273 len = name_len;
4276 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4277 if (len != namelen || strncmp(namebuf, name, len))
4278 goto next;
4280 ret = 0;
4281 goto out;
4282 next:
4283 len = sizeof(*di) + name_len + data_len;
4284 di = (struct btrfs_dir_item *)((char *)di + len);
4285 cur += len;
4287 if (ret == DIR_ITEM_MISMATCH)
4288 error(
4289 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4290 root->objectid,
4291 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4292 ref_key->objectid, ref_key->offset,
4293 key->type == BTRFS_DIR_ITEM_KEY ?
4294 "DIR_ITEM" : "DIR_INDEX",
4295 key->objectid, key->offset, namelen, name,
4296 imode_to_type(mode));
4297 out:
4298 btrfs_release_path(&path);
4299 return ret;
4303 * Traverse the given INODE_REF and call find_dir_item() to find related
4304 * DIR_ITEM/DIR_INDEX.
4306 * @root: the root of the fs/file tree
4307 * @ref_key: the key of the INODE_REF
4308 * @refs: the count of INODE_REF
4309 * @mode: the st_mode of INODE_ITEM
4311 * Return 0 if no error occurred.
4313 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4314 struct extent_buffer *node, int slot, u64 *refs,
4315 int mode)
4317 struct btrfs_key key;
4318 struct btrfs_inode_ref *ref;
4319 char namebuf[BTRFS_NAME_LEN] = {0};
4320 u32 total;
4321 u32 cur = 0;
4322 u32 len;
4323 u32 name_len;
4324 u64 index;
4325 int ret, err = 0;
4327 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4328 total = btrfs_item_size_nr(node, slot);
4330 next:
4331 /* Update inode ref count */
4332 (*refs)++;
4334 index = btrfs_inode_ref_index(node, ref);
4335 name_len = btrfs_inode_ref_name_len(node, ref);
4336 if (cur + sizeof(*ref) + name_len > total ||
4337 name_len > BTRFS_NAME_LEN) {
4338 warning("root %llu INODE_REF[%llu %llu] name too long",
4339 root->objectid, ref_key->objectid, ref_key->offset);
4341 if (total < cur + sizeof(*ref))
4342 goto out;
4343 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4344 } else {
4345 len = name_len;
4348 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4350 /* Check root dir ref name */
4351 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4352 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4353 root->objectid, ref_key->objectid, ref_key->offset,
4354 namebuf);
4355 err |= ROOT_DIR_ERROR;
4358 /* Find related DIR_INDEX */
4359 key.objectid = ref_key->offset;
4360 key.type = BTRFS_DIR_INDEX_KEY;
4361 key.offset = index;
4362 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4363 err |= ret;
4365 /* Find related dir_item */
4366 key.objectid = ref_key->offset;
4367 key.type = BTRFS_DIR_ITEM_KEY;
4368 key.offset = btrfs_name_hash(namebuf, len);
4369 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4370 err |= ret;
4372 len = sizeof(*ref) + name_len;
4373 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4374 cur += len;
4375 if (cur < total)
4376 goto next;
4378 out:
4379 return err;
4383 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4384 * DIR_ITEM/DIR_INDEX.
4386 * @root: the root of the fs/file tree
4387 * @ref_key: the key of the INODE_EXTREF
4388 * @refs: the count of INODE_EXTREF
4389 * @mode: the st_mode of INODE_ITEM
4391 * Return 0 if no error occurred.
4393 static int check_inode_extref(struct btrfs_root *root,
4394 struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4396 int mode)
4398 struct btrfs_key key;
4399 struct btrfs_inode_extref *extref;
4400 char namebuf[BTRFS_NAME_LEN] = {0};
4401 u32 total;
4402 u32 cur = 0;
4403 u32 len;
4404 u32 name_len;
4405 u64 index;
4406 u64 parent;
4407 int ret;
4408 int err = 0;
4410 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4411 total = btrfs_item_size_nr(node, slot);
4413 next:
4414 /* update inode ref count */
4415 (*refs)++;
4416 name_len = btrfs_inode_extref_name_len(node, extref);
4417 index = btrfs_inode_extref_index(node, extref);
4418 parent = btrfs_inode_extref_parent(node, extref);
4419 if (name_len <= BTRFS_NAME_LEN) {
4420 len = name_len;
4421 } else {
4422 len = BTRFS_NAME_LEN;
4423 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4424 root->objectid, ref_key->objectid, ref_key->offset);
4426 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4428 /* Check root dir ref name */
4429 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4430 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4431 root->objectid, ref_key->objectid, ref_key->offset,
4432 namebuf);
4433 err |= ROOT_DIR_ERROR;
4436 /* find related dir_index */
4437 key.objectid = parent;
4438 key.type = BTRFS_DIR_INDEX_KEY;
4439 key.offset = index;
4440 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4441 err |= ret;
4443 /* find related dir_item */
4444 key.objectid = parent;
4445 key.type = BTRFS_DIR_ITEM_KEY;
4446 key.offset = btrfs_name_hash(namebuf, len);
4447 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4448 err |= ret;
4450 len = sizeof(*extref) + name_len;
4451 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4452 cur += len;
4454 if (cur < total)
4455 goto next;
4457 return err;
4461 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4462 * DIR_ITEM/DIR_INDEX match.
4464 * @root: the root of the fs/file tree
4465 * @key: the key of the INODE_REF/INODE_EXTREF
4466 * @name: the name in the INODE_REF/INODE_EXTREF
4467 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4468 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4469 * to (u64)-1
4470 * @ext_ref: the EXTENDED_IREF feature
4472 * Return 0 if no error occurred.
4473 * Return >0 for error bitmap
4475 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4476 char *name, int namelen, u64 index,
4477 unsigned int ext_ref)
4479 struct btrfs_path path;
4480 struct btrfs_inode_ref *ref;
4481 struct btrfs_inode_extref *extref;
4482 struct extent_buffer *node;
4483 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4484 u32 total;
4485 u32 cur = 0;
4486 u32 len;
4487 u32 ref_namelen;
4488 u64 ref_index;
4489 u64 parent;
4490 u64 dir_id;
4491 int slot;
4492 int ret;
4494 btrfs_init_path(&path);
4495 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4496 if (ret) {
4497 ret = INODE_REF_MISSING;
4498 goto extref;
4501 node = path.nodes[0];
4502 slot = path.slots[0];
4504 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4505 total = btrfs_item_size_nr(node, slot);
4507 /* Iterate all entry of INODE_REF */
4508 while (cur < total) {
4509 ret = INODE_REF_MISSING;
4511 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4512 ref_index = btrfs_inode_ref_index(node, ref);
4513 if (index != (u64)-1 && index != ref_index)
4514 goto next_ref;
4516 if (cur + sizeof(*ref) + ref_namelen > total ||
4517 ref_namelen > BTRFS_NAME_LEN) {
4518 warning("root %llu INODE %s[%llu %llu] name too long",
4519 root->objectid,
4520 key->type == BTRFS_INODE_REF_KEY ?
4521 "REF" : "EXTREF",
4522 key->objectid, key->offset);
4524 if (cur + sizeof(*ref) > total)
4525 break;
4526 len = min_t(u32, total - cur - sizeof(*ref),
4527 BTRFS_NAME_LEN);
4528 } else {
4529 len = ref_namelen;
4532 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4533 len);
4535 if (len != namelen || strncmp(ref_namebuf, name, len))
4536 goto next_ref;
4538 ret = 0;
4539 goto out;
4540 next_ref:
4541 len = sizeof(*ref) + ref_namelen;
4542 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4543 cur += len;
4546 extref:
4547 /* Skip if not support EXTENDED_IREF feature */
4548 if (!ext_ref)
4549 goto out;
4551 btrfs_release_path(&path);
4552 btrfs_init_path(&path);
4554 dir_id = key->offset;
4555 key->type = BTRFS_INODE_EXTREF_KEY;
4556 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4558 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4559 if (ret) {
4560 ret = INODE_REF_MISSING;
4561 goto out;
4564 node = path.nodes[0];
4565 slot = path.slots[0];
4567 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4568 cur = 0;
4569 total = btrfs_item_size_nr(node, slot);
4571 /* Iterate all entry of INODE_EXTREF */
4572 while (cur < total) {
4573 ret = INODE_REF_MISSING;
4575 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4576 ref_index = btrfs_inode_extref_index(node, extref);
4577 parent = btrfs_inode_extref_parent(node, extref);
4578 if (index != (u64)-1 && index != ref_index)
4579 goto next_extref;
4581 if (parent != dir_id)
4582 goto next_extref;
4584 if (ref_namelen <= BTRFS_NAME_LEN) {
4585 len = ref_namelen;
4586 } else {
4587 len = BTRFS_NAME_LEN;
4588 warning("root %llu INODE %s[%llu %llu] name too long",
4589 root->objectid,
4590 key->type == BTRFS_INODE_REF_KEY ?
4591 "REF" : "EXTREF",
4592 key->objectid, key->offset);
4594 read_extent_buffer(node, ref_namebuf,
4595 (unsigned long)(extref + 1), len);
4597 if (len != namelen || strncmp(ref_namebuf, name, len))
4598 goto next_extref;
4600 ret = 0;
4601 goto out;
4603 next_extref:
4604 len = sizeof(*extref) + ref_namelen;
4605 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4606 cur += len;
4609 out:
4610 btrfs_release_path(&path);
4611 return ret;
4615 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4616 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4618 * @root: the root of the fs/file tree
4619 * @key: the key of the INODE_REF/INODE_EXTREF
4620 * @size: the st_size of the INODE_ITEM
4621 * @ext_ref: the EXTENDED_IREF feature
4623 * Return 0 if no error occurred.
4625 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4626 struct extent_buffer *node, int slot, u64 *size,
4627 unsigned int ext_ref)
4629 struct btrfs_dir_item *di;
4630 struct btrfs_inode_item *ii;
4631 struct btrfs_path path;
4632 struct btrfs_key location;
4633 char namebuf[BTRFS_NAME_LEN] = {0};
4634 u32 total;
4635 u32 cur = 0;
4636 u32 len;
4637 u32 name_len;
4638 u32 data_len;
4639 u8 filetype;
4640 u32 mode;
4641 u64 index;
4642 int ret;
4643 int err = 0;
4646 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4647 * ignore index check.
4649 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4651 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4652 total = btrfs_item_size_nr(node, slot);
4654 while (cur < total) {
4655 data_len = btrfs_dir_data_len(node, di);
4656 if (data_len)
4657 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4658 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4659 "DIR_ITEM" : "DIR_INDEX",
4660 key->objectid, key->offset, data_len);
4662 name_len = btrfs_dir_name_len(node, di);
4663 if (cur + sizeof(*di) + name_len > total ||
4664 name_len > BTRFS_NAME_LEN) {
4665 warning("root %llu %s[%llu %llu] name too long",
4666 root->objectid,
4667 key->type == BTRFS_DIR_ITEM_KEY ?
4668 "DIR_ITEM" : "DIR_INDEX",
4669 key->objectid, key->offset);
4671 if (cur + sizeof(*di) > total)
4672 break;
4673 len = min_t(u32, total - cur - sizeof(*di),
4674 BTRFS_NAME_LEN);
4675 } else {
4676 len = name_len;
4678 (*size) += name_len;
4680 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4681 filetype = btrfs_dir_type(node, di);
4683 if (key->type == BTRFS_DIR_ITEM_KEY &&
4684 key->offset != btrfs_name_hash(namebuf, len)) {
4685 err |= -EIO;
4686 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4687 root->objectid, key->objectid, key->offset,
4688 namebuf, len, filetype, key->offset,
4689 btrfs_name_hash(namebuf, len));
4692 btrfs_init_path(&path);
4693 btrfs_dir_item_key_to_cpu(node, di, &location);
4695 /* Ignore related ROOT_ITEM check */
4696 if (location.type == BTRFS_ROOT_ITEM_KEY)
4697 goto next;
4699 /* Check relative INODE_ITEM(existence/filetype) */
4700 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4701 if (ret) {
4702 err |= INODE_ITEM_MISSING;
4703 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4704 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4705 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4706 key->offset, location.objectid, name_len,
4707 namebuf, filetype);
4708 goto next;
4711 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4712 struct btrfs_inode_item);
4713 mode = btrfs_inode_mode(path.nodes[0], ii);
4715 if (imode_to_type(mode) != filetype) {
4716 err |= INODE_ITEM_MISMATCH;
4717 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4718 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4719 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4720 key->offset, name_len, namebuf, filetype);
4723 /* Check relative INODE_REF/INODE_EXTREF */
4724 location.type = BTRFS_INODE_REF_KEY;
4725 location.offset = key->objectid;
4726 ret = find_inode_ref(root, &location, namebuf, len,
4727 index, ext_ref);
4728 err |= ret;
4729 if (ret & INODE_REF_MISSING)
4730 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4731 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4732 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4733 key->offset, name_len, namebuf, filetype);
4735 next:
4736 btrfs_release_path(&path);
4737 len = sizeof(*di) + name_len + data_len;
4738 di = (struct btrfs_dir_item *)((char *)di + len);
4739 cur += len;
4741 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4742 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4743 root->objectid, key->objectid, key->offset);
4744 break;
4748 return err;
4752 * Check file extent datasum/hole, update the size of the file extents,
4753 * check and update the last offset of the file extent.
4755 * @root: the root of fs/file tree.
4756 * @fkey: the key of the file extent.
4757 * @nodatasum: INODE_NODATASUM feature.
4758 * @size: the sum of all EXTENT_DATA items size for this inode.
4759 * @end: the offset of the last extent.
4761 * Return 0 if no error occurred.
4763 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4764 struct extent_buffer *node, int slot,
4765 unsigned int nodatasum, u64 *size, u64 *end)
4767 struct btrfs_file_extent_item *fi;
4768 u64 disk_bytenr;
4769 u64 disk_num_bytes;
4770 u64 extent_num_bytes;
4771 u64 extent_offset;
4772 u64 csum_found; /* In byte size, sectorsize aligned */
4773 u64 search_start; /* Logical range start we search for csum */
4774 u64 search_len; /* Logical range len we search for csum */
4775 unsigned int extent_type;
4776 unsigned int is_hole;
4777 int compressed = 0;
4778 int ret;
4779 int err = 0;
4781 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4783 /* Check inline extent */
4784 extent_type = btrfs_file_extent_type(node, fi);
4785 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4786 struct btrfs_item *e = btrfs_item_nr(slot);
4787 u32 item_inline_len;
4789 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4790 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4791 compressed = btrfs_file_extent_compression(node, fi);
4792 if (extent_num_bytes == 0) {
4793 error(
4794 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4795 root->objectid, fkey->objectid, fkey->offset);
4796 err |= FILE_EXTENT_ERROR;
4798 if (!compressed && extent_num_bytes != item_inline_len) {
4799 error(
4800 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4801 root->objectid, fkey->objectid, fkey->offset,
4802 extent_num_bytes, item_inline_len);
4803 err |= FILE_EXTENT_ERROR;
4805 *end += extent_num_bytes;
4806 *size += extent_num_bytes;
4807 return err;
4810 /* Check extent type */
4811 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4812 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4813 err |= FILE_EXTENT_ERROR;
4814 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4815 root->objectid, fkey->objectid, fkey->offset);
4816 return err;
4819 /* Check REG_EXTENT/PREALLOC_EXTENT */
4820 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4821 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4822 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4823 extent_offset = btrfs_file_extent_offset(node, fi);
4824 compressed = btrfs_file_extent_compression(node, fi);
4825 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4828 * Check EXTENT_DATA csum
4830 * For plain (uncompressed) extent, we should only check the range
4831 * we're referring to, as it's possible that part of prealloc extent
4832 * has been written, and has csum:
4834 * |<--- Original large preallocated extent A ---->|
4835 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4836 * No csum Has csum
4838 * For compressed extent, we should check the whole range.
4840 if (!compressed) {
4841 search_start = disk_bytenr + extent_offset;
4842 search_len = extent_num_bytes;
4843 } else {
4844 search_start = disk_bytenr;
4845 search_len = disk_num_bytes;
4847 ret = count_csum_range(root, search_start, search_len, &csum_found);
4848 if (csum_found > 0 && nodatasum) {
4849 err |= ODD_CSUM_ITEM;
4850 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4851 root->objectid, fkey->objectid, fkey->offset);
4852 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4853 !is_hole && (ret < 0 || csum_found < search_len)) {
4854 err |= CSUM_ITEM_MISSING;
4855 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4856 root->objectid, fkey->objectid, fkey->offset,
4857 csum_found, search_len);
4858 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4859 err |= ODD_CSUM_ITEM;
4860 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4861 root->objectid, fkey->objectid, fkey->offset, csum_found);
4864 /* Check EXTENT_DATA hole */
4865 if (!no_holes && *end != fkey->offset) {
4866 err |= FILE_EXTENT_ERROR;
4867 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4868 root->objectid, fkey->objectid, fkey->offset);
4871 *end += extent_num_bytes;
4872 if (!is_hole)
4873 *size += extent_num_bytes;
4875 return err;
4879 * Check INODE_ITEM and related ITEMs (the same inode number)
4880 * 1. check link count
4881 * 2. check inode ref/extref
4882 * 3. check dir item/index
4884 * @ext_ref: the EXTENDED_IREF feature
4886 * Return 0 if no error occurred.
4887 * Return >0 for error or hit the traversal is done(by error bitmap)
4889 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4890 unsigned int ext_ref)
4892 struct extent_buffer *node;
4893 struct btrfs_inode_item *ii;
4894 struct btrfs_key key;
4895 u64 inode_id;
4896 u32 mode;
4897 u64 nlink;
4898 u64 nbytes;
4899 u64 isize;
4900 u64 size = 0;
4901 u64 refs = 0;
4902 u64 extent_end = 0;
4903 u64 extent_size = 0;
4904 unsigned int dir;
4905 unsigned int nodatasum;
4906 int slot;
4907 int ret;
4908 int err = 0;
4910 node = path->nodes[0];
4911 slot = path->slots[0];
4913 btrfs_item_key_to_cpu(node, &key, slot);
4914 inode_id = key.objectid;
4916 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4917 ret = btrfs_next_item(root, path);
4918 if (ret > 0)
4919 err |= LAST_ITEM;
4920 return err;
4923 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4924 isize = btrfs_inode_size(node, ii);
4925 nbytes = btrfs_inode_nbytes(node, ii);
4926 mode = btrfs_inode_mode(node, ii);
4927 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4928 nlink = btrfs_inode_nlink(node, ii);
4929 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4931 while (1) {
4932 ret = btrfs_next_item(root, path);
4933 if (ret < 0) {
4934 /* out will fill 'err' rusing current statistics */
4935 goto out;
4936 } else if (ret > 0) {
4937 err |= LAST_ITEM;
4938 goto out;
4941 node = path->nodes[0];
4942 slot = path->slots[0];
4943 btrfs_item_key_to_cpu(node, &key, slot);
4944 if (key.objectid != inode_id)
4945 goto out;
4947 switch (key.type) {
4948 case BTRFS_INODE_REF_KEY:
4949 ret = check_inode_ref(root, &key, node, slot, &refs,
4950 mode);
4951 err |= ret;
4952 break;
4953 case BTRFS_INODE_EXTREF_KEY:
4954 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4955 warning("root %llu EXTREF[%llu %llu] isn't supported",
4956 root->objectid, key.objectid,
4957 key.offset);
4958 ret = check_inode_extref(root, &key, node, slot, &refs,
4959 mode);
4960 err |= ret;
4961 break;
4962 case BTRFS_DIR_ITEM_KEY:
4963 case BTRFS_DIR_INDEX_KEY:
4964 if (!dir) {
4965 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4966 root->objectid, inode_id,
4967 imode_to_type(mode), key.objectid,
4968 key.offset);
4970 ret = check_dir_item(root, &key, node, slot, &size,
4971 ext_ref);
4972 err |= ret;
4973 break;
4974 case BTRFS_EXTENT_DATA_KEY:
4975 if (dir) {
4976 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4977 root->objectid, inode_id, key.objectid,
4978 key.offset);
4980 ret = check_file_extent(root, &key, node, slot,
4981 nodatasum, &extent_size,
4982 &extent_end);
4983 err |= ret;
4984 break;
4985 case BTRFS_XATTR_ITEM_KEY:
4986 break;
4987 default:
4988 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4989 key.objectid, key.type, key.offset);
4993 out:
4994 /* verify INODE_ITEM nlink/isize/nbytes */
4995 if (dir) {
4996 if (nlink != 1) {
4997 err |= LINK_COUNT_ERROR;
4998 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4999 root->objectid, inode_id, nlink);
5003 * Just a warning, as dir inode nbytes is just an
5004 * instructive value.
5006 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5007 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5008 root->objectid, inode_id,
5009 root->fs_info->nodesize);
5012 if (isize != size) {
5013 err |= ISIZE_ERROR;
5014 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5015 root->objectid, inode_id, isize, size);
5017 } else {
5018 if (nlink != refs) {
5019 err |= LINK_COUNT_ERROR;
5020 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5021 root->objectid, inode_id, nlink, refs);
5022 } else if (!nlink) {
5023 err |= ORPHAN_ITEM;
5026 if (!nbytes && !no_holes && extent_end < isize) {
5027 err |= NBYTES_ERROR;
5028 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5029 root->objectid, inode_id, isize);
5032 if (nbytes != extent_size) {
5033 err |= NBYTES_ERROR;
5034 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5035 root->objectid, inode_id, nbytes, extent_size);
5039 return err;
5042 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5044 struct btrfs_path path;
5045 struct btrfs_key key;
5046 int err = 0;
5047 int ret;
5049 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5050 key.type = BTRFS_INODE_ITEM_KEY;
5051 key.offset = 0;
5053 /* For root being dropped, we don't need to check first inode */
5054 if (btrfs_root_refs(&root->root_item) == 0 &&
5055 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5056 key.objectid)
5057 return 0;
5059 btrfs_init_path(&path);
5061 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5062 if (ret < 0)
5063 goto out;
5064 if (ret > 0) {
5065 ret = 0;
5066 err |= INODE_ITEM_MISSING;
5067 error("first inode item of root %llu is missing",
5068 root->objectid);
5071 err |= check_inode_item(root, &path, ext_ref);
5072 err &= ~LAST_ITEM;
5073 if (err && !ret)
5074 ret = -EIO;
5075 out:
5076 btrfs_release_path(&path);
5077 return ret;
5081 * Iterate all item on the tree and call check_inode_item() to check.
5083 * @root: the root of the tree to be checked.
5084 * @ext_ref: the EXTENDED_IREF feature
5086 * Return 0 if no error found.
5087 * Return <0 for error.
5089 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5091 struct btrfs_path path;
5092 struct node_refs nrefs;
5093 struct btrfs_root_item *root_item = &root->root_item;
5094 int ret;
5095 int level;
5096 int err = 0;
5099 * We need to manually check the first inode item(256)
5100 * As the following traversal function will only start from
5101 * the first inode item in the leaf, if inode item(256) is missing
5102 * we will just skip it forever.
5104 ret = check_fs_first_inode(root, ext_ref);
5105 if (ret < 0)
5106 return ret;
5108 memset(&nrefs, 0, sizeof(nrefs));
5109 level = btrfs_header_level(root->node);
5110 btrfs_init_path(&path);
5112 if (btrfs_root_refs(root_item) > 0 ||
5113 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5114 path.nodes[level] = root->node;
5115 path.slots[level] = 0;
5116 extent_buffer_get(root->node);
5117 } else {
5118 struct btrfs_key key;
5120 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5121 level = root_item->drop_level;
5122 path.lowest_level = level;
5123 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5124 if (ret < 0)
5125 goto out;
5126 ret = 0;
5129 while (1) {
5130 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5131 err |= !!ret;
5133 /* if ret is negative, walk shall stop */
5134 if (ret < 0) {
5135 ret = err;
5136 break;
5139 ret = walk_up_tree_v2(root, &path, &level);
5140 if (ret != 0) {
5141 /* Normal exit, reset ret to err */
5142 ret = err;
5143 break;
5147 out:
5148 btrfs_release_path(&path);
5149 return ret;
5153 * Find the relative ref for root_ref and root_backref.
5155 * @root: the root of the root tree.
5156 * @ref_key: the key of the root ref.
5158 * Return 0 if no error occurred.
5160 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5161 struct extent_buffer *node, int slot)
5163 struct btrfs_path path;
5164 struct btrfs_key key;
5165 struct btrfs_root_ref *ref;
5166 struct btrfs_root_ref *backref;
5167 char ref_name[BTRFS_NAME_LEN] = {0};
5168 char backref_name[BTRFS_NAME_LEN] = {0};
5169 u64 ref_dirid;
5170 u64 ref_seq;
5171 u32 ref_namelen;
5172 u64 backref_dirid;
5173 u64 backref_seq;
5174 u32 backref_namelen;
5175 u32 len;
5176 int ret;
5177 int err = 0;
5179 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5180 ref_dirid = btrfs_root_ref_dirid(node, ref);
5181 ref_seq = btrfs_root_ref_sequence(node, ref);
5182 ref_namelen = btrfs_root_ref_name_len(node, ref);
5184 if (ref_namelen <= BTRFS_NAME_LEN) {
5185 len = ref_namelen;
5186 } else {
5187 len = BTRFS_NAME_LEN;
5188 warning("%s[%llu %llu] ref_name too long",
5189 ref_key->type == BTRFS_ROOT_REF_KEY ?
5190 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5191 ref_key->offset);
5193 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5195 /* Find relative root_ref */
5196 key.objectid = ref_key->offset;
5197 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5198 key.offset = ref_key->objectid;
5200 btrfs_init_path(&path);
5201 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5202 if (ret) {
5203 err |= ROOT_REF_MISSING;
5204 error("%s[%llu %llu] couldn't find relative ref",
5205 ref_key->type == BTRFS_ROOT_REF_KEY ?
5206 "ROOT_REF" : "ROOT_BACKREF",
5207 ref_key->objectid, ref_key->offset);
5208 goto out;
5211 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5212 struct btrfs_root_ref);
5213 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5214 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5215 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5217 if (backref_namelen <= BTRFS_NAME_LEN) {
5218 len = backref_namelen;
5219 } else {
5220 len = BTRFS_NAME_LEN;
5221 warning("%s[%llu %llu] ref_name too long",
5222 key.type == BTRFS_ROOT_REF_KEY ?
5223 "ROOT_REF" : "ROOT_BACKREF",
5224 key.objectid, key.offset);
5226 read_extent_buffer(path.nodes[0], backref_name,
5227 (unsigned long)(backref + 1), len);
5229 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5230 ref_namelen != backref_namelen ||
5231 strncmp(ref_name, backref_name, len)) {
5232 err |= ROOT_REF_MISMATCH;
5233 error("%s[%llu %llu] mismatch relative ref",
5234 ref_key->type == BTRFS_ROOT_REF_KEY ?
5235 "ROOT_REF" : "ROOT_BACKREF",
5236 ref_key->objectid, ref_key->offset);
5238 out:
5239 btrfs_release_path(&path);
5240 return err;
5244 * Check all fs/file tree in low_memory mode.
5246 * 1. for fs tree root item, call check_fs_root_v2()
5247 * 2. for fs tree root ref/backref, call check_root_ref()
5249 * Return 0 if no error occurred.
5251 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5253 struct btrfs_root *tree_root = fs_info->tree_root;
5254 struct btrfs_root *cur_root = NULL;
5255 struct btrfs_path path;
5256 struct btrfs_key key;
5257 struct extent_buffer *node;
5258 unsigned int ext_ref;
5259 int slot;
5260 int ret;
5261 int err = 0;
5263 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5265 btrfs_init_path(&path);
5266 key.objectid = BTRFS_FS_TREE_OBJECTID;
5267 key.offset = 0;
5268 key.type = BTRFS_ROOT_ITEM_KEY;
5270 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5271 if (ret < 0) {
5272 err = ret;
5273 goto out;
5274 } else if (ret > 0) {
5275 err = -ENOENT;
5276 goto out;
5279 while (1) {
5280 node = path.nodes[0];
5281 slot = path.slots[0];
5282 btrfs_item_key_to_cpu(node, &key, slot);
5283 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5284 goto out;
5285 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5286 fs_root_objectid(key.objectid)) {
5287 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5288 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5289 &key);
5290 } else {
5291 key.offset = (u64)-1;
5292 cur_root = btrfs_read_fs_root(fs_info, &key);
5295 if (IS_ERR(cur_root)) {
5296 error("Fail to read fs/subvol tree: %lld",
5297 key.objectid);
5298 err = -EIO;
5299 goto next;
5302 ret = check_fs_root_v2(cur_root, ext_ref);
5303 err |= ret;
5305 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5306 btrfs_free_fs_root(cur_root);
5307 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5308 key.type == BTRFS_ROOT_BACKREF_KEY) {
5309 ret = check_root_ref(tree_root, &key, node, slot);
5310 err |= ret;
5312 next:
5313 ret = btrfs_next_item(tree_root, &path);
5314 if (ret > 0)
5315 goto out;
5316 if (ret < 0) {
5317 err = ret;
5318 goto out;
5322 out:
5323 btrfs_release_path(&path);
5324 return err;
5327 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5329 struct list_head *cur = rec->backrefs.next;
5330 struct extent_backref *back;
5331 struct tree_backref *tback;
5332 struct data_backref *dback;
5333 u64 found = 0;
5334 int err = 0;
5336 while(cur != &rec->backrefs) {
5337 back = to_extent_backref(cur);
5338 cur = cur->next;
5339 if (!back->found_extent_tree) {
5340 err = 1;
5341 if (!print_errs)
5342 goto out;
5343 if (back->is_data) {
5344 dback = to_data_backref(back);
5345 fprintf(stderr, "Backref %llu %s %llu"
5346 " owner %llu offset %llu num_refs %lu"
5347 " not found in extent tree\n",
5348 (unsigned long long)rec->start,
5349 back->full_backref ?
5350 "parent" : "root",
5351 back->full_backref ?
5352 (unsigned long long)dback->parent:
5353 (unsigned long long)dback->root,
5354 (unsigned long long)dback->owner,
5355 (unsigned long long)dback->offset,
5356 (unsigned long)dback->num_refs);
5357 } else {
5358 tback = to_tree_backref(back);
5359 fprintf(stderr, "Backref %llu parent %llu"
5360 " root %llu not found in extent tree\n",
5361 (unsigned long long)rec->start,
5362 (unsigned long long)tback->parent,
5363 (unsigned long long)tback->root);
5366 if (!back->is_data && !back->found_ref) {
5367 err = 1;
5368 if (!print_errs)
5369 goto out;
5370 tback = to_tree_backref(back);
5371 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5372 (unsigned long long)rec->start,
5373 back->full_backref ? "parent" : "root",
5374 back->full_backref ?
5375 (unsigned long long)tback->parent :
5376 (unsigned long long)tback->root, back);
5378 if (back->is_data) {
5379 dback = to_data_backref(back);
5380 if (dback->found_ref != dback->num_refs) {
5381 err = 1;
5382 if (!print_errs)
5383 goto out;
5384 fprintf(stderr, "Incorrect local backref count"
5385 " on %llu %s %llu owner %llu"
5386 " offset %llu found %u wanted %u back %p\n",
5387 (unsigned long long)rec->start,
5388 back->full_backref ?
5389 "parent" : "root",
5390 back->full_backref ?
5391 (unsigned long long)dback->parent:
5392 (unsigned long long)dback->root,
5393 (unsigned long long)dback->owner,
5394 (unsigned long long)dback->offset,
5395 dback->found_ref, dback->num_refs, back);
5397 if (dback->disk_bytenr != rec->start) {
5398 err = 1;
5399 if (!print_errs)
5400 goto out;
5401 fprintf(stderr, "Backref disk bytenr does not"
5402 " match extent record, bytenr=%llu, "
5403 "ref bytenr=%llu\n",
5404 (unsigned long long)rec->start,
5405 (unsigned long long)dback->disk_bytenr);
5408 if (dback->bytes != rec->nr) {
5409 err = 1;
5410 if (!print_errs)
5411 goto out;
5412 fprintf(stderr, "Backref bytes do not match "
5413 "extent backref, bytenr=%llu, ref "
5414 "bytes=%llu, backref bytes=%llu\n",
5415 (unsigned long long)rec->start,
5416 (unsigned long long)rec->nr,
5417 (unsigned long long)dback->bytes);
5420 if (!back->is_data) {
5421 found += 1;
5422 } else {
5423 dback = to_data_backref(back);
5424 found += dback->found_ref;
5427 if (found != rec->refs) {
5428 err = 1;
5429 if (!print_errs)
5430 goto out;
5431 fprintf(stderr, "Incorrect global backref count "
5432 "on %llu found %llu wanted %llu\n",
5433 (unsigned long long)rec->start,
5434 (unsigned long long)found,
5435 (unsigned long long)rec->refs);
5437 out:
5438 return err;
5441 static int free_all_extent_backrefs(struct extent_record *rec)
5443 struct extent_backref *back;
5444 struct list_head *cur;
5445 while (!list_empty(&rec->backrefs)) {
5446 cur = rec->backrefs.next;
5447 back = to_extent_backref(cur);
5448 list_del(cur);
5449 free(back);
5451 return 0;
5454 static void free_extent_record_cache(struct cache_tree *extent_cache)
5456 struct cache_extent *cache;
5457 struct extent_record *rec;
5459 while (1) {
5460 cache = first_cache_extent(extent_cache);
5461 if (!cache)
5462 break;
5463 rec = container_of(cache, struct extent_record, cache);
5464 remove_cache_extent(extent_cache, cache);
5465 free_all_extent_backrefs(rec);
5466 free(rec);
5470 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5471 struct extent_record *rec)
5473 if (rec->content_checked && rec->owner_ref_checked &&
5474 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5475 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5476 !rec->bad_full_backref && !rec->crossing_stripes &&
5477 !rec->wrong_chunk_type) {
5478 remove_cache_extent(extent_cache, &rec->cache);
5479 free_all_extent_backrefs(rec);
5480 list_del_init(&rec->list);
5481 free(rec);
5483 return 0;
5486 static int check_owner_ref(struct btrfs_root *root,
5487 struct extent_record *rec,
5488 struct extent_buffer *buf)
5490 struct extent_backref *node;
5491 struct tree_backref *back;
5492 struct btrfs_root *ref_root;
5493 struct btrfs_key key;
5494 struct btrfs_path path;
5495 struct extent_buffer *parent;
5496 int level;
5497 int found = 0;
5498 int ret;
5500 list_for_each_entry(node, &rec->backrefs, list) {
5501 if (node->is_data)
5502 continue;
5503 if (!node->found_ref)
5504 continue;
5505 if (node->full_backref)
5506 continue;
5507 back = to_tree_backref(node);
5508 if (btrfs_header_owner(buf) == back->root)
5509 return 0;
5511 BUG_ON(rec->is_root);
5513 /* try to find the block by search corresponding fs tree */
5514 key.objectid = btrfs_header_owner(buf);
5515 key.type = BTRFS_ROOT_ITEM_KEY;
5516 key.offset = (u64)-1;
5518 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5519 if (IS_ERR(ref_root))
5520 return 1;
5522 level = btrfs_header_level(buf);
5523 if (level == 0)
5524 btrfs_item_key_to_cpu(buf, &key, 0);
5525 else
5526 btrfs_node_key_to_cpu(buf, &key, 0);
5528 btrfs_init_path(&path);
5529 path.lowest_level = level + 1;
5530 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5531 if (ret < 0)
5532 return 0;
5534 parent = path.nodes[level + 1];
5535 if (parent && buf->start == btrfs_node_blockptr(parent,
5536 path.slots[level + 1]))
5537 found = 1;
5539 btrfs_release_path(&path);
5540 return found ? 0 : 1;
5543 static int is_extent_tree_record(struct extent_record *rec)
5545 struct list_head *cur = rec->backrefs.next;
5546 struct extent_backref *node;
5547 struct tree_backref *back;
5548 int is_extent = 0;
5550 while(cur != &rec->backrefs) {
5551 node = to_extent_backref(cur);
5552 cur = cur->next;
5553 if (node->is_data)
5554 return 0;
5555 back = to_tree_backref(node);
5556 if (node->full_backref)
5557 return 0;
5558 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5559 is_extent = 1;
5561 return is_extent;
5565 static int record_bad_block_io(struct btrfs_fs_info *info,
5566 struct cache_tree *extent_cache,
5567 u64 start, u64 len)
5569 struct extent_record *rec;
5570 struct cache_extent *cache;
5571 struct btrfs_key key;
5573 cache = lookup_cache_extent(extent_cache, start, len);
5574 if (!cache)
5575 return 0;
5577 rec = container_of(cache, struct extent_record, cache);
5578 if (!is_extent_tree_record(rec))
5579 return 0;
5581 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5582 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5585 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5586 struct extent_buffer *buf, int slot)
5588 if (btrfs_header_level(buf)) {
5589 struct btrfs_key_ptr ptr1, ptr2;
5591 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5592 sizeof(struct btrfs_key_ptr));
5593 read_extent_buffer(buf, &ptr2,
5594 btrfs_node_key_ptr_offset(slot + 1),
5595 sizeof(struct btrfs_key_ptr));
5596 write_extent_buffer(buf, &ptr1,
5597 btrfs_node_key_ptr_offset(slot + 1),
5598 sizeof(struct btrfs_key_ptr));
5599 write_extent_buffer(buf, &ptr2,
5600 btrfs_node_key_ptr_offset(slot),
5601 sizeof(struct btrfs_key_ptr));
5602 if (slot == 0) {
5603 struct btrfs_disk_key key;
5604 btrfs_node_key(buf, &key, 0);
5605 btrfs_fixup_low_keys(root, path, &key,
5606 btrfs_header_level(buf) + 1);
5608 } else {
5609 struct btrfs_item *item1, *item2;
5610 struct btrfs_key k1, k2;
5611 char *item1_data, *item2_data;
5612 u32 item1_offset, item2_offset, item1_size, item2_size;
5614 item1 = btrfs_item_nr(slot);
5615 item2 = btrfs_item_nr(slot + 1);
5616 btrfs_item_key_to_cpu(buf, &k1, slot);
5617 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5618 item1_offset = btrfs_item_offset(buf, item1);
5619 item2_offset = btrfs_item_offset(buf, item2);
5620 item1_size = btrfs_item_size(buf, item1);
5621 item2_size = btrfs_item_size(buf, item2);
5623 item1_data = malloc(item1_size);
5624 if (!item1_data)
5625 return -ENOMEM;
5626 item2_data = malloc(item2_size);
5627 if (!item2_data) {
5628 free(item1_data);
5629 return -ENOMEM;
5632 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5633 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5635 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5636 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5637 free(item1_data);
5638 free(item2_data);
5640 btrfs_set_item_offset(buf, item1, item2_offset);
5641 btrfs_set_item_offset(buf, item2, item1_offset);
5642 btrfs_set_item_size(buf, item1, item2_size);
5643 btrfs_set_item_size(buf, item2, item1_size);
5645 path->slots[0] = slot;
5646 btrfs_set_item_key_unsafe(root, path, &k2);
5647 path->slots[0] = slot + 1;
5648 btrfs_set_item_key_unsafe(root, path, &k1);
5650 return 0;
5653 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5655 struct extent_buffer *buf;
5656 struct btrfs_key k1, k2;
5657 int i;
5658 int level = path->lowest_level;
5659 int ret = -EIO;
5661 buf = path->nodes[level];
5662 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5663 if (level) {
5664 btrfs_node_key_to_cpu(buf, &k1, i);
5665 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5666 } else {
5667 btrfs_item_key_to_cpu(buf, &k1, i);
5668 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5670 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5671 continue;
5672 ret = swap_values(root, path, buf, i);
5673 if (ret)
5674 break;
5675 btrfs_mark_buffer_dirty(buf);
5676 i = 0;
5678 return ret;
5681 static int delete_bogus_item(struct btrfs_root *root,
5682 struct btrfs_path *path,
5683 struct extent_buffer *buf, int slot)
5685 struct btrfs_key key;
5686 int nritems = btrfs_header_nritems(buf);
5688 btrfs_item_key_to_cpu(buf, &key, slot);
5690 /* These are all the keys we can deal with missing. */
5691 if (key.type != BTRFS_DIR_INDEX_KEY &&
5692 key.type != BTRFS_EXTENT_ITEM_KEY &&
5693 key.type != BTRFS_METADATA_ITEM_KEY &&
5694 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5695 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5696 return -1;
5698 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5699 (unsigned long long)key.objectid, key.type,
5700 (unsigned long long)key.offset, slot, buf->start);
5701 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5702 btrfs_item_nr_offset(slot + 1),
5703 sizeof(struct btrfs_item) *
5704 (nritems - slot - 1));
5705 btrfs_set_header_nritems(buf, nritems - 1);
5706 if (slot == 0) {
5707 struct btrfs_disk_key disk_key;
5709 btrfs_item_key(buf, &disk_key, 0);
5710 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5712 btrfs_mark_buffer_dirty(buf);
5713 return 0;
5716 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5718 struct extent_buffer *buf;
5719 int i;
5720 int ret = 0;
5722 /* We should only get this for leaves */
5723 BUG_ON(path->lowest_level);
5724 buf = path->nodes[0];
5725 again:
5726 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5727 unsigned int shift = 0, offset;
5729 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5730 BTRFS_LEAF_DATA_SIZE(root)) {
5731 if (btrfs_item_end_nr(buf, i) >
5732 BTRFS_LEAF_DATA_SIZE(root)) {
5733 ret = delete_bogus_item(root, path, buf, i);
5734 if (!ret)
5735 goto again;
5736 fprintf(stderr, "item is off the end of the "
5737 "leaf, can't fix\n");
5738 ret = -EIO;
5739 break;
5741 shift = BTRFS_LEAF_DATA_SIZE(root) -
5742 btrfs_item_end_nr(buf, i);
5743 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5744 btrfs_item_offset_nr(buf, i - 1)) {
5745 if (btrfs_item_end_nr(buf, i) >
5746 btrfs_item_offset_nr(buf, i - 1)) {
5747 ret = delete_bogus_item(root, path, buf, i);
5748 if (!ret)
5749 goto again;
5750 fprintf(stderr, "items overlap, can't fix\n");
5751 ret = -EIO;
5752 break;
5754 shift = btrfs_item_offset_nr(buf, i - 1) -
5755 btrfs_item_end_nr(buf, i);
5757 if (!shift)
5758 continue;
5760 printf("Shifting item nr %d by %u bytes in block %llu\n",
5761 i, shift, (unsigned long long)buf->start);
5762 offset = btrfs_item_offset_nr(buf, i);
5763 memmove_extent_buffer(buf,
5764 btrfs_leaf_data(buf) + offset + shift,
5765 btrfs_leaf_data(buf) + offset,
5766 btrfs_item_size_nr(buf, i));
5767 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5768 offset + shift);
5769 btrfs_mark_buffer_dirty(buf);
5773 * We may have moved things, in which case we want to exit so we don't
5774 * write those changes out. Once we have proper abort functionality in
5775 * progs this can be changed to something nicer.
5777 BUG_ON(ret);
5778 return ret;
5782 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5783 * then just return -EIO.
5785 static int try_to_fix_bad_block(struct btrfs_root *root,
5786 struct extent_buffer *buf,
5787 enum btrfs_tree_block_status status)
5789 struct btrfs_trans_handle *trans;
5790 struct ulist *roots;
5791 struct ulist_node *node;
5792 struct btrfs_root *search_root;
5793 struct btrfs_path path;
5794 struct ulist_iterator iter;
5795 struct btrfs_key root_key, key;
5796 int ret;
5798 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5799 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5800 return -EIO;
5802 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5803 if (ret)
5804 return -EIO;
5806 btrfs_init_path(&path);
5807 ULIST_ITER_INIT(&iter);
5808 while ((node = ulist_next(roots, &iter))) {
5809 root_key.objectid = node->val;
5810 root_key.type = BTRFS_ROOT_ITEM_KEY;
5811 root_key.offset = (u64)-1;
5813 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5814 if (IS_ERR(root)) {
5815 ret = -EIO;
5816 break;
5820 trans = btrfs_start_transaction(search_root, 0);
5821 if (IS_ERR(trans)) {
5822 ret = PTR_ERR(trans);
5823 break;
5826 path.lowest_level = btrfs_header_level(buf);
5827 path.skip_check_block = 1;
5828 if (path.lowest_level)
5829 btrfs_node_key_to_cpu(buf, &key, 0);
5830 else
5831 btrfs_item_key_to_cpu(buf, &key, 0);
5832 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5833 if (ret) {
5834 ret = -EIO;
5835 btrfs_commit_transaction(trans, search_root);
5836 break;
5838 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5839 ret = fix_key_order(search_root, &path);
5840 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5841 ret = fix_item_offset(search_root, &path);
5842 if (ret) {
5843 btrfs_commit_transaction(trans, search_root);
5844 break;
5846 btrfs_release_path(&path);
5847 btrfs_commit_transaction(trans, search_root);
5849 ulist_free(roots);
5850 btrfs_release_path(&path);
5851 return ret;
5854 static int check_block(struct btrfs_root *root,
5855 struct cache_tree *extent_cache,
5856 struct extent_buffer *buf, u64 flags)
5858 struct extent_record *rec;
5859 struct cache_extent *cache;
5860 struct btrfs_key key;
5861 enum btrfs_tree_block_status status;
5862 int ret = 0;
5863 int level;
5865 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5866 if (!cache)
5867 return 1;
5868 rec = container_of(cache, struct extent_record, cache);
5869 rec->generation = btrfs_header_generation(buf);
5871 level = btrfs_header_level(buf);
5872 if (btrfs_header_nritems(buf) > 0) {
5874 if (level == 0)
5875 btrfs_item_key_to_cpu(buf, &key, 0);
5876 else
5877 btrfs_node_key_to_cpu(buf, &key, 0);
5879 rec->info_objectid = key.objectid;
5881 rec->info_level = level;
5883 if (btrfs_is_leaf(buf))
5884 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5885 else
5886 status = btrfs_check_node(root, &rec->parent_key, buf);
5888 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5889 if (repair)
5890 status = try_to_fix_bad_block(root, buf, status);
5891 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5892 ret = -EIO;
5893 fprintf(stderr, "bad block %llu\n",
5894 (unsigned long long)buf->start);
5895 } else {
5897 * Signal to callers we need to start the scan over
5898 * again since we'll have cowed blocks.
5900 ret = -EAGAIN;
5902 } else {
5903 rec->content_checked = 1;
5904 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5905 rec->owner_ref_checked = 1;
5906 else {
5907 ret = check_owner_ref(root, rec, buf);
5908 if (!ret)
5909 rec->owner_ref_checked = 1;
5912 if (!ret)
5913 maybe_free_extent_rec(extent_cache, rec);
5914 return ret;
5917 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5918 u64 parent, u64 root)
5920 struct list_head *cur = rec->backrefs.next;
5921 struct extent_backref *node;
5922 struct tree_backref *back;
5924 while(cur != &rec->backrefs) {
5925 node = to_extent_backref(cur);
5926 cur = cur->next;
5927 if (node->is_data)
5928 continue;
5929 back = to_tree_backref(node);
5930 if (parent > 0) {
5931 if (!node->full_backref)
5932 continue;
5933 if (parent == back->parent)
5934 return back;
5935 } else {
5936 if (node->full_backref)
5937 continue;
5938 if (back->root == root)
5939 return back;
5942 return NULL;
5945 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5946 u64 parent, u64 root)
5948 struct tree_backref *ref = malloc(sizeof(*ref));
5950 if (!ref)
5951 return NULL;
5952 memset(&ref->node, 0, sizeof(ref->node));
5953 if (parent > 0) {
5954 ref->parent = parent;
5955 ref->node.full_backref = 1;
5956 } else {
5957 ref->root = root;
5958 ref->node.full_backref = 0;
5960 list_add_tail(&ref->node.list, &rec->backrefs);
5962 return ref;
5965 static struct data_backref *find_data_backref(struct extent_record *rec,
5966 u64 parent, u64 root,
5967 u64 owner, u64 offset,
5968 int found_ref,
5969 u64 disk_bytenr, u64 bytes)
5971 struct list_head *cur = rec->backrefs.next;
5972 struct extent_backref *node;
5973 struct data_backref *back;
5975 while(cur != &rec->backrefs) {
5976 node = to_extent_backref(cur);
5977 cur = cur->next;
5978 if (!node->is_data)
5979 continue;
5980 back = to_data_backref(node);
5981 if (parent > 0) {
5982 if (!node->full_backref)
5983 continue;
5984 if (parent == back->parent)
5985 return back;
5986 } else {
5987 if (node->full_backref)
5988 continue;
5989 if (back->root == root && back->owner == owner &&
5990 back->offset == offset) {
5991 if (found_ref && node->found_ref &&
5992 (back->bytes != bytes ||
5993 back->disk_bytenr != disk_bytenr))
5994 continue;
5995 return back;
5999 return NULL;
6002 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6003 u64 parent, u64 root,
6004 u64 owner, u64 offset,
6005 u64 max_size)
6007 struct data_backref *ref = malloc(sizeof(*ref));
6009 if (!ref)
6010 return NULL;
6011 memset(&ref->node, 0, sizeof(ref->node));
6012 ref->node.is_data = 1;
6014 if (parent > 0) {
6015 ref->parent = parent;
6016 ref->owner = 0;
6017 ref->offset = 0;
6018 ref->node.full_backref = 1;
6019 } else {
6020 ref->root = root;
6021 ref->owner = owner;
6022 ref->offset = offset;
6023 ref->node.full_backref = 0;
6025 ref->bytes = max_size;
6026 ref->found_ref = 0;
6027 ref->num_refs = 0;
6028 list_add_tail(&ref->node.list, &rec->backrefs);
6029 if (max_size > rec->max_size)
6030 rec->max_size = max_size;
6031 return ref;
6034 /* Check if the type of extent matches with its chunk */
6035 static void check_extent_type(struct extent_record *rec)
6037 struct btrfs_block_group_cache *bg_cache;
6039 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6040 if (!bg_cache)
6041 return;
6043 /* data extent, check chunk directly*/
6044 if (!rec->metadata) {
6045 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6046 rec->wrong_chunk_type = 1;
6047 return;
6050 /* metadata extent, check the obvious case first */
6051 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6052 BTRFS_BLOCK_GROUP_METADATA))) {
6053 rec->wrong_chunk_type = 1;
6054 return;
6058 * Check SYSTEM extent, as it's also marked as metadata, we can only
6059 * make sure it's a SYSTEM extent by its backref
6061 if (!list_empty(&rec->backrefs)) {
6062 struct extent_backref *node;
6063 struct tree_backref *tback;
6064 u64 bg_type;
6066 node = to_extent_backref(rec->backrefs.next);
6067 if (node->is_data) {
6068 /* tree block shouldn't have data backref */
6069 rec->wrong_chunk_type = 1;
6070 return;
6072 tback = container_of(node, struct tree_backref, node);
6074 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6075 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6076 else
6077 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6078 if (!(bg_cache->flags & bg_type))
6079 rec->wrong_chunk_type = 1;
6084 * Allocate a new extent record, fill default values from @tmpl and insert int
6085 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6086 * the cache, otherwise it fails.
6088 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6089 struct extent_record *tmpl)
6091 struct extent_record *rec;
6092 int ret = 0;
6094 BUG_ON(tmpl->max_size == 0);
6095 rec = malloc(sizeof(*rec));
6096 if (!rec)
6097 return -ENOMEM;
6098 rec->start = tmpl->start;
6099 rec->max_size = tmpl->max_size;
6100 rec->nr = max(tmpl->nr, tmpl->max_size);
6101 rec->found_rec = tmpl->found_rec;
6102 rec->content_checked = tmpl->content_checked;
6103 rec->owner_ref_checked = tmpl->owner_ref_checked;
6104 rec->num_duplicates = 0;
6105 rec->metadata = tmpl->metadata;
6106 rec->flag_block_full_backref = FLAG_UNSET;
6107 rec->bad_full_backref = 0;
6108 rec->crossing_stripes = 0;
6109 rec->wrong_chunk_type = 0;
6110 rec->is_root = tmpl->is_root;
6111 rec->refs = tmpl->refs;
6112 rec->extent_item_refs = tmpl->extent_item_refs;
6113 rec->parent_generation = tmpl->parent_generation;
6114 INIT_LIST_HEAD(&rec->backrefs);
6115 INIT_LIST_HEAD(&rec->dups);
6116 INIT_LIST_HEAD(&rec->list);
6117 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6118 rec->cache.start = tmpl->start;
6119 rec->cache.size = tmpl->nr;
6120 ret = insert_cache_extent(extent_cache, &rec->cache);
6121 if (ret) {
6122 free(rec);
6123 return ret;
6125 bytes_used += rec->nr;
6127 if (tmpl->metadata)
6128 rec->crossing_stripes = check_crossing_stripes(global_info,
6129 rec->start, global_info->nodesize);
6130 check_extent_type(rec);
6131 return ret;
6135 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6136 * some are hints:
6137 * - refs - if found, increase refs
6138 * - is_root - if found, set
6139 * - content_checked - if found, set
6140 * - owner_ref_checked - if found, set
6142 * If not found, create a new one, initialize and insert.
6144 static int add_extent_rec(struct cache_tree *extent_cache,
6145 struct extent_record *tmpl)
6147 struct extent_record *rec;
6148 struct cache_extent *cache;
6149 int ret = 0;
6150 int dup = 0;
6152 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6153 if (cache) {
6154 rec = container_of(cache, struct extent_record, cache);
6155 if (tmpl->refs)
6156 rec->refs++;
6157 if (rec->nr == 1)
6158 rec->nr = max(tmpl->nr, tmpl->max_size);
6161 * We need to make sure to reset nr to whatever the extent
6162 * record says was the real size, this way we can compare it to
6163 * the backrefs.
6165 if (tmpl->found_rec) {
6166 if (tmpl->start != rec->start || rec->found_rec) {
6167 struct extent_record *tmp;
6169 dup = 1;
6170 if (list_empty(&rec->list))
6171 list_add_tail(&rec->list,
6172 &duplicate_extents);
6175 * We have to do this song and dance in case we
6176 * find an extent record that falls inside of
6177 * our current extent record but does not have
6178 * the same objectid.
6180 tmp = malloc(sizeof(*tmp));
6181 if (!tmp)
6182 return -ENOMEM;
6183 tmp->start = tmpl->start;
6184 tmp->max_size = tmpl->max_size;
6185 tmp->nr = tmpl->nr;
6186 tmp->found_rec = 1;
6187 tmp->metadata = tmpl->metadata;
6188 tmp->extent_item_refs = tmpl->extent_item_refs;
6189 INIT_LIST_HEAD(&tmp->list);
6190 list_add_tail(&tmp->list, &rec->dups);
6191 rec->num_duplicates++;
6192 } else {
6193 rec->nr = tmpl->nr;
6194 rec->found_rec = 1;
6198 if (tmpl->extent_item_refs && !dup) {
6199 if (rec->extent_item_refs) {
6200 fprintf(stderr, "block %llu rec "
6201 "extent_item_refs %llu, passed %llu\n",
6202 (unsigned long long)tmpl->start,
6203 (unsigned long long)
6204 rec->extent_item_refs,
6205 (unsigned long long)tmpl->extent_item_refs);
6207 rec->extent_item_refs = tmpl->extent_item_refs;
6209 if (tmpl->is_root)
6210 rec->is_root = 1;
6211 if (tmpl->content_checked)
6212 rec->content_checked = 1;
6213 if (tmpl->owner_ref_checked)
6214 rec->owner_ref_checked = 1;
6215 memcpy(&rec->parent_key, &tmpl->parent_key,
6216 sizeof(tmpl->parent_key));
6217 if (tmpl->parent_generation)
6218 rec->parent_generation = tmpl->parent_generation;
6219 if (rec->max_size < tmpl->max_size)
6220 rec->max_size = tmpl->max_size;
6223 * A metadata extent can't cross stripe_len boundary, otherwise
6224 * kernel scrub won't be able to handle it.
6225 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6226 * it.
6228 if (tmpl->metadata)
6229 rec->crossing_stripes = check_crossing_stripes(
6230 global_info, rec->start,
6231 global_info->nodesize);
6232 check_extent_type(rec);
6233 maybe_free_extent_rec(extent_cache, rec);
6234 return ret;
6237 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6239 return ret;
6242 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6243 u64 parent, u64 root, int found_ref)
6245 struct extent_record *rec;
6246 struct tree_backref *back;
6247 struct cache_extent *cache;
6248 int ret;
6250 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6251 if (!cache) {
6252 struct extent_record tmpl;
6254 memset(&tmpl, 0, sizeof(tmpl));
6255 tmpl.start = bytenr;
6256 tmpl.nr = 1;
6257 tmpl.metadata = 1;
6258 tmpl.max_size = 1;
6260 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6261 if (ret)
6262 return ret;
6264 /* really a bug in cache_extent implement now */
6265 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6266 if (!cache)
6267 return -ENOENT;
6270 rec = container_of(cache, struct extent_record, cache);
6271 if (rec->start != bytenr) {
6273 * Several cause, from unaligned bytenr to over lapping extents
6275 return -EEXIST;
6278 back = find_tree_backref(rec, parent, root);
6279 if (!back) {
6280 back = alloc_tree_backref(rec, parent, root);
6281 if (!back)
6282 return -ENOMEM;
6285 if (found_ref) {
6286 if (back->node.found_ref) {
6287 fprintf(stderr, "Extent back ref already exists "
6288 "for %llu parent %llu root %llu \n",
6289 (unsigned long long)bytenr,
6290 (unsigned long long)parent,
6291 (unsigned long long)root);
6293 back->node.found_ref = 1;
6294 } else {
6295 if (back->node.found_extent_tree) {
6296 fprintf(stderr, "Extent back ref already exists "
6297 "for %llu parent %llu root %llu \n",
6298 (unsigned long long)bytenr,
6299 (unsigned long long)parent,
6300 (unsigned long long)root);
6302 back->node.found_extent_tree = 1;
6304 check_extent_type(rec);
6305 maybe_free_extent_rec(extent_cache, rec);
6306 return 0;
6309 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6310 u64 parent, u64 root, u64 owner, u64 offset,
6311 u32 num_refs, int found_ref, u64 max_size)
6313 struct extent_record *rec;
6314 struct data_backref *back;
6315 struct cache_extent *cache;
6316 int ret;
6318 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6319 if (!cache) {
6320 struct extent_record tmpl;
6322 memset(&tmpl, 0, sizeof(tmpl));
6323 tmpl.start = bytenr;
6324 tmpl.nr = 1;
6325 tmpl.max_size = max_size;
6327 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6328 if (ret)
6329 return ret;
6331 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6332 if (!cache)
6333 abort();
6336 rec = container_of(cache, struct extent_record, cache);
6337 if (rec->max_size < max_size)
6338 rec->max_size = max_size;
6341 * If found_ref is set then max_size is the real size and must match the
6342 * existing refs. So if we have already found a ref then we need to
6343 * make sure that this ref matches the existing one, otherwise we need
6344 * to add a new backref so we can notice that the backrefs don't match
6345 * and we need to figure out who is telling the truth. This is to
6346 * account for that awful fsync bug I introduced where we'd end up with
6347 * a btrfs_file_extent_item that would have its length include multiple
6348 * prealloc extents or point inside of a prealloc extent.
6350 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6351 bytenr, max_size);
6352 if (!back) {
6353 back = alloc_data_backref(rec, parent, root, owner, offset,
6354 max_size);
6355 BUG_ON(!back);
6358 if (found_ref) {
6359 BUG_ON(num_refs != 1);
6360 if (back->node.found_ref)
6361 BUG_ON(back->bytes != max_size);
6362 back->node.found_ref = 1;
6363 back->found_ref += 1;
6364 back->bytes = max_size;
6365 back->disk_bytenr = bytenr;
6366 rec->refs += 1;
6367 rec->content_checked = 1;
6368 rec->owner_ref_checked = 1;
6369 } else {
6370 if (back->node.found_extent_tree) {
6371 fprintf(stderr, "Extent back ref already exists "
6372 "for %llu parent %llu root %llu "
6373 "owner %llu offset %llu num_refs %lu\n",
6374 (unsigned long long)bytenr,
6375 (unsigned long long)parent,
6376 (unsigned long long)root,
6377 (unsigned long long)owner,
6378 (unsigned long long)offset,
6379 (unsigned long)num_refs);
6381 back->num_refs = num_refs;
6382 back->node.found_extent_tree = 1;
6384 maybe_free_extent_rec(extent_cache, rec);
6385 return 0;
6388 static int add_pending(struct cache_tree *pending,
6389 struct cache_tree *seen, u64 bytenr, u32 size)
6391 int ret;
6392 ret = add_cache_extent(seen, bytenr, size);
6393 if (ret)
6394 return ret;
6395 add_cache_extent(pending, bytenr, size);
6396 return 0;
6399 static int pick_next_pending(struct cache_tree *pending,
6400 struct cache_tree *reada,
6401 struct cache_tree *nodes,
6402 u64 last, struct block_info *bits, int bits_nr,
6403 int *reada_bits)
6405 unsigned long node_start = last;
6406 struct cache_extent *cache;
6407 int ret;
6409 cache = search_cache_extent(reada, 0);
6410 if (cache) {
6411 bits[0].start = cache->start;
6412 bits[0].size = cache->size;
6413 *reada_bits = 1;
6414 return 1;
6416 *reada_bits = 0;
6417 if (node_start > 32768)
6418 node_start -= 32768;
6420 cache = search_cache_extent(nodes, node_start);
6421 if (!cache)
6422 cache = search_cache_extent(nodes, 0);
6424 if (!cache) {
6425 cache = search_cache_extent(pending, 0);
6426 if (!cache)
6427 return 0;
6428 ret = 0;
6429 do {
6430 bits[ret].start = cache->start;
6431 bits[ret].size = cache->size;
6432 cache = next_cache_extent(cache);
6433 ret++;
6434 } while (cache && ret < bits_nr);
6435 return ret;
6438 ret = 0;
6439 do {
6440 bits[ret].start = cache->start;
6441 bits[ret].size = cache->size;
6442 cache = next_cache_extent(cache);
6443 ret++;
6444 } while (cache && ret < bits_nr);
6446 if (bits_nr - ret > 8) {
6447 u64 lookup = bits[0].start + bits[0].size;
6448 struct cache_extent *next;
6449 next = search_cache_extent(pending, lookup);
6450 while(next) {
6451 if (next->start - lookup > 32768)
6452 break;
6453 bits[ret].start = next->start;
6454 bits[ret].size = next->size;
6455 lookup = next->start + next->size;
6456 ret++;
6457 if (ret == bits_nr)
6458 break;
6459 next = next_cache_extent(next);
6460 if (!next)
6461 break;
6464 return ret;
6467 static void free_chunk_record(struct cache_extent *cache)
6469 struct chunk_record *rec;
6471 rec = container_of(cache, struct chunk_record, cache);
6472 list_del_init(&rec->list);
6473 list_del_init(&rec->dextents);
6474 free(rec);
6477 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6479 cache_tree_free_extents(chunk_cache, free_chunk_record);
6482 static void free_device_record(struct rb_node *node)
6484 struct device_record *rec;
6486 rec = container_of(node, struct device_record, node);
6487 free(rec);
6490 FREE_RB_BASED_TREE(device_cache, free_device_record);
6492 int insert_block_group_record(struct block_group_tree *tree,
6493 struct block_group_record *bg_rec)
6495 int ret;
6497 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6498 if (ret)
6499 return ret;
6501 list_add_tail(&bg_rec->list, &tree->block_groups);
6502 return 0;
6505 static void free_block_group_record(struct cache_extent *cache)
6507 struct block_group_record *rec;
6509 rec = container_of(cache, struct block_group_record, cache);
6510 list_del_init(&rec->list);
6511 free(rec);
6514 void free_block_group_tree(struct block_group_tree *tree)
6516 cache_tree_free_extents(&tree->tree, free_block_group_record);
6519 int insert_device_extent_record(struct device_extent_tree *tree,
6520 struct device_extent_record *de_rec)
6522 int ret;
6525 * Device extent is a bit different from the other extents, because
6526 * the extents which belong to the different devices may have the
6527 * same start and size, so we need use the special extent cache
6528 * search/insert functions.
6530 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6531 if (ret)
6532 return ret;
6534 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6535 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6536 return 0;
6539 static void free_device_extent_record(struct cache_extent *cache)
6541 struct device_extent_record *rec;
6543 rec = container_of(cache, struct device_extent_record, cache);
6544 if (!list_empty(&rec->chunk_list))
6545 list_del_init(&rec->chunk_list);
6546 if (!list_empty(&rec->device_list))
6547 list_del_init(&rec->device_list);
6548 free(rec);
6551 void free_device_extent_tree(struct device_extent_tree *tree)
6553 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases key.objectid is the extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6577 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6578 struct btrfs_key *key,
6579 int slot)
6581 struct btrfs_chunk *ptr;
6582 struct chunk_record *rec;
6583 int num_stripes, i;
6585 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6586 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6588 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6589 if (!rec) {
6590 fprintf(stderr, "memory allocation failed\n");
6591 exit(-1);
6594 INIT_LIST_HEAD(&rec->list);
6595 INIT_LIST_HEAD(&rec->dextents);
6596 rec->bg_rec = NULL;
6598 rec->cache.start = key->offset;
6599 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6601 rec->generation = btrfs_header_generation(leaf);
6603 rec->objectid = key->objectid;
6604 rec->type = key->type;
6605 rec->offset = key->offset;
6607 rec->length = rec->cache.size;
6608 rec->owner = btrfs_chunk_owner(leaf, ptr);
6609 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6610 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6611 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6612 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6613 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6614 rec->num_stripes = num_stripes;
6615 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6617 for (i = 0; i < rec->num_stripes; ++i) {
6618 rec->stripes[i].devid =
6619 btrfs_stripe_devid_nr(leaf, ptr, i);
6620 rec->stripes[i].offset =
6621 btrfs_stripe_offset_nr(leaf, ptr, i);
6622 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6623 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6624 BTRFS_UUID_SIZE);
6627 return rec;
6630 static int process_chunk_item(struct cache_tree *chunk_cache,
6631 struct btrfs_key *key, struct extent_buffer *eb,
6632 int slot)
6634 struct chunk_record *rec;
6635 struct btrfs_chunk *chunk;
6636 int ret = 0;
6638 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6640 * Do extra check for this chunk item,
6642 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6643 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6644 * and owner<->key_type check.
6646 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6647 key->offset);
6648 if (ret < 0) {
6649 error("chunk(%llu, %llu) is not valid, ignore it",
6650 key->offset, btrfs_chunk_length(eb, chunk));
6651 return 0;
6653 rec = btrfs_new_chunk_record(eb, key, slot);
6654 ret = insert_cache_extent(chunk_cache, &rec->cache);
6655 if (ret) {
6656 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6657 rec->offset, rec->length);
6658 free(rec);
6661 return ret;
6664 static int process_device_item(struct rb_root *dev_cache,
6665 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6667 struct btrfs_dev_item *ptr;
6668 struct device_record *rec;
6669 int ret = 0;
6671 ptr = btrfs_item_ptr(eb,
6672 slot, struct btrfs_dev_item);
6674 rec = malloc(sizeof(*rec));
6675 if (!rec) {
6676 fprintf(stderr, "memory allocation failed\n");
6677 return -ENOMEM;
6680 rec->devid = key->offset;
6681 rec->generation = btrfs_header_generation(eb);
6683 rec->objectid = key->objectid;
6684 rec->type = key->type;
6685 rec->offset = key->offset;
6687 rec->devid = btrfs_device_id(eb, ptr);
6688 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6689 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6691 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6692 if (ret) {
6693 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6694 free(rec);
6697 return ret;
6700 struct block_group_record *
6701 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6702 int slot)
6704 struct btrfs_block_group_item *ptr;
6705 struct block_group_record *rec;
6707 rec = calloc(1, sizeof(*rec));
6708 if (!rec) {
6709 fprintf(stderr, "memory allocation failed\n");
6710 exit(-1);
6713 rec->cache.start = key->objectid;
6714 rec->cache.size = key->offset;
6716 rec->generation = btrfs_header_generation(leaf);
6718 rec->objectid = key->objectid;
6719 rec->type = key->type;
6720 rec->offset = key->offset;
6722 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6723 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6725 INIT_LIST_HEAD(&rec->list);
6727 return rec;
6730 static int process_block_group_item(struct block_group_tree *block_group_cache,
6731 struct btrfs_key *key,
6732 struct extent_buffer *eb, int slot)
6734 struct block_group_record *rec;
6735 int ret = 0;
6737 rec = btrfs_new_block_group_record(eb, key, slot);
6738 ret = insert_block_group_record(block_group_cache, rec);
6739 if (ret) {
6740 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6741 rec->objectid, rec->offset);
6742 free(rec);
6745 return ret;
6748 struct device_extent_record *
6749 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6750 struct btrfs_key *key, int slot)
6752 struct device_extent_record *rec;
6753 struct btrfs_dev_extent *ptr;
6755 rec = calloc(1, sizeof(*rec));
6756 if (!rec) {
6757 fprintf(stderr, "memory allocation failed\n");
6758 exit(-1);
6761 rec->cache.objectid = key->objectid;
6762 rec->cache.start = key->offset;
6764 rec->generation = btrfs_header_generation(leaf);
6766 rec->objectid = key->objectid;
6767 rec->type = key->type;
6768 rec->offset = key->offset;
6770 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6771 rec->chunk_objecteid =
6772 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6773 rec->chunk_offset =
6774 btrfs_dev_extent_chunk_offset(leaf, ptr);
6775 rec->length = btrfs_dev_extent_length(leaf, ptr);
6776 rec->cache.size = rec->length;
6778 INIT_LIST_HEAD(&rec->chunk_list);
6779 INIT_LIST_HEAD(&rec->device_list);
6781 return rec;
6784 static int
6785 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6786 struct btrfs_key *key, struct extent_buffer *eb,
6787 int slot)
6789 struct device_extent_record *rec;
6790 int ret;
6792 rec = btrfs_new_device_extent_record(eb, key, slot);
6793 ret = insert_device_extent_record(dev_extent_cache, rec);
6794 if (ret) {
6795 fprintf(stderr,
6796 "Device extent[%llu, %llu, %llu] existed.\n",
6797 rec->objectid, rec->offset, rec->length);
6798 free(rec);
6801 return ret;
6804 static int process_extent_item(struct btrfs_root *root,
6805 struct cache_tree *extent_cache,
6806 struct extent_buffer *eb, int slot)
6808 struct btrfs_extent_item *ei;
6809 struct btrfs_extent_inline_ref *iref;
6810 struct btrfs_extent_data_ref *dref;
6811 struct btrfs_shared_data_ref *sref;
6812 struct btrfs_key key;
6813 struct extent_record tmpl;
6814 unsigned long end;
6815 unsigned long ptr;
6816 int ret;
6817 int type;
6818 u32 item_size = btrfs_item_size_nr(eb, slot);
6819 u64 refs = 0;
6820 u64 offset;
6821 u64 num_bytes;
6822 int metadata = 0;
6824 btrfs_item_key_to_cpu(eb, &key, slot);
6826 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6827 metadata = 1;
6828 num_bytes = root->fs_info->nodesize;
6829 } else {
6830 num_bytes = key.offset;
6833 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6834 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6835 key.objectid, root->fs_info->sectorsize);
6836 return -EIO;
6838 if (item_size < sizeof(*ei)) {
6839 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6840 struct btrfs_extent_item_v0 *ei0;
6841 BUG_ON(item_size != sizeof(*ei0));
6842 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6843 refs = btrfs_extent_refs_v0(eb, ei0);
6844 #else
6845 BUG();
6846 #endif
6847 memset(&tmpl, 0, sizeof(tmpl));
6848 tmpl.start = key.objectid;
6849 tmpl.nr = num_bytes;
6850 tmpl.extent_item_refs = refs;
6851 tmpl.metadata = metadata;
6852 tmpl.found_rec = 1;
6853 tmpl.max_size = num_bytes;
6855 return add_extent_rec(extent_cache, &tmpl);
6858 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6859 refs = btrfs_extent_refs(eb, ei);
6860 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6861 metadata = 1;
6862 else
6863 metadata = 0;
6864 if (metadata && num_bytes != root->fs_info->nodesize) {
6865 error("ignore invalid metadata extent, length %llu does not equal to %u",
6866 num_bytes, root->fs_info->nodesize);
6867 return -EIO;
6869 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6870 error("ignore invalid data extent, length %llu is not aligned to %u",
6871 num_bytes, root->fs_info->sectorsize);
6872 return -EIO;
6875 memset(&tmpl, 0, sizeof(tmpl));
6876 tmpl.start = key.objectid;
6877 tmpl.nr = num_bytes;
6878 tmpl.extent_item_refs = refs;
6879 tmpl.metadata = metadata;
6880 tmpl.found_rec = 1;
6881 tmpl.max_size = num_bytes;
6882 add_extent_rec(extent_cache, &tmpl);
6884 ptr = (unsigned long)(ei + 1);
6885 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6886 key.type == BTRFS_EXTENT_ITEM_KEY)
6887 ptr += sizeof(struct btrfs_tree_block_info);
6889 end = (unsigned long)ei + item_size;
6890 while (ptr < end) {
6891 iref = (struct btrfs_extent_inline_ref *)ptr;
6892 type = btrfs_extent_inline_ref_type(eb, iref);
6893 offset = btrfs_extent_inline_ref_offset(eb, iref);
6894 switch (type) {
6895 case BTRFS_TREE_BLOCK_REF_KEY:
6896 ret = add_tree_backref(extent_cache, key.objectid,
6897 0, offset, 0);
6898 if (ret < 0)
6899 error(
6900 "add_tree_backref failed (extent items tree block): %s",
6901 strerror(-ret));
6902 break;
6903 case BTRFS_SHARED_BLOCK_REF_KEY:
6904 ret = add_tree_backref(extent_cache, key.objectid,
6905 offset, 0, 0);
6906 if (ret < 0)
6907 error(
6908 "add_tree_backref failed (extent items shared block): %s",
6909 strerror(-ret));
6910 break;
6911 case BTRFS_EXTENT_DATA_REF_KEY:
6912 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6913 add_data_backref(extent_cache, key.objectid, 0,
6914 btrfs_extent_data_ref_root(eb, dref),
6915 btrfs_extent_data_ref_objectid(eb,
6916 dref),
6917 btrfs_extent_data_ref_offset(eb, dref),
6918 btrfs_extent_data_ref_count(eb, dref),
6919 0, num_bytes);
6920 break;
6921 case BTRFS_SHARED_DATA_REF_KEY:
6922 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6923 add_data_backref(extent_cache, key.objectid, offset,
6924 0, 0, 0,
6925 btrfs_shared_data_ref_count(eb, sref),
6926 0, num_bytes);
6927 break;
6928 default:
6929 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6930 key.objectid, key.type, num_bytes);
6931 goto out;
6933 ptr += btrfs_extent_inline_ref_size(type);
6935 WARN_ON(ptr > end);
6936 out:
6937 return 0;
6940 static int check_cache_range(struct btrfs_root *root,
6941 struct btrfs_block_group_cache *cache,
6942 u64 offset, u64 bytes)
6944 struct btrfs_free_space *entry;
6945 u64 *logical;
6946 u64 bytenr;
6947 int stripe_len;
6948 int i, nr, ret;
6950 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6951 bytenr = btrfs_sb_offset(i);
6952 ret = btrfs_rmap_block(root->fs_info,
6953 cache->key.objectid, bytenr, 0,
6954 &logical, &nr, &stripe_len);
6955 if (ret)
6956 return ret;
6958 while (nr--) {
6959 if (logical[nr] + stripe_len <= offset)
6960 continue;
6961 if (offset + bytes <= logical[nr])
6962 continue;
6963 if (logical[nr] == offset) {
6964 if (stripe_len >= bytes) {
6965 free(logical);
6966 return 0;
6968 bytes -= stripe_len;
6969 offset += stripe_len;
6970 } else if (logical[nr] < offset) {
6971 if (logical[nr] + stripe_len >=
6972 offset + bytes) {
6973 free(logical);
6974 return 0;
6976 bytes = (offset + bytes) -
6977 (logical[nr] + stripe_len);
6978 offset = logical[nr] + stripe_len;
6979 } else {
6981 * Could be tricky, the super may land in the
6982 * middle of the area we're checking. First
6983 * check the easiest case, it's at the end.
6985 if (logical[nr] + stripe_len >=
6986 bytes + offset) {
6987 bytes = logical[nr] - offset;
6988 continue;
6991 /* Check the left side */
6992 ret = check_cache_range(root, cache,
6993 offset,
6994 logical[nr] - offset);
6995 if (ret) {
6996 free(logical);
6997 return ret;
7000 /* Now we continue with the right side */
7001 bytes = (offset + bytes) -
7002 (logical[nr] + stripe_len);
7003 offset = logical[nr] + stripe_len;
7007 free(logical);
7010 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7011 if (!entry) {
7012 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7013 offset, offset+bytes);
7014 return -EINVAL;
7017 if (entry->offset != offset) {
7018 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7019 entry->offset);
7020 return -EINVAL;
7023 if (entry->bytes != bytes) {
7024 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7025 bytes, entry->bytes, offset);
7026 return -EINVAL;
7029 unlink_free_space(cache->free_space_ctl, entry);
7030 free(entry);
7031 return 0;
7034 static int verify_space_cache(struct btrfs_root *root,
7035 struct btrfs_block_group_cache *cache)
7037 struct btrfs_path path;
7038 struct extent_buffer *leaf;
7039 struct btrfs_key key;
7040 u64 last;
7041 int ret = 0;
7043 root = root->fs_info->extent_root;
7045 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7047 btrfs_init_path(&path);
7048 key.objectid = last;
7049 key.offset = 0;
7050 key.type = BTRFS_EXTENT_ITEM_KEY;
7051 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7052 if (ret < 0)
7053 goto out;
7054 ret = 0;
7055 while (1) {
7056 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7057 ret = btrfs_next_leaf(root, &path);
7058 if (ret < 0)
7059 goto out;
7060 if (ret > 0) {
7061 ret = 0;
7062 break;
7065 leaf = path.nodes[0];
7066 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7067 if (key.objectid >= cache->key.offset + cache->key.objectid)
7068 break;
7069 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7070 key.type != BTRFS_METADATA_ITEM_KEY) {
7071 path.slots[0]++;
7072 continue;
7075 if (last == key.objectid) {
7076 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7077 last = key.objectid + key.offset;
7078 else
7079 last = key.objectid + root->fs_info->nodesize;
7080 path.slots[0]++;
7081 continue;
7084 ret = check_cache_range(root, cache, last,
7085 key.objectid - last);
7086 if (ret)
7087 break;
7088 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7089 last = key.objectid + key.offset;
7090 else
7091 last = key.objectid + root->fs_info->nodesize;
7092 path.slots[0]++;
7095 if (last < cache->key.objectid + cache->key.offset)
7096 ret = check_cache_range(root, cache, last,
7097 cache->key.objectid +
7098 cache->key.offset - last);
7100 out:
7101 btrfs_release_path(&path);
7103 if (!ret &&
7104 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7105 fprintf(stderr, "There are still entries left in the space "
7106 "cache\n");
7107 ret = -EINVAL;
7110 return ret;
7113 static int check_space_cache(struct btrfs_root *root)
7115 struct btrfs_block_group_cache *cache;
7116 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7117 int ret;
7118 int error = 0;
7120 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7121 btrfs_super_generation(root->fs_info->super_copy) !=
7122 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7123 printf("cache and super generation don't match, space cache "
7124 "will be invalidated\n");
7125 return 0;
7128 if (ctx.progress_enabled) {
7129 ctx.tp = TASK_FREE_SPACE;
7130 task_start(ctx.info);
7133 while (1) {
7134 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7135 if (!cache)
7136 break;
7138 start = cache->key.objectid + cache->key.offset;
7139 if (!cache->free_space_ctl) {
7140 if (btrfs_init_free_space_ctl(cache,
7141 root->fs_info->sectorsize)) {
7142 ret = -ENOMEM;
7143 break;
7145 } else {
7146 btrfs_remove_free_space_cache(cache);
7149 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7150 ret = exclude_super_stripes(root, cache);
7151 if (ret) {
7152 fprintf(stderr, "could not exclude super stripes: %s\n",
7153 strerror(-ret));
7154 error++;
7155 continue;
7157 ret = load_free_space_tree(root->fs_info, cache);
7158 free_excluded_extents(root, cache);
7159 if (ret < 0) {
7160 fprintf(stderr, "could not load free space tree: %s\n",
7161 strerror(-ret));
7162 error++;
7163 continue;
7165 error += ret;
7166 } else {
7167 ret = load_free_space_cache(root->fs_info, cache);
7168 if (!ret)
7169 continue;
7172 ret = verify_space_cache(root, cache);
7173 if (ret) {
7174 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7175 cache->key.objectid);
7176 error++;
7180 task_stop(ctx.info);
7182 return error ? -EINVAL : 0;
7185 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7186 u64 num_bytes, unsigned long leaf_offset,
7187 struct extent_buffer *eb) {
7189 struct btrfs_fs_info *fs_info = root->fs_info;
7190 u64 offset = 0;
7191 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7192 char *data;
7193 unsigned long csum_offset;
7194 u32 csum;
7195 u32 csum_expected;
7196 u64 read_len;
7197 u64 data_checked = 0;
7198 u64 tmp;
7199 int ret = 0;
7200 int mirror;
7201 int num_copies;
7203 if (num_bytes % fs_info->sectorsize)
7204 return -EINVAL;
7206 data = malloc(num_bytes);
7207 if (!data)
7208 return -ENOMEM;
7210 while (offset < num_bytes) {
7211 mirror = 0;
7212 again:
7213 read_len = num_bytes - offset;
7214 /* read as much space once a time */
7215 ret = read_extent_data(fs_info, data + offset,
7216 bytenr + offset, &read_len, mirror);
7217 if (ret)
7218 goto out;
7219 data_checked = 0;
7220 /* verify every 4k data's checksum */
7221 while (data_checked < read_len) {
7222 csum = ~(u32)0;
7223 tmp = offset + data_checked;
7225 csum = btrfs_csum_data((char *)data + tmp,
7226 csum, fs_info->sectorsize);
7227 btrfs_csum_final(csum, (u8 *)&csum);
7229 csum_offset = leaf_offset +
7230 tmp / fs_info->sectorsize * csum_size;
7231 read_extent_buffer(eb, (char *)&csum_expected,
7232 csum_offset, csum_size);
7233 /* try another mirror */
7234 if (csum != csum_expected) {
7235 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7236 mirror, bytenr + tmp,
7237 csum, csum_expected);
7238 num_copies = btrfs_num_copies(root->fs_info,
7239 bytenr, num_bytes);
7240 if (mirror < num_copies - 1) {
7241 mirror += 1;
7242 goto again;
7245 data_checked += fs_info->sectorsize;
7247 offset += read_len;
7249 out:
7250 free(data);
7251 return ret;
7254 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7255 u64 num_bytes)
7257 struct btrfs_path path;
7258 struct extent_buffer *leaf;
7259 struct btrfs_key key;
7260 int ret;
7262 btrfs_init_path(&path);
7263 key.objectid = bytenr;
7264 key.type = BTRFS_EXTENT_ITEM_KEY;
7265 key.offset = (u64)-1;
7267 again:
7268 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7269 0, 0);
7270 if (ret < 0) {
7271 fprintf(stderr, "Error looking up extent record %d\n", ret);
7272 btrfs_release_path(&path);
7273 return ret;
7274 } else if (ret) {
7275 if (path.slots[0] > 0) {
7276 path.slots[0]--;
7277 } else {
7278 ret = btrfs_prev_leaf(root, &path);
7279 if (ret < 0) {
7280 goto out;
7281 } else if (ret > 0) {
7282 ret = 0;
7283 goto out;
7288 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7291 * Block group items come before extent items if they have the same
7292 * bytenr, so walk back one more just in case. Dear future traveller,
7293 * first congrats on mastering time travel. Now if it's not too much
7294 * trouble could you go back to 2006 and tell Chris to make the
7295 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7296 * EXTENT_ITEM_KEY please?
7298 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7299 if (path.slots[0] > 0) {
7300 path.slots[0]--;
7301 } else {
7302 ret = btrfs_prev_leaf(root, &path);
7303 if (ret < 0) {
7304 goto out;
7305 } else if (ret > 0) {
7306 ret = 0;
7307 goto out;
7310 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7313 while (num_bytes) {
7314 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7315 ret = btrfs_next_leaf(root, &path);
7316 if (ret < 0) {
7317 fprintf(stderr, "Error going to next leaf "
7318 "%d\n", ret);
7319 btrfs_release_path(&path);
7320 return ret;
7321 } else if (ret) {
7322 break;
7325 leaf = path.nodes[0];
7326 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7327 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7328 path.slots[0]++;
7329 continue;
7331 if (key.objectid + key.offset < bytenr) {
7332 path.slots[0]++;
7333 continue;
7335 if (key.objectid > bytenr + num_bytes)
7336 break;
7338 if (key.objectid == bytenr) {
7339 if (key.offset >= num_bytes) {
7340 num_bytes = 0;
7341 break;
7343 num_bytes -= key.offset;
7344 bytenr += key.offset;
7345 } else if (key.objectid < bytenr) {
7346 if (key.objectid + key.offset >= bytenr + num_bytes) {
7347 num_bytes = 0;
7348 break;
7350 num_bytes = (bytenr + num_bytes) -
7351 (key.objectid + key.offset);
7352 bytenr = key.objectid + key.offset;
7353 } else {
7354 if (key.objectid + key.offset < bytenr + num_bytes) {
7355 u64 new_start = key.objectid + key.offset;
7356 u64 new_bytes = bytenr + num_bytes - new_start;
7359 * Weird case, the extent is in the middle of
7360 * our range, we'll have to search one side
7361 * and then the other. Not sure if this happens
7362 * in real life, but no harm in coding it up
7363 * anyway just in case.
7365 btrfs_release_path(&path);
7366 ret = check_extent_exists(root, new_start,
7367 new_bytes);
7368 if (ret) {
7369 fprintf(stderr, "Right section didn't "
7370 "have a record\n");
7371 break;
7373 num_bytes = key.objectid - bytenr;
7374 goto again;
7376 num_bytes = key.objectid - bytenr;
7378 path.slots[0]++;
7380 ret = 0;
7382 out:
7383 if (num_bytes && !ret) {
7384 fprintf(stderr, "There are no extents for csum range "
7385 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7386 ret = 1;
7389 btrfs_release_path(&path);
7390 return ret;
7393 static int check_csums(struct btrfs_root *root)
7395 struct btrfs_path path;
7396 struct extent_buffer *leaf;
7397 struct btrfs_key key;
7398 u64 offset = 0, num_bytes = 0;
7399 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7400 int errors = 0;
7401 int ret;
7402 u64 data_len;
7403 unsigned long leaf_offset;
7405 root = root->fs_info->csum_root;
7406 if (!extent_buffer_uptodate(root->node)) {
7407 fprintf(stderr, "No valid csum tree found\n");
7408 return -ENOENT;
7411 btrfs_init_path(&path);
7412 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7413 key.type = BTRFS_EXTENT_CSUM_KEY;
7414 key.offset = 0;
7415 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7416 if (ret < 0) {
7417 fprintf(stderr, "Error searching csum tree %d\n", ret);
7418 btrfs_release_path(&path);
7419 return ret;
7422 if (ret > 0 && path.slots[0])
7423 path.slots[0]--;
7424 ret = 0;
7426 while (1) {
7427 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7428 ret = btrfs_next_leaf(root, &path);
7429 if (ret < 0) {
7430 fprintf(stderr, "Error going to next leaf "
7431 "%d\n", ret);
7432 break;
7434 if (ret)
7435 break;
7437 leaf = path.nodes[0];
7439 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7440 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7441 path.slots[0]++;
7442 continue;
7445 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7446 csum_size) * root->fs_info->sectorsize;
7447 if (!check_data_csum)
7448 goto skip_csum_check;
7449 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7450 ret = check_extent_csums(root, key.offset, data_len,
7451 leaf_offset, leaf);
7452 if (ret)
7453 break;
7454 skip_csum_check:
7455 if (!num_bytes) {
7456 offset = key.offset;
7457 } else if (key.offset != offset + num_bytes) {
7458 ret = check_extent_exists(root, offset, num_bytes);
7459 if (ret) {
7460 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7461 "there is no extent record\n",
7462 offset, offset+num_bytes);
7463 errors++;
7465 offset = key.offset;
7466 num_bytes = 0;
7468 num_bytes += data_len;
7469 path.slots[0]++;
7472 btrfs_release_path(&path);
7473 return errors;
7476 static int is_dropped_key(struct btrfs_key *key,
7477 struct btrfs_key *drop_key) {
7478 if (key->objectid < drop_key->objectid)
7479 return 1;
7480 else if (key->objectid == drop_key->objectid) {
7481 if (key->type < drop_key->type)
7482 return 1;
7483 else if (key->type == drop_key->type) {
7484 if (key->offset < drop_key->offset)
7485 return 1;
7488 return 0;
7492 * Here are the rules for FULL_BACKREF.
7494 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7495 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7496 * FULL_BACKREF set.
7497 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7498 * if it happened after the relocation occurred since we'll have dropped the
7499 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7500 * have no real way to know for sure.
7502 * We process the blocks one root at a time, and we start from the lowest root
7503 * objectid and go to the highest. So we can just lookup the owner backref for
7504 * the record and if we don't find it then we know it doesn't exist and we have
7505 * a FULL BACKREF.
7507 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7508 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7509 * be set or not and then we can check later once we've gathered all the refs.
7511 static int calc_extent_flag(struct cache_tree *extent_cache,
7512 struct extent_buffer *buf,
7513 struct root_item_record *ri,
7514 u64 *flags)
7516 struct extent_record *rec;
7517 struct cache_extent *cache;
7518 struct tree_backref *tback;
7519 u64 owner = 0;
7521 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7522 /* we have added this extent before */
7523 if (!cache)
7524 return -ENOENT;
7526 rec = container_of(cache, struct extent_record, cache);
7529 * Except file/reloc tree, we can not have
7530 * FULL BACKREF MODE
7532 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7533 goto normal;
7535 * root node
7537 if (buf->start == ri->bytenr)
7538 goto normal;
7540 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7541 goto full_backref;
7543 owner = btrfs_header_owner(buf);
7544 if (owner == ri->objectid)
7545 goto normal;
7547 tback = find_tree_backref(rec, 0, owner);
7548 if (!tback)
7549 goto full_backref;
7550 normal:
7551 *flags = 0;
7552 if (rec->flag_block_full_backref != FLAG_UNSET &&
7553 rec->flag_block_full_backref != 0)
7554 rec->bad_full_backref = 1;
7555 return 0;
7556 full_backref:
7557 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7558 if (rec->flag_block_full_backref != FLAG_UNSET &&
7559 rec->flag_block_full_backref != 1)
7560 rec->bad_full_backref = 1;
7561 return 0;
7564 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7566 fprintf(stderr, "Invalid key type(");
7567 print_key_type(stderr, 0, key_type);
7568 fprintf(stderr, ") found in root(");
7569 print_objectid(stderr, rootid, 0);
7570 fprintf(stderr, ")\n");
7574 * Check if the key is valid with its extent buffer.
7576 * This is a early check in case invalid key exists in a extent buffer
7577 * This is not comprehensive yet, but should prevent wrong key/item passed
7578 * further
7580 static int check_type_with_root(u64 rootid, u8 key_type)
7582 switch (key_type) {
7583 /* Only valid in chunk tree */
7584 case BTRFS_DEV_ITEM_KEY:
7585 case BTRFS_CHUNK_ITEM_KEY:
7586 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7587 goto err;
7588 break;
7589 /* valid in csum and log tree */
7590 case BTRFS_CSUM_TREE_OBJECTID:
7591 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7592 is_fstree(rootid)))
7593 goto err;
7594 break;
7595 case BTRFS_EXTENT_ITEM_KEY:
7596 case BTRFS_METADATA_ITEM_KEY:
7597 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7598 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7599 goto err;
7600 break;
7601 case BTRFS_ROOT_ITEM_KEY:
7602 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7603 goto err;
7604 break;
7605 case BTRFS_DEV_EXTENT_KEY:
7606 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7607 goto err;
7608 break;
7610 return 0;
7611 err:
7612 report_mismatch_key_root(key_type, rootid);
7613 return -EINVAL;
/*
 * run_next_block - process the next queued tree block of the extent scan.
 *
 * Picks a batch of block addresses with pick_next_pending(), reads the first
 * one (bits[0]) and records what it contains in the in-memory caches: extent
 * records and backrefs in extent_cache, chunk/device/block group/device extent
 * items in their own caches, and child pointers of internal nodes in the
 * pending/nodes queues.
 *
 * Returns 1 when pick_next_pending() reports nothing left to do, otherwise the
 * value held in ret when the out label is reached.
 *
 * NOTE(review): this listing has lost lines that held only braces, comment
 * delimiters or very short fragments; the argument tail of the two
 * process_extent_item() calls below is among the missing text.
 */
7616 static int run_next_block(struct btrfs_root *root,
7617 struct block_info *bits,
7618 int bits_nr,
7619 u64 *last,
7620 struct cache_tree *pending,
7621 struct cache_tree *seen,
7622 struct cache_tree *reada,
7623 struct cache_tree *nodes,
7624 struct cache_tree *extent_cache,
7625 struct cache_tree *chunk_cache,
7626 struct rb_root *dev_cache,
7627 struct block_group_tree *block_group_cache,
7628 struct device_extent_tree *dev_extent_cache,
7629 struct root_item_record *ri)
7631 struct btrfs_fs_info *fs_info = root->fs_info;
7632 struct extent_buffer *buf;
7633 struct extent_record *rec = NULL;
7634 u64 bytenr;
7635 u32 size;
7636 u64 parent;
7637 u64 owner;
7638 u64 flags;
7639 u64 ptr;
7640 u64 gen = 0;
7641 int ret = 0;
7642 int i;
7643 int nritems;
7644 struct btrfs_key key;
7645 struct cache_extent *cache;
7646 int reada_bits;
/* Select the next batch; nritems is the number of entries filled into bits[] */
7648 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7649 bits_nr, &reada_bits);
7650 if (nritems == 0)
7651 return 1;
/*
 * reada_bits is zero: remember every block of the batch in the reada tree
 * and start readahead for it, skipping blocks that are already in the tree.
 */
7653 if (!reada_bits) {
7654 for(i = 0; i < nritems; i++) {
7655 ret = add_cache_extent(reada, bits[i].start,
7656 bits[i].size);
7657 if (ret == -EEXIST)
7658 continue;
7660 /* fixme, get the parent transid */
7661 readahead_tree_block(fs_info, bits[i].start,
7662 bits[i].size, 0);
/* Only the first entry of the batch is processed by this call */
7665 *last = bits[0].start;
7666 bytenr = bits[0].start;
7667 size = bits[0].size;
/* The block is being handled now, drop it from the pending/reada/nodes trees */
7669 cache = lookup_cache_extent(pending, bytenr, size);
7670 if (cache) {
7671 remove_cache_extent(pending, cache);
7672 free(cache);
7674 cache = lookup_cache_extent(reada, bytenr, size);
7675 if (cache) {
7676 remove_cache_extent(reada, cache);
7677 free(cache);
7679 cache = lookup_cache_extent(nodes, bytenr, size);
7680 if (cache) {
7681 remove_cache_extent(nodes, cache);
7682 free(cache);
/*
 * Take the generation to read with from the extent record, if one exists.
 * rec stays NULL when the lookup misses.
 */
7684 cache = lookup_cache_extent(extent_cache, bytenr, size);
7685 if (cache) {
7686 rec = container_of(cache, struct extent_record, cache);
7687 gen = rec->parent_generation;
7690 /* fixme, get the real parent transid */
7691 buf = read_tree_block(root->fs_info, bytenr, size, gen);
/*
 * NOTE(review): ret is not assigned on this failure path, so the function
 * returns whatever the readahead loop above left in it (0, or the last
 * add_cache_extent() result such as -EEXIST) - confirm this is intended.
 */
7692 if (!extent_buffer_uptodate(buf)) {
7693 record_bad_block_io(root->fs_info,
7694 extent_cache, bytenr, size);
7695 goto out;
7698 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block flags: taken from the extent tree unless
 * init_extent_tree is set, and computed by calc_extent_flag() when that
 * lookup fails or is skipped.  FULL_BACKREF is assumed when the computation
 * fails as well.
 */
7700 flags = 0;
7701 if (!init_extent_tree) {
7702 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7703 btrfs_header_level(buf), 1, NULL,
7704 &flags);
7705 if (ret < 0) {
7706 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7707 if (ret < 0) {
7708 fprintf(stderr, "Couldn't calc extent flags\n");
7709 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7712 } else {
7713 flags = 0;
7714 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7715 if (ret < 0) {
7716 fprintf(stderr, "Couldn't calc extent flags\n");
7717 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * Cross-check the flag against the block header and the root item and flip
 * it when they contradict each other.
 * NOTE(review): rec is dereferenced from here on without a NULL check even
 * though it is only set when the extent_cache lookup above succeeds -
 * presumably callers always add the record first; verify.
 */
7721 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7722 if (ri != NULL &&
7723 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7724 ri->objectid == btrfs_header_owner(buf)) {
7726 * Ok we got to this block from it's original owner and
7727 * we have FULL_BACKREF set. Relocation can leave
7728 * converted blocks over so this is altogether possible,
7729 * however it's not possible if the generation > the
7730 * last snapshot, so check for this case.
7732 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7733 btrfs_header_generation(buf) > ri->last_snapshot) {
7734 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7735 rec->bad_full_backref = 1;
7738 } else {
7739 if (ri != NULL &&
7740 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7741 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7742 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7743 rec->bad_full_backref = 1;
/*
 * parent/owner are the backref coordinates used for everything this block
 * points to: the block's own bytenr as parent with FULL_BACKREF, the header
 * owner as root otherwise.
 */
7747 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7748 rec->flag_block_full_backref = 1;
7749 parent = bytenr;
7750 owner = 0;
7751 } else {
7752 rec->flag_block_full_backref = 0;
7753 parent = 0;
7754 owner = btrfs_header_owner(buf);
7757 ret = check_block(root, extent_cache, buf, flags);
7758 if (ret)
7759 goto out;
/* Leaf: handle every item according to its key type */
7761 if (btrfs_is_leaf(buf)) {
7762 btree_space_waste += btrfs_leaf_free_space(root, buf);
7763 for (i = 0; i < nritems; i++) {
7764 struct btrfs_file_extent_item *fi;
7765 btrfs_item_key_to_cpu(buf, &key, i);
7767 * Check key type against the leaf owner.
7768 * Could filter quite a lot of early error if
7769 * owner is correct
7771 if (check_type_with_root(btrfs_header_owner(buf),
7772 key.type)) {
7773 fprintf(stderr, "ignoring invalid key\n");
7774 continue;
7776 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7777 process_extent_item(root, extent_cache, buf,
7779 continue;
7781 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7782 process_extent_item(root, extent_cache, buf,
7784 continue;
7786 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7787 total_csum_bytes +=
7788 btrfs_item_size_nr(buf, i);
7789 continue;
7791 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7792 process_chunk_item(chunk_cache, &key, buf, i);
7793 continue;
7795 if (key.type == BTRFS_DEV_ITEM_KEY) {
7796 process_device_item(dev_cache, &key, buf, i);
7797 continue;
7799 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7800 process_block_group_item(block_group_cache,
7801 &key, buf, i);
7802 continue;
7804 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7805 process_device_extent_item(dev_extent_cache,
7806 &key, buf, i);
7807 continue;
7810 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7811 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7812 process_extent_ref_v0(extent_cache, buf, i);
7813 #else
7814 BUG();
7815 #endif
7816 continue;
/*
 * Keyed (non-inline) backref items.  A failing add_tree_backref() is only
 * reported; its negative result stays in ret until ret is assigned again.
 */
7819 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7820 ret = add_tree_backref(extent_cache,
7821 key.objectid, 0, key.offset, 0);
7822 if (ret < 0)
7823 error(
7824 "add_tree_backref failed (leaf tree block): %s",
7825 strerror(-ret));
7826 continue;
7828 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7829 ret = add_tree_backref(extent_cache,
7830 key.objectid, key.offset, 0, 0);
7831 if (ret < 0)
7832 error(
7833 "add_tree_backref failed (leaf shared block): %s",
7834 strerror(-ret));
7835 continue;
7837 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7838 struct btrfs_extent_data_ref *ref;
7839 ref = btrfs_item_ptr(buf, i,
7840 struct btrfs_extent_data_ref);
7841 add_data_backref(extent_cache,
7842 key.objectid, 0,
7843 btrfs_extent_data_ref_root(buf, ref),
7844 btrfs_extent_data_ref_objectid(buf,
7845 ref),
7846 btrfs_extent_data_ref_offset(buf, ref),
7847 btrfs_extent_data_ref_count(buf, ref),
7848 0, root->fs_info->sectorsize);
7849 continue;
7851 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7852 struct btrfs_shared_data_ref *ref;
7853 ref = btrfs_item_ptr(buf, i,
7854 struct btrfs_shared_data_ref);
7855 add_data_backref(extent_cache,
7856 key.objectid, key.offset, 0, 0, 0,
7857 btrfs_shared_data_ref_count(buf, ref),
7858 0, root->fs_info->sectorsize);
7859 continue;
/*
 * Orphan items whose objectid is not BTRFS_ORPHAN_OBJECTID are queued on
 * delete_items together with the owning root; nothing is queued when owner
 * is 0 or the allocation fails.
 */
7861 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7862 struct bad_item *bad;
7864 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7865 continue;
7866 if (!owner)
7867 continue;
7868 bad = malloc(sizeof(struct bad_item));
7869 if (!bad)
7870 continue;
7871 INIT_LIST_HEAD(&bad->list);
7872 memcpy(&bad->key, &key,
7873 sizeof(struct btrfs_key));
7874 bad->root_id = owner;
7875 list_add_tail(&bad->list, &delete_items);
7876 continue;
/*
 * Everything else is ignored except regular file extents: inline extents
 * and extents with disk bytenr 0 are skipped, the rest is accounted and
 * recorded as a data backref found in the fs tree.
 */
7878 if (key.type != BTRFS_EXTENT_DATA_KEY)
7879 continue;
7880 fi = btrfs_item_ptr(buf, i,
7881 struct btrfs_file_extent_item);
7882 if (btrfs_file_extent_type(buf, fi) ==
7883 BTRFS_FILE_EXTENT_INLINE)
7884 continue;
7885 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7886 continue;
7888 data_bytes_allocated +=
7889 btrfs_file_extent_disk_num_bytes(buf, fi);
7890 if (data_bytes_allocated < root->fs_info->sectorsize) {
7891 abort();
7893 data_bytes_referenced +=
7894 btrfs_file_extent_num_bytes(buf, fi);
7895 add_data_backref(extent_cache,
7896 btrfs_file_extent_disk_bytenr(buf, fi),
7897 parent, owner, key.objectid, key.offset -
7898 btrfs_file_extent_offset(buf, fi), 1, 1,
7899 btrfs_file_extent_disk_num_bytes(buf, fi));
/*
 * Internal node: create an extent record and a tree backref for every child
 * pointer and queue the child.  Children of nodes above level 1 go to the
 * nodes queue, the others to pending.  Pointers whose key lies before the
 * root's drop_key at drop_level are skipped.
 */
7901 } else {
7902 int level;
7903 struct btrfs_key first_key;
7905 first_key.objectid = 0;
7907 if (nritems > 0)
7908 btrfs_item_key_to_cpu(buf, &first_key, 0);
7909 level = btrfs_header_level(buf);
7910 for (i = 0; i < nritems; i++) {
7911 struct extent_record tmpl;
7913 ptr = btrfs_node_blockptr(buf, i);
7914 size = root->fs_info->nodesize;
7915 btrfs_node_key_to_cpu(buf, &key, i);
7916 if (ri != NULL) {
7917 if ((level == ri->drop_level)
7918 && is_dropped_key(&key, &ri->drop_key)) {
7919 continue;
7923 memset(&tmpl, 0, sizeof(tmpl));
7924 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7925 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7926 tmpl.start = ptr;
7927 tmpl.nr = size;
7928 tmpl.refs = 1;
7929 tmpl.metadata = 1;
7930 tmpl.max_size = size;
7931 ret = add_extent_rec(extent_cache, &tmpl);
7932 if (ret < 0)
7933 goto out;
7935 ret = add_tree_backref(extent_cache, ptr, parent,
7936 owner, 1);
7937 if (ret < 0) {
7938 error(
7939 "add_tree_backref failed (non-leaf block): %s",
7940 strerror(-ret));
7941 continue;
7944 if (level > 1) {
7945 add_pending(nodes, seen, ptr, size);
7946 } else {
7947 add_pending(pending, seen, ptr, size);
7950 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7951 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size statistics, split by the tree that owns the block */
7953 total_btree_bytes += buf->len;
7954 if (fs_root_objectid(btrfs_header_owner(buf)))
7955 total_fs_tree_bytes += buf->len;
7956 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7957 total_extent_tree_bytes += buf->len;
/* A reloc tree block with mixed backref rev but without the RELOC header flag */
7958 if (!found_old_backref &&
7959 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7960 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7961 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7962 found_old_backref = 1;
7963 out:
7964 free_extent_buffer(buf);
7965 return ret;
7968 static int add_root_to_pending(struct extent_buffer *buf,
7969 struct cache_tree *extent_cache,
7970 struct cache_tree *pending,
7971 struct cache_tree *seen,
7972 struct cache_tree *nodes,
7973 u64 objectid)
7975 struct extent_record tmpl;
7976 int ret;
7978 if (btrfs_header_level(buf) > 0)
7979 add_pending(nodes, seen, buf->start, buf->len);
7980 else
7981 add_pending(pending, seen, buf->start, buf->len);
7983 memset(&tmpl, 0, sizeof(tmpl));
7984 tmpl.start = buf->start;
7985 tmpl.nr = buf->len;
7986 tmpl.is_root = 1;
7987 tmpl.refs = 1;
7988 tmpl.metadata = 1;
7989 tmpl.max_size = buf->len;
7990 add_extent_rec(extent_cache, &tmpl);
7992 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7993 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7994 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7995 0, 1);
7996 else
7997 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7999 return ret;
8002 /* as we fix the tree, we might be deleting blocks that
8003 * we're tracking for repair. This hook makes sure we
8004 * remove any backrefs for blocks as we are fixing them.
8006 static int free_extent_hook(struct btrfs_trans_handle *trans,
8007 struct btrfs_root *root,
8008 u64 bytenr, u64 num_bytes, u64 parent,
8009 u64 root_objectid, u64 owner, u64 offset,
8010 int refs_to_drop)
8012 struct extent_record *rec;
8013 struct cache_extent *cache;
8014 int is_data;
8015 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8017 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8018 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8019 if (!cache)
8020 return 0;
8022 rec = container_of(cache, struct extent_record, cache);
8023 if (is_data) {
8024 struct data_backref *back;
8025 back = find_data_backref(rec, parent, root_objectid, owner,
8026 offset, 1, bytenr, num_bytes);
8027 if (!back)
8028 goto out;
8029 if (back->node.found_ref) {
8030 back->found_ref -= refs_to_drop;
8031 if (rec->refs)
8032 rec->refs -= refs_to_drop;
8034 if (back->node.found_extent_tree) {
8035 back->num_refs -= refs_to_drop;
8036 if (rec->extent_item_refs)
8037 rec->extent_item_refs -= refs_to_drop;
8039 if (back->found_ref == 0)
8040 back->node.found_ref = 0;
8041 if (back->num_refs == 0)
8042 back->node.found_extent_tree = 0;
8044 if (!back->node.found_extent_tree && back->node.found_ref) {
8045 list_del(&back->node.list);
8046 free(back);
8048 } else {
8049 struct tree_backref *back;
8050 back = find_tree_backref(rec, parent, root_objectid);
8051 if (!back)
8052 goto out;
8053 if (back->node.found_ref) {
8054 if (rec->refs)
8055 rec->refs--;
8056 back->node.found_ref = 0;
8058 if (back->node.found_extent_tree) {
8059 if (rec->extent_item_refs)
8060 rec->extent_item_refs--;
8061 back->node.found_extent_tree = 0;
8063 if (!back->node.found_extent_tree && back->node.found_ref) {
8064 list_del(&back->node.list);
8065 free(back);
8068 maybe_free_extent_rec(extent_cache, rec);
8069 out:
8070 return 0;
8073 static int delete_extent_records(struct btrfs_trans_handle *trans,
8074 struct btrfs_root *root,
8075 struct btrfs_path *path,
8076 u64 bytenr)
8078 struct btrfs_key key;
8079 struct btrfs_key found_key;
8080 struct extent_buffer *leaf;
8081 int ret;
8082 int slot;
8085 key.objectid = bytenr;
8086 key.type = (u8)-1;
8087 key.offset = (u64)-1;
8089 while(1) {
8090 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8091 &key, path, 0, 1);
8092 if (ret < 0)
8093 break;
8095 if (ret > 0) {
8096 ret = 0;
8097 if (path->slots[0] == 0)
8098 break;
8099 path->slots[0]--;
8101 ret = 0;
8103 leaf = path->nodes[0];
8104 slot = path->slots[0];
8106 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8107 if (found_key.objectid != bytenr)
8108 break;
8110 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8111 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8112 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8113 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8114 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8115 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8116 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8117 btrfs_release_path(path);
8118 if (found_key.type == 0) {
8119 if (found_key.offset == 0)
8120 break;
8121 key.offset = found_key.offset - 1;
8122 key.type = found_key.type;
8124 key.type = found_key.type - 1;
8125 key.offset = (u64)-1;
8126 continue;
8129 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8130 found_key.objectid, found_key.type, found_key.offset);
8132 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8133 if (ret)
8134 break;
8135 btrfs_release_path(path);
8137 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8138 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8139 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8140 found_key.offset : root->fs_info->nodesize;
8142 ret = btrfs_update_block_group(trans, root, bytenr,
8143 bytes, 0, 0);
8144 if (ret)
8145 break;
8149 btrfs_release_path(path);
8150 return ret;
8154 * for a single backref, this will allocate a new extent
8155 * and add the backref to it.
8157 static int record_extent(struct btrfs_trans_handle *trans,
8158 struct btrfs_fs_info *info,
8159 struct btrfs_path *path,
8160 struct extent_record *rec,
8161 struct extent_backref *back,
8162 int allocated, u64 flags)
8164 int ret = 0;
8165 struct btrfs_root *extent_root = info->extent_root;
8166 struct extent_buffer *leaf;
8167 struct btrfs_key ins_key;
8168 struct btrfs_extent_item *ei;
8169 struct data_backref *dback;
8170 struct btrfs_tree_block_info *bi;
8172 if (!back->is_data)
8173 rec->max_size = max_t(u64, rec->max_size,
8174 info->nodesize);
8176 if (!allocated) {
8177 u32 item_size = sizeof(*ei);
8179 if (!back->is_data)
8180 item_size += sizeof(*bi);
8182 ins_key.objectid = rec->start;
8183 ins_key.offset = rec->max_size;
8184 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8186 ret = btrfs_insert_empty_item(trans, extent_root, path,
8187 &ins_key, item_size);
8188 if (ret)
8189 goto fail;
8191 leaf = path->nodes[0];
8192 ei = btrfs_item_ptr(leaf, path->slots[0],
8193 struct btrfs_extent_item);
8195 btrfs_set_extent_refs(leaf, ei, 0);
8196 btrfs_set_extent_generation(leaf, ei, rec->generation);
8198 if (back->is_data) {
8199 btrfs_set_extent_flags(leaf, ei,
8200 BTRFS_EXTENT_FLAG_DATA);
8201 } else {
8202 struct btrfs_disk_key copy_key;;
8204 bi = (struct btrfs_tree_block_info *)(ei + 1);
8205 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8206 sizeof(*bi));
8208 btrfs_set_disk_key_objectid(&copy_key,
8209 rec->info_objectid);
8210 btrfs_set_disk_key_type(&copy_key, 0);
8211 btrfs_set_disk_key_offset(&copy_key, 0);
8213 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8214 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8216 btrfs_set_extent_flags(leaf, ei,
8217 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8220 btrfs_mark_buffer_dirty(leaf);
8221 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8222 rec->max_size, 1, 0);
8223 if (ret)
8224 goto fail;
8225 btrfs_release_path(path);
8228 if (back->is_data) {
8229 u64 parent;
8230 int i;
8232 dback = to_data_backref(back);
8233 if (back->full_backref)
8234 parent = dback->parent;
8235 else
8236 parent = 0;
8238 for (i = 0; i < dback->found_ref; i++) {
8239 /* if parent != 0, we're doing a full backref
8240 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8241 * just makes the backref allocator create a data
8242 * backref
8244 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8245 rec->start, rec->max_size,
8246 parent,
8247 dback->root,
8248 parent ?
8249 BTRFS_FIRST_FREE_OBJECTID :
8250 dback->owner,
8251 dback->offset);
8252 if (ret)
8253 break;
8255 fprintf(stderr, "adding new data backref"
8256 " on %llu %s %llu owner %llu"
8257 " offset %llu found %d\n",
8258 (unsigned long long)rec->start,
8259 back->full_backref ?
8260 "parent" : "root",
8261 back->full_backref ?
8262 (unsigned long long)parent :
8263 (unsigned long long)dback->root,
8264 (unsigned long long)dback->owner,
8265 (unsigned long long)dback->offset,
8266 dback->found_ref);
8267 } else {
8268 u64 parent;
8269 struct tree_backref *tback;
8271 tback = to_tree_backref(back);
8272 if (back->full_backref)
8273 parent = tback->parent;
8274 else
8275 parent = 0;
8277 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8278 rec->start, rec->max_size,
8279 parent, tback->root, 0, 0);
8280 fprintf(stderr, "adding new tree backref on "
8281 "start %llu len %llu parent %llu root %llu\n",
8282 rec->start, rec->max_size, parent, tback->root);
8284 fail:
8285 btrfs_release_path(path);
8286 return ret;
8289 static struct extent_entry *find_entry(struct list_head *entries,
8290 u64 bytenr, u64 bytes)
8292 struct extent_entry *entry = NULL;
8294 list_for_each_entry(entry, entries, list) {
8295 if (entry->bytenr == bytenr && entry->bytes == bytes)
8296 return entry;
8299 return NULL;
8302 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8304 struct extent_entry *entry, *best = NULL, *prev = NULL;
8306 list_for_each_entry(entry, entries, list) {
8308 * If there are as many broken entries as entries then we know
8309 * not to trust this particular entry.
8311 if (entry->broken == entry->count)
8312 continue;
8315 * Special case, when there are only two entries and 'best' is
8316 * the first one
8318 if (!prev) {
8319 best = entry;
8320 prev = entry;
8321 continue;
8325 * If our current entry == best then we can't be sure our best
8326 * is really the best, so we need to keep searching.
8328 if (best && best->count == entry->count) {
8329 prev = entry;
8330 best = NULL;
8331 continue;
8334 /* Prev == entry, not good enough, have to keep searching */
8335 if (!prev->broken && prev->count == entry->count)
8336 continue;
8338 if (!best)
8339 best = (prev->count > entry->count) ? prev : entry;
8340 else if (best->count < entry->count)
8341 best = entry;
8342 prev = entry;
8345 return best;
8348 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8349 struct data_backref *dback, struct extent_entry *entry)
8351 struct btrfs_trans_handle *trans;
8352 struct btrfs_root *root;
8353 struct btrfs_file_extent_item *fi;
8354 struct extent_buffer *leaf;
8355 struct btrfs_key key;
8356 u64 bytenr, bytes;
8357 int ret, err;
8359 key.objectid = dback->root;
8360 key.type = BTRFS_ROOT_ITEM_KEY;
8361 key.offset = (u64)-1;
8362 root = btrfs_read_fs_root(info, &key);
8363 if (IS_ERR(root)) {
8364 fprintf(stderr, "Couldn't find root for our ref\n");
8365 return -EINVAL;
8369 * The backref points to the original offset of the extent if it was
8370 * split, so we need to search down to the offset we have and then walk
8371 * forward until we find the backref we're looking for.
8373 key.objectid = dback->owner;
8374 key.type = BTRFS_EXTENT_DATA_KEY;
8375 key.offset = dback->offset;
8376 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8377 if (ret < 0) {
8378 fprintf(stderr, "Error looking up ref %d\n", ret);
8379 return ret;
8382 while (1) {
8383 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8384 ret = btrfs_next_leaf(root, path);
8385 if (ret) {
8386 fprintf(stderr, "Couldn't find our ref, next\n");
8387 return -EINVAL;
8390 leaf = path->nodes[0];
8391 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8392 if (key.objectid != dback->owner ||
8393 key.type != BTRFS_EXTENT_DATA_KEY) {
8394 fprintf(stderr, "Couldn't find our ref, search\n");
8395 return -EINVAL;
8397 fi = btrfs_item_ptr(leaf, path->slots[0],
8398 struct btrfs_file_extent_item);
8399 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8400 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8402 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8403 break;
8404 path->slots[0]++;
8407 btrfs_release_path(path);
8409 trans = btrfs_start_transaction(root, 1);
8410 if (IS_ERR(trans))
8411 return PTR_ERR(trans);
8414 * Ok we have the key of the file extent we want to fix, now we can cow
8415 * down to the thing and fix it.
8417 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8418 if (ret < 0) {
8419 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8420 key.objectid, key.type, key.offset, ret);
8421 goto out;
8423 if (ret > 0) {
8424 fprintf(stderr, "Well that's odd, we just found this key "
8425 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8426 key.offset);
8427 ret = -EINVAL;
8428 goto out;
8430 leaf = path->nodes[0];
8431 fi = btrfs_item_ptr(leaf, path->slots[0],
8432 struct btrfs_file_extent_item);
8434 if (btrfs_file_extent_compression(leaf, fi) &&
8435 dback->disk_bytenr != entry->bytenr) {
8436 fprintf(stderr, "Ref doesn't match the record start and is "
8437 "compressed, please take a btrfs-image of this file "
8438 "system and send it to a btrfs developer so they can "
8439 "complete this functionality for bytenr %Lu\n",
8440 dback->disk_bytenr);
8441 ret = -EINVAL;
8442 goto out;
8445 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8446 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8447 } else if (dback->disk_bytenr > entry->bytenr) {
8448 u64 off_diff, offset;
8450 off_diff = dback->disk_bytenr - entry->bytenr;
8451 offset = btrfs_file_extent_offset(leaf, fi);
8452 if (dback->disk_bytenr + offset +
8453 btrfs_file_extent_num_bytes(leaf, fi) >
8454 entry->bytenr + entry->bytes) {
8455 fprintf(stderr, "Ref is past the entry end, please "
8456 "take a btrfs-image of this file system and "
8457 "send it to a btrfs developer, ref %Lu\n",
8458 dback->disk_bytenr);
8459 ret = -EINVAL;
8460 goto out;
8462 offset += off_diff;
8463 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8464 btrfs_set_file_extent_offset(leaf, fi, offset);
8465 } else if (dback->disk_bytenr < entry->bytenr) {
8466 u64 offset;
8468 offset = btrfs_file_extent_offset(leaf, fi);
8469 if (dback->disk_bytenr + offset < entry->bytenr) {
8470 fprintf(stderr, "Ref is before the entry start, please"
8471 " take a btrfs-image of this file system and "
8472 "send it to a btrfs developer, ref %Lu\n",
8473 dback->disk_bytenr);
8474 ret = -EINVAL;
8475 goto out;
8478 offset += dback->disk_bytenr;
8479 offset -= entry->bytenr;
8480 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8481 btrfs_set_file_extent_offset(leaf, fi, offset);
8484 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8487 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8488 * only do this if we aren't using compression, otherwise it's a
8489 * trickier case.
8491 if (!btrfs_file_extent_compression(leaf, fi))
8492 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8493 else
8494 printf("ram bytes may be wrong?\n");
8495 btrfs_mark_buffer_dirty(leaf);
8496 out:
8497 err = btrfs_commit_transaction(trans, root);
8498 btrfs_release_path(path);
8499 return ret ? ret : err;
8502 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8503 struct extent_record *rec)
8505 struct extent_backref *back;
8506 struct data_backref *dback;
8507 struct extent_entry *entry, *best = NULL;
8508 LIST_HEAD(entries);
8509 int nr_entries = 0;
8510 int broken_entries = 0;
8511 int ret = 0;
8512 short mismatch = 0;
8515 * Metadata is easy and the backrefs should always agree on bytenr and
8516 * size, if not we've got bigger issues.
8518 if (rec->metadata)
8519 return 0;
8521 list_for_each_entry(back, &rec->backrefs, list) {
8522 if (back->full_backref || !back->is_data)
8523 continue;
8525 dback = to_data_backref(back);
8528 * We only pay attention to backrefs that we found a real
8529 * backref for.
8531 if (dback->found_ref == 0)
8532 continue;
8535 * For now we only catch when the bytes don't match, not the
8536 * bytenr. We can easily do this at the same time, but I want
8537 * to have a fs image to test on before we just add repair
8538 * functionality willy-nilly so we know we won't screw up the
8539 * repair.
8542 entry = find_entry(&entries, dback->disk_bytenr,
8543 dback->bytes);
8544 if (!entry) {
8545 entry = malloc(sizeof(struct extent_entry));
8546 if (!entry) {
8547 ret = -ENOMEM;
8548 goto out;
8550 memset(entry, 0, sizeof(*entry));
8551 entry->bytenr = dback->disk_bytenr;
8552 entry->bytes = dback->bytes;
8553 list_add_tail(&entry->list, &entries);
8554 nr_entries++;
8558 * If we only have on entry we may think the entries agree when
8559 * in reality they don't so we have to do some extra checking.
8561 if (dback->disk_bytenr != rec->start ||
8562 dback->bytes != rec->nr || back->broken)
8563 mismatch = 1;
8565 if (back->broken) {
8566 entry->broken++;
8567 broken_entries++;
8570 entry->count++;
8573 /* Yay all the backrefs agree, carry on good sir */
8574 if (nr_entries <= 1 && !mismatch)
8575 goto out;
8577 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8578 "%Lu\n", rec->start);
8581 * First we want to see if the backrefs can agree amongst themselves who
8582 * is right, so figure out which one of the entries has the highest
8583 * count.
8585 best = find_most_right_entry(&entries);
8588 * Ok so we may have an even split between what the backrefs think, so
8589 * this is where we use the extent ref to see what it thinks.
8591 if (!best) {
8592 entry = find_entry(&entries, rec->start, rec->nr);
8593 if (!entry && (!broken_entries || !rec->found_rec)) {
8594 fprintf(stderr, "Backrefs don't agree with each other "
8595 "and extent record doesn't agree with anybody,"
8596 " so we can't fix bytenr %Lu bytes %Lu\n",
8597 rec->start, rec->nr);
8598 ret = -EINVAL;
8599 goto out;
8600 } else if (!entry) {
8602 * Ok our backrefs were broken, we'll assume this is the
8603 * correct value and add an entry for this range.
8605 entry = malloc(sizeof(struct extent_entry));
8606 if (!entry) {
8607 ret = -ENOMEM;
8608 goto out;
8610 memset(entry, 0, sizeof(*entry));
8611 entry->bytenr = rec->start;
8612 entry->bytes = rec->nr;
8613 list_add_tail(&entry->list, &entries);
8614 nr_entries++;
8616 entry->count++;
8617 best = find_most_right_entry(&entries);
8618 if (!best) {
8619 fprintf(stderr, "Backrefs and extent record evenly "
8620 "split on who is right, this is going to "
8621 "require user input to fix bytenr %Lu bytes "
8622 "%Lu\n", rec->start, rec->nr);
8623 ret = -EINVAL;
8624 goto out;
8629 * I don't think this can happen currently as we'll abort() if we catch
8630 * this case higher up, but in case somebody removes that we still can't
8631 * deal with it properly here yet, so just bail out of that's the case.
8633 if (best->bytenr != rec->start) {
8634 fprintf(stderr, "Extent start and backref starts don't match, "
8635 "please use btrfs-image on this file system and send "
8636 "it to a btrfs developer so they can make fsck fix "
8637 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8638 rec->start, rec->nr);
8639 ret = -EINVAL;
8640 goto out;
8644 * Ok great we all agreed on an extent record, let's go find the real
8645 * references and fix up the ones that don't match.
8647 list_for_each_entry(back, &rec->backrefs, list) {
8648 if (back->full_backref || !back->is_data)
8649 continue;
8651 dback = to_data_backref(back);
8654 * Still ignoring backrefs that don't have a real ref attached
8655 * to them.
8657 if (dback->found_ref == 0)
8658 continue;
8660 if (dback->bytes == best->bytes &&
8661 dback->disk_bytenr == best->bytenr)
8662 continue;
8664 ret = repair_ref(info, path, dback, best);
8665 if (ret)
8666 goto out;
8670 * Ok we messed with the actual refs, which means we need to drop our
8671 * entire cache and go back and rescan. I know this is a huge pain and
8672 * adds a lot of extra work, but it's the only way to be safe. Once all
8673 * the backrefs agree we may not need to do anything to the extent
8674 * record itself.
8676 ret = -EAGAIN;
8677 out:
8678 while (!list_empty(&entries)) {
8679 entry = list_entry(entries.next, struct extent_entry, list);
8680 list_del_init(&entry->list);
8681 free(entry);
8683 return ret;
8686 static int process_duplicates(struct cache_tree *extent_cache,
8687 struct extent_record *rec)
8689 struct extent_record *good, *tmp;
8690 struct cache_extent *cache;
8691 int ret;
8694 * If we found a extent record for this extent then return, or if we
8695 * have more than one duplicate we are likely going to need to delete
8696 * something.
8698 if (rec->found_rec || rec->num_duplicates > 1)
8699 return 0;
8701 /* Shouldn't happen but just in case */
8702 BUG_ON(!rec->num_duplicates);
8705 * So this happens if we end up with a backref that doesn't match the
8706 * actual extent entry. So either the backref is bad or the extent
8707 * entry is bad. Either way we want to have the extent_record actually
8708 * reflect what we found in the extent_tree, so we need to take the
8709 * duplicate out and use that as the extent_record since the only way we
8710 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8712 remove_cache_extent(extent_cache, &rec->cache);
8714 good = to_extent_record(rec->dups.next);
8715 list_del_init(&good->list);
8716 INIT_LIST_HEAD(&good->backrefs);
8717 INIT_LIST_HEAD(&good->dups);
8718 good->cache.start = good->start;
8719 good->cache.size = good->nr;
8720 good->content_checked = 0;
8721 good->owner_ref_checked = 0;
8722 good->num_duplicates = 0;
8723 good->refs = rec->refs;
8724 list_splice_init(&rec->backrefs, &good->backrefs);
8725 while (1) {
8726 cache = lookup_cache_extent(extent_cache, good->start,
8727 good->nr);
8728 if (!cache)
8729 break;
8730 tmp = container_of(cache, struct extent_record, cache);
8733 * If we find another overlapping extent and it's found_rec is
8734 * set then it's a duplicate and we need to try and delete
8735 * something.
8737 if (tmp->found_rec || tmp->num_duplicates > 0) {
8738 if (list_empty(&good->list))
8739 list_add_tail(&good->list,
8740 &duplicate_extents);
8741 good->num_duplicates += tmp->num_duplicates + 1;
8742 list_splice_init(&tmp->dups, &good->dups);
8743 list_del_init(&tmp->list);
8744 list_add_tail(&tmp->list, &good->dups);
8745 remove_cache_extent(extent_cache, &tmp->cache);
8746 continue;
8750 * Ok we have another non extent item backed extent rec, so lets
8751 * just add it to this extent and carry on like we did above.
8753 good->refs += tmp->refs;
8754 list_splice_init(&tmp->backrefs, &good->backrefs);
8755 remove_cache_extent(extent_cache, &tmp->cache);
8756 free(tmp);
8758 ret = insert_cache_extent(extent_cache, &good->cache);
8759 BUG_ON(ret);
8760 free(rec);
8761 return good->num_duplicates ? 0 : 1;
8764 static int delete_duplicate_records(struct btrfs_root *root,
8765 struct extent_record *rec)
8767 struct btrfs_trans_handle *trans;
8768 LIST_HEAD(delete_list);
8769 struct btrfs_path path;
8770 struct extent_record *tmp, *good, *n;
8771 int nr_del = 0;
8772 int ret = 0, err;
8773 struct btrfs_key key;
8775 btrfs_init_path(&path);
8777 good = rec;
8778 /* Find the record that covers all of the duplicates. */
8779 list_for_each_entry(tmp, &rec->dups, list) {
8780 if (good->start < tmp->start)
8781 continue;
8782 if (good->nr > tmp->nr)
8783 continue;
8785 if (tmp->start + tmp->nr < good->start + good->nr) {
8786 fprintf(stderr, "Ok we have overlapping extents that "
8787 "aren't completely covered by each other, this "
8788 "is going to require more careful thought. "
8789 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8790 tmp->start, tmp->nr, good->start, good->nr);
8791 abort();
8793 good = tmp;
8796 if (good != rec)
8797 list_add_tail(&rec->list, &delete_list);
8799 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8800 if (tmp == good)
8801 continue;
8802 list_move_tail(&tmp->list, &delete_list);
8805 root = root->fs_info->extent_root;
8806 trans = btrfs_start_transaction(root, 1);
8807 if (IS_ERR(trans)) {
8808 ret = PTR_ERR(trans);
8809 goto out;
8812 list_for_each_entry(tmp, &delete_list, list) {
8813 if (tmp->found_rec == 0)
8814 continue;
8815 key.objectid = tmp->start;
8816 key.type = BTRFS_EXTENT_ITEM_KEY;
8817 key.offset = tmp->nr;
8819 /* Shouldn't happen but just in case */
8820 if (tmp->metadata) {
8821 fprintf(stderr, "Well this shouldn't happen, extent "
8822 "record overlaps but is metadata? "
8823 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8824 abort();
8827 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8828 if (ret) {
8829 if (ret > 0)
8830 ret = -EINVAL;
8831 break;
8833 ret = btrfs_del_item(trans, root, &path);
8834 if (ret)
8835 break;
8836 btrfs_release_path(&path);
8837 nr_del++;
8839 err = btrfs_commit_transaction(trans, root);
8840 if (err && !ret)
8841 ret = err;
8842 out:
8843 while (!list_empty(&delete_list)) {
8844 tmp = to_extent_record(delete_list.next);
8845 list_del_init(&tmp->list);
8846 if (tmp == rec)
8847 continue;
8848 free(tmp);
8851 while (!list_empty(&rec->dups)) {
8852 tmp = to_extent_record(rec->dups.next);
8853 list_del_init(&tmp->list);
8854 free(tmp);
8857 btrfs_release_path(&path);
8859 if (!ret && !nr_del)
8860 rec->num_duplicates = 0;
8862 return ret ? ret : nr_del;
8865 static int find_possible_backrefs(struct btrfs_fs_info *info,
8866 struct btrfs_path *path,
8867 struct cache_tree *extent_cache,
8868 struct extent_record *rec)
8870 struct btrfs_root *root;
8871 struct extent_backref *back;
8872 struct data_backref *dback;
8873 struct cache_extent *cache;
8874 struct btrfs_file_extent_item *fi;
8875 struct btrfs_key key;
8876 u64 bytenr, bytes;
8877 int ret;
8879 list_for_each_entry(back, &rec->backrefs, list) {
8880 /* Don't care about full backrefs (poor unloved backrefs) */
8881 if (back->full_backref || !back->is_data)
8882 continue;
8884 dback = to_data_backref(back);
8886 /* We found this one, we don't need to do a lookup */
8887 if (dback->found_ref)
8888 continue;
8890 key.objectid = dback->root;
8891 key.type = BTRFS_ROOT_ITEM_KEY;
8892 key.offset = (u64)-1;
8894 root = btrfs_read_fs_root(info, &key);
8896 /* No root, definitely a bad ref, skip */
8897 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8898 continue;
8899 /* Other err, exit */
8900 if (IS_ERR(root))
8901 return PTR_ERR(root);
8903 key.objectid = dback->owner;
8904 key.type = BTRFS_EXTENT_DATA_KEY;
8905 key.offset = dback->offset;
8906 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8907 if (ret) {
8908 btrfs_release_path(path);
8909 if (ret < 0)
8910 return ret;
8911 /* Didn't find it, we can carry on */
8912 ret = 0;
8913 continue;
8916 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8917 struct btrfs_file_extent_item);
8918 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8919 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8920 btrfs_release_path(path);
8921 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8922 if (cache) {
8923 struct extent_record *tmp;
8924 tmp = container_of(cache, struct extent_record, cache);
8927 * If we found an extent record for the bytenr for this
8928 * particular backref then we can't add it to our
8929 * current extent record. We only want to add backrefs
8930 * that don't have a corresponding extent item in the
8931 * extent tree since they likely belong to this record
8932 * and we need to fix it if it doesn't match bytenrs.
8934 if (tmp->found_rec)
8935 continue;
8938 dback->found_ref += 1;
8939 dback->disk_bytenr = bytenr;
8940 dback->bytes = bytes;
8943 * Set this so the verify backref code knows not to trust the
8944 * values in this backref.
8946 back->broken = 1;
8949 return 0;
8953 * Record orphan data ref into corresponding root.
8955 * Return 0 if the extent item contains data ref and recorded.
8956 * Return 1 if the extent item contains no useful data ref
8957 * On that case, it may contains only shared_dataref or metadata backref
8958 * or the file extent exists(this should be handled by the extent bytenr
8959 * recovery routine)
8960 * Return <0 if something goes wrong.
8962 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8963 struct extent_record *rec)
8965 struct btrfs_key key;
8966 struct btrfs_root *dest_root;
8967 struct extent_backref *back;
8968 struct data_backref *dback;
8969 struct orphan_data_extent *orphan;
8970 struct btrfs_path path;
8971 int recorded_data_ref = 0;
8972 int ret = 0;
8974 if (rec->metadata)
8975 return 1;
8976 btrfs_init_path(&path);
8977 list_for_each_entry(back, &rec->backrefs, list) {
8978 if (back->full_backref || !back->is_data ||
8979 !back->found_extent_tree)
8980 continue;
8981 dback = to_data_backref(back);
8982 if (dback->found_ref)
8983 continue;
8984 key.objectid = dback->root;
8985 key.type = BTRFS_ROOT_ITEM_KEY;
8986 key.offset = (u64)-1;
8988 dest_root = btrfs_read_fs_root(fs_info, &key);
8990 /* For non-exist root we just skip it */
8991 if (IS_ERR(dest_root) || !dest_root)
8992 continue;
8994 key.objectid = dback->owner;
8995 key.type = BTRFS_EXTENT_DATA_KEY;
8996 key.offset = dback->offset;
8998 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8999 btrfs_release_path(&path);
9001 * For ret < 0, it's OK since the fs-tree may be corrupted,
9002 * we need to record it for inode/file extent rebuild.
9003 * For ret > 0, we record it only for file extent rebuild.
9004 * For ret == 0, the file extent exists but only bytenr
9005 * mismatch, let the original bytenr fix routine to handle,
9006 * don't record it.
9008 if (ret == 0)
9009 continue;
9010 ret = 0;
9011 orphan = malloc(sizeof(*orphan));
9012 if (!orphan) {
9013 ret = -ENOMEM;
9014 goto out;
9016 INIT_LIST_HEAD(&orphan->list);
9017 orphan->root = dback->root;
9018 orphan->objectid = dback->owner;
9019 orphan->offset = dback->offset;
9020 orphan->disk_bytenr = rec->cache.start;
9021 orphan->disk_len = rec->cache.size;
9022 list_add(&dest_root->orphan_data_extents, &orphan->list);
9023 recorded_data_ref = 1;
9025 out:
9026 btrfs_release_path(&path);
9027 if (!ret)
9028 return !recorded_data_ref;
9029 else
9030 return ret;
9034 * when an incorrect extent item is found, this will delete
9035 * all of the existing entries for it and recreate them
9036 * based on what the tree scan found.
9038 static int fixup_extent_refs(struct btrfs_fs_info *info,
9039 struct cache_tree *extent_cache,
9040 struct extent_record *rec)
9042 struct btrfs_trans_handle *trans = NULL;
9043 int ret;
9044 struct btrfs_path path;
9045 struct list_head *cur = rec->backrefs.next;
9046 struct cache_extent *cache;
9047 struct extent_backref *back;
9048 int allocated = 0;
9049 u64 flags = 0;
9051 if (rec->flag_block_full_backref)
9052 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9054 btrfs_init_path(&path);
9055 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9057 * Sometimes the backrefs themselves are so broken they don't
9058 * get attached to any meaningful rec, so first go back and
9059 * check any of our backrefs that we couldn't find and throw
9060 * them into the list if we find the backref so that
9061 * verify_backrefs can figure out what to do.
9063 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9064 if (ret < 0)
9065 goto out;
9068 /* step one, make sure all of the backrefs agree */
9069 ret = verify_backrefs(info, &path, rec);
9070 if (ret < 0)
9071 goto out;
9073 trans = btrfs_start_transaction(info->extent_root, 1);
9074 if (IS_ERR(trans)) {
9075 ret = PTR_ERR(trans);
9076 goto out;
9079 /* step two, delete all the existing records */
9080 ret = delete_extent_records(trans, info->extent_root, &path,
9081 rec->start);
9083 if (ret < 0)
9084 goto out;
9086 /* was this block corrupt? If so, don't add references to it */
9087 cache = lookup_cache_extent(info->corrupt_blocks,
9088 rec->start, rec->max_size);
9089 if (cache) {
9090 ret = 0;
9091 goto out;
9094 /* step three, recreate all the refs we did find */
9095 while(cur != &rec->backrefs) {
9096 back = to_extent_backref(cur);
9097 cur = cur->next;
9100 * if we didn't find any references, don't create a
9101 * new extent record
9103 if (!back->found_ref)
9104 continue;
9106 rec->bad_full_backref = 0;
9107 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9108 allocated = 1;
9110 if (ret)
9111 goto out;
9113 out:
9114 if (trans) {
9115 int err = btrfs_commit_transaction(trans, info->extent_root);
9116 if (!ret)
9117 ret = err;
9120 if (!ret)
9121 fprintf(stderr, "Repaired extent references for %llu\n",
9122 (unsigned long long)rec->start);
9124 btrfs_release_path(&path);
9125 return ret;
9128 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9129 struct extent_record *rec)
9131 struct btrfs_trans_handle *trans;
9132 struct btrfs_root *root = fs_info->extent_root;
9133 struct btrfs_path path;
9134 struct btrfs_extent_item *ei;
9135 struct btrfs_key key;
9136 u64 flags;
9137 int ret = 0;
9139 key.objectid = rec->start;
9140 if (rec->metadata) {
9141 key.type = BTRFS_METADATA_ITEM_KEY;
9142 key.offset = rec->info_level;
9143 } else {
9144 key.type = BTRFS_EXTENT_ITEM_KEY;
9145 key.offset = rec->max_size;
9148 trans = btrfs_start_transaction(root, 0);
9149 if (IS_ERR(trans))
9150 return PTR_ERR(trans);
9152 btrfs_init_path(&path);
9153 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9154 if (ret < 0) {
9155 btrfs_release_path(&path);
9156 btrfs_commit_transaction(trans, root);
9157 return ret;
9158 } else if (ret) {
9159 fprintf(stderr, "Didn't find extent for %llu\n",
9160 (unsigned long long)rec->start);
9161 btrfs_release_path(&path);
9162 btrfs_commit_transaction(trans, root);
9163 return -ENOENT;
9166 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9167 struct btrfs_extent_item);
9168 flags = btrfs_extent_flags(path.nodes[0], ei);
9169 if (rec->flag_block_full_backref) {
9170 fprintf(stderr, "setting full backref on %llu\n",
9171 (unsigned long long)key.objectid);
9172 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9173 } else {
9174 fprintf(stderr, "clearing full backref on %llu\n",
9175 (unsigned long long)key.objectid);
9176 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9178 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9179 btrfs_mark_buffer_dirty(path.nodes[0]);
9180 btrfs_release_path(&path);
9181 ret = btrfs_commit_transaction(trans, root);
9182 if (!ret)
9183 fprintf(stderr, "Repaired extent flags for %llu\n",
9184 (unsigned long long)rec->start);
9186 return ret;
9189 /* right now we only prune from the extent allocation tree */
9190 static int prune_one_block(struct btrfs_trans_handle *trans,
9191 struct btrfs_fs_info *info,
9192 struct btrfs_corrupt_block *corrupt)
9194 int ret;
9195 struct btrfs_path path;
9196 struct extent_buffer *eb;
9197 u64 found;
9198 int slot;
9199 int nritems;
9200 int level = corrupt->level + 1;
9202 btrfs_init_path(&path);
9203 again:
9204 /* we want to stop at the parent to our busted block */
9205 path.lowest_level = level;
9207 ret = btrfs_search_slot(trans, info->extent_root,
9208 &corrupt->key, &path, -1, 1);
9210 if (ret < 0)
9211 goto out;
9213 eb = path.nodes[level];
9214 if (!eb) {
9215 ret = -ENOENT;
9216 goto out;
9220 * hopefully the search gave us the block we want to prune,
9221 * lets try that first
9223 slot = path.slots[level];
9224 found = btrfs_node_blockptr(eb, slot);
9225 if (found == corrupt->cache.start)
9226 goto del_ptr;
9228 nritems = btrfs_header_nritems(eb);
9230 /* the search failed, lets scan this node and hope we find it */
9231 for (slot = 0; slot < nritems; slot++) {
9232 found = btrfs_node_blockptr(eb, slot);
9233 if (found == corrupt->cache.start)
9234 goto del_ptr;
9237 * we couldn't find the bad block. TODO, search all the nodes for pointers
9238 * to this block
9240 if (eb == info->extent_root->node) {
9241 ret = -ENOENT;
9242 goto out;
9243 } else {
9244 level++;
9245 btrfs_release_path(&path);
9246 goto again;
9249 del_ptr:
9250 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9251 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9253 out:
9254 btrfs_release_path(&path);
9255 return ret;
9258 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9260 struct btrfs_trans_handle *trans = NULL;
9261 struct cache_extent *cache;
9262 struct btrfs_corrupt_block *corrupt;
9264 while (1) {
9265 cache = search_cache_extent(info->corrupt_blocks, 0);
9266 if (!cache)
9267 break;
9268 if (!trans) {
9269 trans = btrfs_start_transaction(info->extent_root, 1);
9270 if (IS_ERR(trans))
9271 return PTR_ERR(trans);
9273 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9274 prune_one_block(trans, info, corrupt);
9275 remove_cache_extent(info->corrupt_blocks, cache);
9277 if (trans)
9278 return btrfs_commit_transaction(trans, info->extent_root);
9279 return 0;
9282 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9284 struct btrfs_block_group_cache *cache;
9285 u64 start, end;
9286 int ret;
9288 while (1) {
9289 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9290 &start, &end, EXTENT_DIRTY);
9291 if (ret)
9292 break;
9293 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9296 start = 0;
9297 while (1) {
9298 cache = btrfs_lookup_first_block_group(fs_info, start);
9299 if (!cache)
9300 break;
9301 if (cache->cached)
9302 cache->cached = 0;
9303 start = cache->key.objectid + cache->key.offset;
9307 static int check_extent_refs(struct btrfs_root *root,
9308 struct cache_tree *extent_cache)
9310 struct extent_record *rec;
9311 struct cache_extent *cache;
9312 int ret = 0;
9313 int had_dups = 0;
9315 if (repair) {
9317 * if we're doing a repair, we have to make sure
9318 * we don't allocate from the problem extents.
9319 * In the worst case, this will be all the
9320 * extents in the FS
9322 cache = search_cache_extent(extent_cache, 0);
9323 while(cache) {
9324 rec = container_of(cache, struct extent_record, cache);
9325 set_extent_dirty(root->fs_info->excluded_extents,
9326 rec->start,
9327 rec->start + rec->max_size - 1);
9328 cache = next_cache_extent(cache);
9331 /* pin down all the corrupted blocks too */
9332 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9333 while(cache) {
9334 set_extent_dirty(root->fs_info->excluded_extents,
9335 cache->start,
9336 cache->start + cache->size - 1);
9337 cache = next_cache_extent(cache);
9339 prune_corrupt_blocks(root->fs_info);
9340 reset_cached_block_groups(root->fs_info);
9343 reset_cached_block_groups(root->fs_info);
9346 * We need to delete any duplicate entries we find first otherwise we
9347 * could mess up the extent tree when we have backrefs that actually
9348 * belong to a different extent item and not the weird duplicate one.
9350 while (repair && !list_empty(&duplicate_extents)) {
9351 rec = to_extent_record(duplicate_extents.next);
9352 list_del_init(&rec->list);
9354 /* Sometimes we can find a backref before we find an actual
9355 * extent, so we need to process it a little bit to see if there
9356 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9357 * if this is a backref screwup. If we need to delete stuff
9358 * process_duplicates() will return 0, otherwise it will return
9359 * 1 and we
9361 if (process_duplicates(extent_cache, rec))
9362 continue;
9363 ret = delete_duplicate_records(root, rec);
9364 if (ret < 0)
9365 return ret;
9367 * delete_duplicate_records will return the number of entries
9368 * deleted, so if it's greater than 0 then we know we actually
9369 * did something and we need to remove.
9371 if (ret)
9372 had_dups = 1;
9375 if (had_dups)
9376 return -EAGAIN;
9378 while(1) {
9379 int cur_err = 0;
9380 int fix = 0;
9382 cache = search_cache_extent(extent_cache, 0);
9383 if (!cache)
9384 break;
9385 rec = container_of(cache, struct extent_record, cache);
9386 if (rec->num_duplicates) {
9387 fprintf(stderr, "extent item %llu has multiple extent "
9388 "items\n", (unsigned long long)rec->start);
9389 cur_err = 1;
9392 if (rec->refs != rec->extent_item_refs) {
9393 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9394 (unsigned long long)rec->start,
9395 (unsigned long long)rec->nr);
9396 fprintf(stderr, "extent item %llu, found %llu\n",
9397 (unsigned long long)rec->extent_item_refs,
9398 (unsigned long long)rec->refs);
9399 ret = record_orphan_data_extents(root->fs_info, rec);
9400 if (ret < 0)
9401 goto repair_abort;
9402 fix = ret;
9403 cur_err = 1;
9405 if (all_backpointers_checked(rec, 1)) {
9406 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9407 (unsigned long long)rec->start,
9408 (unsigned long long)rec->nr);
9409 fix = 1;
9410 cur_err = 1;
9412 if (!rec->owner_ref_checked) {
9413 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9414 (unsigned long long)rec->start,
9415 (unsigned long long)rec->nr);
9416 fix = 1;
9417 cur_err = 1;
9420 if (repair && fix) {
9421 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9422 if (ret)
9423 goto repair_abort;
9427 if (rec->bad_full_backref) {
9428 fprintf(stderr, "bad full backref, on [%llu]\n",
9429 (unsigned long long)rec->start);
9430 if (repair) {
9431 ret = fixup_extent_flags(root->fs_info, rec);
9432 if (ret)
9433 goto repair_abort;
9434 fix = 1;
9436 cur_err = 1;
9439 * Although it's not a extent ref's problem, we reuse this
9440 * routine for error reporting.
9441 * No repair function yet.
9443 if (rec->crossing_stripes) {
9444 fprintf(stderr,
9445 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9446 rec->start, rec->start + rec->max_size);
9447 cur_err = 1;
9450 if (rec->wrong_chunk_type) {
9451 fprintf(stderr,
9452 "bad extent [%llu, %llu), type mismatch with chunk\n",
9453 rec->start, rec->start + rec->max_size);
9454 cur_err = 1;
9457 remove_cache_extent(extent_cache, cache);
9458 free_all_extent_backrefs(rec);
9459 if (!init_extent_tree && repair && (!cur_err || fix))
9460 clear_extent_dirty(root->fs_info->excluded_extents,
9461 rec->start,
9462 rec->start + rec->max_size - 1);
9463 free(rec);
9465 repair_abort:
9466 if (repair) {
9467 if (ret && ret != -EAGAIN) {
9468 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9469 exit(1);
9470 } else if (!ret) {
9471 struct btrfs_trans_handle *trans;
9473 root = root->fs_info->extent_root;
9474 trans = btrfs_start_transaction(root, 1);
9475 if (IS_ERR(trans)) {
9476 ret = PTR_ERR(trans);
9477 goto repair_abort;
9480 btrfs_fix_block_accounting(trans, root);
9481 ret = btrfs_commit_transaction(trans, root);
9482 if (ret)
9483 goto repair_abort;
9485 return ret;
9487 return 0;
9490 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9492 u64 stripe_size;
9494 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9495 stripe_size = length;
9496 stripe_size /= num_stripes;
9497 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9498 stripe_size = length * 2;
9499 stripe_size /= num_stripes;
9500 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9501 stripe_size = length;
9502 stripe_size /= (num_stripes - 1);
9503 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9504 stripe_size = length;
9505 stripe_size /= (num_stripes - 2);
9506 } else {
9507 stripe_size = length;
9509 return stripe_size;
9513 * Check the chunk with its block group/dev list ref:
9514 * Return 0 if all refs seems valid.
9515 * Return 1 if part of refs seems valid, need later check for rebuild ref
9516 * like missing block group and needs to search extent tree to rebuild them.
9517 * Return -1 if essential refs are missing and unable to rebuild.
9519 static int check_chunk_refs(struct chunk_record *chunk_rec,
9520 struct block_group_tree *block_group_cache,
9521 struct device_extent_tree *dev_extent_cache,
9522 int silent)
9524 struct cache_extent *block_group_item;
9525 struct block_group_record *block_group_rec;
9526 struct cache_extent *dev_extent_item;
9527 struct device_extent_record *dev_extent_rec;
9528 u64 devid;
9529 u64 offset;
9530 u64 length;
9531 int metadump_v2 = 0;
9532 int i;
9533 int ret = 0;
9535 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9536 chunk_rec->offset,
9537 chunk_rec->length);
9538 if (block_group_item) {
9539 block_group_rec = container_of(block_group_item,
9540 struct block_group_record,
9541 cache);
9542 if (chunk_rec->length != block_group_rec->offset ||
9543 chunk_rec->offset != block_group_rec->objectid ||
9544 (!metadump_v2 &&
9545 chunk_rec->type_flags != block_group_rec->flags)) {
9546 if (!silent)
9547 fprintf(stderr,
9548 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9549 chunk_rec->objectid,
9550 chunk_rec->type,
9551 chunk_rec->offset,
9552 chunk_rec->length,
9553 chunk_rec->offset,
9554 chunk_rec->type_flags,
9555 block_group_rec->objectid,
9556 block_group_rec->type,
9557 block_group_rec->offset,
9558 block_group_rec->offset,
9559 block_group_rec->objectid,
9560 block_group_rec->flags);
9561 ret = -1;
9562 } else {
9563 list_del_init(&block_group_rec->list);
9564 chunk_rec->bg_rec = block_group_rec;
9566 } else {
9567 if (!silent)
9568 fprintf(stderr,
9569 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9570 chunk_rec->objectid,
9571 chunk_rec->type,
9572 chunk_rec->offset,
9573 chunk_rec->length,
9574 chunk_rec->offset,
9575 chunk_rec->type_flags);
9576 ret = 1;
9579 if (metadump_v2)
9580 return ret;
9582 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9583 chunk_rec->num_stripes);
9584 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9585 devid = chunk_rec->stripes[i].devid;
9586 offset = chunk_rec->stripes[i].offset;
9587 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9588 devid, offset, length);
9589 if (dev_extent_item) {
9590 dev_extent_rec = container_of(dev_extent_item,
9591 struct device_extent_record,
9592 cache);
9593 if (dev_extent_rec->objectid != devid ||
9594 dev_extent_rec->offset != offset ||
9595 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9596 dev_extent_rec->length != length) {
9597 if (!silent)
9598 fprintf(stderr,
9599 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9600 chunk_rec->objectid,
9601 chunk_rec->type,
9602 chunk_rec->offset,
9603 chunk_rec->stripes[i].devid,
9604 chunk_rec->stripes[i].offset,
9605 dev_extent_rec->objectid,
9606 dev_extent_rec->offset,
9607 dev_extent_rec->length);
9608 ret = -1;
9609 } else {
9610 list_move(&dev_extent_rec->chunk_list,
9611 &chunk_rec->dextents);
9613 } else {
9614 if (!silent)
9615 fprintf(stderr,
9616 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9617 chunk_rec->objectid,
9618 chunk_rec->type,
9619 chunk_rec->offset,
9620 chunk_rec->stripes[i].devid,
9621 chunk_rec->stripes[i].offset);
9622 ret = -1;
9625 return ret;
9628 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9629 int check_chunks(struct cache_tree *chunk_cache,
9630 struct block_group_tree *block_group_cache,
9631 struct device_extent_tree *dev_extent_cache,
9632 struct list_head *good, struct list_head *bad,
9633 struct list_head *rebuild, int silent)
9635 struct cache_extent *chunk_item;
9636 struct chunk_record *chunk_rec;
9637 struct block_group_record *bg_rec;
9638 struct device_extent_record *dext_rec;
9639 int err;
9640 int ret = 0;
9642 chunk_item = first_cache_extent(chunk_cache);
9643 while (chunk_item) {
9644 chunk_rec = container_of(chunk_item, struct chunk_record,
9645 cache);
9646 err = check_chunk_refs(chunk_rec, block_group_cache,
9647 dev_extent_cache, silent);
9648 if (err < 0)
9649 ret = err;
9650 if (err == 0 && good)
9651 list_add_tail(&chunk_rec->list, good);
9652 if (err > 0 && rebuild)
9653 list_add_tail(&chunk_rec->list, rebuild);
9654 if (err < 0 && bad)
9655 list_add_tail(&chunk_rec->list, bad);
9656 chunk_item = next_cache_extent(chunk_item);
9659 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9660 if (!silent)
9661 fprintf(stderr,
9662 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9663 bg_rec->objectid,
9664 bg_rec->offset,
9665 bg_rec->flags);
9666 if (!ret)
9667 ret = 1;
9670 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9671 chunk_list) {
9672 if (!silent)
9673 fprintf(stderr,
9674 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9675 dext_rec->objectid,
9676 dext_rec->offset,
9677 dext_rec->length);
9678 if (!ret)
9679 ret = 1;
9681 return ret;
9685 static int check_device_used(struct device_record *dev_rec,
9686 struct device_extent_tree *dext_cache)
9688 struct cache_extent *cache;
9689 struct device_extent_record *dev_extent_rec;
9690 u64 total_byte = 0;
9692 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9693 while (cache) {
9694 dev_extent_rec = container_of(cache,
9695 struct device_extent_record,
9696 cache);
9697 if (dev_extent_rec->objectid != dev_rec->devid)
9698 break;
9700 list_del_init(&dev_extent_rec->device_list);
9701 total_byte += dev_extent_rec->length;
9702 cache = next_cache_extent(cache);
9705 if (total_byte != dev_rec->byte_used) {
9706 fprintf(stderr,
9707 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9708 total_byte, dev_rec->byte_used, dev_rec->objectid,
9709 dev_rec->type, dev_rec->offset);
9710 return -1;
9711 } else {
9712 return 0;
9716 /* check btrfs_dev_item -> btrfs_dev_extent */
9717 static int check_devices(struct rb_root *dev_cache,
9718 struct device_extent_tree *dev_extent_cache)
9720 struct rb_node *dev_node;
9721 struct device_record *dev_rec;
9722 struct device_extent_record *dext_rec;
9723 int err;
9724 int ret = 0;
9726 dev_node = rb_first(dev_cache);
9727 while (dev_node) {
9728 dev_rec = container_of(dev_node, struct device_record, node);
9729 err = check_device_used(dev_rec, dev_extent_cache);
9730 if (err)
9731 ret = err;
9733 dev_node = rb_next(dev_node);
9735 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9736 device_list) {
9737 fprintf(stderr,
9738 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9739 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9740 if (!ret)
9741 ret = 1;
9743 return ret;
9746 static int add_root_item_to_list(struct list_head *head,
9747 u64 objectid, u64 bytenr, u64 last_snapshot,
9748 u8 level, u8 drop_level,
9749 int level_size, struct btrfs_key *drop_key)
9752 struct root_item_record *ri_rec;
9753 ri_rec = malloc(sizeof(*ri_rec));
9754 if (!ri_rec)
9755 return -ENOMEM;
9756 ri_rec->bytenr = bytenr;
9757 ri_rec->objectid = objectid;
9758 ri_rec->level = level;
9759 ri_rec->level_size = level_size;
9760 ri_rec->drop_level = drop_level;
9761 ri_rec->last_snapshot = last_snapshot;
9762 if (drop_key)
9763 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9764 list_add_tail(&ri_rec->list, head);
9766 return 0;
9769 static void free_root_item_list(struct list_head *list)
9771 struct root_item_record *ri_rec;
9773 while (!list_empty(list)) {
9774 ri_rec = list_first_entry(list, struct root_item_record,
9775 list);
9776 list_del_init(&ri_rec->list);
9777 free(ri_rec);
9781 static int deal_root_from_list(struct list_head *list,
9782 struct btrfs_root *root,
9783 struct block_info *bits,
9784 int bits_nr,
9785 struct cache_tree *pending,
9786 struct cache_tree *seen,
9787 struct cache_tree *reada,
9788 struct cache_tree *nodes,
9789 struct cache_tree *extent_cache,
9790 struct cache_tree *chunk_cache,
9791 struct rb_root *dev_cache,
9792 struct block_group_tree *block_group_cache,
9793 struct device_extent_tree *dev_extent_cache)
9795 int ret = 0;
9796 u64 last;
9798 while (!list_empty(list)) {
9799 struct root_item_record *rec;
9800 struct extent_buffer *buf;
9801 rec = list_entry(list->next,
9802 struct root_item_record, list);
9803 last = 0;
9804 buf = read_tree_block(root->fs_info,
9805 rec->bytenr, rec->level_size, 0);
9806 if (!extent_buffer_uptodate(buf)) {
9807 free_extent_buffer(buf);
9808 ret = -EIO;
9809 break;
9811 ret = add_root_to_pending(buf, extent_cache, pending,
9812 seen, nodes, rec->objectid);
9813 if (ret < 0)
9814 break;
9816 * To rebuild extent tree, we need deal with snapshot
9817 * one by one, otherwise we deal with node firstly which
9818 * can maximize readahead.
9820 while (1) {
9821 ret = run_next_block(root, bits, bits_nr, &last,
9822 pending, seen, reada, nodes,
9823 extent_cache, chunk_cache,
9824 dev_cache, block_group_cache,
9825 dev_extent_cache, rec);
9826 if (ret != 0)
9827 break;
9829 free_extent_buffer(buf);
9830 list_del(&rec->list);
9831 free(rec);
9832 if (ret < 0)
9833 break;
9835 while (ret >= 0) {
9836 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9837 reada, nodes, extent_cache, chunk_cache,
9838 dev_cache, block_group_cache,
9839 dev_extent_cache, NULL);
9840 if (ret != 0) {
9841 if (ret > 0)
9842 ret = 0;
9843 break;
9846 return ret;
9849 static int check_chunks_and_extents(struct btrfs_root *root)
9851 struct rb_root dev_cache;
9852 struct cache_tree chunk_cache;
9853 struct block_group_tree block_group_cache;
9854 struct device_extent_tree dev_extent_cache;
9855 struct cache_tree extent_cache;
9856 struct cache_tree seen;
9857 struct cache_tree pending;
9858 struct cache_tree reada;
9859 struct cache_tree nodes;
9860 struct extent_io_tree excluded_extents;
9861 struct cache_tree corrupt_blocks;
9862 struct btrfs_path path;
9863 struct btrfs_key key;
9864 struct btrfs_key found_key;
9865 int ret, err = 0;
9866 struct block_info *bits;
9867 int bits_nr;
9868 struct extent_buffer *leaf;
9869 int slot;
9870 struct btrfs_root_item ri;
9871 struct list_head dropping_trees;
9872 struct list_head normal_trees;
9873 struct btrfs_root *root1;
9874 u64 objectid;
9875 u32 level_size;
9876 u8 level;
9878 dev_cache = RB_ROOT;
9879 cache_tree_init(&chunk_cache);
9880 block_group_tree_init(&block_group_cache);
9881 device_extent_tree_init(&dev_extent_cache);
9883 cache_tree_init(&extent_cache);
9884 cache_tree_init(&seen);
9885 cache_tree_init(&pending);
9886 cache_tree_init(&nodes);
9887 cache_tree_init(&reada);
9888 cache_tree_init(&corrupt_blocks);
9889 extent_io_tree_init(&excluded_extents);
9890 INIT_LIST_HEAD(&dropping_trees);
9891 INIT_LIST_HEAD(&normal_trees);
9893 if (repair) {
9894 root->fs_info->excluded_extents = &excluded_extents;
9895 root->fs_info->fsck_extent_cache = &extent_cache;
9896 root->fs_info->free_extent_hook = free_extent_hook;
9897 root->fs_info->corrupt_blocks = &corrupt_blocks;
9900 bits_nr = 1024;
9901 bits = malloc(bits_nr * sizeof(struct block_info));
9902 if (!bits) {
9903 perror("malloc");
9904 exit(1);
9907 if (ctx.progress_enabled) {
9908 ctx.tp = TASK_EXTENTS;
9909 task_start(ctx.info);
9912 again:
9913 root1 = root->fs_info->tree_root;
9914 level = btrfs_header_level(root1->node);
9915 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9916 root1->node->start, 0, level, 0,
9917 root1->fs_info->nodesize, NULL);
9918 if (ret < 0)
9919 goto out;
9920 root1 = root->fs_info->chunk_root;
9921 level = btrfs_header_level(root1->node);
9922 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9923 root1->node->start, 0, level, 0,
9924 root1->fs_info->nodesize, NULL);
9925 if (ret < 0)
9926 goto out;
9927 btrfs_init_path(&path);
9928 key.offset = 0;
9929 key.objectid = 0;
9930 key.type = BTRFS_ROOT_ITEM_KEY;
9931 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9932 &key, &path, 0, 0);
9933 if (ret < 0)
9934 goto out;
9935 while(1) {
9936 leaf = path.nodes[0];
9937 slot = path.slots[0];
9938 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9939 ret = btrfs_next_leaf(root, &path);
9940 if (ret != 0)
9941 break;
9942 leaf = path.nodes[0];
9943 slot = path.slots[0];
9945 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9946 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9947 unsigned long offset;
9948 u64 last_snapshot;
9950 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9951 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9952 last_snapshot = btrfs_root_last_snapshot(&ri);
9953 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9954 level = btrfs_root_level(&ri);
9955 level_size = root->fs_info->nodesize;
9956 ret = add_root_item_to_list(&normal_trees,
9957 found_key.objectid,
9958 btrfs_root_bytenr(&ri),
9959 last_snapshot, level,
9960 0, level_size, NULL);
9961 if (ret < 0)
9962 goto out;
9963 } else {
9964 level = btrfs_root_level(&ri);
9965 level_size = root->fs_info->nodesize;
9966 objectid = found_key.objectid;
9967 btrfs_disk_key_to_cpu(&found_key,
9968 &ri.drop_progress);
9969 ret = add_root_item_to_list(&dropping_trees,
9970 objectid,
9971 btrfs_root_bytenr(&ri),
9972 last_snapshot, level,
9973 ri.drop_level,
9974 level_size, &found_key);
9975 if (ret < 0)
9976 goto out;
9979 path.slots[0]++;
9981 btrfs_release_path(&path);
9984 * check_block can return -EAGAIN if it fixes something, please keep
9985 * this in mind when dealing with return values from these functions, if
9986 * we get -EAGAIN we want to fall through and restart the loop.
9988 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9989 &seen, &reada, &nodes, &extent_cache,
9990 &chunk_cache, &dev_cache, &block_group_cache,
9991 &dev_extent_cache);
9992 if (ret < 0) {
9993 if (ret == -EAGAIN)
9994 goto loop;
9995 goto out;
9997 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9998 &pending, &seen, &reada, &nodes,
9999 &extent_cache, &chunk_cache, &dev_cache,
10000 &block_group_cache, &dev_extent_cache);
10001 if (ret < 0) {
10002 if (ret == -EAGAIN)
10003 goto loop;
10004 goto out;
10007 ret = check_chunks(&chunk_cache, &block_group_cache,
10008 &dev_extent_cache, NULL, NULL, NULL, 0);
10009 if (ret) {
10010 if (ret == -EAGAIN)
10011 goto loop;
10012 err = ret;
10015 ret = check_extent_refs(root, &extent_cache);
10016 if (ret < 0) {
10017 if (ret == -EAGAIN)
10018 goto loop;
10019 goto out;
10022 ret = check_devices(&dev_cache, &dev_extent_cache);
10023 if (ret && err)
10024 ret = err;
10026 out:
10027 task_stop(ctx.info);
10028 if (repair) {
10029 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10030 extent_io_tree_cleanup(&excluded_extents);
10031 root->fs_info->fsck_extent_cache = NULL;
10032 root->fs_info->free_extent_hook = NULL;
10033 root->fs_info->corrupt_blocks = NULL;
10034 root->fs_info->excluded_extents = NULL;
10036 free(bits);
10037 free_chunk_cache_tree(&chunk_cache);
10038 free_device_cache_tree(&dev_cache);
10039 free_block_group_tree(&block_group_cache);
10040 free_device_extent_tree(&dev_extent_cache);
10041 free_extent_cache_tree(&seen);
10042 free_extent_cache_tree(&pending);
10043 free_extent_cache_tree(&reada);
10044 free_extent_cache_tree(&nodes);
10045 free_root_item_list(&normal_trees);
10046 free_root_item_list(&dropping_trees);
10047 return ret;
10048 loop:
10049 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10050 free_extent_cache_tree(&seen);
10051 free_extent_cache_tree(&pending);
10052 free_extent_cache_tree(&reada);
10053 free_extent_cache_tree(&nodes);
10054 free_chunk_cache_tree(&chunk_cache);
10055 free_block_group_tree(&block_group_cache);
10056 free_device_cache_tree(&dev_cache);
10057 free_device_extent_tree(&dev_extent_cache);
10058 free_extent_record_cache(&extent_cache);
10059 free_root_item_list(&normal_trees);
10060 free_root_item_list(&dropping_trees);
10061 extent_io_tree_cleanup(&excluded_extents);
10062 goto again;
10066 * Check backrefs of a tree block given by @bytenr or @eb.
10068 * @root: the root containing the @bytenr or @eb
10069 * @eb: tree block extent buffer, can be NULL
10070 * @bytenr: bytenr of the tree block to search
10071 * @level: tree level of the tree block
10072 * @owner: owner of the tree block
10074 * Return >0 for any error found and output error message
10075 * Return 0 for no error found
10077 static int check_tree_block_ref(struct btrfs_root *root,
10078 struct extent_buffer *eb, u64 bytenr,
10079 int level, u64 owner)
10081 struct btrfs_key key;
10082 struct btrfs_root *extent_root = root->fs_info->extent_root;
10083 struct btrfs_path path;
10084 struct btrfs_extent_item *ei;
10085 struct btrfs_extent_inline_ref *iref;
10086 struct extent_buffer *leaf;
10087 unsigned long end;
10088 unsigned long ptr;
10089 int slot;
10090 int skinny_level;
10091 int type;
10092 u32 nodesize = root->fs_info->nodesize;
10093 u32 item_size;
10094 u64 offset;
10095 int tree_reloc_root = 0;
10096 int found_ref = 0;
10097 int err = 0;
10098 int ret;
10100 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10101 btrfs_header_bytenr(root->node) == bytenr)
10102 tree_reloc_root = 1;
10104 btrfs_init_path(&path);
10105 key.objectid = bytenr;
10106 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10107 key.type = BTRFS_METADATA_ITEM_KEY;
10108 else
10109 key.type = BTRFS_EXTENT_ITEM_KEY;
10110 key.offset = (u64)-1;
10112 /* Search for the backref in extent tree */
10113 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10114 if (ret < 0) {
10115 err |= BACKREF_MISSING;
10116 goto out;
10118 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10119 if (ret) {
10120 err |= BACKREF_MISSING;
10121 goto out;
10124 leaf = path.nodes[0];
10125 slot = path.slots[0];
10126 btrfs_item_key_to_cpu(leaf, &key, slot);
10128 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10130 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10131 skinny_level = (int)key.offset;
10132 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10133 } else {
10134 struct btrfs_tree_block_info *info;
10136 info = (struct btrfs_tree_block_info *)(ei + 1);
10137 skinny_level = btrfs_tree_block_level(leaf, info);
10138 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10141 if (eb) {
10142 u64 header_gen;
10143 u64 extent_gen;
10145 if (!(btrfs_extent_flags(leaf, ei) &
10146 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10147 error(
10148 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10149 key.objectid, nodesize,
10150 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10151 err = BACKREF_MISMATCH;
10153 header_gen = btrfs_header_generation(eb);
10154 extent_gen = btrfs_extent_generation(leaf, ei);
10155 if (header_gen != extent_gen) {
10156 error(
10157 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10158 key.objectid, nodesize, header_gen,
10159 extent_gen);
10160 err = BACKREF_MISMATCH;
10162 if (level != skinny_level) {
10163 error(
10164 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10165 key.objectid, nodesize, level, skinny_level);
10166 err = BACKREF_MISMATCH;
10168 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10169 error(
10170 "extent[%llu %u] is referred by other roots than %llu",
10171 key.objectid, nodesize, root->objectid);
10172 err = BACKREF_MISMATCH;
10177 * Iterate the extent/metadata item to find the exact backref
10179 item_size = btrfs_item_size_nr(leaf, slot);
10180 ptr = (unsigned long)iref;
10181 end = (unsigned long)ei + item_size;
10182 while (ptr < end) {
10183 iref = (struct btrfs_extent_inline_ref *)ptr;
10184 type = btrfs_extent_inline_ref_type(leaf, iref);
10185 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10187 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10188 (offset == root->objectid || offset == owner)) {
10189 found_ref = 1;
10190 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10192 * Backref of tree reloc root points to itself, no need
10193 * to check backref any more.
10195 if (tree_reloc_root)
10196 found_ref = 1;
10197 else
10198 /* Check if the backref points to valid referencer */
10199 found_ref = !check_tree_block_ref(root, NULL,
10200 offset, level + 1, owner);
10203 if (found_ref)
10204 break;
10205 ptr += btrfs_extent_inline_ref_size(type);
10209 * Inlined extent item doesn't have what we need, check
10210 * TREE_BLOCK_REF_KEY
10212 if (!found_ref) {
10213 btrfs_release_path(&path);
10214 key.objectid = bytenr;
10215 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10216 key.offset = root->objectid;
10218 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10219 if (!ret)
10220 found_ref = 1;
10222 if (!found_ref)
10223 err |= BACKREF_MISSING;
10224 out:
10225 btrfs_release_path(&path);
10226 if (eb && (err & BACKREF_MISSING))
10227 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10228 bytenr, nodesize, owner, level);
10229 return err;
10233 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10235 * Return >0 any error found and output error message
10236 * Return 0 for no error found
10238 static int check_extent_data_item(struct btrfs_root *root,
10239 struct extent_buffer *eb, int slot)
10241 struct btrfs_file_extent_item *fi;
10242 struct btrfs_path path;
10243 struct btrfs_root *extent_root = root->fs_info->extent_root;
10244 struct btrfs_key fi_key;
10245 struct btrfs_key dbref_key;
10246 struct extent_buffer *leaf;
10247 struct btrfs_extent_item *ei;
10248 struct btrfs_extent_inline_ref *iref;
10249 struct btrfs_extent_data_ref *dref;
10250 u64 owner;
10251 u64 disk_bytenr;
10252 u64 disk_num_bytes;
10253 u64 extent_num_bytes;
10254 u64 extent_flags;
10255 u32 item_size;
10256 unsigned long end;
10257 unsigned long ptr;
10258 int type;
10259 u64 ref_root;
10260 int found_dbackref = 0;
10261 int err = 0;
10262 int ret;
10264 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10265 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10267 /* Nothing to check for hole and inline data extents */
10268 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10269 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10270 return 0;
10272 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10273 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10274 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10276 /* Check unaligned disk_num_bytes and num_bytes */
10277 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10278 error(
10279 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10280 fi_key.objectid, fi_key.offset, disk_num_bytes,
10281 root->fs_info->sectorsize);
10282 err |= BYTES_UNALIGNED;
10283 } else {
10284 data_bytes_allocated += disk_num_bytes;
10286 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10287 error(
10288 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10289 fi_key.objectid, fi_key.offset, extent_num_bytes,
10290 root->fs_info->sectorsize);
10291 err |= BYTES_UNALIGNED;
10292 } else {
10293 data_bytes_referenced += extent_num_bytes;
10295 owner = btrfs_header_owner(eb);
10297 /* Check the extent item of the file extent in extent tree */
10298 btrfs_init_path(&path);
10299 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10300 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10301 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10303 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10304 if (ret)
10305 goto out;
10307 leaf = path.nodes[0];
10308 slot = path.slots[0];
10309 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10311 extent_flags = btrfs_extent_flags(leaf, ei);
10313 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10314 error(
10315 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10316 disk_bytenr, disk_num_bytes,
10317 BTRFS_EXTENT_FLAG_DATA);
10318 err |= BACKREF_MISMATCH;
10321 /* Check data backref inside that extent item */
10322 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10323 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10324 ptr = (unsigned long)iref;
10325 end = (unsigned long)ei + item_size;
10326 while (ptr < end) {
10327 iref = (struct btrfs_extent_inline_ref *)ptr;
10328 type = btrfs_extent_inline_ref_type(leaf, iref);
10329 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10331 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10332 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10333 if (ref_root == owner || ref_root == root->objectid)
10334 found_dbackref = 1;
10335 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10336 found_dbackref = !check_tree_block_ref(root, NULL,
10337 btrfs_extent_inline_ref_offset(leaf, iref),
10338 0, owner);
10341 if (found_dbackref)
10342 break;
10343 ptr += btrfs_extent_inline_ref_size(type);
10346 if (!found_dbackref) {
10347 btrfs_release_path(&path);
10349 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10350 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10351 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10352 dbref_key.offset = hash_extent_data_ref(root->objectid,
10353 fi_key.objectid, fi_key.offset);
10355 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10356 &dbref_key, &path, 0, 0);
10357 if (!ret) {
10358 found_dbackref = 1;
10359 goto out;
10362 btrfs_release_path(&path);
10365 * Neither inlined nor EXTENT_DATA_REF found, try
10366 * SHARED_DATA_REF as last chance.
10368 dbref_key.objectid = disk_bytenr;
10369 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10370 dbref_key.offset = eb->start;
10372 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10373 &dbref_key, &path, 0, 0);
10374 if (!ret) {
10375 found_dbackref = 1;
10376 goto out;
10380 out:
10381 if (!found_dbackref)
10382 err |= BACKREF_MISSING;
10383 btrfs_release_path(&path);
10384 if (err & BACKREF_MISSING) {
10385 error("data extent[%llu %llu] backref lost",
10386 disk_bytenr, disk_num_bytes);
10388 return err;
10392 * Get real tree block level for the case like shared block
10393 * Return >= 0 as tree level
10394 * Return <0 for error
10396 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10398 struct extent_buffer *eb;
10399 struct btrfs_path path;
10400 struct btrfs_key key;
10401 struct btrfs_extent_item *ei;
10402 u64 flags;
10403 u64 transid;
10404 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10405 u8 backref_level;
10406 u8 header_level;
10407 int ret;
10409 /* Search extent tree for extent generation and level */
10410 key.objectid = bytenr;
10411 key.type = BTRFS_METADATA_ITEM_KEY;
10412 key.offset = (u64)-1;
10414 btrfs_init_path(&path);
10415 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10416 if (ret < 0)
10417 goto release_out;
10418 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10419 if (ret < 0)
10420 goto release_out;
10421 if (ret > 0) {
10422 ret = -ENOENT;
10423 goto release_out;
10426 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10427 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10428 struct btrfs_extent_item);
10429 flags = btrfs_extent_flags(path.nodes[0], ei);
10430 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10431 ret = -ENOENT;
10432 goto release_out;
10435 /* Get transid for later read_tree_block() check */
10436 transid = btrfs_extent_generation(path.nodes[0], ei);
10438 /* Get backref level as one source */
10439 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10440 backref_level = key.offset;
10441 } else {
10442 struct btrfs_tree_block_info *info;
10444 info = (struct btrfs_tree_block_info *)(ei + 1);
10445 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10447 btrfs_release_path(&path);
10449 /* Get level from tree block as an alternative source */
10450 eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10451 if (!extent_buffer_uptodate(eb)) {
10452 free_extent_buffer(eb);
10453 return -EIO;
10455 header_level = btrfs_header_level(eb);
10456 free_extent_buffer(eb);
10458 if (header_level != backref_level)
10459 return -EIO;
10460 return header_level;
10462 release_out:
10463 btrfs_release_path(&path);
10464 return ret;
10468 * Check if a tree block backref is valid (points to a valid tree block)
10469 * if level == -1, level will be resolved
10470 * Return >0 for any error found and print error message
10472 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10473 u64 bytenr, int level)
10475 struct btrfs_root *root;
10476 struct btrfs_key key;
10477 struct btrfs_path path;
10478 struct extent_buffer *eb;
10479 struct extent_buffer *node;
10480 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10481 int err = 0;
10482 int ret;
10484 /* Query level for level == -1 special case */
10485 if (level == -1)
10486 level = query_tree_block_level(fs_info, bytenr);
10487 if (level < 0) {
10488 err |= REFERENCER_MISSING;
10489 goto out;
10492 key.objectid = root_id;
10493 key.type = BTRFS_ROOT_ITEM_KEY;
10494 key.offset = (u64)-1;
10496 root = btrfs_read_fs_root(fs_info, &key);
10497 if (IS_ERR(root)) {
10498 err |= REFERENCER_MISSING;
10499 goto out;
10502 /* Read out the tree block to get item/node key */
10503 eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10504 if (!extent_buffer_uptodate(eb)) {
10505 err |= REFERENCER_MISSING;
10506 free_extent_buffer(eb);
10507 goto out;
10510 /* Empty tree, no need to check key */
10511 if (!btrfs_header_nritems(eb) && !level) {
10512 free_extent_buffer(eb);
10513 goto out;
10516 if (level)
10517 btrfs_node_key_to_cpu(eb, &key, 0);
10518 else
10519 btrfs_item_key_to_cpu(eb, &key, 0);
10521 free_extent_buffer(eb);
10523 btrfs_init_path(&path);
10524 path.lowest_level = level;
10525 /* Search with the first key, to ensure we can reach it */
10526 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10527 if (ret < 0) {
10528 err |= REFERENCER_MISSING;
10529 goto release_out;
10532 node = path.nodes[level];
10533 if (btrfs_header_bytenr(node) != bytenr) {
10534 error(
10535 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10536 bytenr, nodesize, bytenr,
10537 btrfs_header_bytenr(node));
10538 err |= REFERENCER_MISMATCH;
10540 if (btrfs_header_level(node) != level) {
10541 error(
10542 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10543 bytenr, nodesize, level,
10544 btrfs_header_level(node));
10545 err |= REFERENCER_MISMATCH;
10548 release_out:
10549 btrfs_release_path(&path);
10550 out:
10551 if (err & REFERENCER_MISSING) {
10552 if (level < 0)
10553 error("extent [%llu %d] lost referencer (owner: %llu)",
10554 bytenr, nodesize, root_id);
10555 else
10556 error(
10557 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10558 bytenr, nodesize, root_id, level);
10561 return err;
10565 * Check if tree block @eb is tree reloc root.
10566 * Return 0 if it's not or any problem happens
10567 * Return 1 if it's a tree reloc root
10569 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10570 struct extent_buffer *eb)
10572 struct btrfs_root *tree_reloc_root;
10573 struct btrfs_key key;
10574 u64 bytenr = btrfs_header_bytenr(eb);
10575 u64 owner = btrfs_header_owner(eb);
10576 int ret = 0;
10578 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10579 key.offset = owner;
10580 key.type = BTRFS_ROOT_ITEM_KEY;
10582 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10583 if (IS_ERR(tree_reloc_root))
10584 return 0;
10586 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10587 ret = 1;
10588 btrfs_free_fs_root(tree_reloc_root);
10589 return ret;
10593 * Check referencer for shared block backref
10594 * If level == -1, this function will resolve the level.
10596 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10597 u64 parent, u64 bytenr, int level)
10599 struct extent_buffer *eb;
10600 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10601 u32 nr;
10602 int found_parent = 0;
10603 int i;
10605 eb = read_tree_block(fs_info, parent, nodesize, 0);
10606 if (!extent_buffer_uptodate(eb))
10607 goto out;
10609 if (level == -1)
10610 level = query_tree_block_level(fs_info, bytenr);
10611 if (level < 0)
10612 goto out;
10614 /* It's possible it's a tree reloc root */
10615 if (parent == bytenr) {
10616 if (is_tree_reloc_root(fs_info, eb))
10617 found_parent = 1;
10618 goto out;
10621 if (level + 1 != btrfs_header_level(eb))
10622 goto out;
10624 nr = btrfs_header_nritems(eb);
10625 for (i = 0; i < nr; i++) {
10626 if (bytenr == btrfs_node_blockptr(eb, i)) {
10627 found_parent = 1;
10628 break;
10631 out:
10632 free_extent_buffer(eb);
10633 if (!found_parent) {
10634 error(
10635 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10636 bytenr, nodesize, parent, level);
10637 return REFERENCER_MISSING;
10639 return 0;
10643 * Check referencer for normal (inlined) data ref
10644 * If len == 0, it will be resolved by searching in extent tree
10646 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10647 u64 root_id, u64 objectid, u64 offset,
10648 u64 bytenr, u64 len, u32 count)
10650 struct btrfs_root *root;
10651 struct btrfs_root *extent_root = fs_info->extent_root;
10652 struct btrfs_key key;
10653 struct btrfs_path path;
10654 struct extent_buffer *leaf;
10655 struct btrfs_file_extent_item *fi;
10656 u32 found_count = 0;
10657 int slot;
10658 int ret = 0;
10660 if (!len) {
10661 key.objectid = bytenr;
10662 key.type = BTRFS_EXTENT_ITEM_KEY;
10663 key.offset = (u64)-1;
10665 btrfs_init_path(&path);
10666 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10667 if (ret < 0)
10668 goto out;
10669 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10670 if (ret)
10671 goto out;
10672 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10673 if (key.objectid != bytenr ||
10674 key.type != BTRFS_EXTENT_ITEM_KEY)
10675 goto out;
10676 len = key.offset;
10677 btrfs_release_path(&path);
10679 key.objectid = root_id;
10680 key.type = BTRFS_ROOT_ITEM_KEY;
10681 key.offset = (u64)-1;
10682 btrfs_init_path(&path);
10684 root = btrfs_read_fs_root(fs_info, &key);
10685 if (IS_ERR(root))
10686 goto out;
10688 key.objectid = objectid;
10689 key.type = BTRFS_EXTENT_DATA_KEY;
10691 * It can be nasty as data backref offset is
10692 * file offset - file extent offset, which is smaller or
10693 * equal to original backref offset. The only special case is
10694 * overflow. So we need to special check and do further search.
10696 key.offset = offset & (1ULL << 63) ? 0 : offset;
10698 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10699 if (ret < 0)
10700 goto out;
10703 * Search afterwards to get correct one
10704 * NOTE: As we must do a comprehensive check on the data backref to
10705 * make sure the dref count also matches, we must iterate all file
10706 * extents for that inode.
10708 while (1) {
10709 leaf = path.nodes[0];
10710 slot = path.slots[0];
10712 if (slot >= btrfs_header_nritems(leaf))
10713 goto next;
10714 btrfs_item_key_to_cpu(leaf, &key, slot);
10715 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10716 break;
10717 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10719 * Except normal disk bytenr and disk num bytes, we still
10720 * need to do extra check on dbackref offset as
10721 * dbackref offset = file_offset - file_extent_offset
10723 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10724 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10725 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10726 offset)
10727 found_count++;
10729 next:
10730 ret = btrfs_next_item(root, &path);
10731 if (ret)
10732 break;
10734 out:
10735 btrfs_release_path(&path);
10736 if (found_count != count) {
10737 error(
10738 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10739 bytenr, len, root_id, objectid, offset, count, found_count);
10740 return REFERENCER_MISSING;
10742 return 0;
10746 * Check if the referencer of a shared data backref exists
10748 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10749 u64 parent, u64 bytenr)
10751 struct extent_buffer *eb;
10752 struct btrfs_key key;
10753 struct btrfs_file_extent_item *fi;
10754 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10755 u32 nr;
10756 int found_parent = 0;
10757 int i;
10759 eb = read_tree_block(fs_info, parent, nodesize, 0);
10760 if (!extent_buffer_uptodate(eb))
10761 goto out;
10763 nr = btrfs_header_nritems(eb);
10764 for (i = 0; i < nr; i++) {
10765 btrfs_item_key_to_cpu(eb, &key, i);
10766 if (key.type != BTRFS_EXTENT_DATA_KEY)
10767 continue;
10769 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10770 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10771 continue;
10773 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10774 found_parent = 1;
10775 break;
10779 out:
10780 free_extent_buffer(eb);
10781 if (!found_parent) {
10782 error("shared extent %llu referencer lost (parent: %llu)",
10783 bytenr, parent);
10784 return REFERENCER_MISSING;
10786 return 0;
10790 * This function will check a given extent item, including its backref and
10791 * itself (like crossing stripe boundary and type)
10793 * Since we don't use extent_record anymore, introduce new error bit
10795 static int check_extent_item(struct btrfs_fs_info *fs_info,
10796 struct extent_buffer *eb, int slot)
10798 struct btrfs_extent_item *ei;
10799 struct btrfs_extent_inline_ref *iref;
10800 struct btrfs_extent_data_ref *dref;
10801 unsigned long end;
10802 unsigned long ptr;
10803 int type;
10804 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10805 u32 item_size = btrfs_item_size_nr(eb, slot);
10806 u64 flags;
10807 u64 offset;
10808 int metadata = 0;
10809 int level;
10810 struct btrfs_key key;
10811 int ret;
10812 int err = 0;
10814 btrfs_item_key_to_cpu(eb, &key, slot);
10815 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10816 bytes_used += key.offset;
10817 else
10818 bytes_used += nodesize;
10820 if (item_size < sizeof(*ei)) {
10822 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10823 * old thing when on disk format is still un-determined.
10824 * No need to care about it anymore
10826 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10827 return -ENOTTY;
10830 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10831 flags = btrfs_extent_flags(eb, ei);
10833 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10834 metadata = 1;
10835 if (metadata && check_crossing_stripes(global_info, key.objectid,
10836 eb->len)) {
10837 error("bad metadata [%llu, %llu) crossing stripe boundary",
10838 key.objectid, key.objectid + nodesize);
10839 err |= CROSSING_STRIPE_BOUNDARY;
10842 ptr = (unsigned long)(ei + 1);
10844 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10845 /* Old EXTENT_ITEM metadata */
10846 struct btrfs_tree_block_info *info;
10848 info = (struct btrfs_tree_block_info *)ptr;
10849 level = btrfs_tree_block_level(eb, info);
10850 ptr += sizeof(struct btrfs_tree_block_info);
10851 } else {
10852 /* New METADATA_ITEM */
10853 level = key.offset;
10855 end = (unsigned long)ei + item_size;
10857 next:
10858 /* Reached extent item end normally */
10859 if (ptr == end)
10860 goto out;
10862 /* Beyond extent item end, wrong item size */
10863 if (ptr > end) {
10864 err |= ITEM_SIZE_MISMATCH;
10865 error("extent item at bytenr %llu slot %d has wrong size",
10866 eb->start, slot);
10867 goto out;
10870 /* Now check every backref in this extent item */
10871 iref = (struct btrfs_extent_inline_ref *)ptr;
10872 type = btrfs_extent_inline_ref_type(eb, iref);
10873 offset = btrfs_extent_inline_ref_offset(eb, iref);
10874 switch (type) {
10875 case BTRFS_TREE_BLOCK_REF_KEY:
10876 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10877 level);
10878 err |= ret;
10879 break;
10880 case BTRFS_SHARED_BLOCK_REF_KEY:
10881 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10882 level);
10883 err |= ret;
10884 break;
10885 case BTRFS_EXTENT_DATA_REF_KEY:
10886 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10887 ret = check_extent_data_backref(fs_info,
10888 btrfs_extent_data_ref_root(eb, dref),
10889 btrfs_extent_data_ref_objectid(eb, dref),
10890 btrfs_extent_data_ref_offset(eb, dref),
10891 key.objectid, key.offset,
10892 btrfs_extent_data_ref_count(eb, dref));
10893 err |= ret;
10894 break;
10895 case BTRFS_SHARED_DATA_REF_KEY:
10896 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10897 err |= ret;
10898 break;
10899 default:
10900 error("extent[%llu %d %llu] has unknown ref type: %d",
10901 key.objectid, key.type, key.offset, type);
10902 err |= UNKNOWN_TYPE;
10903 goto out;
10906 ptr += btrfs_extent_inline_ref_size(type);
10907 goto next;
10909 out:
10910 return err;
10914 * Check if a dev extent item is referred correctly by its chunk
10916 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10917 struct extent_buffer *eb, int slot)
10919 struct btrfs_root *chunk_root = fs_info->chunk_root;
10920 struct btrfs_dev_extent *ptr;
10921 struct btrfs_path path;
10922 struct btrfs_key chunk_key;
10923 struct btrfs_key devext_key;
10924 struct btrfs_chunk *chunk;
10925 struct extent_buffer *l;
10926 int num_stripes;
10927 u64 length;
10928 int i;
10929 int found_chunk = 0;
10930 int ret;
10932 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10933 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10934 length = btrfs_dev_extent_length(eb, ptr);
10936 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10937 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10938 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10940 btrfs_init_path(&path);
10941 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10942 if (ret)
10943 goto out;
10945 l = path.nodes[0];
10946 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10947 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10948 chunk_key.offset);
10949 if (ret < 0)
10950 goto out;
10952 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10953 goto out;
10955 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10956 for (i = 0; i < num_stripes; i++) {
10957 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10958 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10960 if (devid == devext_key.objectid &&
10961 offset == devext_key.offset) {
10962 found_chunk = 1;
10963 break;
10966 out:
10967 btrfs_release_path(&path);
10968 if (!found_chunk) {
10969 error(
10970 "device extent[%llu, %llu, %llu] did not find the related chunk",
10971 devext_key.objectid, devext_key.offset, length);
10972 return REFERENCER_MISSING;
10974 return 0;
10978 * Check if the used space is correct with the dev item
10980 static int check_dev_item(struct btrfs_fs_info *fs_info,
10981 struct extent_buffer *eb, int slot)
10983 struct btrfs_root *dev_root = fs_info->dev_root;
10984 struct btrfs_dev_item *dev_item;
10985 struct btrfs_path path;
10986 struct btrfs_key key;
10987 struct btrfs_dev_extent *ptr;
10988 u64 dev_id;
10989 u64 used;
10990 u64 total = 0;
10991 int ret;
10993 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10994 dev_id = btrfs_device_id(eb, dev_item);
10995 used = btrfs_device_bytes_used(eb, dev_item);
10997 key.objectid = dev_id;
10998 key.type = BTRFS_DEV_EXTENT_KEY;
10999 key.offset = 0;
11001 btrfs_init_path(&path);
11002 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11003 if (ret < 0) {
11004 btrfs_item_key_to_cpu(eb, &key, slot);
11005 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11006 key.objectid, key.type, key.offset);
11007 btrfs_release_path(&path);
11008 return REFERENCER_MISSING;
11011 /* Iterate dev_extents to calculate the used space of a device */
11012 while (1) {
11013 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11014 goto next;
11016 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11017 if (key.objectid > dev_id)
11018 break;
11019 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11020 goto next;
11022 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11023 struct btrfs_dev_extent);
11024 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11025 next:
11026 ret = btrfs_next_item(dev_root, &path);
11027 if (ret)
11028 break;
11030 btrfs_release_path(&path);
11032 if (used != total) {
11033 btrfs_item_key_to_cpu(eb, &key, slot);
11034 error(
11035 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11036 total, used, BTRFS_ROOT_TREE_OBJECTID,
11037 BTRFS_DEV_EXTENT_KEY, dev_id);
11038 return ACCOUNTING_MISMATCH;
11040 return 0;
11044 * Check a block group item with its referener (chunk) and its used space
11045 * with extent/metadata item
11047 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11048 struct extent_buffer *eb, int slot)
11050 struct btrfs_root *extent_root = fs_info->extent_root;
11051 struct btrfs_root *chunk_root = fs_info->chunk_root;
11052 struct btrfs_block_group_item *bi;
11053 struct btrfs_block_group_item bg_item;
11054 struct btrfs_path path;
11055 struct btrfs_key bg_key;
11056 struct btrfs_key chunk_key;
11057 struct btrfs_key extent_key;
11058 struct btrfs_chunk *chunk;
11059 struct extent_buffer *leaf;
11060 struct btrfs_extent_item *ei;
11061 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11062 u64 flags;
11063 u64 bg_flags;
11064 u64 used;
11065 u64 total = 0;
11066 int ret;
11067 int err = 0;
11069 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11070 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11071 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11072 used = btrfs_block_group_used(&bg_item);
11073 bg_flags = btrfs_block_group_flags(&bg_item);
11075 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11076 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11077 chunk_key.offset = bg_key.objectid;
11079 btrfs_init_path(&path);
11080 /* Search for the referencer chunk */
11081 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11082 if (ret) {
11083 error(
11084 "block group[%llu %llu] did not find the related chunk item",
11085 bg_key.objectid, bg_key.offset);
11086 err |= REFERENCER_MISSING;
11087 } else {
11088 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11089 struct btrfs_chunk);
11090 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11091 bg_key.offset) {
11092 error(
11093 "block group[%llu %llu] related chunk item length does not match",
11094 bg_key.objectid, bg_key.offset);
11095 err |= REFERENCER_MISMATCH;
11098 btrfs_release_path(&path);
11100 /* Search from the block group bytenr */
11101 extent_key.objectid = bg_key.objectid;
11102 extent_key.type = 0;
11103 extent_key.offset = 0;
11105 btrfs_init_path(&path);
11106 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11107 if (ret < 0)
11108 goto out;
11110 /* Iterate extent tree to account used space */
11111 while (1) {
11112 leaf = path.nodes[0];
11114 /* Search slot can point to the last item beyond leaf nritems */
11115 if (path.slots[0] >= btrfs_header_nritems(leaf))
11116 goto next;
11118 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11119 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11120 break;
11122 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11123 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11124 goto next;
11125 if (extent_key.objectid < bg_key.objectid)
11126 goto next;
11128 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11129 total += nodesize;
11130 else
11131 total += extent_key.offset;
11133 ei = btrfs_item_ptr(leaf, path.slots[0],
11134 struct btrfs_extent_item);
11135 flags = btrfs_extent_flags(leaf, ei);
11136 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11137 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11138 error(
11139 "bad extent[%llu, %llu) type mismatch with chunk",
11140 extent_key.objectid,
11141 extent_key.objectid + extent_key.offset);
11142 err |= CHUNK_TYPE_MISMATCH;
11144 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11145 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11146 BTRFS_BLOCK_GROUP_METADATA))) {
11147 error(
11148 "bad extent[%llu, %llu) type mismatch with chunk",
11149 extent_key.objectid,
11150 extent_key.objectid + nodesize);
11151 err |= CHUNK_TYPE_MISMATCH;
11154 next:
11155 ret = btrfs_next_item(extent_root, &path);
11156 if (ret)
11157 break;
11160 out:
11161 btrfs_release_path(&path);
11163 if (total != used) {
11164 error(
11165 "block group[%llu %llu] used %llu but extent items used %llu",
11166 bg_key.objectid, bg_key.offset, used, total);
11167 err |= ACCOUNTING_MISMATCH;
11169 return err;
11173 * Check a chunk item.
11174 * Including checking all referred dev_extents and block group
11176 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11177 struct extent_buffer *eb, int slot)
11179 struct btrfs_root *extent_root = fs_info->extent_root;
11180 struct btrfs_root *dev_root = fs_info->dev_root;
11181 struct btrfs_path path;
11182 struct btrfs_key chunk_key;
11183 struct btrfs_key bg_key;
11184 struct btrfs_key devext_key;
11185 struct btrfs_chunk *chunk;
11186 struct extent_buffer *leaf;
11187 struct btrfs_block_group_item *bi;
11188 struct btrfs_block_group_item bg_item;
11189 struct btrfs_dev_extent *ptr;
11190 u64 length;
11191 u64 chunk_end;
11192 u64 stripe_len;
11193 u64 type;
11194 int num_stripes;
11195 u64 offset;
11196 u64 objectid;
11197 int i;
11198 int ret;
11199 int err = 0;
11201 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11202 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11203 length = btrfs_chunk_length(eb, chunk);
11204 chunk_end = chunk_key.offset + length;
11205 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11206 chunk_key.offset);
11207 if (ret < 0) {
11208 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11209 chunk_end);
11210 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11211 goto out;
11213 type = btrfs_chunk_type(eb, chunk);
11215 bg_key.objectid = chunk_key.offset;
11216 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11217 bg_key.offset = length;
11219 btrfs_init_path(&path);
11220 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11221 if (ret) {
11222 error(
11223 "chunk[%llu %llu) did not find the related block group item",
11224 chunk_key.offset, chunk_end);
11225 err |= REFERENCER_MISSING;
11226 } else{
11227 leaf = path.nodes[0];
11228 bi = btrfs_item_ptr(leaf, path.slots[0],
11229 struct btrfs_block_group_item);
11230 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11231 sizeof(bg_item));
11232 if (btrfs_block_group_flags(&bg_item) != type) {
11233 error(
11234 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11235 chunk_key.offset, chunk_end, type,
11236 btrfs_block_group_flags(&bg_item));
11237 err |= REFERENCER_MISSING;
11241 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11242 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11243 for (i = 0; i < num_stripes; i++) {
11244 btrfs_release_path(&path);
11245 btrfs_init_path(&path);
11246 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11247 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11248 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11250 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11251 0, 0);
11252 if (ret)
11253 goto not_match_dev;
11255 leaf = path.nodes[0];
11256 ptr = btrfs_item_ptr(leaf, path.slots[0],
11257 struct btrfs_dev_extent);
11258 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11259 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11260 if (objectid != chunk_key.objectid ||
11261 offset != chunk_key.offset ||
11262 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11263 goto not_match_dev;
11264 continue;
11265 not_match_dev:
11266 err |= BACKREF_MISSING;
11267 error(
11268 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11269 chunk_key.objectid, chunk_end, i);
11270 continue;
11272 btrfs_release_path(&path);
11273 out:
11274 return err;
11278 * Main entry function to check known items and update related accounting info
11280 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11282 struct btrfs_fs_info *fs_info = root->fs_info;
11283 struct btrfs_key key;
11284 int slot = 0;
11285 int type;
11286 struct btrfs_extent_data_ref *dref;
11287 int ret;
11288 int err = 0;
11290 next:
11291 btrfs_item_key_to_cpu(eb, &key, slot);
11292 type = key.type;
11294 switch (type) {
11295 case BTRFS_EXTENT_DATA_KEY:
11296 ret = check_extent_data_item(root, eb, slot);
11297 err |= ret;
11298 break;
11299 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11300 ret = check_block_group_item(fs_info, eb, slot);
11301 err |= ret;
11302 break;
11303 case BTRFS_DEV_ITEM_KEY:
11304 ret = check_dev_item(fs_info, eb, slot);
11305 err |= ret;
11306 break;
11307 case BTRFS_CHUNK_ITEM_KEY:
11308 ret = check_chunk_item(fs_info, eb, slot);
11309 err |= ret;
11310 break;
11311 case BTRFS_DEV_EXTENT_KEY:
11312 ret = check_dev_extent_item(fs_info, eb, slot);
11313 err |= ret;
11314 break;
11315 case BTRFS_EXTENT_ITEM_KEY:
11316 case BTRFS_METADATA_ITEM_KEY:
11317 ret = check_extent_item(fs_info, eb, slot);
11318 err |= ret;
11319 break;
11320 case BTRFS_EXTENT_CSUM_KEY:
11321 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11322 break;
11323 case BTRFS_TREE_BLOCK_REF_KEY:
11324 ret = check_tree_block_backref(fs_info, key.offset,
11325 key.objectid, -1);
11326 err |= ret;
11327 break;
11328 case BTRFS_EXTENT_DATA_REF_KEY:
11329 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11330 ret = check_extent_data_backref(fs_info,
11331 btrfs_extent_data_ref_root(eb, dref),
11332 btrfs_extent_data_ref_objectid(eb, dref),
11333 btrfs_extent_data_ref_offset(eb, dref),
11334 key.objectid, 0,
11335 btrfs_extent_data_ref_count(eb, dref));
11336 err |= ret;
11337 break;
11338 case BTRFS_SHARED_BLOCK_REF_KEY:
11339 ret = check_shared_block_backref(fs_info, key.offset,
11340 key.objectid, -1);
11341 err |= ret;
11342 break;
11343 case BTRFS_SHARED_DATA_REF_KEY:
11344 ret = check_shared_data_backref(fs_info, key.offset,
11345 key.objectid);
11346 err |= ret;
11347 break;
11348 default:
11349 break;
11352 if (++slot < btrfs_header_nritems(eb))
11353 goto next;
11355 return err;
11359 * Helper function for later fs/subvol tree check. To determine if a tree
11360 * block should be checked.
11361 * This function will ensure only the direct referencer with lowest rootid to
11362 * check a fs/subvolume tree block.
11364 * Backref check at extent tree would detect errors like missing subvolume
11365 * tree, so we can do aggressive check to reduce duplicated checks.
11367 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11369 struct btrfs_root *extent_root = root->fs_info->extent_root;
11370 struct btrfs_key key;
11371 struct btrfs_path path;
11372 struct extent_buffer *leaf;
11373 int slot;
11374 struct btrfs_extent_item *ei;
11375 unsigned long ptr;
11376 unsigned long end;
11377 int type;
11378 u32 item_size;
11379 u64 offset;
11380 struct btrfs_extent_inline_ref *iref;
11381 int ret;
11383 btrfs_init_path(&path);
11384 key.objectid = btrfs_header_bytenr(eb);
11385 key.type = BTRFS_METADATA_ITEM_KEY;
11386 key.offset = (u64)-1;
11389 * Any failure in backref resolving means we can't determine
11390 * whom the tree block belongs to.
11391 * So in that case, we need to check that tree block
11393 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11394 if (ret < 0)
11395 goto need_check;
11397 ret = btrfs_previous_extent_item(extent_root, &path,
11398 btrfs_header_bytenr(eb));
11399 if (ret)
11400 goto need_check;
11402 leaf = path.nodes[0];
11403 slot = path.slots[0];
11404 btrfs_item_key_to_cpu(leaf, &key, slot);
11405 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11407 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11408 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11409 } else {
11410 struct btrfs_tree_block_info *info;
11412 info = (struct btrfs_tree_block_info *)(ei + 1);
11413 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11416 item_size = btrfs_item_size_nr(leaf, slot);
11417 ptr = (unsigned long)iref;
11418 end = (unsigned long)ei + item_size;
11419 while (ptr < end) {
11420 iref = (struct btrfs_extent_inline_ref *)ptr;
11421 type = btrfs_extent_inline_ref_type(leaf, iref);
11422 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11425 * We only check the tree block if current root is
11426 * the lowest referencer of it.
11428 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11429 offset < root->objectid) {
11430 btrfs_release_path(&path);
11431 return 0;
11434 ptr += btrfs_extent_inline_ref_size(type);
11437 * Normally we should also check keyed tree block ref, but that may be
11438 * very time consuming. Inlined ref should already make us skip a lot
11439 * of refs now. So skip search keyed tree block ref.
11442 need_check:
11443 btrfs_release_path(&path);
11444 return 1;
11448 * Traversal function for tree block. We will do:
11449 * 1) Skip shared fs/subvolume tree blocks
11450 * 2) Update related bytes accounting
11451 * 3) Pre-order traversal
11453 static int traverse_tree_block(struct btrfs_root *root,
11454 struct extent_buffer *node)
11456 struct extent_buffer *eb;
11457 struct btrfs_key key;
11458 struct btrfs_key drop_key;
11459 int level;
11460 u64 nr;
11461 int i;
11462 int err = 0;
11463 int ret;
11466 * Skip shared fs/subvolume tree block, in that case they will
11467 * be checked by referencer with lowest rootid
11469 if (is_fstree(root->objectid) && !should_check(root, node))
11470 return 0;
11472 /* Update bytes accounting */
11473 total_btree_bytes += node->len;
11474 if (fs_root_objectid(btrfs_header_owner(node)))
11475 total_fs_tree_bytes += node->len;
11476 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11477 total_extent_tree_bytes += node->len;
11478 if (!found_old_backref &&
11479 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11480 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11481 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11482 found_old_backref = 1;
11484 /* pre-order tranversal, check itself first */
11485 level = btrfs_header_level(node);
11486 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11487 btrfs_header_level(node),
11488 btrfs_header_owner(node));
11489 err |= ret;
11490 if (err)
11491 error(
11492 "check %s failed root %llu bytenr %llu level %d, force continue check",
11493 level ? "node":"leaf", root->objectid,
11494 btrfs_header_bytenr(node), btrfs_header_level(node));
11496 if (!level) {
11497 btree_space_waste += btrfs_leaf_free_space(root, node);
11498 ret = check_leaf_items(root, node);
11499 err |= ret;
11500 return err;
11503 nr = btrfs_header_nritems(node);
11504 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11505 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11506 sizeof(struct btrfs_key_ptr);
11508 /* Then check all its children */
11509 for (i = 0; i < nr; i++) {
11510 u64 blocknr = btrfs_node_blockptr(node, i);
11512 btrfs_node_key_to_cpu(node, &key, i);
11513 if (level == root->root_item.drop_level &&
11514 is_dropped_key(&key, &drop_key))
11515 continue;
11518 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11519 * to call the function itself.
11521 eb = read_tree_block(root->fs_info, blocknr,
11522 root->fs_info->nodesize, 0);
11523 if (extent_buffer_uptodate(eb)) {
11524 ret = traverse_tree_block(root, eb);
11525 err |= ret;
11527 free_extent_buffer(eb);
11530 return err;
11534 * Low memory usage version check_chunks_and_extents.
11536 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11538 struct btrfs_path path;
11539 struct btrfs_key key;
11540 struct btrfs_root *root1;
11541 struct btrfs_root *cur_root;
11542 int err = 0;
11543 int ret;
11545 root1 = root->fs_info->chunk_root;
11546 ret = traverse_tree_block(root1, root1->node);
11547 err |= ret;
11549 root1 = root->fs_info->tree_root;
11550 ret = traverse_tree_block(root1, root1->node);
11551 err |= ret;
11553 btrfs_init_path(&path);
11554 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11555 key.offset = 0;
11556 key.type = BTRFS_ROOT_ITEM_KEY;
11558 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11559 if (ret) {
11560 error("cannot find extent treet in tree_root");
11561 goto out;
11564 while (1) {
11565 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11566 if (key.type != BTRFS_ROOT_ITEM_KEY)
11567 goto next;
11568 key.offset = (u64)-1;
11570 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11571 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11572 &key);
11573 else
11574 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11575 if (IS_ERR(cur_root) || !cur_root) {
11576 error("failed to read tree: %lld", key.objectid);
11577 goto next;
11580 ret = traverse_tree_block(cur_root, cur_root->node);
11581 err |= ret;
11583 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11584 btrfs_free_fs_root(cur_root);
11585 next:
11586 ret = btrfs_next_item(root1, &path);
11587 if (ret)
11588 goto out;
11591 out:
11592 btrfs_release_path(&path);
11593 return err;
11596 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11597 struct btrfs_root *root, int overwrite)
11599 struct extent_buffer *c;
11600 struct extent_buffer *old = root->node;
11601 int level;
11602 int ret;
11603 struct btrfs_disk_key disk_key = {0,0,0};
11605 level = 0;
11607 if (overwrite) {
11608 c = old;
11609 extent_buffer_get(c);
11610 goto init;
11612 c = btrfs_alloc_free_block(trans, root,
11613 root->fs_info->nodesize,
11614 root->root_key.objectid,
11615 &disk_key, level, 0, 0);
11616 if (IS_ERR(c)) {
11617 c = old;
11618 extent_buffer_get(c);
11619 overwrite = 1;
11621 init:
11622 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11623 btrfs_set_header_level(c, level);
11624 btrfs_set_header_bytenr(c, c->start);
11625 btrfs_set_header_generation(c, trans->transid);
11626 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11627 btrfs_set_header_owner(c, root->root_key.objectid);
11629 write_extent_buffer(c, root->fs_info->fsid,
11630 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11632 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11633 btrfs_header_chunk_tree_uuid(c),
11634 BTRFS_UUID_SIZE);
11636 btrfs_mark_buffer_dirty(c);
11638 * this case can happen in the following case:
11640 * 1.overwrite previous root.
11642 * 2.reinit reloc data root, this is because we skip pin
11643 * down reloc data tree before which means we can allocate
11644 * same block bytenr here.
11646 if (old->start == c->start) {
11647 btrfs_set_root_generation(&root->root_item,
11648 trans->transid);
11649 root->root_item.level = btrfs_header_level(root->node);
11650 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11651 &root->root_key, &root->root_item);
11652 if (ret) {
11653 free_extent_buffer(c);
11654 return ret;
11657 free_extent_buffer(old);
11658 root->node = c;
11659 add_root_to_dirty_list(root);
11660 return 0;
11663 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11664 struct extent_buffer *eb, int tree_root)
11666 struct extent_buffer *tmp;
11667 struct btrfs_root_item *ri;
11668 struct btrfs_key key;
11669 u64 bytenr;
11670 u32 nodesize;
11671 int level = btrfs_header_level(eb);
11672 int nritems;
11673 int ret;
11674 int i;
11677 * If we have pinned this block before, don't pin it again.
11678 * This can not only avoid forever loop with broken filesystem
11679 * but also give us some speedups.
11681 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11682 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11683 return 0;
11685 btrfs_pin_extent(fs_info, eb->start, eb->len);
11687 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11688 nritems = btrfs_header_nritems(eb);
11689 for (i = 0; i < nritems; i++) {
11690 if (level == 0) {
11691 btrfs_item_key_to_cpu(eb, &key, i);
11692 if (key.type != BTRFS_ROOT_ITEM_KEY)
11693 continue;
11694 /* Skip the extent root and reloc roots */
11695 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11696 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11697 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11698 continue;
11699 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11700 bytenr = btrfs_disk_root_bytenr(eb, ri);
11703 * If at any point we start needing the real root we
11704 * will have to build a stump root for the root we are
11705 * in, but for now this doesn't actually use the root so
11706 * just pass in extent_root.
11708 tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11709 if (!extent_buffer_uptodate(tmp)) {
11710 fprintf(stderr, "Error reading root block\n");
11711 return -EIO;
11713 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11714 free_extent_buffer(tmp);
11715 if (ret)
11716 return ret;
11717 } else {
11718 bytenr = btrfs_node_blockptr(eb, i);
11720 /* If we aren't the tree root don't read the block */
11721 if (level == 1 && !tree_root) {
11722 btrfs_pin_extent(fs_info, bytenr, nodesize);
11723 continue;
11726 tmp = read_tree_block(fs_info, bytenr,
11727 nodesize, 0);
11728 if (!extent_buffer_uptodate(tmp)) {
11729 fprintf(stderr, "Error reading tree block\n");
11730 return -EIO;
11732 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11733 free_extent_buffer(tmp);
11734 if (ret)
11735 return ret;
11739 return 0;
11742 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11744 int ret;
11746 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11747 if (ret)
11748 return ret;
11750 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11753 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11755 struct btrfs_block_group_cache *cache;
11756 struct btrfs_path path;
11757 struct extent_buffer *leaf;
11758 struct btrfs_chunk *chunk;
11759 struct btrfs_key key;
11760 int ret;
11761 u64 start;
11763 btrfs_init_path(&path);
11764 key.objectid = 0;
11765 key.type = BTRFS_CHUNK_ITEM_KEY;
11766 key.offset = 0;
11767 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11768 if (ret < 0) {
11769 btrfs_release_path(&path);
11770 return ret;
11774 * We do this in case the block groups were screwed up and had alloc
11775 * bits that aren't actually set on the chunks. This happens with
11776 * restored images every time and could happen in real life I guess.
11778 fs_info->avail_data_alloc_bits = 0;
11779 fs_info->avail_metadata_alloc_bits = 0;
11780 fs_info->avail_system_alloc_bits = 0;
11782 /* First we need to create the in-memory block groups */
11783 while (1) {
11784 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11785 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11786 if (ret < 0) {
11787 btrfs_release_path(&path);
11788 return ret;
11790 if (ret) {
11791 ret = 0;
11792 break;
11795 leaf = path.nodes[0];
11796 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11797 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11798 path.slots[0]++;
11799 continue;
11802 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11803 btrfs_add_block_group(fs_info, 0,
11804 btrfs_chunk_type(leaf, chunk),
11805 key.objectid, key.offset,
11806 btrfs_chunk_length(leaf, chunk));
11807 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11808 key.offset + btrfs_chunk_length(leaf, chunk));
11809 path.slots[0]++;
11811 start = 0;
11812 while (1) {
11813 cache = btrfs_lookup_first_block_group(fs_info, start);
11814 if (!cache)
11815 break;
11816 cache->cached = 1;
11817 start = cache->key.objectid + cache->key.offset;
11820 btrfs_release_path(&path);
11821 return 0;
11824 static int reset_balance(struct btrfs_trans_handle *trans,
11825 struct btrfs_fs_info *fs_info)
11827 struct btrfs_root *root = fs_info->tree_root;
11828 struct btrfs_path path;
11829 struct extent_buffer *leaf;
11830 struct btrfs_key key;
11831 int del_slot, del_nr = 0;
11832 int ret;
11833 int found = 0;
11835 btrfs_init_path(&path);
11836 key.objectid = BTRFS_BALANCE_OBJECTID;
11837 key.type = BTRFS_BALANCE_ITEM_KEY;
11838 key.offset = 0;
11839 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11840 if (ret) {
11841 if (ret > 0)
11842 ret = 0;
11843 if (!ret)
11844 goto reinit_data_reloc;
11845 else
11846 goto out;
11849 ret = btrfs_del_item(trans, root, &path);
11850 if (ret)
11851 goto out;
11852 btrfs_release_path(&path);
11854 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11855 key.type = BTRFS_ROOT_ITEM_KEY;
11856 key.offset = 0;
11857 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11858 if (ret < 0)
11859 goto out;
11860 while (1) {
11861 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11862 if (!found)
11863 break;
11865 if (del_nr) {
11866 ret = btrfs_del_items(trans, root, &path,
11867 del_slot, del_nr);
11868 del_nr = 0;
11869 if (ret)
11870 goto out;
11872 key.offset++;
11873 btrfs_release_path(&path);
11875 found = 0;
11876 ret = btrfs_search_slot(trans, root, &key, &path,
11877 -1, 1);
11878 if (ret < 0)
11879 goto out;
11880 continue;
11882 found = 1;
11883 leaf = path.nodes[0];
11884 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11885 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11886 break;
11887 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11888 path.slots[0]++;
11889 continue;
11891 if (!del_nr) {
11892 del_slot = path.slots[0];
11893 del_nr = 1;
11894 } else {
11895 del_nr++;
11897 path.slots[0]++;
11900 if (del_nr) {
11901 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11902 if (ret)
11903 goto out;
11905 btrfs_release_path(&path);
11907 reinit_data_reloc:
11908 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11909 key.type = BTRFS_ROOT_ITEM_KEY;
11910 key.offset = (u64)-1;
11911 root = btrfs_read_fs_root(fs_info, &key);
11912 if (IS_ERR(root)) {
11913 fprintf(stderr, "Error reading data reloc tree\n");
11914 ret = PTR_ERR(root);
11915 goto out;
11917 record_root_in_trans(trans, root);
11918 ret = btrfs_fsck_reinit_root(trans, root, 0);
11919 if (ret)
11920 goto out;
11921 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11922 out:
11923 btrfs_release_path(&path);
11924 return ret;
11927 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11928 struct btrfs_fs_info *fs_info)
11930 u64 start = 0;
11931 int ret;
11934 * The only reason we don't do this is because right now we're just
11935 * walking the trees we find and pinning down their bytes, we don't look
11936 * at any of the leaves. In order to do mixed groups we'd have to check
11937 * the leaves of any fs roots and pin down the bytes for any file
11938 * extents we find. Not hard but why do it if we don't have to?
11940 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11941 fprintf(stderr, "We don't support re-initing the extent tree "
11942 "for mixed block groups yet, please notify a btrfs "
11943 "developer you want to do this so they can add this "
11944 "functionality.\n");
11945 return -EINVAL;
11949 * first we need to walk all of the trees except the extent tree and pin
11950 * down the bytes that are in use so we don't overwrite any existing
11951 * metadata.
11953 ret = pin_metadata_blocks(fs_info);
11954 if (ret) {
11955 fprintf(stderr, "error pinning down used bytes\n");
11956 return ret;
11960 * Need to drop all the block groups since we're going to recreate all
11961 * of them again.
11963 btrfs_free_block_groups(fs_info);
11964 ret = reset_block_groups(fs_info);
11965 if (ret) {
11966 fprintf(stderr, "error resetting the block groups\n");
11967 return ret;
11970 /* Ok we can allocate now, reinit the extent root */
11971 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11972 if (ret) {
11973 fprintf(stderr, "extent root initialization failed\n");
11975 * When the transaction code is updated we should end the
11976 * transaction, but for now progs only knows about commit so
11977 * just return an error.
11979 return ret;
11983 * Now we have all the in-memory block groups setup so we can make
11984 * allocations properly, and the metadata we care about is safe since we
11985 * pinned all of it above.
11987 while (1) {
11988 struct btrfs_block_group_cache *cache;
11990 cache = btrfs_lookup_first_block_group(fs_info, start);
11991 if (!cache)
11992 break;
11993 start = cache->key.objectid + cache->key.offset;
11994 ret = btrfs_insert_item(trans, fs_info->extent_root,
11995 &cache->key, &cache->item,
11996 sizeof(cache->item));
11997 if (ret) {
11998 fprintf(stderr, "Error adding block group\n");
11999 return ret;
12001 btrfs_extent_post_op(trans, fs_info->extent_root);
12004 ret = reset_balance(trans, fs_info);
12005 if (ret)
12006 fprintf(stderr, "error resetting the pending balance\n");
12008 return ret;
12011 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12013 struct btrfs_path path;
12014 struct btrfs_trans_handle *trans;
12015 struct btrfs_key key;
12016 int ret;
12018 printf("Recowing metadata block %llu\n", eb->start);
12019 key.objectid = btrfs_header_owner(eb);
12020 key.type = BTRFS_ROOT_ITEM_KEY;
12021 key.offset = (u64)-1;
12023 root = btrfs_read_fs_root(root->fs_info, &key);
12024 if (IS_ERR(root)) {
12025 fprintf(stderr, "Couldn't find owner root %llu\n",
12026 key.objectid);
12027 return PTR_ERR(root);
12030 trans = btrfs_start_transaction(root, 1);
12031 if (IS_ERR(trans))
12032 return PTR_ERR(trans);
12034 btrfs_init_path(&path);
12035 path.lowest_level = btrfs_header_level(eb);
12036 if (path.lowest_level)
12037 btrfs_node_key_to_cpu(eb, &key, 0);
12038 else
12039 btrfs_item_key_to_cpu(eb, &key, 0);
12041 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12042 btrfs_commit_transaction(trans, root);
12043 btrfs_release_path(&path);
12044 return ret;
12047 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12049 struct btrfs_path path;
12050 struct btrfs_trans_handle *trans;
12051 struct btrfs_key key;
12052 int ret;
12054 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12055 bad->key.type, bad->key.offset);
12056 key.objectid = bad->root_id;
12057 key.type = BTRFS_ROOT_ITEM_KEY;
12058 key.offset = (u64)-1;
12060 root = btrfs_read_fs_root(root->fs_info, &key);
12061 if (IS_ERR(root)) {
12062 fprintf(stderr, "Couldn't find owner root %llu\n",
12063 key.objectid);
12064 return PTR_ERR(root);
12067 trans = btrfs_start_transaction(root, 1);
12068 if (IS_ERR(trans))
12069 return PTR_ERR(trans);
12071 btrfs_init_path(&path);
12072 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12073 if (ret) {
12074 if (ret > 0)
12075 ret = 0;
12076 goto out;
12078 ret = btrfs_del_item(trans, root, &path);
12079 out:
12080 btrfs_commit_transaction(trans, root);
12081 btrfs_release_path(&path);
12082 return ret;
12085 static int zero_log_tree(struct btrfs_root *root)
12087 struct btrfs_trans_handle *trans;
12088 int ret;
12090 trans = btrfs_start_transaction(root, 1);
12091 if (IS_ERR(trans)) {
12092 ret = PTR_ERR(trans);
12093 return ret;
12095 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12096 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12097 ret = btrfs_commit_transaction(trans, root);
12098 return ret;
12101 static int populate_csum(struct btrfs_trans_handle *trans,
12102 struct btrfs_root *csum_root, char *buf, u64 start,
12103 u64 len)
12105 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12106 u64 offset = 0;
12107 u64 sectorsize;
12108 int ret = 0;
12110 while (offset < len) {
12111 sectorsize = fs_info->sectorsize;
12112 ret = read_extent_data(fs_info, buf, start + offset,
12113 &sectorsize, 0);
12114 if (ret)
12115 break;
12116 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12117 start + offset, buf, sectorsize);
12118 if (ret)
12119 break;
12120 offset += sectorsize;
12122 return ret;
12125 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12126 struct btrfs_root *csum_root,
12127 struct btrfs_root *cur_root)
12129 struct btrfs_path path;
12130 struct btrfs_key key;
12131 struct extent_buffer *node;
12132 struct btrfs_file_extent_item *fi;
12133 char *buf = NULL;
12134 u64 start = 0;
12135 u64 len = 0;
12136 int slot = 0;
12137 int ret = 0;
12139 buf = malloc(cur_root->fs_info->sectorsize);
12140 if (!buf)
12141 return -ENOMEM;
12143 btrfs_init_path(&path);
12144 key.objectid = 0;
12145 key.offset = 0;
12146 key.type = 0;
12147 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12148 if (ret < 0)
12149 goto out;
12150 /* Iterate all regular file extents and fill its csum */
12151 while (1) {
12152 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12154 if (key.type != BTRFS_EXTENT_DATA_KEY)
12155 goto next;
12156 node = path.nodes[0];
12157 slot = path.slots[0];
12158 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12159 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12160 goto next;
12161 start = btrfs_file_extent_disk_bytenr(node, fi);
12162 len = btrfs_file_extent_disk_num_bytes(node, fi);
12164 ret = populate_csum(trans, csum_root, buf, start, len);
12165 if (ret == -EEXIST)
12166 ret = 0;
12167 if (ret < 0)
12168 goto out;
12169 next:
12171 * TODO: if next leaf is corrupted, jump to nearest next valid
12172 * leaf.
12174 ret = btrfs_next_item(cur_root, &path);
12175 if (ret < 0)
12176 goto out;
12177 if (ret > 0) {
12178 ret = 0;
12179 goto out;
12183 out:
12184 btrfs_release_path(&path);
12185 free(buf);
12186 return ret;
12189 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12190 struct btrfs_root *csum_root)
12192 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12193 struct btrfs_path path;
12194 struct btrfs_root *tree_root = fs_info->tree_root;
12195 struct btrfs_root *cur_root;
12196 struct extent_buffer *node;
12197 struct btrfs_key key;
12198 int slot = 0;
12199 int ret = 0;
12201 btrfs_init_path(&path);
12202 key.objectid = BTRFS_FS_TREE_OBJECTID;
12203 key.offset = 0;
12204 key.type = BTRFS_ROOT_ITEM_KEY;
12205 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12206 if (ret < 0)
12207 goto out;
12208 if (ret > 0) {
12209 ret = -ENOENT;
12210 goto out;
12213 while (1) {
12214 node = path.nodes[0];
12215 slot = path.slots[0];
12216 btrfs_item_key_to_cpu(node, &key, slot);
12217 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12218 goto out;
12219 if (key.type != BTRFS_ROOT_ITEM_KEY)
12220 goto next;
12221 if (!is_fstree(key.objectid))
12222 goto next;
12223 key.offset = (u64)-1;
12225 cur_root = btrfs_read_fs_root(fs_info, &key);
12226 if (IS_ERR(cur_root) || !cur_root) {
12227 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12228 key.objectid);
12229 goto out;
12231 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12232 cur_root);
12233 if (ret < 0)
12234 goto out;
12235 next:
12236 ret = btrfs_next_item(tree_root, &path);
12237 if (ret > 0) {
12238 ret = 0;
12239 goto out;
12241 if (ret < 0)
12242 goto out;
12245 out:
12246 btrfs_release_path(&path);
12247 return ret;
12250 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12251 struct btrfs_root *csum_root)
12253 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12254 struct btrfs_path path;
12255 struct btrfs_extent_item *ei;
12256 struct extent_buffer *leaf;
12257 char *buf;
12258 struct btrfs_key key;
12259 int ret;
12261 btrfs_init_path(&path);
12262 key.objectid = 0;
12263 key.type = BTRFS_EXTENT_ITEM_KEY;
12264 key.offset = 0;
12265 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12266 if (ret < 0) {
12267 btrfs_release_path(&path);
12268 return ret;
12271 buf = malloc(csum_root->fs_info->sectorsize);
12272 if (!buf) {
12273 btrfs_release_path(&path);
12274 return -ENOMEM;
12277 while (1) {
12278 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12279 ret = btrfs_next_leaf(extent_root, &path);
12280 if (ret < 0)
12281 break;
12282 if (ret) {
12283 ret = 0;
12284 break;
12287 leaf = path.nodes[0];
12289 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12290 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12291 path.slots[0]++;
12292 continue;
12295 ei = btrfs_item_ptr(leaf, path.slots[0],
12296 struct btrfs_extent_item);
12297 if (!(btrfs_extent_flags(leaf, ei) &
12298 BTRFS_EXTENT_FLAG_DATA)) {
12299 path.slots[0]++;
12300 continue;
12303 ret = populate_csum(trans, csum_root, buf, key.objectid,
12304 key.offset);
12305 if (ret)
12306 break;
12307 path.slots[0]++;
12310 btrfs_release_path(&path);
12311 free(buf);
12312 return ret;
/*
 * Recalculate the csums and put them into the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree can't be relied on and the fs trees are used instead: when
 * @search_fs_tree is set the fs/subvol trees are the source for the csum
 * tree, otherwise the extent tree is.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12332 static void free_roots_info_cache(void)
12334 if (!roots_info_cache)
12335 return;
12337 while (!cache_tree_empty(roots_info_cache)) {
12338 struct cache_extent *entry;
12339 struct root_item_info *rii;
12341 entry = first_cache_extent(roots_info_cache);
12342 if (!entry)
12343 break;
12344 remove_cache_extent(roots_info_cache, entry);
12345 rii = container_of(entry, struct root_item_info, cache_extent);
12346 free(rii);
12349 free(roots_info_cache);
12350 roots_info_cache = NULL;
12353 static int build_roots_info_cache(struct btrfs_fs_info *info)
12355 int ret = 0;
12356 struct btrfs_key key;
12357 struct extent_buffer *leaf;
12358 struct btrfs_path path;
12360 if (!roots_info_cache) {
12361 roots_info_cache = malloc(sizeof(*roots_info_cache));
12362 if (!roots_info_cache)
12363 return -ENOMEM;
12364 cache_tree_init(roots_info_cache);
12367 btrfs_init_path(&path);
12368 key.objectid = 0;
12369 key.type = BTRFS_EXTENT_ITEM_KEY;
12370 key.offset = 0;
12371 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12372 if (ret < 0)
12373 goto out;
12374 leaf = path.nodes[0];
12376 while (1) {
12377 struct btrfs_key found_key;
12378 struct btrfs_extent_item *ei;
12379 struct btrfs_extent_inline_ref *iref;
12380 int slot = path.slots[0];
12381 int type;
12382 u64 flags;
12383 u64 root_id;
12384 u8 level;
12385 struct cache_extent *entry;
12386 struct root_item_info *rii;
12388 if (slot >= btrfs_header_nritems(leaf)) {
12389 ret = btrfs_next_leaf(info->extent_root, &path);
12390 if (ret < 0) {
12391 break;
12392 } else if (ret) {
12393 ret = 0;
12394 break;
12396 leaf = path.nodes[0];
12397 slot = path.slots[0];
12400 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12402 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12403 found_key.type != BTRFS_METADATA_ITEM_KEY)
12404 goto next;
12406 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12407 flags = btrfs_extent_flags(leaf, ei);
12409 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12410 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12411 goto next;
12413 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12414 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12415 level = found_key.offset;
12416 } else {
12417 struct btrfs_tree_block_info *binfo;
12419 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12420 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12421 level = btrfs_tree_block_level(leaf, binfo);
12425 * For a root extent, it must be of the following type and the
12426 * first (and only one) iref in the item.
12428 type = btrfs_extent_inline_ref_type(leaf, iref);
12429 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12430 goto next;
12432 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12433 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12434 if (!entry) {
12435 rii = malloc(sizeof(struct root_item_info));
12436 if (!rii) {
12437 ret = -ENOMEM;
12438 goto out;
12440 rii->cache_extent.start = root_id;
12441 rii->cache_extent.size = 1;
12442 rii->level = (u8)-1;
12443 entry = &rii->cache_extent;
12444 ret = insert_cache_extent(roots_info_cache, entry);
12445 ASSERT(ret == 0);
12446 } else {
12447 rii = container_of(entry, struct root_item_info,
12448 cache_extent);
12451 ASSERT(rii->cache_extent.start == root_id);
12452 ASSERT(rii->cache_extent.size == 1);
12454 if (level > rii->level || rii->level == (u8)-1) {
12455 rii->level = level;
12456 rii->bytenr = found_key.objectid;
12457 rii->gen = btrfs_extent_generation(leaf, ei);
12458 rii->node_count = 1;
12459 } else if (level == rii->level) {
12460 rii->node_count++;
12462 next:
12463 path.slots[0]++;
12466 out:
12467 btrfs_release_path(&path);
12469 return ret;
12472 static int maybe_repair_root_item(struct btrfs_path *path,
12473 const struct btrfs_key *root_key,
12474 const int read_only_mode)
12476 const u64 root_id = root_key->objectid;
12477 struct cache_extent *entry;
12478 struct root_item_info *rii;
12479 struct btrfs_root_item ri;
12480 unsigned long offset;
12482 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12483 if (!entry) {
12484 fprintf(stderr,
12485 "Error: could not find extent items for root %llu\n",
12486 root_key->objectid);
12487 return -ENOENT;
12490 rii = container_of(entry, struct root_item_info, cache_extent);
12491 ASSERT(rii->cache_extent.start == root_id);
12492 ASSERT(rii->cache_extent.size == 1);
12494 if (rii->node_count != 1) {
12495 fprintf(stderr,
12496 "Error: could not find btree root extent for root %llu\n",
12497 root_id);
12498 return -ENOENT;
12501 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12502 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12504 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12505 btrfs_root_level(&ri) != rii->level ||
12506 btrfs_root_generation(&ri) != rii->gen) {
12509 * If we're in repair mode but our caller told us to not update
12510 * the root item, i.e. just check if it needs to be updated, don't
12511 * print this message, since the caller will call us again shortly
12512 * for the same root item without read only mode (the caller will
12513 * open a transaction first).
12515 if (!(read_only_mode && repair))
12516 fprintf(stderr,
12517 "%sroot item for root %llu,"
12518 " current bytenr %llu, current gen %llu, current level %u,"
12519 " new bytenr %llu, new gen %llu, new level %u\n",
12520 (read_only_mode ? "" : "fixing "),
12521 root_id,
12522 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12523 btrfs_root_level(&ri),
12524 rii->bytenr, rii->gen, rii->level);
12526 if (btrfs_root_generation(&ri) > rii->gen) {
12527 fprintf(stderr,
12528 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12529 root_id, btrfs_root_generation(&ri), rii->gen);
12530 return -EINVAL;
12533 if (!read_only_mode) {
12534 btrfs_set_root_bytenr(&ri, rii->bytenr);
12535 btrfs_set_root_level(&ri, rii->level);
12536 btrfs_set_root_generation(&ri, rii->gen);
12537 write_extent_buffer(path->nodes[0], &ri,
12538 offset, sizeof(ri));
12541 return 1;
12544 return 0;
12548 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12549 * caused read-only snapshots to be corrupted if they were created at a moment
12550 * when the source subvolume/snapshot had orphan items. The issue was that the
12551 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12552 * node instead of the post orphan cleanup root node.
12553 * So this function, and its callees, just detects and fixes those cases. Even
12554 * though the regression was for read-only snapshots, this function applies to
12555 * any snapshot/subvolume root.
12556 * This must be run before any other repair code - not doing it so, makes other
12557 * repair code delete or modify backrefs in the extent tree for example, which
12558 * will result in an inconsistent fs after repairing the root items.
12560 static int repair_root_items(struct btrfs_fs_info *info)
12562 struct btrfs_path path;
12563 struct btrfs_key key;
12564 struct extent_buffer *leaf;
12565 struct btrfs_trans_handle *trans = NULL;
12566 int ret = 0;
12567 int bad_roots = 0;
12568 int need_trans = 0;
12570 btrfs_init_path(&path);
12572 ret = build_roots_info_cache(info);
12573 if (ret)
12574 goto out;
12576 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12577 key.type = BTRFS_ROOT_ITEM_KEY;
12578 key.offset = 0;
12580 again:
12582 * Avoid opening and committing transactions if a leaf doesn't have
12583 * any root items that need to be fixed, so that we avoid rotating
12584 * backup roots unnecessarily.
12586 if (need_trans) {
12587 trans = btrfs_start_transaction(info->tree_root, 1);
12588 if (IS_ERR(trans)) {
12589 ret = PTR_ERR(trans);
12590 goto out;
12594 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12595 0, trans ? 1 : 0);
12596 if (ret < 0)
12597 goto out;
12598 leaf = path.nodes[0];
12600 while (1) {
12601 struct btrfs_key found_key;
12603 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12604 int no_more_keys = find_next_key(&path, &key);
12606 btrfs_release_path(&path);
12607 if (trans) {
12608 ret = btrfs_commit_transaction(trans,
12609 info->tree_root);
12610 trans = NULL;
12611 if (ret < 0)
12612 goto out;
12614 need_trans = 0;
12615 if (no_more_keys)
12616 break;
12617 goto again;
12620 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12622 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12623 goto next;
12624 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12625 goto next;
12627 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12628 if (ret < 0)
12629 goto out;
12630 if (ret) {
12631 if (!trans && repair) {
12632 need_trans = 1;
12633 key = found_key;
12634 btrfs_release_path(&path);
12635 goto again;
12637 bad_roots++;
12639 next:
12640 path.slots[0]++;
12642 ret = 0;
12643 out:
12644 free_roots_info_cache();
12645 btrfs_release_path(&path);
12646 if (trans)
12647 btrfs_commit_transaction(trans, info->tree_root);
12648 if (ret < 0)
12649 return ret;
12651 return bad_roots;
12654 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12656 struct btrfs_trans_handle *trans;
12657 struct btrfs_block_group_cache *bg_cache;
12658 u64 current = 0;
12659 int ret = 0;
12661 /* Clear all free space cache inodes and its extent data */
12662 while (1) {
12663 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12664 if (!bg_cache)
12665 break;
12666 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12667 if (ret < 0)
12668 return ret;
12669 current = bg_cache->key.objectid + bg_cache->key.offset;
12672 /* Don't forget to set cache_generation to -1 */
12673 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12674 if (IS_ERR(trans)) {
12675 error("failed to update super block cache generation");
12676 return PTR_ERR(trans);
12678 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12679 btrfs_commit_transaction(trans, fs_info->tree_root);
12681 return ret;
/*
 * Help text of 'btrfs check', passed to usage() by cmd_check().
 * The table is NULL-terminated.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	/* NOTE(review): presumably separates the description from the options */
	"",
	/* Options */
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
12715 int cmd_check(int argc, char **argv)
12717 struct cache_tree root_cache;
12718 struct btrfs_root *root;
12719 struct btrfs_fs_info *info;
12720 u64 bytenr = 0;
12721 u64 subvolid = 0;
12722 u64 tree_root_bytenr = 0;
12723 u64 chunk_root_bytenr = 0;
12724 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12725 int ret;
12726 int err = 0;
12727 u64 num;
12728 int init_csum_tree = 0;
12729 int readonly = 0;
12730 int clear_space_cache = 0;
12731 int qgroup_report = 0;
12732 int qgroups_repaired = 0;
12733 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12735 while(1) {
12736 int c;
12737 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12738 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12739 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12740 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12741 static const struct option long_options[] = {
12742 { "super", required_argument, NULL, 's' },
12743 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12744 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12745 { "init-csum-tree", no_argument, NULL,
12746 GETOPT_VAL_INIT_CSUM },
12747 { "init-extent-tree", no_argument, NULL,
12748 GETOPT_VAL_INIT_EXTENT },
12749 { "check-data-csum", no_argument, NULL,
12750 GETOPT_VAL_CHECK_CSUM },
12751 { "backup", no_argument, NULL, 'b' },
12752 { "subvol-extents", required_argument, NULL, 'E' },
12753 { "qgroup-report", no_argument, NULL, 'Q' },
12754 { "tree-root", required_argument, NULL, 'r' },
12755 { "chunk-root", required_argument, NULL,
12756 GETOPT_VAL_CHUNK_TREE },
12757 { "progress", no_argument, NULL, 'p' },
12758 { "mode", required_argument, NULL,
12759 GETOPT_VAL_MODE },
12760 { "clear-space-cache", required_argument, NULL,
12761 GETOPT_VAL_CLEAR_SPACE_CACHE},
12762 { NULL, 0, NULL, 0}
12765 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12766 if (c < 0)
12767 break;
12768 switch(c) {
12769 case 'a': /* ignored */ break;
12770 case 'b':
12771 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12772 break;
12773 case 's':
12774 num = arg_strtou64(optarg);
12775 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12776 error(
12777 "super mirror should be less than %d",
12778 BTRFS_SUPER_MIRROR_MAX);
12779 exit(1);
12781 bytenr = btrfs_sb_offset(((int)num));
12782 printf("using SB copy %llu, bytenr %llu\n", num,
12783 (unsigned long long)bytenr);
12784 break;
12785 case 'Q':
12786 qgroup_report = 1;
12787 break;
12788 case 'E':
12789 subvolid = arg_strtou64(optarg);
12790 break;
12791 case 'r':
12792 tree_root_bytenr = arg_strtou64(optarg);
12793 break;
12794 case GETOPT_VAL_CHUNK_TREE:
12795 chunk_root_bytenr = arg_strtou64(optarg);
12796 break;
12797 case 'p':
12798 ctx.progress_enabled = true;
12799 break;
12800 case '?':
12801 case 'h':
12802 usage(cmd_check_usage);
12803 case GETOPT_VAL_REPAIR:
12804 printf("enabling repair mode\n");
12805 repair = 1;
12806 ctree_flags |= OPEN_CTREE_WRITES;
12807 break;
12808 case GETOPT_VAL_READONLY:
12809 readonly = 1;
12810 break;
12811 case GETOPT_VAL_INIT_CSUM:
12812 printf("Creating a new CRC tree\n");
12813 init_csum_tree = 1;
12814 repair = 1;
12815 ctree_flags |= OPEN_CTREE_WRITES;
12816 break;
12817 case GETOPT_VAL_INIT_EXTENT:
12818 init_extent_tree = 1;
12819 ctree_flags |= (OPEN_CTREE_WRITES |
12820 OPEN_CTREE_NO_BLOCK_GROUPS);
12821 repair = 1;
12822 break;
12823 case GETOPT_VAL_CHECK_CSUM:
12824 check_data_csum = 1;
12825 break;
12826 case GETOPT_VAL_MODE:
12827 check_mode = parse_check_mode(optarg);
12828 if (check_mode == CHECK_MODE_UNKNOWN) {
12829 error("unknown mode: %s", optarg);
12830 exit(1);
12832 break;
12833 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12834 if (strcmp(optarg, "v1") == 0) {
12835 clear_space_cache = 1;
12836 } else if (strcmp(optarg, "v2") == 0) {
12837 clear_space_cache = 2;
12838 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12839 } else {
12840 error(
12841 "invalid argument to --clear-space-cache, must be v1 or v2");
12842 exit(1);
12844 ctree_flags |= OPEN_CTREE_WRITES;
12845 break;
12849 if (check_argc_exact(argc - optind, 1))
12850 usage(cmd_check_usage);
12852 if (ctx.progress_enabled) {
12853 ctx.tp = TASK_NOTHING;
12854 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12857 /* This check is the only reason for --readonly to exist */
12858 if (readonly && repair) {
12859 error("repair options are not compatible with --readonly");
12860 exit(1);
12864 * Not supported yet
12866 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12867 error("low memory mode doesn't support repair yet");
12868 exit(1);
12871 radix_tree_init();
12872 cache_tree_init(&root_cache);
12874 if((ret = check_mounted(argv[optind])) < 0) {
12875 error("could not check mount status: %s", strerror(-ret));
12876 err |= !!ret;
12877 goto err_out;
12878 } else if(ret) {
12879 error("%s is currently mounted, aborting", argv[optind]);
12880 ret = -EBUSY;
12881 err |= !!ret;
12882 goto err_out;
12885 /* only allow partial opening under repair mode */
12886 if (repair)
12887 ctree_flags |= OPEN_CTREE_PARTIAL;
12889 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12890 chunk_root_bytenr, ctree_flags);
12891 if (!info) {
12892 error("cannot open file system");
12893 ret = -EIO;
12894 err |= !!ret;
12895 goto err_out;
12898 global_info = info;
12899 root = info->fs_root;
12900 if (clear_space_cache == 1) {
12901 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12902 error(
12903 "free space cache v2 detected, use --clear-space-cache v2");
12904 ret = 1;
12905 goto close_out;
12907 printf("Clearing free space cache\n");
12908 ret = clear_free_space_cache(info);
12909 if (ret) {
12910 error("failed to clear free space cache");
12911 ret = 1;
12912 } else {
12913 printf("Free space cache cleared\n");
12915 goto close_out;
12916 } else if (clear_space_cache == 2) {
12917 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12918 printf("no free space cache v2 to clear\n");
12919 ret = 0;
12920 goto close_out;
12922 printf("Clear free space cache v2\n");
12923 ret = btrfs_clear_free_space_tree(info);
12924 if (ret) {
12925 error("failed to clear free space cache v2: %d", ret);
12926 ret = 1;
12927 } else {
12928 printf("free space cache v2 cleared\n");
12930 goto close_out;
12934 * repair mode will force us to commit transaction which
12935 * will make us fail to load log tree when mounting.
12937 if (repair && btrfs_super_log_root(info->super_copy)) {
12938 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12939 if (!ret) {
12940 ret = 1;
12941 err |= !!ret;
12942 goto close_out;
12944 ret = zero_log_tree(root);
12945 err |= !!ret;
12946 if (ret) {
12947 error("failed to zero log tree: %d", ret);
12948 goto close_out;
12952 uuid_unparse(info->super_copy->fsid, uuidbuf);
12953 if (qgroup_report) {
12954 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12955 uuidbuf);
12956 ret = qgroup_verify_all(info);
12957 err |= !!ret;
12958 if (ret == 0)
12959 report_qgroups(1);
12960 goto close_out;
12962 if (subvolid) {
12963 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12964 subvolid, argv[optind], uuidbuf);
12965 ret = print_extent_state(info, subvolid);
12966 err |= !!ret;
12967 goto close_out;
12969 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12971 if (!extent_buffer_uptodate(info->tree_root->node) ||
12972 !extent_buffer_uptodate(info->dev_root->node) ||
12973 !extent_buffer_uptodate(info->chunk_root->node)) {
12974 error("critical roots corrupted, unable to check the filesystem");
12975 err |= !!ret;
12976 ret = -EIO;
12977 goto close_out;
12980 if (init_extent_tree || init_csum_tree) {
12981 struct btrfs_trans_handle *trans;
12983 trans = btrfs_start_transaction(info->extent_root, 0);
12984 if (IS_ERR(trans)) {
12985 error("error starting transaction");
12986 ret = PTR_ERR(trans);
12987 err |= !!ret;
12988 goto close_out;
12991 if (init_extent_tree) {
12992 printf("Creating a new extent tree\n");
12993 ret = reinit_extent_tree(trans, info);
12994 err |= !!ret;
12995 if (ret)
12996 goto close_out;
12999 if (init_csum_tree) {
13000 printf("Reinitialize checksum tree\n");
13001 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13002 if (ret) {
13003 error("checksum tree initialization failed: %d",
13004 ret);
13005 ret = -EIO;
13006 err |= !!ret;
13007 goto close_out;
13010 ret = fill_csum_tree(trans, info->csum_root,
13011 init_extent_tree);
13012 err |= !!ret;
13013 if (ret) {
13014 error("checksum tree refilling failed: %d", ret);
13015 return -EIO;
13019 * Ok now we commit and run the normal fsck, which will add
13020 * extent entries for all of the items it finds.
13022 ret = btrfs_commit_transaction(trans, info->extent_root);
13023 err |= !!ret;
13024 if (ret)
13025 goto close_out;
13027 if (!extent_buffer_uptodate(info->extent_root->node)) {
13028 error("critical: extent_root, unable to check the filesystem");
13029 ret = -EIO;
13030 err |= !!ret;
13031 goto close_out;
13033 if (!extent_buffer_uptodate(info->csum_root->node)) {
13034 error("critical: csum_root, unable to check the filesystem");
13035 ret = -EIO;
13036 err |= !!ret;
13037 goto close_out;
13040 if (!ctx.progress_enabled)
13041 fprintf(stderr, "checking extents\n");
13042 if (check_mode == CHECK_MODE_LOWMEM)
13043 ret = check_chunks_and_extents_v2(root);
13044 else
13045 ret = check_chunks_and_extents(root);
13046 err |= !!ret;
13047 if (ret)
13048 error(
13049 "errors found in extent allocation tree or chunk allocation");
13051 ret = repair_root_items(info);
13052 err |= !!ret;
13053 if (ret < 0) {
13054 error("failed to repair root items: %s", strerror(-ret));
13055 goto close_out;
13057 if (repair) {
13058 fprintf(stderr, "Fixed %d roots.\n", ret);
13059 ret = 0;
13060 } else if (ret > 0) {
13061 fprintf(stderr,
13062 "Found %d roots with an outdated root item.\n",
13063 ret);
13064 fprintf(stderr,
13065 "Please run a filesystem check with the option --repair to fix them.\n");
13066 ret = 1;
13067 err |= !!ret;
13068 goto close_out;
13071 if (!ctx.progress_enabled) {
13072 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13073 fprintf(stderr, "checking free space tree\n");
13074 else
13075 fprintf(stderr, "checking free space cache\n");
13077 ret = check_space_cache(root);
13078 err |= !!ret;
13079 if (ret) {
13080 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13081 error("errors found in free space tree");
13082 else
13083 error("errors found in free space cache");
13084 goto out;
13088 * We used to have to have these hole extents in between our real
13089 * extents so if we don't have this flag set we need to make sure there
13090 * are no gaps in the file extents for inodes, otherwise we can just
13091 * ignore it when this happens.
13093 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13094 if (!ctx.progress_enabled)
13095 fprintf(stderr, "checking fs roots\n");
13096 if (check_mode == CHECK_MODE_LOWMEM)
13097 ret = check_fs_roots_v2(root->fs_info);
13098 else
13099 ret = check_fs_roots(root, &root_cache);
13100 err |= !!ret;
13101 if (ret) {
13102 error("errors found in fs roots");
13103 goto out;
13106 fprintf(stderr, "checking csums\n");
13107 ret = check_csums(root);
13108 err |= !!ret;
13109 if (ret) {
13110 error("errors found in csum tree");
13111 goto out;
13114 fprintf(stderr, "checking root refs\n");
13115 /* For low memory mode, check_fs_roots_v2 handles root refs */
13116 if (check_mode != CHECK_MODE_LOWMEM) {
13117 ret = check_root_refs(root, &root_cache);
13118 err |= !!ret;
13119 if (ret) {
13120 error("errors found in root refs");
13121 goto out;
13125 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13126 struct extent_buffer *eb;
13128 eb = list_first_entry(&root->fs_info->recow_ebs,
13129 struct extent_buffer, recow);
13130 list_del_init(&eb->recow);
13131 ret = recow_extent_buffer(root, eb);
13132 err |= !!ret;
13133 if (ret) {
13134 error("fails to fix transid errors");
13135 break;
13139 while (!list_empty(&delete_items)) {
13140 struct bad_item *bad;
13142 bad = list_first_entry(&delete_items, struct bad_item, list);
13143 list_del_init(&bad->list);
13144 if (repair) {
13145 ret = delete_bad_item(root, bad);
13146 err |= !!ret;
13148 free(bad);
13151 if (info->quota_enabled) {
13152 fprintf(stderr, "checking quota groups\n");
13153 ret = qgroup_verify_all(info);
13154 err |= !!ret;
13155 if (ret) {
13156 error("failed to check quota groups");
13157 goto out;
13159 report_qgroups(0);
13160 ret = repair_qgroups(info, &qgroups_repaired);
13161 err |= !!ret;
13162 if (err) {
13163 error("failed to repair quota groups");
13164 goto out;
13166 ret = 0;
13169 if (!list_empty(&root->fs_info->recow_ebs)) {
13170 error("transid errors in file system");
13171 ret = 1;
13172 err |= !!ret;
13174 out:
13175 if (found_old_backref) { /*
13176 * there was a disk format change when mixed
13177 * backref was in testing tree. The old format
13178 * existed about one week.
13180 printf("\n * Found old mixed backref format. "
13181 "The old format is not supported! *"
13182 "\n * Please mount the FS in readonly mode, "
13183 "backup data and re-format the FS. *\n\n");
13184 err |= 1;
13186 printf("found %llu bytes used, ",
13187 (unsigned long long)bytes_used);
13188 if (err)
13189 printf("error(s) found\n");
13190 else
13191 printf("no error found\n");
13192 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13193 printf("total tree bytes: %llu\n",
13194 (unsigned long long)total_btree_bytes);
13195 printf("total fs tree bytes: %llu\n",
13196 (unsigned long long)total_fs_tree_bytes);
13197 printf("total extent tree bytes: %llu\n",
13198 (unsigned long long)total_extent_tree_bytes);
13199 printf("btree space waste bytes: %llu\n",
13200 (unsigned long long)btree_space_waste);
13201 printf("file data blocks allocated: %llu\n referenced %llu\n",
13202 (unsigned long long)data_bytes_allocated,
13203 (unsigned long long)data_bytes_referenced);
13205 free_qgroup_counts();
13206 free_root_recs_tree(&root_cache);
13207 close_out:
13208 close_ctree(root);
13209 err_out:
13210 if (ctx.progress_enabled)
13211 task_deinit(ctx.info);
13213 return err;