btrfs-progs: image: move metadump definitions to own header
[btrfs-progs-unstable/devel.git] / cmds-check.c
blobaf8c62c8639ace3166b134bc1549dbbd0b32f173
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
/* Identifies which progress message print_status_check() should display. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: has to remain the last enumerator. */
	TASK_NOTHING,
};
54 struct task_ctx {
55 int progress_enabled;
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Currently selected mode; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct rb_node node;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
103 union {
104 u64 parent;
105 u64 root;
107 u64 owner;
108 u64 offset;
109 u64 disk_bytenr;
110 u64 bytes;
111 u64 ram_bytes;
112 u32 num_refs;
113 u32 found_ref;
/* Error bits, one bit per condition (bit 0 is not used by this list). */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1 << 21)	/* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
155 return 1;
156 if (back1->parent < back2->parent)
157 return -1;
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
161 return 0;
163 if (back1->owner > back2->owner)
164 return 1;
165 if (back1->owner < back2->owner)
166 return -1;
168 if (back1->offset > back2->offset)
169 return 1;
170 if (back1->offset < back2->offset)
171 return -1;
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
175 return 1;
176 if (back1->disk_bytenr < back2->disk_bytenr)
177 return -1;
179 if (back1->bytes > back2->bytes)
180 return 1;
181 if (back1->bytes < back2->bytes)
182 return -1;
185 return 0;
189 * Much like data_backref, just removed the undetermined members
190 * and change it to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
196 u64 root;
197 u64 objectid;
198 u64 offset;
199 u64 disk_bytenr;
200 u64 disk_len;
203 struct tree_backref {
204 struct extent_backref node;
205 union {
206 u64 parent;
207 u64 root;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
228 return 1;
229 if (back1->parent < back2->parent)
230 return -1;
232 return 0;
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
241 return 1;
243 if (ext1->is_data < ext2->is_data)
244 return -1;
246 if (ext1->full_backref > ext2->full_backref)
247 return 1;
248 if (ext1->full_backref < ext2->full_backref)
249 return -1;
251 if (ext1->is_data)
252 return compare_data_backref(node1, node2);
253 else
254 return compare_tree_backref(node1, node2);
/*
 * Explicit "not yet determined" value for the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2
};
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
267 u64 start;
268 u64 max_size;
269 u64 nr;
270 u64 refs;
271 u64 extent_item_refs;
272 u64 generation;
273 u64 parent_generation;
274 u64 info_objectid;
275 u32 num_duplicates;
276 u8 info_level;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
298 u8 filetype;
299 u8 ref_type;
300 int errors;
301 u64 dir;
302 u64 index;
303 u16 namelen;
304 char name[0];
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
314 u64 objectid;
315 u64 bytenr;
316 u64 last_snapshot;
317 u8 level;
318 u8 drop_level;
319 struct btrfs_key drop_key;
/* Error bits stored in inode_backref::errors and root_backref::errors. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
336 struct file_extent_hole {
337 struct rb_node node;
338 u64 start;
339 u64 len;
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
352 int errors;
354 u64 ino;
355 u32 nlink;
356 u32 imode;
357 u64 isize;
358 u64 nbytes;
360 u32 found_link;
361 u64 found_size;
362 u64 extent_start;
363 u64 extent_end;
364 struct rb_root holes;
365 struct list_head orphan_extents;
367 u32 refs;
/* Error bits stored in inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
393 int errors;
394 u64 ref_root;
395 u64 dir;
396 u64 index;
397 u16 namelen;
398 char name[0];
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
406 struct root_record {
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
410 u64 objectid;
411 u32 found_ref;
414 struct ptr_node {
415 struct cache_extent cache;
416 void *data;
419 struct shared_node {
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
424 u32 refs;
427 struct block_info {
428 u64 start;
429 u32 size;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
435 int active_node;
436 int root_level;
439 struct bad_item {
440 struct btrfs_key key;
441 u64 root_id;
442 struct list_head list;
445 struct extent_entry {
446 u64 bytenr;
447 u64 bytes;
448 int count;
449 int broken;
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
455 u8 level;
456 /* number of nodes at this level, must be 1 for a root */
457 int node_count;
458 u64 bytenr;
459 u64 gen;
460 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about it yet. Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the first
 * free bit.  All other values are unchanged.
 */
#define BACKREF_MISSING			(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH		(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED			(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING		(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH		(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH		(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE			(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH		(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH		(1 << 8)
#define CROSSING_STRIPE_BOUNDARY	(1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
484 uint32_t count = 0;
485 static char *task_position_string[] = {
486 "checking extents",
487 "checking free space cache",
488 "checking fs roots",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
494 return NULL;
496 while (1) {
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
499 count++;
500 fflush(stdout);
501 task_period_wait(priv->info);
503 return NULL;
/*
 * Progress task completion hook: end the status line with a newline and
 * flush stdout.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
532 return (u64)-1;
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535 return hole->start;
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
547 return -1;
548 if (hole1->start < hole2->start)
549 return 1;
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
556 return -1;
557 /* Hole 2 will be merge center */
558 return 1;
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
568 u64 start, u64 len)
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
575 if (!hole)
576 return -ENOMEM;
577 hole->start = start;
578 hole->len = len;
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 node);
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
590 free(prev);
591 prev = NULL;
594 /* iterate merge with next holes */
595 while (1) {
596 if (!rb_next(&hole->node))
597 break;
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 node);
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
603 hole->start;
604 rb_erase(&next->node, holes);
605 free(next);
606 next = NULL;
607 } else
608 break;
610 return 0;
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
616 u64 start;
618 hole = (struct file_extent_hole *)data;
619 start = hole->start;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
623 return -1;
624 if (start >= hole->start && start < hole->start + hole->len)
625 return 0;
626 return 1;
630 * Delete a hole in the record
632 * This will do the hole split and is much restrict than add.
634 static int del_file_extent_hole(struct rb_root *holes,
635 u64 start, u64 len)
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
639 u64 prev_start = 0;
640 u64 prev_len = 0;
641 u64 next_start = 0;
642 u64 next_len = 0;
643 struct rb_node *node;
644 int have_prev = 0;
645 int have_next = 0;
646 int ret = 0;
648 tmp.start = start;
649 tmp.len = len;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 if (!node)
652 return -EEXIST;
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
655 return -EEXIST;
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exists.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
664 have_prev = 1;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
669 have_next = 1;
671 rb_erase(node, holes);
672 free(hole);
673 if (have_prev) {
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 if (ret < 0)
676 return ret;
678 if (have_next) {
679 ret = add_file_extent_hole(holes, next_start, next_len);
680 if (ret < 0)
681 return ret;
683 return 0;
686 static int copy_file_extent_holes(struct rb_root *dst,
687 struct rb_root *src)
689 struct file_extent_hole *hole;
690 struct rb_node *node;
691 int ret = 0;
693 node = rb_first(src);
694 while (node) {
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 if (ret)
698 break;
699 node = rb_next(node);
701 return ret;
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
710 while (node) {
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
713 free(hole);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
733 #define S_SHIFT 12
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
756 return -1;
757 else if (rec1->devid < rec2->devid)
758 return 1;
759 else
760 return 0;
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
771 struct rb_node *rb;
772 size_t size;
773 int ret;
775 rec = malloc(sizeof(*rec));
776 if (!rec)
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
779 rec->refs = 1;
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
787 if (!backref) {
788 ret = -ENOMEM;
789 goto cleanup;
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
796 if (!dst_orphan) {
797 ret = -ENOMEM;
798 goto cleanup;
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804 if (ret < 0)
805 goto cleanup_rb;
807 return rec;
809 cleanup_rb:
810 rb = rb_first(&rec->holes);
811 while (rb) {
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
815 rb = rb_next(rb);
816 free(hole);
819 cleanup:
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
823 free(orig);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
829 free(orig);
832 free(rec);
834 return ERR_PTR(ret);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 u64 objectid)
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
843 return;
844 printf("The following data extent is lost in tree %llu:\n",
845 objectid);
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
849 orphan->disk_len);
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
858 if (!errors)
859 return;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
908 int found = 0;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
912 while (node) {
913 found = 1;
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
919 if (!found)
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 round_up(rec->isize,
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 u64 ino, int mod)
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
963 int ret;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
966 if (cache) {
967 node = container_of(cache, struct ptr_node, cache);
968 rec = node->data;
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
972 return node->data;
973 rec->refs--;
974 rec = node->data;
976 } else if (mod) {
977 rec = calloc(1, sizeof(*rec));
978 if (!rec)
979 return ERR_PTR(-ENOMEM);
980 rec->ino = ino;
981 rec->extent_start = (u64)-1;
982 rec->refs = 1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
988 if (!node) {
989 free(rec);
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
994 node->data = rec;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
997 rec->found_link = 1;
999 ret = insert_cache_extent(inode_cache, &node->cache);
1000 if (ret)
1001 return ERR_PTR(-EEXIST);
1003 return rec;
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1014 free(orphan);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1023 return;
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1028 free(backref);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1032 free(rec);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039 return 1;
1040 return 0;
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1049 u8 filetype;
1051 if (!rec->found_inode_item)
1052 return;
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1062 free(backref);
1067 if (!rec->checked || rec->merging)
1068 return;
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1099 free(node);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1108 int ret;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 key.offset = ino;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1117 if (ret > 0)
1118 ret = -ENOENT;
1119 return ret;
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 return 1;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->nodatasum = 1;
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1146 return 0;
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 const char *name,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 break;
1158 if (backref->dir != dir || backref->namelen != namelen)
1159 continue;
1160 if (memcmp(name, backref->name, namelen))
1161 continue;
1162 return backref;
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1166 if (!backref)
1167 return NULL;
1168 memset(backref, 0, sizeof(*backref));
1169 backref->dir = dir;
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1174 return backref;
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1188 BUG_ON(!backref);
1189 if (errors)
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 rec->found_link++;
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 else
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1222 } else {
1223 BUG_ON(1);
1226 maybe_free_inode_rec(inode_cache, rec);
1227 return 0;
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1234 u32 dir_count = 0;
1235 int ret = 0;
1237 dst->merging = 1;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1246 dir_count++;
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 if (ret < 0)
1271 return ret;
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1281 } else {
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1286 dst->extent_end,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1303 } else {
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
1307 dst->merging = 0;
1309 return 0;
/*
 * Transfer every inode record held by src_node (root_cache first, then
 * inode_cache) into the matching cache of dst_node.
 *
 * One reference on src_node is dropped. If that was the last one, the
 * ptr_node entries are moved; otherwise a new ptr_node is allocated per
 * entry and the shared record gets an extra reference.
 * Always returns 0; allocation and lookup failures hit BUG_ON().
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
1320 int splice = 0;
1321 int ret;
/* last reference dropped: entries can be moved instead of duplicated */
1323 if (--src_node->refs == 0)
1324 splice = 1;
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1330 again:
1331 cache = search_cache_extent(src, 0);
1332 while (cache) {
1333 node = container_of(cache, struct ptr_node, cache);
1334 rec = node->data;
/* advance before the current entry may be removed from src */
1335 cache = next_cache_extent(cache);
1337 if (splice) {
1338 remove_cache_extent(src, &node->cache);
1339 ins = node;
1340 } else {
1341 ins = malloc(sizeof(*ins));
1342 BUG_ON(!ins);
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1345 ins->data = rec;
1346 rec->refs++;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* dst already tracks this inode: merge rec into it, then drop rec and ins */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
/* NOTE(review): the return value of merge_inode_recs() is ignored here */
1352 merge_inode_recs(rec, conflict, dst);
1353 if (rec->checked) {
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
1360 free(ins);
1361 } else {
1362 BUG_ON(ret);
/* second pass: repeat the whole loop for the inode_cache pair */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
1369 goto again;
/*
 * If src was working on a later inode than dst (or dst had none), mark the
 * current record of dst as checked and make dst continue with the inode src
 * was on. dst points at the inode_cache of dst_node at this point.
 */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
1381 return 0;
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1390 rec = node->data;
1391 free_inode_rec(rec);
1392 free(node);
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 u64 bytenr)
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1404 if (cache) {
1405 node = container_of(cache, struct shared_node, cache);
1406 return node;
1408 return NULL;
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 int ret;
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1417 if (!node)
1418 return -ENOMEM;
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1423 node->refs = refs;
1425 ret = insert_cache_extent(shared, &node->cache);
1427 return ret;
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1435 int ret;
1437 if (level == wc->active_node)
1438 return 0;
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 if (!node) {
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 BUG_ON(ret);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
1448 return 0;
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
1457 free(node);
1459 return 1;
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
1466 free(node);
1468 return 1;
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1476 int i;
1478 if (level == wc->root_level)
1479 return 0;
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482 if (wc->nodes[i])
1483 break;
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1496 } else {
1497 BUG_ON(node->refs < 2);
1498 node->refs--;
1500 return 0;
1504 * Returns:
1505 * < 0 - on error
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 u64 child_root_id)
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1517 int has_parent = 0;
1518 int ret;
1520 btrfs_init_path(&path);
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526 0, 0);
1527 if (ret < 0)
1528 return ret;
1529 btrfs_release_path(&path);
1530 if (!ret)
1531 return 1;
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 key.offset = 0;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537 0, 0);
1538 if (ret < 0)
1539 goto out;
1541 while (1) {
1542 leaf = path.nodes[0];
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 if (ret)
1546 break;
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 break;
1555 has_parent = 1;
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1559 return 1;
1562 path.slots[0]++;
1564 out:
1565 btrfs_release_path(&path);
1566 if (ret < 0)
1567 return ret;
1568 return has_parent ? 0 : 2;
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1575 u32 total;
1576 u32 cur = 0;
1577 u32 len;
1578 u32 name_len;
1579 u32 data_len;
1580 int error;
1581 int nritems = 0;
1582 u8 filetype;
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
1597 while (cur < total) {
1598 nritems++;
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1610 break;
1611 len = min_t(u32, total - cur - sizeof(*di),
1612 BTRFS_NAME_LEN);
1613 } else {
1614 len = name_len;
1615 error = 0;
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1636 key->type, error);
1637 } else {
1638 fprintf(stderr, "invalid location in dir item %u\n",
1639 location.type);
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
1647 cur += len;
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
1652 return 0;
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1659 u32 total;
1660 u32 cur = 0;
1661 u32 len;
1662 u32 name_len;
1663 u64 index;
1664 int error;
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
1680 if (total < cur + sizeof(*ref))
1681 break;
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1685 BTRFS_NAME_LEN);
1686 error = REF_ERR_NAME_TOO_LONG;
1687 } else {
1688 len = name_len;
1689 error = 0;
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1698 cur += len;
1700 return 0;
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1707 u32 total;
1708 u32 cur = 0;
1709 u32 len;
1710 u32 name_len;
1711 u64 index;
1712 u64 parent;
1713 int error;
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
1727 len = name_len;
1728 error = 0;
1729 } else {
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1740 cur += len;
1742 return 0;
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1752 int ret;
1753 size_t size;
1754 *found = 0;
1755 u64 csum_end;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.offset = start;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1765 &key, &path, 0, 0);
1766 if (ret < 0)
1767 goto out;
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 path.slots[0]--;
1776 while (len > 0) {
1777 leaf = path.nodes[0];
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 if (ret > 0)
1781 break;
1782 else if (ret < 0)
1783 goto out;
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 break;
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793 if (key.offset >= start + len)
1794 break;
1796 if (key.offset > start)
1797 start = key.offset;
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1804 len -= size;
1805 start += size;
1806 *found += size;
1809 path.slots[0]++;
1811 out:
1812 btrfs_release_path(&path);
1813 if (ret < 0)
1814 return ret;
1815 return 0;
/*
 * Account the EXTENT_DATA item at slot of eb in the inode record currently
 * tracked by active_node: extend the covered file range, record holes and
 * overlaps, validate the extent fields and check csum coverage.
 * Returns 0, or the negative error from add_file_extent_hole() or
 * count_csum_range().
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1825 u64 num_bytes = 0;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* low-bit mask used below to test and round up to sectorsize */
1828 u64 mask = root->fs_info->sectorsize - 1;
1829 int extent_type;
1830 int ret;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has no file extent range yet */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* extent starts before the previous end: overlap; after it: a hole */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1846 if (ret < 0)
1847 return ret;
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
/* inline: count the exact length, then round num_bytes up via mask */
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 if (num_bytes == 0)
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a PREALLOC extent with any compression/encryption/encoding set is bad */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* extents with disk_bytenr == 0 do not add to found_size */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1876 } else {
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1887 * data reloc tree.
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891 u64 found;
/* compressed: count over the whole on-disk extent; else skip to the used part */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 else
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898 if (ret < 0)
1899 return ret;
/* REG: note present and partially missing csums; PREALLOC: any csum is odd */
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 if (found > 0)
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 if (found > 0)
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1910 return 0;
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1917 u32 nritems;
1918 int i;
1919 int ret = 0;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1925 return 0;
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 continue;
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936 continue;
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1946 key.objectid, 1);
1947 BUG_ON(IS_ERR(active_node->current));
1949 switch (key.type) {
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1953 break;
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1956 break;
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1959 break;
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1962 break;
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
1965 active_node);
1966 break;
1967 default:
1968 break;
1971 return ret;
/*
 * Per-level cache of extent information for the tree blocks on the path
 * currently being walked; every array is indexed by tree level.
 */
1974 struct node_refs {
/* bytenr of the block whose values are cached for that level */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count of that block, filled from btrfs_lookup_extent_info() */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the block has to be checked while walking this root */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declaration: refreshes the node_refs entry of one level. */
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
/*
 * Forward declaration: used by process_one_leaf_v2(), which ORs its return
 * value into an error bitmap.
 */
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
/*
 * Check all inodes that start in the leaf path->nodes[0] by calling
 * check_inode_item() repeatedly. When the path moves on to another leaf,
 * refresh nrefs for the changed levels and release the lower part of the
 * path if a shared node does not need checking from this root.
 */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2000 u64 cur_bytenr;
2001 u32 nritems;
2002 u64 first_ino = 0;
2003 int root_level = btrfs_header_level(root->node);
2004 int i;
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2014 if (i == 0)
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2018 break;
/* no inode starts in this leaf: position past the last item and return */
2020 if (i == nritems) {
2021 path->slots[0] = nritems;
2022 return 0;
2024 path->slots[0] = i;
2026 again:
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
/* LAST_ITEM is an internal marker; it is masked out again at out: */
2032 if (err & LAST_ITEM)
2033 goto out;
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2037 goto again;
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2043 * node or leaf.
2045 for (i = root_level; i >= 0; i--) {
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2047 continue;
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2051 if (ret)
2052 goto out;
2054 if (!nrefs->need_check[i]) {
2055 *level += 1;
2056 break;
/* drop the buffers below *level so the caller does not descend into them */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
2064 out:
/* a fatal ret wins; otherwise report the accumulated error bitmap */
2065 err &= ~LAST_ITEM;
2066 if (err && !ret)
2067 ret = err;
2068 return ret;
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2075 u64 bytenr;
2076 u64 ptr_gen;
2077 u32 nritems;
2078 int i;
2079 int level;
2081 level = btrfs_header_level(node);
2082 if (level != 1)
2083 return;
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2096 * in parent.
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2111 int ret = 0;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2116 else
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2120 ret = -EINVAL;
2121 fprintf(stderr,
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127 ret = -EINVAL;
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
2134 ret = -EINVAL;
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_header_generation(child),
2137 btrfs_node_ptr_generation(parent, slot));
2139 return ret;
2143 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144 * in every fs or file tree check. Here we find its all root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
2152 if (roots->nnodes == 1)
2153 return 1;
2155 node = rb_first(&roots->root);
2156 u = rb_entry(node, struct ulist_node, rb_node);
2158 * current root id is not smallest, we skip it and let it be checked
2159 * in the fs or file tree who hash the smallest root id.
2161 if (root->objectid != u->val)
2162 return 0;
2164 return 1;
/*
 * Decide whether tree block eb, reached through root, is expected to carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *flags_ret.
 * Returns 0, -ENOMEM, -EIO, or the non-zero result of
 * btrfs_previous_extent_item().
 */
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2168 u64 *flags_ret)
2170 struct btrfs_root *extent_root = root->fs_info->extent_root;
2171 struct btrfs_root_item *ri = &root->root_item;
2172 struct btrfs_extent_inline_ref *iref;
2173 struct btrfs_extent_item *ei;
2174 struct btrfs_key key;
2175 struct btrfs_path *path = NULL;
2176 unsigned long ptr;
2177 unsigned long end;
2178 u64 flags;
2179 u64 owner = 0;
2180 u64 offset;
2181 int slot;
2182 int type;
2183 int ret = 0;
2186 * Except file/reloc tree, we can not have FULL BACKREF MODE
2188 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2189 goto normal;
2191 /* root node */
2192 if (eb->start == btrfs_root_bytenr(ri))
2193 goto normal;
2195 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2196 goto full_backref;
2198 owner = btrfs_header_owner(eb);
2199 if (owner == root->objectid)
2200 goto normal;
2202 path = btrfs_alloc_path();
2203 if (!path)
2204 return -ENOMEM;
/* search with the largest type/offset so the slot lands after the extent item */
2206 key.objectid = btrfs_header_bytenr(eb);
2207 key.type = (u8)-1;
2208 key.offset = (u64)-1;
2210 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
/* both a search error and an exact match are reported as -EIO */
2211 if (ret <= 0) {
2212 ret = -EIO;
2213 goto out;
/* ret is always > 0 here; step back to the extent item of this bytenr */
2216 if (ret > 0) {
2217 ret = btrfs_previous_extent_item(extent_root, path,
2218 key.objectid);
/* NOTE(review): this path sets the flag and also returns the non-zero ret */
2219 if (ret)
2220 goto full_backref;
2223 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* from here on eb is the extent tree leaf, not the block passed in */
2225 eb = path->nodes[0];
2226 slot = path->slots[0];
2227 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2229 flags = btrfs_extent_flags(eb, ei);
2230 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2231 goto full_backref;
/* ptr/end delimit the inline refs that follow the extent item */
2233 ptr = (unsigned long)(ei + 1);
2234 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2236 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237 ptr += sizeof(struct btrfs_tree_block_info);
2239 next:
2240 /* Reached extent item ends normally */
2241 if (ptr == end)
2242 goto full_backref;
2244 /* Beyond extent item end, wrong item size */
2245 if (ptr > end) {
2246 error("extent item at bytenr %llu slot %d has wrong size",
2247 eb->start, slot);
2248 goto full_backref;
2251 iref = (struct btrfs_extent_inline_ref *)ptr;
2252 offset = btrfs_extent_inline_ref_offset(eb, iref);
2253 type = btrfs_extent_inline_ref_type(eb, iref);
/* a TREE_BLOCK_REF whose offset equals the header owner means no full backref */
2255 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2256 goto normal;
2257 ptr += btrfs_extent_inline_ref_size(type);
2258 goto next;
2260 normal:
2261 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2262 goto out;
2264 full_backref:
2265 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2266 out:
/* path is still NULL when an early goto skipped the allocation */
2267 btrfs_free_path(path);
2268 return ret;
2272 * for a tree node or leaf, we record its reference count, so later if we still
2273 * process this node or leaf, don't need to compute its reference count again.
2275 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the nrefs entry of one level for the block at bytenr: reference
 * count, need_check and, when check_all is set and eb is given, full_backref.
 * Returns 0, the negative error of btrfs_lookup_extent_info() (only when
 * check_all is 0), or -EIO when btrfs_find_all_roots() fails.
 */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278 struct extent_buffer *eb, struct node_refs *nrefs,
2279 u64 level, int check_all)
2281 struct ulist *roots;
2282 u64 refs = 0;
2283 u64 flags = 0;
2284 int root_level = btrfs_header_level(root->node);
2285 int check;
2286 int ret;
/* this level already describes bytenr: nothing to refresh */
2288 if (nrefs->bytenr[level] == bytenr)
2289 return 0;
2291 if (bytenr != (u64)-1) {
2292 /* the return value of this function seems a mistake */
2293 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294 level, 1, &refs, &flags);
2295 /* temporary fix */
2296 if (ret < 0 && !check_all)
2297 return ret;
2299 nrefs->bytenr[level] = bytenr;
2300 nrefs->refs[level] = refs;
2301 nrefs->full_backref[level] = 0;
2302 nrefs->checked[level] = 0;
/* shared block: need_check() decides based on the roots referencing it */
2304 if (refs > 1) {
2305 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2306 0, &roots);
2307 if (ret)
2308 return -EIO;
2310 check = need_check(root, roots);
2311 ulist_free(roots);
2312 nrefs->need_check[level] = check;
2313 } else {
2314 if (!check_all) {
2315 nrefs->need_check[level] = 1;
2316 } else {
2317 if (level == root_level) {
2318 nrefs->need_check[level] = 1;
2319 } else {
2321 * The node refs may have not been
2322 * updated if upper needs checking (the
2323 * lowest root_objectid) the node can
2324 * be checked.
2326 nrefs->need_check[level] =
2327 nrefs->need_check[level + 1];
/* NOTE(review): the return value of calc_extent_flag_v2() is ignored */
2333 if (check_all && eb) {
2334 calc_extent_flag_v2(root, eb, &flags);
2335 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336 nrefs->full_backref[level] = 1;
2339 return 0;
2343 * @level if @level == -1 means extent data item
2344 * else normal treeblocl.
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347 struct node_refs *nrefs, int level)
2349 int root_level = btrfs_header_level(root->node);
2351 if (level > root_level || level < -1)
2352 return 1;
2353 if (level == root_level)
2354 return 1;
2356 * if the upper node is marked full backref, it should contain shared
2357 * backref of the parent (except owner == root->objectid).
2359 while (++level <= root_level)
2360 if (nrefs->refs[level] > 1)
2361 return 0;
2363 return 1;
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367 struct walk_control *wc, int *level,
2368 struct node_refs *nrefs)
2370 enum btrfs_tree_block_status status;
2371 u64 bytenr;
2372 u64 ptr_gen;
2373 struct btrfs_fs_info *fs_info = root->fs_info;
2374 struct extent_buffer *next;
2375 struct extent_buffer *cur;
2376 int ret, err = 0;
2377 u64 refs;
2379 WARN_ON(*level < 0);
2380 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2382 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383 refs = nrefs->refs[*level];
2384 ret = 0;
2385 } else {
2386 ret = btrfs_lookup_extent_info(NULL, root,
2387 path->nodes[*level]->start,
2388 *level, 1, &refs, NULL);
2389 if (ret < 0) {
2390 err = ret;
2391 goto out;
2393 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394 nrefs->refs[*level] = refs;
2397 if (refs > 1) {
2398 ret = enter_shared_node(root, path->nodes[*level]->start,
2399 refs, wc, *level);
2400 if (ret > 0) {
2401 err = ret;
2402 goto out;
2406 while (*level >= 0) {
2407 WARN_ON(*level < 0);
2408 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409 cur = path->nodes[*level];
2411 if (btrfs_header_level(cur) != *level)
2412 WARN_ON(1);
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2415 break;
2416 if (*level == 0) {
2417 ret = process_one_leaf(root, cur, wc);
2418 if (ret < 0)
2419 err = ret;
2420 break;
2422 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2425 if (bytenr == nrefs->bytenr[*level - 1]) {
2426 refs = nrefs->refs[*level - 1];
2427 } else {
2428 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429 *level - 1, 1, &refs, NULL);
2430 if (ret < 0) {
2431 refs = 0;
2432 } else {
2433 nrefs->bytenr[*level - 1] = bytenr;
2434 nrefs->refs[*level - 1] = refs;
2438 if (refs > 1) {
2439 ret = enter_shared_node(root, bytenr, refs,
2440 wc, *level - 1);
2441 if (ret > 0) {
2442 path->slots[*level]++;
2443 continue;
2447 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449 free_extent_buffer(next);
2450 reada_walk_down(root, cur, path->slots[*level]);
2451 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2452 if (!extent_buffer_uptodate(next)) {
2453 struct btrfs_key node_key;
2455 btrfs_node_key_to_cpu(path->nodes[*level],
2456 &node_key,
2457 path->slots[*level]);
2458 btrfs_add_corrupt_extent_record(root->fs_info,
2459 &node_key,
2460 path->nodes[*level]->start,
2461 root->fs_info->nodesize,
2462 *level);
2463 err = -EIO;
2464 goto out;
2468 ret = check_child_node(cur, path->slots[*level], next);
2469 if (ret) {
2470 free_extent_buffer(next);
2471 err = ret;
2472 goto out;
2475 if (btrfs_is_leaf(next))
2476 status = btrfs_check_leaf(root, NULL, next);
2477 else
2478 status = btrfs_check_node(root, NULL, next);
2479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480 free_extent_buffer(next);
2481 err = -EIO;
2482 goto out;
2485 *level = *level - 1;
2486 free_extent_buffer(path->nodes[*level]);
2487 path->nodes[*level] = next;
2488 path->slots[*level] = 0;
2490 out:
2491 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2492 return err;
2495 static int fs_root_objectid(u64 objectid);
2498 * Update global fs information.
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2501 int level)
2503 u32 free_nrs;
2504 struct extent_buffer *eb = path->nodes[level];
2506 total_btree_bytes += eb->len;
2507 if (fs_root_objectid(root->objectid))
2508 total_fs_tree_bytes += eb->len;
2509 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510 total_extent_tree_bytes += eb->len;
2512 if (level == 0) {
2513 btree_space_waste += btrfs_leaf_free_space(root, eb);
2514 } else {
2515 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516 btrfs_header_nritems(eb));
2517 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2522 * This function only handles BACKREF_MISSING,
2523 * If corresponding extent item exists, increase the ref, else insert an extent
2524 * item and backref.
2526 * Returns error bits after repair.
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct extent_buffer *node,
2531 struct node_refs *nrefs, int level, int err)
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 struct btrfs_root *extent_root = fs_info->extent_root;
2535 struct btrfs_path path;
2536 struct btrfs_extent_item *ei;
2537 struct btrfs_tree_block_info *bi;
2538 struct btrfs_key key;
2539 struct extent_buffer *eb;
2540 u32 size = sizeof(*ei);
2541 u32 node_size = root->fs_info->nodesize;
2542 int insert_extent = 0;
2543 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544 int root_level = btrfs_header_level(root->node);
2545 int generation;
2546 int ret;
2547 u64 owner;
2548 u64 bytenr;
2549 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2550 u64 parent = 0;
2552 if ((err & BACKREF_MISSING) == 0)
2553 return err;
2555 WARN_ON(level > BTRFS_MAX_LEVEL);
2556 WARN_ON(level < 0);
2558 btrfs_init_path(&path);
2559 bytenr = btrfs_header_bytenr(node);
2560 owner = btrfs_header_owner(node);
2561 generation = btrfs_header_generation(node);
2563 key.objectid = bytenr;
2564 key.type = (u8)-1;
2565 key.offset = (u64)-1;
2567 /* Search for the extent item */
2568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2569 if (ret <= 0) {
2570 ret = -EIO;
2571 goto out;
2574 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2575 if (ret)
2576 insert_extent = 1;
2578 /* calculate if the extent item flag is full backref or not */
2579 if (nrefs->full_backref[level] != 0)
2580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2582 /* insert an extent item */
2583 if (insert_extent) {
2584 struct btrfs_disk_key copy_key;
2586 generation = btrfs_header_generation(node);
2588 if (level < root_level && nrefs->full_backref[level + 1] &&
2589 owner != root->objectid) {
2590 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2593 key.objectid = bytenr;
2594 if (!skinny_metadata) {
2595 key.type = BTRFS_EXTENT_ITEM_KEY;
2596 key.offset = node_size;
2597 size += sizeof(*bi);
2598 } else {
2599 key.type = BTRFS_METADATA_ITEM_KEY;
2600 key.offset = level;
2603 btrfs_release_path(&path);
2604 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2605 size);
2606 if (ret)
2607 goto out;
2609 eb = path.nodes[0];
2610 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2612 btrfs_set_extent_refs(eb, ei, 0);
2613 btrfs_set_extent_generation(eb, ei, generation);
2614 btrfs_set_extent_flags(eb, ei, flags);
2616 if (!skinny_metadata) {
2617 bi = (struct btrfs_tree_block_info *)(ei + 1);
2618 memset_extent_buffer(eb, 0, (unsigned long)bi,
2619 sizeof(*bi));
2620 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621 btrfs_set_disk_key_type(&copy_key, 0);
2622 btrfs_set_disk_key_offset(&copy_key, 0);
2624 btrfs_set_tree_block_level(eb, bi, level);
2625 btrfs_set_tree_block_key(eb, bi, &copy_key);
2627 btrfs_mark_buffer_dirty(eb);
2628 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2630 1, 0);
2632 nrefs->refs[level] = 0;
2633 nrefs->full_backref[level] =
2634 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635 btrfs_release_path(&path);
2638 if (level < root_level && nrefs->full_backref[level + 1] &&
2639 owner != root->objectid)
2640 parent = nrefs->bytenr[level + 1];
2642 /* increase the ref */
2643 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644 parent, root->objectid, level, 0);
2646 nrefs->refs[level]++;
2647 out:
2648 btrfs_release_path(&path);
2649 if (ret) {
2650 error(
2651 "failed to repair tree block ref start %llu root %llu due to %s",
2652 bytenr, root->objectid, strerror(-ret));
2653 } else {
2654 printf("Added one tree block ref start %llu %s %llu\n",
2655 bytenr, parent ? "parent" : "root",
2656 parent ? parent : root->objectid);
2657 err &= ~BACKREF_MISSING;
2660 return err;
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664 unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666 struct extent_buffer *eb, u64 bytenr,
2667 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669 struct btrfs_root *root, struct btrfs_path *path,
2670 struct node_refs *nrefs, int account_bytes);
2673 * @trans just for lowmem repair mode
2674 * @check all if not 0 then check all tree block backrefs and items
2675 * 0 then just check relationship of items in fs tree(s)
2677 * Returns >0 Found error, should continue
2678 * Returns <0 Fatal error, must exit the whole check
2679 * Returns 0 No errors found
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *root, struct btrfs_path *path,
2683 int *level, struct node_refs *nrefs, int ext_ref,
2684 int check_all)
2687 enum btrfs_tree_block_status status;
2688 u64 bytenr;
2689 u64 ptr_gen;
2690 struct btrfs_fs_info *fs_info = root->fs_info;
2691 struct extent_buffer *next;
2692 struct extent_buffer *cur;
2693 int ret;
2694 int err = 0;
2695 int check;
2696 int account_file_data = 0;
2698 WARN_ON(*level < 0);
2699 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2701 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702 path->nodes[*level], nrefs, *level, check_all);
2703 if (ret < 0)
2704 return ret;
2706 while (*level >= 0) {
2707 WARN_ON(*level < 0);
2708 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709 cur = path->nodes[*level];
2710 bytenr = btrfs_header_bytenr(cur);
2711 check = nrefs->need_check[*level];
2713 if (btrfs_header_level(cur) != *level)
2714 WARN_ON(1);
2716 * Update bytes accounting and check tree block ref
2717 * NOTE: Doing accounting and check before checking nritems
2718 * is necessary because of empty node/leaf.
2720 if ((check_all && !nrefs->checked[*level]) ||
2721 (!check_all && nrefs->need_check[*level])) {
2722 ret = check_tree_block_ref(root, cur,
2723 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724 btrfs_header_owner(cur), nrefs);
2726 if (repair && ret)
2727 ret = repair_tree_block_ref(trans, root,
2728 path->nodes[*level], nrefs, *level, ret);
2729 err |= ret;
2731 if (check_all && nrefs->need_check[*level] &&
2732 nrefs->refs[*level]) {
2733 account_bytes(root, path, *level);
2734 account_file_data = 1;
2736 nrefs->checked[*level] = 1;
2739 if (path->slots[*level] >= btrfs_header_nritems(cur))
2740 break;
2742 /* Don't forgot to check leaf/node validation */
2743 if (*level == 0) {
2744 /* skip duplicate check */
2745 if (check || !check_all) {
2746 ret = btrfs_check_leaf(root, NULL, cur);
2747 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2748 err |= -EIO;
2749 break;
2753 ret = 0;
2754 if (!check_all)
2755 ret = process_one_leaf_v2(root, path, nrefs,
2756 level, ext_ref);
2757 else
2758 ret = check_leaf_items(trans, root, path,
2759 nrefs, account_file_data);
2760 err |= ret;
2761 break;
2762 } else {
2763 if (check || !check_all) {
2764 ret = btrfs_check_node(root, NULL, cur);
2765 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2766 err |= -EIO;
2767 break;
2772 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2775 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2776 check_all);
2777 if (ret < 0)
2778 break;
2780 * check all trees in check_chunks_and_extent_v2
2781 * check shared node once in check_fs_roots
2783 if (!check_all && !nrefs->need_check[*level - 1]) {
2784 path->slots[*level]++;
2785 continue;
2788 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790 free_extent_buffer(next);
2791 reada_walk_down(root, cur, path->slots[*level]);
2792 next = read_tree_block(fs_info, bytenr, ptr_gen);
2793 if (!extent_buffer_uptodate(next)) {
2794 struct btrfs_key node_key;
2796 btrfs_node_key_to_cpu(path->nodes[*level],
2797 &node_key,
2798 path->slots[*level]);
2799 btrfs_add_corrupt_extent_record(fs_info,
2800 &node_key, path->nodes[*level]->start,
2801 fs_info->nodesize, *level);
2802 err |= -EIO;
2803 break;
2807 ret = check_child_node(cur, path->slots[*level], next);
2808 err |= ret;
2809 if (ret < 0)
2810 break;
2812 if (btrfs_is_leaf(next))
2813 status = btrfs_check_leaf(root, NULL, next);
2814 else
2815 status = btrfs_check_node(root, NULL, next);
2816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817 free_extent_buffer(next);
2818 err |= -EIO;
2819 break;
2822 *level = *level - 1;
2823 free_extent_buffer(path->nodes[*level]);
2824 path->nodes[*level] = next;
2825 path->slots[*level] = 0;
2826 account_file_data = 0;
2828 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2830 return err;
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834 struct walk_control *wc, int *level)
2836 int i;
2837 struct extent_buffer *leaf;
2839 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840 leaf = path->nodes[i];
2841 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2842 path->slots[i]++;
2843 *level = i;
2844 return 0;
2845 } else {
2846 free_extent_buffer(path->nodes[*level]);
2847 path->nodes[*level] = NULL;
2848 BUG_ON(*level > wc->active_node);
2849 if (*level == wc->active_node)
2850 leave_shared_node(root, wc, *level);
2851 *level = i + 1;
2854 return 1;
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2858 int *level)
2860 int i;
2861 struct extent_buffer *leaf;
2863 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864 leaf = path->nodes[i];
2865 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2866 path->slots[i]++;
2867 *level = i;
2868 return 0;
2869 } else {
2870 free_extent_buffer(path->nodes[*level]);
2871 path->nodes[*level] = NULL;
2872 *level = i + 1;
2875 return 1;
2878 static int check_root_dir(struct inode_record *rec)
2880 struct inode_backref *backref;
2881 int ret = -1;
2883 if (!rec->found_inode_item || rec->errors)
2884 goto out;
2885 if (rec->nlink != 1 || rec->found_link != 0)
2886 goto out;
2887 if (list_empty(&rec->backrefs))
2888 goto out;
2889 backref = to_inode_backref(rec->backrefs.next);
2890 if (!backref->found_inode_ref)
2891 goto out;
2892 if (backref->index != 0 || backref->namelen != 2 ||
2893 memcmp(backref->name, "..", 2))
2894 goto out;
2895 if (backref->found_dir_index || backref->found_dir_item)
2896 goto out;
2897 ret = 0;
2898 out:
2899 return ret;
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct btrfs_inode_item *ei;
2907 struct btrfs_key key;
2908 int ret;
2910 key.objectid = rec->ino;
2911 key.type = BTRFS_INODE_ITEM_KEY;
2912 key.offset = (u64)-1;
2914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2915 if (ret < 0)
2916 goto out;
2917 if (ret) {
2918 if (!path->slots[0]) {
2919 ret = -ENOENT;
2920 goto out;
2922 path->slots[0]--;
2923 ret = 0;
2925 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926 if (key.objectid != rec->ino) {
2927 ret = -ENOENT;
2928 goto out;
2931 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932 struct btrfs_inode_item);
2933 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934 btrfs_mark_buffer_dirty(path->nodes[0]);
2935 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937 root->root_key.objectid);
2938 out:
2939 btrfs_release_path(path);
2940 return ret;
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2948 int ret;
2950 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951 btrfs_release_path(path);
2952 if (!ret)
2953 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2954 return ret;
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958 struct btrfs_root *root,
2959 struct btrfs_path *path,
2960 struct inode_record *rec)
2962 struct btrfs_inode_item *ei;
2963 struct btrfs_key key;
2964 int ret = 0;
2966 key.objectid = rec->ino;
2967 key.type = BTRFS_INODE_ITEM_KEY;
2968 key.offset = 0;
2970 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2971 if (ret) {
2972 if (ret > 0)
2973 ret = -ENOENT;
2974 goto out;
2977 /* Since ret == 0, no need to check anything */
2978 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979 struct btrfs_inode_item);
2980 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981 btrfs_mark_buffer_dirty(path->nodes[0]);
2982 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983 printf("reset nbytes for ino %llu root %llu\n",
2984 rec->ino, root->root_key.objectid);
2985 out:
2986 btrfs_release_path(path);
2987 return ret;
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991 struct cache_tree *inode_cache,
2992 struct inode_record *rec,
2993 struct inode_backref *backref)
2995 struct btrfs_path path;
2996 struct btrfs_trans_handle *trans;
2997 struct btrfs_dir_item *dir_item;
2998 struct extent_buffer *leaf;
2999 struct btrfs_key key;
3000 struct btrfs_disk_key disk_key;
3001 struct inode_record *dir_rec;
3002 unsigned long name_ptr;
3003 u32 data_size = sizeof(*dir_item) + backref->namelen;
3004 int ret;
3006 trans = btrfs_start_transaction(root, 1);
3007 if (IS_ERR(trans))
3008 return PTR_ERR(trans);
3010 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011 (unsigned long long)rec->ino);
3013 btrfs_init_path(&path);
3014 key.objectid = backref->dir;
3015 key.type = BTRFS_DIR_INDEX_KEY;
3016 key.offset = backref->index;
3017 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3018 BUG_ON(ret);
3020 leaf = path.nodes[0];
3021 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3023 disk_key.objectid = cpu_to_le64(rec->ino);
3024 disk_key.type = BTRFS_INODE_ITEM_KEY;
3025 disk_key.offset = 0;
3027 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029 btrfs_set_dir_data_len(leaf, dir_item, 0);
3030 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3031 name_ptr = (unsigned long)(dir_item + 1);
3032 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033 btrfs_mark_buffer_dirty(leaf);
3034 btrfs_release_path(&path);
3035 btrfs_commit_transaction(trans, root);
3037 backref->found_dir_index = 1;
3038 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039 BUG_ON(IS_ERR(dir_rec));
3040 if (!dir_rec)
3041 return 0;
3042 dir_rec->found_size += backref->namelen;
3043 if (dir_rec->found_size == dir_rec->isize &&
3044 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046 if (dir_rec->found_size != dir_rec->isize)
3047 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3049 return 0;
3052 static int delete_dir_index(struct btrfs_root *root,
3053 struct inode_backref *backref)
3055 struct btrfs_trans_handle *trans;
3056 struct btrfs_dir_item *di;
3057 struct btrfs_path path;
3058 int ret = 0;
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans))
3062 return PTR_ERR(trans);
3064 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065 (unsigned long long)backref->dir,
3066 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067 (unsigned long long)root->objectid);
3069 btrfs_init_path(&path);
3070 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071 backref->name, backref->namelen,
3072 backref->index, -1);
3073 if (IS_ERR(di)) {
3074 ret = PTR_ERR(di);
3075 btrfs_release_path(&path);
3076 btrfs_commit_transaction(trans, root);
3077 if (ret == -ENOENT)
3078 return 0;
3079 return ret;
3082 if (!di)
3083 ret = btrfs_del_item(trans, root, &path);
3084 else
3085 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3086 BUG_ON(ret);
3087 btrfs_release_path(&path);
3088 btrfs_commit_transaction(trans, root);
3089 return ret;
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root, u64 ino, u64 size,
3094 u64 nbytes, u64 nlink, u32 mode)
3096 struct btrfs_inode_item ii;
3097 time_t now = time(NULL);
3098 int ret;
3100 btrfs_set_stack_inode_size(&ii, size);
3101 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102 btrfs_set_stack_inode_nlink(&ii, nlink);
3103 btrfs_set_stack_inode_mode(&ii, mode);
3104 btrfs_set_stack_inode_generation(&ii, trans->transid);
3105 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3113 ret = btrfs_insert_inode(trans, root, ino, &ii);
3114 ASSERT(!ret);
3116 warning("root %llu inode %llu recreating inode item, this may "
3117 "be incomplete, please check permissions and content after "
3118 "the fsck completes.\n", (unsigned long long)root->objectid,
3119 (unsigned long long)ino);
3121 return 0;
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125 struct btrfs_root *root, u64 ino,
3126 u8 filetype)
3128 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3130 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3133 static int create_inode_item(struct btrfs_root *root,
3134 struct inode_record *rec, int root_dir)
3136 struct btrfs_trans_handle *trans;
3137 u64 nlink = 0;
3138 u32 mode = 0;
3139 u64 size = 0;
3140 int ret;
3142 trans = btrfs_start_transaction(root, 1);
3143 if (IS_ERR(trans)) {
3144 ret = PTR_ERR(trans);
3145 return ret;
3148 nlink = root_dir ? 1 : rec->found_link;
3149 if (rec->found_dir_item) {
3150 if (rec->found_file_extent)
3151 fprintf(stderr, "root %llu inode %llu has both a dir "
3152 "item and extents, unsure if it is a dir or a "
3153 "regular file so setting it as a directory\n",
3154 (unsigned long long)root->objectid,
3155 (unsigned long long)rec->ino);
3156 mode = S_IFDIR | 0755;
3157 size = rec->found_size;
3158 } else if (!rec->found_dir_item) {
3159 size = rec->extent_end;
3160 mode = S_IFREG | 0755;
3163 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3164 nlink, mode);
3165 btrfs_commit_transaction(trans, root);
3166 return 0;
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170 struct inode_record *rec,
3171 struct cache_tree *inode_cache,
3172 int delete)
3174 struct inode_backref *tmp, *backref;
3175 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3176 int ret = 0;
3177 int repaired = 0;
3179 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3180 if (!delete && rec->ino == root_dirid) {
3181 if (!rec->found_inode_item) {
3182 ret = create_inode_item(root, rec, 1);
3183 if (ret)
3184 break;
3185 repaired++;
3189 /* Index 0 for root dir's are special, don't mess with it */
3190 if (rec->ino == root_dirid && backref->index == 0)
3191 continue;
3193 if (delete &&
3194 ((backref->found_dir_index && !backref->found_inode_ref) ||
3195 (backref->found_dir_index && backref->found_inode_ref &&
3196 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197 ret = delete_dir_index(root, backref);
3198 if (ret)
3199 break;
3200 repaired++;
3201 list_del(&backref->list);
3202 free(backref);
3203 continue;
3206 if (!delete && !backref->found_dir_index &&
3207 backref->found_dir_item && backref->found_inode_ref) {
3208 ret = add_missing_dir_index(root, inode_cache, rec,
3209 backref);
3210 if (ret)
3211 break;
3212 repaired++;
3213 if (backref->found_dir_item &&
3214 backref->found_dir_index) {
3215 if (!backref->errors &&
3216 backref->found_inode_ref) {
3217 list_del(&backref->list);
3218 free(backref);
3219 continue;
3224 if (!delete && (!backref->found_dir_index &&
3225 !backref->found_dir_item &&
3226 backref->found_inode_ref)) {
3227 struct btrfs_trans_handle *trans;
3228 struct btrfs_key location;
3230 ret = check_dir_conflict(root, backref->name,
3231 backref->namelen,
3232 backref->dir,
3233 backref->index);
3234 if (ret) {
3236 * let nlink fixing routine to handle it,
3237 * which can do it better.
3239 ret = 0;
3240 break;
3242 location.objectid = rec->ino;
3243 location.type = BTRFS_INODE_ITEM_KEY;
3244 location.offset = 0;
3246 trans = btrfs_start_transaction(root, 1);
3247 if (IS_ERR(trans)) {
3248 ret = PTR_ERR(trans);
3249 break;
3251 fprintf(stderr, "adding missing dir index/item pair "
3252 "for inode %llu\n",
3253 (unsigned long long)rec->ino);
3254 ret = btrfs_insert_dir_item(trans, root, backref->name,
3255 backref->namelen,
3256 backref->dir, &location,
3257 imode_to_type(rec->imode),
3258 backref->index);
3259 BUG_ON(ret);
3260 btrfs_commit_transaction(trans, root);
3261 repaired++;
3264 if (!delete && (backref->found_inode_ref &&
3265 backref->found_dir_index &&
3266 backref->found_dir_item &&
3267 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268 !rec->found_inode_item)) {
3269 ret = create_inode_item(root, rec, 0);
3270 if (ret)
3271 break;
3272 repaired++;
3276 return ret ? ret : repaired;
3280 * To determine the file type for nlink/inode_item repair
3282 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283 * Return -ENOENT if file type is not found.
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3287 struct inode_backref *backref;
3289 /* For inode item recovered case */
3290 if (rec->found_inode_item) {
3291 *type = imode_to_type(rec->imode);
3292 return 0;
3295 list_for_each_entry(backref, &rec->backrefs, list) {
3296 if (backref->found_dir_index || backref->found_dir_item) {
3297 *type = backref->filetype;
3298 return 0;
3301 return -ENOENT;
3305 * To determine the file name for nlink repair
3307 * Return 0 if file name is found, set name and namelen.
3308 * Return -ENOENT if file name is not found.
3310 static int find_file_name(struct inode_record *rec,
3311 char *name, int *namelen)
3313 struct inode_backref *backref;
3315 list_for_each_entry(backref, &rec->backrefs, list) {
3316 if (backref->found_dir_index || backref->found_dir_item ||
3317 backref->found_inode_ref) {
3318 memcpy(name, backref->name, backref->namelen);
3319 *namelen = backref->namelen;
3320 return 0;
3323 return -ENOENT;
3326 /* Reset the nlink of the inode to the correct one */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root,
3329 struct btrfs_path *path,
3330 struct inode_record *rec)
3332 struct inode_backref *backref;
3333 struct inode_backref *tmp;
3334 struct btrfs_key key;
3335 struct btrfs_inode_item *inode_item;
3336 int ret = 0;
3338 /* We don't believe this either, reset it and iterate backref */
3339 rec->found_link = 0;
3341 /* Remove all backref including the valid ones */
3342 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344 backref->index, backref->name,
3345 backref->namelen, 0);
3346 if (ret < 0)
3347 goto out;
3349 /* remove invalid backref, so it won't be added back */
3350 if (!(backref->found_dir_index &&
3351 backref->found_dir_item &&
3352 backref->found_inode_ref)) {
3353 list_del(&backref->list);
3354 free(backref);
3355 } else {
3356 rec->found_link++;
3360 /* Set nlink to 0 */
3361 key.objectid = rec->ino;
3362 key.type = BTRFS_INODE_ITEM_KEY;
3363 key.offset = 0;
3364 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365 if (ret < 0)
3366 goto out;
3367 if (ret > 0) {
3368 ret = -ENOENT;
3369 goto out;
3371 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372 struct btrfs_inode_item);
3373 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374 btrfs_mark_buffer_dirty(path->nodes[0]);
3375 btrfs_release_path(path);
3378 * Add back valid inode_ref/dir_item/dir_index,
3379 * add_link() will handle the nlink inc, so new nlink must be correct
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383 backref->name, backref->namelen,
3384 backref->filetype, &backref->index, 1, 0);
3385 if (ret < 0)
3386 goto out;
3388 out:
3389 btrfs_release_path(path);
3390 return ret;
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_path *path,
3396 u64 *highest_ino)
3398 struct btrfs_key key, found_key;
3399 int ret;
3401 btrfs_init_path(path);
3402 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3403 key.offset = -1;
3404 key.type = BTRFS_INODE_ITEM_KEY;
3405 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3406 if (ret == 1) {
3407 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408 path->slots[0] - 1);
3409 *highest_ino = found_key.objectid;
3410 ret = 0;
3412 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3413 ret = -EOVERFLOW;
3414 btrfs_release_path(path);
3415 return ret;
3419 * Link inode to dir 'lost+found'. Increase @ref_count.
3421 * Returns 0 means success.
3422 * Returns <0 means failure.
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *root,
3426 struct btrfs_path *path,
3427 u64 ino, char *namebuf, u32 name_len,
3428 u8 filetype, u64 *ref_count)
3430 char *dir_name = "lost+found";
3431 u64 lost_found_ino;
3432 int ret;
3433 u32 mode = 0700;
3435 btrfs_release_path(path);
3436 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3437 if (ret < 0)
3438 goto out;
3439 lost_found_ino++;
3441 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3443 mode);
3444 if (ret < 0) {
3445 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3446 goto out;
3448 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449 namebuf, name_len, filetype, NULL, 1, 0);
3451 * Add ".INO" suffix several times to handle case where
3452 * "FILENAME.INO" is already taken by another file.
3454 while (ret == -EEXIST) {
3456 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3458 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3459 ret = -EFBIG;
3460 goto out;
3462 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3463 ".%llu", ino);
3464 name_len += count_digits(ino) + 1;
3465 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466 name_len, filetype, NULL, 1, 0);
3468 if (ret < 0) {
3469 error("failed to link the inode %llu to %s dir: %s",
3470 ino, dir_name, strerror(-ret));
3471 goto out;
3474 ++*ref_count;
3475 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476 name_len, namebuf, dir_name);
3477 out:
3478 btrfs_release_path(path);
3479 if (ret)
3480 error("failed to move file '%.*s' to '%s' dir", name_len,
3481 namebuf, dir_name);
3482 return ret;
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486 struct btrfs_root *root,
3487 struct btrfs_path *path,
3488 struct inode_record *rec)
3490 char namebuf[BTRFS_NAME_LEN] = {0};
3491 u8 type = 0;
3492 int namelen = 0;
3493 int name_recovered = 0;
3494 int type_recovered = 0;
3495 int ret = 0;
3498 * Get file name and type first before these invalid inode ref
3499 * are deleted by remove_all_invalid_backref()
3501 name_recovered = !find_file_name(rec, namebuf, &namelen);
3502 type_recovered = !find_file_type(rec, &type);
3504 if (!name_recovered) {
3505 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506 rec->ino, rec->ino);
3507 namelen = count_digits(rec->ino);
3508 sprintf(namebuf, "%llu", rec->ino);
3509 name_recovered = 1;
3511 if (!type_recovered) {
3512 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3513 rec->ino);
3514 type = BTRFS_FT_REG_FILE;
3515 type_recovered = 1;
3518 ret = reset_nlink(trans, root, path, rec);
3519 if (ret < 0) {
3520 fprintf(stderr,
3521 "Failed to reset nlink for inode %llu: %s\n",
3522 rec->ino, strerror(-ret));
3523 goto out;
3526 if (rec->found_link == 0) {
3527 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528 namebuf, namelen, type,
3529 (u64 *)&rec->found_link);
3530 if (ret)
3531 goto out;
3533 printf("Fixed the nlink of inode %llu\n", rec->ino);
3534 out:
3536 * Clear the flag anyway, or we will loop forever for the same inode
3537 * as it will not be removed from the bad inode list and the dead loop
3538 * happens.
3540 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541 btrfs_release_path(path);
3542 return ret;
3546 * Check if there is any normal(reg or prealloc) file extent for given
3547 * ino.
3548 * This is used to determine the file type when neither its dir_index/item or
3549 * inode_item exists.
3551 * This will *NOT* report error, if any error happens, just consider it does
3552 * not have any normal file extent.
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3556 struct btrfs_path path;
3557 struct btrfs_key key;
3558 struct btrfs_key found_key;
3559 struct btrfs_file_extent_item *fi;
3560 u8 type;
3561 int ret = 0;
3563 btrfs_init_path(&path);
3564 key.objectid = ino;
3565 key.type = BTRFS_EXTENT_DATA_KEY;
3566 key.offset = 0;
3568 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3569 if (ret < 0) {
3570 ret = 0;
3571 goto out;
3573 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574 ret = btrfs_next_leaf(root, &path);
3575 if (ret) {
3576 ret = 0;
3577 goto out;
3580 while (1) {
3581 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3582 path.slots[0]);
3583 if (found_key.objectid != ino ||
3584 found_key.type != BTRFS_EXTENT_DATA_KEY)
3585 break;
3586 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587 struct btrfs_file_extent_item);
3588 type = btrfs_file_extent_type(path.nodes[0], fi);
3589 if (type != BTRFS_FILE_EXTENT_INLINE) {
3590 ret = 1;
3591 goto out;
3594 out:
3595 btrfs_release_path(&path);
3596 return ret;
3599 static u32 btrfs_type_to_imode(u8 type)
3601 static u32 imode_by_btrfs_type[] = {
3602 [BTRFS_FT_REG_FILE] = S_IFREG,
3603 [BTRFS_FT_DIR] = S_IFDIR,
3604 [BTRFS_FT_CHRDEV] = S_IFCHR,
3605 [BTRFS_FT_BLKDEV] = S_IFBLK,
3606 [BTRFS_FT_FIFO] = S_IFIFO,
3607 [BTRFS_FT_SOCK] = S_IFSOCK,
3608 [BTRFS_FT_SYMLINK] = S_IFLNK,
3611 return imode_by_btrfs_type[(type)];
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path,
3617 struct inode_record *rec)
3619 u8 filetype;
3620 u32 mode = 0700;
3621 int type_recovered = 0;
3622 int ret = 0;
3624 printf("Trying to rebuild inode:%llu\n", rec->ino);
3626 type_recovered = !find_file_type(rec, &filetype);
3629 * Try to determine inode type if type not found.
3631 * For found regular file extent, it must be FILE.
3632 * For found dir_item/index, it must be DIR.
3634 * For undetermined one, use FILE as fallback.
3636 * TODO:
3637 * 1. If found backref(inode_index/item is already handled) to it,
3638 * it must be DIR.
3639 * Need new inode-inode ref structure to allow search for that.
3641 if (!type_recovered) {
3642 if (rec->found_file_extent &&
3643 find_normal_file_extent(root, rec->ino)) {
3644 type_recovered = 1;
3645 filetype = BTRFS_FT_REG_FILE;
3646 } else if (rec->found_dir_item) {
3647 type_recovered = 1;
3648 filetype = BTRFS_FT_DIR;
3649 } else if (!list_empty(&rec->orphan_extents)) {
3650 type_recovered = 1;
3651 filetype = BTRFS_FT_REG_FILE;
3652 } else{
3653 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3654 rec->ino);
3655 type_recovered = 1;
3656 filetype = BTRFS_FT_REG_FILE;
3660 ret = btrfs_new_inode(trans, root, rec->ino,
3661 mode | btrfs_type_to_imode(filetype));
3662 if (ret < 0)
3663 goto out;
3666 * Here inode rebuild is done, we only rebuild the inode item,
3667 * don't repair the nlink(like move to lost+found).
3668 * That is the job of nlink repair.
3670 * We just fill the record and return
3672 rec->found_dir_item = 1;
3673 rec->imode = mode | btrfs_type_to_imode(filetype);
3674 rec->nlink = 0;
3675 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676 /* Ensure the inode_nlinks repair function will be called */
3677 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3678 out:
3679 return ret;
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683 struct btrfs_root *root,
3684 struct btrfs_path *path,
3685 struct inode_record *rec)
3687 struct orphan_data_extent *orphan;
3688 struct orphan_data_extent *tmp;
3689 int ret = 0;
3691 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3693 * Check for conflicting file extents
3695 * Here we don't know whether the extents is compressed or not,
3696 * so we can only assume it not compressed nor data offset,
3697 * and use its disk_len as extent length.
3699 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700 orphan->offset, orphan->disk_len, 0);
3701 btrfs_release_path(path);
3702 if (ret < 0)
3703 goto out;
3704 if (!ret) {
3705 fprintf(stderr,
3706 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707 orphan->disk_bytenr, orphan->disk_len);
3708 ret = btrfs_free_extent(trans,
3709 root->fs_info->extent_root,
3710 orphan->disk_bytenr, orphan->disk_len,
3711 0, root->objectid, orphan->objectid,
3712 orphan->offset);
3713 if (ret < 0)
3714 goto out;
3716 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717 orphan->offset, orphan->disk_bytenr,
3718 orphan->disk_len, orphan->disk_len);
3719 if (ret < 0)
3720 goto out;
3722 /* Update file size info */
3723 rec->found_size += orphan->disk_len;
3724 if (rec->found_size == rec->nbytes)
3725 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3727 /* Update the file extent hole info too */
3728 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3729 orphan->disk_len);
3730 if (ret < 0)
3731 goto out;
3732 if (RB_EMPTY_ROOT(&rec->holes))
3733 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3735 list_del(&orphan->list);
3736 free(orphan);
3738 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3739 out:
3740 return ret;
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744 struct btrfs_root *root,
3745 struct btrfs_path *path,
3746 struct inode_record *rec)
3748 struct rb_node *node;
3749 struct file_extent_hole *hole;
3750 int found = 0;
3751 int ret = 0;
3753 node = rb_first(&rec->holes);
3755 while (node) {
3756 found = 1;
3757 hole = rb_entry(node, struct file_extent_hole, node);
3758 ret = btrfs_punch_hole(trans, root, rec->ino,
3759 hole->start, hole->len);
3760 if (ret < 0)
3761 goto out;
3762 ret = del_file_extent_hole(&rec->holes, hole->start,
3763 hole->len);
3764 if (ret < 0)
3765 goto out;
3766 if (RB_EMPTY_ROOT(&rec->holes))
3767 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3768 node = rb_first(&rec->holes);
3770 /* special case for a file losing all its file extent */
3771 if (!found) {
3772 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773 round_up(rec->isize,
3774 root->fs_info->sectorsize));
3775 if (ret < 0)
3776 goto out;
3778 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779 rec->ino, root->objectid);
3780 out:
3781 return ret;
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
3788 int ret = 0;
3790 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791 I_ERR_NO_ORPHAN_ITEM |
3792 I_ERR_LINK_COUNT_WRONG |
3793 I_ERR_NO_INODE_ITEM |
3794 I_ERR_FILE_EXTENT_ORPHAN |
3795 I_ERR_FILE_EXTENT_DISCOUNT|
3796 I_ERR_FILE_NBYTES_WRONG)))
3797 return rec->errors;
3800 * For nlink repair, it may create a dir and add link, so
3801 * 2 for parent(256)'s dir_index and dir_item
3802 * 2 for lost+found dir's inode_item and inode_ref
3803 * 1 for the new inode_ref of the file
3804 * 2 for lost+found dir's dir_index and dir_item for the file
3806 trans = btrfs_start_transaction(root, 7);
3807 if (IS_ERR(trans))
3808 return PTR_ERR(trans);
3810 btrfs_init_path(&path);
3811 if (rec->errors & I_ERR_NO_INODE_ITEM)
3812 ret = repair_inode_no_item(trans, root, &path, rec);
3813 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818 ret = repair_inode_isize(trans, root, &path, rec);
3819 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822 ret = repair_inode_nlinks(trans, root, &path, rec);
3823 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824 ret = repair_inode_nbytes(trans, root, &path, rec);
3825 btrfs_commit_transaction(trans, root);
3826 btrfs_release_path(&path);
3827 return ret;
3830 static int check_inode_recs(struct btrfs_root *root,
3831 struct cache_tree *inode_cache)
3833 struct cache_extent *cache;
3834 struct ptr_node *node;
3835 struct inode_record *rec;
3836 struct inode_backref *backref;
3837 int stage = 0;
3838 int ret = 0;
3839 int err = 0;
3840 u64 error = 0;
3841 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3843 if (btrfs_root_refs(&root->root_item) == 0) {
3844 if (!cache_tree_empty(inode_cache))
3845 fprintf(stderr, "warning line %d\n", __LINE__);
3846 return 0;
3850 * We need to repair backrefs first because we could change some of the
3851 * errors in the inode recs.
3853 * We also need to go through and delete invalid backrefs first and then
3854 * add the correct ones second. We do this because we may get EEXIST
3855 * when adding back the correct index because we hadn't yet deleted the
3856 * invalid index.
3858 * For example, if we were missing a dir index then the directories
3859 * isize would be wrong, so if we fixed the isize to what we thought it
3860 * would be and then fixed the backref we'd still have a invalid fs, so
3861 * we need to add back the dir index and then check to see if the isize
3862 * is still wrong.
3864 while (stage < 3) {
3865 stage++;
3866 if (stage == 3 && !err)
3867 break;
3869 cache = search_cache_extent(inode_cache, 0);
3870 while (repair && cache) {
3871 node = container_of(cache, struct ptr_node, cache);
3872 rec = node->data;
3873 cache = next_cache_extent(cache);
3875 /* Need to free everything up and rescan */
3876 if (stage == 3) {
3877 remove_cache_extent(inode_cache, &node->cache);
3878 free(node);
3879 free_inode_rec(rec);
3880 continue;
3883 if (list_empty(&rec->backrefs))
3884 continue;
3886 ret = repair_inode_backrefs(root, rec, inode_cache,
3887 stage == 1);
3888 if (ret < 0) {
3889 err = ret;
3890 stage = 2;
3891 break;
3892 } if (ret > 0) {
3893 err = -EAGAIN;
3897 if (err)
3898 return err;
3900 rec = get_inode_rec(inode_cache, root_dirid, 0);
3901 BUG_ON(IS_ERR(rec));
3902 if (rec) {
3903 ret = check_root_dir(rec);
3904 if (ret) {
3905 fprintf(stderr, "root %llu root dir %llu error\n",
3906 (unsigned long long)root->root_key.objectid,
3907 (unsigned long long)root_dirid);
3908 print_inode_error(root, rec);
3909 error++;
3911 } else {
3912 if (repair) {
3913 struct btrfs_trans_handle *trans;
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 err = PTR_ERR(trans);
3918 return err;
3921 fprintf(stderr,
3922 "root %llu missing its root dir, recreating\n",
3923 (unsigned long long)root->objectid);
3925 ret = btrfs_make_root_dir(trans, root, root_dirid);
3926 BUG_ON(ret);
3928 btrfs_commit_transaction(trans, root);
3929 return -EAGAIN;
3932 fprintf(stderr, "root %llu root dir %llu not found\n",
3933 (unsigned long long)root->root_key.objectid,
3934 (unsigned long long)root_dirid);
3937 while (1) {
3938 cache = search_cache_extent(inode_cache, 0);
3939 if (!cache)
3940 break;
3941 node = container_of(cache, struct ptr_node, cache);
3942 rec = node->data;
3943 remove_cache_extent(inode_cache, &node->cache);
3944 free(node);
3945 if (rec->ino == root_dirid ||
3946 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947 free_inode_rec(rec);
3948 continue;
3951 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952 ret = check_orphan_item(root, rec->ino);
3953 if (ret == 0)
3954 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955 if (can_free_inode_rec(rec)) {
3956 free_inode_rec(rec);
3957 continue;
3961 if (!rec->found_inode_item)
3962 rec->errors |= I_ERR_NO_INODE_ITEM;
3963 if (rec->found_link != rec->nlink)
3964 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3965 if (repair) {
3966 ret = try_repair_inode(root, rec);
3967 if (ret == 0 && can_free_inode_rec(rec)) {
3968 free_inode_rec(rec);
3969 continue;
3971 ret = 0;
3974 if (!(repair && ret == 0))
3975 error++;
3976 print_inode_error(root, rec);
3977 list_for_each_entry(backref, &rec->backrefs, list) {
3978 if (!backref->found_dir_item)
3979 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980 if (!backref->found_dir_index)
3981 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982 if (!backref->found_inode_ref)
3983 backref->errors |= REF_ERR_NO_INODE_REF;
3984 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985 " namelen %u name %s filetype %d errors %x",
3986 (unsigned long long)backref->dir,
3987 (unsigned long long)backref->index,
3988 backref->namelen, backref->name,
3989 backref->filetype, backref->errors);
3990 print_ref_error(backref->errors);
3992 free_inode_rec(rec);
3994 return (error > 0) ? -1 : 0;
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3998 u64 objectid)
4000 struct cache_extent *cache;
4001 struct root_record *rec = NULL;
4002 int ret;
4004 cache = lookup_cache_extent(root_cache, objectid, 1);
4005 if (cache) {
4006 rec = container_of(cache, struct root_record, cache);
4007 } else {
4008 rec = calloc(1, sizeof(*rec));
4009 if (!rec)
4010 return ERR_PTR(-ENOMEM);
4011 rec->objectid = objectid;
4012 INIT_LIST_HEAD(&rec->backrefs);
4013 rec->cache.start = objectid;
4014 rec->cache.size = 1;
4016 ret = insert_cache_extent(root_cache, &rec->cache);
4017 if (ret)
4018 return ERR_PTR(-EEXIST);
4020 return rec;
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024 u64 ref_root, u64 dir, u64 index,
4025 const char *name, int namelen)
4027 struct root_backref *backref;
4029 list_for_each_entry(backref, &rec->backrefs, list) {
4030 if (backref->ref_root != ref_root || backref->dir != dir ||
4031 backref->namelen != namelen)
4032 continue;
4033 if (memcmp(name, backref->name, namelen))
4034 continue;
4035 return backref;
4038 backref = calloc(1, sizeof(*backref) + namelen + 1);
4039 if (!backref)
4040 return NULL;
4041 backref->ref_root = ref_root;
4042 backref->dir = dir;
4043 backref->index = index;
4044 backref->namelen = namelen;
4045 memcpy(backref->name, name, namelen);
4046 backref->name[namelen] = '\0';
4047 list_add_tail(&backref->list, &rec->backrefs);
4048 return backref;
4051 static void free_root_record(struct cache_extent *cache)
4053 struct root_record *rec;
4054 struct root_backref *backref;
4056 rec = container_of(cache, struct root_record, cache);
4057 while (!list_empty(&rec->backrefs)) {
4058 backref = to_root_backref(rec->backrefs.next);
4059 list_del(&backref->list);
4060 free(backref);
4063 free(rec);
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4068 static int add_root_backref(struct cache_tree *root_cache,
4069 u64 root_id, u64 ref_root, u64 dir, u64 index,
4070 const char *name, int namelen,
4071 int item_type, int errors)
4073 struct root_record *rec;
4074 struct root_backref *backref;
4076 rec = get_root_rec(root_cache, root_id);
4077 BUG_ON(IS_ERR(rec));
4078 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4079 BUG_ON(!backref);
4081 backref->errors |= errors;
4083 if (item_type != BTRFS_DIR_ITEM_KEY) {
4084 if (backref->found_dir_index || backref->found_back_ref ||
4085 backref->found_forward_ref) {
4086 if (backref->index != index)
4087 backref->errors |= REF_ERR_INDEX_UNMATCH;
4088 } else {
4089 backref->index = index;
4093 if (item_type == BTRFS_DIR_ITEM_KEY) {
4094 if (backref->found_forward_ref)
4095 rec->found_ref++;
4096 backref->found_dir_item = 1;
4097 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098 backref->found_dir_index = 1;
4099 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100 if (backref->found_forward_ref)
4101 backref->errors |= REF_ERR_DUP_ROOT_REF;
4102 else if (backref->found_dir_item)
4103 rec->found_ref++;
4104 backref->found_forward_ref = 1;
4105 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106 if (backref->found_back_ref)
4107 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108 backref->found_back_ref = 1;
4109 } else {
4110 BUG_ON(1);
4113 if (backref->found_forward_ref && backref->found_dir_item)
4114 backref->reachable = 1;
4115 return 0;
4118 static int merge_root_recs(struct btrfs_root *root,
4119 struct cache_tree *src_cache,
4120 struct cache_tree *dst_cache)
4122 struct cache_extent *cache;
4123 struct ptr_node *node;
4124 struct inode_record *rec;
4125 struct inode_backref *backref;
4126 int ret = 0;
4128 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129 free_inode_recs_tree(src_cache);
4130 return 0;
4133 while (1) {
4134 cache = search_cache_extent(src_cache, 0);
4135 if (!cache)
4136 break;
4137 node = container_of(cache, struct ptr_node, cache);
4138 rec = node->data;
4139 remove_cache_extent(src_cache, &node->cache);
4140 free(node);
4142 ret = is_child_root(root, root->objectid, rec->ino);
4143 if (ret < 0)
4144 break;
4145 else if (ret == 0)
4146 goto skip;
4148 list_for_each_entry(backref, &rec->backrefs, list) {
4149 BUG_ON(backref->found_inode_ref);
4150 if (backref->found_dir_item)
4151 add_root_backref(dst_cache, rec->ino,
4152 root->root_key.objectid, backref->dir,
4153 backref->index, backref->name,
4154 backref->namelen, BTRFS_DIR_ITEM_KEY,
4155 backref->errors);
4156 if (backref->found_dir_index)
4157 add_root_backref(dst_cache, rec->ino,
4158 root->root_key.objectid, backref->dir,
4159 backref->index, backref->name,
4160 backref->namelen, BTRFS_DIR_INDEX_KEY,
4161 backref->errors);
4163 skip:
4164 free_inode_rec(rec);
4166 if (ret < 0)
4167 return ret;
4168 return 0;
4171 static int check_root_refs(struct btrfs_root *root,
4172 struct cache_tree *root_cache)
4174 struct root_record *rec;
4175 struct root_record *ref_root;
4176 struct root_backref *backref;
4177 struct cache_extent *cache;
4178 int loop = 1;
4179 int ret;
4180 int error;
4181 int errors = 0;
4183 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184 BUG_ON(IS_ERR(rec));
4185 rec->found_ref = 1;
4187 /* fixme: this can not detect circular references */
4188 while (loop) {
4189 loop = 0;
4190 cache = search_cache_extent(root_cache, 0);
4191 while (1) {
4192 if (!cache)
4193 break;
4194 rec = container_of(cache, struct root_record, cache);
4195 cache = next_cache_extent(cache);
4197 if (rec->found_ref == 0)
4198 continue;
4200 list_for_each_entry(backref, &rec->backrefs, list) {
4201 if (!backref->reachable)
4202 continue;
4204 ref_root = get_root_rec(root_cache,
4205 backref->ref_root);
4206 BUG_ON(IS_ERR(ref_root));
4207 if (ref_root->found_ref > 0)
4208 continue;
4210 backref->reachable = 0;
4211 rec->found_ref--;
4212 if (rec->found_ref == 0)
4213 loop = 1;
4218 cache = search_cache_extent(root_cache, 0);
4219 while (1) {
4220 if (!cache)
4221 break;
4222 rec = container_of(cache, struct root_record, cache);
4223 cache = next_cache_extent(cache);
4225 if (rec->found_ref == 0 &&
4226 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228 ret = check_orphan_item(root->fs_info->tree_root,
4229 rec->objectid);
4230 if (ret == 0)
4231 continue;
4234 * If we don't have a root item then we likely just have
4235 * a dir item in a snapshot for this root but no actual
4236 * ref key or anything so it's meaningless.
4238 if (!rec->found_root_item)
4239 continue;
4240 errors++;
4241 fprintf(stderr, "fs tree %llu not referenced\n",
4242 (unsigned long long)rec->objectid);
4245 error = 0;
4246 if (rec->found_ref > 0 && !rec->found_root_item)
4247 error = 1;
4248 list_for_each_entry(backref, &rec->backrefs, list) {
4249 if (!backref->found_dir_item)
4250 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251 if (!backref->found_dir_index)
4252 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253 if (!backref->found_back_ref)
4254 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255 if (!backref->found_forward_ref)
4256 backref->errors |= REF_ERR_NO_ROOT_REF;
4257 if (backref->reachable && backref->errors)
4258 error = 1;
4260 if (!error)
4261 continue;
4263 errors++;
4264 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265 (unsigned long long)rec->objectid, rec->found_ref,
4266 rec->found_root_item ? "" : "not found");
4268 list_for_each_entry(backref, &rec->backrefs, list) {
4269 if (!backref->reachable)
4270 continue;
4271 if (!backref->errors && rec->found_root_item)
4272 continue;
4273 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274 " index %llu namelen %u name %s errors %x\n",
4275 (unsigned long long)backref->ref_root,
4276 (unsigned long long)backref->dir,
4277 (unsigned long long)backref->index,
4278 backref->namelen, backref->name,
4279 backref->errors);
4280 print_ref_error(backref->errors);
4283 return errors > 0 ? 1 : 0;
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287 struct btrfs_key *key,
4288 struct cache_tree *root_cache)
4290 u64 dirid;
4291 u64 index;
4292 u32 len;
4293 u32 name_len;
4294 struct btrfs_root_ref *ref;
4295 char namebuf[BTRFS_NAME_LEN];
4296 int error;
4298 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4300 dirid = btrfs_root_ref_dirid(eb, ref);
4301 index = btrfs_root_ref_sequence(eb, ref);
4302 name_len = btrfs_root_ref_name_len(eb, ref);
4304 if (name_len <= BTRFS_NAME_LEN) {
4305 len = name_len;
4306 error = 0;
4307 } else {
4308 len = BTRFS_NAME_LEN;
4309 error = REF_ERR_NAME_TOO_LONG;
4311 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4313 if (key->type == BTRFS_ROOT_REF_KEY) {
4314 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315 index, namebuf, len, key->type, error);
4316 } else {
4317 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318 index, namebuf, len, key->type, error);
4320 return 0;
4323 static void free_corrupt_block(struct cache_extent *cache)
4325 struct btrfs_corrupt_block *corrupt;
4327 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4328 free(corrupt);
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4334 * Repair the btree of the given root.
4336 * The fix is to remove the node key in corrupt_blocks cache_tree.
4337 * and rebalance the tree.
4338 * After the fix, the btree should be writeable.
4340 static int repair_btree(struct btrfs_root *root,
4341 struct cache_tree *corrupt_blocks)
4343 struct btrfs_trans_handle *trans;
4344 struct btrfs_path path;
4345 struct btrfs_corrupt_block *corrupt;
4346 struct cache_extent *cache;
4347 struct btrfs_key key;
4348 u64 offset;
4349 int level;
4350 int ret = 0;
4352 if (cache_tree_empty(corrupt_blocks))
4353 return 0;
4355 trans = btrfs_start_transaction(root, 1);
4356 if (IS_ERR(trans)) {
4357 ret = PTR_ERR(trans);
4358 fprintf(stderr, "Error starting transaction: %s\n",
4359 strerror(-ret));
4360 return ret;
4362 btrfs_init_path(&path);
4363 cache = first_cache_extent(corrupt_blocks);
4364 while (cache) {
4365 corrupt = container_of(cache, struct btrfs_corrupt_block,
4366 cache);
4367 level = corrupt->level;
4368 path.lowest_level = level;
4369 key.objectid = corrupt->key.objectid;
4370 key.type = corrupt->key.type;
4371 key.offset = corrupt->key.offset;
4374 * Here we don't want to do any tree balance, since it may
4375 * cause a balance with corrupted brother leaf/node,
4376 * so ins_len set to 0 here.
4377 * Balance will be done after all corrupt node/leaf is deleted.
4379 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4380 if (ret < 0)
4381 goto out;
4382 offset = btrfs_node_blockptr(path.nodes[level],
4383 path.slots[level]);
4385 /* Remove the ptr */
4386 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4387 if (ret < 0)
4388 goto out;
4390 * Remove the corresponding extent
4391 * return value is not concerned.
4393 btrfs_release_path(&path);
4394 ret = btrfs_free_extent(trans, root, offset,
4395 root->fs_info->nodesize, 0,
4396 root->root_key.objectid, level - 1, 0);
4397 cache = next_cache_extent(cache);
4400 /* Balance the btree using btrfs_search_slot() */
4401 cache = first_cache_extent(corrupt_blocks);
4402 while (cache) {
4403 corrupt = container_of(cache, struct btrfs_corrupt_block,
4404 cache);
4405 memcpy(&key, &corrupt->key, sizeof(key));
4406 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4407 if (ret < 0)
4408 goto out;
4409 /* return will always >0 since it won't find the item */
4410 ret = 0;
4411 btrfs_release_path(&path);
4412 cache = next_cache_extent(cache);
4414 out:
4415 btrfs_commit_transaction(trans, root);
4416 btrfs_release_path(&path);
4417 return ret;
4420 static int check_fs_root(struct btrfs_root *root,
4421 struct cache_tree *root_cache,
4422 struct walk_control *wc)
4424 int ret = 0;
4425 int err = 0;
4426 int wret;
4427 int level;
4428 struct btrfs_path path;
4429 struct shared_node root_node;
4430 struct root_record *rec;
4431 struct btrfs_root_item *root_item = &root->root_item;
4432 struct cache_tree corrupt_blocks;
4433 struct orphan_data_extent *orphan;
4434 struct orphan_data_extent *tmp;
4435 enum btrfs_tree_block_status status;
4436 struct node_refs nrefs;
4439 * Reuse the corrupt_block cache tree to record corrupted tree block
4441 * Unlike the usage in extent tree check, here we do it in a per
4442 * fs/subvol tree base.
4444 cache_tree_init(&corrupt_blocks);
4445 root->fs_info->corrupt_blocks = &corrupt_blocks;
4447 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448 rec = get_root_rec(root_cache, root->root_key.objectid);
4449 BUG_ON(IS_ERR(rec));
4450 if (btrfs_root_refs(root_item) > 0)
4451 rec->found_root_item = 1;
4454 btrfs_init_path(&path);
4455 memset(&root_node, 0, sizeof(root_node));
4456 cache_tree_init(&root_node.root_cache);
4457 cache_tree_init(&root_node.inode_cache);
4458 memset(&nrefs, 0, sizeof(nrefs));
4460 /* Move the orphan extent record to corresponding inode_record */
4461 list_for_each_entry_safe(orphan, tmp,
4462 &root->orphan_data_extents, list) {
4463 struct inode_record *inode;
4465 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4467 BUG_ON(IS_ERR(inode));
4468 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469 list_move(&orphan->list, &inode->orphan_extents);
4472 level = btrfs_header_level(root->node);
4473 memset(wc->nodes, 0, sizeof(wc->nodes));
4474 wc->nodes[level] = &root_node;
4475 wc->active_node = level;
4476 wc->root_level = level;
4478 /* We may not have checked the root block, lets do that now */
4479 if (btrfs_is_leaf(root->node))
4480 status = btrfs_check_leaf(root, NULL, root->node);
4481 else
4482 status = btrfs_check_node(root, NULL, root->node);
4483 if (status != BTRFS_TREE_BLOCK_CLEAN)
4484 return -EIO;
4486 if (btrfs_root_refs(root_item) > 0 ||
4487 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488 path.nodes[level] = root->node;
4489 extent_buffer_get(root->node);
4490 path.slots[level] = 0;
4491 } else {
4492 struct btrfs_key key;
4493 struct btrfs_disk_key found_key;
4495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496 level = root_item->drop_level;
4497 path.lowest_level = level;
4498 if (level > btrfs_header_level(root->node) ||
4499 level >= BTRFS_MAX_LEVEL) {
4500 error("ignoring invalid drop level: %u", level);
4501 goto skip_walking;
4503 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4504 if (wret < 0)
4505 goto skip_walking;
4506 btrfs_node_key(path.nodes[level], &found_key,
4507 path.slots[level]);
4508 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509 sizeof(found_key)));
4512 while (1) {
4513 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4514 if (wret < 0)
4515 ret = wret;
4516 if (wret != 0)
4517 break;
4519 wret = walk_up_tree(root, &path, wc, &level);
4520 if (wret < 0)
4521 ret = wret;
4522 if (wret != 0)
4523 break;
4525 skip_walking:
4526 btrfs_release_path(&path);
4528 if (!cache_tree_empty(&corrupt_blocks)) {
4529 struct cache_extent *cache;
4530 struct btrfs_corrupt_block *corrupt;
4532 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533 root->root_key.objectid);
4534 cache = first_cache_extent(&corrupt_blocks);
4535 while (cache) {
4536 corrupt = container_of(cache,
4537 struct btrfs_corrupt_block,
4538 cache);
4539 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540 cache->start, corrupt->level,
4541 corrupt->key.objectid, corrupt->key.type,
4542 corrupt->key.offset);
4543 cache = next_cache_extent(cache);
4545 if (repair) {
4546 printf("Try to repair the btree for root %llu\n",
4547 root->root_key.objectid);
4548 ret = repair_btree(root, &corrupt_blocks);
4549 if (ret < 0)
4550 fprintf(stderr, "Failed to repair btree: %s\n",
4551 strerror(-ret));
4552 if (!ret)
4553 printf("Btree for root %llu is fixed\n",
4554 root->root_key.objectid);
4558 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4559 if (err < 0)
4560 ret = err;
4562 if (root_node.current) {
4563 root_node.current->checked = 1;
4564 maybe_free_inode_rec(&root_node.inode_cache,
4565 root_node.current);
4568 err = check_inode_recs(root, &root_node.inode_cache);
4569 if (!ret)
4570 ret = err;
4572 free_corrupt_blocks_tree(&corrupt_blocks);
4573 root->fs_info->corrupt_blocks = NULL;
4574 free_orphan_data_extents(&root->orphan_data_extents);
4575 return ret;
4578 static int fs_root_objectid(u64 objectid)
4580 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4582 return 1;
4583 return is_fstree(objectid);
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587 struct cache_tree *root_cache)
4589 struct btrfs_path path;
4590 struct btrfs_key key;
4591 struct walk_control wc;
4592 struct extent_buffer *leaf, *tree_node;
4593 struct btrfs_root *tmp_root;
4594 struct btrfs_root *tree_root = fs_info->tree_root;
4595 int ret;
4596 int err = 0;
4598 if (ctx.progress_enabled) {
4599 ctx.tp = TASK_FS_ROOTS;
4600 task_start(ctx.info);
4604 * Just in case we made any changes to the extent tree that weren't
4605 * reflected into the free space cache yet.
4607 if (repair)
4608 reset_cached_block_groups(fs_info);
4609 memset(&wc, 0, sizeof(wc));
4610 cache_tree_init(&wc.shared);
4611 btrfs_init_path(&path);
4613 again:
4614 key.offset = 0;
4615 key.objectid = 0;
4616 key.type = BTRFS_ROOT_ITEM_KEY;
4617 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4618 if (ret < 0) {
4619 err = 1;
4620 goto out;
4622 tree_node = tree_root->node;
4623 while (1) {
4624 if (tree_node != tree_root->node) {
4625 free_root_recs_tree(root_cache);
4626 btrfs_release_path(&path);
4627 goto again;
4629 leaf = path.nodes[0];
4630 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631 ret = btrfs_next_leaf(tree_root, &path);
4632 if (ret) {
4633 if (ret < 0)
4634 err = 1;
4635 break;
4637 leaf = path.nodes[0];
4639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641 fs_root_objectid(key.objectid)) {
4642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643 tmp_root = btrfs_read_fs_root_no_cache(
4644 fs_info, &key);
4645 } else {
4646 key.offset = (u64)-1;
4647 tmp_root = btrfs_read_fs_root(
4648 fs_info, &key);
4650 if (IS_ERR(tmp_root)) {
4651 err = 1;
4652 goto next;
4654 ret = check_fs_root(tmp_root, root_cache, &wc);
4655 if (ret == -EAGAIN) {
4656 free_root_recs_tree(root_cache);
4657 btrfs_release_path(&path);
4658 goto again;
4660 if (ret)
4661 err = 1;
4662 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663 btrfs_free_fs_root(tmp_root);
4664 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665 key.type == BTRFS_ROOT_BACKREF_KEY) {
4666 process_root_ref(leaf, path.slots[0], &key,
4667 root_cache);
4669 next:
4670 path.slots[0]++;
4672 out:
4673 btrfs_release_path(&path);
4674 if (err)
4675 free_extent_cache_tree(&wc.shared);
4676 if (!cache_tree_empty(&wc.shared))
4677 fprintf(stderr, "warning line %d\n", __LINE__);
4679 task_stop(ctx.info);
4681 return err;
4685 * Find the @index according by @ino and name.
4686 * Notice:time efficiency is O(N)
4688 * @root: the root of the fs/file tree
4689 * @index_ret: the index as return value
4690 * @namebuf: the name to match
4691 * @name_len: the length of name to match
4692 * @file_type: the file_type of INODE_ITEM to match
4694 * Returns 0 if found and *@index_ret will be modified with right value
4695 * Returns< 0 not found and *@index_ret will be (u64)-1
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698 u64 *index_ret, char *namebuf, u32 name_len,
4699 u8 file_type)
4701 struct btrfs_path path;
4702 struct extent_buffer *node;
4703 struct btrfs_dir_item *di;
4704 struct btrfs_key key;
4705 struct btrfs_key location;
4706 char name[BTRFS_NAME_LEN] = {0};
4708 u32 total;
4709 u32 cur = 0;
4710 u32 len;
4711 u32 data_len;
4712 u8 filetype;
4713 int slot;
4714 int ret;
4716 ASSERT(index_ret);
4718 /* search from the last index */
4719 key.objectid = dirid;
4720 key.offset = (u64)-1;
4721 key.type = BTRFS_DIR_INDEX_KEY;
4723 btrfs_init_path(&path);
4724 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4725 if (ret < 0)
4726 return ret;
4728 loop:
4729 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4730 if (ret) {
4731 ret = -ENOENT;
4732 *index_ret = (64)-1;
4733 goto out;
4735 /* Check whether inode_id/filetype/name match */
4736 node = path.nodes[0];
4737 slot = path.slots[0];
4738 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4741 ret = -ENOENT;
4742 len = btrfs_dir_name_len(node, di);
4743 data_len = btrfs_dir_data_len(node, di);
4745 btrfs_dir_item_key_to_cpu(node, di, &location);
4746 if (location.objectid != location_id ||
4747 location.type != BTRFS_INODE_ITEM_KEY ||
4748 location.offset != 0)
4749 goto next;
4751 filetype = btrfs_dir_type(node, di);
4752 if (file_type != filetype)
4753 goto next;
4755 if (len > BTRFS_NAME_LEN)
4756 len = BTRFS_NAME_LEN;
4758 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759 if (len != name_len || strncmp(namebuf, name, len))
4760 goto next;
4762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763 *index_ret = key.offset;
4764 ret = 0;
4765 goto out;
4766 next:
4767 len += sizeof(*di) + data_len;
4768 di = (struct btrfs_dir_item *)((char *)di + len);
4769 cur += len;
4771 goto loop;
4773 out:
4774 btrfs_release_path(&path);
4775 return ret;
4779 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780 * INODE_REF/INODE_EXTREF match.
4782 * @root: the root of the fs/file tree
4783 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784 * value while find index
4785 * @location_key: location key of the struct btrfs_dir_item to match
4786 * @name: the name to match
4787 * @namelen: the length of name
4788 * @file_type: the type of file to math
4790 * Return 0 if no error occurred.
4791 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792 * DIR_ITEM/DIR_INDEX
4793 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794 * and DIR_ITEM/DIR_INDEX mismatch
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797 struct btrfs_key *location_key, char *name,
4798 u32 namelen, u8 file_type)
4800 struct btrfs_path path;
4801 struct extent_buffer *node;
4802 struct btrfs_dir_item *di;
4803 struct btrfs_key location;
4804 char namebuf[BTRFS_NAME_LEN] = {0};
4805 u32 total;
4806 u32 cur = 0;
4807 u32 len;
4808 u32 data_len;
4809 u8 filetype;
4810 int slot;
4811 int ret;
4813 /* get the index by traversing all index */
4814 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815 ret = find_dir_index(root, key->objectid,
4816 location_key->objectid, &key->offset,
4817 name, namelen, file_type);
4818 if (ret)
4819 ret = DIR_INDEX_MISSING;
4820 return ret;
4823 btrfs_init_path(&path);
4824 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4825 if (ret) {
4826 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4827 DIR_INDEX_MISSING;
4828 goto out;
4831 /* Check whether inode_id/filetype/name match */
4832 node = path.nodes[0];
4833 slot = path.slots[0];
4834 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835 total = btrfs_item_size_nr(node, slot);
4836 while (cur < total) {
4837 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4840 len = btrfs_dir_name_len(node, di);
4841 data_len = btrfs_dir_data_len(node, di);
4843 btrfs_dir_item_key_to_cpu(node, di, &location);
4844 if (location.objectid != location_key->objectid ||
4845 location.type != location_key->type ||
4846 location.offset != location_key->offset)
4847 goto next;
4849 filetype = btrfs_dir_type(node, di);
4850 if (file_type != filetype)
4851 goto next;
4853 if (len > BTRFS_NAME_LEN) {
4854 len = BTRFS_NAME_LEN;
4855 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4856 root->objectid,
4857 key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX",
4859 key->objectid, key->offset, len);
4861 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4862 len);
4863 if (len != namelen || strncmp(namebuf, name, len))
4864 goto next;
4866 ret = 0;
4867 goto out;
4868 next:
4869 len += sizeof(*di) + data_len;
4870 di = (struct btrfs_dir_item *)((char *)di + len);
4871 cur += len;
4874 out:
4875 btrfs_release_path(&path);
4876 return ret;
4880 * Prints inode ref error message
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883 u64 index, const char *namebuf, int name_len,
4884 u8 filetype, int err)
4886 if (!err)
4887 return;
4889 /* root dir error */
4890 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4891 error(
4892 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893 root->objectid, key->objectid, key->offset, namebuf);
4894 return;
4897 /* normal error */
4898 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900 root->objectid, key->offset,
4901 btrfs_name_hash(namebuf, name_len),
4902 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4903 namebuf, filetype);
4904 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906 root->objectid, key->offset, index,
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908 namebuf, filetype);
4912 * Insert the missing inode item.
4914 * Returns 0 means success.
4915 * Returns <0 means error.
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4918 u8 filetype)
4920 struct btrfs_key key;
4921 struct btrfs_trans_handle *trans;
4922 struct btrfs_path path;
4923 int ret;
4925 key.objectid = ino;
4926 key.type = BTRFS_INODE_ITEM_KEY;
4927 key.offset = 0;
4929 btrfs_init_path(&path);
4930 trans = btrfs_start_transaction(root, 1);
4931 if (IS_ERR(trans)) {
4932 ret = -EIO;
4933 goto out;
4936 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4937 if (ret < 0 || !ret)
4938 goto fail;
4940 /* insert inode item */
4941 create_inode_item_lowmem(trans, root, ino, filetype);
4942 ret = 0;
4943 fail:
4944 btrfs_commit_transaction(trans, root);
4945 out:
4946 if (ret)
4947 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948 root->objectid, ino);
4949 btrfs_release_path(&path);
4950 return ret;
4954 * The ternary means dir item, dir index and relative inode ref.
4955 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4957 * strategy:
4958 * If two of three is missing or mismatched, delete the existing one.
4959 * If one of three is missing or mismatched, add the missing one.
4961 * returns 0 means success.
4962 * returns not 0 means on error;
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965 u64 index, char *name, int name_len, u8 filetype,
4966 int err)
4968 struct btrfs_trans_handle *trans;
4969 int stage = 0;
4970 int ret = 0;
4973 * stage shall be one of following valild values:
4974 * 0: Fine, nothing to do.
4975 * 1: One of three is wrong, so add missing one.
4976 * 2: Two of three is wrong, so delete existed one.
4978 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4979 stage++;
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4981 stage++;
4982 if (err & (INODE_REF_MISSING))
4983 stage++;
4985 /* stage must be smllarer than 3 */
4986 ASSERT(stage < 3);
4988 trans = btrfs_start_transaction(root, 1);
4989 if (stage == 2) {
4990 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4991 name_len, 0);
4992 goto out;
4994 if (stage == 1) {
4995 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996 filetype, &index, 1, 1);
4997 goto out;
4999 out:
5000 btrfs_commit_transaction(trans, root);
5002 if (ret)
5003 error("fail to repair inode %llu name %s filetype %u",
5004 ino, name, filetype);
5005 else
5006 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007 stage == 2 ? "Delete" : "Add",
5008 ino, name, filetype);
5010 return ret;
5014 * Traverse the given INODE_REF and call find_dir_item() to find related
5015 * DIR_ITEM/DIR_INDEX.
5017 * @root: the root of the fs/file tree
5018 * @ref_key: the key of the INODE_REF
5019 * @path the path provides node and slot
5020 * @refs: the count of INODE_REF
5021 * @mode: the st_mode of INODE_ITEM
5022 * @name_ret: returns with the first ref's name
5023 * @name_len_ret: len of the name_ret
5025 * Return 0 if no error occurred.
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028 struct btrfs_path *path, char *name_ret,
5029 u32 *namelen_ret, u64 *refs_ret, int mode)
5031 struct btrfs_key key;
5032 struct btrfs_key location;
5033 struct btrfs_inode_ref *ref;
5034 struct extent_buffer *node;
5035 char namebuf[BTRFS_NAME_LEN] = {0};
5036 u32 total;
5037 u32 cur = 0;
5038 u32 len;
5039 u32 name_len;
5040 u64 index;
5041 int ret;
5042 int err = 0;
5043 int tmp_err;
5044 int slot;
5045 int need_research = 0;
5046 u64 refs;
5048 begin:
5049 err = 0;
5050 cur = 0;
5051 refs = *refs_ret;
5053 /* since after repair, path and the dir item may be changed */
5054 if (need_research) {
5055 need_research = 0;
5056 btrfs_release_path(path);
5057 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058 /* the item was deleted, let path point to the last checked item */
5059 if (ret > 0) {
5060 if (path->slots[0] == 0)
5061 btrfs_prev_leaf(root, path);
5062 else
5063 path->slots[0]--;
5065 if (ret)
5066 goto out;
5069 location.objectid = ref_key->objectid;
5070 location.type = BTRFS_INODE_ITEM_KEY;
5071 location.offset = 0;
5072 node = path->nodes[0];
5073 slot = path->slots[0];
5075 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077 total = btrfs_item_size_nr(node, slot);
5079 next:
5080 /* Update inode ref count */
5081 refs++;
5082 tmp_err = 0;
5083 index = btrfs_inode_ref_index(node, ref);
5084 name_len = btrfs_inode_ref_name_len(node, ref);
5086 if (name_len <= BTRFS_NAME_LEN) {
5087 len = name_len;
5088 } else {
5089 len = BTRFS_NAME_LEN;
5090 warning("root %llu INODE_REF[%llu %llu] name too long",
5091 root->objectid, ref_key->objectid, ref_key->offset);
5094 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5096 /* copy the first name found to name_ret */
5097 if (refs == 1 && name_ret) {
5098 memcpy(name_ret, namebuf, len);
5099 *namelen_ret = len;
5102 /* Check root dir ref */
5103 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104 if (index != 0 || len != strlen("..") ||
5105 strncmp("..", namebuf, len) ||
5106 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107 /* set err bits then repair will delete the ref */
5108 err |= DIR_INDEX_MISSING;
5109 err |= DIR_ITEM_MISSING;
5111 goto end;
5114 /* Find related DIR_INDEX */
5115 key.objectid = ref_key->offset;
5116 key.type = BTRFS_DIR_INDEX_KEY;
5117 key.offset = index;
5118 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119 imode_to_type(mode));
5121 /* Find related dir_item */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_DIR_ITEM_KEY;
5124 key.offset = btrfs_name_hash(namebuf, len);
5125 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126 imode_to_type(mode));
5127 end:
5128 if (tmp_err && repair) {
5129 ret = repair_ternary_lowmem(root, ref_key->offset,
5130 ref_key->objectid, index, namebuf,
5131 name_len, imode_to_type(mode),
5132 tmp_err);
5133 if (!ret) {
5134 need_research = 1;
5135 goto begin;
5138 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139 imode_to_type(mode), tmp_err);
5140 err |= tmp_err;
5141 len = sizeof(*ref) + name_len;
5142 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5143 cur += len;
5144 if (cur < total)
5145 goto next;
5147 out:
5148 *refs_ret = refs;
5149 return err;
5153 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154 * DIR_ITEM/DIR_INDEX.
5156 * @root: the root of the fs/file tree
5157 * @ref_key: the key of the INODE_EXTREF
5158 * @refs: the count of INODE_EXTREF
5159 * @mode: the st_mode of INODE_ITEM
5161 * Return 0 if no error occurred.
5163 static int check_inode_extref(struct btrfs_root *root,
5164 struct btrfs_key *ref_key,
5165 struct extent_buffer *node, int slot, u64 *refs,
5166 int mode)
5168 struct btrfs_key key;
5169 struct btrfs_key location;
5170 struct btrfs_inode_extref *extref;
5171 char namebuf[BTRFS_NAME_LEN] = {0};
5172 u32 total;
5173 u32 cur = 0;
5174 u32 len;
5175 u32 name_len;
5176 u64 index;
5177 u64 parent;
5178 int ret;
5179 int err = 0;
5181 location.objectid = ref_key->objectid;
5182 location.type = BTRFS_INODE_ITEM_KEY;
5183 location.offset = 0;
5185 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186 total = btrfs_item_size_nr(node, slot);
5188 next:
5189 /* update inode ref count */
5190 (*refs)++;
5191 name_len = btrfs_inode_extref_name_len(node, extref);
5192 index = btrfs_inode_extref_index(node, extref);
5193 parent = btrfs_inode_extref_parent(node, extref);
5194 if (name_len <= BTRFS_NAME_LEN) {
5195 len = name_len;
5196 } else {
5197 len = BTRFS_NAME_LEN;
5198 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199 root->objectid, ref_key->objectid, ref_key->offset);
5201 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5203 /* Check root dir ref name */
5204 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206 root->objectid, ref_key->objectid, ref_key->offset,
5207 namebuf);
5208 err |= ROOT_DIR_ERROR;
5211 /* find related dir_index */
5212 key.objectid = parent;
5213 key.type = BTRFS_DIR_INDEX_KEY;
5214 key.offset = index;
5215 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5216 err |= ret;
5218 /* find related dir_item */
5219 key.objectid = parent;
5220 key.type = BTRFS_DIR_ITEM_KEY;
5221 key.offset = btrfs_name_hash(namebuf, len);
5222 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5223 err |= ret;
5225 len = sizeof(*extref) + name_len;
5226 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5227 cur += len;
5229 if (cur < total)
5230 goto next;
5232 return err;
5236 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237 * DIR_ITEM/DIR_INDEX match.
5238 * Return with @index_ret.
5240 * @root: the root of the fs/file tree
5241 * @key: the key of the INODE_REF/INODE_EXTREF
5242 * @name: the name in the INODE_REF/INODE_EXTREF
5243 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5244 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5245 * value (64)-1 means do not check index
5246 * @ext_ref: the EXTENDED_IREF feature
5248 * Return 0 if no error occurred.
5249 * Return >0 for error bitmap
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252 char *name, int namelen, u64 *index_ret,
5253 unsigned int ext_ref)
5255 struct btrfs_path path;
5256 struct btrfs_inode_ref *ref;
5257 struct btrfs_inode_extref *extref;
5258 struct extent_buffer *node;
5259 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5260 u32 total;
5261 u32 cur = 0;
5262 u32 len;
5263 u32 ref_namelen;
5264 u64 ref_index;
5265 u64 parent;
5266 u64 dir_id;
5267 int slot;
5268 int ret;
5270 ASSERT(index_ret);
5272 btrfs_init_path(&path);
5273 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5274 if (ret) {
5275 ret = INODE_REF_MISSING;
5276 goto extref;
5279 node = path.nodes[0];
5280 slot = path.slots[0];
5282 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283 total = btrfs_item_size_nr(node, slot);
5285 /* Iterate all entry of INODE_REF */
5286 while (cur < total) {
5287 ret = INODE_REF_MISSING;
5289 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290 ref_index = btrfs_inode_ref_index(node, ref);
5291 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5292 goto next_ref;
5294 if (cur + sizeof(*ref) + ref_namelen > total ||
5295 ref_namelen > BTRFS_NAME_LEN) {
5296 warning("root %llu INODE %s[%llu %llu] name too long",
5297 root->objectid,
5298 key->type == BTRFS_INODE_REF_KEY ?
5299 "REF" : "EXTREF",
5300 key->objectid, key->offset);
5302 if (cur + sizeof(*ref) > total)
5303 break;
5304 len = min_t(u32, total - cur - sizeof(*ref),
5305 BTRFS_NAME_LEN);
5306 } else {
5307 len = ref_namelen;
5310 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5311 len);
5313 if (len != namelen || strncmp(ref_namebuf, name, len))
5314 goto next_ref;
5316 *index_ret = ref_index;
5317 ret = 0;
5318 goto out;
5319 next_ref:
5320 len = sizeof(*ref) + ref_namelen;
5321 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5322 cur += len;
5325 extref:
5326 /* Skip if not support EXTENDED_IREF feature */
5327 if (!ext_ref)
5328 goto out;
5330 btrfs_release_path(&path);
5331 btrfs_init_path(&path);
5333 dir_id = key->offset;
5334 key->type = BTRFS_INODE_EXTREF_KEY;
5335 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5337 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5338 if (ret) {
5339 ret = INODE_REF_MISSING;
5340 goto out;
5343 node = path.nodes[0];
5344 slot = path.slots[0];
5346 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5347 cur = 0;
5348 total = btrfs_item_size_nr(node, slot);
5350 /* Iterate all entry of INODE_EXTREF */
5351 while (cur < total) {
5352 ret = INODE_REF_MISSING;
5354 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355 ref_index = btrfs_inode_extref_index(node, extref);
5356 parent = btrfs_inode_extref_parent(node, extref);
5357 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5358 goto next_extref;
5360 if (parent != dir_id)
5361 goto next_extref;
5363 if (ref_namelen <= BTRFS_NAME_LEN) {
5364 len = ref_namelen;
5365 } else {
5366 len = BTRFS_NAME_LEN;
5367 warning("root %llu INODE %s[%llu %llu] name too long",
5368 root->objectid,
5369 key->type == BTRFS_INODE_REF_KEY ?
5370 "REF" : "EXTREF",
5371 key->objectid, key->offset);
5373 read_extent_buffer(node, ref_namebuf,
5374 (unsigned long)(extref + 1), len);
5376 if (len != namelen || strncmp(ref_namebuf, name, len))
5377 goto next_extref;
5379 *index_ret = ref_index;
5380 ret = 0;
5381 goto out;
5383 next_extref:
5384 len = sizeof(*extref) + ref_namelen;
5385 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5386 cur += len;
5389 out:
5390 btrfs_release_path(&path);
5391 return ret;
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395 u64 ino, u64 index, const char *namebuf,
5396 int name_len, u8 filetype, int err)
5398 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400 root->objectid, key->objectid, key->offset, namebuf,
5401 filetype,
5402 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5405 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407 root->objectid, key->objectid, index, namebuf, filetype,
5408 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5411 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5412 error(
5413 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414 root->objectid, ino, index, namebuf, filetype,
5415 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5418 if (err & INODE_REF_MISSING)
5419 error(
5420 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421 root->objectid, ino, key->objectid, namebuf, filetype);
5426 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5428 * Returns error after repair
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431 u64 index, u8 filetype, char *namebuf, u32 name_len,
5432 int err)
5434 int ret;
5436 if (err & INODE_ITEM_MISSING) {
5437 ret = repair_inode_item_missing(root, ino, filetype);
5438 if (!ret)
5439 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5442 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444 name_len, filetype, err);
5445 if (!ret) {
5446 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448 err &= ~(INODE_REF_MISSING);
5451 return err;
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5455 u64 *size_ret)
5457 struct btrfs_key key;
5458 struct btrfs_path path;
5459 u32 len;
5460 struct btrfs_dir_item *di;
5461 int ret;
5462 int cur = 0;
5463 int total = 0;
5465 ASSERT(size_ret);
5466 *size_ret = 0;
5468 key.objectid = ino;
5469 key.type = type;
5470 key.offset = (u64)-1;
5472 btrfs_init_path(&path);
5473 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5474 if (ret < 0) {
5475 ret = -EIO;
5476 goto out;
5478 /* if found, go to spacial case */
5479 if (ret == 0)
5480 goto special_case;
5482 loop:
5483 ret = btrfs_previous_item(root, &path, ino, type);
5485 if (ret) {
5486 ret = 0;
5487 goto out;
5490 special_case:
5491 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5492 cur = 0;
5493 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5495 while (cur < total) {
5496 len = btrfs_dir_name_len(path.nodes[0], di);
5497 if (len > BTRFS_NAME_LEN)
5498 len = BTRFS_NAME_LEN;
5499 *size_ret += len;
5501 len += btrfs_dir_data_len(path.nodes[0], di);
5502 len += sizeof(*di);
5503 di = (struct btrfs_dir_item *)((char *)di + len);
5504 cur += len;
5506 goto loop;
5508 out:
5509 btrfs_release_path(&path);
5510 return ret;
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5515 u64 item_size;
5516 u64 index_size;
5517 int ret;
5519 ASSERT(size);
5520 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5521 if (ret)
5522 goto out;
5524 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5525 if (ret)
5526 goto out;
5528 *size = item_size + index_size;
5530 out:
5531 if (ret)
5532 error("failed to count root %llu INODE[%llu] root size",
5533 root->objectid, ino);
5534 return ret;
5538 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5541 * @root: the root of the fs/file tree
5542 * @key: the key of the INODE_REF/INODE_EXTREF
5543 * @path: the path
5544 * @size: the st_size of the INODE_ITEM
5545 * @ext_ref: the EXTENDED_IREF feature
5547 * Return 0 if no error occurred.
5548 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551 struct btrfs_path *path, u64 *size,
5552 unsigned int ext_ref)
5554 struct btrfs_dir_item *di;
5555 struct btrfs_inode_item *ii;
5556 struct btrfs_key key;
5557 struct btrfs_key location;
5558 struct extent_buffer *node;
5559 int slot;
5560 char namebuf[BTRFS_NAME_LEN] = {0};
5561 u32 total;
5562 u32 cur = 0;
5563 u32 len;
5564 u32 name_len;
5565 u32 data_len;
5566 u8 filetype;
5567 u32 mode = 0;
5568 u64 index;
5569 int ret;
5570 int err;
5571 int tmp_err;
5572 int need_research = 0;
5575 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576 * ignore index check.
5578 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579 index = di_key->offset;
5580 else
5581 index = (u64)-1;
5582 begin:
5583 err = 0;
5584 cur = 0;
5586 /* since after repair, path and the dir item may be changed */
5587 if (need_research) {
5588 need_research = 0;
5589 err |= DIR_COUNT_AGAIN;
5590 btrfs_release_path(path);
5591 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592 /* the item was deleted, let path point the last checked item */
5593 if (ret > 0) {
5594 if (path->slots[0] == 0)
5595 btrfs_prev_leaf(root, path);
5596 else
5597 path->slots[0]--;
5599 if (ret)
5600 goto out;
5603 node = path->nodes[0];
5604 slot = path->slots[0];
5606 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607 total = btrfs_item_size_nr(node, slot);
5608 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5610 while (cur < total) {
5611 data_len = btrfs_dir_data_len(node, di);
5612 tmp_err = 0;
5613 if (data_len)
5614 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5615 root->objectid,
5616 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617 di_key->objectid, di_key->offset, data_len);
5619 name_len = btrfs_dir_name_len(node, di);
5620 if (name_len <= BTRFS_NAME_LEN) {
5621 len = name_len;
5622 } else {
5623 len = BTRFS_NAME_LEN;
5624 warning("root %llu %s[%llu %llu] name too long",
5625 root->objectid,
5626 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627 di_key->objectid, di_key->offset);
5629 (*size) += name_len;
5630 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5631 len);
5632 filetype = btrfs_dir_type(node, di);
5634 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635 di_key->offset != btrfs_name_hash(namebuf, len)) {
5636 err |= -EIO;
5637 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638 root->objectid, di_key->objectid, di_key->offset,
5639 namebuf, len, filetype, di_key->offset,
5640 btrfs_name_hash(namebuf, len));
5643 btrfs_dir_item_key_to_cpu(node, di, &location);
5644 /* Ignore related ROOT_ITEM check */
5645 if (location.type == BTRFS_ROOT_ITEM_KEY)
5646 goto next;
5648 btrfs_release_path(path);
5649 /* Check relative INODE_ITEM(existence/filetype) */
5650 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5651 if (ret) {
5652 tmp_err |= INODE_ITEM_MISSING;
5653 goto next;
5656 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657 struct btrfs_inode_item);
5658 mode = btrfs_inode_mode(path->nodes[0], ii);
5659 if (imode_to_type(mode) != filetype) {
5660 tmp_err |= INODE_ITEM_MISMATCH;
5661 goto next;
5664 /* Check relative INODE_REF/INODE_EXTREF */
5665 key.objectid = location.objectid;
5666 key.type = BTRFS_INODE_REF_KEY;
5667 key.offset = di_key->objectid;
5668 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5669 &index, ext_ref);
5671 /* check relative INDEX/ITEM */
5672 key.objectid = di_key->objectid;
5673 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674 key.type = BTRFS_DIR_INDEX_KEY;
5675 key.offset = index;
5676 } else {
5677 key.type = BTRFS_DIR_ITEM_KEY;
5678 key.offset = btrfs_name_hash(namebuf, name_len);
5681 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682 name_len, filetype);
5683 /* find_dir_item may find index */
5684 if (key.type == BTRFS_DIR_INDEX_KEY)
5685 index = key.offset;
5686 next:
5688 if (tmp_err && repair) {
5689 ret = repair_dir_item(root, di_key->objectid,
5690 location.objectid, index,
5691 imode_to_type(mode), namebuf,
5692 name_len, tmp_err);
5693 if (ret != tmp_err) {
5694 need_research = 1;
5695 goto begin;
5698 btrfs_release_path(path);
5699 print_dir_item_err(root, di_key, location.objectid, index,
5700 namebuf, name_len, filetype, tmp_err);
5701 err |= tmp_err;
5702 len = sizeof(*di) + name_len + data_len;
5703 di = (struct btrfs_dir_item *)((char *)di + len);
5704 cur += len;
5706 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708 root->objectid, di_key->objectid,
5709 di_key->offset);
5710 break;
5713 out:
5714 /* research path */
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5717 if (ret)
5718 err |= ret > 0 ? -ENOENT : ret;
5719 return err;
5723 * Wrapper function of btrfs_punch_hole.
5725 * Returns 0 means success.
5726 * Returns not 0 means error.
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5729 u64 len)
5731 struct btrfs_trans_handle *trans;
5732 int ret = 0;
5734 trans = btrfs_start_transaction(root, 1);
5735 if (IS_ERR(trans))
5736 return PTR_ERR(trans);
5738 ret = btrfs_punch_hole(trans, root, ino, start, len);
5739 if (ret)
5740 error("failed to add hole [%llu, %llu] in inode [%llu]",
5741 start, len, ino);
5742 else
5743 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5744 ino);
5746 btrfs_commit_transaction(trans, root);
5747 return ret;
5751 * Check file extent datasum/hole, update the size of the file extents,
5752 * check and update the last offset of the file extent.
5754 * @root: the root of fs/file tree.
5755 * @fkey: the key of the file extent.
5756 * @nodatasum: INODE_NODATASUM feature.
5757 * @size: the sum of all EXTENT_DATA items size for this inode.
5758 * @end: the offset of the last extent.
5760 * Return 0 if no error occurred.
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763 struct extent_buffer *node, int slot,
5764 unsigned int nodatasum, u64 *size, u64 *end)
5766 struct btrfs_file_extent_item *fi;
5767 u64 disk_bytenr;
5768 u64 disk_num_bytes;
5769 u64 extent_num_bytes;
5770 u64 extent_offset;
5771 u64 csum_found; /* In byte size, sectorsize aligned */
5772 u64 search_start; /* Logical range start we search for csum */
5773 u64 search_len; /* Logical range len we search for csum */
5774 unsigned int extent_type;
5775 unsigned int is_hole;
5776 int compressed = 0;
5777 int ret;
5778 int err = 0;
5780 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5782 /* Check inline extent */
5783 extent_type = btrfs_file_extent_type(node, fi);
5784 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785 struct btrfs_item *e = btrfs_item_nr(slot);
5786 u32 item_inline_len;
5788 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790 compressed = btrfs_file_extent_compression(node, fi);
5791 if (extent_num_bytes == 0) {
5792 error(
5793 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794 root->objectid, fkey->objectid, fkey->offset);
5795 err |= FILE_EXTENT_ERROR;
5797 if (!compressed && extent_num_bytes != item_inline_len) {
5798 error(
5799 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800 root->objectid, fkey->objectid, fkey->offset,
5801 extent_num_bytes, item_inline_len);
5802 err |= FILE_EXTENT_ERROR;
5804 *end += extent_num_bytes;
5805 *size += extent_num_bytes;
5806 return err;
5809 /* Check extent type */
5810 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812 err |= FILE_EXTENT_ERROR;
5813 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814 root->objectid, fkey->objectid, fkey->offset);
5815 return err;
5818 /* Check REG_EXTENT/PREALLOC_EXTENT */
5819 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822 extent_offset = btrfs_file_extent_offset(node, fi);
5823 compressed = btrfs_file_extent_compression(node, fi);
5824 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5827 * Check EXTENT_DATA csum
5829 * For plain (uncompressed) extent, we should only check the range
5830 * we're referring to, as it's possible that part of prealloc extent
5831 * has been written, and has csum:
5833 * |<--- Original large preallocated extent A ---->|
5834 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5835 * No csum Has csum
5837 * For compressed extent, we should check the whole range.
5839 if (!compressed) {
5840 search_start = disk_bytenr + extent_offset;
5841 search_len = extent_num_bytes;
5842 } else {
5843 search_start = disk_bytenr;
5844 search_len = disk_num_bytes;
5846 ret = count_csum_range(root, search_start, search_len, &csum_found);
5847 if (csum_found > 0 && nodatasum) {
5848 err |= ODD_CSUM_ITEM;
5849 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850 root->objectid, fkey->objectid, fkey->offset);
5851 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852 !is_hole && (ret < 0 || csum_found < search_len)) {
5853 err |= CSUM_ITEM_MISSING;
5854 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855 root->objectid, fkey->objectid, fkey->offset,
5856 csum_found, search_len);
5857 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858 err |= ODD_CSUM_ITEM;
5859 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860 root->objectid, fkey->objectid, fkey->offset, csum_found);
5863 /* Check EXTENT_DATA hole */
5864 if (!no_holes && *end != fkey->offset) {
5865 if (repair)
5866 ret = punch_extent_hole(root, fkey->objectid,
5867 *end, fkey->offset - *end);
5868 if (!repair || ret) {
5869 err |= FILE_EXTENT_ERROR;
5870 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5871 root->objectid, fkey->objectid, fkey->offset);
5875 *end += extent_num_bytes;
5876 if (!is_hole)
5877 *size += extent_num_bytes;
5879 return err;
5883 * Set inode item nbytes to @nbytes
5885 * Returns 0 on success
5886 * Returns != 0 on error
5888 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5889 struct btrfs_path *path,
5890 u64 ino, u64 nbytes)
5892 struct btrfs_trans_handle *trans;
5893 struct btrfs_inode_item *ii;
5894 struct btrfs_key key;
5895 struct btrfs_key research_key;
5896 int err = 0;
5897 int ret;
5899 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5901 key.objectid = ino;
5902 key.type = BTRFS_INODE_ITEM_KEY;
5903 key.offset = 0;
5905 trans = btrfs_start_transaction(root, 1);
5906 if (IS_ERR(trans)) {
5907 ret = PTR_ERR(trans);
5908 err |= ret;
5909 goto out;
5912 btrfs_release_path(path);
5913 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5914 if (ret > 0)
5915 ret = -ENOENT;
5916 if (ret) {
5917 err |= ret;
5918 goto fail;
5921 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5922 struct btrfs_inode_item);
5923 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5924 btrfs_mark_buffer_dirty(path->nodes[0]);
5925 fail:
5926 btrfs_commit_transaction(trans, root);
5927 out:
5928 if (ret)
5929 error("failed to set nbytes in inode %llu root %llu",
5930 ino, root->root_key.objectid);
5931 else
5932 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5933 root->root_key.objectid, nbytes);
5935 /* research path */
5936 btrfs_release_path(path);
5937 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5938 err |= ret;
5940 return err;
5944 * Set directory inode isize to @isize.
5946 * Returns 0 on success.
5947 * Returns != 0 on error.
5949 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5950 struct btrfs_path *path,
5951 u64 ino, u64 isize)
5953 struct btrfs_trans_handle *trans;
5954 struct btrfs_inode_item *ii;
5955 struct btrfs_key key;
5956 struct btrfs_key research_key;
5957 int ret;
5958 int err = 0;
5960 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5962 key.objectid = ino;
5963 key.type = BTRFS_INODE_ITEM_KEY;
5964 key.offset = 0;
5966 trans = btrfs_start_transaction(root, 1);
5967 if (IS_ERR(trans)) {
5968 ret = PTR_ERR(trans);
5969 err |= ret;
5970 goto out;
5973 btrfs_release_path(path);
5974 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5975 if (ret > 0)
5976 ret = -ENOENT;
5977 if (ret) {
5978 err |= ret;
5979 goto fail;
5982 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5983 struct btrfs_inode_item);
5984 btrfs_set_inode_size(path->nodes[0], ii, isize);
5985 btrfs_mark_buffer_dirty(path->nodes[0]);
5986 fail:
5987 btrfs_commit_transaction(trans, root);
5988 out:
5989 if (ret)
5990 error("failed to set isize in inode %llu root %llu",
5991 ino, root->root_key.objectid);
5992 else
5993 printf("Set isize in inode %llu root %llu to %llu\n",
5994 ino, root->root_key.objectid, isize);
5996 btrfs_release_path(path);
5997 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5998 err |= ret;
6000 return err;
6004 * Wrapper function for btrfs_add_orphan_item().
6006 * Returns 0 on success.
6007 * Returns != 0 on error.
6009 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6010 struct btrfs_path *path, u64 ino)
6012 struct btrfs_trans_handle *trans;
6013 struct btrfs_key research_key;
6014 int ret;
6015 int err = 0;
6017 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6019 trans = btrfs_start_transaction(root, 1);
6020 if (IS_ERR(trans)) {
6021 ret = PTR_ERR(trans);
6022 err |= ret;
6023 goto out;
6026 btrfs_release_path(path);
6027 ret = btrfs_add_orphan_item(trans, root, path, ino);
6028 err |= ret;
6029 btrfs_commit_transaction(trans, root);
6030 out:
6031 if (ret)
6032 error("failed to add inode %llu as orphan item root %llu",
6033 ino, root->root_key.objectid);
6034 else
6035 printf("Added inode %llu as orphan item root %llu\n",
6036 ino, root->root_key.objectid);
6038 btrfs_release_path(path);
6039 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6040 err |= ret;
6042 return err;
6045 /* Set inode_item nlink to @ref_count.
6046 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6048 * Returns 0 on success
6050 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6051 struct btrfs_path *path, u64 ino,
6052 const char *name, u32 namelen,
6053 u64 ref_count, u8 filetype, u64 *nlink)
6055 struct btrfs_trans_handle *trans;
6056 struct btrfs_inode_item *ii;
6057 struct btrfs_key key;
6058 struct btrfs_key old_key;
6059 char namebuf[BTRFS_NAME_LEN] = {0};
6060 int name_len;
6061 int ret;
6062 int ret2;
6064 /* save the key */
6065 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6067 if (name && namelen) {
6068 ASSERT(namelen <= BTRFS_NAME_LEN);
6069 memcpy(namebuf, name, namelen);
6070 name_len = namelen;
6071 } else {
6072 sprintf(namebuf, "%llu", ino);
6073 name_len = count_digits(ino);
6074 printf("Can't find file name for inode %llu, use %s instead\n",
6075 ino, namebuf);
6078 trans = btrfs_start_transaction(root, 1);
6079 if (IS_ERR(trans)) {
6080 ret = PTR_ERR(trans);
6081 goto out;
6084 btrfs_release_path(path);
6085 /* if refs is 0, put it into lostfound */
6086 if (ref_count == 0) {
6087 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6088 name_len, filetype, &ref_count);
6089 if (ret)
6090 goto fail;
6093 /* reset inode_item's nlink to ref_count */
6094 key.objectid = ino;
6095 key.type = BTRFS_INODE_ITEM_KEY;
6096 key.offset = 0;
6098 btrfs_release_path(path);
6099 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6100 if (ret > 0)
6101 ret = -ENOENT;
6102 if (ret)
6103 goto fail;
6105 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6106 struct btrfs_inode_item);
6107 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6108 btrfs_mark_buffer_dirty(path->nodes[0]);
6110 if (nlink)
6111 *nlink = ref_count;
6112 fail:
6113 btrfs_commit_transaction(trans, root);
6114 out:
6115 if (ret)
6116 error(
6117 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6118 root->objectid, ino, namebuf, filetype);
6119 else
6120 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6121 root->objectid, ino, namebuf, filetype);
6123 /* research */
6124 btrfs_release_path(path);
6125 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6126 if (ret2 < 0)
6127 return ret |= ret2;
6128 return ret;
6132 * Check INODE_ITEM and related ITEMs (the same inode number)
6133 * 1. check link count
6134 * 2. check inode ref/extref
6135 * 3. check dir item/index
6137 * @ext_ref: the EXTENDED_IREF feature
6139 * Return 0 if no error occurred.
6140 * Return >0 for error or hit the traversal is done(by error bitmap)
6142 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6143 unsigned int ext_ref)
6145 struct extent_buffer *node;
6146 struct btrfs_inode_item *ii;
6147 struct btrfs_key key;
6148 struct btrfs_key last_key;
6149 u64 inode_id;
6150 u32 mode;
6151 u64 nlink;
6152 u64 nbytes;
6153 u64 isize;
6154 u64 size = 0;
6155 u64 refs = 0;
6156 u64 extent_end = 0;
6157 u64 extent_size = 0;
6158 unsigned int dir;
6159 unsigned int nodatasum;
6160 int slot;
6161 int ret;
6162 int err = 0;
6163 char namebuf[BTRFS_NAME_LEN] = {0};
6164 u32 name_len = 0;
6166 node = path->nodes[0];
6167 slot = path->slots[0];
6169 btrfs_item_key_to_cpu(node, &key, slot);
6170 inode_id = key.objectid;
6172 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6173 ret = btrfs_next_item(root, path);
6174 if (ret > 0)
6175 err |= LAST_ITEM;
6176 return err;
6179 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6180 isize = btrfs_inode_size(node, ii);
6181 nbytes = btrfs_inode_nbytes(node, ii);
6182 mode = btrfs_inode_mode(node, ii);
6183 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6184 nlink = btrfs_inode_nlink(node, ii);
6185 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6187 while (1) {
6188 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6189 ret = btrfs_next_item(root, path);
6190 if (ret < 0) {
6191 /* out will fill 'err' rusing current statistics */
6192 goto out;
6193 } else if (ret > 0) {
6194 err |= LAST_ITEM;
6195 goto out;
6198 node = path->nodes[0];
6199 slot = path->slots[0];
6200 btrfs_item_key_to_cpu(node, &key, slot);
6201 if (key.objectid != inode_id)
6202 goto out;
6204 switch (key.type) {
6205 case BTRFS_INODE_REF_KEY:
6206 ret = check_inode_ref(root, &key, path, namebuf,
6207 &name_len, &refs, mode);
6208 err |= ret;
6209 break;
6210 case BTRFS_INODE_EXTREF_KEY:
6211 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6212 warning("root %llu EXTREF[%llu %llu] isn't supported",
6213 root->objectid, key.objectid,
6214 key.offset);
6215 ret = check_inode_extref(root, &key, node, slot, &refs,
6216 mode);
6217 err |= ret;
6218 break;
6219 case BTRFS_DIR_ITEM_KEY:
6220 case BTRFS_DIR_INDEX_KEY:
6221 if (!dir) {
6222 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6223 root->objectid, inode_id,
6224 imode_to_type(mode), key.objectid,
6225 key.offset);
6227 ret = check_dir_item(root, &key, path, &size, ext_ref);
6228 err |= ret;
6229 break;
6230 case BTRFS_EXTENT_DATA_KEY:
6231 if (dir) {
6232 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6233 root->objectid, inode_id, key.objectid,
6234 key.offset);
6236 ret = check_file_extent(root, &key, node, slot,
6237 nodatasum, &extent_size,
6238 &extent_end);
6239 err |= ret;
6240 break;
6241 case BTRFS_XATTR_ITEM_KEY:
6242 break;
6243 default:
6244 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6245 key.objectid, key.type, key.offset);
6249 out:
6250 if (err & LAST_ITEM) {
6251 btrfs_release_path(path);
6252 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6253 if (ret)
6254 return err;
6257 /* verify INODE_ITEM nlink/isize/nbytes */
6258 if (dir) {
6259 if (repair && (err & DIR_COUNT_AGAIN)) {
6260 err &= ~DIR_COUNT_AGAIN;
6261 count_dir_isize(root, inode_id, &size);
6264 if ((nlink != 1 || refs != 1) && repair) {
6265 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6266 namebuf, name_len, refs, imode_to_type(mode),
6267 &nlink);
6270 if (nlink != 1) {
6271 err |= LINK_COUNT_ERROR;
6272 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6273 root->objectid, inode_id, nlink);
6277 * Just a warning, as dir inode nbytes is just an
6278 * instructive value.
6280 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6281 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6282 root->objectid, inode_id,
6283 root->fs_info->nodesize);
6286 if (isize != size) {
6287 if (repair)
6288 ret = repair_dir_isize_lowmem(root, path,
6289 inode_id, size);
6290 if (!repair || ret) {
6291 err |= ISIZE_ERROR;
6292 error(
6293 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6294 root->objectid, inode_id, isize, size);
6297 } else {
6298 if (nlink != refs) {
6299 if (repair)
6300 ret = repair_inode_nlinks_lowmem(root, path,
6301 inode_id, namebuf, name_len, refs,
6302 imode_to_type(mode), &nlink);
6303 if (!repair || ret) {
6304 err |= LINK_COUNT_ERROR;
6305 error(
6306 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6307 root->objectid, inode_id, nlink, refs);
6309 } else if (!nlink) {
6310 if (repair)
6311 ret = repair_inode_orphan_item_lowmem(root,
6312 path, inode_id);
6313 if (!repair || ret) {
6314 err |= ORPHAN_ITEM;
6315 error("root %llu INODE[%llu] is orphan item",
6316 root->objectid, inode_id);
6320 if (!nbytes && !no_holes && extent_end < isize) {
6321 if (repair)
6322 ret = punch_extent_hole(root, inode_id,
6323 extent_end, isize - extent_end);
6324 if (!repair || ret) {
6325 err |= NBYTES_ERROR;
6326 error(
6327 "root %llu INODE[%llu] size %llu should have a file extent hole",
6328 root->objectid, inode_id, isize);
6332 if (nbytes != extent_size) {
6333 if (repair)
6334 ret = repair_inode_nbytes_lowmem(root, path,
6335 inode_id, extent_size);
6336 if (!repair || ret) {
6337 err |= NBYTES_ERROR;
6338 error(
6339 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6340 root->objectid, inode_id, nbytes,
6341 extent_size);
6346 if (err & LAST_ITEM)
6347 btrfs_next_item(root, path);
6348 return err;
6352 * Insert the missing inode item and inode ref.
6354 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6355 * Root dir should be handled specially because root dir is the root of fs.
6357 * returns err (>0 or 0) after repair
6359 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6361 struct btrfs_trans_handle *trans;
6362 struct btrfs_key key;
6363 struct btrfs_path path;
6364 int filetype = BTRFS_FT_DIR;
6365 int ret = 0;
6367 btrfs_init_path(&path);
6369 if (err & INODE_REF_MISSING) {
6370 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6371 key.type = BTRFS_INODE_REF_KEY;
6372 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6374 trans = btrfs_start_transaction(root, 1);
6375 if (IS_ERR(trans)) {
6376 ret = PTR_ERR(trans);
6377 goto out;
6380 btrfs_release_path(&path);
6381 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6382 if (ret)
6383 goto trans_fail;
6385 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6386 BTRFS_FIRST_FREE_OBJECTID,
6387 BTRFS_FIRST_FREE_OBJECTID, 0);
6388 if (ret)
6389 goto trans_fail;
6391 printf("Add INODE_REF[%llu %llu] name %s\n",
6392 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6393 "..");
6394 err &= ~INODE_REF_MISSING;
6395 trans_fail:
6396 if (ret)
6397 error("fail to insert first inode's ref");
6398 btrfs_commit_transaction(trans, root);
6401 if (err & INODE_ITEM_MISSING) {
6402 ret = repair_inode_item_missing(root,
6403 BTRFS_FIRST_FREE_OBJECTID, filetype);
6404 if (ret)
6405 goto out;
6406 err &= ~INODE_ITEM_MISSING;
6408 out:
6409 if (ret)
6410 error("fail to repair first inode");
6411 btrfs_release_path(&path);
6412 return err;
6416 * check first root dir's inode_item and inode_ref
6418 * returns 0 means no error
6419 * returns >0 means error
6420 * returns <0 means fatal error
6422 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6424 struct btrfs_path path;
6425 struct btrfs_key key;
6426 struct btrfs_inode_item *ii;
6427 u64 index;
6428 u32 mode;
6429 int err = 0;
6430 int ret;
6432 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6433 key.type = BTRFS_INODE_ITEM_KEY;
6434 key.offset = 0;
6436 /* For root being dropped, we don't need to check first inode */
6437 if (btrfs_root_refs(&root->root_item) == 0 &&
6438 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6439 BTRFS_FIRST_FREE_OBJECTID)
6440 return 0;
6442 btrfs_init_path(&path);
6443 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6444 if (ret < 0)
6445 goto out;
6446 if (ret > 0) {
6447 ret = 0;
6448 err |= INODE_ITEM_MISSING;
6449 } else {
6450 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6451 struct btrfs_inode_item);
6452 mode = btrfs_inode_mode(path.nodes[0], ii);
6453 if (imode_to_type(mode) != BTRFS_FT_DIR)
6454 err |= INODE_ITEM_MISMATCH;
6457 /* lookup first inode ref */
6458 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6459 key.type = BTRFS_INODE_REF_KEY;
6460 /* special index value */
6461 index = 0;
6463 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6464 if (ret < 0)
6465 goto out;
6466 err |= ret;
6468 out:
6469 btrfs_release_path(&path);
6471 if (err && repair)
6472 err = repair_fs_first_inode(root, err);
6474 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6475 error("root dir INODE_ITEM is %s",
6476 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6477 if (err & INODE_REF_MISSING)
6478 error("root dir INODE_REF is missing");
6480 return ret < 0 ? ret : err;
6483 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6484 u64 parent, u64 root)
6486 struct rb_node *node;
6487 struct tree_backref *back = NULL;
6488 struct tree_backref match = {
6489 .node = {
6490 .is_data = 0,
6494 if (parent) {
6495 match.parent = parent;
6496 match.node.full_backref = 1;
6497 } else {
6498 match.root = root;
6501 node = rb_search(&rec->backref_tree, &match.node.node,
6502 (rb_compare_keys)compare_extent_backref, NULL);
6503 if (node)
6504 back = to_tree_backref(rb_node_to_extent_backref(node));
6506 return back;
6509 static struct data_backref *find_data_backref(struct extent_record *rec,
6510 u64 parent, u64 root,
6511 u64 owner, u64 offset,
6512 int found_ref,
6513 u64 disk_bytenr, u64 bytes)
6515 struct rb_node *node;
6516 struct data_backref *back = NULL;
6517 struct data_backref match = {
6518 .node = {
6519 .is_data = 1,
6521 .owner = owner,
6522 .offset = offset,
6523 .bytes = bytes,
6524 .found_ref = found_ref,
6525 .disk_bytenr = disk_bytenr,
6528 if (parent) {
6529 match.parent = parent;
6530 match.node.full_backref = 1;
6531 } else {
6532 match.root = root;
6535 node = rb_search(&rec->backref_tree, &match.node.node,
6536 (rb_compare_keys)compare_extent_backref, NULL);
6537 if (node)
6538 back = to_data_backref(rb_node_to_extent_backref(node));
6540 return back;
6543 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6544 * blocks and integrity of fs tree items.
6546 * @root: the root of the tree to be checked.
6547 * @ext_ref feature EXTENDED_IREF is enable or not.
6548 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6549 * otherwise means check fs tree(s) items relationship and
6550 * @root MUST be a fs tree root.
6551 * Returns 0 represents OK.
6552 * Returns not 0 represents error.
6554 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6555 struct btrfs_root *root, unsigned int ext_ref,
6556 int check_all)
6559 struct btrfs_path path;
6560 struct node_refs nrefs;
6561 struct btrfs_root_item *root_item = &root->root_item;
6562 int ret;
6563 int level;
6564 int err = 0;
6566 memset(&nrefs, 0, sizeof(nrefs));
6567 if (!check_all) {
6569 * We need to manually check the first inode item (256)
6570 * As the following traversal function will only start from
6571 * the first inode item in the leaf, if inode item (256) is
6572 * missing we will skip it forever.
6574 ret = check_fs_first_inode(root, ext_ref);
6575 if (ret < 0)
6576 return ret;
6580 level = btrfs_header_level(root->node);
6581 btrfs_init_path(&path);
6583 if (btrfs_root_refs(root_item) > 0 ||
6584 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6585 path.nodes[level] = root->node;
6586 path.slots[level] = 0;
6587 extent_buffer_get(root->node);
6588 } else {
6589 struct btrfs_key key;
6591 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6592 level = root_item->drop_level;
6593 path.lowest_level = level;
6594 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6595 if (ret < 0)
6596 goto out;
6597 ret = 0;
6600 while (1) {
6601 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6602 ext_ref, check_all);
6604 err |= !!ret;
6606 /* if ret is negative, walk shall stop */
6607 if (ret < 0) {
6608 ret = err;
6609 break;
6612 ret = walk_up_tree_v2(root, &path, &level);
6613 if (ret != 0) {
6614 /* Normal exit, reset ret to err */
6615 ret = err;
6616 break;
6620 out:
6621 btrfs_release_path(&path);
6622 return ret;
6625 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6628 * Iterate all items in the tree and call check_inode_item() to check.
6630 * @root: the root of the tree to be checked.
6631 * @ext_ref: the EXTENDED_IREF feature
6633 * Return 0 if no error found.
6634 * Return <0 for error.
6636 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6638 reset_cached_block_groups(root->fs_info);
6639 return check_btrfs_root(NULL, root, ext_ref, 0);
6643 * Find the relative ref for root_ref and root_backref.
6645 * @root: the root of the root tree.
6646 * @ref_key: the key of the root ref.
6648 * Return 0 if no error occurred.
6650 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6651 struct extent_buffer *node, int slot)
6653 struct btrfs_path path;
6654 struct btrfs_key key;
6655 struct btrfs_root_ref *ref;
6656 struct btrfs_root_ref *backref;
6657 char ref_name[BTRFS_NAME_LEN] = {0};
6658 char backref_name[BTRFS_NAME_LEN] = {0};
6659 u64 ref_dirid;
6660 u64 ref_seq;
6661 u32 ref_namelen;
6662 u64 backref_dirid;
6663 u64 backref_seq;
6664 u32 backref_namelen;
6665 u32 len;
6666 int ret;
6667 int err = 0;
6669 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6670 ref_dirid = btrfs_root_ref_dirid(node, ref);
6671 ref_seq = btrfs_root_ref_sequence(node, ref);
6672 ref_namelen = btrfs_root_ref_name_len(node, ref);
6674 if (ref_namelen <= BTRFS_NAME_LEN) {
6675 len = ref_namelen;
6676 } else {
6677 len = BTRFS_NAME_LEN;
6678 warning("%s[%llu %llu] ref_name too long",
6679 ref_key->type == BTRFS_ROOT_REF_KEY ?
6680 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6681 ref_key->offset);
6683 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6685 /* Find relative root_ref */
6686 key.objectid = ref_key->offset;
6687 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6688 key.offset = ref_key->objectid;
6690 btrfs_init_path(&path);
6691 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6692 if (ret) {
6693 err |= ROOT_REF_MISSING;
6694 error("%s[%llu %llu] couldn't find relative ref",
6695 ref_key->type == BTRFS_ROOT_REF_KEY ?
6696 "ROOT_REF" : "ROOT_BACKREF",
6697 ref_key->objectid, ref_key->offset);
6698 goto out;
6701 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6702 struct btrfs_root_ref);
6703 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6704 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6705 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6707 if (backref_namelen <= BTRFS_NAME_LEN) {
6708 len = backref_namelen;
6709 } else {
6710 len = BTRFS_NAME_LEN;
6711 warning("%s[%llu %llu] ref_name too long",
6712 key.type == BTRFS_ROOT_REF_KEY ?
6713 "ROOT_REF" : "ROOT_BACKREF",
6714 key.objectid, key.offset);
6716 read_extent_buffer(path.nodes[0], backref_name,
6717 (unsigned long)(backref + 1), len);
6719 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6720 ref_namelen != backref_namelen ||
6721 strncmp(ref_name, backref_name, len)) {
6722 err |= ROOT_REF_MISMATCH;
6723 error("%s[%llu %llu] mismatch relative ref",
6724 ref_key->type == BTRFS_ROOT_REF_KEY ?
6725 "ROOT_REF" : "ROOT_BACKREF",
6726 ref_key->objectid, ref_key->offset);
6728 out:
6729 btrfs_release_path(&path);
6730 return err;
6734 * Check all fs/file tree in low_memory mode.
6736 * 1. for fs tree root item, call check_fs_root_v2()
6737 * 2. for fs tree root ref/backref, call check_root_ref()
6739 * Return 0 if no error occurred.
6741 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6743 struct btrfs_root *tree_root = fs_info->tree_root;
6744 struct btrfs_root *cur_root = NULL;
6745 struct btrfs_path path;
6746 struct btrfs_key key;
6747 struct extent_buffer *node;
6748 unsigned int ext_ref;
6749 int slot;
6750 int ret;
6751 int err = 0;
6753 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6755 btrfs_init_path(&path);
6756 key.objectid = BTRFS_FS_TREE_OBJECTID;
6757 key.offset = 0;
6758 key.type = BTRFS_ROOT_ITEM_KEY;
6760 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6761 if (ret < 0) {
6762 err = ret;
6763 goto out;
6764 } else if (ret > 0) {
6765 err = -ENOENT;
6766 goto out;
6769 while (1) {
6770 node = path.nodes[0];
6771 slot = path.slots[0];
6772 btrfs_item_key_to_cpu(node, &key, slot);
6773 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6774 goto out;
6775 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6776 fs_root_objectid(key.objectid)) {
6777 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6778 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6779 &key);
6780 } else {
6781 key.offset = (u64)-1;
6782 cur_root = btrfs_read_fs_root(fs_info, &key);
6785 if (IS_ERR(cur_root)) {
6786 error("Fail to read fs/subvol tree: %lld",
6787 key.objectid);
6788 err = -EIO;
6789 goto next;
6792 ret = check_fs_root_v2(cur_root, ext_ref);
6793 err |= ret;
6795 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6796 btrfs_free_fs_root(cur_root);
6797 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6798 key.type == BTRFS_ROOT_BACKREF_KEY) {
6799 ret = check_root_ref(tree_root, &key, node, slot);
6800 err |= ret;
6802 next:
6803 ret = btrfs_next_item(tree_root, &path);
6804 if (ret > 0)
6805 goto out;
6806 if (ret < 0) {
6807 err = ret;
6808 goto out;
6812 out:
6813 btrfs_release_path(&path);
6814 return err;
6817 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6818 struct cache_tree *root_cache)
6820 int ret;
6822 if (!ctx.progress_enabled)
6823 fprintf(stderr, "checking fs roots\n");
6824 if (check_mode == CHECK_MODE_LOWMEM)
6825 ret = check_fs_roots_v2(fs_info);
6826 else
6827 ret = check_fs_roots(fs_info, root_cache);
6829 return ret;
6832 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6834 struct extent_backref *back, *tmp;
6835 struct tree_backref *tback;
6836 struct data_backref *dback;
6837 u64 found = 0;
6838 int err = 0;
6840 rbtree_postorder_for_each_entry_safe(back, tmp,
6841 &rec->backref_tree, node) {
6842 if (!back->found_extent_tree) {
6843 err = 1;
6844 if (!print_errs)
6845 goto out;
6846 if (back->is_data) {
6847 dback = to_data_backref(back);
6848 fprintf(stderr, "Data backref %llu %s %llu"
6849 " owner %llu offset %llu num_refs %lu"
6850 " not found in extent tree\n",
6851 (unsigned long long)rec->start,
6852 back->full_backref ?
6853 "parent" : "root",
6854 back->full_backref ?
6855 (unsigned long long)dback->parent:
6856 (unsigned long long)dback->root,
6857 (unsigned long long)dback->owner,
6858 (unsigned long long)dback->offset,
6859 (unsigned long)dback->num_refs);
6860 } else {
6861 tback = to_tree_backref(back);
6862 fprintf(stderr, "Tree backref %llu parent %llu"
6863 " root %llu not found in extent tree\n",
6864 (unsigned long long)rec->start,
6865 (unsigned long long)tback->parent,
6866 (unsigned long long)tback->root);
6869 if (!back->is_data && !back->found_ref) {
6870 err = 1;
6871 if (!print_errs)
6872 goto out;
6873 tback = to_tree_backref(back);
6874 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6875 (unsigned long long)rec->start,
6876 back->full_backref ? "parent" : "root",
6877 back->full_backref ?
6878 (unsigned long long)tback->parent :
6879 (unsigned long long)tback->root, back);
6881 if (back->is_data) {
6882 dback = to_data_backref(back);
6883 if (dback->found_ref != dback->num_refs) {
6884 err = 1;
6885 if (!print_errs)
6886 goto out;
6887 fprintf(stderr, "Incorrect local backref count"
6888 " on %llu %s %llu owner %llu"
6889 " offset %llu found %u wanted %u back %p\n",
6890 (unsigned long long)rec->start,
6891 back->full_backref ?
6892 "parent" : "root",
6893 back->full_backref ?
6894 (unsigned long long)dback->parent:
6895 (unsigned long long)dback->root,
6896 (unsigned long long)dback->owner,
6897 (unsigned long long)dback->offset,
6898 dback->found_ref, dback->num_refs, back);
6900 if (dback->disk_bytenr != rec->start) {
6901 err = 1;
6902 if (!print_errs)
6903 goto out;
6904 fprintf(stderr, "Backref disk bytenr does not"
6905 " match extent record, bytenr=%llu, "
6906 "ref bytenr=%llu\n",
6907 (unsigned long long)rec->start,
6908 (unsigned long long)dback->disk_bytenr);
6911 if (dback->bytes != rec->nr) {
6912 err = 1;
6913 if (!print_errs)
6914 goto out;
6915 fprintf(stderr, "Backref bytes do not match "
6916 "extent backref, bytenr=%llu, ref "
6917 "bytes=%llu, backref bytes=%llu\n",
6918 (unsigned long long)rec->start,
6919 (unsigned long long)rec->nr,
6920 (unsigned long long)dback->bytes);
6923 if (!back->is_data) {
6924 found += 1;
6925 } else {
6926 dback = to_data_backref(back);
6927 found += dback->found_ref;
6930 if (found != rec->refs) {
6931 err = 1;
6932 if (!print_errs)
6933 goto out;
6934 fprintf(stderr, "Incorrect global backref count "
6935 "on %llu found %llu wanted %llu\n",
6936 (unsigned long long)rec->start,
6937 (unsigned long long)found,
6938 (unsigned long long)rec->refs);
6940 out:
6941 return err;
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6951 static void free_all_extent_backrefs(struct extent_record *rec)
6953 rb_free_nodes(&rec->backref_tree, __free_one_backref);
6956 static void free_extent_record_cache(struct cache_tree *extent_cache)
6958 struct cache_extent *cache;
6959 struct extent_record *rec;
6961 while (1) {
6962 cache = first_cache_extent(extent_cache);
6963 if (!cache)
6964 break;
6965 rec = container_of(cache, struct extent_record, cache);
6966 remove_cache_extent(extent_cache, cache);
6967 free_all_extent_backrefs(rec);
6968 free(rec);
6972 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6973 struct extent_record *rec)
6975 if (rec->content_checked && rec->owner_ref_checked &&
6976 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6977 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6978 !rec->bad_full_backref && !rec->crossing_stripes &&
6979 !rec->wrong_chunk_type) {
6980 remove_cache_extent(extent_cache, &rec->cache);
6981 free_all_extent_backrefs(rec);
6982 list_del_init(&rec->list);
6983 free(rec);
6985 return 0;
6988 static int check_owner_ref(struct btrfs_root *root,
6989 struct extent_record *rec,
6990 struct extent_buffer *buf)
6992 struct extent_backref *node, *tmp;
6993 struct tree_backref *back;
6994 struct btrfs_root *ref_root;
6995 struct btrfs_key key;
6996 struct btrfs_path path;
6997 struct extent_buffer *parent;
6998 int level;
6999 int found = 0;
7000 int ret;
7002 rbtree_postorder_for_each_entry_safe(node, tmp,
7003 &rec->backref_tree, node) {
7004 if (node->is_data)
7005 continue;
7006 if (!node->found_ref)
7007 continue;
7008 if (node->full_backref)
7009 continue;
7010 back = to_tree_backref(node);
7011 if (btrfs_header_owner(buf) == back->root)
7012 return 0;
7014 BUG_ON(rec->is_root);
7016 /* try to find the block by search corresponding fs tree */
7017 key.objectid = btrfs_header_owner(buf);
7018 key.type = BTRFS_ROOT_ITEM_KEY;
7019 key.offset = (u64)-1;
7021 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7022 if (IS_ERR(ref_root))
7023 return 1;
7025 level = btrfs_header_level(buf);
7026 if (level == 0)
7027 btrfs_item_key_to_cpu(buf, &key, 0);
7028 else
7029 btrfs_node_key_to_cpu(buf, &key, 0);
7031 btrfs_init_path(&path);
7032 path.lowest_level = level + 1;
7033 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7034 if (ret < 0)
7035 return 0;
7037 parent = path.nodes[level + 1];
7038 if (parent && buf->start == btrfs_node_blockptr(parent,
7039 path.slots[level + 1]))
7040 found = 1;
7042 btrfs_release_path(&path);
7043 return found ? 0 : 1;
7046 static int is_extent_tree_record(struct extent_record *rec)
7048 struct extent_backref *node, *tmp;
7049 struct tree_backref *back;
7050 int is_extent = 0;
7052 rbtree_postorder_for_each_entry_safe(node, tmp,
7053 &rec->backref_tree, node) {
7054 if (node->is_data)
7055 return 0;
7056 back = to_tree_backref(node);
7057 if (node->full_backref)
7058 return 0;
7059 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7060 is_extent = 1;
7062 return is_extent;
7066 static int record_bad_block_io(struct btrfs_fs_info *info,
7067 struct cache_tree *extent_cache,
7068 u64 start, u64 len)
7070 struct extent_record *rec;
7071 struct cache_extent *cache;
7072 struct btrfs_key key;
7074 cache = lookup_cache_extent(extent_cache, start, len);
7075 if (!cache)
7076 return 0;
7078 rec = container_of(cache, struct extent_record, cache);
7079 if (!is_extent_tree_record(rec))
7080 return 0;
7082 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7083 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For an interior node the two key pointers are swapped; if slot 0 changed,
 * the new first key is propagated upwards with btrfs_fixup_low_keys().
 * For a leaf the item data, the item offsets/sizes and the keys are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		/* Interior node: read both key pointers, write them back crossed. */
		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			struct btrfs_disk_key key;
			/* The first key of the node changed; fix the parents. */
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		/* Temporary copies of both payloads, sized to their own item. */
		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): each write below uses the *other* item's length
		 * with this item's buffer.  When the two sizes differ, one
		 * write reads past the end of its malloc'd buffer.  Confirm
		 * whether equal sizes are guaranteed by the caller.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Exchange the offset/size stored in the two item headers. */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Exchange the keys; path->slots[0] is left at slot + 1. */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
7154 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7156 struct extent_buffer *buf;
7157 struct btrfs_key k1, k2;
7158 int i;
7159 int level = path->lowest_level;
7160 int ret = -EIO;
7162 buf = path->nodes[level];
7163 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7164 if (level) {
7165 btrfs_node_key_to_cpu(buf, &k1, i);
7166 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7167 } else {
7168 btrfs_item_key_to_cpu(buf, &k1, i);
7169 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7171 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7172 continue;
7173 ret = swap_values(root, path, buf, i);
7174 if (ret)
7175 break;
7176 btrfs_mark_buffer_dirty(buf);
7177 i = 0;
7179 return ret;
7182 static int delete_bogus_item(struct btrfs_root *root,
7183 struct btrfs_path *path,
7184 struct extent_buffer *buf, int slot)
7186 struct btrfs_key key;
7187 int nritems = btrfs_header_nritems(buf);
7189 btrfs_item_key_to_cpu(buf, &key, slot);
7191 /* These are all the keys we can deal with missing. */
7192 if (key.type != BTRFS_DIR_INDEX_KEY &&
7193 key.type != BTRFS_EXTENT_ITEM_KEY &&
7194 key.type != BTRFS_METADATA_ITEM_KEY &&
7195 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7196 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7197 return -1;
7199 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7200 (unsigned long long)key.objectid, key.type,
7201 (unsigned long long)key.offset, slot, buf->start);
7202 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7203 btrfs_item_nr_offset(slot + 1),
7204 sizeof(struct btrfs_item) *
7205 (nritems - slot - 1));
7206 btrfs_set_header_nritems(buf, nritems - 1);
7207 if (slot == 0) {
7208 struct btrfs_disk_key disk_key;
7210 btrfs_item_key(buf, &disk_key, 0);
7211 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7213 btrfs_mark_buffer_dirty(buf);
7214 return 0;
7217 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7219 struct extent_buffer *buf;
7220 int i;
7221 int ret = 0;
7223 /* We should only get this for leaves */
7224 BUG_ON(path->lowest_level);
7225 buf = path->nodes[0];
7226 again:
7227 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7228 unsigned int shift = 0, offset;
7230 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7231 BTRFS_LEAF_DATA_SIZE(root)) {
7232 if (btrfs_item_end_nr(buf, i) >
7233 BTRFS_LEAF_DATA_SIZE(root)) {
7234 ret = delete_bogus_item(root, path, buf, i);
7235 if (!ret)
7236 goto again;
7237 fprintf(stderr, "item is off the end of the "
7238 "leaf, can't fix\n");
7239 ret = -EIO;
7240 break;
7242 shift = BTRFS_LEAF_DATA_SIZE(root) -
7243 btrfs_item_end_nr(buf, i);
7244 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7245 btrfs_item_offset_nr(buf, i - 1)) {
7246 if (btrfs_item_end_nr(buf, i) >
7247 btrfs_item_offset_nr(buf, i - 1)) {
7248 ret = delete_bogus_item(root, path, buf, i);
7249 if (!ret)
7250 goto again;
7251 fprintf(stderr, "items overlap, can't fix\n");
7252 ret = -EIO;
7253 break;
7255 shift = btrfs_item_offset_nr(buf, i - 1) -
7256 btrfs_item_end_nr(buf, i);
7258 if (!shift)
7259 continue;
7261 printf("Shifting item nr %d by %u bytes in block %llu\n",
7262 i, shift, (unsigned long long)buf->start);
7263 offset = btrfs_item_offset_nr(buf, i);
7264 memmove_extent_buffer(buf,
7265 btrfs_leaf_data(buf) + offset + shift,
7266 btrfs_leaf_data(buf) + offset,
7267 btrfs_item_size_nr(buf, i));
7268 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7269 offset + shift);
7270 btrfs_mark_buffer_dirty(buf);
7274 * We may have moved things, in which case we want to exit so we don't
7275 * write those changes out. Once we have proper abort functionality in
7276 * progs this can be changed to something nicer.
7278 BUG_ON(ret);
7279 return ret;
7283 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7284 * then just return -EIO.
7286 static int try_to_fix_bad_block(struct btrfs_root *root,
7287 struct extent_buffer *buf,
7288 enum btrfs_tree_block_status status)
7290 struct btrfs_trans_handle *trans;
7291 struct ulist *roots;
7292 struct ulist_node *node;
7293 struct btrfs_root *search_root;
7294 struct btrfs_path path;
7295 struct ulist_iterator iter;
7296 struct btrfs_key root_key, key;
7297 int ret;
7299 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7300 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7301 return -EIO;
7303 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7304 if (ret)
7305 return -EIO;
7307 btrfs_init_path(&path);
7308 ULIST_ITER_INIT(&iter);
7309 while ((node = ulist_next(roots, &iter))) {
7310 root_key.objectid = node->val;
7311 root_key.type = BTRFS_ROOT_ITEM_KEY;
7312 root_key.offset = (u64)-1;
7314 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7315 if (IS_ERR(root)) {
7316 ret = -EIO;
7317 break;
7321 trans = btrfs_start_transaction(search_root, 0);
7322 if (IS_ERR(trans)) {
7323 ret = PTR_ERR(trans);
7324 break;
7327 path.lowest_level = btrfs_header_level(buf);
7328 path.skip_check_block = 1;
7329 if (path.lowest_level)
7330 btrfs_node_key_to_cpu(buf, &key, 0);
7331 else
7332 btrfs_item_key_to_cpu(buf, &key, 0);
7333 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7334 if (ret) {
7335 ret = -EIO;
7336 btrfs_commit_transaction(trans, search_root);
7337 break;
7339 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7340 ret = fix_key_order(search_root, &path);
7341 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7342 ret = fix_item_offset(search_root, &path);
7343 if (ret) {
7344 btrfs_commit_transaction(trans, search_root);
7345 break;
7347 btrfs_release_path(&path);
7348 btrfs_commit_transaction(trans, search_root);
7350 ulist_free(roots);
7351 btrfs_release_path(&path);
7352 return ret;
7355 static int check_block(struct btrfs_root *root,
7356 struct cache_tree *extent_cache,
7357 struct extent_buffer *buf, u64 flags)
7359 struct extent_record *rec;
7360 struct cache_extent *cache;
7361 struct btrfs_key key;
7362 enum btrfs_tree_block_status status;
7363 int ret = 0;
7364 int level;
7366 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7367 if (!cache)
7368 return 1;
7369 rec = container_of(cache, struct extent_record, cache);
7370 rec->generation = btrfs_header_generation(buf);
7372 level = btrfs_header_level(buf);
7373 if (btrfs_header_nritems(buf) > 0) {
7375 if (level == 0)
7376 btrfs_item_key_to_cpu(buf, &key, 0);
7377 else
7378 btrfs_node_key_to_cpu(buf, &key, 0);
7380 rec->info_objectid = key.objectid;
7382 rec->info_level = level;
7384 if (btrfs_is_leaf(buf))
7385 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7386 else
7387 status = btrfs_check_node(root, &rec->parent_key, buf);
7389 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7390 if (repair)
7391 status = try_to_fix_bad_block(root, buf, status);
7392 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7393 ret = -EIO;
7394 fprintf(stderr, "bad block %llu\n",
7395 (unsigned long long)buf->start);
7396 } else {
7398 * Signal to callers we need to start the scan over
7399 * again since we'll have cowed blocks.
7401 ret = -EAGAIN;
7403 } else {
7404 rec->content_checked = 1;
7405 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7406 rec->owner_ref_checked = 1;
7407 else {
7408 ret = check_owner_ref(root, rec, buf);
7409 if (!ret)
7410 rec->owner_ref_checked = 1;
7413 if (!ret)
7414 maybe_free_extent_rec(extent_cache, rec);
7415 return ret;
#if 0
/*
 * Compiled out: linear search of rec->backrefs for a tree backref.
 * With @parent > 0 only full backrefs with that parent match; otherwise only
 * non-full backrefs with the given @root match.  Returns NULL if none found.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		/* Data backrefs are never tree backrefs. */
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
7448 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7449 u64 parent, u64 root)
7451 struct tree_backref *ref = malloc(sizeof(*ref));
7453 if (!ref)
7454 return NULL;
7455 memset(&ref->node, 0, sizeof(ref->node));
7456 if (parent > 0) {
7457 ref->parent = parent;
7458 ref->node.full_backref = 1;
7459 } else {
7460 ref->root = root;
7461 ref->node.full_backref = 0;
7464 return ref;
#if 0
/*
 * Compiled out: linear search of rec->backrefs for a data backref.
 * With @parent > 0 only full backrefs with that parent match.  Otherwise a
 * non-full backref must match @root, @owner and @offset; if both the caller
 * and the candidate have found_ref set, @bytes and @disk_bytenr must match
 * as well.  Returns NULL if none found.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct data_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		/* Only data backrefs are of interest. */
		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root && back->owner == owner &&
			    back->offset == offset) {
				/* Same file position but different extent: keep looking. */
				if (found_ref && node->found_ref &&
				    (back->bytes != bytes ||
				    back->disk_bytenr != disk_bytenr))
					continue;
				return back;
			}
		}
	}
	return NULL;
}
#endif
7506 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7507 u64 parent, u64 root,
7508 u64 owner, u64 offset,
7509 u64 max_size)
7511 struct data_backref *ref = malloc(sizeof(*ref));
7513 if (!ref)
7514 return NULL;
7515 memset(&ref->node, 0, sizeof(ref->node));
7516 ref->node.is_data = 1;
7518 if (parent > 0) {
7519 ref->parent = parent;
7520 ref->owner = 0;
7521 ref->offset = 0;
7522 ref->node.full_backref = 1;
7523 } else {
7524 ref->root = root;
7525 ref->owner = owner;
7526 ref->offset = offset;
7527 ref->node.full_backref = 0;
7529 ref->bytes = max_size;
7530 ref->found_ref = 0;
7531 ref->num_refs = 0;
7532 if (max_size > rec->max_size)
7533 rec->max_size = max_size;
7534 return ref;
7537 /* Check if the type of extent matches with its chunk */
7538 static void check_extent_type(struct extent_record *rec)
7540 struct btrfs_block_group_cache *bg_cache;
7542 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7543 if (!bg_cache)
7544 return;
7546 /* data extent, check chunk directly*/
7547 if (!rec->metadata) {
7548 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7549 rec->wrong_chunk_type = 1;
7550 return;
7553 /* metadata extent, check the obvious case first */
7554 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7555 BTRFS_BLOCK_GROUP_METADATA))) {
7556 rec->wrong_chunk_type = 1;
7557 return;
7561 * Check SYSTEM extent, as it's also marked as metadata, we can only
7562 * make sure it's a SYSTEM extent by its backref
7564 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7565 struct extent_backref *node;
7566 struct tree_backref *tback;
7567 u64 bg_type;
7569 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7570 if (node->is_data) {
7571 /* tree block shouldn't have data backref */
7572 rec->wrong_chunk_type = 1;
7573 return;
7575 tback = container_of(node, struct tree_backref, node);
7577 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7578 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7579 else
7580 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7581 if (!(bg_cache->flags & bg_type))
7582 rec->wrong_chunk_type = 1;
7587 * Allocate a new extent record, fill default values from @tmpl and insert int
7588 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7589 * the cache, otherwise it fails.
7591 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7592 struct extent_record *tmpl)
7594 struct extent_record *rec;
7595 int ret = 0;
7597 BUG_ON(tmpl->max_size == 0);
7598 rec = malloc(sizeof(*rec));
7599 if (!rec)
7600 return -ENOMEM;
7601 rec->start = tmpl->start;
7602 rec->max_size = tmpl->max_size;
7603 rec->nr = max(tmpl->nr, tmpl->max_size);
7604 rec->found_rec = tmpl->found_rec;
7605 rec->content_checked = tmpl->content_checked;
7606 rec->owner_ref_checked = tmpl->owner_ref_checked;
7607 rec->num_duplicates = 0;
7608 rec->metadata = tmpl->metadata;
7609 rec->flag_block_full_backref = FLAG_UNSET;
7610 rec->bad_full_backref = 0;
7611 rec->crossing_stripes = 0;
7612 rec->wrong_chunk_type = 0;
7613 rec->is_root = tmpl->is_root;
7614 rec->refs = tmpl->refs;
7615 rec->extent_item_refs = tmpl->extent_item_refs;
7616 rec->parent_generation = tmpl->parent_generation;
7617 INIT_LIST_HEAD(&rec->backrefs);
7618 INIT_LIST_HEAD(&rec->dups);
7619 INIT_LIST_HEAD(&rec->list);
7620 rec->backref_tree = RB_ROOT;
7621 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7622 rec->cache.start = tmpl->start;
7623 rec->cache.size = tmpl->nr;
7624 ret = insert_cache_extent(extent_cache, &rec->cache);
7625 if (ret) {
7626 free(rec);
7627 return ret;
7629 bytes_used += rec->nr;
7631 if (tmpl->metadata)
7632 rec->crossing_stripes = check_crossing_stripes(global_info,
7633 rec->start, global_info->nodesize);
7634 check_extent_type(rec);
7635 return ret;
7639 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7640 * some are hints:
7641 * - refs - if found, increase refs
7642 * - is_root - if found, set
7643 * - content_checked - if found, set
7644 * - owner_ref_checked - if found, set
7646 * If not found, create a new one, initialize and insert.
7648 static int add_extent_rec(struct cache_tree *extent_cache,
7649 struct extent_record *tmpl)
7651 struct extent_record *rec;
7652 struct cache_extent *cache;
7653 int ret = 0;
7654 int dup = 0;
7656 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7657 if (cache) {
7658 rec = container_of(cache, struct extent_record, cache);
7659 if (tmpl->refs)
7660 rec->refs++;
7661 if (rec->nr == 1)
7662 rec->nr = max(tmpl->nr, tmpl->max_size);
7665 * We need to make sure to reset nr to whatever the extent
7666 * record says was the real size, this way we can compare it to
7667 * the backrefs.
7669 if (tmpl->found_rec) {
7670 if (tmpl->start != rec->start || rec->found_rec) {
7671 struct extent_record *tmp;
7673 dup = 1;
7674 if (list_empty(&rec->list))
7675 list_add_tail(&rec->list,
7676 &duplicate_extents);
7679 * We have to do this song and dance in case we
7680 * find an extent record that falls inside of
7681 * our current extent record but does not have
7682 * the same objectid.
7684 tmp = malloc(sizeof(*tmp));
7685 if (!tmp)
7686 return -ENOMEM;
7687 tmp->start = tmpl->start;
7688 tmp->max_size = tmpl->max_size;
7689 tmp->nr = tmpl->nr;
7690 tmp->found_rec = 1;
7691 tmp->metadata = tmpl->metadata;
7692 tmp->extent_item_refs = tmpl->extent_item_refs;
7693 INIT_LIST_HEAD(&tmp->list);
7694 list_add_tail(&tmp->list, &rec->dups);
7695 rec->num_duplicates++;
7696 } else {
7697 rec->nr = tmpl->nr;
7698 rec->found_rec = 1;
7702 if (tmpl->extent_item_refs && !dup) {
7703 if (rec->extent_item_refs) {
7704 fprintf(stderr, "block %llu rec "
7705 "extent_item_refs %llu, passed %llu\n",
7706 (unsigned long long)tmpl->start,
7707 (unsigned long long)
7708 rec->extent_item_refs,
7709 (unsigned long long)tmpl->extent_item_refs);
7711 rec->extent_item_refs = tmpl->extent_item_refs;
7713 if (tmpl->is_root)
7714 rec->is_root = 1;
7715 if (tmpl->content_checked)
7716 rec->content_checked = 1;
7717 if (tmpl->owner_ref_checked)
7718 rec->owner_ref_checked = 1;
7719 memcpy(&rec->parent_key, &tmpl->parent_key,
7720 sizeof(tmpl->parent_key));
7721 if (tmpl->parent_generation)
7722 rec->parent_generation = tmpl->parent_generation;
7723 if (rec->max_size < tmpl->max_size)
7724 rec->max_size = tmpl->max_size;
7727 * A metadata extent can't cross stripe_len boundary, otherwise
7728 * kernel scrub won't be able to handle it.
7729 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7730 * it.
7732 if (tmpl->metadata)
7733 rec->crossing_stripes = check_crossing_stripes(
7734 global_info, rec->start,
7735 global_info->nodesize);
7736 check_extent_type(rec);
7737 maybe_free_extent_rec(extent_cache, rec);
7738 return ret;
7741 ret = add_extent_rec_nolookup(extent_cache, tmpl);
7743 return ret;
7746 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7747 u64 parent, u64 root, int found_ref)
7749 struct extent_record *rec;
7750 struct tree_backref *back;
7751 struct cache_extent *cache;
7752 int ret;
7753 bool insert = false;
7755 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7756 if (!cache) {
7757 struct extent_record tmpl;
7759 memset(&tmpl, 0, sizeof(tmpl));
7760 tmpl.start = bytenr;
7761 tmpl.nr = 1;
7762 tmpl.metadata = 1;
7763 tmpl.max_size = 1;
7765 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7766 if (ret)
7767 return ret;
7769 /* really a bug in cache_extent implement now */
7770 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7771 if (!cache)
7772 return -ENOENT;
7775 rec = container_of(cache, struct extent_record, cache);
7776 if (rec->start != bytenr) {
7778 * Several cause, from unaligned bytenr to over lapping extents
7780 return -EEXIST;
7783 back = find_tree_backref(rec, parent, root);
7784 if (!back) {
7785 back = alloc_tree_backref(rec, parent, root);
7786 if (!back)
7787 return -ENOMEM;
7788 insert = true;
7791 if (found_ref) {
7792 if (back->node.found_ref) {
7793 fprintf(stderr, "Extent back ref already exists "
7794 "for %llu parent %llu root %llu \n",
7795 (unsigned long long)bytenr,
7796 (unsigned long long)parent,
7797 (unsigned long long)root);
7799 back->node.found_ref = 1;
7800 } else {
7801 if (back->node.found_extent_tree) {
7802 fprintf(stderr, "Extent back ref already exists "
7803 "for %llu parent %llu root %llu \n",
7804 (unsigned long long)bytenr,
7805 (unsigned long long)parent,
7806 (unsigned long long)root);
7808 back->node.found_extent_tree = 1;
7810 if (insert)
7811 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7812 compare_extent_backref));
7813 check_extent_type(rec);
7814 maybe_free_extent_rec(extent_cache, rec);
7815 return 0;
7818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7819 u64 parent, u64 root, u64 owner, u64 offset,
7820 u32 num_refs, int found_ref, u64 max_size)
7822 struct extent_record *rec;
7823 struct data_backref *back;
7824 struct cache_extent *cache;
7825 int ret;
7826 bool insert = false;
7828 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7829 if (!cache) {
7830 struct extent_record tmpl;
7832 memset(&tmpl, 0, sizeof(tmpl));
7833 tmpl.start = bytenr;
7834 tmpl.nr = 1;
7835 tmpl.max_size = max_size;
7837 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7838 if (ret)
7839 return ret;
7841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7842 if (!cache)
7843 abort();
7846 rec = container_of(cache, struct extent_record, cache);
7847 if (rec->max_size < max_size)
7848 rec->max_size = max_size;
7851 * If found_ref is set then max_size is the real size and must match the
7852 * existing refs. So if we have already found a ref then we need to
7853 * make sure that this ref matches the existing one, otherwise we need
7854 * to add a new backref so we can notice that the backrefs don't match
7855 * and we need to figure out who is telling the truth. This is to
7856 * account for that awful fsync bug I introduced where we'd end up with
7857 * a btrfs_file_extent_item that would have its length include multiple
7858 * prealloc extents or point inside of a prealloc extent.
7860 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7861 bytenr, max_size);
7862 if (!back) {
7863 back = alloc_data_backref(rec, parent, root, owner, offset,
7864 max_size);
7865 BUG_ON(!back);
7866 insert = true;
7869 if (found_ref) {
7870 BUG_ON(num_refs != 1);
7871 if (back->node.found_ref)
7872 BUG_ON(back->bytes != max_size);
7873 back->node.found_ref = 1;
7874 back->found_ref += 1;
7875 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7876 back->bytes = max_size;
7877 back->disk_bytenr = bytenr;
7879 /* Need to reinsert if not already in the tree */
7880 if (!insert) {
7881 rb_erase(&back->node.node, &rec->backref_tree);
7882 insert = true;
7885 rec->refs += 1;
7886 rec->content_checked = 1;
7887 rec->owner_ref_checked = 1;
7888 } else {
7889 if (back->node.found_extent_tree) {
7890 fprintf(stderr, "Extent back ref already exists "
7891 "for %llu parent %llu root %llu "
7892 "owner %llu offset %llu num_refs %lu\n",
7893 (unsigned long long)bytenr,
7894 (unsigned long long)parent,
7895 (unsigned long long)root,
7896 (unsigned long long)owner,
7897 (unsigned long long)offset,
7898 (unsigned long)num_refs);
7900 back->num_refs = num_refs;
7901 back->node.found_extent_tree = 1;
7903 if (insert)
7904 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7905 compare_extent_backref));
7907 maybe_free_extent_rec(extent_cache, rec);
7908 return 0;
7911 static int add_pending(struct cache_tree *pending,
7912 struct cache_tree *seen, u64 bytenr, u32 size)
7914 int ret;
7915 ret = add_cache_extent(seen, bytenr, size);
7916 if (ret)
7917 return ret;
7918 add_cache_extent(pending, bytenr, size);
7919 return 0;
7922 static int pick_next_pending(struct cache_tree *pending,
7923 struct cache_tree *reada,
7924 struct cache_tree *nodes,
7925 u64 last, struct block_info *bits, int bits_nr,
7926 int *reada_bits)
7928 unsigned long node_start = last;
7929 struct cache_extent *cache;
7930 int ret;
7932 cache = search_cache_extent(reada, 0);
7933 if (cache) {
7934 bits[0].start = cache->start;
7935 bits[0].size = cache->size;
7936 *reada_bits = 1;
7937 return 1;
7939 *reada_bits = 0;
7940 if (node_start > 32768)
7941 node_start -= 32768;
7943 cache = search_cache_extent(nodes, node_start);
7944 if (!cache)
7945 cache = search_cache_extent(nodes, 0);
7947 if (!cache) {
7948 cache = search_cache_extent(pending, 0);
7949 if (!cache)
7950 return 0;
7951 ret = 0;
7952 do {
7953 bits[ret].start = cache->start;
7954 bits[ret].size = cache->size;
7955 cache = next_cache_extent(cache);
7956 ret++;
7957 } while (cache && ret < bits_nr);
7958 return ret;
7961 ret = 0;
7962 do {
7963 bits[ret].start = cache->start;
7964 bits[ret].size = cache->size;
7965 cache = next_cache_extent(cache);
7966 ret++;
7967 } while (cache && ret < bits_nr);
7969 if (bits_nr - ret > 8) {
7970 u64 lookup = bits[0].start + bits[0].size;
7971 struct cache_extent *next;
7972 next = search_cache_extent(pending, lookup);
7973 while(next) {
7974 if (next->start - lookup > 32768)
7975 break;
7976 bits[ret].start = next->start;
7977 bits[ret].size = next->size;
7978 lookup = next->start + next->size;
7979 ret++;
7980 if (ret == bits_nr)
7981 break;
7982 next = next_cache_extent(next);
7983 if (!next)
7984 break;
7987 return ret;
7990 static void free_chunk_record(struct cache_extent *cache)
7992 struct chunk_record *rec;
7994 rec = container_of(cache, struct chunk_record, cache);
7995 list_del_init(&rec->list);
7996 list_del_init(&rec->dextents);
7997 free(rec);
8000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8002 cache_tree_free_extents(chunk_cache, free_chunk_record);
8005 static void free_device_record(struct rb_node *node)
8007 struct device_record *rec;
8009 rec = container_of(node, struct device_record, node);
8010 free(rec);
8013 FREE_RB_BASED_TREE(device_cache, free_device_record);
8015 int insert_block_group_record(struct block_group_tree *tree,
8016 struct block_group_record *bg_rec)
8018 int ret;
8020 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8021 if (ret)
8022 return ret;
8024 list_add_tail(&bg_rec->list, &tree->block_groups);
8025 return 0;
8028 static void free_block_group_record(struct cache_extent *cache)
8030 struct block_group_record *rec;
8032 rec = container_of(cache, struct block_group_record, cache);
8033 list_del_init(&rec->list);
8034 free(rec);
8037 void free_block_group_tree(struct block_group_tree *tree)
8039 cache_tree_free_extents(&tree->tree, free_block_group_record);
8042 int insert_device_extent_record(struct device_extent_tree *tree,
8043 struct device_extent_record *de_rec)
8045 int ret;
8048 * Device extent is a bit different from the other extents, because
8049 * the extents which belong to the different devices may have the
8050 * same start and size, so we need use the special extent cache
8051 * search/insert functions.
8053 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8054 if (ret)
8055 return ret;
8057 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8058 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8059 return 0;
8062 static void free_device_extent_record(struct cache_extent *cache)
8064 struct device_extent_record *rec;
8066 rec = container_of(cache, struct device_extent_record, cache);
8067 if (!list_empty(&rec->chunk_list))
8068 list_del_init(&rec->chunk_list);
8069 if (!list_empty(&rec->device_list))
8070 list_del_init(&rec->device_list);
8071 free(rec);
8074 void free_device_extent_tree(struct device_extent_tree *tree)
8076 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.  A ref whose
 * objectid is below BTRFS_FIRST_FREE_OBJECTID becomes a tree backref, any
 * other becomes a data backref carrying the item's ref count.
 * Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
8100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8101 struct btrfs_key *key,
8102 int slot)
8104 struct btrfs_chunk *ptr;
8105 struct chunk_record *rec;
8106 int num_stripes, i;
8108 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8109 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8111 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8112 if (!rec) {
8113 fprintf(stderr, "memory allocation failed\n");
8114 exit(-1);
8117 INIT_LIST_HEAD(&rec->list);
8118 INIT_LIST_HEAD(&rec->dextents);
8119 rec->bg_rec = NULL;
8121 rec->cache.start = key->offset;
8122 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8124 rec->generation = btrfs_header_generation(leaf);
8126 rec->objectid = key->objectid;
8127 rec->type = key->type;
8128 rec->offset = key->offset;
8130 rec->length = rec->cache.size;
8131 rec->owner = btrfs_chunk_owner(leaf, ptr);
8132 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8133 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8134 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8135 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8136 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8137 rec->num_stripes = num_stripes;
8138 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8140 for (i = 0; i < rec->num_stripes; ++i) {
8141 rec->stripes[i].devid =
8142 btrfs_stripe_devid_nr(leaf, ptr, i);
8143 rec->stripes[i].offset =
8144 btrfs_stripe_offset_nr(leaf, ptr, i);
8145 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8146 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8147 BTRFS_UUID_SIZE);
8150 return rec;
8153 static int process_chunk_item(struct cache_tree *chunk_cache,
8154 struct btrfs_key *key, struct extent_buffer *eb,
8155 int slot)
8157 struct chunk_record *rec;
8158 struct btrfs_chunk *chunk;
8159 int ret = 0;
8161 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8163 * Do extra check for this chunk item,
8165 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8166 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8167 * and owner<->key_type check.
8169 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8170 key->offset);
8171 if (ret < 0) {
8172 error("chunk(%llu, %llu) is not valid, ignore it",
8173 key->offset, btrfs_chunk_length(eb, chunk));
8174 return 0;
8176 rec = btrfs_new_chunk_record(eb, key, slot);
8177 ret = insert_cache_extent(chunk_cache, &rec->cache);
8178 if (ret) {
8179 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8180 rec->offset, rec->length);
8181 free(rec);
8184 return ret;
8187 static int process_device_item(struct rb_root *dev_cache,
8188 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8190 struct btrfs_dev_item *ptr;
8191 struct device_record *rec;
8192 int ret = 0;
8194 ptr = btrfs_item_ptr(eb,
8195 slot, struct btrfs_dev_item);
8197 rec = malloc(sizeof(*rec));
8198 if (!rec) {
8199 fprintf(stderr, "memory allocation failed\n");
8200 return -ENOMEM;
8203 rec->devid = key->offset;
8204 rec->generation = btrfs_header_generation(eb);
8206 rec->objectid = key->objectid;
8207 rec->type = key->type;
8208 rec->offset = key->offset;
8210 rec->devid = btrfs_device_id(eb, ptr);
8211 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8212 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8214 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8215 if (ret) {
8216 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8217 free(rec);
8220 return ret;
8223 struct block_group_record *
8224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8225 int slot)
8227 struct btrfs_block_group_item *ptr;
8228 struct block_group_record *rec;
8230 rec = calloc(1, sizeof(*rec));
8231 if (!rec) {
8232 fprintf(stderr, "memory allocation failed\n");
8233 exit(-1);
8236 rec->cache.start = key->objectid;
8237 rec->cache.size = key->offset;
8239 rec->generation = btrfs_header_generation(leaf);
8241 rec->objectid = key->objectid;
8242 rec->type = key->type;
8243 rec->offset = key->offset;
8245 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8246 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8248 INIT_LIST_HEAD(&rec->list);
8250 return rec;
8253 static int process_block_group_item(struct block_group_tree *block_group_cache,
8254 struct btrfs_key *key,
8255 struct extent_buffer *eb, int slot)
8257 struct block_group_record *rec;
8258 int ret = 0;
8260 rec = btrfs_new_block_group_record(eb, key, slot);
8261 ret = insert_block_group_record(block_group_cache, rec);
8262 if (ret) {
8263 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8264 rec->objectid, rec->offset);
8265 free(rec);
8268 return ret;
8271 struct device_extent_record *
8272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8273 struct btrfs_key *key, int slot)
8275 struct device_extent_record *rec;
8276 struct btrfs_dev_extent *ptr;
8278 rec = calloc(1, sizeof(*rec));
8279 if (!rec) {
8280 fprintf(stderr, "memory allocation failed\n");
8281 exit(-1);
8284 rec->cache.objectid = key->objectid;
8285 rec->cache.start = key->offset;
8287 rec->generation = btrfs_header_generation(leaf);
8289 rec->objectid = key->objectid;
8290 rec->type = key->type;
8291 rec->offset = key->offset;
8293 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8294 rec->chunk_objecteid =
8295 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8296 rec->chunk_offset =
8297 btrfs_dev_extent_chunk_offset(leaf, ptr);
8298 rec->length = btrfs_dev_extent_length(leaf, ptr);
8299 rec->cache.size = rec->length;
8301 INIT_LIST_HEAD(&rec->chunk_list);
8302 INIT_LIST_HEAD(&rec->device_list);
8304 return rec;
8307 static int
8308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8309 struct btrfs_key *key, struct extent_buffer *eb,
8310 int slot)
8312 struct device_extent_record *rec;
8313 int ret;
8315 rec = btrfs_new_device_extent_record(eb, key, slot);
8316 ret = insert_device_extent_record(dev_extent_cache, rec);
8317 if (ret) {
8318 fprintf(stderr,
8319 "Device extent[%llu, %llu, %llu] existed.\n",
8320 rec->objectid, rec->offset, rec->length);
8321 free(rec);
8324 return ret;
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with every inline backref stored after the item.
 *
 * Returns -EIO when the extent start or length fails the alignment/size
 * checks.  For an item smaller than struct btrfs_extent_item (v0 layout) the
 * result of add_extent_rec() is returned.  Otherwise 0 is returned, including
 * when an unknown inline ref type stops the walk early.
 */
8327 static int process_extent_item(struct btrfs_root *root,
8328 struct cache_tree *extent_cache,
8329 struct extent_buffer *eb, int slot)
8331 struct btrfs_extent_item *ei;
8332 struct btrfs_extent_inline_ref *iref;
8333 struct btrfs_extent_data_ref *dref;
8334 struct btrfs_shared_data_ref *sref;
8335 struct btrfs_key key;
8336 struct extent_record tmpl;
8337 unsigned long end;
8338 unsigned long ptr;
8339 int ret;
8340 int type;
8341 u32 item_size = btrfs_item_size_nr(eb, slot);
8342 u64 refs = 0;
8343 u64 offset;
8344 u64 num_bytes;
8345 int metadata = 0;
8347 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM extents are one node long; otherwise key.offset is the length. */
8349 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8350 metadata = 1;
8351 num_bytes = root->fs_info->nodesize;
8352 } else {
8353 num_bytes = key.offset;
8356 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8357 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8358 key.objectid, root->fs_info->sectorsize);
8359 return -EIO;
/* Item too small for the current layout: only the v0 compat layout is handled. */
8361 if (item_size < sizeof(*ei)) {
8362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8363 struct btrfs_extent_item_v0 *ei0;
8364 BUG_ON(item_size != sizeof(*ei0));
8365 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8366 refs = btrfs_extent_refs_v0(eb, ei0);
8367 #else
8368 BUG();
8369 #endif
8370 memset(&tmpl, 0, sizeof(tmpl));
8371 tmpl.start = key.objectid;
8372 tmpl.nr = num_bytes;
8373 tmpl.extent_item_refs = refs;
8374 tmpl.metadata = metadata;
8375 tmpl.found_rec = 1;
8376 tmpl.max_size = num_bytes;
8378 return add_extent_rec(extent_cache, &tmpl);
8381 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8382 refs = btrfs_extent_refs(eb, ei);
/* From here on the TREE_BLOCK flag, not the key type, decides "metadata". */
8383 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8384 metadata = 1;
8385 else
8386 metadata = 0;
8387 if (metadata && num_bytes != root->fs_info->nodesize) {
8388 error("ignore invalid metadata extent, length %llu does not equal to %u",
8389 num_bytes, root->fs_info->nodesize);
8390 return -EIO;
8392 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8393 error("ignore invalid data extent, length %llu is not aligned to %u",
8394 num_bytes, root->fs_info->sectorsize);
8395 return -EIO;
8398 memset(&tmpl, 0, sizeof(tmpl));
8399 tmpl.start = key.objectid;
8400 tmpl.nr = num_bytes;
8401 tmpl.extent_item_refs = refs;
8402 tmpl.metadata = metadata;
8403 tmpl.found_rec = 1;
8404 tmpl.max_size = num_bytes;
/* NOTE(review): the result of add_extent_rec() is not checked on this path. */
8405 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under an
 * EXTENT_ITEM key has a btrfs_tree_block_info in between that is skipped.
 */
8407 ptr = (unsigned long)(ei + 1);
8408 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8409 key.type == BTRFS_EXTENT_ITEM_KEY)
8410 ptr += sizeof(struct btrfs_tree_block_info);
8412 end = (unsigned long)ei + item_size;
8413 while (ptr < end) {
8414 iref = (struct btrfs_extent_inline_ref *)ptr;
8415 type = btrfs_extent_inline_ref_type(eb, iref);
8416 offset = btrfs_extent_inline_ref_offset(eb, iref);
8417 switch (type) {
8418 case BTRFS_TREE_BLOCK_REF_KEY:
8419 ret = add_tree_backref(extent_cache, key.objectid,
8420 0, offset, 0);
8421 if (ret < 0)
8422 error(
8423 "add_tree_backref failed (extent items tree block): %s",
8424 strerror(-ret));
8425 break;
8426 case BTRFS_SHARED_BLOCK_REF_KEY:
8427 ret = add_tree_backref(extent_cache, key.objectid,
8428 offset, 0, 0);
8429 if (ret < 0)
8430 error(
8431 "add_tree_backref failed (extent items shared block): %s",
8432 strerror(-ret));
8433 break;
8434 case BTRFS_EXTENT_DATA_REF_KEY:
8435 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8436 add_data_backref(extent_cache, key.objectid, 0,
8437 btrfs_extent_data_ref_root(eb, dref),
8438 btrfs_extent_data_ref_objectid(eb,
8439 dref),
8440 btrfs_extent_data_ref_offset(eb, dref),
8441 btrfs_extent_data_ref_count(eb, dref),
8442 0, num_bytes);
8443 break;
8444 case BTRFS_SHARED_DATA_REF_KEY:
8445 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8446 add_data_backref(extent_cache, key.objectid, offset,
8447 0, 0, 0,
8448 btrfs_shared_data_ref_count(eb, sref),
8449 0, num_bytes);
8450 break;
8451 default:
8452 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8453 key.objectid, key.type, num_bytes);
8454 goto out;
/* Advance by the size of the ref just handled. */
8456 ptr += btrfs_extent_inline_ref_size(type);
8458 WARN_ON(ptr > end);
/* NOTE(review): backref failures above are only logged; 0 is returned regardless. */
8459 out:
8460 return 0;
/*
 * Check that the free space cache of @cache holds an entry matching the range
 * [@offset, @offset + @bytes), after carving out any parts of that range that
 * overlap a superblock mirror stripe, and consume the matching entry.
 *
 * Returns 0 when the (trimmed) range matches an entry exactly or is fully
 * covered by a superblock stripe, -EINVAL when no matching entry exists, or
 * the non-zero result of btrfs_rmap_block() / a recursive call.
 */
8463 static int check_cache_range(struct btrfs_root *root,
8464 struct btrfs_block_group_cache *cache,
8465 u64 offset, u64 bytes)
8467 struct btrfs_free_space *entry;
8468 u64 *logical;
8469 u64 bytenr;
8470 int stripe_len;
8471 int i, nr, ret;
/* For each superblock mirror, find where it lands inside this block group. */
8473 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8474 bytenr = btrfs_sb_offset(i);
8475 ret = btrfs_rmap_block(root->fs_info,
8476 cache->key.objectid, bytenr, 0,
8477 &logical, &nr, &stripe_len);
8478 if (ret)
8479 return ret;
/* logical[] is released with free() on every exit path below. */
8481 while (nr--) {
/* Stripes that do not overlap the range are skipped. */
8482 if (logical[nr] + stripe_len <= offset)
8483 continue;
8484 if (offset + bytes <= logical[nr])
8485 continue;
/* Stripe starts exactly at the range start: drop the covered head. */
8486 if (logical[nr] == offset) {
8487 if (stripe_len >= bytes) {
8488 free(logical);
8489 return 0;
8491 bytes -= stripe_len;
8492 offset += stripe_len;
8493 } else if (logical[nr] < offset) {
8494 if (logical[nr] + stripe_len >=
8495 offset + bytes) {
8496 free(logical);
8497 return 0;
8499 bytes = (offset + bytes) -
8500 (logical[nr] + stripe_len);
8501 offset = logical[nr] + stripe_len;
8502 } else {
8504 * Could be tricky, the super may land in the
8505 * middle of the area we're checking. First
8506 * check the easiest case, it's at the end.
8508 if (logical[nr] + stripe_len >=
8509 bytes + offset) {
8510 bytes = logical[nr] - offset;
8511 continue;
8514 /* Check the left side */
8515 ret = check_cache_range(root, cache,
8516 offset,
8517 logical[nr] - offset);
8518 if (ret) {
8519 free(logical);
8520 return ret;
8523 /* Now we continue with the right side */
8524 bytes = (offset + bytes) -
8525 (logical[nr] + stripe_len);
8526 offset = logical[nr] + stripe_len;
8530 free(logical);
/* The remaining range must match one free space entry exactly. */
8533 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8534 if (!entry) {
8535 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8536 offset, offset+bytes);
8537 return -EINVAL;
8540 if (entry->offset != offset) {
8541 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8542 entry->offset);
8543 return -EINVAL;
8546 if (entry->bytes != bytes) {
8547 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8548 bytes, entry->bytes, offset);
8549 return -EINVAL;
/* Matched: remove the entry so that leftovers can be detected by the caller. */
8552 unlink_free_space(cache->free_space_ctl, entry);
8553 free(entry);
8554 return 0;
8557 static int verify_space_cache(struct btrfs_root *root,
8558 struct btrfs_block_group_cache *cache)
8560 struct btrfs_path path;
8561 struct extent_buffer *leaf;
8562 struct btrfs_key key;
8563 u64 last;
8564 int ret = 0;
8566 root = root->fs_info->extent_root;
8568 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8570 btrfs_init_path(&path);
8571 key.objectid = last;
8572 key.offset = 0;
8573 key.type = BTRFS_EXTENT_ITEM_KEY;
8574 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8575 if (ret < 0)
8576 goto out;
8577 ret = 0;
8578 while (1) {
8579 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8580 ret = btrfs_next_leaf(root, &path);
8581 if (ret < 0)
8582 goto out;
8583 if (ret > 0) {
8584 ret = 0;
8585 break;
8588 leaf = path.nodes[0];
8589 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8590 if (key.objectid >= cache->key.offset + cache->key.objectid)
8591 break;
8592 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8593 key.type != BTRFS_METADATA_ITEM_KEY) {
8594 path.slots[0]++;
8595 continue;
8598 if (last == key.objectid) {
8599 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8600 last = key.objectid + key.offset;
8601 else
8602 last = key.objectid + root->fs_info->nodesize;
8603 path.slots[0]++;
8604 continue;
8607 ret = check_cache_range(root, cache, last,
8608 key.objectid - last);
8609 if (ret)
8610 break;
8611 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8612 last = key.objectid + key.offset;
8613 else
8614 last = key.objectid + root->fs_info->nodesize;
8615 path.slots[0]++;
8618 if (last < cache->key.objectid + cache->key.offset)
8619 ret = check_cache_range(root, cache, last,
8620 cache->key.objectid +
8621 cache->key.offset - last);
8623 out:
8624 btrfs_release_path(&path);
8626 if (!ret &&
8627 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8628 fprintf(stderr, "There are still entries left in the space "
8629 "cache\n");
8630 ret = -EINVAL;
8633 return ret;
8636 static int check_space_cache(struct btrfs_root *root)
8638 struct btrfs_block_group_cache *cache;
8639 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8640 int ret;
8641 int error = 0;
8643 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8644 btrfs_super_generation(root->fs_info->super_copy) !=
8645 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8646 printf("cache and super generation don't match, space cache "
8647 "will be invalidated\n");
8648 return 0;
8651 if (ctx.progress_enabled) {
8652 ctx.tp = TASK_FREE_SPACE;
8653 task_start(ctx.info);
8656 while (1) {
8657 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8658 if (!cache)
8659 break;
8661 start = cache->key.objectid + cache->key.offset;
8662 if (!cache->free_space_ctl) {
8663 if (btrfs_init_free_space_ctl(cache,
8664 root->fs_info->sectorsize)) {
8665 ret = -ENOMEM;
8666 break;
8668 } else {
8669 btrfs_remove_free_space_cache(cache);
8672 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8673 ret = exclude_super_stripes(root, cache);
8674 if (ret) {
8675 fprintf(stderr, "could not exclude super stripes: %s\n",
8676 strerror(-ret));
8677 error++;
8678 continue;
8680 ret = load_free_space_tree(root->fs_info, cache);
8681 free_excluded_extents(root, cache);
8682 if (ret < 0) {
8683 fprintf(stderr, "could not load free space tree: %s\n",
8684 strerror(-ret));
8685 error++;
8686 continue;
8688 error += ret;
8689 } else {
8690 ret = load_free_space_cache(root->fs_info, cache);
8691 if (!ret)
8692 continue;
8695 ret = verify_space_cache(root, cache);
8696 if (ret) {
8697 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8698 cache->key.objectid);
8699 error++;
8703 task_stop(ctx.info);
8705 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sector against the expected values stored in @eb,
 * starting at byte offset @leaf_offset.
 *
 * Returns -EINVAL if @num_bytes is not a multiple of the sector size,
 * -ENOMEM if the data buffer cannot be allocated, or the non-zero result of
 * read_extent_data().
 * NOTE(review): a checksum mismatch is printed and further mirrors are tried,
 * but a mismatch on the last mirror does not change the return value.
 */
8708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8709 u64 num_bytes, unsigned long leaf_offset,
8710 struct extent_buffer *eb) {
8712 struct btrfs_fs_info *fs_info = root->fs_info;
8713 u64 offset = 0;
8714 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8715 char *data;
8716 unsigned long csum_offset;
8717 u32 csum;
8718 u32 csum_expected;
8719 u64 read_len;
8720 u64 data_checked = 0;
8721 u64 tmp;
8722 int ret = 0;
8723 int mirror;
8724 int num_copies;
8726 if (num_bytes % fs_info->sectorsize)
8727 return -EINVAL;
8729 data = malloc(num_bytes);
8730 if (!data)
8731 return -ENOMEM;
8733 while (offset < num_bytes) {
8734 mirror = 0;
/* Re-entered with mirror + 1 after a checksum mismatch. */
8735 again:
8736 read_len = num_bytes - offset;
8737 /* read as much space once a time */
8738 ret = read_extent_data(fs_info, data + offset,
8739 bytenr + offset, &read_len, mirror);
8740 if (ret)
8741 goto out;
8742 data_checked = 0;
8743 /* verify every 4k data's checksum */
8744 while (data_checked < read_len) {
8745 csum = ~(u32)0;
8746 tmp = offset + data_checked;
8748 csum = btrfs_csum_data((char *)data + tmp,
8749 csum, fs_info->sectorsize);
8750 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size-byte value per sector, indexed from the start of the range. */
8752 csum_offset = leaf_offset +
8753 tmp / fs_info->sectorsize * csum_size;
/* NOTE(review): csum_size bytes are copied into a u32 — assumes csum_size <= 4. */
8754 read_extent_buffer(eb, (char *)&csum_expected,
8755 csum_offset, csum_size);
8756 /* try another mirror */
8757 if (csum != csum_expected) {
8758 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8759 mirror, bytenr + tmp,
8760 csum, csum_expected);
8761 num_copies = btrfs_num_copies(root->fs_info,
8762 bytenr, num_bytes);
8763 if (mirror < num_copies - 1) {
8764 mirror += 1;
8765 goto again;
8768 data_checked += fs_info->sectorsize;
8770 offset += read_len;
8772 out:
8773 free(data);
8774 return ret;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEM keys in the extent tree.
 *
 * The covered parts are subtracted from the range while walking the items;
 * whatever is left at the end is reported as missing.
 *
 * Returns 0 when the whole range is covered, 1 when some part has no extent
 * item (a message is printed), or a negative value from the tree search.
 */
8777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8778 u64 num_bytes)
8780 struct btrfs_path path;
8781 struct extent_buffer *leaf;
8782 struct btrfs_key key;
8783 int ret;
8785 btrfs_init_path(&path);
8786 key.objectid = bytenr;
8787 key.type = BTRFS_EXTENT_ITEM_KEY;
8788 key.offset = (u64)-1;
8790 again:
8791 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8792 0, 0);
8793 if (ret < 0) {
8794 fprintf(stderr, "Error looking up extent record %d\n", ret);
8795 btrfs_release_path(&path);
8796 return ret;
8797 } else if (ret) {
/* No exact match: step back to the item before the insertion point. */
8798 if (path.slots[0] > 0) {
8799 path.slots[0]--;
8800 } else {
8801 ret = btrfs_prev_leaf(root, &path);
8802 if (ret < 0) {
8803 goto out;
8804 } else if (ret > 0) {
8805 ret = 0;
8806 goto out;
8811 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8814 * Block group items come before extent items if they have the same
8815 * bytenr, so walk back one more just in case. Dear future traveller,
8816 * first congrats on mastering time travel. Now if it's not too much
8817 * trouble could you go back to 2006 and tell Chris to make the
8818 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8819 * EXTENT_ITEM_KEY please?
8821 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8822 if (path.slots[0] > 0) {
8823 path.slots[0]--;
8824 } else {
8825 ret = btrfs_prev_leaf(root, &path);
8826 if (ret < 0) {
8827 goto out;
8828 } else if (ret > 0) {
8829 ret = 0;
8830 goto out;
8833 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Walk forward, shrinking [bytenr, bytenr + num_bytes) by each overlapping extent. */
8836 while (num_bytes) {
8837 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8838 ret = btrfs_next_leaf(root, &path);
8839 if (ret < 0) {
8840 fprintf(stderr, "Error going to next leaf "
8841 "%d\n", ret);
8842 btrfs_release_path(&path);
8843 return ret;
8844 } else if (ret) {
8845 break;
8848 leaf = path.nodes[0];
8849 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8850 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8851 path.slots[0]++;
8852 continue;
8854 if (key.objectid + key.offset < bytenr) {
8855 path.slots[0]++;
8856 continue;
8858 if (key.objectid > bytenr + num_bytes)
8859 break;
8861 if (key.objectid == bytenr) {
8862 if (key.offset >= num_bytes) {
8863 num_bytes = 0;
8864 break;
8866 num_bytes -= key.offset;
8867 bytenr += key.offset;
8868 } else if (key.objectid < bytenr) {
8869 if (key.objectid + key.offset >= bytenr + num_bytes) {
8870 num_bytes = 0;
8871 break;
8873 num_bytes = (bytenr + num_bytes) -
8874 (key.objectid + key.offset);
8875 bytenr = key.objectid + key.offset;
8876 } else {
8877 if (key.objectid + key.offset < bytenr + num_bytes) {
8878 u64 new_start = key.objectid + key.offset;
8879 u64 new_bytes = bytenr + num_bytes - new_start;
8882 * Weird case, the extent is in the middle of
8883 * our range, we'll have to search one side
8884 * and then the other. Not sure if this happens
8885 * in real life, but no harm in coding it up
8886 * anyway just in case.
8888 btrfs_release_path(&path);
8889 ret = check_extent_exists(root, new_start,
8890 new_bytes);
8891 if (ret) {
8892 fprintf(stderr, "Right section didn't "
8893 "have a record\n");
8894 break;
8896 num_bytes = key.objectid - bytenr;
/*
 * NOTE(review): 'key' still holds the item key read in this loop, not the
 * original (bytenr, EXTENT_ITEM, -1) search key, when jumping back to
 * 'again' — confirm that this is the intended search key.
 */
8897 goto again;
8899 num_bytes = key.objectid - bytenr;
8901 path.slots[0]++;
8903 ret = 0;
8905 out:
/* Anything left uncovered with no other error is reported as a missing extent. */
8906 if (num_bytes && !ret) {
8907 fprintf(stderr, "There are no extents for csum range "
8908 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8909 ret = 1;
8912 btrfs_release_path(&path);
8913 return ret;
8916 static int check_csums(struct btrfs_root *root)
8918 struct btrfs_path path;
8919 struct extent_buffer *leaf;
8920 struct btrfs_key key;
8921 u64 offset = 0, num_bytes = 0;
8922 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8923 int errors = 0;
8924 int ret;
8925 u64 data_len;
8926 unsigned long leaf_offset;
8928 root = root->fs_info->csum_root;
8929 if (!extent_buffer_uptodate(root->node)) {
8930 fprintf(stderr, "No valid csum tree found\n");
8931 return -ENOENT;
8934 btrfs_init_path(&path);
8935 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8936 key.type = BTRFS_EXTENT_CSUM_KEY;
8937 key.offset = 0;
8938 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8939 if (ret < 0) {
8940 fprintf(stderr, "Error searching csum tree %d\n", ret);
8941 btrfs_release_path(&path);
8942 return ret;
8945 if (ret > 0 && path.slots[0])
8946 path.slots[0]--;
8947 ret = 0;
8949 while (1) {
8950 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8951 ret = btrfs_next_leaf(root, &path);
8952 if (ret < 0) {
8953 fprintf(stderr, "Error going to next leaf "
8954 "%d\n", ret);
8955 break;
8957 if (ret)
8958 break;
8960 leaf = path.nodes[0];
8962 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8963 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8964 path.slots[0]++;
8965 continue;
8968 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8969 csum_size) * root->fs_info->sectorsize;
8970 if (!check_data_csum)
8971 goto skip_csum_check;
8972 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8973 ret = check_extent_csums(root, key.offset, data_len,
8974 leaf_offset, leaf);
8975 if (ret)
8976 break;
8977 skip_csum_check:
8978 if (!num_bytes) {
8979 offset = key.offset;
8980 } else if (key.offset != offset + num_bytes) {
8981 ret = check_extent_exists(root, offset, num_bytes);
8982 if (ret) {
8983 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8984 "there is no extent record\n",
8985 offset, offset+num_bytes);
8986 errors++;
8988 offset = key.offset;
8989 num_bytes = 0;
8991 num_bytes += data_len;
8992 path.slots[0]++;
8995 btrfs_release_path(&path);
8996 return errors;
8999 static int is_dropped_key(struct btrfs_key *key,
9000 struct btrfs_key *drop_key) {
9001 if (key->objectid < drop_key->objectid)
9002 return 1;
9003 else if (key->objectid == drop_key->objectid) {
9004 if (key->type < drop_key->type)
9005 return 1;
9006 else if (key->type == drop_key->type) {
9007 if (key->offset < drop_key->offset)
9008 return 1;
9011 return 0;
9015 * Here are the rules for FULL_BACKREF.
9017 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9018 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9019 * FULL_BACKREF set.
9020 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9021 * if it happened after the relocation occurred since we'll have dropped the
9022 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9023 * have no real way to know for sure.
9025 * We process the blocks one root at a time, and we start from the lowest root
9026 * objectid and go to the highest. So we can just lookup the owner backref for
9027 * the record and if we don't find it then we know it doesn't exist and we have
9028 * a FULL BACKREF.
9030 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9031 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9032 * be set or not and then we can check later once we've gathered all the refs.
9034 static int calc_extent_flag(struct cache_tree *extent_cache,
9035 struct extent_buffer *buf,
9036 struct root_item_record *ri,
9037 u64 *flags)
9039 struct extent_record *rec;
9040 struct cache_extent *cache;
9041 struct tree_backref *tback;
9042 u64 owner = 0;
9044 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9045 /* we have added this extent before */
9046 if (!cache)
9047 return -ENOENT;
9049 rec = container_of(cache, struct extent_record, cache);
9052 * Except file/reloc tree, we can not have
9053 * FULL BACKREF MODE
9055 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9056 goto normal;
9058 * root node
9060 if (buf->start == ri->bytenr)
9061 goto normal;
9063 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9064 goto full_backref;
9066 owner = btrfs_header_owner(buf);
9067 if (owner == ri->objectid)
9068 goto normal;
9070 tback = find_tree_backref(rec, 0, owner);
9071 if (!tback)
9072 goto full_backref;
9073 normal:
9074 *flags = 0;
9075 if (rec->flag_block_full_backref != FLAG_UNSET &&
9076 rec->flag_block_full_backref != 0)
9077 rec->bad_full_backref = 1;
9078 return 0;
9079 full_backref:
9080 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9081 if (rec->flag_block_full_backref != FLAG_UNSET &&
9082 rec->flag_block_full_backref != 1)
9083 rec->bad_full_backref = 1;
9084 return 0;
9087 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9089 fprintf(stderr, "Invalid key type(");
9090 print_key_type(stderr, 0, key_type);
9091 fprintf(stderr, ") found in root(");
9092 print_objectid(stderr, rootid, 0);
9093 fprintf(stderr, ")\n");
9097 * Check if the key is valid with its extent buffer.
9099 * This is a early check in case invalid key exists in a extent buffer
9100 * This is not comprehensive yet, but should prevent wrong key/item passed
9101 * further
9103 static int check_type_with_root(u64 rootid, u8 key_type)
9105 switch (key_type) {
9106 /* Only valid in chunk tree */
9107 case BTRFS_DEV_ITEM_KEY:
9108 case BTRFS_CHUNK_ITEM_KEY:
9109 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9110 goto err;
9111 break;
9112 /* valid in csum and log tree */
9113 case BTRFS_CSUM_TREE_OBJECTID:
9114 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9115 is_fstree(rootid)))
9116 goto err;
9117 break;
9118 case BTRFS_EXTENT_ITEM_KEY:
9119 case BTRFS_METADATA_ITEM_KEY:
9120 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9121 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9122 goto err;
9123 break;
9124 case BTRFS_ROOT_ITEM_KEY:
9125 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9126 goto err;
9127 break;
9128 case BTRFS_DEV_EXTENT_KEY:
9129 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9130 goto err;
9131 break;
9133 return 0;
9134 err:
9135 report_mismatch_key_root(key_type, rootid);
9136 return -EINVAL;
9139 static int run_next_block(struct btrfs_root *root,
9140 struct block_info *bits,
9141 int bits_nr,
9142 u64 *last,
9143 struct cache_tree *pending,
9144 struct cache_tree *seen,
9145 struct cache_tree *reada,
9146 struct cache_tree *nodes,
9147 struct cache_tree *extent_cache,
9148 struct cache_tree *chunk_cache,
9149 struct rb_root *dev_cache,
9150 struct block_group_tree *block_group_cache,
9151 struct device_extent_tree *dev_extent_cache,
9152 struct root_item_record *ri)
9154 struct btrfs_fs_info *fs_info = root->fs_info;
9155 struct extent_buffer *buf;
9156 struct extent_record *rec = NULL;
9157 u64 bytenr;
9158 u32 size;
9159 u64 parent;
9160 u64 owner;
9161 u64 flags;
9162 u64 ptr;
9163 u64 gen = 0;
9164 int ret = 0;
9165 int i;
9166 int nritems;
9167 struct btrfs_key key;
9168 struct cache_extent *cache;
9169 int reada_bits;
9171 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9172 bits_nr, &reada_bits);
9173 if (nritems == 0)
9174 return 1;
9176 if (!reada_bits) {
9177 for(i = 0; i < nritems; i++) {
9178 ret = add_cache_extent(reada, bits[i].start,
9179 bits[i].size);
9180 if (ret == -EEXIST)
9181 continue;
9183 /* fixme, get the parent transid */
9184 readahead_tree_block(fs_info, bits[i].start, 0);
9187 *last = bits[0].start;
9188 bytenr = bits[0].start;
9189 size = bits[0].size;
9191 cache = lookup_cache_extent(pending, bytenr, size);
9192 if (cache) {
9193 remove_cache_extent(pending, cache);
9194 free(cache);
9196 cache = lookup_cache_extent(reada, bytenr, size);
9197 if (cache) {
9198 remove_cache_extent(reada, cache);
9199 free(cache);
9201 cache = lookup_cache_extent(nodes, bytenr, size);
9202 if (cache) {
9203 remove_cache_extent(nodes, cache);
9204 free(cache);
9206 cache = lookup_cache_extent(extent_cache, bytenr, size);
9207 if (cache) {
9208 rec = container_of(cache, struct extent_record, cache);
9209 gen = rec->parent_generation;
9212 /* fixme, get the real parent transid */
9213 buf = read_tree_block(root->fs_info, bytenr, gen);
9214 if (!extent_buffer_uptodate(buf)) {
9215 record_bad_block_io(root->fs_info,
9216 extent_cache, bytenr, size);
9217 goto out;
9220 nritems = btrfs_header_nritems(buf);
9222 flags = 0;
9223 if (!init_extent_tree) {
9224 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9225 btrfs_header_level(buf), 1, NULL,
9226 &flags);
9227 if (ret < 0) {
9228 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9229 if (ret < 0) {
9230 fprintf(stderr, "Couldn't calc extent flags\n");
9231 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9234 } else {
9235 flags = 0;
9236 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9237 if (ret < 0) {
9238 fprintf(stderr, "Couldn't calc extent flags\n");
9239 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9243 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9244 if (ri != NULL &&
9245 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9246 ri->objectid == btrfs_header_owner(buf)) {
9248 * Ok we got to this block from it's original owner and
9249 * we have FULL_BACKREF set. Relocation can leave
9250 * converted blocks over so this is altogether possible,
9251 * however it's not possible if the generation > the
9252 * last snapshot, so check for this case.
9254 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9255 btrfs_header_generation(buf) > ri->last_snapshot) {
9256 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9257 rec->bad_full_backref = 1;
9260 } else {
9261 if (ri != NULL &&
9262 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9263 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9265 rec->bad_full_backref = 1;
9269 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9270 rec->flag_block_full_backref = 1;
9271 parent = bytenr;
9272 owner = 0;
9273 } else {
9274 rec->flag_block_full_backref = 0;
9275 parent = 0;
9276 owner = btrfs_header_owner(buf);
9279 ret = check_block(root, extent_cache, buf, flags);
9280 if (ret)
9281 goto out;
9283 if (btrfs_is_leaf(buf)) {
9284 btree_space_waste += btrfs_leaf_free_space(root, buf);
9285 for (i = 0; i < nritems; i++) {
9286 struct btrfs_file_extent_item *fi;
9287 btrfs_item_key_to_cpu(buf, &key, i);
9289 * Check key type against the leaf owner.
9290 * Could filter quite a lot of early error if
9291 * owner is correct
9293 if (check_type_with_root(btrfs_header_owner(buf),
9294 key.type)) {
9295 fprintf(stderr, "ignoring invalid key\n");
9296 continue;
9298 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9299 process_extent_item(root, extent_cache, buf,
9301 continue;
9303 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9304 process_extent_item(root, extent_cache, buf,
9306 continue;
9308 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9309 total_csum_bytes +=
9310 btrfs_item_size_nr(buf, i);
9311 continue;
9313 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9314 process_chunk_item(chunk_cache, &key, buf, i);
9315 continue;
9317 if (key.type == BTRFS_DEV_ITEM_KEY) {
9318 process_device_item(dev_cache, &key, buf, i);
9319 continue;
9321 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9322 process_block_group_item(block_group_cache,
9323 &key, buf, i);
9324 continue;
9326 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9327 process_device_extent_item(dev_extent_cache,
9328 &key, buf, i);
9329 continue;
9332 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9333 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9334 process_extent_ref_v0(extent_cache, buf, i);
9335 #else
9336 BUG();
9337 #endif
9338 continue;
9341 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9342 ret = add_tree_backref(extent_cache,
9343 key.objectid, 0, key.offset, 0);
9344 if (ret < 0)
9345 error(
9346 "add_tree_backref failed (leaf tree block): %s",
9347 strerror(-ret));
9348 continue;
9350 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9351 ret = add_tree_backref(extent_cache,
9352 key.objectid, key.offset, 0, 0);
9353 if (ret < 0)
9354 error(
9355 "add_tree_backref failed (leaf shared block): %s",
9356 strerror(-ret));
9357 continue;
9359 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9360 struct btrfs_extent_data_ref *ref;
9361 ref = btrfs_item_ptr(buf, i,
9362 struct btrfs_extent_data_ref);
9363 add_data_backref(extent_cache,
9364 key.objectid, 0,
9365 btrfs_extent_data_ref_root(buf, ref),
9366 btrfs_extent_data_ref_objectid(buf,
9367 ref),
9368 btrfs_extent_data_ref_offset(buf, ref),
9369 btrfs_extent_data_ref_count(buf, ref),
9370 0, root->fs_info->sectorsize);
9371 continue;
9373 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9374 struct btrfs_shared_data_ref *ref;
9375 ref = btrfs_item_ptr(buf, i,
9376 struct btrfs_shared_data_ref);
9377 add_data_backref(extent_cache,
9378 key.objectid, key.offset, 0, 0, 0,
9379 btrfs_shared_data_ref_count(buf, ref),
9380 0, root->fs_info->sectorsize);
9381 continue;
9383 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9384 struct bad_item *bad;
9386 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9387 continue;
9388 if (!owner)
9389 continue;
9390 bad = malloc(sizeof(struct bad_item));
9391 if (!bad)
9392 continue;
9393 INIT_LIST_HEAD(&bad->list);
9394 memcpy(&bad->key, &key,
9395 sizeof(struct btrfs_key));
9396 bad->root_id = owner;
9397 list_add_tail(&bad->list, &delete_items);
9398 continue;
9400 if (key.type != BTRFS_EXTENT_DATA_KEY)
9401 continue;
9402 fi = btrfs_item_ptr(buf, i,
9403 struct btrfs_file_extent_item);
9404 if (btrfs_file_extent_type(buf, fi) ==
9405 BTRFS_FILE_EXTENT_INLINE)
9406 continue;
9407 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9408 continue;
9410 data_bytes_allocated +=
9411 btrfs_file_extent_disk_num_bytes(buf, fi);
9412 if (data_bytes_allocated < root->fs_info->sectorsize) {
9413 abort();
9415 data_bytes_referenced +=
9416 btrfs_file_extent_num_bytes(buf, fi);
9417 add_data_backref(extent_cache,
9418 btrfs_file_extent_disk_bytenr(buf, fi),
9419 parent, owner, key.objectid, key.offset -
9420 btrfs_file_extent_offset(buf, fi), 1, 1,
9421 btrfs_file_extent_disk_num_bytes(buf, fi));
9423 } else {
9424 int level;
9425 struct btrfs_key first_key;
9427 first_key.objectid = 0;
9429 if (nritems > 0)
9430 btrfs_item_key_to_cpu(buf, &first_key, 0);
9431 level = btrfs_header_level(buf);
9432 for (i = 0; i < nritems; i++) {
9433 struct extent_record tmpl;
9435 ptr = btrfs_node_blockptr(buf, i);
9436 size = root->fs_info->nodesize;
9437 btrfs_node_key_to_cpu(buf, &key, i);
9438 if (ri != NULL) {
9439 if ((level == ri->drop_level)
9440 && is_dropped_key(&key, &ri->drop_key)) {
9441 continue;
9445 memset(&tmpl, 0, sizeof(tmpl));
9446 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9447 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9448 tmpl.start = ptr;
9449 tmpl.nr = size;
9450 tmpl.refs = 1;
9451 tmpl.metadata = 1;
9452 tmpl.max_size = size;
9453 ret = add_extent_rec(extent_cache, &tmpl);
9454 if (ret < 0)
9455 goto out;
9457 ret = add_tree_backref(extent_cache, ptr, parent,
9458 owner, 1);
9459 if (ret < 0) {
9460 error(
9461 "add_tree_backref failed (non-leaf block): %s",
9462 strerror(-ret));
9463 continue;
9466 if (level > 1) {
9467 add_pending(nodes, seen, ptr, size);
9468 } else {
9469 add_pending(pending, seen, ptr, size);
9472 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9473 nritems) * sizeof(struct btrfs_key_ptr);
9475 total_btree_bytes += buf->len;
9476 if (fs_root_objectid(btrfs_header_owner(buf)))
9477 total_fs_tree_bytes += buf->len;
9478 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9479 total_extent_tree_bytes += buf->len;
9480 out:
9481 free_extent_buffer(buf);
9482 return ret;
9485 static int add_root_to_pending(struct extent_buffer *buf,
9486 struct cache_tree *extent_cache,
9487 struct cache_tree *pending,
9488 struct cache_tree *seen,
9489 struct cache_tree *nodes,
9490 u64 objectid)
9492 struct extent_record tmpl;
9493 int ret;
9495 if (btrfs_header_level(buf) > 0)
9496 add_pending(nodes, seen, buf->start, buf->len);
9497 else
9498 add_pending(pending, seen, buf->start, buf->len);
9500 memset(&tmpl, 0, sizeof(tmpl));
9501 tmpl.start = buf->start;
9502 tmpl.nr = buf->len;
9503 tmpl.is_root = 1;
9504 tmpl.refs = 1;
9505 tmpl.metadata = 1;
9506 tmpl.max_size = buf->len;
9507 add_extent_rec(extent_cache, &tmpl);
9509 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9510 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9511 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9512 0, 1);
9513 else
9514 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9516 return ret;
9519 /* as we fix the tree, we might be deleting blocks that
9520 * we're tracking for repair. This hook makes sure we
9521 * remove any backrefs for blocks as we are fixing them.
9523 static int free_extent_hook(struct btrfs_trans_handle *trans,
9524 struct btrfs_root *root,
9525 u64 bytenr, u64 num_bytes, u64 parent,
9526 u64 root_objectid, u64 owner, u64 offset,
9527 int refs_to_drop)
9529 struct extent_record *rec;
9530 struct cache_extent *cache;
9531 int is_data;
9532 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9534 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9535 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9536 if (!cache)
9537 return 0;
9539 rec = container_of(cache, struct extent_record, cache);
9540 if (is_data) {
9541 struct data_backref *back;
9542 back = find_data_backref(rec, parent, root_objectid, owner,
9543 offset, 1, bytenr, num_bytes);
9544 if (!back)
9545 goto out;
9546 if (back->node.found_ref) {
9547 back->found_ref -= refs_to_drop;
9548 if (rec->refs)
9549 rec->refs -= refs_to_drop;
9551 if (back->node.found_extent_tree) {
9552 back->num_refs -= refs_to_drop;
9553 if (rec->extent_item_refs)
9554 rec->extent_item_refs -= refs_to_drop;
9556 if (back->found_ref == 0)
9557 back->node.found_ref = 0;
9558 if (back->num_refs == 0)
9559 back->node.found_extent_tree = 0;
9561 if (!back->node.found_extent_tree && back->node.found_ref) {
9562 rb_erase(&back->node.node, &rec->backref_tree);
9563 free(back);
9565 } else {
9566 struct tree_backref *back;
9567 back = find_tree_backref(rec, parent, root_objectid);
9568 if (!back)
9569 goto out;
9570 if (back->node.found_ref) {
9571 if (rec->refs)
9572 rec->refs--;
9573 back->node.found_ref = 0;
9575 if (back->node.found_extent_tree) {
9576 if (rec->extent_item_refs)
9577 rec->extent_item_refs--;
9578 back->node.found_extent_tree = 0;
9580 if (!back->node.found_extent_tree && back->node.found_ref) {
9581 rb_erase(&back->node.node, &rec->backref_tree);
9582 free(back);
9585 maybe_free_extent_rec(extent_cache, rec);
9586 out:
9587 return 0;
9590 static int delete_extent_records(struct btrfs_trans_handle *trans,
9591 struct btrfs_root *root,
9592 struct btrfs_path *path,
9593 u64 bytenr)
9595 struct btrfs_key key;
9596 struct btrfs_key found_key;
9597 struct extent_buffer *leaf;
9598 int ret;
9599 int slot;
9602 key.objectid = bytenr;
9603 key.type = (u8)-1;
9604 key.offset = (u64)-1;
9606 while(1) {
9607 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9608 &key, path, 0, 1);
9609 if (ret < 0)
9610 break;
9612 if (ret > 0) {
9613 ret = 0;
9614 if (path->slots[0] == 0)
9615 break;
9616 path->slots[0]--;
9618 ret = 0;
9620 leaf = path->nodes[0];
9621 slot = path->slots[0];
9623 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9624 if (found_key.objectid != bytenr)
9625 break;
9627 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9628 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9629 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9630 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9631 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9632 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9633 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9634 btrfs_release_path(path);
9635 if (found_key.type == 0) {
9636 if (found_key.offset == 0)
9637 break;
9638 key.offset = found_key.offset - 1;
9639 key.type = found_key.type;
9641 key.type = found_key.type - 1;
9642 key.offset = (u64)-1;
9643 continue;
9646 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9647 found_key.objectid, found_key.type, found_key.offset);
9649 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9650 if (ret)
9651 break;
9652 btrfs_release_path(path);
9654 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9655 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9656 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9657 found_key.offset : root->fs_info->nodesize;
9659 ret = btrfs_update_block_group(trans, root, bytenr,
9660 bytes, 0, 0);
9661 if (ret)
9662 break;
9666 btrfs_release_path(path);
9667 return ret;
9671 * for a single backref, this will allocate a new extent
9672 * and add the backref to it.
9674 static int record_extent(struct btrfs_trans_handle *trans,
9675 struct btrfs_fs_info *info,
9676 struct btrfs_path *path,
9677 struct extent_record *rec,
9678 struct extent_backref *back,
9679 int allocated, u64 flags)
9681 int ret = 0;
9682 struct btrfs_root *extent_root = info->extent_root;
9683 struct extent_buffer *leaf;
9684 struct btrfs_key ins_key;
9685 struct btrfs_extent_item *ei;
9686 struct data_backref *dback;
9687 struct btrfs_tree_block_info *bi;
9689 if (!back->is_data)
9690 rec->max_size = max_t(u64, rec->max_size,
9691 info->nodesize);
9693 if (!allocated) {
9694 u32 item_size = sizeof(*ei);
9696 if (!back->is_data)
9697 item_size += sizeof(*bi);
9699 ins_key.objectid = rec->start;
9700 ins_key.offset = rec->max_size;
9701 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9703 ret = btrfs_insert_empty_item(trans, extent_root, path,
9704 &ins_key, item_size);
9705 if (ret)
9706 goto fail;
9708 leaf = path->nodes[0];
9709 ei = btrfs_item_ptr(leaf, path->slots[0],
9710 struct btrfs_extent_item);
9712 btrfs_set_extent_refs(leaf, ei, 0);
9713 btrfs_set_extent_generation(leaf, ei, rec->generation);
9715 if (back->is_data) {
9716 btrfs_set_extent_flags(leaf, ei,
9717 BTRFS_EXTENT_FLAG_DATA);
9718 } else {
9719 struct btrfs_disk_key copy_key;;
9721 bi = (struct btrfs_tree_block_info *)(ei + 1);
9722 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9723 sizeof(*bi));
9725 btrfs_set_disk_key_objectid(&copy_key,
9726 rec->info_objectid);
9727 btrfs_set_disk_key_type(&copy_key, 0);
9728 btrfs_set_disk_key_offset(&copy_key, 0);
9730 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9731 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9733 btrfs_set_extent_flags(leaf, ei,
9734 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9737 btrfs_mark_buffer_dirty(leaf);
9738 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9739 rec->max_size, 1, 0);
9740 if (ret)
9741 goto fail;
9742 btrfs_release_path(path);
9745 if (back->is_data) {
9746 u64 parent;
9747 int i;
9749 dback = to_data_backref(back);
9750 if (back->full_backref)
9751 parent = dback->parent;
9752 else
9753 parent = 0;
9755 for (i = 0; i < dback->found_ref; i++) {
9756 /* if parent != 0, we're doing a full backref
9757 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9758 * just makes the backref allocator create a data
9759 * backref
9761 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9762 rec->start, rec->max_size,
9763 parent,
9764 dback->root,
9765 parent ?
9766 BTRFS_FIRST_FREE_OBJECTID :
9767 dback->owner,
9768 dback->offset);
9769 if (ret)
9770 break;
9772 fprintf(stderr, "adding new data backref"
9773 " on %llu %s %llu owner %llu"
9774 " offset %llu found %d\n",
9775 (unsigned long long)rec->start,
9776 back->full_backref ?
9777 "parent" : "root",
9778 back->full_backref ?
9779 (unsigned long long)parent :
9780 (unsigned long long)dback->root,
9781 (unsigned long long)dback->owner,
9782 (unsigned long long)dback->offset,
9783 dback->found_ref);
9784 } else {
9785 u64 parent;
9786 struct tree_backref *tback;
9788 tback = to_tree_backref(back);
9789 if (back->full_backref)
9790 parent = tback->parent;
9791 else
9792 parent = 0;
9794 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9795 rec->start, rec->max_size,
9796 parent, tback->root, 0, 0);
9797 fprintf(stderr, "adding new tree backref on "
9798 "start %llu len %llu parent %llu root %llu\n",
9799 rec->start, rec->max_size, parent, tback->root);
9801 fail:
9802 btrfs_release_path(path);
9803 return ret;
9806 static struct extent_entry *find_entry(struct list_head *entries,
9807 u64 bytenr, u64 bytes)
9809 struct extent_entry *entry = NULL;
9811 list_for_each_entry(entry, entries, list) {
9812 if (entry->bytenr == bytenr && entry->bytes == bytes)
9813 return entry;
9816 return NULL;
9819 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9821 struct extent_entry *entry, *best = NULL, *prev = NULL;
9823 list_for_each_entry(entry, entries, list) {
9825 * If there are as many broken entries as entries then we know
9826 * not to trust this particular entry.
9828 if (entry->broken == entry->count)
9829 continue;
9832 * Special case, when there are only two entries and 'best' is
9833 * the first one
9835 if (!prev) {
9836 best = entry;
9837 prev = entry;
9838 continue;
9842 * If our current entry == best then we can't be sure our best
9843 * is really the best, so we need to keep searching.
9845 if (best && best->count == entry->count) {
9846 prev = entry;
9847 best = NULL;
9848 continue;
9851 /* Prev == entry, not good enough, have to keep searching */
9852 if (!prev->broken && prev->count == entry->count)
9853 continue;
9855 if (!best)
9856 best = (prev->count > entry->count) ? prev : entry;
9857 else if (best->count < entry->count)
9858 best = entry;
9859 prev = entry;
9862 return best;
9865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9866 struct data_backref *dback, struct extent_entry *entry)
9868 struct btrfs_trans_handle *trans;
9869 struct btrfs_root *root;
9870 struct btrfs_file_extent_item *fi;
9871 struct extent_buffer *leaf;
9872 struct btrfs_key key;
9873 u64 bytenr, bytes;
9874 int ret, err;
9876 key.objectid = dback->root;
9877 key.type = BTRFS_ROOT_ITEM_KEY;
9878 key.offset = (u64)-1;
9879 root = btrfs_read_fs_root(info, &key);
9880 if (IS_ERR(root)) {
9881 fprintf(stderr, "Couldn't find root for our ref\n");
9882 return -EINVAL;
9886 * The backref points to the original offset of the extent if it was
9887 * split, so we need to search down to the offset we have and then walk
9888 * forward until we find the backref we're looking for.
9890 key.objectid = dback->owner;
9891 key.type = BTRFS_EXTENT_DATA_KEY;
9892 key.offset = dback->offset;
9893 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9894 if (ret < 0) {
9895 fprintf(stderr, "Error looking up ref %d\n", ret);
9896 return ret;
9899 while (1) {
9900 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9901 ret = btrfs_next_leaf(root, path);
9902 if (ret) {
9903 fprintf(stderr, "Couldn't find our ref, next\n");
9904 return -EINVAL;
9907 leaf = path->nodes[0];
9908 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9909 if (key.objectid != dback->owner ||
9910 key.type != BTRFS_EXTENT_DATA_KEY) {
9911 fprintf(stderr, "Couldn't find our ref, search\n");
9912 return -EINVAL;
9914 fi = btrfs_item_ptr(leaf, path->slots[0],
9915 struct btrfs_file_extent_item);
9916 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9917 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9919 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9920 break;
9921 path->slots[0]++;
9924 btrfs_release_path(path);
9926 trans = btrfs_start_transaction(root, 1);
9927 if (IS_ERR(trans))
9928 return PTR_ERR(trans);
9931 * Ok we have the key of the file extent we want to fix, now we can cow
9932 * down to the thing and fix it.
9934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9935 if (ret < 0) {
9936 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9937 key.objectid, key.type, key.offset, ret);
9938 goto out;
9940 if (ret > 0) {
9941 fprintf(stderr, "Well that's odd, we just found this key "
9942 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9943 key.offset);
9944 ret = -EINVAL;
9945 goto out;
9947 leaf = path->nodes[0];
9948 fi = btrfs_item_ptr(leaf, path->slots[0],
9949 struct btrfs_file_extent_item);
9951 if (btrfs_file_extent_compression(leaf, fi) &&
9952 dback->disk_bytenr != entry->bytenr) {
9953 fprintf(stderr, "Ref doesn't match the record start and is "
9954 "compressed, please take a btrfs-image of this file "
9955 "system and send it to a btrfs developer so they can "
9956 "complete this functionality for bytenr %Lu\n",
9957 dback->disk_bytenr);
9958 ret = -EINVAL;
9959 goto out;
9962 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9963 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9964 } else if (dback->disk_bytenr > entry->bytenr) {
9965 u64 off_diff, offset;
9967 off_diff = dback->disk_bytenr - entry->bytenr;
9968 offset = btrfs_file_extent_offset(leaf, fi);
9969 if (dback->disk_bytenr + offset +
9970 btrfs_file_extent_num_bytes(leaf, fi) >
9971 entry->bytenr + entry->bytes) {
9972 fprintf(stderr, "Ref is past the entry end, please "
9973 "take a btrfs-image of this file system and "
9974 "send it to a btrfs developer, ref %Lu\n",
9975 dback->disk_bytenr);
9976 ret = -EINVAL;
9977 goto out;
9979 offset += off_diff;
9980 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9981 btrfs_set_file_extent_offset(leaf, fi, offset);
9982 } else if (dback->disk_bytenr < entry->bytenr) {
9983 u64 offset;
9985 offset = btrfs_file_extent_offset(leaf, fi);
9986 if (dback->disk_bytenr + offset < entry->bytenr) {
9987 fprintf(stderr, "Ref is before the entry start, please"
9988 " take a btrfs-image of this file system and "
9989 "send it to a btrfs developer, ref %Lu\n",
9990 dback->disk_bytenr);
9991 ret = -EINVAL;
9992 goto out;
9995 offset += dback->disk_bytenr;
9996 offset -= entry->bytenr;
9997 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9998 btrfs_set_file_extent_offset(leaf, fi, offset);
10001 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10004 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10005 * only do this if we aren't using compression, otherwise it's a
10006 * trickier case.
10008 if (!btrfs_file_extent_compression(leaf, fi))
10009 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10010 else
10011 printf("ram bytes may be wrong?\n");
10012 btrfs_mark_buffer_dirty(leaf);
10013 out:
10014 err = btrfs_commit_transaction(trans, root);
10015 btrfs_release_path(path);
10016 return ret ? ret : err;
10019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10020 struct extent_record *rec)
10022 struct extent_backref *back, *tmp;
10023 struct data_backref *dback;
10024 struct extent_entry *entry, *best = NULL;
10025 LIST_HEAD(entries);
10026 int nr_entries = 0;
10027 int broken_entries = 0;
10028 int ret = 0;
10029 short mismatch = 0;
10032 * Metadata is easy and the backrefs should always agree on bytenr and
10033 * size, if not we've got bigger issues.
10035 if (rec->metadata)
10036 return 0;
10038 rbtree_postorder_for_each_entry_safe(back, tmp,
10039 &rec->backref_tree, node) {
10040 if (back->full_backref || !back->is_data)
10041 continue;
10043 dback = to_data_backref(back);
10046 * We only pay attention to backrefs that we found a real
10047 * backref for.
10049 if (dback->found_ref == 0)
10050 continue;
10053 * For now we only catch when the bytes don't match, not the
10054 * bytenr. We can easily do this at the same time, but I want
10055 * to have a fs image to test on before we just add repair
10056 * functionality willy-nilly so we know we won't screw up the
10057 * repair.
10060 entry = find_entry(&entries, dback->disk_bytenr,
10061 dback->bytes);
10062 if (!entry) {
10063 entry = malloc(sizeof(struct extent_entry));
10064 if (!entry) {
10065 ret = -ENOMEM;
10066 goto out;
10068 memset(entry, 0, sizeof(*entry));
10069 entry->bytenr = dback->disk_bytenr;
10070 entry->bytes = dback->bytes;
10071 list_add_tail(&entry->list, &entries);
10072 nr_entries++;
10076 * If we only have on entry we may think the entries agree when
10077 * in reality they don't so we have to do some extra checking.
10079 if (dback->disk_bytenr != rec->start ||
10080 dback->bytes != rec->nr || back->broken)
10081 mismatch = 1;
10083 if (back->broken) {
10084 entry->broken++;
10085 broken_entries++;
10088 entry->count++;
10091 /* Yay all the backrefs agree, carry on good sir */
10092 if (nr_entries <= 1 && !mismatch)
10093 goto out;
10095 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10096 "%Lu\n", rec->start);
10099 * First we want to see if the backrefs can agree amongst themselves who
10100 * is right, so figure out which one of the entries has the highest
10101 * count.
10103 best = find_most_right_entry(&entries);
10106 * Ok so we may have an even split between what the backrefs think, so
10107 * this is where we use the extent ref to see what it thinks.
10109 if (!best) {
10110 entry = find_entry(&entries, rec->start, rec->nr);
10111 if (!entry && (!broken_entries || !rec->found_rec)) {
10112 fprintf(stderr, "Backrefs don't agree with each other "
10113 "and extent record doesn't agree with anybody,"
10114 " so we can't fix bytenr %Lu bytes %Lu\n",
10115 rec->start, rec->nr);
10116 ret = -EINVAL;
10117 goto out;
10118 } else if (!entry) {
10120 * Ok our backrefs were broken, we'll assume this is the
10121 * correct value and add an entry for this range.
10123 entry = malloc(sizeof(struct extent_entry));
10124 if (!entry) {
10125 ret = -ENOMEM;
10126 goto out;
10128 memset(entry, 0, sizeof(*entry));
10129 entry->bytenr = rec->start;
10130 entry->bytes = rec->nr;
10131 list_add_tail(&entry->list, &entries);
10132 nr_entries++;
10134 entry->count++;
10135 best = find_most_right_entry(&entries);
10136 if (!best) {
10137 fprintf(stderr, "Backrefs and extent record evenly "
10138 "split on who is right, this is going to "
10139 "require user input to fix bytenr %Lu bytes "
10140 "%Lu\n", rec->start, rec->nr);
10141 ret = -EINVAL;
10142 goto out;
10147 * I don't think this can happen currently as we'll abort() if we catch
10148 * this case higher up, but in case somebody removes that we still can't
10149 * deal with it properly here yet, so just bail out of that's the case.
10151 if (best->bytenr != rec->start) {
10152 fprintf(stderr, "Extent start and backref starts don't match, "
10153 "please use btrfs-image on this file system and send "
10154 "it to a btrfs developer so they can make fsck fix "
10155 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10156 rec->start, rec->nr);
10157 ret = -EINVAL;
10158 goto out;
10162 * Ok great we all agreed on an extent record, let's go find the real
10163 * references and fix up the ones that don't match.
10165 rbtree_postorder_for_each_entry_safe(back, tmp,
10166 &rec->backref_tree, node) {
10167 if (back->full_backref || !back->is_data)
10168 continue;
10170 dback = to_data_backref(back);
10173 * Still ignoring backrefs that don't have a real ref attached
10174 * to them.
10176 if (dback->found_ref == 0)
10177 continue;
10179 if (dback->bytes == best->bytes &&
10180 dback->disk_bytenr == best->bytenr)
10181 continue;
10183 ret = repair_ref(info, path, dback, best);
10184 if (ret)
10185 goto out;
10189 * Ok we messed with the actual refs, which means we need to drop our
10190 * entire cache and go back and rescan. I know this is a huge pain and
10191 * adds a lot of extra work, but it's the only way to be safe. Once all
10192 * the backrefs agree we may not need to do anything to the extent
10193 * record itself.
10195 ret = -EAGAIN;
10196 out:
10197 while (!list_empty(&entries)) {
10198 entry = list_entry(entries.next, struct extent_entry, list);
10199 list_del_init(&entry->list);
10200 free(entry);
10202 return ret;
10205 static int process_duplicates(struct cache_tree *extent_cache,
10206 struct extent_record *rec)
10208 struct extent_record *good, *tmp;
10209 struct cache_extent *cache;
10210 int ret;
10213 * If we found a extent record for this extent then return, or if we
10214 * have more than one duplicate we are likely going to need to delete
10215 * something.
10217 if (rec->found_rec || rec->num_duplicates > 1)
10218 return 0;
10220 /* Shouldn't happen but just in case */
10221 BUG_ON(!rec->num_duplicates);
10224 * So this happens if we end up with a backref that doesn't match the
10225 * actual extent entry. So either the backref is bad or the extent
10226 * entry is bad. Either way we want to have the extent_record actually
10227 * reflect what we found in the extent_tree, so we need to take the
10228 * duplicate out and use that as the extent_record since the only way we
10229 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10231 remove_cache_extent(extent_cache, &rec->cache);
10233 good = to_extent_record(rec->dups.next);
10234 list_del_init(&good->list);
10235 INIT_LIST_HEAD(&good->backrefs);
10236 INIT_LIST_HEAD(&good->dups);
10237 good->cache.start = good->start;
10238 good->cache.size = good->nr;
10239 good->content_checked = 0;
10240 good->owner_ref_checked = 0;
10241 good->num_duplicates = 0;
10242 good->refs = rec->refs;
10243 list_splice_init(&rec->backrefs, &good->backrefs);
10244 while (1) {
10245 cache = lookup_cache_extent(extent_cache, good->start,
10246 good->nr);
10247 if (!cache)
10248 break;
10249 tmp = container_of(cache, struct extent_record, cache);
10252 * If we find another overlapping extent and it's found_rec is
10253 * set then it's a duplicate and we need to try and delete
10254 * something.
10256 if (tmp->found_rec || tmp->num_duplicates > 0) {
10257 if (list_empty(&good->list))
10258 list_add_tail(&good->list,
10259 &duplicate_extents);
10260 good->num_duplicates += tmp->num_duplicates + 1;
10261 list_splice_init(&tmp->dups, &good->dups);
10262 list_del_init(&tmp->list);
10263 list_add_tail(&tmp->list, &good->dups);
10264 remove_cache_extent(extent_cache, &tmp->cache);
10265 continue;
10269 * Ok we have another non extent item backed extent rec, so lets
10270 * just add it to this extent and carry on like we did above.
10272 good->refs += tmp->refs;
10273 list_splice_init(&tmp->backrefs, &good->backrefs);
10274 remove_cache_extent(extent_cache, &tmp->cache);
10275 free(tmp);
10277 ret = insert_cache_extent(extent_cache, &good->cache);
10278 BUG_ON(ret);
10279 free(rec);
10280 return good->num_duplicates ? 0 : 1;
10283 static int delete_duplicate_records(struct btrfs_root *root,
10284 struct extent_record *rec)
10286 struct btrfs_trans_handle *trans;
10287 LIST_HEAD(delete_list);
10288 struct btrfs_path path;
10289 struct extent_record *tmp, *good, *n;
10290 int nr_del = 0;
10291 int ret = 0, err;
10292 struct btrfs_key key;
10294 btrfs_init_path(&path);
10296 good = rec;
10297 /* Find the record that covers all of the duplicates. */
10298 list_for_each_entry(tmp, &rec->dups, list) {
10299 if (good->start < tmp->start)
10300 continue;
10301 if (good->nr > tmp->nr)
10302 continue;
10304 if (tmp->start + tmp->nr < good->start + good->nr) {
10305 fprintf(stderr, "Ok we have overlapping extents that "
10306 "aren't completely covered by each other, this "
10307 "is going to require more careful thought. "
10308 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10309 tmp->start, tmp->nr, good->start, good->nr);
10310 abort();
10312 good = tmp;
10315 if (good != rec)
10316 list_add_tail(&rec->list, &delete_list);
10318 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10319 if (tmp == good)
10320 continue;
10321 list_move_tail(&tmp->list, &delete_list);
10324 root = root->fs_info->extent_root;
10325 trans = btrfs_start_transaction(root, 1);
10326 if (IS_ERR(trans)) {
10327 ret = PTR_ERR(trans);
10328 goto out;
10331 list_for_each_entry(tmp, &delete_list, list) {
10332 if (tmp->found_rec == 0)
10333 continue;
10334 key.objectid = tmp->start;
10335 key.type = BTRFS_EXTENT_ITEM_KEY;
10336 key.offset = tmp->nr;
10338 /* Shouldn't happen but just in case */
10339 if (tmp->metadata) {
10340 fprintf(stderr, "Well this shouldn't happen, extent "
10341 "record overlaps but is metadata? "
10342 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10343 abort();
10346 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10347 if (ret) {
10348 if (ret > 0)
10349 ret = -EINVAL;
10350 break;
10352 ret = btrfs_del_item(trans, root, &path);
10353 if (ret)
10354 break;
10355 btrfs_release_path(&path);
10356 nr_del++;
10358 err = btrfs_commit_transaction(trans, root);
10359 if (err && !ret)
10360 ret = err;
10361 out:
10362 while (!list_empty(&delete_list)) {
10363 tmp = to_extent_record(delete_list.next);
10364 list_del_init(&tmp->list);
10365 if (tmp == rec)
10366 continue;
10367 free(tmp);
10370 while (!list_empty(&rec->dups)) {
10371 tmp = to_extent_record(rec->dups.next);
10372 list_del_init(&tmp->list);
10373 free(tmp);
10376 btrfs_release_path(&path);
10378 if (!ret && !nr_del)
10379 rec->num_duplicates = 0;
10381 return ret ? ret : nr_del;
10384 static int find_possible_backrefs(struct btrfs_fs_info *info,
10385 struct btrfs_path *path,
10386 struct cache_tree *extent_cache,
10387 struct extent_record *rec)
10389 struct btrfs_root *root;
10390 struct extent_backref *back, *tmp;
10391 struct data_backref *dback;
10392 struct cache_extent *cache;
10393 struct btrfs_file_extent_item *fi;
10394 struct btrfs_key key;
10395 u64 bytenr, bytes;
10396 int ret;
10398 rbtree_postorder_for_each_entry_safe(back, tmp,
10399 &rec->backref_tree, node) {
10400 /* Don't care about full backrefs (poor unloved backrefs) */
10401 if (back->full_backref || !back->is_data)
10402 continue;
10404 dback = to_data_backref(back);
10406 /* We found this one, we don't need to do a lookup */
10407 if (dback->found_ref)
10408 continue;
10410 key.objectid = dback->root;
10411 key.type = BTRFS_ROOT_ITEM_KEY;
10412 key.offset = (u64)-1;
10414 root = btrfs_read_fs_root(info, &key);
10416 /* No root, definitely a bad ref, skip */
10417 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10418 continue;
10419 /* Other err, exit */
10420 if (IS_ERR(root))
10421 return PTR_ERR(root);
10423 key.objectid = dback->owner;
10424 key.type = BTRFS_EXTENT_DATA_KEY;
10425 key.offset = dback->offset;
10426 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10427 if (ret) {
10428 btrfs_release_path(path);
10429 if (ret < 0)
10430 return ret;
10431 /* Didn't find it, we can carry on */
10432 ret = 0;
10433 continue;
10436 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10437 struct btrfs_file_extent_item);
10438 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10439 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10440 btrfs_release_path(path);
10441 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10442 if (cache) {
10443 struct extent_record *tmp;
10444 tmp = container_of(cache, struct extent_record, cache);
10447 * If we found an extent record for the bytenr for this
10448 * particular backref then we can't add it to our
10449 * current extent record. We only want to add backrefs
10450 * that don't have a corresponding extent item in the
10451 * extent tree since they likely belong to this record
10452 * and we need to fix it if it doesn't match bytenrs.
10454 if (tmp->found_rec)
10455 continue;
10458 dback->found_ref += 1;
10459 dback->disk_bytenr = bytenr;
10460 dback->bytes = bytes;
10463 * Set this so the verify backref code knows not to trust the
10464 * values in this backref.
10466 back->broken = 1;
10469 return 0;
10473 * Record orphan data ref into corresponding root.
10475 * Return 0 if the extent item contains data ref and recorded.
10476 * Return 1 if the extent item contains no useful data ref
10477 * On that case, it may contains only shared_dataref or metadata backref
10478 * or the file extent exists(this should be handled by the extent bytenr
10479 * recovery routine)
10480 * Return <0 if something goes wrong.
10482 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10483 struct extent_record *rec)
10485 struct btrfs_key key;
10486 struct btrfs_root *dest_root;
10487 struct extent_backref *back, *tmp;
10488 struct data_backref *dback;
10489 struct orphan_data_extent *orphan;
10490 struct btrfs_path path;
10491 int recorded_data_ref = 0;
10492 int ret = 0;
10494 if (rec->metadata)
10495 return 1;
10496 btrfs_init_path(&path);
10497 rbtree_postorder_for_each_entry_safe(back, tmp,
10498 &rec->backref_tree, node) {
10499 if (back->full_backref || !back->is_data ||
10500 !back->found_extent_tree)
10501 continue;
10502 dback = to_data_backref(back);
10503 if (dback->found_ref)
10504 continue;
10505 key.objectid = dback->root;
10506 key.type = BTRFS_ROOT_ITEM_KEY;
10507 key.offset = (u64)-1;
10509 dest_root = btrfs_read_fs_root(fs_info, &key);
10511 /* For non-exist root we just skip it */
10512 if (IS_ERR(dest_root) || !dest_root)
10513 continue;
10515 key.objectid = dback->owner;
10516 key.type = BTRFS_EXTENT_DATA_KEY;
10517 key.offset = dback->offset;
10519 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10520 btrfs_release_path(&path);
10522 * For ret < 0, it's OK since the fs-tree may be corrupted,
10523 * we need to record it for inode/file extent rebuild.
10524 * For ret > 0, we record it only for file extent rebuild.
10525 * For ret == 0, the file extent exists but only bytenr
10526 * mismatch, let the original bytenr fix routine to handle,
10527 * don't record it.
10529 if (ret == 0)
10530 continue;
10531 ret = 0;
10532 orphan = malloc(sizeof(*orphan));
10533 if (!orphan) {
10534 ret = -ENOMEM;
10535 goto out;
10537 INIT_LIST_HEAD(&orphan->list);
10538 orphan->root = dback->root;
10539 orphan->objectid = dback->owner;
10540 orphan->offset = dback->offset;
10541 orphan->disk_bytenr = rec->cache.start;
10542 orphan->disk_len = rec->cache.size;
10543 list_add(&dest_root->orphan_data_extents, &orphan->list);
10544 recorded_data_ref = 1;
10546 out:
10547 btrfs_release_path(&path);
10548 if (!ret)
10549 return !recorded_data_ref;
10550 else
10551 return ret;
10555 * when an incorrect extent item is found, this will delete
10556 * all of the existing entries for it and recreate them
10557 * based on what the tree scan found.
10559 static int fixup_extent_refs(struct btrfs_fs_info *info,
10560 struct cache_tree *extent_cache,
10561 struct extent_record *rec)
10563 struct btrfs_trans_handle *trans = NULL;
10564 int ret;
10565 struct btrfs_path path;
10566 struct cache_extent *cache;
10567 struct extent_backref *back, *tmp;
10568 int allocated = 0;
10569 u64 flags = 0;
10571 if (rec->flag_block_full_backref)
10572 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10574 btrfs_init_path(&path);
10575 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10577 * Sometimes the backrefs themselves are so broken they don't
10578 * get attached to any meaningful rec, so first go back and
10579 * check any of our backrefs that we couldn't find and throw
10580 * them into the list if we find the backref so that
10581 * verify_backrefs can figure out what to do.
10583 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10584 if (ret < 0)
10585 goto out;
10588 /* step one, make sure all of the backrefs agree */
10589 ret = verify_backrefs(info, &path, rec);
10590 if (ret < 0)
10591 goto out;
10593 trans = btrfs_start_transaction(info->extent_root, 1);
10594 if (IS_ERR(trans)) {
10595 ret = PTR_ERR(trans);
10596 goto out;
10599 /* step two, delete all the existing records */
10600 ret = delete_extent_records(trans, info->extent_root, &path,
10601 rec->start);
10603 if (ret < 0)
10604 goto out;
10606 /* was this block corrupt? If so, don't add references to it */
10607 cache = lookup_cache_extent(info->corrupt_blocks,
10608 rec->start, rec->max_size);
10609 if (cache) {
10610 ret = 0;
10611 goto out;
10614 /* step three, recreate all the refs we did find */
10615 rbtree_postorder_for_each_entry_safe(back, tmp,
10616 &rec->backref_tree, node) {
10618 * if we didn't find any references, don't create a
10619 * new extent record
10621 if (!back->found_ref)
10622 continue;
10624 rec->bad_full_backref = 0;
10625 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10626 allocated = 1;
10628 if (ret)
10629 goto out;
10631 out:
10632 if (trans) {
10633 int err = btrfs_commit_transaction(trans, info->extent_root);
10634 if (!ret)
10635 ret = err;
10638 if (!ret)
10639 fprintf(stderr, "Repaired extent references for %llu\n",
10640 (unsigned long long)rec->start);
10642 btrfs_release_path(&path);
10643 return ret;
10646 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10647 struct extent_record *rec)
10649 struct btrfs_trans_handle *trans;
10650 struct btrfs_root *root = fs_info->extent_root;
10651 struct btrfs_path path;
10652 struct btrfs_extent_item *ei;
10653 struct btrfs_key key;
10654 u64 flags;
10655 int ret = 0;
10657 key.objectid = rec->start;
10658 if (rec->metadata) {
10659 key.type = BTRFS_METADATA_ITEM_KEY;
10660 key.offset = rec->info_level;
10661 } else {
10662 key.type = BTRFS_EXTENT_ITEM_KEY;
10663 key.offset = rec->max_size;
10666 trans = btrfs_start_transaction(root, 0);
10667 if (IS_ERR(trans))
10668 return PTR_ERR(trans);
10670 btrfs_init_path(&path);
10671 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10672 if (ret < 0) {
10673 btrfs_release_path(&path);
10674 btrfs_commit_transaction(trans, root);
10675 return ret;
10676 } else if (ret) {
10677 fprintf(stderr, "Didn't find extent for %llu\n",
10678 (unsigned long long)rec->start);
10679 btrfs_release_path(&path);
10680 btrfs_commit_transaction(trans, root);
10681 return -ENOENT;
10684 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10685 struct btrfs_extent_item);
10686 flags = btrfs_extent_flags(path.nodes[0], ei);
10687 if (rec->flag_block_full_backref) {
10688 fprintf(stderr, "setting full backref on %llu\n",
10689 (unsigned long long)key.objectid);
10690 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10691 } else {
10692 fprintf(stderr, "clearing full backref on %llu\n",
10693 (unsigned long long)key.objectid);
10694 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10697 btrfs_mark_buffer_dirty(path.nodes[0]);
10698 btrfs_release_path(&path);
10699 ret = btrfs_commit_transaction(trans, root);
10700 if (!ret)
10701 fprintf(stderr, "Repaired extent flags for %llu\n",
10702 (unsigned long long)rec->start);
10704 return ret;
10707 /* right now we only prune from the extent allocation tree */
10708 static int prune_one_block(struct btrfs_trans_handle *trans,
10709 struct btrfs_fs_info *info,
10710 struct btrfs_corrupt_block *corrupt)
10712 int ret;
10713 struct btrfs_path path;
10714 struct extent_buffer *eb;
10715 u64 found;
10716 int slot;
10717 int nritems;
10718 int level = corrupt->level + 1;
10720 btrfs_init_path(&path);
10721 again:
10722 /* we want to stop at the parent to our busted block */
10723 path.lowest_level = level;
10725 ret = btrfs_search_slot(trans, info->extent_root,
10726 &corrupt->key, &path, -1, 1);
10728 if (ret < 0)
10729 goto out;
10731 eb = path.nodes[level];
10732 if (!eb) {
10733 ret = -ENOENT;
10734 goto out;
10738 * hopefully the search gave us the block we want to prune,
10739 * lets try that first
10741 slot = path.slots[level];
10742 found = btrfs_node_blockptr(eb, slot);
10743 if (found == corrupt->cache.start)
10744 goto del_ptr;
10746 nritems = btrfs_header_nritems(eb);
10748 /* the search failed, lets scan this node and hope we find it */
10749 for (slot = 0; slot < nritems; slot++) {
10750 found = btrfs_node_blockptr(eb, slot);
10751 if (found == corrupt->cache.start)
10752 goto del_ptr;
10755 * we couldn't find the bad block. TODO, search all the nodes for pointers
10756 * to this block
10758 if (eb == info->extent_root->node) {
10759 ret = -ENOENT;
10760 goto out;
10761 } else {
10762 level++;
10763 btrfs_release_path(&path);
10764 goto again;
10767 del_ptr:
10768 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10769 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10771 out:
10772 btrfs_release_path(&path);
10773 return ret;
10776 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10778 struct btrfs_trans_handle *trans = NULL;
10779 struct cache_extent *cache;
10780 struct btrfs_corrupt_block *corrupt;
10782 while (1) {
10783 cache = search_cache_extent(info->corrupt_blocks, 0);
10784 if (!cache)
10785 break;
10786 if (!trans) {
10787 trans = btrfs_start_transaction(info->extent_root, 1);
10788 if (IS_ERR(trans))
10789 return PTR_ERR(trans);
10791 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10792 prune_one_block(trans, info, corrupt);
10793 remove_cache_extent(info->corrupt_blocks, cache);
10795 if (trans)
10796 return btrfs_commit_transaction(trans, info->extent_root);
10797 return 0;
10800 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10802 struct btrfs_block_group_cache *cache;
10803 u64 start, end;
10804 int ret;
10806 while (1) {
10807 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10808 &start, &end, EXTENT_DIRTY);
10809 if (ret)
10810 break;
10811 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10814 start = 0;
10815 while (1) {
10816 cache = btrfs_lookup_first_block_group(fs_info, start);
10817 if (!cache)
10818 break;
10819 if (cache->cached)
10820 cache->cached = 0;
10821 start = cache->key.objectid + cache->key.offset;
10825 static int check_extent_refs(struct btrfs_root *root,
10826 struct cache_tree *extent_cache)
10828 struct extent_record *rec;
10829 struct cache_extent *cache;
10830 int ret = 0;
10831 int had_dups = 0;
10832 int err = 0;
10834 if (repair) {
10836 * if we're doing a repair, we have to make sure
10837 * we don't allocate from the problem extents.
10838 * In the worst case, this will be all the
10839 * extents in the FS
10841 cache = search_cache_extent(extent_cache, 0);
10842 while(cache) {
10843 rec = container_of(cache, struct extent_record, cache);
10844 set_extent_dirty(root->fs_info->excluded_extents,
10845 rec->start,
10846 rec->start + rec->max_size - 1);
10847 cache = next_cache_extent(cache);
10850 /* pin down all the corrupted blocks too */
10851 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10852 while(cache) {
10853 set_extent_dirty(root->fs_info->excluded_extents,
10854 cache->start,
10855 cache->start + cache->size - 1);
10856 cache = next_cache_extent(cache);
10858 prune_corrupt_blocks(root->fs_info);
10859 reset_cached_block_groups(root->fs_info);
10862 reset_cached_block_groups(root->fs_info);
10865 * We need to delete any duplicate entries we find first otherwise we
10866 * could mess up the extent tree when we have backrefs that actually
10867 * belong to a different extent item and not the weird duplicate one.
10869 while (repair && !list_empty(&duplicate_extents)) {
10870 rec = to_extent_record(duplicate_extents.next);
10871 list_del_init(&rec->list);
10873 /* Sometimes we can find a backref before we find an actual
10874 * extent, so we need to process it a little bit to see if there
10875 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10876 * if this is a backref screwup. If we need to delete stuff
10877 * process_duplicates() will return 0, otherwise it will return
10878 * 1 and we
10880 if (process_duplicates(extent_cache, rec))
10881 continue;
10882 ret = delete_duplicate_records(root, rec);
10883 if (ret < 0)
10884 return ret;
10886 * delete_duplicate_records will return the number of entries
10887 * deleted, so if it's greater than 0 then we know we actually
10888 * did something and we need to remove.
10890 if (ret)
10891 had_dups = 1;
10894 if (had_dups)
10895 return -EAGAIN;
10897 while(1) {
10898 int cur_err = 0;
10899 int fix = 0;
10901 cache = search_cache_extent(extent_cache, 0);
10902 if (!cache)
10903 break;
10904 rec = container_of(cache, struct extent_record, cache);
10905 if (rec->num_duplicates) {
10906 fprintf(stderr, "extent item %llu has multiple extent "
10907 "items\n", (unsigned long long)rec->start);
10908 cur_err = 1;
10911 if (rec->refs != rec->extent_item_refs) {
10912 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10913 (unsigned long long)rec->start,
10914 (unsigned long long)rec->nr);
10915 fprintf(stderr, "extent item %llu, found %llu\n",
10916 (unsigned long long)rec->extent_item_refs,
10917 (unsigned long long)rec->refs);
10918 ret = record_orphan_data_extents(root->fs_info, rec);
10919 if (ret < 0)
10920 goto repair_abort;
10921 fix = ret;
10922 cur_err = 1;
10924 if (all_backpointers_checked(rec, 1)) {
10925 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10926 (unsigned long long)rec->start,
10927 (unsigned long long)rec->nr);
10928 fix = 1;
10929 cur_err = 1;
10931 if (!rec->owner_ref_checked) {
10932 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10933 (unsigned long long)rec->start,
10934 (unsigned long long)rec->nr);
10935 fix = 1;
10936 cur_err = 1;
10939 if (repair && fix) {
10940 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10941 if (ret)
10942 goto repair_abort;
10946 if (rec->bad_full_backref) {
10947 fprintf(stderr, "bad full backref, on [%llu]\n",
10948 (unsigned long long)rec->start);
10949 if (repair) {
10950 ret = fixup_extent_flags(root->fs_info, rec);
10951 if (ret)
10952 goto repair_abort;
10953 fix = 1;
10955 cur_err = 1;
10958 * Although it's not a extent ref's problem, we reuse this
10959 * routine for error reporting.
10960 * No repair function yet.
10962 if (rec->crossing_stripes) {
10963 fprintf(stderr,
10964 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10965 rec->start, rec->start + rec->max_size);
10966 cur_err = 1;
10969 if (rec->wrong_chunk_type) {
10970 fprintf(stderr,
10971 "bad extent [%llu, %llu), type mismatch with chunk\n",
10972 rec->start, rec->start + rec->max_size);
10973 cur_err = 1;
10976 err = cur_err;
10977 remove_cache_extent(extent_cache, cache);
10978 free_all_extent_backrefs(rec);
10979 if (!init_extent_tree && repair && (!cur_err || fix))
10980 clear_extent_dirty(root->fs_info->excluded_extents,
10981 rec->start,
10982 rec->start + rec->max_size - 1);
10983 free(rec);
10985 repair_abort:
10986 if (repair) {
10987 if (ret && ret != -EAGAIN) {
10988 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10989 exit(1);
10990 } else if (!ret) {
10991 struct btrfs_trans_handle *trans;
10993 root = root->fs_info->extent_root;
10994 trans = btrfs_start_transaction(root, 1);
10995 if (IS_ERR(trans)) {
10996 ret = PTR_ERR(trans);
10997 goto repair_abort;
11000 ret = btrfs_fix_block_accounting(trans, root);
11001 if (ret)
11002 goto repair_abort;
11003 ret = btrfs_commit_transaction(trans, root);
11004 if (ret)
11005 goto repair_abort;
11007 return ret;
11010 if (err)
11011 err = -EIO;
11012 return err;
11015 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11017 u64 stripe_size;
11019 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11020 stripe_size = length;
11021 stripe_size /= num_stripes;
11022 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11023 stripe_size = length * 2;
11024 stripe_size /= num_stripes;
11025 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11026 stripe_size = length;
11027 stripe_size /= (num_stripes - 1);
11028 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11029 stripe_size = length;
11030 stripe_size /= (num_stripes - 2);
11031 } else {
11032 stripe_size = length;
11034 return stripe_size;
11038 * Check the chunk with its block group/dev list ref:
11039 * Return 0 if all refs seems valid.
11040 * Return 1 if part of refs seems valid, need later check for rebuild ref
11041 * like missing block group and needs to search extent tree to rebuild them.
11042 * Return -1 if essential refs are missing and unable to rebuild.
11044 static int check_chunk_refs(struct chunk_record *chunk_rec,
11045 struct block_group_tree *block_group_cache,
11046 struct device_extent_tree *dev_extent_cache,
11047 int silent)
11049 struct cache_extent *block_group_item;
11050 struct block_group_record *block_group_rec;
11051 struct cache_extent *dev_extent_item;
11052 struct device_extent_record *dev_extent_rec;
11053 u64 devid;
11054 u64 offset;
11055 u64 length;
11056 int metadump_v2 = 0;
11057 int i;
11058 int ret = 0;
11060 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11061 chunk_rec->offset,
11062 chunk_rec->length);
11063 if (block_group_item) {
11064 block_group_rec = container_of(block_group_item,
11065 struct block_group_record,
11066 cache);
11067 if (chunk_rec->length != block_group_rec->offset ||
11068 chunk_rec->offset != block_group_rec->objectid ||
11069 (!metadump_v2 &&
11070 chunk_rec->type_flags != block_group_rec->flags)) {
11071 if (!silent)
11072 fprintf(stderr,
11073 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11074 chunk_rec->objectid,
11075 chunk_rec->type,
11076 chunk_rec->offset,
11077 chunk_rec->length,
11078 chunk_rec->offset,
11079 chunk_rec->type_flags,
11080 block_group_rec->objectid,
11081 block_group_rec->type,
11082 block_group_rec->offset,
11083 block_group_rec->offset,
11084 block_group_rec->objectid,
11085 block_group_rec->flags);
11086 ret = -1;
11087 } else {
11088 list_del_init(&block_group_rec->list);
11089 chunk_rec->bg_rec = block_group_rec;
11091 } else {
11092 if (!silent)
11093 fprintf(stderr,
11094 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11095 chunk_rec->objectid,
11096 chunk_rec->type,
11097 chunk_rec->offset,
11098 chunk_rec->length,
11099 chunk_rec->offset,
11100 chunk_rec->type_flags);
11101 ret = 1;
11104 if (metadump_v2)
11105 return ret;
11107 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11108 chunk_rec->num_stripes);
11109 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11110 devid = chunk_rec->stripes[i].devid;
11111 offset = chunk_rec->stripes[i].offset;
11112 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11113 devid, offset, length);
11114 if (dev_extent_item) {
11115 dev_extent_rec = container_of(dev_extent_item,
11116 struct device_extent_record,
11117 cache);
11118 if (dev_extent_rec->objectid != devid ||
11119 dev_extent_rec->offset != offset ||
11120 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11121 dev_extent_rec->length != length) {
11122 if (!silent)
11123 fprintf(stderr,
11124 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11125 chunk_rec->objectid,
11126 chunk_rec->type,
11127 chunk_rec->offset,
11128 chunk_rec->stripes[i].devid,
11129 chunk_rec->stripes[i].offset,
11130 dev_extent_rec->objectid,
11131 dev_extent_rec->offset,
11132 dev_extent_rec->length);
11133 ret = -1;
11134 } else {
11135 list_move(&dev_extent_rec->chunk_list,
11136 &chunk_rec->dextents);
11138 } else {
11139 if (!silent)
11140 fprintf(stderr,
11141 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11142 chunk_rec->objectid,
11143 chunk_rec->type,
11144 chunk_rec->offset,
11145 chunk_rec->stripes[i].devid,
11146 chunk_rec->stripes[i].offset);
11147 ret = -1;
11150 return ret;
11153 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11154 int check_chunks(struct cache_tree *chunk_cache,
11155 struct block_group_tree *block_group_cache,
11156 struct device_extent_tree *dev_extent_cache,
11157 struct list_head *good, struct list_head *bad,
11158 struct list_head *rebuild, int silent)
11160 struct cache_extent *chunk_item;
11161 struct chunk_record *chunk_rec;
11162 struct block_group_record *bg_rec;
11163 struct device_extent_record *dext_rec;
11164 int err;
11165 int ret = 0;
11167 chunk_item = first_cache_extent(chunk_cache);
11168 while (chunk_item) {
11169 chunk_rec = container_of(chunk_item, struct chunk_record,
11170 cache);
11171 err = check_chunk_refs(chunk_rec, block_group_cache,
11172 dev_extent_cache, silent);
11173 if (err < 0)
11174 ret = err;
11175 if (err == 0 && good)
11176 list_add_tail(&chunk_rec->list, good);
11177 if (err > 0 && rebuild)
11178 list_add_tail(&chunk_rec->list, rebuild);
11179 if (err < 0 && bad)
11180 list_add_tail(&chunk_rec->list, bad);
11181 chunk_item = next_cache_extent(chunk_item);
11184 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11185 if (!silent)
11186 fprintf(stderr,
11187 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11188 bg_rec->objectid,
11189 bg_rec->offset,
11190 bg_rec->flags);
11191 if (!ret)
11192 ret = 1;
11195 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11196 chunk_list) {
11197 if (!silent)
11198 fprintf(stderr,
11199 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11200 dext_rec->objectid,
11201 dext_rec->offset,
11202 dext_rec->length);
11203 if (!ret)
11204 ret = 1;
11206 return ret;
11210 static int check_device_used(struct device_record *dev_rec,
11211 struct device_extent_tree *dext_cache)
11213 struct cache_extent *cache;
11214 struct device_extent_record *dev_extent_rec;
11215 u64 total_byte = 0;
11217 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11218 while (cache) {
11219 dev_extent_rec = container_of(cache,
11220 struct device_extent_record,
11221 cache);
11222 if (dev_extent_rec->objectid != dev_rec->devid)
11223 break;
11225 list_del_init(&dev_extent_rec->device_list);
11226 total_byte += dev_extent_rec->length;
11227 cache = next_cache_extent(cache);
11230 if (total_byte != dev_rec->byte_used) {
11231 fprintf(stderr,
11232 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11233 total_byte, dev_rec->byte_used, dev_rec->objectid,
11234 dev_rec->type, dev_rec->offset);
11235 return -1;
11236 } else {
11237 return 0;
11241 /* check btrfs_dev_item -> btrfs_dev_extent */
11242 static int check_devices(struct rb_root *dev_cache,
11243 struct device_extent_tree *dev_extent_cache)
11245 struct rb_node *dev_node;
11246 struct device_record *dev_rec;
11247 struct device_extent_record *dext_rec;
11248 int err;
11249 int ret = 0;
11251 dev_node = rb_first(dev_cache);
11252 while (dev_node) {
11253 dev_rec = container_of(dev_node, struct device_record, node);
11254 err = check_device_used(dev_rec, dev_extent_cache);
11255 if (err)
11256 ret = err;
11258 dev_node = rb_next(dev_node);
11260 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11261 device_list) {
11262 fprintf(stderr,
11263 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11264 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11265 if (!ret)
11266 ret = 1;
11268 return ret;
11271 static int add_root_item_to_list(struct list_head *head,
11272 u64 objectid, u64 bytenr, u64 last_snapshot,
11273 u8 level, u8 drop_level,
11274 struct btrfs_key *drop_key)
11277 struct root_item_record *ri_rec;
11278 ri_rec = malloc(sizeof(*ri_rec));
11279 if (!ri_rec)
11280 return -ENOMEM;
11281 ri_rec->bytenr = bytenr;
11282 ri_rec->objectid = objectid;
11283 ri_rec->level = level;
11284 ri_rec->drop_level = drop_level;
11285 ri_rec->last_snapshot = last_snapshot;
11286 if (drop_key)
11287 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11288 list_add_tail(&ri_rec->list, head);
11290 return 0;
11293 static void free_root_item_list(struct list_head *list)
11295 struct root_item_record *ri_rec;
11297 while (!list_empty(list)) {
11298 ri_rec = list_first_entry(list, struct root_item_record,
11299 list);
11300 list_del_init(&ri_rec->list);
11301 free(ri_rec);
11305 static int deal_root_from_list(struct list_head *list,
11306 struct btrfs_root *root,
11307 struct block_info *bits,
11308 int bits_nr,
11309 struct cache_tree *pending,
11310 struct cache_tree *seen,
11311 struct cache_tree *reada,
11312 struct cache_tree *nodes,
11313 struct cache_tree *extent_cache,
11314 struct cache_tree *chunk_cache,
11315 struct rb_root *dev_cache,
11316 struct block_group_tree *block_group_cache,
11317 struct device_extent_tree *dev_extent_cache)
11319 int ret = 0;
11320 u64 last;
11322 while (!list_empty(list)) {
11323 struct root_item_record *rec;
11324 struct extent_buffer *buf;
11325 rec = list_entry(list->next,
11326 struct root_item_record, list);
11327 last = 0;
11328 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11329 if (!extent_buffer_uptodate(buf)) {
11330 free_extent_buffer(buf);
11331 ret = -EIO;
11332 break;
11334 ret = add_root_to_pending(buf, extent_cache, pending,
11335 seen, nodes, rec->objectid);
11336 if (ret < 0)
11337 break;
11339 * To rebuild extent tree, we need deal with snapshot
11340 * one by one, otherwise we deal with node firstly which
11341 * can maximize readahead.
11343 while (1) {
11344 ret = run_next_block(root, bits, bits_nr, &last,
11345 pending, seen, reada, nodes,
11346 extent_cache, chunk_cache,
11347 dev_cache, block_group_cache,
11348 dev_extent_cache, rec);
11349 if (ret != 0)
11350 break;
11352 free_extent_buffer(buf);
11353 list_del(&rec->list);
11354 free(rec);
11355 if (ret < 0)
11356 break;
11358 while (ret >= 0) {
11359 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11360 reada, nodes, extent_cache, chunk_cache,
11361 dev_cache, block_group_cache,
11362 dev_extent_cache, NULL);
11363 if (ret != 0) {
11364 if (ret > 0)
11365 ret = 0;
11366 break;
11369 return ret;
11372 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11374 struct rb_root dev_cache;
11375 struct cache_tree chunk_cache;
11376 struct block_group_tree block_group_cache;
11377 struct device_extent_tree dev_extent_cache;
11378 struct cache_tree extent_cache;
11379 struct cache_tree seen;
11380 struct cache_tree pending;
11381 struct cache_tree reada;
11382 struct cache_tree nodes;
11383 struct extent_io_tree excluded_extents;
11384 struct cache_tree corrupt_blocks;
11385 struct btrfs_path path;
11386 struct btrfs_key key;
11387 struct btrfs_key found_key;
11388 int ret, err = 0;
11389 struct block_info *bits;
11390 int bits_nr;
11391 struct extent_buffer *leaf;
11392 int slot;
11393 struct btrfs_root_item ri;
11394 struct list_head dropping_trees;
11395 struct list_head normal_trees;
11396 struct btrfs_root *root1;
11397 struct btrfs_root *root;
11398 u64 objectid;
11399 u8 level;
11401 root = fs_info->fs_root;
11402 dev_cache = RB_ROOT;
11403 cache_tree_init(&chunk_cache);
11404 block_group_tree_init(&block_group_cache);
11405 device_extent_tree_init(&dev_extent_cache);
11407 cache_tree_init(&extent_cache);
11408 cache_tree_init(&seen);
11409 cache_tree_init(&pending);
11410 cache_tree_init(&nodes);
11411 cache_tree_init(&reada);
11412 cache_tree_init(&corrupt_blocks);
11413 extent_io_tree_init(&excluded_extents);
11414 INIT_LIST_HEAD(&dropping_trees);
11415 INIT_LIST_HEAD(&normal_trees);
11417 if (repair) {
11418 fs_info->excluded_extents = &excluded_extents;
11419 fs_info->fsck_extent_cache = &extent_cache;
11420 fs_info->free_extent_hook = free_extent_hook;
11421 fs_info->corrupt_blocks = &corrupt_blocks;
11424 bits_nr = 1024;
11425 bits = malloc(bits_nr * sizeof(struct block_info));
11426 if (!bits) {
11427 perror("malloc");
11428 exit(1);
11431 if (ctx.progress_enabled) {
11432 ctx.tp = TASK_EXTENTS;
11433 task_start(ctx.info);
11436 again:
11437 root1 = fs_info->tree_root;
11438 level = btrfs_header_level(root1->node);
11439 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11440 root1->node->start, 0, level, 0, NULL);
11441 if (ret < 0)
11442 goto out;
11443 root1 = fs_info->chunk_root;
11444 level = btrfs_header_level(root1->node);
11445 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11446 root1->node->start, 0, level, 0, NULL);
11447 if (ret < 0)
11448 goto out;
11449 btrfs_init_path(&path);
11450 key.offset = 0;
11451 key.objectid = 0;
11452 key.type = BTRFS_ROOT_ITEM_KEY;
11453 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11454 if (ret < 0)
11455 goto out;
11456 while(1) {
11457 leaf = path.nodes[0];
11458 slot = path.slots[0];
11459 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11460 ret = btrfs_next_leaf(root, &path);
11461 if (ret != 0)
11462 break;
11463 leaf = path.nodes[0];
11464 slot = path.slots[0];
11466 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11467 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11468 unsigned long offset;
11469 u64 last_snapshot;
11471 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11472 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11473 last_snapshot = btrfs_root_last_snapshot(&ri);
11474 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11475 level = btrfs_root_level(&ri);
11476 ret = add_root_item_to_list(&normal_trees,
11477 found_key.objectid,
11478 btrfs_root_bytenr(&ri),
11479 last_snapshot, level,
11480 0, NULL);
11481 if (ret < 0)
11482 goto out;
11483 } else {
11484 level = btrfs_root_level(&ri);
11485 objectid = found_key.objectid;
11486 btrfs_disk_key_to_cpu(&found_key,
11487 &ri.drop_progress);
11488 ret = add_root_item_to_list(&dropping_trees,
11489 objectid,
11490 btrfs_root_bytenr(&ri),
11491 last_snapshot, level,
11492 ri.drop_level, &found_key);
11493 if (ret < 0)
11494 goto out;
11497 path.slots[0]++;
11499 btrfs_release_path(&path);
11502 * check_block can return -EAGAIN if it fixes something, please keep
11503 * this in mind when dealing with return values from these functions, if
11504 * we get -EAGAIN we want to fall through and restart the loop.
11506 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11507 &seen, &reada, &nodes, &extent_cache,
11508 &chunk_cache, &dev_cache, &block_group_cache,
11509 &dev_extent_cache);
11510 if (ret < 0) {
11511 if (ret == -EAGAIN)
11512 goto loop;
11513 goto out;
11515 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11516 &pending, &seen, &reada, &nodes,
11517 &extent_cache, &chunk_cache, &dev_cache,
11518 &block_group_cache, &dev_extent_cache);
11519 if (ret < 0) {
11520 if (ret == -EAGAIN)
11521 goto loop;
11522 goto out;
11525 ret = check_chunks(&chunk_cache, &block_group_cache,
11526 &dev_extent_cache, NULL, NULL, NULL, 0);
11527 if (ret) {
11528 if (ret == -EAGAIN)
11529 goto loop;
11530 err = ret;
11533 ret = check_extent_refs(root, &extent_cache);
11534 if (ret < 0) {
11535 if (ret == -EAGAIN)
11536 goto loop;
11537 goto out;
11540 ret = check_devices(&dev_cache, &dev_extent_cache);
11541 if (ret && err)
11542 ret = err;
11544 out:
11545 task_stop(ctx.info);
11546 if (repair) {
11547 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11548 extent_io_tree_cleanup(&excluded_extents);
11549 fs_info->fsck_extent_cache = NULL;
11550 fs_info->free_extent_hook = NULL;
11551 fs_info->corrupt_blocks = NULL;
11552 fs_info->excluded_extents = NULL;
11554 free(bits);
11555 free_chunk_cache_tree(&chunk_cache);
11556 free_device_cache_tree(&dev_cache);
11557 free_block_group_tree(&block_group_cache);
11558 free_device_extent_tree(&dev_extent_cache);
11559 free_extent_cache_tree(&seen);
11560 free_extent_cache_tree(&pending);
11561 free_extent_cache_tree(&reada);
11562 free_extent_cache_tree(&nodes);
11563 free_root_item_list(&normal_trees);
11564 free_root_item_list(&dropping_trees);
11565 return ret;
11566 loop:
11567 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11568 free_extent_cache_tree(&seen);
11569 free_extent_cache_tree(&pending);
11570 free_extent_cache_tree(&reada);
11571 free_extent_cache_tree(&nodes);
11572 free_chunk_cache_tree(&chunk_cache);
11573 free_block_group_tree(&block_group_cache);
11574 free_device_cache_tree(&dev_cache);
11575 free_device_extent_tree(&dev_extent_cache);
11576 free_extent_record_cache(&extent_cache);
11577 free_root_item_list(&normal_trees);
11578 free_root_item_list(&dropping_trees);
11579 extent_io_tree_cleanup(&excluded_extents);
11580 goto again;
11583 static int check_extent_inline_ref(struct extent_buffer *eb,
11584 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11586 int ret;
11587 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11589 switch (type) {
11590 case BTRFS_TREE_BLOCK_REF_KEY:
11591 case BTRFS_EXTENT_DATA_REF_KEY:
11592 case BTRFS_SHARED_BLOCK_REF_KEY:
11593 case BTRFS_SHARED_DATA_REF_KEY:
11594 ret = 0;
11595 break;
11596 default:
11597 error("extent[%llu %u %llu] has unknown ref type: %d",
11598 key->objectid, key->type, key->offset, type);
11599 ret = UNKNOWN_TYPE;
11600 break;
11603 return ret;
11607 * Check backrefs of a tree block given by @bytenr or @eb.
11609 * @root: the root containing the @bytenr or @eb
11610 * @eb: tree block extent buffer, can be NULL
11611 * @bytenr: bytenr of the tree block to search
11612 * @level: tree level of the tree block
11613 * @owner: owner of the tree block
11615 * Return >0 for any error found and output error message
11616 * Return 0 for no error found
11618 static int check_tree_block_ref(struct btrfs_root *root,
11619 struct extent_buffer *eb, u64 bytenr,
11620 int level, u64 owner, struct node_refs *nrefs)
11622 struct btrfs_key key;
11623 struct btrfs_root *extent_root = root->fs_info->extent_root;
11624 struct btrfs_path path;
11625 struct btrfs_extent_item *ei;
11626 struct btrfs_extent_inline_ref *iref;
11627 struct extent_buffer *leaf;
11628 unsigned long end;
11629 unsigned long ptr;
11630 int slot;
11631 int skinny_level;
11632 int root_level = btrfs_header_level(root->node);
11633 int type;
11634 u32 nodesize = root->fs_info->nodesize;
11635 u32 item_size;
11636 u64 offset;
11637 int tree_reloc_root = 0;
11638 int found_ref = 0;
11639 int err = 0;
11640 int ret;
11641 int strict = 1;
11642 int parent = 0;
11644 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11645 btrfs_header_bytenr(root->node) == bytenr)
11646 tree_reloc_root = 1;
11647 btrfs_init_path(&path);
11648 key.objectid = bytenr;
11649 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11650 key.type = BTRFS_METADATA_ITEM_KEY;
11651 else
11652 key.type = BTRFS_EXTENT_ITEM_KEY;
11653 key.offset = (u64)-1;
11655 /* Search for the backref in extent tree */
11656 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11657 if (ret < 0) {
11658 err |= BACKREF_MISSING;
11659 goto out;
11661 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11662 if (ret) {
11663 err |= BACKREF_MISSING;
11664 goto out;
11667 leaf = path.nodes[0];
11668 slot = path.slots[0];
11669 btrfs_item_key_to_cpu(leaf, &key, slot);
11671 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11673 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11674 skinny_level = (int)key.offset;
11675 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11676 } else {
11677 struct btrfs_tree_block_info *info;
11679 info = (struct btrfs_tree_block_info *)(ei + 1);
11680 skinny_level = btrfs_tree_block_level(leaf, info);
11681 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11685 if (eb) {
11686 u64 header_gen;
11687 u64 extent_gen;
11690 * Due to the feature of shared tree blocks, if the upper node
11691 * is a fs root or shared node, the extent of checked node may
11692 * not be updated until the next CoW.
11694 if (nrefs)
11695 strict = should_check_extent_strictly(root, nrefs,
11696 level);
11697 if (!(btrfs_extent_flags(leaf, ei) &
11698 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11699 error(
11700 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11701 key.objectid, nodesize,
11702 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11703 err = BACKREF_MISMATCH;
11705 header_gen = btrfs_header_generation(eb);
11706 extent_gen = btrfs_extent_generation(leaf, ei);
11707 if (header_gen != extent_gen) {
11708 error(
11709 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11710 key.objectid, nodesize, header_gen,
11711 extent_gen);
11712 err = BACKREF_MISMATCH;
11714 if (level != skinny_level) {
11715 error(
11716 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11717 key.objectid, nodesize, level, skinny_level);
11718 err = BACKREF_MISMATCH;
11720 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11721 error(
11722 "extent[%llu %u] is referred by other roots than %llu",
11723 key.objectid, nodesize, root->objectid);
11724 err = BACKREF_MISMATCH;
11729 * Iterate the extent/metadata item to find the exact backref
11731 item_size = btrfs_item_size_nr(leaf, slot);
11732 ptr = (unsigned long)iref;
11733 end = (unsigned long)ei + item_size;
11735 while (ptr < end) {
11736 iref = (struct btrfs_extent_inline_ref *)ptr;
11737 type = btrfs_extent_inline_ref_type(leaf, iref);
11738 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11740 ret = check_extent_inline_ref(leaf, &key, iref);
11741 if (ret) {
11742 err |= ret;
11743 break;
11745 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11746 if (offset == root->objectid)
11747 found_ref = 1;
11748 if (!strict && owner == offset)
11749 found_ref = 1;
11750 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11752 * Backref of tree reloc root points to itself, no need
11753 * to check backref any more.
11755 if (tree_reloc_root) {
11756 found_ref = 1;
11757 } else {
11759 * Check if the backref points to valid
11760 * referencer
11762 found_ref = !check_tree_block_ref( root, NULL,
11763 offset, level + 1, owner,
11764 NULL);
11768 if (found_ref)
11769 break;
11770 ptr += btrfs_extent_inline_ref_size(type);
11774 * Inlined extent item doesn't have what we need, check
11775 * TREE_BLOCK_REF_KEY
11777 if (!found_ref) {
11778 btrfs_release_path(&path);
11779 key.objectid = bytenr;
11780 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11781 key.offset = root->objectid;
11783 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11784 if (!ret)
11785 found_ref = 1;
11787 if (!found_ref)
11788 err |= BACKREF_MISSING;
11789 out:
11790 btrfs_release_path(&path);
11791 if (nrefs && strict &&
11792 level < root_level && nrefs->full_backref[level + 1])
11793 parent = nrefs->bytenr[level + 1];
11794 if (eb && (err & BACKREF_MISSING))
11795 error(
11796 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11797 bytenr, nodesize, owner, level,
11798 parent ? "parent" : "root",
11799 parent ? parent : root->objectid);
11800 return err;
11804 * If @err contains BACKREF_MISSING then add extent of the
11805 * file_extent_data_item.
11807 * Returns error bits after reapir.
11809 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11810 struct btrfs_root *root,
11811 struct btrfs_path *pathp,
11812 struct node_refs *nrefs,
11813 int err)
11815 struct btrfs_file_extent_item *fi;
11816 struct btrfs_key fi_key;
11817 struct btrfs_key key;
11818 struct btrfs_extent_item *ei;
11819 struct btrfs_path path;
11820 struct btrfs_root *extent_root = root->fs_info->extent_root;
11821 struct extent_buffer *eb;
11822 u64 size;
11823 u64 disk_bytenr;
11824 u64 num_bytes;
11825 u64 parent;
11826 u64 offset;
11827 u64 extent_offset;
11828 u64 file_offset;
11829 int generation;
11830 int slot;
11831 int ret = 0;
11833 eb = pathp->nodes[0];
11834 slot = pathp->slots[0];
11835 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11836 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11838 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11839 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11840 return err;
11842 file_offset = fi_key.offset;
11843 generation = btrfs_file_extent_generation(eb, fi);
11844 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11845 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11846 extent_offset = btrfs_file_extent_offset(eb, fi);
11847 offset = file_offset - extent_offset;
11849 /* now repair only adds backref */
11850 if ((err & BACKREF_MISSING) == 0)
11851 return err;
11853 /* search extent item */
11854 key.objectid = disk_bytenr;
11855 key.type = BTRFS_EXTENT_ITEM_KEY;
11856 key.offset = num_bytes;
11858 btrfs_init_path(&path);
11859 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11860 if (ret < 0) {
11861 ret = -EIO;
11862 goto out;
11865 /* insert an extent item */
11866 if (ret > 0) {
11867 key.objectid = disk_bytenr;
11868 key.type = BTRFS_EXTENT_ITEM_KEY;
11869 key.offset = num_bytes;
11870 size = sizeof(*ei);
11872 btrfs_release_path(&path);
11873 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11874 size);
11875 if (ret)
11876 goto out;
11877 eb = path.nodes[0];
11878 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11880 btrfs_set_extent_refs(eb, ei, 0);
11881 btrfs_set_extent_generation(eb, ei, generation);
11882 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11884 btrfs_mark_buffer_dirty(eb);
11885 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11886 num_bytes, 1, 0);
11887 btrfs_release_path(&path);
11890 if (nrefs->full_backref[0])
11891 parent = btrfs_header_bytenr(eb);
11892 else
11893 parent = 0;
11895 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11896 root->objectid,
11897 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11898 offset);
11899 if (ret) {
11900 error(
11901 "failed to increase extent data backref[%llu %llu] root %llu",
11902 disk_bytenr, num_bytes, root->objectid);
11903 goto out;
11904 } else {
11905 printf("Add one extent data backref [%llu %llu]\n",
11906 disk_bytenr, num_bytes);
11909 err &= ~BACKREF_MISSING;
11910 out:
11911 if (ret)
11912 error("can't repair root %llu extent data item[%llu %llu]",
11913 root->objectid, disk_bytenr, num_bytes);
11914 return err;
11918 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11920 * Return >0 any error found and output error message
11921 * Return 0 for no error found
11923 static int check_extent_data_item(struct btrfs_root *root,
11924 struct btrfs_path *pathp,
11925 struct node_refs *nrefs, int account_bytes)
11927 struct btrfs_file_extent_item *fi;
11928 struct extent_buffer *eb = pathp->nodes[0];
11929 struct btrfs_path path;
11930 struct btrfs_root *extent_root = root->fs_info->extent_root;
11931 struct btrfs_key fi_key;
11932 struct btrfs_key dbref_key;
11933 struct extent_buffer *leaf;
11934 struct btrfs_extent_item *ei;
11935 struct btrfs_extent_inline_ref *iref;
11936 struct btrfs_extent_data_ref *dref;
11937 u64 owner;
11938 u64 disk_bytenr;
11939 u64 disk_num_bytes;
11940 u64 extent_num_bytes;
11941 u64 extent_flags;
11942 u32 item_size;
11943 unsigned long end;
11944 unsigned long ptr;
11945 int type;
11946 u64 ref_root;
11947 int found_dbackref = 0;
11948 int slot = pathp->slots[0];
11949 int err = 0;
11950 int ret;
11951 int strict;
11953 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11954 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11956 /* Nothing to check for hole and inline data extents */
11957 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11958 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11959 return 0;
11961 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11962 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11963 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11965 /* Check unaligned disk_num_bytes and num_bytes */
11966 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11967 error(
11968 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11969 fi_key.objectid, fi_key.offset, disk_num_bytes,
11970 root->fs_info->sectorsize);
11971 err |= BYTES_UNALIGNED;
11972 } else if (account_bytes) {
11973 data_bytes_allocated += disk_num_bytes;
11975 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11976 error(
11977 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11978 fi_key.objectid, fi_key.offset, extent_num_bytes,
11979 root->fs_info->sectorsize);
11980 err |= BYTES_UNALIGNED;
11981 } else if (account_bytes) {
11982 data_bytes_referenced += extent_num_bytes;
11984 owner = btrfs_header_owner(eb);
11986 /* Check the extent item of the file extent in extent tree */
11987 btrfs_init_path(&path);
11988 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11989 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11990 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11992 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11993 if (ret)
11994 goto out;
11996 leaf = path.nodes[0];
11997 slot = path.slots[0];
11998 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12000 extent_flags = btrfs_extent_flags(leaf, ei);
12002 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12003 error(
12004 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12005 disk_bytenr, disk_num_bytes,
12006 BTRFS_EXTENT_FLAG_DATA);
12007 err |= BACKREF_MISMATCH;
12010 /* Check data backref inside that extent item */
12011 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12012 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12013 ptr = (unsigned long)iref;
12014 end = (unsigned long)ei + item_size;
12015 strict = should_check_extent_strictly(root, nrefs, -1);
12017 while (ptr < end) {
12018 iref = (struct btrfs_extent_inline_ref *)ptr;
12019 type = btrfs_extent_inline_ref_type(leaf, iref);
12020 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12022 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12023 if (ret) {
12024 err |= ret;
12025 break;
12027 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12028 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12029 if (ref_root == root->objectid)
12030 found_dbackref = 1;
12031 else if (!strict && owner == ref_root)
12032 found_dbackref = 1;
12033 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12034 found_dbackref = !check_tree_block_ref(root, NULL,
12035 btrfs_extent_inline_ref_offset(leaf, iref),
12036 0, owner, NULL);
12039 if (found_dbackref)
12040 break;
12041 ptr += btrfs_extent_inline_ref_size(type);
12044 if (!found_dbackref) {
12045 btrfs_release_path(&path);
12047 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12048 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12049 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12050 dbref_key.offset = hash_extent_data_ref(root->objectid,
12051 fi_key.objectid, fi_key.offset);
12053 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12054 &dbref_key, &path, 0, 0);
12055 if (!ret) {
12056 found_dbackref = 1;
12057 goto out;
12060 btrfs_release_path(&path);
12063 * Neither inlined nor EXTENT_DATA_REF found, try
12064 * SHARED_DATA_REF as last chance.
12066 dbref_key.objectid = disk_bytenr;
12067 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12068 dbref_key.offset = eb->start;
12070 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12071 &dbref_key, &path, 0, 0);
12072 if (!ret) {
12073 found_dbackref = 1;
12074 goto out;
12078 out:
12079 if (!found_dbackref)
12080 err |= BACKREF_MISSING;
12081 btrfs_release_path(&path);
12082 if (err & BACKREF_MISSING) {
12083 error("data extent[%llu %llu] backref lost",
12084 disk_bytenr, disk_num_bytes);
12086 return err;
12090 * Get real tree block level for the case like shared block
12091 * Return >= 0 as tree level
12092 * Return <0 for error
12094 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12096 struct extent_buffer *eb;
12097 struct btrfs_path path;
12098 struct btrfs_key key;
12099 struct btrfs_extent_item *ei;
12100 u64 flags;
12101 u64 transid;
12102 u8 backref_level;
12103 u8 header_level;
12104 int ret;
12106 /* Search extent tree for extent generation and level */
12107 key.objectid = bytenr;
12108 key.type = BTRFS_METADATA_ITEM_KEY;
12109 key.offset = (u64)-1;
12111 btrfs_init_path(&path);
12112 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12113 if (ret < 0)
12114 goto release_out;
12115 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12116 if (ret < 0)
12117 goto release_out;
12118 if (ret > 0) {
12119 ret = -ENOENT;
12120 goto release_out;
12123 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12124 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12125 struct btrfs_extent_item);
12126 flags = btrfs_extent_flags(path.nodes[0], ei);
12127 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12128 ret = -ENOENT;
12129 goto release_out;
12132 /* Get transid for later read_tree_block() check */
12133 transid = btrfs_extent_generation(path.nodes[0], ei);
12135 /* Get backref level as one source */
12136 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12137 backref_level = key.offset;
12138 } else {
12139 struct btrfs_tree_block_info *info;
12141 info = (struct btrfs_tree_block_info *)(ei + 1);
12142 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12144 btrfs_release_path(&path);
12146 /* Get level from tree block as an alternative source */
12147 eb = read_tree_block(fs_info, bytenr, transid);
12148 if (!extent_buffer_uptodate(eb)) {
12149 free_extent_buffer(eb);
12150 return -EIO;
12152 header_level = btrfs_header_level(eb);
12153 free_extent_buffer(eb);
12155 if (header_level != backref_level)
12156 return -EIO;
12157 return header_level;
12159 release_out:
12160 btrfs_release_path(&path);
12161 return ret;
12165 * Check if a tree block backref is valid (points to a valid tree block)
12166 * if level == -1, level will be resolved
12167 * Return >0 for any error found and print error message
12169 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12170 u64 bytenr, int level)
12172 struct btrfs_root *root;
12173 struct btrfs_key key;
12174 struct btrfs_path path;
12175 struct extent_buffer *eb;
12176 struct extent_buffer *node;
12177 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12178 int err = 0;
12179 int ret;
12181 /* Query level for level == -1 special case */
12182 if (level == -1)
12183 level = query_tree_block_level(fs_info, bytenr);
12184 if (level < 0) {
12185 err |= REFERENCER_MISSING;
12186 goto out;
12189 key.objectid = root_id;
12190 key.type = BTRFS_ROOT_ITEM_KEY;
12191 key.offset = (u64)-1;
12193 root = btrfs_read_fs_root(fs_info, &key);
12194 if (IS_ERR(root)) {
12195 err |= REFERENCER_MISSING;
12196 goto out;
12199 /* Read out the tree block to get item/node key */
12200 eb = read_tree_block(fs_info, bytenr, 0);
12201 if (!extent_buffer_uptodate(eb)) {
12202 err |= REFERENCER_MISSING;
12203 free_extent_buffer(eb);
12204 goto out;
12207 /* Empty tree, no need to check key */
12208 if (!btrfs_header_nritems(eb) && !level) {
12209 free_extent_buffer(eb);
12210 goto out;
12213 if (level)
12214 btrfs_node_key_to_cpu(eb, &key, 0);
12215 else
12216 btrfs_item_key_to_cpu(eb, &key, 0);
12218 free_extent_buffer(eb);
12220 btrfs_init_path(&path);
12221 path.lowest_level = level;
12222 /* Search with the first key, to ensure we can reach it */
12223 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12224 if (ret < 0) {
12225 err |= REFERENCER_MISSING;
12226 goto release_out;
12229 node = path.nodes[level];
12230 if (btrfs_header_bytenr(node) != bytenr) {
12231 error(
12232 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12233 bytenr, nodesize, bytenr,
12234 btrfs_header_bytenr(node));
12235 err |= REFERENCER_MISMATCH;
12237 if (btrfs_header_level(node) != level) {
12238 error(
12239 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12240 bytenr, nodesize, level,
12241 btrfs_header_level(node));
12242 err |= REFERENCER_MISMATCH;
12245 release_out:
12246 btrfs_release_path(&path);
12247 out:
12248 if (err & REFERENCER_MISSING) {
12249 if (level < 0)
12250 error("extent [%llu %d] lost referencer (owner: %llu)",
12251 bytenr, nodesize, root_id);
12252 else
12253 error(
12254 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12255 bytenr, nodesize, root_id, level);
12258 return err;
12262 * Check if tree block @eb is tree reloc root.
12263 * Return 0 if it's not or any problem happens
12264 * Return 1 if it's a tree reloc root
12266 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12267 struct extent_buffer *eb)
12269 struct btrfs_root *tree_reloc_root;
12270 struct btrfs_key key;
12271 u64 bytenr = btrfs_header_bytenr(eb);
12272 u64 owner = btrfs_header_owner(eb);
12273 int ret = 0;
12275 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12276 key.offset = owner;
12277 key.type = BTRFS_ROOT_ITEM_KEY;
12279 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12280 if (IS_ERR(tree_reloc_root))
12281 return 0;
12283 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12284 ret = 1;
12285 btrfs_free_fs_root(tree_reloc_root);
12286 return ret;
12290 * Check referencer for shared block backref
12291 * If level == -1, this function will resolve the level.
12293 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12294 u64 parent, u64 bytenr, int level)
12296 struct extent_buffer *eb;
12297 u32 nr;
12298 int found_parent = 0;
12299 int i;
12301 eb = read_tree_block(fs_info, parent, 0);
12302 if (!extent_buffer_uptodate(eb))
12303 goto out;
12305 if (level == -1)
12306 level = query_tree_block_level(fs_info, bytenr);
12307 if (level < 0)
12308 goto out;
12310 /* It's possible it's a tree reloc root */
12311 if (parent == bytenr) {
12312 if (is_tree_reloc_root(fs_info, eb))
12313 found_parent = 1;
12314 goto out;
12317 if (level + 1 != btrfs_header_level(eb))
12318 goto out;
12320 nr = btrfs_header_nritems(eb);
12321 for (i = 0; i < nr; i++) {
12322 if (bytenr == btrfs_node_blockptr(eb, i)) {
12323 found_parent = 1;
12324 break;
12327 out:
12328 free_extent_buffer(eb);
12329 if (!found_parent) {
12330 error(
12331 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12332 bytenr, fs_info->nodesize, parent, level);
12333 return REFERENCER_MISSING;
12335 return 0;
12339 * Check referencer for normal (inlined) data ref
12340 * If len == 0, it will be resolved by searching in extent tree
12342 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12343 u64 root_id, u64 objectid, u64 offset,
12344 u64 bytenr, u64 len, u32 count)
12346 struct btrfs_root *root;
12347 struct btrfs_root *extent_root = fs_info->extent_root;
12348 struct btrfs_key key;
12349 struct btrfs_path path;
12350 struct extent_buffer *leaf;
12351 struct btrfs_file_extent_item *fi;
12352 u32 found_count = 0;
12353 int slot;
12354 int ret = 0;
12356 if (!len) {
12357 key.objectid = bytenr;
12358 key.type = BTRFS_EXTENT_ITEM_KEY;
12359 key.offset = (u64)-1;
12361 btrfs_init_path(&path);
12362 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12363 if (ret < 0)
12364 goto out;
12365 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12366 if (ret)
12367 goto out;
12368 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12369 if (key.objectid != bytenr ||
12370 key.type != BTRFS_EXTENT_ITEM_KEY)
12371 goto out;
12372 len = key.offset;
12373 btrfs_release_path(&path);
12375 key.objectid = root_id;
12376 key.type = BTRFS_ROOT_ITEM_KEY;
12377 key.offset = (u64)-1;
12378 btrfs_init_path(&path);
12380 root = btrfs_read_fs_root(fs_info, &key);
12381 if (IS_ERR(root))
12382 goto out;
12384 key.objectid = objectid;
12385 key.type = BTRFS_EXTENT_DATA_KEY;
12387 * It can be nasty as data backref offset is
12388 * file offset - file extent offset, which is smaller or
12389 * equal to original backref offset. The only special case is
12390 * overflow. So we need to special check and do further search.
12392 key.offset = offset & (1ULL << 63) ? 0 : offset;
12394 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12395 if (ret < 0)
12396 goto out;
12399 * Search afterwards to get correct one
12400 * NOTE: As we must do a comprehensive check on the data backref to
12401 * make sure the dref count also matches, we must iterate all file
12402 * extents for that inode.
12404 while (1) {
12405 leaf = path.nodes[0];
12406 slot = path.slots[0];
12408 if (slot >= btrfs_header_nritems(leaf))
12409 goto next;
12410 btrfs_item_key_to_cpu(leaf, &key, slot);
12411 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12412 break;
12413 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12415 * Except normal disk bytenr and disk num bytes, we still
12416 * need to do extra check on dbackref offset as
12417 * dbackref offset = file_offset - file_extent_offset
12419 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12420 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12421 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12422 offset)
12423 found_count++;
12425 next:
12426 ret = btrfs_next_item(root, &path);
12427 if (ret)
12428 break;
12430 out:
12431 btrfs_release_path(&path);
12432 if (found_count != count) {
12433 error(
12434 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12435 bytenr, len, root_id, objectid, offset, count, found_count);
12436 return REFERENCER_MISSING;
12438 return 0;
12442 * Check if the referencer of a shared data backref exists
12444 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12445 u64 parent, u64 bytenr)
12447 struct extent_buffer *eb;
12448 struct btrfs_key key;
12449 struct btrfs_file_extent_item *fi;
12450 u32 nr;
12451 int found_parent = 0;
12452 int i;
12454 eb = read_tree_block(fs_info, parent, 0);
12455 if (!extent_buffer_uptodate(eb))
12456 goto out;
12458 nr = btrfs_header_nritems(eb);
12459 for (i = 0; i < nr; i++) {
12460 btrfs_item_key_to_cpu(eb, &key, i);
12461 if (key.type != BTRFS_EXTENT_DATA_KEY)
12462 continue;
12464 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12465 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12466 continue;
12468 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12469 found_parent = 1;
12470 break;
12474 out:
12475 free_extent_buffer(eb);
12476 if (!found_parent) {
12477 error("shared extent %llu referencer lost (parent: %llu)",
12478 bytenr, parent);
12479 return REFERENCER_MISSING;
12481 return 0;
12485 * Only delete backref if REFERENCER_MISSING now
12487 * Returns <0 the extent was deleted
12488 * Returns >0 the backref was deleted but extent still exists, returned value
12489 * means error after repair
12490 * Returns 0 nothing happened
12492 static int repair_extent_item(struct btrfs_trans_handle *trans,
12493 struct btrfs_root *root, struct btrfs_path *path,
12494 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12495 u64 owner, u64 offset, int err)
12497 struct btrfs_key old_key;
12498 int freed = 0;
12499 int ret;
12501 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12503 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12504 /* delete the backref */
12505 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12506 num_bytes, parent, root_objectid, owner, offset);
12507 if (!ret) {
12508 freed = 1;
12509 err &= ~REFERENCER_MISSING;
12510 printf("Delete backref in extent [%llu %llu]\n",
12511 bytenr, num_bytes);
12512 } else {
12513 error("fail to delete backref in extent [%llu %llu]",
12514 bytenr, num_bytes);
12518 /* btrfs_free_extent may delete the extent */
12519 btrfs_release_path(path);
12520 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12522 if (ret)
12523 ret = -ENOENT;
12524 else if (freed)
12525 ret = err;
12526 return ret;
12530 * This function will check a given extent item, including its backref and
12531 * itself (like crossing stripe boundary and type)
12533 * Since we don't use extent_record anymore, introduce new error bit
12535 static int check_extent_item(struct btrfs_trans_handle *trans,
12536 struct btrfs_fs_info *fs_info,
12537 struct btrfs_path *path)
12539 struct btrfs_extent_item *ei;
12540 struct btrfs_extent_inline_ref *iref;
12541 struct btrfs_extent_data_ref *dref;
12542 struct extent_buffer *eb = path->nodes[0];
12543 unsigned long end;
12544 unsigned long ptr;
12545 int slot = path->slots[0];
12546 int type;
12547 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12548 u32 item_size = btrfs_item_size_nr(eb, slot);
12549 u64 flags;
12550 u64 offset;
12551 u64 parent;
12552 u64 num_bytes;
12553 u64 root_objectid;
12554 u64 owner;
12555 u64 owner_offset;
12556 int metadata = 0;
12557 int level;
12558 struct btrfs_key key;
12559 int ret;
12560 int err = 0;
12562 btrfs_item_key_to_cpu(eb, &key, slot);
12563 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12564 bytes_used += key.offset;
12565 num_bytes = key.offset;
12566 } else {
12567 bytes_used += nodesize;
12568 num_bytes = nodesize;
12571 if (item_size < sizeof(*ei)) {
12573 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12574 * old thing when on disk format is still un-determined.
12575 * No need to care about it anymore
12577 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12578 return -ENOTTY;
12581 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12582 flags = btrfs_extent_flags(eb, ei);
12584 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12585 metadata = 1;
12586 if (metadata && check_crossing_stripes(global_info, key.objectid,
12587 eb->len)) {
12588 error("bad metadata [%llu, %llu) crossing stripe boundary",
12589 key.objectid, key.objectid + nodesize);
12590 err |= CROSSING_STRIPE_BOUNDARY;
12593 ptr = (unsigned long)(ei + 1);
12595 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12596 /* Old EXTENT_ITEM metadata */
12597 struct btrfs_tree_block_info *info;
12599 info = (struct btrfs_tree_block_info *)ptr;
12600 level = btrfs_tree_block_level(eb, info);
12601 ptr += sizeof(struct btrfs_tree_block_info);
12602 } else {
12603 /* New METADATA_ITEM */
12604 level = key.offset;
12606 end = (unsigned long)ei + item_size;
12608 next:
12609 /* Reached extent item end normally */
12610 if (ptr == end)
12611 goto out;
12613 /* Beyond extent item end, wrong item size */
12614 if (ptr > end) {
12615 err |= ITEM_SIZE_MISMATCH;
12616 error("extent item at bytenr %llu slot %d has wrong size",
12617 eb->start, slot);
12618 goto out;
12621 parent = 0;
12622 root_objectid = 0;
12623 owner = 0;
12624 owner_offset = 0;
12625 /* Now check every backref in this extent item */
12626 iref = (struct btrfs_extent_inline_ref *)ptr;
12627 type = btrfs_extent_inline_ref_type(eb, iref);
12628 offset = btrfs_extent_inline_ref_offset(eb, iref);
12629 switch (type) {
12630 case BTRFS_TREE_BLOCK_REF_KEY:
12631 root_objectid = offset;
12632 owner = level;
12633 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12634 level);
12635 err |= ret;
12636 break;
12637 case BTRFS_SHARED_BLOCK_REF_KEY:
12638 parent = offset;
12639 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12640 level);
12641 err |= ret;
12642 break;
12643 case BTRFS_EXTENT_DATA_REF_KEY:
12644 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12645 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12646 owner = btrfs_extent_data_ref_objectid(eb, dref);
12647 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12648 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12649 owner_offset, key.objectid, key.offset,
12650 btrfs_extent_data_ref_count(eb, dref));
12651 err |= ret;
12652 break;
12653 case BTRFS_SHARED_DATA_REF_KEY:
12654 parent = offset;
12655 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12656 err |= ret;
12657 break;
12658 default:
12659 error("extent[%llu %d %llu] has unknown ref type: %d",
12660 key.objectid, key.type, key.offset, type);
12661 ret = UNKNOWN_TYPE;
12662 err |= ret;
12663 goto out;
12666 if (err && repair) {
12667 ret = repair_extent_item(trans, fs_info->extent_root, path,
12668 key.objectid, num_bytes, parent, root_objectid,
12669 owner, owner_offset, ret);
12670 if (ret < 0)
12671 goto out;
12672 if (ret) {
12673 goto next;
12674 err = ret;
12678 ptr += btrfs_extent_inline_ref_size(type);
12679 goto next;
12681 out:
12682 return err;
12686 * Check if a dev extent item is referred correctly by its chunk
12688 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12689 struct extent_buffer *eb, int slot)
12691 struct btrfs_root *chunk_root = fs_info->chunk_root;
12692 struct btrfs_dev_extent *ptr;
12693 struct btrfs_path path;
12694 struct btrfs_key chunk_key;
12695 struct btrfs_key devext_key;
12696 struct btrfs_chunk *chunk;
12697 struct extent_buffer *l;
12698 int num_stripes;
12699 u64 length;
12700 int i;
12701 int found_chunk = 0;
12702 int ret;
12704 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12705 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12706 length = btrfs_dev_extent_length(eb, ptr);
12708 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12709 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12710 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12712 btrfs_init_path(&path);
12713 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12714 if (ret)
12715 goto out;
12717 l = path.nodes[0];
12718 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12719 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12720 chunk_key.offset);
12721 if (ret < 0)
12722 goto out;
12724 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12725 goto out;
12727 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12728 for (i = 0; i < num_stripes; i++) {
12729 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12730 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12732 if (devid == devext_key.objectid &&
12733 offset == devext_key.offset) {
12734 found_chunk = 1;
12735 break;
12738 out:
12739 btrfs_release_path(&path);
12740 if (!found_chunk) {
12741 error(
12742 "device extent[%llu, %llu, %llu] did not find the related chunk",
12743 devext_key.objectid, devext_key.offset, length);
12744 return REFERENCER_MISSING;
12746 return 0;
12750 * Check if the used space is correct with the dev item
12752 static int check_dev_item(struct btrfs_fs_info *fs_info,
12753 struct extent_buffer *eb, int slot)
12755 struct btrfs_root *dev_root = fs_info->dev_root;
12756 struct btrfs_dev_item *dev_item;
12757 struct btrfs_path path;
12758 struct btrfs_key key;
12759 struct btrfs_dev_extent *ptr;
12760 u64 dev_id;
12761 u64 used;
12762 u64 total = 0;
12763 int ret;
12765 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12766 dev_id = btrfs_device_id(eb, dev_item);
12767 used = btrfs_device_bytes_used(eb, dev_item);
12769 key.objectid = dev_id;
12770 key.type = BTRFS_DEV_EXTENT_KEY;
12771 key.offset = 0;
12773 btrfs_init_path(&path);
12774 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12775 if (ret < 0) {
12776 btrfs_item_key_to_cpu(eb, &key, slot);
12777 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12778 key.objectid, key.type, key.offset);
12779 btrfs_release_path(&path);
12780 return REFERENCER_MISSING;
12783 /* Iterate dev_extents to calculate the used space of a device */
12784 while (1) {
12785 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12786 goto next;
12788 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12789 if (key.objectid > dev_id)
12790 break;
12791 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12792 goto next;
12794 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12795 struct btrfs_dev_extent);
12796 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12797 next:
12798 ret = btrfs_next_item(dev_root, &path);
12799 if (ret)
12800 break;
12802 btrfs_release_path(&path);
12804 if (used != total) {
12805 btrfs_item_key_to_cpu(eb, &key, slot);
12806 error(
12807 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12808 total, used, BTRFS_ROOT_TREE_OBJECTID,
12809 BTRFS_DEV_EXTENT_KEY, dev_id);
12810 return ACCOUNTING_MISMATCH;
12812 return 0;
12816 * Check a block group item with its referener (chunk) and its used space
12817 * with extent/metadata item
12819 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12820 struct extent_buffer *eb, int slot)
12822 struct btrfs_root *extent_root = fs_info->extent_root;
12823 struct btrfs_root *chunk_root = fs_info->chunk_root;
12824 struct btrfs_block_group_item *bi;
12825 struct btrfs_block_group_item bg_item;
12826 struct btrfs_path path;
12827 struct btrfs_key bg_key;
12828 struct btrfs_key chunk_key;
12829 struct btrfs_key extent_key;
12830 struct btrfs_chunk *chunk;
12831 struct extent_buffer *leaf;
12832 struct btrfs_extent_item *ei;
12833 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12834 u64 flags;
12835 u64 bg_flags;
12836 u64 used;
12837 u64 total = 0;
12838 int ret;
12839 int err = 0;
12841 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12842 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12843 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12844 used = btrfs_block_group_used(&bg_item);
12845 bg_flags = btrfs_block_group_flags(&bg_item);
12847 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12848 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12849 chunk_key.offset = bg_key.objectid;
12851 btrfs_init_path(&path);
12852 /* Search for the referencer chunk */
12853 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12854 if (ret) {
12855 error(
12856 "block group[%llu %llu] did not find the related chunk item",
12857 bg_key.objectid, bg_key.offset);
12858 err |= REFERENCER_MISSING;
12859 } else {
12860 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12861 struct btrfs_chunk);
12862 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12863 bg_key.offset) {
12864 error(
12865 "block group[%llu %llu] related chunk item length does not match",
12866 bg_key.objectid, bg_key.offset);
12867 err |= REFERENCER_MISMATCH;
12870 btrfs_release_path(&path);
12872 /* Search from the block group bytenr */
12873 extent_key.objectid = bg_key.objectid;
12874 extent_key.type = 0;
12875 extent_key.offset = 0;
12877 btrfs_init_path(&path);
12878 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12879 if (ret < 0)
12880 goto out;
12882 /* Iterate extent tree to account used space */
12883 while (1) {
12884 leaf = path.nodes[0];
12886 /* Search slot can point to the last item beyond leaf nritems */
12887 if (path.slots[0] >= btrfs_header_nritems(leaf))
12888 goto next;
12890 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12891 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12892 break;
12894 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12895 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12896 goto next;
12897 if (extent_key.objectid < bg_key.objectid)
12898 goto next;
12900 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12901 total += nodesize;
12902 else
12903 total += extent_key.offset;
12905 ei = btrfs_item_ptr(leaf, path.slots[0],
12906 struct btrfs_extent_item);
12907 flags = btrfs_extent_flags(leaf, ei);
12908 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12909 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12910 error(
12911 "bad extent[%llu, %llu) type mismatch with chunk",
12912 extent_key.objectid,
12913 extent_key.objectid + extent_key.offset);
12914 err |= CHUNK_TYPE_MISMATCH;
12916 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12917 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12918 BTRFS_BLOCK_GROUP_METADATA))) {
12919 error(
12920 "bad extent[%llu, %llu) type mismatch with chunk",
12921 extent_key.objectid,
12922 extent_key.objectid + nodesize);
12923 err |= CHUNK_TYPE_MISMATCH;
12926 next:
12927 ret = btrfs_next_item(extent_root, &path);
12928 if (ret)
12929 break;
12932 out:
12933 btrfs_release_path(&path);
12935 if (total != used) {
12936 error(
12937 "block group[%llu %llu] used %llu but extent items used %llu",
12938 bg_key.objectid, bg_key.offset, used, total);
12939 err |= BG_ACCOUNTING_ERROR;
12941 return err;
12945 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12946 * FIXME: We still need to repair error of dev_item.
12948 * Returns error after repair.
12950 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12951 struct btrfs_root *chunk_root,
12952 struct btrfs_path *path, int err)
12954 struct btrfs_chunk *chunk;
12955 struct btrfs_key chunk_key;
12956 struct extent_buffer *eb = path->nodes[0];
12957 u64 length;
12958 int slot = path->slots[0];
12959 u64 type;
12960 int ret = 0;
12962 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12963 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12964 return err;
12965 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12966 type = btrfs_chunk_type(path->nodes[0], chunk);
12967 length = btrfs_chunk_length(eb, chunk);
12969 if (err & REFERENCER_MISSING) {
12970 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12971 type, chunk_key.objectid, chunk_key.offset, length);
12972 if (ret) {
12973 error("fail to add block group item[%llu %llu]",
12974 chunk_key.offset, length);
12975 goto out;
12976 } else {
12977 err &= ~REFERENCER_MISSING;
12978 printf("Added block group item[%llu %llu]\n",
12979 chunk_key.offset, length);
12983 out:
12984 return err;
12988 * Check a chunk item.
12989 * Including checking all referred dev_extents and block group
12991 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12992 struct extent_buffer *eb, int slot)
12994 struct btrfs_root *extent_root = fs_info->extent_root;
12995 struct btrfs_root *dev_root = fs_info->dev_root;
12996 struct btrfs_path path;
12997 struct btrfs_key chunk_key;
12998 struct btrfs_key bg_key;
12999 struct btrfs_key devext_key;
13000 struct btrfs_chunk *chunk;
13001 struct extent_buffer *leaf;
13002 struct btrfs_block_group_item *bi;
13003 struct btrfs_block_group_item bg_item;
13004 struct btrfs_dev_extent *ptr;
13005 u64 length;
13006 u64 chunk_end;
13007 u64 stripe_len;
13008 u64 type;
13009 int num_stripes;
13010 u64 offset;
13011 u64 objectid;
13012 int i;
13013 int ret;
13014 int err = 0;
13016 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13017 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13018 length = btrfs_chunk_length(eb, chunk);
13019 chunk_end = chunk_key.offset + length;
13020 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13021 chunk_key.offset);
13022 if (ret < 0) {
13023 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13024 chunk_end);
13025 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13026 goto out;
13028 type = btrfs_chunk_type(eb, chunk);
13030 bg_key.objectid = chunk_key.offset;
13031 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13032 bg_key.offset = length;
13034 btrfs_init_path(&path);
13035 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13036 if (ret) {
13037 error(
13038 "chunk[%llu %llu) did not find the related block group item",
13039 chunk_key.offset, chunk_end);
13040 err |= REFERENCER_MISSING;
13041 } else{
13042 leaf = path.nodes[0];
13043 bi = btrfs_item_ptr(leaf, path.slots[0],
13044 struct btrfs_block_group_item);
13045 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13046 sizeof(bg_item));
13047 if (btrfs_block_group_flags(&bg_item) != type) {
13048 error(
13049 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13050 chunk_key.offset, chunk_end, type,
13051 btrfs_block_group_flags(&bg_item));
13052 err |= REFERENCER_MISSING;
13056 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13057 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13058 for (i = 0; i < num_stripes; i++) {
13059 btrfs_release_path(&path);
13060 btrfs_init_path(&path);
13061 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13062 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13063 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13065 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13066 0, 0);
13067 if (ret)
13068 goto not_match_dev;
13070 leaf = path.nodes[0];
13071 ptr = btrfs_item_ptr(leaf, path.slots[0],
13072 struct btrfs_dev_extent);
13073 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13074 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13075 if (objectid != chunk_key.objectid ||
13076 offset != chunk_key.offset ||
13077 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13078 goto not_match_dev;
13079 continue;
13080 not_match_dev:
13081 err |= BACKREF_MISSING;
13082 error(
13083 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13084 chunk_key.objectid, chunk_end, i);
13085 continue;
13087 btrfs_release_path(&path);
13088 out:
13089 return err;
13092 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13093 struct btrfs_root *root,
13094 struct btrfs_path *path)
13096 struct btrfs_key key;
13097 int ret = 0;
13099 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13100 btrfs_release_path(path);
13101 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13102 if (ret) {
13103 ret = -ENOENT;
13104 goto out;
13107 ret = btrfs_del_item(trans, root, path);
13108 if (ret)
13109 goto out;
13111 if (path->slots[0] == 0)
13112 btrfs_prev_leaf(root, path);
13113 else
13114 path->slots[0]--;
13115 out:
13116 if (ret)
13117 error("failed to delete root %llu item[%llu, %u, %llu]",
13118 root->objectid, key.objectid, key.type, key.offset);
13119 else
13120 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13121 root->objectid, key.objectid, key.type, key.offset);
13122 return ret;
13126 * Main entry function to check known items and update related accounting info
13128 static int check_leaf_items(struct btrfs_trans_handle *trans,
13129 struct btrfs_root *root, struct btrfs_path *path,
13130 struct node_refs *nrefs, int account_bytes)
13132 struct btrfs_fs_info *fs_info = root->fs_info;
13133 struct btrfs_key key;
13134 struct extent_buffer *eb;
13135 int slot;
13136 int type;
13137 struct btrfs_extent_data_ref *dref;
13138 int ret = 0;
13139 int err = 0;
13141 again:
13142 eb = path->nodes[0];
13143 slot = path->slots[0];
13144 if (slot >= btrfs_header_nritems(eb)) {
13145 if (slot == 0) {
13146 error("empty leaf [%llu %u] root %llu", eb->start,
13147 root->fs_info->nodesize, root->objectid);
13148 err |= EIO;
13150 goto out;
13153 btrfs_item_key_to_cpu(eb, &key, slot);
13154 type = key.type;
13156 switch (type) {
13157 case BTRFS_EXTENT_DATA_KEY:
13158 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13159 if (repair && ret)
13160 ret = repair_extent_data_item(trans, root, path, nrefs,
13161 ret);
13162 err |= ret;
13163 break;
13164 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13165 ret = check_block_group_item(fs_info, eb, slot);
13166 if (repair &&
13167 ret & REFERENCER_MISSING)
13168 ret = delete_extent_tree_item(trans, root, path);
13169 err |= ret;
13170 break;
13171 case BTRFS_DEV_ITEM_KEY:
13172 ret = check_dev_item(fs_info, eb, slot);
13173 err |= ret;
13174 break;
13175 case BTRFS_CHUNK_ITEM_KEY:
13176 ret = check_chunk_item(fs_info, eb, slot);
13177 if (repair && ret)
13178 ret = repair_chunk_item(trans, root, path, ret);
13179 err |= ret;
13180 break;
13181 case BTRFS_DEV_EXTENT_KEY:
13182 ret = check_dev_extent_item(fs_info, eb, slot);
13183 err |= ret;
13184 break;
13185 case BTRFS_EXTENT_ITEM_KEY:
13186 case BTRFS_METADATA_ITEM_KEY:
13187 ret = check_extent_item(trans, fs_info, path);
13188 err |= ret;
13189 break;
13190 case BTRFS_EXTENT_CSUM_KEY:
13191 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13192 err |= ret;
13193 break;
13194 case BTRFS_TREE_BLOCK_REF_KEY:
13195 ret = check_tree_block_backref(fs_info, key.offset,
13196 key.objectid, -1);
13197 if (repair &&
13198 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13199 ret = delete_extent_tree_item(trans, root, path);
13200 err |= ret;
13201 break;
13202 case BTRFS_EXTENT_DATA_REF_KEY:
13203 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13204 ret = check_extent_data_backref(fs_info,
13205 btrfs_extent_data_ref_root(eb, dref),
13206 btrfs_extent_data_ref_objectid(eb, dref),
13207 btrfs_extent_data_ref_offset(eb, dref),
13208 key.objectid, 0,
13209 btrfs_extent_data_ref_count(eb, dref));
13210 if (repair &&
13211 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13212 ret = delete_extent_tree_item(trans, root, path);
13213 err |= ret;
13214 break;
13215 case BTRFS_SHARED_BLOCK_REF_KEY:
13216 ret = check_shared_block_backref(fs_info, key.offset,
13217 key.objectid, -1);
13218 if (repair &&
13219 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13220 ret = delete_extent_tree_item(trans, root, path);
13221 err |= ret;
13222 break;
13223 case BTRFS_SHARED_DATA_REF_KEY:
13224 ret = check_shared_data_backref(fs_info, key.offset,
13225 key.objectid);
13226 if (repair &&
13227 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13228 ret = delete_extent_tree_item(trans, root, path);
13229 err |= ret;
13230 break;
13231 default:
13232 break;
13235 ++path->slots[0];
13236 goto again;
13237 out:
13238 return err;
13241 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13244 * Low memory usage version check_chunks_and_extents.
13246 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13248 struct btrfs_trans_handle *trans = NULL;
13249 struct btrfs_path path;
13250 struct btrfs_key old_key;
13251 struct btrfs_key key;
13252 struct btrfs_root *root1;
13253 struct btrfs_root *root;
13254 struct btrfs_root *cur_root;
13255 int err = 0;
13256 int ret;
13258 root = fs_info->fs_root;
13260 if (repair) {
13261 /* pin every tree block to avoid extent overwrite */
13262 ret = pin_metadata_blocks(fs_info);
13263 if (ret) {
13264 error("failed to pin metadata blocks");
13265 return ret;
13267 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13268 if (IS_ERR(trans)) {
13269 error("failed to start transaction before check");
13270 return PTR_ERR(trans);
13274 root1 = root->fs_info->chunk_root;
13275 ret = check_btrfs_root(trans, root1, 0, 1);
13276 err |= ret;
13278 root1 = root->fs_info->tree_root;
13279 ret = check_btrfs_root(trans, root1, 0, 1);
13280 err |= ret;
13282 btrfs_init_path(&path);
13283 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13284 key.offset = 0;
13285 key.type = BTRFS_ROOT_ITEM_KEY;
13287 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13288 if (ret) {
13289 error("cannot find extent tree in tree_root");
13290 goto out;
13293 while (1) {
13294 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13295 if (key.type != BTRFS_ROOT_ITEM_KEY)
13296 goto next;
13297 old_key = key;
13298 key.offset = (u64)-1;
13300 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13301 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13302 &key);
13303 else
13304 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13305 if (IS_ERR(cur_root) || !cur_root) {
13306 error("failed to read tree: %lld", key.objectid);
13307 goto next;
13310 ret = check_btrfs_root(trans, cur_root, 0, 1);
13311 err |= ret;
13313 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13314 btrfs_free_fs_root(cur_root);
13316 btrfs_release_path(&path);
13317 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13318 &old_key, &path, 0, 0);
13319 if (ret)
13320 goto out;
13321 next:
13322 ret = btrfs_next_item(root1, &path);
13323 if (ret)
13324 goto out;
13326 out:
13328 /* if repair, update block accounting */
13329 if (repair) {
13330 ret = btrfs_fix_block_accounting(trans, root);
13331 if (ret)
13332 err |= ret;
13333 else
13334 err &= ~BG_ACCOUNTING_ERROR;
13337 if (trans)
13338 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13340 btrfs_release_path(&path);
13342 return err;
13345 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13347 int ret;
13349 if (!ctx.progress_enabled)
13350 fprintf(stderr, "checking extents\n");
13351 if (check_mode == CHECK_MODE_LOWMEM)
13352 ret = check_chunks_and_extents_v2(fs_info);
13353 else
13354 ret = check_chunks_and_extents(fs_info);
13356 return ret;
13359 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13360 struct btrfs_root *root, int overwrite)
13362 struct extent_buffer *c;
13363 struct extent_buffer *old = root->node;
13364 int level;
13365 int ret;
13366 struct btrfs_disk_key disk_key = {0,0,0};
13368 level = 0;
13370 if (overwrite) {
13371 c = old;
13372 extent_buffer_get(c);
13373 goto init;
13375 c = btrfs_alloc_free_block(trans, root,
13376 root->fs_info->nodesize,
13377 root->root_key.objectid,
13378 &disk_key, level, 0, 0);
13379 if (IS_ERR(c)) {
13380 c = old;
13381 extent_buffer_get(c);
13382 overwrite = 1;
13384 init:
13385 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13386 btrfs_set_header_level(c, level);
13387 btrfs_set_header_bytenr(c, c->start);
13388 btrfs_set_header_generation(c, trans->transid);
13389 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13390 btrfs_set_header_owner(c, root->root_key.objectid);
13392 write_extent_buffer(c, root->fs_info->fsid,
13393 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13395 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13396 btrfs_header_chunk_tree_uuid(c),
13397 BTRFS_UUID_SIZE);
13399 btrfs_mark_buffer_dirty(c);
13401 * this case can happen in the following case:
13403 * 1.overwrite previous root.
13405 * 2.reinit reloc data root, this is because we skip pin
13406 * down reloc data tree before which means we can allocate
13407 * same block bytenr here.
13409 if (old->start == c->start) {
13410 btrfs_set_root_generation(&root->root_item,
13411 trans->transid);
13412 root->root_item.level = btrfs_header_level(root->node);
13413 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13414 &root->root_key, &root->root_item);
13415 if (ret) {
13416 free_extent_buffer(c);
13417 return ret;
13420 free_extent_buffer(old);
13421 root->node = c;
13422 add_root_to_dirty_list(root);
13423 return 0;
13426 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13427 struct extent_buffer *eb, int tree_root)
13429 struct extent_buffer *tmp;
13430 struct btrfs_root_item *ri;
13431 struct btrfs_key key;
13432 u64 bytenr;
13433 int level = btrfs_header_level(eb);
13434 int nritems;
13435 int ret;
13436 int i;
13439 * If we have pinned this block before, don't pin it again.
13440 * This can not only avoid forever loop with broken filesystem
13441 * but also give us some speedups.
13443 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13444 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13445 return 0;
13447 btrfs_pin_extent(fs_info, eb->start, eb->len);
13449 nritems = btrfs_header_nritems(eb);
13450 for (i = 0; i < nritems; i++) {
13451 if (level == 0) {
13452 btrfs_item_key_to_cpu(eb, &key, i);
13453 if (key.type != BTRFS_ROOT_ITEM_KEY)
13454 continue;
13455 /* Skip the extent root and reloc roots */
13456 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13457 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13458 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13459 continue;
13460 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13461 bytenr = btrfs_disk_root_bytenr(eb, ri);
13464 * If at any point we start needing the real root we
13465 * will have to build a stump root for the root we are
13466 * in, but for now this doesn't actually use the root so
13467 * just pass in extent_root.
13469 tmp = read_tree_block(fs_info, bytenr, 0);
13470 if (!extent_buffer_uptodate(tmp)) {
13471 fprintf(stderr, "Error reading root block\n");
13472 return -EIO;
13474 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13475 free_extent_buffer(tmp);
13476 if (ret)
13477 return ret;
13478 } else {
13479 bytenr = btrfs_node_blockptr(eb, i);
13481 /* If we aren't the tree root don't read the block */
13482 if (level == 1 && !tree_root) {
13483 btrfs_pin_extent(fs_info, bytenr,
13484 fs_info->nodesize);
13485 continue;
13488 tmp = read_tree_block(fs_info, bytenr, 0);
13489 if (!extent_buffer_uptodate(tmp)) {
13490 fprintf(stderr, "Error reading tree block\n");
13491 return -EIO;
13493 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13494 free_extent_buffer(tmp);
13495 if (ret)
13496 return ret;
13500 return 0;
13503 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13505 int ret;
13507 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13508 if (ret)
13509 return ret;
13511 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13514 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13516 struct btrfs_block_group_cache *cache;
13517 struct btrfs_path path;
13518 struct extent_buffer *leaf;
13519 struct btrfs_chunk *chunk;
13520 struct btrfs_key key;
13521 int ret;
13522 u64 start;
13524 btrfs_init_path(&path);
13525 key.objectid = 0;
13526 key.type = BTRFS_CHUNK_ITEM_KEY;
13527 key.offset = 0;
13528 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13529 if (ret < 0) {
13530 btrfs_release_path(&path);
13531 return ret;
13535 * We do this in case the block groups were screwed up and had alloc
13536 * bits that aren't actually set on the chunks. This happens with
13537 * restored images every time and could happen in real life I guess.
13539 fs_info->avail_data_alloc_bits = 0;
13540 fs_info->avail_metadata_alloc_bits = 0;
13541 fs_info->avail_system_alloc_bits = 0;
13543 /* First we need to create the in-memory block groups */
13544 while (1) {
13545 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13546 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13547 if (ret < 0) {
13548 btrfs_release_path(&path);
13549 return ret;
13551 if (ret) {
13552 ret = 0;
13553 break;
13556 leaf = path.nodes[0];
13557 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13558 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13559 path.slots[0]++;
13560 continue;
13563 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13564 btrfs_add_block_group(fs_info, 0,
13565 btrfs_chunk_type(leaf, chunk),
13566 key.objectid, key.offset,
13567 btrfs_chunk_length(leaf, chunk));
13568 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13569 key.offset + btrfs_chunk_length(leaf, chunk));
13570 path.slots[0]++;
13572 start = 0;
13573 while (1) {
13574 cache = btrfs_lookup_first_block_group(fs_info, start);
13575 if (!cache)
13576 break;
13577 cache->cached = 1;
13578 start = cache->key.objectid + cache->key.offset;
13581 btrfs_release_path(&path);
13582 return 0;
13585 static int reset_balance(struct btrfs_trans_handle *trans,
13586 struct btrfs_fs_info *fs_info)
13588 struct btrfs_root *root = fs_info->tree_root;
13589 struct btrfs_path path;
13590 struct extent_buffer *leaf;
13591 struct btrfs_key key;
13592 int del_slot, del_nr = 0;
13593 int ret;
13594 int found = 0;
13596 btrfs_init_path(&path);
13597 key.objectid = BTRFS_BALANCE_OBJECTID;
13598 key.type = BTRFS_BALANCE_ITEM_KEY;
13599 key.offset = 0;
13600 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13601 if (ret) {
13602 if (ret > 0)
13603 ret = 0;
13604 if (!ret)
13605 goto reinit_data_reloc;
13606 else
13607 goto out;
13610 ret = btrfs_del_item(trans, root, &path);
13611 if (ret)
13612 goto out;
13613 btrfs_release_path(&path);
13615 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13616 key.type = BTRFS_ROOT_ITEM_KEY;
13617 key.offset = 0;
13618 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13619 if (ret < 0)
13620 goto out;
13621 while (1) {
13622 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13623 if (!found)
13624 break;
13626 if (del_nr) {
13627 ret = btrfs_del_items(trans, root, &path,
13628 del_slot, del_nr);
13629 del_nr = 0;
13630 if (ret)
13631 goto out;
13633 key.offset++;
13634 btrfs_release_path(&path);
13636 found = 0;
13637 ret = btrfs_search_slot(trans, root, &key, &path,
13638 -1, 1);
13639 if (ret < 0)
13640 goto out;
13641 continue;
13643 found = 1;
13644 leaf = path.nodes[0];
13645 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13646 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13647 break;
13648 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13649 path.slots[0]++;
13650 continue;
13652 if (!del_nr) {
13653 del_slot = path.slots[0];
13654 del_nr = 1;
13655 } else {
13656 del_nr++;
13658 path.slots[0]++;
13661 if (del_nr) {
13662 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13663 if (ret)
13664 goto out;
13666 btrfs_release_path(&path);
13668 reinit_data_reloc:
13669 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13670 key.type = BTRFS_ROOT_ITEM_KEY;
13671 key.offset = (u64)-1;
13672 root = btrfs_read_fs_root(fs_info, &key);
13673 if (IS_ERR(root)) {
13674 fprintf(stderr, "Error reading data reloc tree\n");
13675 ret = PTR_ERR(root);
13676 goto out;
13678 record_root_in_trans(trans, root);
13679 ret = btrfs_fsck_reinit_root(trans, root, 0);
13680 if (ret)
13681 goto out;
13682 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13683 out:
13684 btrfs_release_path(&path);
13685 return ret;
13688 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13689 struct btrfs_fs_info *fs_info)
13691 u64 start = 0;
13692 int ret;
13695 * The only reason we don't do this is because right now we're just
13696 * walking the trees we find and pinning down their bytes, we don't look
13697 * at any of the leaves. In order to do mixed groups we'd have to check
13698 * the leaves of any fs roots and pin down the bytes for any file
13699 * extents we find. Not hard but why do it if we don't have to?
13701 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13702 fprintf(stderr, "We don't support re-initing the extent tree "
13703 "for mixed block groups yet, please notify a btrfs "
13704 "developer you want to do this so they can add this "
13705 "functionality.\n");
13706 return -EINVAL;
13710 * first we need to walk all of the trees except the extent tree and pin
13711 * down the bytes that are in use so we don't overwrite any existing
13712 * metadata.
13714 ret = pin_metadata_blocks(fs_info);
13715 if (ret) {
13716 fprintf(stderr, "error pinning down used bytes\n");
13717 return ret;
13721 * Need to drop all the block groups since we're going to recreate all
13722 * of them again.
13724 btrfs_free_block_groups(fs_info);
13725 ret = reset_block_groups(fs_info);
13726 if (ret) {
13727 fprintf(stderr, "error resetting the block groups\n");
13728 return ret;
13731 /* Ok we can allocate now, reinit the extent root */
13732 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13733 if (ret) {
13734 fprintf(stderr, "extent root initialization failed\n");
13736 * When the transaction code is updated we should end the
13737 * transaction, but for now progs only knows about commit so
13738 * just return an error.
13740 return ret;
13744 * Now we have all the in-memory block groups setup so we can make
13745 * allocations properly, and the metadata we care about is safe since we
13746 * pinned all of it above.
13748 while (1) {
13749 struct btrfs_block_group_cache *cache;
13751 cache = btrfs_lookup_first_block_group(fs_info, start);
13752 if (!cache)
13753 break;
13754 start = cache->key.objectid + cache->key.offset;
13755 ret = btrfs_insert_item(trans, fs_info->extent_root,
13756 &cache->key, &cache->item,
13757 sizeof(cache->item));
13758 if (ret) {
13759 fprintf(stderr, "Error adding block group\n");
13760 return ret;
13762 btrfs_extent_post_op(trans, fs_info->extent_root);
13765 ret = reset_balance(trans, fs_info);
13766 if (ret)
13767 fprintf(stderr, "error resetting the pending balance\n");
13769 return ret;
13772 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13774 struct btrfs_path path;
13775 struct btrfs_trans_handle *trans;
13776 struct btrfs_key key;
13777 int ret;
13779 printf("Recowing metadata block %llu\n", eb->start);
13780 key.objectid = btrfs_header_owner(eb);
13781 key.type = BTRFS_ROOT_ITEM_KEY;
13782 key.offset = (u64)-1;
13784 root = btrfs_read_fs_root(root->fs_info, &key);
13785 if (IS_ERR(root)) {
13786 fprintf(stderr, "Couldn't find owner root %llu\n",
13787 key.objectid);
13788 return PTR_ERR(root);
13791 trans = btrfs_start_transaction(root, 1);
13792 if (IS_ERR(trans))
13793 return PTR_ERR(trans);
13795 btrfs_init_path(&path);
13796 path.lowest_level = btrfs_header_level(eb);
13797 if (path.lowest_level)
13798 btrfs_node_key_to_cpu(eb, &key, 0);
13799 else
13800 btrfs_item_key_to_cpu(eb, &key, 0);
13802 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13803 btrfs_commit_transaction(trans, root);
13804 btrfs_release_path(&path);
13805 return ret;
13808 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13810 struct btrfs_path path;
13811 struct btrfs_trans_handle *trans;
13812 struct btrfs_key key;
13813 int ret;
13815 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13816 bad->key.type, bad->key.offset);
13817 key.objectid = bad->root_id;
13818 key.type = BTRFS_ROOT_ITEM_KEY;
13819 key.offset = (u64)-1;
13821 root = btrfs_read_fs_root(root->fs_info, &key);
13822 if (IS_ERR(root)) {
13823 fprintf(stderr, "Couldn't find owner root %llu\n",
13824 key.objectid);
13825 return PTR_ERR(root);
13828 trans = btrfs_start_transaction(root, 1);
13829 if (IS_ERR(trans))
13830 return PTR_ERR(trans);
13832 btrfs_init_path(&path);
13833 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13834 if (ret) {
13835 if (ret > 0)
13836 ret = 0;
13837 goto out;
13839 ret = btrfs_del_item(trans, root, &path);
13840 out:
13841 btrfs_commit_transaction(trans, root);
13842 btrfs_release_path(&path);
13843 return ret;
13846 static int zero_log_tree(struct btrfs_root *root)
13848 struct btrfs_trans_handle *trans;
13849 int ret;
13851 trans = btrfs_start_transaction(root, 1);
13852 if (IS_ERR(trans)) {
13853 ret = PTR_ERR(trans);
13854 return ret;
13856 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13857 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13858 ret = btrfs_commit_transaction(trans, root);
13859 return ret;
13862 static int populate_csum(struct btrfs_trans_handle *trans,
13863 struct btrfs_root *csum_root, char *buf, u64 start,
13864 u64 len)
13866 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13867 u64 offset = 0;
13868 u64 sectorsize;
13869 int ret = 0;
13871 while (offset < len) {
13872 sectorsize = fs_info->sectorsize;
13873 ret = read_extent_data(fs_info, buf, start + offset,
13874 &sectorsize, 0);
13875 if (ret)
13876 break;
13877 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13878 start + offset, buf, sectorsize);
13879 if (ret)
13880 break;
13881 offset += sectorsize;
13883 return ret;
13886 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13887 struct btrfs_root *csum_root,
13888 struct btrfs_root *cur_root)
13890 struct btrfs_path path;
13891 struct btrfs_key key;
13892 struct extent_buffer *node;
13893 struct btrfs_file_extent_item *fi;
13894 char *buf = NULL;
13895 u64 start = 0;
13896 u64 len = 0;
13897 int slot = 0;
13898 int ret = 0;
13900 buf = malloc(cur_root->fs_info->sectorsize);
13901 if (!buf)
13902 return -ENOMEM;
13904 btrfs_init_path(&path);
13905 key.objectid = 0;
13906 key.offset = 0;
13907 key.type = 0;
13908 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13909 if (ret < 0)
13910 goto out;
13911 /* Iterate all regular file extents and fill its csum */
13912 while (1) {
13913 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13915 if (key.type != BTRFS_EXTENT_DATA_KEY)
13916 goto next;
13917 node = path.nodes[0];
13918 slot = path.slots[0];
13919 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13920 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13921 goto next;
13922 start = btrfs_file_extent_disk_bytenr(node, fi);
13923 len = btrfs_file_extent_disk_num_bytes(node, fi);
13925 ret = populate_csum(trans, csum_root, buf, start, len);
13926 if (ret == -EEXIST)
13927 ret = 0;
13928 if (ret < 0)
13929 goto out;
13930 next:
13932 * TODO: if next leaf is corrupted, jump to nearest next valid
13933 * leaf.
13935 ret = btrfs_next_item(cur_root, &path);
13936 if (ret < 0)
13937 goto out;
13938 if (ret > 0) {
13939 ret = 0;
13940 goto out;
13944 out:
13945 btrfs_release_path(&path);
13946 free(buf);
13947 return ret;
13950 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13951 struct btrfs_root *csum_root)
13953 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13954 struct btrfs_path path;
13955 struct btrfs_root *tree_root = fs_info->tree_root;
13956 struct btrfs_root *cur_root;
13957 struct extent_buffer *node;
13958 struct btrfs_key key;
13959 int slot = 0;
13960 int ret = 0;
13962 btrfs_init_path(&path);
13963 key.objectid = BTRFS_FS_TREE_OBJECTID;
13964 key.offset = 0;
13965 key.type = BTRFS_ROOT_ITEM_KEY;
13966 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13967 if (ret < 0)
13968 goto out;
13969 if (ret > 0) {
13970 ret = -ENOENT;
13971 goto out;
13974 while (1) {
13975 node = path.nodes[0];
13976 slot = path.slots[0];
13977 btrfs_item_key_to_cpu(node, &key, slot);
13978 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13979 goto out;
13980 if (key.type != BTRFS_ROOT_ITEM_KEY)
13981 goto next;
13982 if (!is_fstree(key.objectid))
13983 goto next;
13984 key.offset = (u64)-1;
13986 cur_root = btrfs_read_fs_root(fs_info, &key);
13987 if (IS_ERR(cur_root) || !cur_root) {
13988 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13989 key.objectid);
13990 goto out;
13992 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13993 cur_root);
13994 if (ret < 0)
13995 goto out;
13996 next:
13997 ret = btrfs_next_item(tree_root, &path);
13998 if (ret > 0) {
13999 ret = 0;
14000 goto out;
14002 if (ret < 0)
14003 goto out;
14006 out:
14007 btrfs_release_path(&path);
14008 return ret;
14011 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14012 struct btrfs_root *csum_root)
14014 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14015 struct btrfs_path path;
14016 struct btrfs_extent_item *ei;
14017 struct extent_buffer *leaf;
14018 char *buf;
14019 struct btrfs_key key;
14020 int ret;
14022 btrfs_init_path(&path);
14023 key.objectid = 0;
14024 key.type = BTRFS_EXTENT_ITEM_KEY;
14025 key.offset = 0;
14026 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14027 if (ret < 0) {
14028 btrfs_release_path(&path);
14029 return ret;
14032 buf = malloc(csum_root->fs_info->sectorsize);
14033 if (!buf) {
14034 btrfs_release_path(&path);
14035 return -ENOMEM;
14038 while (1) {
14039 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14040 ret = btrfs_next_leaf(extent_root, &path);
14041 if (ret < 0)
14042 break;
14043 if (ret) {
14044 ret = 0;
14045 break;
14048 leaf = path.nodes[0];
14050 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14051 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14052 path.slots[0]++;
14053 continue;
14056 ei = btrfs_item_ptr(leaf, path.slots[0],
14057 struct btrfs_extent_item);
14058 if (!(btrfs_extent_flags(leaf, ei) &
14059 BTRFS_EXTENT_FLAG_DATA)) {
14060 path.slots[0]++;
14061 continue;
14064 ret = populate_csum(trans, csum_root, buf, key.objectid,
14065 key.offset);
14066 if (ret)
14067 break;
14068 path.slots[0]++;
14071 btrfs_release_path(&path);
14072 free(buf);
14073 return ret;
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all the extent info, so in that case the extent
 * tree cannot be used as the source and the fs/subvolume trees are walked
 * instead.  A non-zero @search_fs_tree selects the fs/subvolume tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
14093 static void free_roots_info_cache(void)
14095 if (!roots_info_cache)
14096 return;
14098 while (!cache_tree_empty(roots_info_cache)) {
14099 struct cache_extent *entry;
14100 struct root_item_info *rii;
14102 entry = first_cache_extent(roots_info_cache);
14103 if (!entry)
14104 break;
14105 remove_cache_extent(roots_info_cache, entry);
14106 rii = container_of(entry, struct root_item_info, cache_extent);
14107 free(rii);
14110 free(roots_info_cache);
14111 roots_info_cache = NULL;
14114 static int build_roots_info_cache(struct btrfs_fs_info *info)
14116 int ret = 0;
14117 struct btrfs_key key;
14118 struct extent_buffer *leaf;
14119 struct btrfs_path path;
14121 if (!roots_info_cache) {
14122 roots_info_cache = malloc(sizeof(*roots_info_cache));
14123 if (!roots_info_cache)
14124 return -ENOMEM;
14125 cache_tree_init(roots_info_cache);
14128 btrfs_init_path(&path);
14129 key.objectid = 0;
14130 key.type = BTRFS_EXTENT_ITEM_KEY;
14131 key.offset = 0;
14132 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14133 if (ret < 0)
14134 goto out;
14135 leaf = path.nodes[0];
14137 while (1) {
14138 struct btrfs_key found_key;
14139 struct btrfs_extent_item *ei;
14140 struct btrfs_extent_inline_ref *iref;
14141 int slot = path.slots[0];
14142 int type;
14143 u64 flags;
14144 u64 root_id;
14145 u8 level;
14146 struct cache_extent *entry;
14147 struct root_item_info *rii;
14149 if (slot >= btrfs_header_nritems(leaf)) {
14150 ret = btrfs_next_leaf(info->extent_root, &path);
14151 if (ret < 0) {
14152 break;
14153 } else if (ret) {
14154 ret = 0;
14155 break;
14157 leaf = path.nodes[0];
14158 slot = path.slots[0];
14161 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14163 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14164 found_key.type != BTRFS_METADATA_ITEM_KEY)
14165 goto next;
14167 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14168 flags = btrfs_extent_flags(leaf, ei);
14170 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14171 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14172 goto next;
14174 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14175 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14176 level = found_key.offset;
14177 } else {
14178 struct btrfs_tree_block_info *binfo;
14180 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14181 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14182 level = btrfs_tree_block_level(leaf, binfo);
14186 * For a root extent, it must be of the following type and the
14187 * first (and only one) iref in the item.
14189 type = btrfs_extent_inline_ref_type(leaf, iref);
14190 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14191 goto next;
14193 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14194 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14195 if (!entry) {
14196 rii = malloc(sizeof(struct root_item_info));
14197 if (!rii) {
14198 ret = -ENOMEM;
14199 goto out;
14201 rii->cache_extent.start = root_id;
14202 rii->cache_extent.size = 1;
14203 rii->level = (u8)-1;
14204 entry = &rii->cache_extent;
14205 ret = insert_cache_extent(roots_info_cache, entry);
14206 ASSERT(ret == 0);
14207 } else {
14208 rii = container_of(entry, struct root_item_info,
14209 cache_extent);
14212 ASSERT(rii->cache_extent.start == root_id);
14213 ASSERT(rii->cache_extent.size == 1);
14215 if (level > rii->level || rii->level == (u8)-1) {
14216 rii->level = level;
14217 rii->bytenr = found_key.objectid;
14218 rii->gen = btrfs_extent_generation(leaf, ei);
14219 rii->node_count = 1;
14220 } else if (level == rii->level) {
14221 rii->node_count++;
14223 next:
14224 path.slots[0]++;
14227 out:
14228 btrfs_release_path(&path);
14230 return ret;
14233 static int maybe_repair_root_item(struct btrfs_path *path,
14234 const struct btrfs_key *root_key,
14235 const int read_only_mode)
14237 const u64 root_id = root_key->objectid;
14238 struct cache_extent *entry;
14239 struct root_item_info *rii;
14240 struct btrfs_root_item ri;
14241 unsigned long offset;
14243 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14244 if (!entry) {
14245 fprintf(stderr,
14246 "Error: could not find extent items for root %llu\n",
14247 root_key->objectid);
14248 return -ENOENT;
14251 rii = container_of(entry, struct root_item_info, cache_extent);
14252 ASSERT(rii->cache_extent.start == root_id);
14253 ASSERT(rii->cache_extent.size == 1);
14255 if (rii->node_count != 1) {
14256 fprintf(stderr,
14257 "Error: could not find btree root extent for root %llu\n",
14258 root_id);
14259 return -ENOENT;
14262 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14263 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14265 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14266 btrfs_root_level(&ri) != rii->level ||
14267 btrfs_root_generation(&ri) != rii->gen) {
14270 * If we're in repair mode but our caller told us to not update
14271 * the root item, i.e. just check if it needs to be updated, don't
14272 * print this message, since the caller will call us again shortly
14273 * for the same root item without read only mode (the caller will
14274 * open a transaction first).
14276 if (!(read_only_mode && repair))
14277 fprintf(stderr,
14278 "%sroot item for root %llu,"
14279 " current bytenr %llu, current gen %llu, current level %u,"
14280 " new bytenr %llu, new gen %llu, new level %u\n",
14281 (read_only_mode ? "" : "fixing "),
14282 root_id,
14283 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14284 btrfs_root_level(&ri),
14285 rii->bytenr, rii->gen, rii->level);
14287 if (btrfs_root_generation(&ri) > rii->gen) {
14288 fprintf(stderr,
14289 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14290 root_id, btrfs_root_generation(&ri), rii->gen);
14291 return -EINVAL;
14294 if (!read_only_mode) {
14295 btrfs_set_root_bytenr(&ri, rii->bytenr);
14296 btrfs_set_root_level(&ri, rii->level);
14297 btrfs_set_root_generation(&ri, rii->gen);
14298 write_extent_buffer(path->nodes[0], &ri,
14299 offset, sizeof(ri));
14302 return 1;
14305 return 0;
14309 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14310 * caused read-only snapshots to be corrupted if they were created at a moment
14311 * when the source subvolume/snapshot had orphan items. The issue was that the
14312 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14313 * node instead of the post orphan cleanup root node.
14314 * So this function, and its callees, just detects and fixes those cases. Even
14315 * though the regression was for read-only snapshots, this function applies to
14316 * any snapshot/subvolume root.
14317 * This must be run before any other repair code - not doing it so, makes other
14318 * repair code delete or modify backrefs in the extent tree for example, which
14319 * will result in an inconsistent fs after repairing the root items.
14321 static int repair_root_items(struct btrfs_fs_info *info)
14323 struct btrfs_path path;
14324 struct btrfs_key key;
14325 struct extent_buffer *leaf;
14326 struct btrfs_trans_handle *trans = NULL;
14327 int ret = 0;
14328 int bad_roots = 0;
14329 int need_trans = 0;
14331 btrfs_init_path(&path);
14333 ret = build_roots_info_cache(info);
14334 if (ret)
14335 goto out;
14337 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14338 key.type = BTRFS_ROOT_ITEM_KEY;
14339 key.offset = 0;
14341 again:
14343 * Avoid opening and committing transactions if a leaf doesn't have
14344 * any root items that need to be fixed, so that we avoid rotating
14345 * backup roots unnecessarily.
14347 if (need_trans) {
14348 trans = btrfs_start_transaction(info->tree_root, 1);
14349 if (IS_ERR(trans)) {
14350 ret = PTR_ERR(trans);
14351 goto out;
14355 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14356 0, trans ? 1 : 0);
14357 if (ret < 0)
14358 goto out;
14359 leaf = path.nodes[0];
14361 while (1) {
14362 struct btrfs_key found_key;
14364 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14365 int no_more_keys = find_next_key(&path, &key);
14367 btrfs_release_path(&path);
14368 if (trans) {
14369 ret = btrfs_commit_transaction(trans,
14370 info->tree_root);
14371 trans = NULL;
14372 if (ret < 0)
14373 goto out;
14375 need_trans = 0;
14376 if (no_more_keys)
14377 break;
14378 goto again;
14381 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14383 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14384 goto next;
14385 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14386 goto next;
14388 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14389 if (ret < 0)
14390 goto out;
14391 if (ret) {
14392 if (!trans && repair) {
14393 need_trans = 1;
14394 key = found_key;
14395 btrfs_release_path(&path);
14396 goto again;
14398 bad_roots++;
14400 next:
14401 path.slots[0]++;
14403 ret = 0;
14404 out:
14405 free_roots_info_cache();
14406 btrfs_release_path(&path);
14407 if (trans)
14408 btrfs_commit_transaction(trans, info->tree_root);
14409 if (ret < 0)
14410 return ret;
14412 return bad_roots;
14415 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14417 struct btrfs_trans_handle *trans;
14418 struct btrfs_block_group_cache *bg_cache;
14419 u64 current = 0;
14420 int ret = 0;
14422 /* Clear all free space cache inodes and its extent data */
14423 while (1) {
14424 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14425 if (!bg_cache)
14426 break;
14427 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14428 if (ret < 0)
14429 return ret;
14430 current = bg_cache->key.objectid + bg_cache->key.offset;
14433 /* Don't forget to set cache_generation to -1 */
14434 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14435 if (IS_ERR(trans)) {
14436 error("failed to update super block cache generation");
14437 return PTR_ERR(trans);
14439 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14440 btrfs_commit_transaction(trans, fs_info->tree_root);
14442 return ret;
14445 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14446 int clear_version)
14448 int ret = 0;
14450 if (clear_version == 1) {
14451 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14452 error(
14453 "free space cache v2 detected, use --clear-space-cache v2");
14454 ret = 1;
14455 goto close_out;
14457 printf("Clearing free space cache\n");
14458 ret = clear_free_space_cache(fs_info);
14459 if (ret) {
14460 error("failed to clear free space cache");
14461 ret = 1;
14462 } else {
14463 printf("Free space cache cleared\n");
14465 } else if (clear_version == 2) {
14466 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14467 printf("no free space cache v2 to clear\n");
14468 ret = 0;
14469 goto close_out;
14471 printf("Clear free space cache v2\n");
14472 ret = btrfs_clear_free_space_tree(fs_info);
14473 if (ret) {
14474 error("failed to clear free space cache v2: %d", ret);
14475 ret = 1;
14476 } else {
14477 printf("free space cache v2 cleared\n");
14480 close_out:
14481 return ret;
/*
 * Usage text of "btrfs check": synopsis, one-line summary, long description,
 * an empty string separating the description from the option list, the
 * options, and a terminating NULL.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--force skip mount checks, repair is not possible",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--mode <MODE> allows choice of memory/IO trade-offs",
	" where MODE is one of:",
	" original - read inodes and extents to memory (requires",
	" more memory, does less IO)",
	" lowmem - try to use less memory but read blocks again",
	" when needed",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
	"--clear-space-cache v1|v2 clear space cache for v1 or v2",
	NULL
};
14516 int cmd_check(int argc, char **argv)
14518 struct cache_tree root_cache;
14519 struct btrfs_root *root;
14520 struct btrfs_fs_info *info;
14521 u64 bytenr = 0;
14522 u64 subvolid = 0;
14523 u64 tree_root_bytenr = 0;
14524 u64 chunk_root_bytenr = 0;
14525 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14526 int ret = 0;
14527 int err = 0;
14528 u64 num;
14529 int init_csum_tree = 0;
14530 int readonly = 0;
14531 int clear_space_cache = 0;
14532 int qgroup_report = 0;
14533 int qgroups_repaired = 0;
14534 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14535 int force = 0;
14537 while(1) {
14538 int c;
14539 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14540 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14541 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14542 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14543 GETOPT_VAL_FORCE };
14544 static const struct option long_options[] = {
14545 { "super", required_argument, NULL, 's' },
14546 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14547 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14548 { "init-csum-tree", no_argument, NULL,
14549 GETOPT_VAL_INIT_CSUM },
14550 { "init-extent-tree", no_argument, NULL,
14551 GETOPT_VAL_INIT_EXTENT },
14552 { "check-data-csum", no_argument, NULL,
14553 GETOPT_VAL_CHECK_CSUM },
14554 { "backup", no_argument, NULL, 'b' },
14555 { "subvol-extents", required_argument, NULL, 'E' },
14556 { "qgroup-report", no_argument, NULL, 'Q' },
14557 { "tree-root", required_argument, NULL, 'r' },
14558 { "chunk-root", required_argument, NULL,
14559 GETOPT_VAL_CHUNK_TREE },
14560 { "progress", no_argument, NULL, 'p' },
14561 { "mode", required_argument, NULL,
14562 GETOPT_VAL_MODE },
14563 { "clear-space-cache", required_argument, NULL,
14564 GETOPT_VAL_CLEAR_SPACE_CACHE},
14565 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14566 { NULL, 0, NULL, 0}
14569 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14570 if (c < 0)
14571 break;
14572 switch(c) {
14573 case 'a': /* ignored */ break;
14574 case 'b':
14575 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14576 break;
14577 case 's':
14578 num = arg_strtou64(optarg);
14579 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14580 error(
14581 "super mirror should be less than %d",
14582 BTRFS_SUPER_MIRROR_MAX);
14583 exit(1);
14585 bytenr = btrfs_sb_offset(((int)num));
14586 printf("using SB copy %llu, bytenr %llu\n", num,
14587 (unsigned long long)bytenr);
14588 break;
14589 case 'Q':
14590 qgroup_report = 1;
14591 break;
14592 case 'E':
14593 subvolid = arg_strtou64(optarg);
14594 break;
14595 case 'r':
14596 tree_root_bytenr = arg_strtou64(optarg);
14597 break;
14598 case GETOPT_VAL_CHUNK_TREE:
14599 chunk_root_bytenr = arg_strtou64(optarg);
14600 break;
14601 case 'p':
14602 ctx.progress_enabled = true;
14603 break;
14604 case '?':
14605 case 'h':
14606 usage(cmd_check_usage);
14607 case GETOPT_VAL_REPAIR:
14608 printf("enabling repair mode\n");
14609 repair = 1;
14610 ctree_flags |= OPEN_CTREE_WRITES;
14611 break;
14612 case GETOPT_VAL_READONLY:
14613 readonly = 1;
14614 break;
14615 case GETOPT_VAL_INIT_CSUM:
14616 printf("Creating a new CRC tree\n");
14617 init_csum_tree = 1;
14618 repair = 1;
14619 ctree_flags |= OPEN_CTREE_WRITES;
14620 break;
14621 case GETOPT_VAL_INIT_EXTENT:
14622 init_extent_tree = 1;
14623 ctree_flags |= (OPEN_CTREE_WRITES |
14624 OPEN_CTREE_NO_BLOCK_GROUPS);
14625 repair = 1;
14626 break;
14627 case GETOPT_VAL_CHECK_CSUM:
14628 check_data_csum = 1;
14629 break;
14630 case GETOPT_VAL_MODE:
14631 check_mode = parse_check_mode(optarg);
14632 if (check_mode == CHECK_MODE_UNKNOWN) {
14633 error("unknown mode: %s", optarg);
14634 exit(1);
14636 break;
14637 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14638 if (strcmp(optarg, "v1") == 0) {
14639 clear_space_cache = 1;
14640 } else if (strcmp(optarg, "v2") == 0) {
14641 clear_space_cache = 2;
14642 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14643 } else {
14644 error(
14645 "invalid argument to --clear-space-cache, must be v1 or v2");
14646 exit(1);
14648 ctree_flags |= OPEN_CTREE_WRITES;
14649 break;
14650 case GETOPT_VAL_FORCE:
14651 force = 1;
14652 break;
14656 if (check_argc_exact(argc - optind, 1))
14657 usage(cmd_check_usage);
14659 if (ctx.progress_enabled) {
14660 ctx.tp = TASK_NOTHING;
14661 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14664 /* This check is the only reason for --readonly to exist */
14665 if (readonly && repair) {
14666 error("repair options are not compatible with --readonly");
14667 exit(1);
14671 * experimental and dangerous
14673 if (repair && check_mode == CHECK_MODE_LOWMEM)
14674 warning("low-memory mode repair support is only partial");
14676 radix_tree_init();
14677 cache_tree_init(&root_cache);
14679 ret = check_mounted(argv[optind]);
14680 if (!force) {
14681 if (ret < 0) {
14682 error("could not check mount status: %s",
14683 strerror(-ret));
14684 err |= !!ret;
14685 goto err_out;
14686 } else if (ret) {
14687 error(
14688 "%s is currently mounted, use --force if you really intend to check the filesystem",
14689 argv[optind]);
14690 ret = -EBUSY;
14691 err |= !!ret;
14692 goto err_out;
14694 } else {
14695 if (repair) {
14696 error("repair and --force is not yet supported");
14697 ret = 1;
14698 err |= !!ret;
14699 goto err_out;
14701 if (ret < 0) {
14702 warning(
14703 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14704 argv[optind]);
14705 } else if (ret) {
14706 warning(
14707 "filesystem mounted, continuing because of --force");
14709 /* A block device is mounted in exclusive mode by kernel */
14710 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14713 /* only allow partial opening under repair mode */
14714 if (repair)
14715 ctree_flags |= OPEN_CTREE_PARTIAL;
14717 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14718 chunk_root_bytenr, ctree_flags);
14719 if (!info) {
14720 error("cannot open file system");
14721 ret = -EIO;
14722 err |= !!ret;
14723 goto err_out;
14726 global_info = info;
14727 root = info->fs_root;
14728 uuid_unparse(info->super_copy->fsid, uuidbuf);
14730 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14733 * Check the bare minimum before starting anything else that could rely
14734 * on it, namely the tree roots, any local consistency checks
14736 if (!extent_buffer_uptodate(info->tree_root->node) ||
14737 !extent_buffer_uptodate(info->dev_root->node) ||
14738 !extent_buffer_uptodate(info->chunk_root->node)) {
14739 error("critical roots corrupted, unable to check the filesystem");
14740 err |= !!ret;
14741 ret = -EIO;
14742 goto close_out;
14745 if (clear_space_cache) {
14746 ret = do_clear_free_space_cache(info, clear_space_cache);
14747 err |= !!ret;
14748 goto close_out;
14752 * repair mode will force us to commit transaction which
14753 * will make us fail to load log tree when mounting.
14755 if (repair && btrfs_super_log_root(info->super_copy)) {
14756 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14757 if (!ret) {
14758 ret = 1;
14759 err |= !!ret;
14760 goto close_out;
14762 ret = zero_log_tree(root);
14763 err |= !!ret;
14764 if (ret) {
14765 error("failed to zero log tree: %d", ret);
14766 goto close_out;
14770 if (qgroup_report) {
14771 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14772 uuidbuf);
14773 ret = qgroup_verify_all(info);
14774 err |= !!ret;
14775 if (ret == 0)
14776 report_qgroups(1);
14777 goto close_out;
14779 if (subvolid) {
14780 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14781 subvolid, argv[optind], uuidbuf);
14782 ret = print_extent_state(info, subvolid);
14783 err |= !!ret;
14784 goto close_out;
14787 if (init_extent_tree || init_csum_tree) {
14788 struct btrfs_trans_handle *trans;
14790 trans = btrfs_start_transaction(info->extent_root, 0);
14791 if (IS_ERR(trans)) {
14792 error("error starting transaction");
14793 ret = PTR_ERR(trans);
14794 err |= !!ret;
14795 goto close_out;
14798 if (init_extent_tree) {
14799 printf("Creating a new extent tree\n");
14800 ret = reinit_extent_tree(trans, info);
14801 err |= !!ret;
14802 if (ret)
14803 goto close_out;
14806 if (init_csum_tree) {
14807 printf("Reinitialize checksum tree\n");
14808 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14809 if (ret) {
14810 error("checksum tree initialization failed: %d",
14811 ret);
14812 ret = -EIO;
14813 err |= !!ret;
14814 goto close_out;
14817 ret = fill_csum_tree(trans, info->csum_root,
14818 init_extent_tree);
14819 err |= !!ret;
14820 if (ret) {
14821 error("checksum tree refilling failed: %d", ret);
14822 return -EIO;
14826 * Ok now we commit and run the normal fsck, which will add
14827 * extent entries for all of the items it finds.
14829 ret = btrfs_commit_transaction(trans, info->extent_root);
14830 err |= !!ret;
14831 if (ret)
14832 goto close_out;
14834 if (!extent_buffer_uptodate(info->extent_root->node)) {
14835 error("critical: extent_root, unable to check the filesystem");
14836 ret = -EIO;
14837 err |= !!ret;
14838 goto close_out;
14840 if (!extent_buffer_uptodate(info->csum_root->node)) {
14841 error("critical: csum_root, unable to check the filesystem");
14842 ret = -EIO;
14843 err |= !!ret;
14844 goto close_out;
14847 if (!init_extent_tree) {
14848 ret = repair_root_items(info);
14849 if (ret < 0) {
14850 err = !!ret;
14851 error("failed to repair root items: %s", strerror(-ret));
14852 goto close_out;
14854 if (repair) {
14855 fprintf(stderr, "Fixed %d roots.\n", ret);
14856 ret = 0;
14857 } else if (ret > 0) {
14858 fprintf(stderr,
14859 "Found %d roots with an outdated root item.\n",
14860 ret);
14861 fprintf(stderr,
14862 "Please run a filesystem check with the option --repair to fix them.\n");
14863 ret = 1;
14864 err |= ret;
14865 goto close_out;
14869 ret = do_check_chunks_and_extents(info);
14870 err |= !!ret;
14871 if (ret)
14872 error(
14873 "errors found in extent allocation tree or chunk allocation");
14875 if (!ctx.progress_enabled) {
14876 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14877 fprintf(stderr, "checking free space tree\n");
14878 else
14879 fprintf(stderr, "checking free space cache\n");
14881 ret = check_space_cache(root);
14882 err |= !!ret;
14883 if (ret) {
14884 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14885 error("errors found in free space tree");
14886 else
14887 error("errors found in free space cache");
14888 goto out;
14892 * We used to have to have these hole extents in between our real
14893 * extents so if we don't have this flag set we need to make sure there
14894 * are no gaps in the file extents for inodes, otherwise we can just
14895 * ignore it when this happens.
14897 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14898 ret = do_check_fs_roots(info, &root_cache);
14899 err |= !!ret;
14900 if (ret) {
14901 error("errors found in fs roots");
14902 goto out;
14905 fprintf(stderr, "checking csums\n");
14906 ret = check_csums(root);
14907 err |= !!ret;
14908 if (ret) {
14909 error("errors found in csum tree");
14910 goto out;
14913 fprintf(stderr, "checking root refs\n");
14914 /* For low memory mode, check_fs_roots_v2 handles root refs */
14915 if (check_mode != CHECK_MODE_LOWMEM) {
14916 ret = check_root_refs(root, &root_cache);
14917 err |= !!ret;
14918 if (ret) {
14919 error("errors found in root refs");
14920 goto out;
14924 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14925 struct extent_buffer *eb;
14927 eb = list_first_entry(&root->fs_info->recow_ebs,
14928 struct extent_buffer, recow);
14929 list_del_init(&eb->recow);
14930 ret = recow_extent_buffer(root, eb);
14931 err |= !!ret;
14932 if (ret) {
14933 error("fails to fix transid errors");
14934 break;
14938 while (!list_empty(&delete_items)) {
14939 struct bad_item *bad;
14941 bad = list_first_entry(&delete_items, struct bad_item, list);
14942 list_del_init(&bad->list);
14943 if (repair) {
14944 ret = delete_bad_item(root, bad);
14945 err |= !!ret;
14947 free(bad);
14950 if (info->quota_enabled) {
14951 fprintf(stderr, "checking quota groups\n");
14952 ret = qgroup_verify_all(info);
14953 err |= !!ret;
14954 if (ret) {
14955 error("failed to check quota groups");
14956 goto out;
14958 report_qgroups(0);
14959 ret = repair_qgroups(info, &qgroups_repaired);
14960 err |= !!ret;
14961 if (err) {
14962 error("failed to repair quota groups");
14963 goto out;
14965 ret = 0;
14968 if (!list_empty(&root->fs_info->recow_ebs)) {
14969 error("transid errors in file system");
14970 ret = 1;
14971 err |= !!ret;
14973 out:
14974 printf("found %llu bytes used, ",
14975 (unsigned long long)bytes_used);
14976 if (err)
14977 printf("error(s) found\n");
14978 else
14979 printf("no error found\n");
14980 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14981 printf("total tree bytes: %llu\n",
14982 (unsigned long long)total_btree_bytes);
14983 printf("total fs tree bytes: %llu\n",
14984 (unsigned long long)total_fs_tree_bytes);
14985 printf("total extent tree bytes: %llu\n",
14986 (unsigned long long)total_extent_tree_bytes);
14987 printf("btree space waste bytes: %llu\n",
14988 (unsigned long long)btree_space_waste);
14989 printf("file data blocks allocated: %llu\n referenced %llu\n",
14990 (unsigned long long)data_bytes_allocated,
14991 (unsigned long long)data_bytes_referenced);
14993 free_qgroup_counts();
14994 free_root_recs_tree(&root_cache);
14995 close_out:
14996 close_ctree(root);
14997 err_out:
14998 if (ctx.progress_enabled)
14999 task_deinit(ctx.info);
15001 return err;