/*
 * Copyright (C) 2014 SUSE. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Authors: Mark Fasheh <mfasheh@suse.de>
 */

#include <uuid/uuid.h>
#include "kerncompat.h"
#include "radix-tree.h"
#include "print-tree.h"
#include "kernel-shared/ulist.h"
#include "rbtree-utils.h"
#include "transaction.h"
#include "qgroup-verify.h"

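/*
 * Added note: optional progress counter owned by the caller (typically
 * the progress reporting in btrfs check); when set via
 * qgroup_set_item_count_ptr(), travel_tree() increments it once per
 * node pointer it follows.
 */
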
static u64 *qgroup_item_count;

void qgroup_set_item_count_ptr(u64 *item_count_ptr)
{
        qgroup_item_count = item_count_ptr;
}

/*#define QGROUP_VERIFY_DEBUG*/
static unsigned long tot_extents_scanned = 0;

static struct qgroup_count *find_count(u64 qgroupid);

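/*
 * Added note: each qgroup carries two sets of numbers below. 'diskinfo'
 * holds the counts as stored in the qgroup info item on disk, 'info'
 * holds the counts recomputed from the extent tree;
 * report_qgroup_difference() compares the two.
 */
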
struct qgroup_info {
        u64 referenced;
        u64 referenced_compressed;
        u64 exclusive;
        u64 exclusive_compressed;
};

struct qgroup_count {
        u64 qgroupid;
        int subvol_exists;

        struct btrfs_disk_key key;
        struct qgroup_info diskinfo;

        struct qgroup_info info;

        struct rb_node rb_node;

        /* Parents when we are a child group */
        struct list_head groups;

        /*
         * Children when we are a parent group (not currently used but
         * maintained to mirror kernel handling of qgroups)
         */
        struct list_head members;

        u64 cur_refcnt;

        struct list_head bad_list;
};

static struct counts_tree {
        struct rb_root root;
        unsigned int num_groups;
        unsigned int rescan_running:1;
        unsigned int qgroup_inconsist:1;
        u64 scan_progress;
} counts = { .root = RB_ROOT };

static LIST_HEAD(bad_qgroups);

static struct rb_root by_bytenr = RB_ROOT;

/*
 * Glue structure to represent the relations between qgroups. Mirrored
 * from the kernel.
 */
struct btrfs_qgroup_list {
        struct list_head next_group;
        struct list_head next_member;
        struct qgroup_count *group; /* Parent group */
        struct qgroup_count *member;
};

/* Allow us to reset ref counts during accounting without zeroing each group. */
static u64 qgroup_seq = 1ULL;

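/*
 * Added note: a group's cur_refcnt is only meaningful while it is
 * >= qgroup_seq. Bumping qgroup_seq past every live count (see
 * inc_qgroup_seq below) therefore "zeroes" all refcounts in O(1).
 */
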
static inline void update_cur_refcnt(struct qgroup_count *c)
{
        if (c->cur_refcnt < qgroup_seq)
                c->cur_refcnt = qgroup_seq;
        c->cur_refcnt++;
}

static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
{
        if (c->cur_refcnt < qgroup_seq)
                return 0;
        return c->cur_refcnt - qgroup_seq;
}

static void inc_qgroup_seq(int root_count)
{
        qgroup_seq += root_count + 1;
}

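/*
 * Added note: during one extent a group's cur_refcnt can be raised at
 * most root_count times above the old sequence (once per root that
 * reaches it), so advancing by root_count + 1 guarantees every count
 * from this round reads back as stale next time.
 */
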
/*
 * List of interior tree blocks. We walk this list after loading the
 * extent tree to resolve implied refs. For each interior node we'll
 * place a shared ref in the ref tree against each child object. This
 * allows the shared ref resolving code to do the actual work later of
 * finding roots to account against.
 *
 * An implied ref is when a tree block has refs on it that may not
 * exist in any of its child nodes. Even though the refs might not
 * exist further down the tree, the fact that our interior node has a
 * ref means we need to account anything below it to all its roots.
 */
static struct ulist *tree_blocks = NULL;        /* unode->val = bytenr, ->aux
                                                 * = tree_block pointer */

struct tree_block {
        int level;
        u64 num_bytes;
};

struct ref {
        u64 bytenr;
        u64 num_bytes;
        u64 parent;
        u64 root;

        struct rb_node bytenr_node;
};

#ifdef QGROUP_VERIFY_DEBUG
static void print_ref(struct ref *ref)
{
        printf("bytenr: %llu\t\tnum_bytes: %llu\t\t parent: %llu\t\t"
               "root: %llu\n", ref->bytenr, ref->num_bytes,
               ref->parent, ref->root);
}

static void print_all_refs(void)
{
        unsigned long count = 0;
        struct ref *ref;
        struct rb_node *node;

        node = rb_first(&by_bytenr);
        while (node) {
                ref = rb_entry(node, struct ref, bytenr_node);

                print_ref(ref);
                count++;

                node = rb_next(node);
        }

        printf("%lu extents scanned with %lu refs in total.\n",
               tot_extents_scanned, count);
}
#endif

/*
 * Store by bytenr in rbtree
 *
 * The tree is sorted in ascending order by bytenr, then parent, then
 * root. Since full refs have a parent == 0, those will come before
 * shared refs.
 */
static int compare_ref(struct ref *orig, u64 bytenr, u64 root, u64 parent)
{
        if (bytenr < orig->bytenr)
                return -1;
        if (bytenr > orig->bytenr)
                return 1;

        if (parent < orig->parent)
                return -1;
        if (parent > orig->parent)
                return 1;

        if (root < orig->root)
                return -1;
        if (root > orig->root)
                return 1;
        return 0;
}

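/*
 * Added example: for one extent at bytenr B, (B, parent 0, root 5)
 * sorts before (B, parent 4096, root 0), so the rooted (full) refs
 * cluster at the front of each bytenr run.
 */
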
/*
 * Insert a new ref into the tree. Returns the existing ref entry
 * if one is already there.
 */
static struct ref *insert_ref(struct ref *ref)
{
        int ret;
        struct rb_node **p = &by_bytenr.rb_node;
        struct rb_node *parent = NULL;
        struct ref *curr;

        while (*p) {
                parent = *p;
                curr = rb_entry(parent, struct ref, bytenr_node);

                ret = compare_ref(curr, ref->bytenr, ref->root, ref->parent);
                if (ret < 0)
                        p = &(*p)->rb_left;
                else if (ret > 0)
                        p = &(*p)->rb_right;
                else
                        return curr;
        }

        rb_link_node(&ref->bytenr_node, parent, p);
        rb_insert_color(&ref->bytenr_node, &by_bytenr);
        return ref;
}

/*
 * Partial search, returns the first ref with matching bytenr. Caller
 * can walk forward from there.
 *
 * Leftmost refs will be full refs - this is used to our advantage
 * when resolving roots.
 */
static struct ref *find_ref_bytenr(u64 bytenr)
{
        struct rb_node *n = by_bytenr.rb_node;
        struct ref *ref;

        while (n) {
                ref = rb_entry(n, struct ref, bytenr_node);

                if (bytenr < ref->bytenr)
                        n = n->rb_left;
                else if (bytenr > ref->bytenr)
                        n = n->rb_right;
                else {
                        /* Walk to the left to find the first item */
                        struct rb_node *node_left = rb_prev(&ref->bytenr_node);
                        struct ref *ref_left;

                        while (node_left) {
                                ref_left = rb_entry(node_left, struct ref,
                                                    bytenr_node);
                                if (ref_left->bytenr != ref->bytenr)
                                        break;
                                ref = ref_left;
                                node_left = rb_prev(node_left);
                        }
                        return ref;
                }
        }
        return NULL;
}

static struct ref *find_ref(u64 bytenr, u64 root, u64 parent)
{
        struct rb_node *n = by_bytenr.rb_node;
        struct ref *ref;
        int ret;

        while (n) {
                ref = rb_entry(n, struct ref, bytenr_node);

                ret = compare_ref(ref, bytenr, root, parent);
                if (ret < 0)
                        n = n->rb_left;
                else if (ret > 0)
                        n = n->rb_right;
                else
                        return ref;
        }
        return NULL;
}

static struct ref *alloc_ref(u64 bytenr, u64 root, u64 parent, u64 num_bytes)
{
        struct ref *ref = find_ref(bytenr, root, parent);

        BUG_ON(parent && root);

        if (ref == NULL) {
                ref = calloc(1, sizeof(*ref));
                if (ref) {
                        ref->bytenr = bytenr;
                        ref->root = root;
                        ref->parent = parent;
                        ref->num_bytes = num_bytes;

                        insert_ref(ref);
                }
        }
        return ref;
}

static void free_ref_node(struct rb_node *node)
{
        struct ref *ref = rb_entry(node, struct ref, bytenr_node);
        free(ref);
}

FREE_RB_BASED_TREE(ref, free_ref_node);

/*
 * Resolves all the possible roots for the ref at parent.
 */
static int find_parent_roots(struct ulist *roots, u64 parent)
{
        struct ref *ref;
        struct rb_node *node;
        int ret;

        /*
         * Search the rbtree for the first ref with bytenr == parent.
         * Walk forward so long as bytenr == parent, adding resolved root ids.
         * For each unresolved root, we recurse.
         */
        ref = find_ref_bytenr(parent);
        if (!ref) {
                error("bytenr ref not found for parent %llu",
                      (unsigned long long)parent);
                return -EIO;
        }
        node = &ref->bytenr_node;
        if (ref->bytenr != parent) {
                error("found bytenr ref does not match parent: %llu != %llu",
                      (unsigned long long)ref->bytenr,
                      (unsigned long long)parent);
                return -EIO;
        }

        {
                /*
                 * Random sanity check, are we actually getting the
                 * leftmost node?
                 */
                struct rb_node *prev_node = rb_prev(&ref->bytenr_node);
                struct ref *prev;

                if (prev_node) {
                        prev = rb_entry(prev_node, struct ref, bytenr_node);
                        if (prev->bytenr == parent) {
                                error(
                        "unexpected: prev bytenr same as parent: %llu",
                                      (unsigned long long)parent);
                                return -EIO;
                        }
                }
        }

        do {
                if (is_fstree(ref->root)) {
                        ret = ulist_add(roots, ref->root, 0, 0);
                        if (ret < 0)
                                return ret;
                } else if (ref->parent == ref->bytenr) {
                        /*
                         * Special loop case for tree reloc tree
                         */
                        ref->root = BTRFS_TREE_RELOC_OBJECTID;
                } else {
                        ret = find_parent_roots(roots, ref->parent);
                        if (ret < 0)
                                return ret;
                }

                node = rb_next(node);
                if (node)
                        ref = rb_entry(node, struct ref, bytenr_node);
        } while (node && ref->bytenr == parent);

        return 0;
}

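/*
 * Added note on the accounting rule below (mirrors the kernel's qgroup
 * math): every qgroup reached from at least one root of an extent has
 * the extent's bytes added to 'referenced'; only a qgroup reached from
 * *all* of the extent's roots also gets them added to 'exclusive'.
 *
 * Worked example: an extent with roots {256, 257}, where both subvols
 * share parent 1/100, yields refcounts 0/256 -> 1, 0/257 -> 1 and
 * 1/100 -> 2. All three gain 'referenced' bytes, but with nr_roots == 2
 * only 1/100 gains 'exclusive' bytes.
 */
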
static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
{
        int ret;
        u64 id, nr_roots, nr_refs;
        struct qgroup_count *count;
        struct ulist *counts = ulist_alloc(0);
        struct ulist *tmp = ulist_alloc(0);
        struct ulist_iterator uiter;
        struct ulist_iterator tmp_uiter;
        struct ulist_node *unode;
        struct ulist_node *tmp_unode;
        struct btrfs_qgroup_list *glist;

        if (!counts || !tmp) {
                ret = -ENOMEM;
                goto out;
        }

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
                BUG_ON(unode->val == 0ULL);

                /*
                 * For each root, find their corresponding tracking group and
                 * add it to our qgroups list.
                 */
                count = find_count(unode->val);
                if (!count)
                        continue;

                BUG_ON(!is_fstree(unode->val));
                ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
                if (ret < 0)
                        goto out;

                /*
                 * Now we look for parents (and parents of those...). Use a tmp
                 * ulist here to avoid re-walking (and re-incrementing) our
                 * already added items on every loop iteration.
                 */
                ulist_reinit(tmp);
                ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
                if (ret < 0)
                        goto out;

                ULIST_ITER_INIT(&tmp_uiter);
                while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
                        /* Bump the refcount on a node every time we see it. */
                        count = u64_to_ptr(tmp_unode->aux);
                        update_cur_refcnt(count);

                        list_for_each_entry(glist, &count->groups, next_group) {
                                struct qgroup_count *parent;

                                parent = glist->group;
                                id = parent->qgroupid;

                                ret = ulist_add(counts, id, ptr_to_u64(parent),
                                                0);
                                if (ret < 0)
                                        goto out;
                                ret = ulist_add(tmp, id, ptr_to_u64(parent),
                                                0);
                                if (ret < 0)
                                        goto out;
                        }
                }
        }

        /*
         * Now that we have gathered up and counted all the groups, we
         * can add bytes for this ref.
         */
        nr_roots = roots->nnodes;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(counts, &uiter))) {
                count = u64_to_ptr(unode->aux);

                nr_refs = group_get_cur_refcnt(count);
                if (nr_refs) {
                        count->info.referenced += num_bytes;
                        count->info.referenced_compressed += num_bytes;

                        if (nr_refs == nr_roots) {
                                count->info.exclusive += num_bytes;
                                count->info.exclusive_compressed += num_bytes;
                        }
                }
#ifdef QGROUP_VERIFY_DEBUG
                printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
                       " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
                       btrfs_qgroup_level(count->qgroupid),
                       btrfs_qgroup_subvid(count->qgroupid),
                       count->info.referenced, count->info.exclusive, nr_refs,
                       nr_roots);
#endif
        }

        inc_qgroup_seq(roots->nnodes);
        ret = 0;
out:
        ulist_free(counts);
        ulist_free(tmp);
        return ret;
}

static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
                              struct ulist *roots);

/*
 * Account each ref. Walk the refs, for each set of refs in a
 * given bytenr:
 *
 * - add the roots for direct refs to the ref roots ulist
 *
 * - resolve all possible roots for shared refs, insert each
 *   of those into ref_roots ulist (this is a recursive process)
 *
 * - With all roots resolved we can account the ref - this is done in
 *   account_one_extent().
 */
static int account_all_refs(int do_qgroups, u64 search_subvol)
{
        struct ref *ref;
        struct rb_node *node;
        u64 bytenr, num_bytes;
        struct ulist *roots = ulist_alloc(0);
        int ret;

        if (!roots)
                goto enomem;

        node = rb_first(&by_bytenr);
        while (node) {
                ulist_reinit(roots);

                ref = rb_entry(node, struct ref, bytenr_node);
                /*
                 * Walk forward through the list of refs for this
                 * bytenr, adding roots to our ulist. If it's a full
                 * ref, then we have the easy case. Otherwise we need
                 * to search for roots.
                 */
                bytenr = ref->bytenr;
                num_bytes = ref->num_bytes;
                do {
                        BUG_ON(ref->bytenr != bytenr);
                        BUG_ON(ref->num_bytes != num_bytes);
                        if (is_fstree(ref->root)) {
                                if (ulist_add(roots, ref->root, 0, 0) < 0)
                                        goto enomem;
                        } else {
                                ret = find_parent_roots(roots, ref->parent);
                                if (ret < 0)
                                        goto enomem;
                        }

                        /*
                         * When we leave this inner loop, node is set
                         * to next in our tree and will be turned into
                         * a ref object up top
                         */
                        node = rb_next(node);
                        if (node)
                                ref = rb_entry(node, struct ref, bytenr_node);
                } while (node && ref->bytenr == bytenr);

                if (search_subvol)
                        print_subvol_info(search_subvol, bytenr, num_bytes,
                                          roots);

                if (!do_qgroups)
                        continue;

                if (account_one_extent(roots, bytenr, num_bytes))
                        goto enomem;
        }

        ulist_free(roots);
        return 0;

enomem:
        error("Out of memory while accounting refs for qgroups");
        ulist_free(roots);
        return -ENOMEM;
}

static u64 resolve_one_root(u64 bytenr)
{
        struct ref *ref = find_ref_bytenr(bytenr);

        BUG_ON(ref == NULL);

        if (is_fstree(ref->root))
                return ref->root;
        if (ref->parent == bytenr)
                return BTRFS_TREE_RELOC_OBJECTID;
        return resolve_one_root(ref->parent);
}

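/*
 * Added note: the recursion above terminates because refs sort with
 * parent == 0 first, so whenever a bytenr still has a full (rooted)
 * ref, find_ref_bytenr() returns it directly; otherwise we chase the
 * shared ref's parent upward, and a block recorded as its own parent
 * marks the tree reloc tree loop.
 */
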
static inline struct tree_block *unode_tree_block(struct ulist_node *unode)
{
        return u64_to_ptr(unode->aux);
}

static inline u64 unode_bytenr(struct ulist_node *unode)
{
        return unode->val;
}

static int alloc_tree_block(u64 bytenr, u64 num_bytes, int level)
{
        struct tree_block *block = calloc(1, sizeof(*block));

        if (block) {
                block->num_bytes = num_bytes;
                block->level = level;
                if (ulist_add(tree_blocks, bytenr, ptr_to_u64(block), 0) >= 0)
                        return 0;
                free(block);
        }
        return -ENOMEM;
}

static void free_tree_blocks(void)
{
        struct ulist_iterator uiter;
        struct ulist_node *unode;

        if (!tree_blocks)
                return;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tree_blocks, &uiter)))
                free(unode_tree_block(unode));
        ulist_free(tree_blocks);
        tree_blocks = NULL;
}

#ifdef QGROUP_VERIFY_DEBUG
static void print_tree_block(u64 bytenr, struct tree_block *block)
{
        struct ref *ref;
        struct rb_node *node;

        printf("tree block: %llu\t\tlevel: %d\n", (unsigned long long)bytenr,
               block->level);

        ref = find_ref_bytenr(bytenr);
        node = &ref->bytenr_node;
        do {
                print_ref(ref);
                node = rb_next(node);
                if (node)
                        ref = rb_entry(node, struct ref, bytenr_node);
        } while (node && ref->bytenr == bytenr);

        printf("\n");
}

static void print_all_tree_blocks(void)
{
        struct ulist_iterator uiter;
        struct ulist_node *unode;

        if (!tree_blocks)
                return;

        printf("Listing all found interior tree nodes:\n");

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tree_blocks, &uiter)))
                print_tree_block(unode_bytenr(unode), unode_tree_block(unode));
}
#endif

static int add_refs_for_leaf_items(struct extent_buffer *eb, u64 ref_parent)
{
        int nr, i, extent_type;
        u64 bytenr, num_bytes;
        struct btrfs_key key;
        struct btrfs_disk_key disk_key;
        struct btrfs_file_extent_item *fi;

        nr = btrfs_header_nritems(eb);
        for (i = 0; i < nr; i++) {
                btrfs_item_key(eb, &disk_key, i);
                btrfs_disk_key_to_cpu(&key, &disk_key);

                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;

                fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
                /* filter out: inline, disk_bytenr == 0, compressed?
                 * not if we can avoid it */
                extent_type = btrfs_file_extent_type(eb, fi);

                if (extent_type == BTRFS_FILE_EXTENT_INLINE)
                        continue;

                bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
                if (!bytenr)
                        continue;

                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
                if (alloc_ref(bytenr, 0, ref_parent, num_bytes) == NULL)
                        return -ENOMEM;
        }

        return 0;
}

static int travel_tree(struct btrfs_fs_info *info, struct btrfs_root *root,
                       u64 bytenr, u64 num_bytes, u64 ref_parent)
{
        int ret, nr, i;
        struct extent_buffer *eb;
        u64 new_bytenr;
        u64 new_num_bytes;

//      printf("travel_tree: bytenr: %llu\tnum_bytes: %llu\tref_parent: %llu\n",
//             bytenr, num_bytes, ref_parent);

        eb = read_tree_block(info, bytenr, 0);
        if (!extent_buffer_uptodate(eb)) {
                free_extent_buffer(eb);
                return -EIO;
        }

        ret = 0;
        /* Don't add a ref for our starting tree block to itself */
        if (bytenr != ref_parent) {
                if (alloc_ref(bytenr, 0, ref_parent, num_bytes) == NULL) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        if (btrfs_is_leaf(eb)) {
                ret = add_refs_for_leaf_items(eb, ref_parent);
                goto out;
        }

        /*
         * Interior nodes are tuples of (key, bytenr) where key is the
         * leftmost key in the tree block pointed to by bytenr. We
         * don't have to care about key here, just follow the bytenr
         * pointer.
         */
        nr = btrfs_header_nritems(eb);
        for (i = 0; i < nr; i++) {
                (*qgroup_item_count)++;
                new_bytenr = btrfs_node_blockptr(eb, i);
                new_num_bytes = info->nodesize;

                ret = travel_tree(info, root, new_bytenr, new_num_bytes,
                                  ref_parent);
                if (ret)
                        goto out;
        }

out:
        free_extent_buffer(eb);
        return ret;
}

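/*
 * Added note: every block reached below the starting interior node
 * gets a shared ref whose parent is that node's bytenr, which
 * materializes the "implied refs" described above the tree_blocks
 * declaration.
 */
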
static int add_refs_for_implied(struct btrfs_fs_info *info, u64 bytenr,
                                struct tree_block *block)
{
        int ret;
        u64 root_id = resolve_one_root(bytenr);
        struct btrfs_root *root;
        struct btrfs_key key;

        /* Tree reloc tree doesn't contribute qgroup, skip it */
        if (root_id == BTRFS_TREE_RELOC_OBJECTID)
                return 0;

        key.objectid = root_id;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;

        /*
         * XXX: Don't free the root object as we don't know whether it
         * came off our fs_info struct or not.
         */
        root = btrfs_read_fs_root(info, &key);
        if (!root || IS_ERR(root))
                return -EIO;

        ret = travel_tree(info, root, bytenr, block->num_bytes, bytenr);
        if (ret)
                return ret;

        return 0;
}

/*
 * Place shared refs in the ref tree for each child of an interior tree node.
 */
static int map_implied_refs(struct btrfs_fs_info *info)
{
        int ret = 0;
        struct ulist_iterator uiter;
        struct ulist_node *unode;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tree_blocks, &uiter))) {
                ret = add_refs_for_implied(info, unode_bytenr(unode),
                                           unode_tree_block(unode));
                if (ret)
                        break;
        }

        return ret;
}

/*
 * Insert a new count into the tree, keyed by qgroupid. Returns
 * nonzero if a count with that qgroupid is already there.
 */
static int insert_count(struct qgroup_count *qc)
{
        struct rb_node **p = &counts.root.rb_node;
        struct rb_node *parent = NULL;
        struct qgroup_count *curr;

        while (*p) {
                parent = *p;
                curr = rb_entry(parent, struct qgroup_count, rb_node);

                if (qc->qgroupid < curr->qgroupid)
                        p = &(*p)->rb_left;
                else if (qc->qgroupid > curr->qgroupid)
                        p = &(*p)->rb_right;
                else
                        return -EEXIST;
        }
        counts.num_groups++;
        rb_link_node(&qc->rb_node, parent, p);
        rb_insert_color(&qc->rb_node, &counts.root);
        return 0;
}

static struct qgroup_count *find_count(u64 qgroupid)
{
        struct rb_node *n = counts.root.rb_node;
        struct qgroup_count *count;

        while (n) {
                count = rb_entry(n, struct qgroup_count, rb_node);

                if (qgroupid < count->qgroupid)
                        n = n->rb_left;
                else if (qgroupid > count->qgroupid)
                        n = n->rb_right;
                else
                        return count;
        }
        return NULL;
}

static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
                                        struct extent_buffer *leaf,
                                        struct btrfs_qgroup_info_item *disk)
{
        struct qgroup_count *c = calloc(1, sizeof(*c));
        struct qgroup_info *item;

        if (c) {
                c->qgroupid = btrfs_disk_key_offset(key);
                c->key = *key;

                item = &c->diskinfo;
                item->referenced = btrfs_qgroup_info_referenced(leaf, disk);
                item->referenced_compressed =
                        btrfs_qgroup_info_referenced_compressed(leaf, disk);
                item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
                item->exclusive_compressed =
                        btrfs_qgroup_info_exclusive_compressed(leaf, disk);
                INIT_LIST_HEAD(&c->groups);
                INIT_LIST_HEAD(&c->members);
                INIT_LIST_HEAD(&c->bad_list);

                if (insert_count(c)) {
                        free(c);
                        c = NULL;
                }
        }
        return c;
}

static int add_qgroup_relation(u64 memberid, u64 parentid)
{
        struct qgroup_count *member;
        struct qgroup_count *parent;
        struct btrfs_qgroup_list *list;

        if (memberid > parentid)
                return 0;

        member = find_count(memberid);
        parent = find_count(parentid);
        if (!member || !parent)
                return -ENOENT;

        list = calloc(1, sizeof(*list));
        if (!list)
                return -ENOMEM;

        list->group = parent;
        list->member = member;
        list_add_tail(&list->next_group, &member->groups);
        list_add_tail(&list->next_member, &parent->members);

        return 0;
}

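/*
 * Added note: qgroup relation items are written in both directions,
 * once keyed (member, parent) and once (parent, member); the
 * memberid > parentid test above keeps a single orientation so each
 * edge is linked exactly once.
 */
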
static void read_qgroup_status(struct extent_buffer *eb, int slot,
                               struct counts_tree *counts)
{
        struct btrfs_qgroup_status_item *status_item;
        u64 flags;

        status_item = btrfs_item_ptr(eb, slot, struct btrfs_qgroup_status_item);
        flags = btrfs_qgroup_status_flags(eb, status_item);
        /*
         * Since qgroup_inconsist/rescan_running is just one bit,
         * assigning the value directly won't work.
         */
        counts->qgroup_inconsist = !!(flags &
                        BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT);
        counts->rescan_running = !!(flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN);
        counts->scan_progress = btrfs_qgroup_status_rescan(eb, status_item);
}

static int load_quota_info(struct btrfs_fs_info *info)
{
        int ret;
        struct btrfs_root *root = info->quota_root;
        struct btrfs_root *tmproot;
        struct btrfs_path path;
        struct btrfs_key key;
        struct btrfs_key root_key;
        struct btrfs_disk_key disk_key;
        struct extent_buffer *leaf;
        struct btrfs_qgroup_info_item *item;
        struct qgroup_count *count;
        int i, nr;
        int search_relations = 0;

loop:
        /*
         * Do 2 passes, the first allocates group counts and reads status
         * items. The 2nd pass picks up relation items and glues them to their
         * respective count structures.
         */
        btrfs_init_path(&path);

        key.offset = 0;
        key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
        key.type = 0;

        ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "ERROR: Couldn't search slot: %d\n", ret);
                goto out;
        }

        while (1) {
                leaf = path.nodes[0];

                nr = btrfs_header_nritems(leaf);
                for (i = 0; i < nr; i++) {
                        btrfs_item_key(leaf, &disk_key, i);
                        btrfs_disk_key_to_cpu(&key, &disk_key);

                        if (search_relations) {
                                if (key.type == BTRFS_QGROUP_RELATION_KEY) {
                                        ret = add_qgroup_relation(key.objectid,
                                                                  key.offset);
                                        if (ret) {
                                                error("out of memory");
                                                goto out;
                                        }
                                }
                                continue;
                        }

                        if (key.type == BTRFS_QGROUP_STATUS_KEY) {
                                read_qgroup_status(leaf, i, &counts);
                                continue;
                        }

                        /*
                         * At this point, we can ignore anything that
                         * isn't a qgroup info.
                         */
                        if (key.type != BTRFS_QGROUP_INFO_KEY)
                                continue;

                        item = btrfs_item_ptr(leaf, i,
                                              struct btrfs_qgroup_info_item);

                        count = alloc_count(&disk_key, leaf, item);
                        if (!count) {
                                ret = -ENOMEM;
                                fprintf(stderr, "ERROR: out of memory\n");
                                goto out;
                        }

                        root_key.objectid = key.offset;
                        root_key.type = BTRFS_ROOT_ITEM_KEY;
                        root_key.offset = (u64)-1;
                        tmproot = btrfs_read_fs_root_no_cache(info, &root_key);
                        if (tmproot && !IS_ERR(tmproot)) {
                                count->subvol_exists = 1;
                                btrfs_free_fs_root(tmproot);
                        }
                }

                ret = btrfs_next_leaf(root, &path);
                if (ret != 0)
                        break;
        }

        ret = 0;
        btrfs_release_path(&path);

        if (!search_relations) {
                search_relations = 1;
                goto loop;
        }

out:
        return ret;
}

*info
,
1032 struct extent_buffer
*ei_leaf
, int slot
,
1033 u64 bytenr
, u64 num_bytes
, int meta_item
)
1035 struct btrfs_extent_item
*ei
;
1036 struct btrfs_extent_inline_ref
*iref
;
1037 struct btrfs_extent_data_ref
*dref
;
1038 u64 flags
, root_obj
, offset
, parent
;
1039 u32 item_size
= btrfs_item_size_nr(ei_leaf
, slot
);
1044 ei
= btrfs_item_ptr(ei_leaf
, slot
, struct btrfs_extent_item
);
1045 flags
= btrfs_extent_flags(ei_leaf
, ei
);
1047 if (flags
& BTRFS_EXTENT_FLAG_TREE_BLOCK
&& !meta_item
) {
1048 struct btrfs_tree_block_info
*tbinfo
;
1049 tbinfo
= (struct btrfs_tree_block_info
*)(ei
+ 1);
1050 iref
= (struct btrfs_extent_inline_ref
*)(tbinfo
+ 1);
1052 iref
= (struct btrfs_extent_inline_ref
*)(ei
+ 1);
1055 ptr
= (unsigned long)iref
;
1056 end
= (unsigned long)ei
+ item_size
;
1058 iref
= (struct btrfs_extent_inline_ref
*)ptr
;
1060 parent
= root_obj
= 0;
1061 offset
= btrfs_extent_inline_ref_offset(ei_leaf
, iref
);
1062 type
= btrfs_extent_inline_ref_type(ei_leaf
, iref
);
1064 case BTRFS_TREE_BLOCK_REF_KEY
:
1067 case BTRFS_EXTENT_DATA_REF_KEY
:
1068 dref
= (struct btrfs_extent_data_ref
*)(&iref
->offset
);
1069 root_obj
= btrfs_extent_data_ref_root(ei_leaf
, dref
);
1071 case BTRFS_SHARED_DATA_REF_KEY
:
1072 case BTRFS_SHARED_BLOCK_REF_KEY
:
1079 if (alloc_ref(bytenr
, root_obj
, parent
, num_bytes
) == NULL
)
1082 ptr
+= btrfs_extent_inline_ref_size(type
);
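/*
 * Added layout note: in a non-skinny EXTENT_ITEM for a tree block, a
 * btrfs_tree_block_info sits between the extent item and the first
 * inline ref, which is why the cursor above is advanced past it;
 * skinny METADATA_ITEMs (meta_item != 0) omit that struct.
 */
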
static int add_keyed_ref(struct btrfs_fs_info *info,
                         struct btrfs_key *key,
                         struct extent_buffer *leaf, int slot,
                         u64 bytenr, u64 num_bytes)
{
        u64 root_obj = 0, parent = 0;
        struct btrfs_extent_data_ref *dref;

        switch (key->type) {
        case BTRFS_TREE_BLOCK_REF_KEY:
                root_obj = key->offset;
                break;
        case BTRFS_EXTENT_DATA_REF_KEY:
                dref = btrfs_item_ptr(leaf, slot, struct btrfs_extent_data_ref);
                root_obj = btrfs_extent_data_ref_root(leaf, dref);
                break;
        case BTRFS_SHARED_DATA_REF_KEY:
        case BTRFS_SHARED_BLOCK_REF_KEY:
                parent = key->offset;
                break;
        default:
                return 1;
        }

        if (alloc_ref(bytenr, root_obj, parent, num_bytes) == NULL)
                return -ENOMEM;

        return 0;
}

/*
 * A return value of 0 indicates a leaf or non-metadata item. The code
 * that calls this does not need to make a distinction between the two as
 * it is only concerned with intermediate blocks, which will always
 * have level > 0.
 */
static int get_tree_block_level(struct btrfs_key *key,
                                struct extent_buffer *ei_leaf,
                                int slot)
{
        int level = 0;
        int meta_key = key->type == BTRFS_METADATA_ITEM_KEY;
        u64 flags;
        struct btrfs_extent_item *ei;

        ei = btrfs_item_ptr(ei_leaf, slot, struct btrfs_extent_item);
        flags = btrfs_extent_flags(ei_leaf, ei);

        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !meta_key) {
                struct btrfs_tree_block_info *tbinfo;
                tbinfo = (struct btrfs_tree_block_info *)(ei + 1);
                level = btrfs_tree_block_level(ei_leaf, tbinfo);
        } else if (meta_key) {
                /* skinny metadata */
                level = (int)key->offset;
        }
        return level;
}

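/*
 * Added note: skinny METADATA_ITEMs carry the block level in the key
 * offset and omit btrfs_tree_block_info entirely; that is also why
 * scan_extents() substitutes nodesize for key.offset as the block
 * length of those items.
 */
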
/*
 * Walk the extent tree, allocating a ref item for every ref and
 * storing it in the bytenr tree.
 */
static int scan_extents(struct btrfs_fs_info *info,
                        u64 start, u64 end)
{
        int ret, i, nr, level;
        struct btrfs_root *root = info->extent_root;
        struct btrfs_key key;
        struct btrfs_path path;
        struct btrfs_disk_key disk_key;
        struct extent_buffer *leaf;
        u64 bytenr = 0, num_bytes = 0;

        btrfs_init_path(&path);

        key.objectid = start;
        key.type = 0;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
        if (ret < 0) {
                fprintf(stderr, "ERROR: Couldn't search slot: %d\n", ret);
                goto out;
        }
        path.reada = READA_BACK;

        while (1) {
                leaf = path.nodes[0];

                nr = btrfs_header_nritems(leaf);
                for (i = 0; i < nr; i++) {
                        btrfs_item_key(leaf, &disk_key, i);
                        btrfs_disk_key_to_cpu(&key, &disk_key);

                        if (key.objectid < start)
                                continue;

                        if (key.objectid > end)
                                goto done;

                        if (key.type == BTRFS_EXTENT_ITEM_KEY ||
                            key.type == BTRFS_METADATA_ITEM_KEY) {
                                int meta = 0;

                                tot_extents_scanned++;

                                bytenr = key.objectid;
                                num_bytes = key.offset;
                                if (key.type == BTRFS_METADATA_ITEM_KEY) {
                                        num_bytes = info->nodesize;
                                        meta = 1;
                                }

                                ret = add_inline_refs(info, leaf, i, bytenr,
                                                      num_bytes, meta);
                                if (ret)
                                        goto out;

                                level = get_tree_block_level(&key, leaf, i);
                                if (level) {
                                        if (alloc_tree_block(bytenr, num_bytes,
                                                             level)) {
                                                ret = -ENOMEM;
                                                goto out;
                                        }
                                }

                                continue;
                        }

                        if (key.type > BTRFS_SHARED_DATA_REF_KEY)
                                continue;
                        if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
                                continue;

                        /*
                         * Keyed refs should come after their extent
                         * item in the tree. As a result, the value of
                         * bytenr and num_bytes should be unchanged
                         * from the above block that catches the
                         * original extent item.
                         */
                        BUG_ON(key.objectid != bytenr);

                        ret = add_keyed_ref(info, &key, leaf, i, bytenr,
                                            num_bytes);
                        if (ret)
                                goto out;
                }

                ret = btrfs_next_leaf(root, &path);
                if (ret != 0) {
                        if (ret < 0) {
                                fprintf(stderr,
                                        "ERROR: Next leaf failed: %d\n", ret);
                                goto out;
                        }
                        break;
                }
        }
done:
        ret = 0;
out:
        btrfs_release_path(&path);

        return ret;
}

static void print_fields(u64 bytes, u64 bytes_compressed, char *prefix,
                         char *type)
{
        printf("%s\t\t%s %llu %s compressed %llu\n",
               prefix, type, (unsigned long long)bytes, type,
               (unsigned long long)bytes_compressed);
}

static void print_fields_signed(long long bytes,
                                long long bytes_compressed,
                                char *prefix, char *type)
{
        printf("%s\t\t%s %lld %s compressed %lld\n",
               prefix, type, bytes, type, bytes_compressed);
}

static inline int qgroup_printable(struct qgroup_count *c)
{
        return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
}

static int report_qgroup_difference(struct qgroup_count *count, int verbose)
{
        int is_different;
        struct qgroup_info *info = &count->info;
        struct qgroup_info *disk = &count->diskinfo;
        long long excl_diff = info->exclusive - disk->exclusive;
        long long ref_diff = info->referenced - disk->referenced;

        is_different = excl_diff || ref_diff;

        if (verbose || (is_different && qgroup_printable(count))) {
                printf("Counts for qgroup id: %llu/%llu %s\n",
                       btrfs_qgroup_level(count->qgroupid),
                       btrfs_qgroup_subvid(count->qgroupid),
                       is_different ? "are different" : "");

                print_fields(info->referenced, info->referenced_compressed,
                             "our:", "referenced");
                print_fields(disk->referenced, disk->referenced_compressed,
                             "disk:", "referenced");
                if (ref_diff)
                        print_fields_signed(ref_diff, ref_diff,
                                            "diff:", "referenced");
                print_fields(info->exclusive, info->exclusive_compressed,
                             "our:", "exclusive");
                print_fields(disk->exclusive, disk->exclusive_compressed,
                             "disk:", "exclusive");
                if (excl_diff)
                        print_fields_signed(excl_diff, excl_diff,
                                            "diff:", "exclusive");
        }

        return is_different;
}

/*
 * Report qgroups errors
 * Return 0 if nothing wrong.
 * Return <0 if any qgroup is inconsistent.
 *
 * @all: if set, all qgroups will be checked and reported, even those
 *       already inconsistent or under rescan.
 */
int report_qgroups(int all)
{
        struct rb_node *node;
        struct qgroup_count *c;
        bool found_err = false;
        bool skip_err = false;

        if (!repair && counts.rescan_running) {
                if (all) {
                        printf(
        "Qgroup rescan is running, a difference in qgroup counts is expected\n");
                } else {
                        printf(
        "Qgroup rescan is running, qgroups will not be printed.\n");
                        return 0;
                }
        }
        /*
         * It's possible that rescan hasn't been initialized yet.
         */
        if (counts.qgroup_inconsist && !counts.rescan_running &&
            counts.scan_progress == 0) {
                printf(
"Rescan hasn't been initialized, a difference in qgroup accounting is expected\n");
                skip_err = true;
        }

        if (counts.qgroup_inconsist && !counts.rescan_running)
                fprintf(stderr, "Qgroups are marked as inconsistent.\n");

        node = rb_first(&counts.root);
        while (node) {
                c = rb_entry(node, struct qgroup_count, rb_node);

                if (report_qgroup_difference(c, all)) {
                        list_add_tail(&c->bad_list, &bad_qgroups);
                        found_err = true;
                }

                node = rb_next(node);
        }

        if (found_err && !skip_err)
                return -EUCLEAN;
        return 0;
}

void free_qgroup_counts(void)
{
        struct rb_node *node;
        struct qgroup_count *c;
        struct btrfs_qgroup_list *glist, *tmpglist;

        node = rb_first(&counts.root);
        while (node) {
                c = rb_entry(node, struct qgroup_count, rb_node);

                list_del(&c->bad_list);

                list_for_each_entry_safe(glist, tmpglist, &c->groups,
                                         next_group) {
                        list_del(&glist->next_group);
                        list_del(&glist->next_member);
                        free(glist);
                }
                list_for_each_entry_safe(glist, tmpglist, &c->members,
                                         next_member) {
                        list_del(&glist->next_group);
                        list_del(&glist->next_member);
                        free(glist);
                }

                node = rb_next(node);

                rb_erase(&c->rb_node, &counts.root);
                free(c);
        }
}

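/*
 * Added overview of the verification pipeline below: load_quota_info()
 * reads the on-disk qgroup items, scan_extents() records every extent
 * ref, map_implied_refs() adds shared refs for the children of interior
 * nodes, and account_all_refs() recomputes the counts that
 * report_qgroups() later compares against disk.
 */
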
int qgroup_verify_all(struct btrfs_fs_info *info)
{
        int ret;

        if (!info->quota_enabled)
                return 0;

        tree_blocks = ulist_alloc(0);
        if (!tree_blocks) {
                fprintf(stderr,
                        "ERROR: Out of memory while allocating ulist.\n");
                return -ENOMEM;
        }

        ret = load_quota_info(info);
        if (ret) {
                fprintf(stderr, "ERROR: Loading qgroups from disk: %d\n", ret);
                goto out;
        }

        /*
         * Put all extent refs into our rbtree
         */
        ret = scan_extents(info, 0, ~0ULL);
        if (ret) {
                fprintf(stderr, "ERROR: while scanning extent tree: %d\n", ret);
                goto out;
        }

        ret = map_implied_refs(info);
        if (ret) {
                fprintf(stderr, "ERROR: while mapping refs: %d\n", ret);
                goto out;
        }

        ret = account_all_refs(1, 0);

out:
        /*
         * Don't free the qgroup count records as they will be walked
         * later via the print function.
         */
        free_tree_blocks();
        free_ref_tree(&by_bytenr);
        return ret;
}

static void __print_subvol_info(u64 bytenr, u64 num_bytes, struct ulist *roots)
{
        int n = roots->nnodes;
        struct ulist_iterator uiter;
        struct ulist_node *unode;

        printf("%llu\t%llu\t%d\t", bytenr, num_bytes, n);

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
                printf("%llu ", unode->val);
        }
        printf("\n");
}

static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
                              struct ulist *roots)
{
        struct ulist_iterator uiter;
        struct ulist_node *unode;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
                BUG_ON(unode->val == 0ULL);
                if (unode->val == subvolid) {
                        __print_subvol_info(bytenr, num_bytes, roots);
                        return;
                }
        }
}

int print_extent_state(struct btrfs_fs_info *info, u64 subvol)
{
        int ret;

        tree_blocks = ulist_alloc(0);
        if (!tree_blocks) {
                fprintf(stderr,
                        "ERROR: Out of memory while allocating ulist.\n");
                return -ENOMEM;
        }

        /*
         * Put all extent refs into our rbtree
         */
        ret = scan_extents(info, 0, ~0ULL);
        if (ret) {
                fprintf(stderr, "ERROR: while scanning extent tree: %d\n", ret);
                goto out;
        }

        ret = map_implied_refs(info);
        if (ret) {
                fprintf(stderr, "ERROR: while mapping refs: %d\n", ret);
                goto out;
        }

        printf("Offset\t\tLen\tRoot Refs\tRoots\n");
        ret = account_all_refs(0, subvol);

out:
        free_tree_blocks();
        free_ref_tree(&by_bytenr);
        return ret;
}

static int repair_qgroup_info(struct btrfs_fs_info *info,
                              struct qgroup_count *count)
{
        int ret;
        struct btrfs_root *root = info->quota_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_path path;
        struct btrfs_qgroup_info_item *info_item;
        struct btrfs_key key;

        printf("Repair qgroup %llu/%llu\n", btrfs_qgroup_level(count->qgroupid),
               btrfs_qgroup_subvid(count->qgroupid));

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        btrfs_init_path(&path);
        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = count->qgroupid;
        ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
        if (ret) {
                error("could not find disk item for qgroup %llu/%llu",
                      btrfs_qgroup_level(count->qgroupid),
                      btrfs_qgroup_subvid(count->qgroupid));
                if (ret > 0)
                        ret = -ENOENT;
                goto out;
        }

        info_item = btrfs_item_ptr(path.nodes[0], path.slots[0],
                                   struct btrfs_qgroup_info_item);

        btrfs_set_qgroup_info_generation(path.nodes[0], info_item,
                                         trans->transid);

        btrfs_set_qgroup_info_referenced(path.nodes[0], info_item,
                                         count->info.referenced);
        btrfs_set_qgroup_info_referenced_compressed(path.nodes[0], info_item,
                                        count->info.referenced_compressed);

        btrfs_set_qgroup_info_exclusive(path.nodes[0], info_item,
                                        count->info.exclusive);
        btrfs_set_qgroup_info_exclusive_compressed(path.nodes[0], info_item,
                                        count->info.exclusive_compressed);

        btrfs_mark_buffer_dirty(path.nodes[0]);

out:
        btrfs_commit_transaction(trans, root);
        btrfs_release_path(&path);

        return ret;
}

static int repair_qgroup_status(struct btrfs_fs_info *info)
{
        int ret;
        struct btrfs_root *root = info->quota_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_path path;
        struct btrfs_key key;
        struct btrfs_qgroup_status_item *status_item;

        printf("Repair qgroup status item\n");

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        btrfs_init_path(&path);
        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
        if (ret) {
                error("could not find qgroup status item");
                if (ret > 0)
                        ret = -ENOENT;
                goto out;
        }

        status_item = btrfs_item_ptr(path.nodes[0], path.slots[0],
                                     struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_flags(path.nodes[0], status_item,
                                      BTRFS_QGROUP_STATUS_FLAG_ON);
        btrfs_set_qgroup_status_rescan(path.nodes[0], status_item, 0);
        btrfs_set_qgroup_status_generation(path.nodes[0], status_item,
                                           trans->transid);

        btrfs_mark_buffer_dirty(path.nodes[0]);

out:
        btrfs_commit_transaction(trans, root);
        btrfs_release_path(&path);

        return ret;
}

int repair_qgroups(struct btrfs_fs_info *info, int *repaired)
{
        int ret = 0;
        struct qgroup_count *count, *tmpcount;

        *repaired = 0;

        list_for_each_entry_safe(count, tmpcount, &bad_qgroups, bad_list) {
                ret = repair_qgroup_info(info, count);
                if (ret)
                        goto out;

                (*repaired)++;

                list_del_init(&count->bad_list);
        }

        /*
         * Do this step last as we want the latest transaction id on
         * our qgroup status to avoid a (useless) warning after mount.
         */
        if (*repaired || counts.qgroup_inconsist || counts.rescan_running) {
                ret = repair_qgroup_status(info);
                if (ret)
                        goto out;

                (*repaired)++;
        }

out:
        return ret;
}