// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
#include <linux/sizes.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"
/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - copy also limits on subvol creation
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * Helpers to access qgroup reservation
 *
 * Callers should ensure the lock context and type are valid
 */
static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
{
	u64 ret = 0;
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		ret += qgroup->rsv.values[i];

	return ret;
}
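
/*
 * Illustration (added note, not part of the original source): the reservation
 * is tracked per type and qgroup_rsv_total() simply sums the array.  With
 * hypothetical values
 *	rsv.values[BTRFS_QGROUP_RSV_DATA]          = 1 MiB
 *	rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] = 256 KiB
 *	rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] = 256 KiB
 * the helper returns 1.5 MiB, which is the amount counted against the qgroup
 * limits in qgroup_check_limits() further down in this file.
 */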

#ifdef CONFIG_BTRFS_DEBUG
static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
{
	if (type == BTRFS_QGROUP_RSV_DATA)
		return "data";
	if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
		return "meta_pertrans";
	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
		return "meta_prealloc";
	return NULL;
}
#endif

static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
			   struct btrfs_qgroup *qgroup, u64 num_bytes,
			   enum btrfs_qgroup_rsv_type type)
{
	trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
	qgroup->rsv.values[type] += num_bytes;
}

static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
			       struct btrfs_qgroup *qgroup, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type)
{
	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
	if (qgroup->rsv.values[type] >= num_bytes) {
		qgroup->rsv.values[type] -= num_bytes;
		return;
	}
#ifdef CONFIG_BTRFS_DEBUG
	WARN_RATELIMIT(1,
		"qgroup %llu %s reserved space underflow, have %llu to free %llu",
		qgroup->qgroupid, qgroup_rsv_type_str(type),
		qgroup->rsv.values[type], num_bytes);
#endif
	qgroup->rsv.values[type] = 0;
}

static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
				     struct btrfs_qgroup *dest,
				     struct btrfs_qgroup *src)
{
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
}

static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
					 struct btrfs_qgroup *dest,
					 struct btrfs_qgroup *src)
{
	int i;

	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
		qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
}

static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->old_refcnt < seq)
		qg->old_refcnt = seq;
	qg->old_refcnt += mod;
}

static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->new_refcnt < seq)
		qg->new_refcnt = seq;
	qg->new_refcnt += mod;
}

static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->old_refcnt < seq)
		return 0;
	return qg->old_refcnt - seq;
}

static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->new_refcnt < seq)
		return 0;
	return qg->new_refcnt - seq;
}
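
/*
 * Worked example (added for illustration, assuming the helpers above): the
 * old/new refcnts are only meaningful relative to the current qgroup_seq.
 * If seq is 1000 and a qgroup is reached twice while walking old_roots,
 * btrfs_qgroup_update_old_refcnt(qg, 1000, 1) is called twice, leaving
 * old_refcnt at 1002; btrfs_qgroup_get_old_refcnt(qg, 1000) then reports 2.
 * A qgroup whose refcnt is still below seq was not touched in this round and
 * reads back as 0.
 */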

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;
	struct list_head next_member;
	struct btrfs_qgroup *group;
	struct btrfs_qgroup *member;
};

static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
{
	return (u64)(uintptr_t)qg;
}

static inline struct btrfs_qgroup * unode_aux_to_qgroup(struct ulist_node *n)
{
	return (struct btrfs_qgroup *)(uintptr_t)n->aux;
}

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *qgroup;

	while (*p) {
		parent = *p;
		qgroup = rb_entry(parent, struct btrfs_qgroup, node);

		if (qgroup->qgroupid < qgroupid)
			p = &(*p)->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			p = &(*p)->rb_right;
		else
			return qgroup;
	}

	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
	if (!qgroup)
		return ERR_PTR(-ENOMEM);

	qgroup->qgroupid = qgroupid;
	INIT_LIST_HEAD(&qgroup->groups);
	INIT_LIST_HEAD(&qgroup->members);
	INIT_LIST_HEAD(&qgroup->dirty);

	rb_link_node(&qgroup->node, parent, p);
	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

	return qgroup;
}

static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
	struct btrfs_qgroup_list *list;

	list_del(&qgroup->dirty);
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(qgroup);
	return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list = kzalloc(sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	list->group = parent;
	list->member = member;
	list_add_tail(&list->next_group, &member->groups);
	list_add_tail(&list->next_member, &parent->members);

	return 0;
}
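
/*
 * Note added for clarity: each btrfs_qgroup_list node created here is linked
 * into two lists at once - the member's ->groups list (its parents) and the
 * parent's ->members list (its children) - which is why both next_group and
 * next_member must be unlinked together in del_relation_rb() and
 * __del_qgroup_rb().
 */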

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup)
		return -EINVAL;
	if (qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif

/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
				 "old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					"qgroup generation mismatch, marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				"orphan qgroup relation 0x%llx->0x%llx",
				found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		 ret >= 0)
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable().
 * The first two are in single-threaded paths, and for the third one we have
 * set quota_root to NULL with qgroup_lock held before, so it is safe to clean
 * up the in-memory structures without holding qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when umounting
	 * filesystem and disabling quota, so we set qgroup_ulist
	 * to be null here to avoid double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
}

static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				    u64 dst)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
				    u64 dst)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	if (btrfs_is_testing(quota_root->fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/*
	 * Avoid a transaction abort by catching -EEXIST here. In that
	 * case, we proceed by re-initializing the existing structure
	 * on disk.
	 */

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
{
	int ret;
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_qgroup *qgroup)
{
	struct btrfs_root *quota_root = trans->fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

	if (btrfs_is_testing(fs_info))
		return 0;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root)
		goto out;

	/*
	 * 1 for quota root item
	 * 1 for BTRFS_QGROUP_STATUS item
	 *
	 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
	 * per subvolume. However those are not currently reserved since it
	 * would be a lot of overkill.
	 */
	trans = btrfs_start_transaction(tree_root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				btrfs_abort_transaction(trans, ret);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out_free_path;
		}
		if (ret)
			break;
	}

out_add_root:
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	spin_unlock(&fs_info->qgroup_lock);

	ret = btrfs_commit_transaction(trans);
	trans = NULL;
	if (ret)
		goto out_free_path;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (!ret) {
		qgroup_rescan_zero_tracking(fs_info);
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
	}

out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		if (trans)
			btrfs_end_transaction(trans);
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;

	/*
	 * 1 For the root item
	 *
	 * We should also reserve enough items for the quota tree deletion in
	 * btrfs_clean_quota_tree but this is not done.
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	btrfs_qgroup_wait_for_completion(fs_info, false);
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
	}

	ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
	}

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	clean_tree_block(fs_info, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);

end_trans:
	ret = btrfs_end_transaction(trans);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static void qgroup_dirty(struct btrfs_fs_info *fs_info,
			 struct btrfs_qgroup *qgroup)
{
	if (list_empty(&qgroup->dirty))
		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}

/*
 * The easy accounting, we're updating qgroup relationship whose child qgroup
 * only has exclusive extents.
 *
 * In this case, all exclusive extents will also be exclusive for the parent,
 * so excl/rfer just get added/removed.
 *
 * So is qgroup reservation space, which should also be added/removed to the
 * parent.
 * Or when a child tries to release reservation space, the parent will
 * underflow its reservation (for the relationship adding case).
 *
 * Caller should hold fs_info->qgroup_lock.
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    struct btrfs_qgroup *src, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 num_bytes = src->excl;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;

	if (sign > 0)
		qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
	else
		qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0)
			qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
		else
			qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group),
					GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}
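
/*
 * Worked example (illustration only, not from the original file): assume a
 * child qgroup 0/257 with rfer == excl == 16 MiB is attached to parent 1/100
 * via btrfs_add_qgroup_relation().  Because all of the child's extents are
 * exclusive, __qgroup_excl_accounting() is called with sign == +1 and simply
 * adds 16 MiB to the parent's rfer/excl (and propagates the child's
 * reservations), walking further ancestors through the tmp ulist.  Removing
 * the relation repeats the same walk with sign == -1.
 */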

/*
 * Quick path for updating qgroup with only excl refs.
 *
 * In that case, updating all parents will be enough.
 * Otherwise we need to do a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for quick update, return >0 for a full rescan being needed
 * and the INCONSISTENT flag being set.
 * Return < 0 for other errors.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				   struct ulist *tmp, u64 src, u64 dst,
				   int sign)
{
	struct btrfs_qgroup *qgroup;
	int ret = 1;
	int err = 0;

	qgroup = find_qgroup_rb(fs_info, src);
	if (!qgroup)
		goto out;
	if (qgroup->excl == qgroup->rfer) {
		ret = 0;
		err = __qgroup_excl_accounting(fs_info, tmp, dst,
					       qgroup, sign);
		if (err < 0) {
			ret = err;
			goto out;
		}
	}
out:
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;

	/* Check the level of src and dst first */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, dst, src);
	if (ret) {
		del_qgroup_relation_item(trans, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(fs_info, src, dst);
	if (ret < 0) {
		spin_unlock(&fs_info->qgroup_lock);
		goto out;
	}
	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	ulist_free(tmp);
	return ret;
}

static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info, u64 src,
				 u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;
	int err;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent)
			goto exist;
	}
	ret = -ENOENT;
	goto out;
exist:
	ret = del_qgroup_relation_item(trans, src, dst);
	err = del_qgroup_relation_item(trans, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	ulist_free(tmp);
	return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	ret = __del_qgroup_relation(trans, fs_info, src, dst);
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	return ret;
}

int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	} else {
		/* check if there are no children of this qgroup */
		if (!list_empty(&qgroup->members)) {
			ret = -EBUSY;
			goto out;
		}
	}
	ret = del_qgroup_item(trans, qgroupid);
	if (ret && ret != -ENOENT)
		goto out;

	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		ret = __del_qgroup_relation(trans, fs_info,
					    qgroupid,
					    list->group->qgroupid);
		if (ret)
			goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;
	/* Sometimes we would want to clear the limit on this qgroup.
	 * To meet this requirement, we treat the -1 as a special value
	 * which tells the kernel to clear the limit on this qgroup.
	 */
	const u64 CLEAR_VALUE = -1;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
		if (limit->max_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			qgroup->max_rfer = 0;
		} else {
			qgroup->max_rfer = limit->max_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
		if (limit->max_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			qgroup->max_excl = 0;
		} else {
			qgroup->max_excl = limit->max_excl;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
		if (limit->rsv_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			qgroup->rsv_rfer = 0;
		} else {
			qgroup->rsv_rfer = limit->rsv_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
		if (limit->rsv_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			qgroup->rsv_excl = 0;
		} else {
			qgroup->rsv_excl = limit->rsv_excl;
		}
	}
	qgroup->lim_flags |= limit->flags;

	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_limit_item(trans, qgroup);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
			   qgroupid);
	}

out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				struct btrfs_delayed_ref_root *delayed_refs,
				struct btrfs_qgroup_extent_record *record)
{
	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_qgroup_extent_record *entry;
	u64 bytenr = record->bytenr;

	lockdep_assert_held(&delayed_refs->lock);
	trace_btrfs_qgroup_trace_extent(fs_info, record);

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
				 node);
		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return 1;
	}

	rb_link_node(&record->node, parent_node, p);
	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
	return 0;
}

int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord)
{
	struct ulist *old_root;
	u64 bytenr = qrecord->bytenr;
	int ret;

	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
	if (ret < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_warn(fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
			ret);
		return 0;
	}

	/*
	 * Here we don't need to get the lock of
	 * trans->transaction->delayed_refs, since inserted qrecord won't
	 * be deleted, only qrecord->node may be modified (new qrecord insert)
	 *
	 * So modifying qrecord->old_roots is safe here
	 */
	qrecord->old_roots = old_root;
	return 0;
}

int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
		gfp_t gfp_flag)
{
	struct btrfs_qgroup_extent_record *record;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
	    || bytenr == 0 || num_bytes == 0)
		return 0;
	if (WARN_ON(trans == NULL))
		return -EINVAL;
	record = kmalloc(sizeof(*record), gfp_flag);
	if (!record)
		return -ENOMEM;

	delayed_refs = &trans->transaction->delayed_refs;
	record->bytenr = bytenr;
	record->num_bytes = num_bytes;
	record->old_roots = NULL;

	spin_lock(&delayed_refs->lock);
	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);
	if (ret > 0) {
		kfree(record);
		return 0;
	}
	return btrfs_qgroup_trace_extent_post(fs_info, record);
}
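
/*
 * Note added for clarity (based on the helpers above): extent tracing is a
 * two phase affair.  btrfs_qgroup_trace_extent() records the dirty extent in
 * the per-transaction rbtree and immediately resolves old_roots from the
 * commit root, while new_roots are only resolved at commit time in
 * btrfs_qgroup_account_extents(), once the final set of referencing roots is
 * known.
 */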

int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb)
{
	int nr = btrfs_header_nritems(eb);
	int i, extent_type, ret;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 bytenr, num_bytes;

	/* We can be called directly from walk_up_proc() */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		/* filter out non qgroup-accountable extents  */
		extent_type = btrfs_file_extent_type(eb, fi);

		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;

		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		if (!bytenr)
			continue;

		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);

		ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
						num_bytes, GFP_NOFS);
		if (ret)
			return ret;
	}
	cond_resched();
	return 0;
}

/*
 * Walk up the tree from the bottom, freeing leaves and any interior
 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have its slot
 * incremented. The root node will never be freed.
 *
 * At the end of this function, we should have a path which has all
 * slots incremented to the next position for a search. If we need to
 * read a new node it will be NULL and the node above it will have the
 * correct slot selected for a later read.
 *
 * If we increment the root node's slot counter past the number of
 * elements, 1 is returned to signal completion of the search.
 */
static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
{
	int level = 0;
	int nr, slot;
	struct extent_buffer *eb;

	if (root_level == 0)
		return 1;

	while (level <= root_level) {
		eb = path->nodes[level];
		nr = btrfs_header_nritems(eb);
		path->slots[level]++;
		slot = path->slots[level];
		if (slot >= nr || level == 0) {
			/*
			 * Don't free the root -  we will detect this
			 * condition after our loop and return a
			 * positive value for caller to stop walking the tree.
			 */
			if (level != root_level) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;

				free_extent_buffer(eb);
				path->nodes[level] = NULL;
				path->slots[level] = 0;
			}
		} else {
			/*
			 * We have a valid slot to walk back down
			 * from. Stop here so caller can process these
			 * new nodes.
			 */
			break;
		}

		level++;
	}

	eb = path->nodes[root_level];
	if (path->slots[root_level] >= btrfs_header_nritems(eb))
		return 1;

	return 0;
}

int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret = 0;
	int level;
	struct extent_buffer *eb = root_eb;
	struct btrfs_path *path = NULL;

	BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
		if (ret)
			goto out;
	}

	if (root_level == 0) {
		ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Walk down the tree.  Missing extent blocks are filled in as
	 * we go. Metadata is accounted every time we read a new
	 * extent block.
	 *
	 * When we reach a leaf, we account for file extent items in it,
	 * walk back up the tree (adjusting slot pointers as we go)
	 * and restart the search process.
	 */
	extent_buffer_get(root_eb); /* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			struct btrfs_key first_key;
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/*
			 * We need to get child blockptr/gen from parent before
			 * we can read it.
			  */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen,
					     level, &first_key);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			ret = btrfs_qgroup_trace_extent(trans, fs_info,
							child_bytenr,
							fs_info->nodesize,
							GFP_NOFS);
			if (ret)
				goto out;
		}

		if (level == 0) {
			ret = btrfs_qgroup_trace_leaf_items(trans, fs_info,
							    path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}

#define UPDATE_NEW	0
#define UPDATE_OLD	1
/*
 * Walk all of the roots that point to the bytenr and adjust their refcnts.
 */
static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				struct ulist *roots, struct ulist *tmp,
				struct ulist *qgroups, u64 seq, int update_old)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret = 0;

	if (!roots)
		return 0;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = unode_aux_to_qgroup(tmp_unode);
			if (update_old)
				btrfs_qgroup_update_old_refcnt(qg, seq, 1);
			else
				btrfs_qgroup_update_new_refcnt(qg, seq, 1);
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * Update qgroup rfer/excl counters.
 * The rfer update is easy, the code can explain itself.
 *
 * The excl update is tricky, the update is split into 2 parts.
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 *  -------------------------------------
 *  B	|	*	|	-	|
 *  -------------------------------------
 *  !B	|	+	|	**	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For !A and !B conditions, the exception is the cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use conditions A and B to split the
 * combination into 4 results.
 *
 * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
 * them only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0.
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
 */
static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				  struct ulist *qgroups,
				  u64 nr_old_roots,
				  u64 nr_new_roots,
				  u64 num_bytes, u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	u64 cur_new_count, cur_old_count;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(qgroups, &uiter))) {
		bool dirty = false;

		qg = unode_aux_to_qgroup(unode);
		cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
		cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);

		trace_qgroup_update_counters(fs_info, qg, cur_old_count,
					     cur_new_count);

		/* Rfer update part */
		if (cur_old_count == 0 && cur_new_count > 0) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		}
		if (cur_old_count > 0 && cur_new_count == 0) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		/* Excl update part */
		/* Exclusive/none -> shared case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count < nr_new_roots) {
			/* Exclusive -> shared */
			if (cur_old_count != 0) {
				qg->excl -= num_bytes;
				qg->excl_cmpr -= num_bytes;
				dirty = true;
			}
		}

		/* Shared -> exclusive/none case */
		if (cur_old_count < nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			/* Shared->exclusive */
			if (cur_new_count != 0) {
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
				dirty = true;
			}
		}

		/* Exclusive/none -> exclusive/none case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			if (cur_old_count == 0) {
				/* None -> exclusive/none */

				if (cur_new_count != 0) {
					/* None -> exclusive */
					qg->excl += num_bytes;
					qg->excl_cmpr += num_bytes;
					dirty = true;
				}
				/* None -> none, nothing changed */
			} else {
				/* Exclusive -> exclusive/none */

				if (cur_new_count == 0) {
					/* Exclusive -> none */
					qg->excl -= num_bytes;
					qg->excl_cmpr -= num_bytes;
					dirty = true;
				}
				/* Exclusive -> exclusive, nothing changed */
			}
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}
	return 0;
}
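
/*
 * Worked example (illustration only): suppose an extent of num_bytes was
 * referenced by root A only (nr_old_roots == 1) and is now shared by A and B
 * (nr_new_roots == 2).  For A's qgroup, cur_old_count == 1 == nr_old_roots
 * and cur_new_count == 1 < nr_new_roots, so the "exclusive -> shared" branch
 * subtracts num_bytes from excl while rfer stays.  For B's qgroup,
 * cur_old_count == 0 and cur_new_count == 1, so rfer grows by num_bytes but
 * excl does not, since B does not own the extent exclusively.
 */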

/*
 * Check if the @roots ulist is potentially a list of fs tree roots
 *
 * Return 0 for definitely not a fs/subvol tree roots ulist
 * Return 1 for possible fs/subvol tree roots in the list (considering an empty
 * one as well)
 */
static int maybe_fs_roots(struct ulist *roots)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* Empty one, still possible for fs roots */
	if (!roots || roots->nnodes == 0)
		return 1;

	ULIST_ITER_INIT(&uiter);
	unode = ulist_next(roots, &uiter);
	if (!unode)
		return 1;

	/*
	 * If it contains fs tree roots, then it must belong to fs/subvol
	 * trees.
	 * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
	 */
	return is_fstree(unode->val);
}

int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info,
			    u64 bytenr, u64 num_bytes,
			    struct ulist *old_roots, struct ulist *new_roots)
{
	struct ulist *qgroups = NULL;
	struct ulist *tmp = NULL;
	u64 seq;
	u64 nr_new_roots = 0;
	u64 nr_old_roots = 0;
	int ret = 0;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (new_roots) {
		if (!maybe_fs_roots(new_roots))
			goto out_free;
		nr_new_roots = new_roots->nnodes;
	}
	if (old_roots) {
		if (!maybe_fs_roots(old_roots))
			goto out_free;
		nr_old_roots = old_roots->nnodes;
	}

	/* Quick exit, either not fs tree roots, or won't affect any qgroup */
	if (nr_old_roots == 0 && nr_new_roots == 0)
		goto out_free;

	BUG_ON(!fs_info->quota_root);

	trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
					  num_bytes, nr_old_roots,
					  nr_new_roots);

	qgroups = ulist_alloc(GFP_NOFS);
	if (!qgroups) {
		ret = -ENOMEM;
		goto out_free;
	}
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp) {
		ret = -ENOMEM;
		goto out_free;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			ret = 0;
			goto out_free;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	spin_lock(&fs_info->qgroup_lock);
	seq = fs_info->qgroup_seq;

	/* Update old refcnts using old_roots */
	ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
				   UPDATE_OLD);
	if (ret < 0)
		goto out;

	/* Update new refcnts using new_roots */
	ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
				   UPDATE_NEW);
	if (ret < 0)
		goto out;

	qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
			       num_bytes, seq);

	/*
	 * Bump qgroup_seq to avoid seq overlap
	 */
	fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
out:
	spin_unlock(&fs_info->qgroup_lock);
out_free:
	ulist_free(tmp);
	ulist_free(qgroups);
	ulist_free(old_roots);
	ulist_free(new_roots);
	return ret;
}

int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup_extent_record *record;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct ulist *new_roots = NULL;
	struct rb_node *node;
	u64 qgroup_to_skip;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	qgroup_to_skip = delayed_refs->qgroup_to_skip;
	while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
		record = rb_entry(node, struct btrfs_qgroup_extent_record,
				  node);

		trace_btrfs_qgroup_account_extents(fs_info, record);

		if (!ret) {
			/*
			 * Old roots should be searched when inserting qgroup
			 * extent record
			 */
			if (WARN_ON(!record->old_roots)) {
				/* Search commit root to find old_roots */
				ret = btrfs_find_all_roots(NULL, fs_info,
						record->bytenr, 0,
						&record->old_roots, false);
				if (ret < 0)
					goto cleanup;
			}

			/*
			 * Use SEQ_LAST as time_seq to do special search, which
			 * doesn't lock tree or delayed_refs and search current
			 * root. It's safe inside commit_transaction().
			 */
			ret = btrfs_find_all_roots(trans, fs_info,
				record->bytenr, SEQ_LAST, &new_roots, false);
			if (ret < 0)
				goto cleanup;
			if (qgroup_to_skip) {
				ulist_del(new_roots, qgroup_to_skip, 0);
				ulist_del(record->old_roots, qgroup_to_skip,
					  0);
			}
			ret = btrfs_qgroup_account_extent(trans, fs_info,
					record->bytenr, record->num_bytes,
					record->old_roots, new_roots);
			record->old_roots = NULL;
			new_roots = NULL;
		}
cleanup:
		ulist_free(record->old_roots);
		ulist_free(new_roots);
		new_roots = NULL;
		rb_erase(node, &delayed_refs->dirty_extent_root);
		kfree(record);

	}
	return ret;
}

/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;

	if (!quota_root)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		ret = update_qgroup_limit_item(trans, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

out:
	return ret;
}

/*
 * Copy the accounting information between qgroups. This is necessary
 * when a snapshot or a subvolume is created. Throwing an error will
 * cause a transaction abort so we take extra care here to only error
 * when a readonly fs is a reasonable outcome.
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid,
			 u64 objectid, struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);

			/*
			 * Zero out invalid groups so we can ignore
			 * them later.
			 */
			if (!srcgroup ||
			    ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				*i_qgroups = 0ULL;

			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
			if (*i_qgroups == 0)
				continue;
			ret = add_qgroup_relation_item(trans, objectid,
						       *i_qgroups);
			if (ret && ret != -EEXIST)
				goto out;
			ret = add_qgroup_relation_item(trans, *i_qgroups,
						       objectid);
			if (ret && ret != -EEXIST)
				goto out;
		}
		ret = 0;
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		dstgroup->lim_flags = inherit->lim.flags;
		dstgroup->max_rfer = inherit->lim.max_rfer;
		dstgroup->max_excl = inherit->lim.max_excl;
		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
		dstgroup->rsv_excl = inherit->lim.rsv_excl;

		ret = update_qgroup_limit_item(trans, dstgroup);
		if (ret) {
			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
			btrfs_info(fs_info,
				   "unable to update quota limit for %llu",
				   dstgroup->qgroupid);
			goto unlock;
		}
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;

		/*
		 * We call inherit after we clone the root in order to make sure
		 * our counts don't go crazy, so at this point the only
		 * difference between the two roots should be the root node.
		 */
		level_size = fs_info->nodesize;
		dstgroup->rfer = srcgroup->rfer;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
		dstgroup->excl = level_size;
		dstgroup->excl_cmpr = level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;

		/* inherit the limit info */
		dstgroup->lim_flags = srcgroup->lim_flags;
		dstgroup->max_rfer = srcgroup->max_rfer;
		dstgroup->max_excl = srcgroup->max_excl;
		dstgroup->rsv_rfer = srcgroup->rsv_rfer;
		dstgroup->rsv_excl = srcgroup->rsv_excl;

		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		if (*i_qgroups) {
			ret = add_relation_rb(fs_info, objectid, *i_qgroups);
			if (ret)
				goto unlock;
		}
		++i_qgroups;
	}

	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
	}
	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
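
/*
 * Example (added for illustration): when snapshotting a subvolume whose
 * qgroup shows rfer == excl == 1 GiB, the new qgroup starts with the same
 * rfer but only one tree block (level_size == fs_info->nodesize, e.g. 16 KiB)
 * of excl, because everything except the new root node is still shared with
 * the source; the source qgroup's excl drops to the same single node for the
 * same reason.
 */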

/*
 * Two limits to commit transaction in advance.
 *
 * For RATIO, it will be 1/RATIO of the remaining limit
 * (excluding data and prealloc meta) as threshold.
 * For SIZE, it will be in byte unit as threshold.
 */
#define QGROUP_PERTRANS_RATIO		32
#define QGROUP_PERTRANS_SIZE		SZ_32M
static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
				const struct btrfs_qgroup *qg, u64 num_bytes)
{
	u64 limit;
	u64 threshold;

	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
	    qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
		return false;

	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
	    qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
		return false;

	/*
	 * Even if we passed the check, it's better to check if reservation
	 * for meta_pertrans is pushing us near the limit.
	 * If there is too much pertrans reservation or it's near the limit,
	 * let's try to commit the transaction to free some, using the
	 * transaction_kthread.
	 */
	if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER |
			      BTRFS_QGROUP_LIMIT_MAX_EXCL))) {
		if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
			limit = qg->max_excl;
		else
			limit = qg->max_rfer;
		threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] -
			    qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) /
			    QGROUP_PERTRANS_RATIO;
		threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE);

		/*
		 * Use transaction_kthread to commit the transaction, so we no
		 * longer need to bother with nested transactions nor the lock
		 * context.
		 */
		if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold)
			btrfs_commit_transaction_locksafe(fs_info);
	}

	return true;
}
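
/*
 * Worked example (illustration only): with max_excl == 1 GiB, 200 MiB of data
 * reservation and 56 MiB of prealloc meta reservation, the remaining headroom
 * is 768 MiB; one 32nd of that is 24 MiB, capped at QGROUP_PERTRANS_SIZE
 * (32 MiB).  Once pertrans meta reservations exceed that 24 MiB threshold,
 * the transaction kthread is asked to commit early so the pertrans space can
 * be released before the qgroup actually hits its limit.
 */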

static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
			  enum btrfs_qgroup_rsv_type type)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
	    capable(CAP_SYS_RESOURCE))
		enforce = false;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			qgroup_to_aux(qgroup), GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) {
			ret = -EDQUOT;
			goto out;
		}

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
		qgroup_rsv_add(fs_info, qg, num_bytes, type);
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
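
/*
 * Example of the hierarchy walk above (hypothetical qgroup ids): if
 * subvolume qgroup 0/257 is a member of qgroup 1/100, the ulist walk pulls
 * in both 0/257 and 1/100, so both are limit-checked and both are charged;
 * exceeding either limit fails the whole reservation with -EDQUOT.
 */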

/*
 * Free @num_bytes of reserved space with @type for qgroup.  (Normally level 0
 * qgroup).
 *
 * Will handle all higher level qgroup too.
 *
 * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
 * This special case is only used for META_PERTRANS type.
 */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
		WARN(1, "%s: Invalid type to free", __func__);
		return;
	}
	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	if (num_bytes == (u64)-1)
		/*
		 * We're freeing all pertrans rsv, get reserved value from
		 * level 0 qgroup as real num_bytes to free.
		 */
		num_bytes = qgroup->rsv.values[type];

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			qgroup_to_aux(qgroup), GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
		qgroup_rsv_release(fs_info, qg, num_bytes, type);

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
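
/*
 * For example, btrfs_qgroup_free_meta_all_pertrans() further below calls
 * this as
 *	btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
 *				  BTRFS_QGROUP_RSV_META_PERTRANS);
 * so the level 0 qgroup's recorded pertrans value is used as the real
 * amount released from the whole hierarchy.
 */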

/*
 * Check if the leaf is the last leaf. Which means all node pointers
 * are at their last position.
 */
static bool is_last_leaf(struct btrfs_path *path)
{
	int i;

	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
		if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
			return false;
	}
	return true;
}
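
/*
 * E.g. in a tree with three levels, the leaf the path points at is the last
 * one only if path->slots[1] and path->slots[2] both sit on the final
 * pointer of their node (nritems - 1); any earlier slot means more leaves
 * still follow.
 */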

/*
 * returns < 0 on error, 0 when more leafs are to be scanned.
 * returns 1 when done.
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans)
{
	struct btrfs_key found;
	struct extent_buffer *scratch_leaf = NULL;
	struct ulist *roots = NULL;
	u64 num_bytes;
	bool done;
	int slot;
	int ret;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	btrfs_debug(fs_info,
		"current progress key (%llu %u %llu), search_slot ret %d",
		fs_info->qgroup_rescan_progress.objectid,
		fs_info->qgroup_rescan_progress.type,
		fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}
	done = is_last_leaf(path);

	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!scratch_leaf) {
		ret = -ENOMEM;
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		goto out;
	}
	extent_buffer_get(scratch_leaf);
	btrfs_tree_read_lock(scratch_leaf);
	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->nodesize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
					   &roots, false);
		if (ret < 0)
			goto out;
		/* For rescan, just pass old_roots as NULL */
		ret = btrfs_qgroup_account_extent(trans, fs_info,
				found.objectid, num_bytes, NULL, roots);
		if (ret < 0)
			goto out;
	}
out:
	if (scratch_leaf) {
		btrfs_tree_read_unlock_blocking(scratch_leaf);
		free_extent_buffer(scratch_leaf);
	}

	if (done && !ret) {
		ret = 1;
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
	}
	return ret;
}

static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	int err = -ENOMEM;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	/*
	 * Rescan should only search for commit root, and any later difference
	 * should be recorded by qgroup
	 */
	path->search_commit_root = 1;
	path->skip_locking = 1;

	err = 0;
	while (!err && !btrfs_fs_closing(fs_info)) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans);
		}
		if (err > 0)
			btrfs_commit_transaction(trans);
		else
			btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * only update status, since the previous part has already updated the
	 * qgroup info.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		btrfs_err(fs_info,
			  "fail to start transaction for status update: %d",
			  err);
		goto done;
	}
	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	if (ret < 0) {
		err = ret;
		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
	}
	btrfs_end_transaction(trans);

	if (btrfs_fs_closing(fs_info)) {
		btrfs_info(fs_info, "qgroup scan paused");
	} else if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err > 0 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_rescan_running = false;
	mutex_unlock(&fs_info->qgroup_rescan_lock);
	complete_all(&fs_info->qgroup_rescan_completion);
}

/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags) {
		/* we're resuming qgroup rescan at mount time */
		if (!(fs_info->qgroup_flags &
		      BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
			btrfs_warn(fs_info,
			"qgroup rescan init failed, qgroup is not enabled");
			ret = -EINVAL;
		} else if (!(fs_info->qgroup_flags &
			     BTRFS_QGROUP_STATUS_FLAG_ON)) {
			btrfs_warn(fs_info,
			"qgroup rescan init failed, qgroup rescan is not queued");
			ret = -EINVAL;
		}

		if (ret)
			return ret;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
			btrfs_warn(fs_info,
				   "qgroup rescan is already in progress");
			ret = -EINPROGRESS;
		} else if (!(fs_info->qgroup_flags &
			     BTRFS_QGROUP_STATUS_FLAG_ON)) {
			btrfs_warn(fs_info,
			"qgroup rescan init failed, qgroup is not enabled");
			ret = -EINVAL;
		}

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return ret;
		}
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);
	return 0;
}

static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}

int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}

int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
			&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}

/*
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}

/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or do
 * nothing if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EDQUOT)
 *
 * NOTE: this function may sleep for memory allocation.
 *       if btrfs_qgroup_reserve_data() is called multiple times with
 *       same @reserved, caller must ensure when error happens it's OK
 *       to free *ALL* reserved space.
 */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved_ret, u64 start,
			u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset *reserved;
	u64 orig_reserved;
	u64 to_reserve;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
	    !is_fstree(root->objectid) || len == 0)
		return 0;

	/* @reserved parameter is mandatory for qgroup */
	if (WARN_ON(!reserved_ret))
		return -EINVAL;
	if (!*reserved_ret) {
		*reserved_ret = extent_changeset_alloc();
		if (!*reserved_ret)
			return -ENOMEM;
	}
	reserved = *reserved_ret;
	/* Record already reserved space */
	orig_reserved = reserved->bytes_changed;
	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, reserved);

	/* Newly reserved space */
	to_reserve = reserved->bytes_changed - orig_reserved;
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					to_reserve, QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;
	ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
	if (ret < 0)
		goto cleanup;

	return ret;

cleanup:
	/* cleanup *ALL* already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
	extent_changeset_release(reserved);
	return ret;
}
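
/*
 * Illustration of the "free *ALL* reserved space" note above (hypothetical
 * ranges): if a caller reserves [0, 4K) and then [8K, 12K) against the same
 * @reserved changeset and the second call fails, the cleanup path clears
 * the EXTENT_QGROUP_RESERVED bits for every range recorded in @reserved,
 * not just the failing one, so the caller must treat the earlier
 * reservation as gone as well.
 */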

/* Free ranges specified by @reserved, normally in error path */
static int qgroup_free_reserved_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset changeset;
	int freed = 0;
	int ret;

	extent_changeset_init(&changeset);
	len = round_up(start + len, root->fs_info->sectorsize);
	start = round_down(start, root->fs_info->sectorsize);

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
		u64 range_start = unode->val;
		/* unode->aux is the inclusive end */
		u64 range_len = unode->aux - range_start + 1;
		u64 free_start;
		u64 free_len;

		extent_changeset_release(&changeset);

		/* Only free range in range [start, start + len) */
		if (range_start >= start + len ||
		    range_start + range_len <= start)
			continue;
		free_start = max(range_start, start);
		free_len = min(start + len, range_start + range_len) -
			   free_start;
		/*
		 * TODO: To also modify reserved->ranges_reserved to reflect
		 * the modification.
		 *
		 * However as long as we free qgroup reserved according to
		 * EXTENT_QGROUP_RESERVED, we won't double free.
		 * So no need to rush.
		 */
		/* Clear from io_tree, where the bits were set at reserve time */
		ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
				free_start, free_start + free_len - 1,
				EXTENT_QGROUP_RESERVED, &changeset);
		if (ret < 0)
			goto out;
		freed += changeset.bytes_changed;
	}
	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
				  BTRFS_QGROUP_RSV_DATA);
	ret = freed;
out:
	extent_changeset_release(&changeset);
	return ret;
}

static int __btrfs_qgroup_release_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len,
			int free)
{
	struct extent_changeset changeset;
	int trace_op = QGROUP_RELEASE;
	int ret;

	/* In release case, we shouldn't have @reserved */
	WARN_ON(!free && reserved);
	if (free && reserved)
		return qgroup_free_reserved_data(inode, reserved, start, len);
	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
	if (ret < 0)
		goto out;

	if (free)
		trace_op = QGROUP_FREE;
	trace_btrfs_qgroup_release_data(inode, start, len,
					changeset.bytes_changed, trace_op);
	if (free)
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
	ret = changeset.bytes_changed;
out:
	extent_changeset_release(&changeset);
	return ret;
}

/*
 * Free a reserved space range from io_tree and related qgroups
 *
 * Should be called when a range of pages get invalidated before reaching disk.
 * Or for error cleanup case.
 * if @reserved is given, only reserved range in [@start, @start + @len) will
 * be freed.
 *
 * For data written to disk, use btrfs_qgroup_release_data().
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}

/*
 * Release a reserved space range from io_tree only.
 *
 * Should be called when a range of pages get written to disk and corresponding
 * FILE_EXTENT is inserted into corresponding root.
 *
 * Since new qgroup accounting framework will only update qgroup numbers at
 * commit_transaction() time, its reserved space shouldn't be freed from
 * related qgroups.
 *
 * But we should release the range from io_tree, to allow further write to be
 * COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
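
/*
 * Rough lifecycle of the data reservation helpers above: a buffered write
 * typically reserves with btrfs_qgroup_reserve_data(); once the data hits
 * disk and the file extent is recorded, btrfs_qgroup_release_data() drops
 * only the io_tree marking, while error/invalidation paths hand the range
 * (and its qgroup reservation) back through btrfs_qgroup_free_data().
 */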

static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type)
{
	/* Only META_PREALLOC and META_PERTRANS are tracked per root */
	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
	    type != BTRFS_QGROUP_RSV_META_PERTRANS)
		return;
	if (num_bytes == 0)
		return;

	spin_lock(&root->qgroup_meta_rsv_lock);
	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
		root->qgroup_meta_rsv_prealloc += num_bytes;
	else
		root->qgroup_meta_rsv_pertrans += num_bytes;
	spin_unlock(&root->qgroup_meta_rsv_lock);
}

static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
			     enum btrfs_qgroup_rsv_type type)
{
	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
	    type != BTRFS_QGROUP_RSV_META_PERTRANS)
		return 0;
	if (num_bytes == 0)
		return 0;

	spin_lock(&root->qgroup_meta_rsv_lock);
	if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
		num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
				  num_bytes);
		root->qgroup_meta_rsv_prealloc -= num_bytes;
	} else {
		num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
				  num_bytes);
		root->qgroup_meta_rsv_pertrans -= num_bytes;
	}
	spin_unlock(&root->qgroup_meta_rsv_lock);
	return num_bytes;
}
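
/*
 * Example of the clamping above (hypothetical numbers): if 16KiB of
 * prealloc meta was reserved while quota was still disabled, nothing was
 * recorded in the per-root counter, so a later free of 16KiB after quota is
 * enabled is clamped to the recorded value (0 in this case) and the qgroup
 * counters cannot underflow.
 */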

int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				enum btrfs_qgroup_rsv_type type, bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid) || num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
	ret = qgroup_reserve(root, num_bytes, enforce, type);
	if (ret < 0)
		return ret;
	/*
	 * Record what we have reserved into root.
	 *
	 * To avoid quota disabled->enabled underflow.
	 * In that case, we may try to free space we haven't reserved
	 * (since quota was disabled), so record what we reserved into root.
	 * And ensure later release won't underflow this number.
	 */
	add_root_meta_rsv(root, num_bytes, type);
	return ret;
}

void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	/* TODO: Update trace point to handle such free */
	trace_qgroup_meta_free_all_pertrans(root);
	/* Special value -1 means to free all reserved space */
	btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
				  BTRFS_QGROUP_RSV_META_PERTRANS);
}

void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	/*
	 * reservation for META_PREALLOC can happen before quota is enabled,
	 * which can lead to underflow.
	 * Here ensure we will only free what we really have reserved.
	 */
	num_bytes = sub_root_meta_rsv(root, num_bytes, type);
	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type);
}

static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
				int num_bytes)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (num_bytes == 0)
		return;
	if (!quota_root)
		return;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			qgroup_to_aux(qgroup), GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		qgroup_rsv_release(fs_info, qg, num_bytes,
				BTRFS_QGROUP_RSV_META_PREALLOC);
		qgroup_rsv_add(fs_info, qg, num_bytes,
				BTRFS_QGROUP_RSV_META_PERTRANS);
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
out:
	spin_unlock(&fs_info->qgroup_lock);
}

void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;
	/* Same as btrfs_qgroup_free_meta_prealloc() */
	num_bytes = sub_root_meta_rsv(root, num_bytes,
				      BTRFS_QGROUP_RSV_META_PREALLOC);
	trace_qgroup_meta_convert(root, num_bytes);
	qgroup_convert_meta(fs_info, root->objectid, num_bytes);
}
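
/*
 * Example flow for the conversion above (simplified): metadata reserved as
 * META_PREALLOC whose lifespan turns out to be bound to the running
 * transaction is handed over here, which moves the bytes to META_PERTRANS
 * in every affected qgroup; the pertrans pool is then typically dropped in
 * one go by btrfs_qgroup_free_meta_all_pertrans() around transaction
 * commit.
 */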

/*
 * Check qgroup reserved space leaking, normally at destroy inode
 * time
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
{
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator iter;
	int ret;

	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
			EXTENT_QGROUP_RESERVED, &changeset);

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(BTRFS_I(inode)->root->fs_info,
				"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				inode->i_ino, unode->val, unode->aux);
		}
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
	}
	extent_changeset_release(&changeset);
}