// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking. For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing. This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	bool ret = false;
	u64 reserved;

	spin_lock(&global_rsv->lock);
	reserved = global_rsv->reserved;
	spin_unlock(&global_rsv->lock);

	/*
	 * Since the global reserve is just kind of magic we don't really want
	 * to rely on it to save our bacon, so if our size is more than the
	 * delayed_refs_rsv and the global rsv then it's time to think about
	 * bailing.
	 */
	spin_lock(&delayed_refs_rsv->lock);
	reserved += delayed_refs_rsv->reserved;
	if (delayed_refs_rsv->size >= reserved)
		ret = true;
	spin_unlock(&delayed_refs_rsv->lock);
	return ret;
}

int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
{
	u64 num_entries =
		atomic_read(&trans->transaction->delayed_refs.num_entries);
	u64 avg_runtime;
	u64 val;

	smp_mb();
	avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;

	return btrfs_check_space_for_delayed_refs(trans->fs_info);
}
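
/*
 * A rough sanity check of the thresholds above: with an average delayed ref
 * runtime of, say, 1 us (1000 ns), roughly one million queued entries push
 * val past NSEC_PER_SEC and trip the strongest throttle level (return 1),
 * while about half that many trip the softer half-second level (return 2).
 */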

/*
 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
 * @fs_info - the fs_info for our fs.
 * @nr - the number of items to drop.
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
	u64 released = 0;

	released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
	if (released)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}

/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans - the trans that may have generated delayed refs
 *
 * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
 * it will calculate the additional size and add it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes;

	if (!trans->delayed_ref_updates)
		return;

	num_bytes = btrfs_calc_insert_metadata_size(fs_info,
						    trans->delayed_ref_updates);
	spin_lock(&delayed_rsv->lock);
	delayed_rsv->size += num_bytes;
	delayed_rsv->full = 0;
	spin_unlock(&delayed_rsv->lock);
	trans->delayed_ref_updates = 0;
}

/*
 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
 * @fs_info - the fs info for our fs.
 * @src - the source block rsv to transfer from.
 * @num_bytes - the number of bytes to transfer.
 *
 * This transfers up to the num_bytes amount from the src rsv to the
 * delayed_refs_rsv. Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *src,
				       u64 num_bytes)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	u64 to_free = 0;

	spin_lock(&src->lock);
	src->reserved -= num_bytes;
	src->size -= num_bytes;
	spin_unlock(&src->lock);

	spin_lock(&delayed_refs_rsv->lock);
	if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
		u64 delta = delayed_refs_rsv->size -
			delayed_refs_rsv->reserved;
		if (num_bytes > delta) {
			to_free = num_bytes - delta;
			num_bytes = delta;
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}

	if (num_bytes)
		delayed_refs_rsv->reserved += num_bytes;
	if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
		delayed_refs_rsv->full = 1;
	spin_unlock(&delayed_refs_rsv->lock);

	if (num_bytes)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, num_bytes, 1);
	if (to_free)
		btrfs_space_info_free_bytes_may_use(fs_info,
				delayed_refs_rsv->space_info, to_free);
}
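
/*
 * A worked example for the transfer above: if the delayed_refs_rsv is short
 * by delta = 1M (size - reserved) and the caller migrates num_bytes = 3M,
 * only 1M is added to ->reserved; the remaining 2M lands in to_free and is
 * handed back to the space_info as no longer reserved.
 */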

/*
 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
 * @fs_info - the fs_info for our fs.
 * @flush - control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to one item's worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = block_rsv->size - block_rsv->reserved;
		num_bytes = min(num_bytes, limit);
	}
	spin_unlock(&block_rsv->lock);

	if (!num_bytes)
		return 0;

	ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
					   num_bytes, flush);
	if (ret)
		return ret;
	btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
	trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
				      0, num_bytes, 1);
	return 0;
}

/*
 * compare two delayed tree backrefs with the same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with the same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}
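
/*
 * comp_refs() defines a total order on the refs of one head: first by ref
 * type, then by the type-specific fields (root or parent for tree refs;
 * root/objectid/offset or parent for data refs) and, when check_seq is set,
 * finally by sequence number, so newer duplicates sort after older ones.
 */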

/* insert a new ref to the head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_node *tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}
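
/*
 * Both rbtree insert helpers above share a return convention: NULL means
 * the node was linked into the tree, while a non-NULL return is the entry
 * already holding that key (bytenr for htree_insert(), the full comp_refs()
 * ordering for tree_insert()) and the caller decides how to merge.
 */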

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot. If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}
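
/*
 * The return_bigger mode is what lets btrfs_select_ref_head() below resume
 * scanning at run_delayed_start: if the exact bytenr has meanwhile gone
 * away, the next head at a higher bytenr is returned instead of NULL.
 */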

int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}
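
/*
 * A return of -EAGAIN above means the head was removed from the rbtree
 * while we slept on its mutex; the caller must not reuse the stale head
 * and is expected to retry the lookup instead.
 */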

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}

static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}
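
/*
 * Merge example: an ADD ref with ref_mod == 2 followed by an otherwise
 * identical DROP ref with ref_mod == 1 collapses to a single ADD with
 * ref_mod == 1 (mod = -1, the DROP node is dropped); had the two mods been
 * equal, the survivor would hit ref_mod == 0 and be dropped as well.
 */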

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	read_unlock(&fs_info->tree_mod_log_lock);

again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq);
			ret = 1;
		}
	}

	read_unlock(&fs_info->tree_mod_log_lock);
	return ret;
}

struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}
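
/*
 * run_delayed_start makes head selection effectively round-robin across
 * the bytenr space: each selected head advances the cursor past itself,
 * and once the end of the tree is reached the scan wraps back to zero.
 */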

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else
				ASSERT(0);
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}
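
/*
 * Callers rely on the return value above: on a merge (return > 0) the new
 * ref node was never linked into the tree, so btrfs_add_delayed_tree_ref()
 * and btrfs_add_delayed_data_ref() free it back to its kmem cache.
 */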

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/*
		 * if the extent was freed and then reallocated before the
		 * delayed ref entries were processed, we can end up with an
		 * existing head ref without the must_insert_reserved flag
		 * set.  Set it again here.
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;
	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation,
	 * only need the lock for this case cause we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_updates += csum_leaves;
		}
	}
	spin_unlock(&existing->lock);
}

static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			qrecord->data_rsv = reserved;
			qrecord->data_rsv_refroot = ref_root;
		}
		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
						     delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		update_existing_head_ref(trans, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_updates +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *			     modification to an extent.
 *
 * @fs_info:    Internal to the mounted filesystem mount structure.
 *
 * @ref:	The structure which is going to be initialized.
 *
 * @bytenr:	The logical address of the extent for which a modification is
 *		going to be recorded.
 *
 * @num_bytes:  Size of the extent whose modification is being recorded.
 *
 * @ref_root:	The id of the root where this modification has originated, this
 *		can be either one of the well-known metadata trees or the
 *		subvolume id which references this extent.
 *
 * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *		BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:	Holds the type of the extent which is being recorded, can be
 *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *		when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *		BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref. This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system;
	int action = generic_ref->action;
	int level = generic_ref->tree_ref.level;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u8 ref_type;

	is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);

	ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(generic_ref->real_root) &&
	    is_fstree(generic_ref->tree_ref.root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				generic_ref->tree_ref.root, action, ref_type);
	ref->root = generic_ref->tree_ref.root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      generic_ref->tree_ref.root, 0, action, false,
			      is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(fs_info, record);

	return 0;
}
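
/*
 * A rough usage sketch (for illustration only; see the real call sites in
 * extent-tree.c): a caller typically fills a btrfs_ref on the stack via the
 * init helpers from delayed-ref.h and hands it in, e.g.
 *
 *	struct btrfs_ref generic_ref = { 0 };
 *
 *	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF,
 *			       buf->start, buf->len, parent);
 *	btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
 *			    root->root_key.objectid);
 *	ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
 *					 &old_ref_mod, &new_ref_mod);
 */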

/*
 * add a delayed data ref. It's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       u64 reserved, int *old_ref_mod,
			       int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int action = generic_ref->action;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = generic_ref->data_ref.ref_root;
	u64 owner = generic_ref->data_ref.ino;
	u64 offset = generic_ref->data_ref.offset;
	u8 ref_type;

	ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root) &&
	    is_fstree(generic_ref->real_root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}

int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
			      false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);
	return 0;
}

/*
 * This does a simple search for the head node for a given extent. Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	lockdep_assert_held(&delayed_refs->lock);

	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}