// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

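/*
 * Overview of the data structures below (summary, not from the original
 * file comment): delayed ref heads live in delayed_refs->href_root, an
 * rbtree keyed by bytenr, and each head carries its own rbtree
 * (head->ref_tree) holding the individual add/drop modifications queued
 * against that extent.
 */
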
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	bool ret = false;
	u64 reserved;

	spin_lock(&global_rsv->lock);
	reserved = global_rsv->reserved;
	spin_unlock(&global_rsv->lock);

	/*
	 * Since the global reserve is just kind of magic we don't really want
	 * to rely on it to save our bacon, so if our size is more than the
	 * delayed_refs_rsv and the global rsv then it's time to think about
	 * bailing.
	 */
	spin_lock(&delayed_refs_rsv->lock);
	reserved += delayed_refs_rsv->reserved;
	if (delayed_refs_rsv->size >= reserved)
		ret = true;
	spin_unlock(&delayed_refs_rsv->lock);
	return ret;
}

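/*
 * Reading the check above: the function returns true when the rsv's
 * required size meets or exceeds what is actually reserved (delayed refs
 * rsv plus the global reserve), i.e. reservations are running behind and
 * callers should start throttling.
 */
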
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
{
	u64 num_entries =
		atomic_read(&trans->transaction->delayed_refs.num_entries);
	u64 avg_runtime;
	u64 val;

	smp_mb();
	avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;

	return btrfs_check_space_for_delayed_refs(trans->fs_info);
}

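/*
 * Worked example of the thresholds above (illustrative numbers only):
 * with an average runtime of 5000ns per ref, 200,000 queued entries give
 * 200000 * 5000 = 1e9 ns = 1s, so we return 1; 100,000 entries give
 * 0.5s, so we return 2.
 */
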
/*
 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
 * @fs_info - the fs_info for our fs.
 * @nr - the number of items to drop.
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
	u64 released = 0;

	released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
					     NULL);
	if (released)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}

/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans - the trans that may have generated delayed refs
 *
 * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
 * it'll calculate the additional size and add it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes;

	if (!trans->delayed_ref_updates)
		return;

	num_bytes = btrfs_calc_insert_metadata_size(fs_info,
						    trans->delayed_ref_updates);
	spin_lock(&delayed_rsv->lock);
	delayed_rsv->size += num_bytes;
	delayed_rsv->full = 0;
	spin_unlock(&delayed_rsv->lock);
	trans->delayed_ref_updates = 0;
}

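/*
 * Note (summary of this file's flow, not an original comment): this only
 * grows the rsv's size; the bytes themselves arrive later, either via
 * btrfs_delayed_refs_rsv_refill() or by migrating from another rsv with
 * btrfs_migrate_to_delayed_refs_rsv() below.
 */
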
/*
 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
 * @fs_info - the fs info for our fs.
 * @src - the source block rsv to transfer from.
 * @num_bytes - the number of bytes to transfer.
 *
 * This transfers up to the num_bytes amount from the src rsv to the
 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *src,
				       u64 num_bytes)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	u64 to_free = 0;

	spin_lock(&src->lock);
	src->reserved -= num_bytes;
	src->size -= num_bytes;
	spin_unlock(&src->lock);

	spin_lock(&delayed_refs_rsv->lock);
	if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
		u64 delta = delayed_refs_rsv->size -
			delayed_refs_rsv->reserved;
		if (num_bytes > delta) {
			to_free = num_bytes - delta;
			num_bytes = delta;
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}

	if (num_bytes)
		delayed_refs_rsv->reserved += num_bytes;
	if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
		delayed_refs_rsv->full = 1;
	spin_unlock(&delayed_refs_rsv->lock);

	if (num_bytes)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, num_bytes, 1);
	if (to_free)
		btrfs_space_info_free_bytes_may_use(fs_info,
				delayed_refs_rsv->space_info, to_free);
}

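/*
 * Example of the delta math above (illustrative numbers only): if the
 * rsv's size is 1M with 768K reserved, delta is 256K.  Migrating 320K
 * moves 256K into the rsv (filling it) and returns the remaining 64K to
 * the space info via btrfs_space_info_free_bytes_may_use().
 */
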
/*
 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
 * @fs_info - the fs_info for our fs.
 * @flush - control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to one item's size worth of
 * space and will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
	u64 num_bytes = 0;
	int ret;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = block_rsv->size - block_rsv->reserved;
		num_bytes = min(num_bytes, limit);
	}
	spin_unlock(&block_rsv->lock);

	if (!num_bytes)
		return 0;

	ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
					   num_bytes, flush);
	if (ret)
		return ret;
	btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
	trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
				      0, num_bytes, 1);
	return 0;
}

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}

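/*
 * Taken together (summary, not an original comment), these comparators
 * define a total order within one head's ref_tree: first by ref type,
 * then by the per-type fields (root or parent for tree refs;
 * root/objectid/offset or parent for data refs), and finally, when
 * check_seq is set, by seq so older entries sort first.
 */
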
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}

int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

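/*
 * Usage sketch for the -EAGAIN case above (hypothetical caller, not from
 * this file): the head can disappear while we sleep on the mutex, so
 * callers retry selection:
 *
 *	ret = btrfs_delayed_ref_lock(delayed_refs, head);
 *	if (ret == -EAGAIN)	// head was removed, pick another one
 *		goto again;
 */
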
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}

static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}

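/*
 * Example of a merge (illustrative): an ADD with ref_mod 2 followed by a
 * DROP with ref_mod 1 on the same ref collapses to a single entry with
 * ref_mod 1; if the mods cancel to 0, the surviving entry is dropped as
 * well.
 */
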
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	read_unlock(&fs_info->tree_mod_log_lock);

again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq);
			ret = 1;
		}
	}

	read_unlock(&fs_info->tree_mod_log_lock);
	return ret;
}

struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

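/*
 * Note on selection (summary, not an original comment): the scan starts
 * at run_delayed_start so successive calls walk href_root in bytenr
 * order, wrapping back to the start at most once, and skips heads that
 * another thread is already processing.
 */
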
void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove existing tail if its ref_mod is zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}

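/*
 * Worked example of the merge above (illustrative): an existing DROP
 * with ref_mod 2 merged with an incoming ADD of ref_mod 3 flips the
 * action to ADD and leaves ref_mod = 3 - 2 = 1; had the mods been equal,
 * ref_mod would reach 0 and the tail entry would be dropped entirely.
 */
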
/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * update the reference mod on the head to reflect this new operation,
	 * only need the lock for this case cause we could be processing it
	 * currently, for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_updates += csum_leaves;
		}
	}
	spin_unlock(&existing->lock);
}

static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			qrecord->data_rsv = reserved;
			qrecord->data_rsv_refroot = ref_root;
		}
		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

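/*
 * Summary of count_mod above (not an original comment):
 * BTRFS_ADD_DELAYED_REF/BTRFS_ADD_DELAYED_EXTENT start the head at +1,
 * BTRFS_DROP_DELAYED_REF at -1, and BTRFS_UPDATE_DELAYED_HEAD at 0 since
 * it only changes flags without touching the reference count.
 */
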
/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		update_existing_head_ref(trans, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_updates +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *			     modification to an extent.
 *
 * @fs_info:    Internal to the mounted filesystem mount structure.
 *
 * @ref:	The structure which is going to be initialized.
 *
 * @bytenr:	The logical address of the extent for which a modification is
 *		going to be recorded.
 *
 * @num_bytes:  Size of the extent whose modification is being recorded.
 *
 * @ref_root:	The id of the root where this modification has originated, this
 *		can be either one of the well-known metadata trees or the
 *		subvolume id which references this extent.
 *
 * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *		BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:	Holds the type of the extent which is being recorded, can be
 *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *		when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *		BTRFS_EXTENT_DATA_REF_KEY when recording data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system;
	int action = generic_ref->action;
	int level = generic_ref->tree_ref.level;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u8 ref_type;

	is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);

	ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(generic_ref->real_root) &&
	    is_fstree(generic_ref->tree_ref.root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				generic_ref->tree_ref.root, action, ref_type);
	ref->root = generic_ref->tree_ref.root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      generic_ref->tree_ref.root, 0, action, false,
			      is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(fs_info, record);

	return 0;
}

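/*
 * A minimal usage sketch (hypothetical values; the init helpers live in
 * delayed-ref.h, not this file):
 *
 *	struct btrfs_ref ref = { 0 };
 *
 *	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
 *			       num_bytes, parent);
 *	btrfs_init_tree_ref(&ref, level, root_objectid);
 *	ret = btrfs_add_delayed_tree_ref(trans, &ref, NULL, NULL, NULL);
 */
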
/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       u64 reserved, int *old_ref_mod,
			       int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int action = generic_ref->action;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = generic_ref->data_ref.ref_root;
	u64 owner = generic_ref->data_ref.ino;
	u64 offset = generic_ref->data_ref.offset;
	u8 ref_type;

	ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root) &&
	    is_fstree(generic_ref->real_root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}

int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
			      false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);
	return 0;
}

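/*
 * Note (summary, not an original comment): the extent_op attached above
 * carries a deferred key and/or flags update for the extent item; see
 * the update_key/update_flags merging in update_existing_head_ref().
 */
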
/*
 * This does a simple search for the head node for a given extent.  Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	lockdep_assert_held(&delayed_refs->lock);

	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}