bpf: Prevent memory disambiguation attack
[linux/fpc-iii.git] / fs / btrfs / delayed-ref.c
blob1e9a20a4c06c9f90c02b0abf7ff70ec3978e8fc7
1 /*
2 * Copyright (C) 2009 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/sort.h>
22 #include "ctree.h"
23 #include "delayed-ref.h"
24 #include "transaction.h"
25 #include "qgroup.h"
/*
 * Slab caches for the delayed ref objects.  All four are created in
 * btrfs_delayed_ref_init() and destroyed in btrfs_delayed_ref_exit().
 */
struct kmem_cache *btrfs_delayed_ref_head_cachep;	/* struct btrfs_delayed_ref_head */
struct kmem_cache *btrfs_delayed_tree_ref_cachep;	/* struct btrfs_delayed_tree_ref */
struct kmem_cache *btrfs_delayed_data_ref_cachep;	/* struct btrfs_delayed_data_ref */
struct kmem_cache *btrfs_delayed_extent_op_cachep;	/* struct btrfs_delayed_extent_op */
/*
 * Delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */
41 * compare two delayed tree backrefs with same bytenr and type
43 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
44 struct btrfs_delayed_tree_ref *ref2)
46 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
47 if (ref1->root < ref2->root)
48 return -1;
49 if (ref1->root > ref2->root)
50 return 1;
51 } else {
52 if (ref1->parent < ref2->parent)
53 return -1;
54 if (ref1->parent > ref2->parent)
55 return 1;
57 return 0;
61 * compare two delayed data backrefs with same bytenr and type
63 static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
64 struct btrfs_delayed_data_ref *ref2)
66 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
67 if (ref1->root < ref2->root)
68 return -1;
69 if (ref1->root > ref2->root)
70 return 1;
71 if (ref1->objectid < ref2->objectid)
72 return -1;
73 if (ref1->objectid > ref2->objectid)
74 return 1;
75 if (ref1->offset < ref2->offset)
76 return -1;
77 if (ref1->offset > ref2->offset)
78 return 1;
79 } else {
80 if (ref1->parent < ref2->parent)
81 return -1;
82 if (ref1->parent > ref2->parent)
83 return 1;
85 return 0;
88 static int comp_refs(struct btrfs_delayed_ref_node *ref1,
89 struct btrfs_delayed_ref_node *ref2,
90 bool check_seq)
92 int ret = 0;
94 if (ref1->type < ref2->type)
95 return -1;
96 if (ref1->type > ref2->type)
97 return 1;
98 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
99 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
100 ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
101 btrfs_delayed_node_to_tree_ref(ref2));
102 else
103 ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
104 btrfs_delayed_node_to_data_ref(ref2));
105 if (ret)
106 return ret;
107 if (check_seq) {
108 if (ref1->seq < ref2->seq)
109 return -1;
110 if (ref1->seq > ref2->seq)
111 return 1;
113 return 0;
116 /* insert a new ref to head ref rbtree */
117 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
118 struct rb_node *node)
120 struct rb_node **p = &root->rb_node;
121 struct rb_node *parent_node = NULL;
122 struct btrfs_delayed_ref_head *entry;
123 struct btrfs_delayed_ref_head *ins;
124 u64 bytenr;
126 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
127 bytenr = ins->bytenr;
128 while (*p) {
129 parent_node = *p;
130 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
131 href_node);
133 if (bytenr < entry->bytenr)
134 p = &(*p)->rb_left;
135 else if (bytenr > entry->bytenr)
136 p = &(*p)->rb_right;
137 else
138 return entry;
141 rb_link_node(node, parent_node, p);
142 rb_insert_color(node, root);
143 return NULL;
146 static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
147 struct btrfs_delayed_ref_node *ins)
149 struct rb_node **p = &root->rb_node;
150 struct rb_node *node = &ins->ref_node;
151 struct rb_node *parent_node = NULL;
152 struct btrfs_delayed_ref_node *entry;
154 while (*p) {
155 int comp;
157 parent_node = *p;
158 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
159 ref_node);
160 comp = comp_refs(ins, entry, true);
161 if (comp < 0)
162 p = &(*p)->rb_left;
163 else if (comp > 0)
164 p = &(*p)->rb_right;
165 else
166 return entry;
169 rb_link_node(node, parent_node, p);
170 rb_insert_color(node, root);
171 return NULL;
175 * find an head entry based on bytenr. This returns the delayed ref
176 * head if it was able to find one, or NULL if nothing was in that spot.
177 * If return_bigger is given, the next bigger entry is returned if no exact
178 * match is found.
180 static struct btrfs_delayed_ref_head *
181 find_ref_head(struct rb_root *root, u64 bytenr,
182 int return_bigger)
184 struct rb_node *n;
185 struct btrfs_delayed_ref_head *entry;
187 n = root->rb_node;
188 entry = NULL;
189 while (n) {
190 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
192 if (bytenr < entry->bytenr)
193 n = n->rb_left;
194 else if (bytenr > entry->bytenr)
195 n = n->rb_right;
196 else
197 return entry;
199 if (entry && return_bigger) {
200 if (bytenr > entry->bytenr) {
201 n = rb_next(&entry->href_node);
202 if (!n)
203 n = rb_first(root);
204 entry = rb_entry(n, struct btrfs_delayed_ref_head,
205 href_node);
206 return entry;
208 return entry;
210 return NULL;
213 int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
214 struct btrfs_delayed_ref_head *head)
216 struct btrfs_delayed_ref_root *delayed_refs;
218 delayed_refs = &trans->transaction->delayed_refs;
219 assert_spin_locked(&delayed_refs->lock);
220 if (mutex_trylock(&head->mutex))
221 return 0;
223 refcount_inc(&head->refs);
224 spin_unlock(&delayed_refs->lock);
226 mutex_lock(&head->mutex);
227 spin_lock(&delayed_refs->lock);
228 if (RB_EMPTY_NODE(&head->href_node)) {
229 mutex_unlock(&head->mutex);
230 btrfs_put_delayed_ref_head(head);
231 return -EAGAIN;
233 btrfs_put_delayed_ref_head(head);
234 return 0;
237 static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
238 struct btrfs_delayed_ref_root *delayed_refs,
239 struct btrfs_delayed_ref_head *head,
240 struct btrfs_delayed_ref_node *ref)
242 assert_spin_locked(&head->lock);
243 rb_erase(&ref->ref_node, &head->ref_tree);
244 RB_CLEAR_NODE(&ref->ref_node);
245 if (!list_empty(&ref->add_list))
246 list_del(&ref->add_list);
247 ref->in_tree = 0;
248 btrfs_put_delayed_ref(ref);
249 atomic_dec(&delayed_refs->num_entries);
250 if (trans->delayed_ref_updates)
251 trans->delayed_ref_updates--;
254 static bool merge_ref(struct btrfs_trans_handle *trans,
255 struct btrfs_delayed_ref_root *delayed_refs,
256 struct btrfs_delayed_ref_head *head,
257 struct btrfs_delayed_ref_node *ref,
258 u64 seq)
260 struct btrfs_delayed_ref_node *next;
261 struct rb_node *node = rb_next(&ref->ref_node);
262 bool done = false;
264 while (!done && node) {
265 int mod;
267 next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
268 node = rb_next(node);
269 if (seq && next->seq >= seq)
270 break;
271 if (comp_refs(ref, next, false))
272 break;
274 if (ref->action == next->action) {
275 mod = next->ref_mod;
276 } else {
277 if (ref->ref_mod < next->ref_mod) {
278 swap(ref, next);
279 done = true;
281 mod = -next->ref_mod;
284 drop_delayed_ref(trans, delayed_refs, head, next);
285 ref->ref_mod += mod;
286 if (ref->ref_mod == 0) {
287 drop_delayed_ref(trans, delayed_refs, head, ref);
288 done = true;
289 } else {
291 * Can't have multiples of the same ref on a tree block.
293 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
294 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
298 return done;
301 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
302 struct btrfs_fs_info *fs_info,
303 struct btrfs_delayed_ref_root *delayed_refs,
304 struct btrfs_delayed_ref_head *head)
306 struct btrfs_delayed_ref_node *ref;
307 struct rb_node *node;
308 u64 seq = 0;
310 assert_spin_locked(&head->lock);
312 if (RB_EMPTY_ROOT(&head->ref_tree))
313 return;
315 /* We don't have too many refs to merge for data. */
316 if (head->is_data)
317 return;
319 spin_lock(&fs_info->tree_mod_seq_lock);
320 if (!list_empty(&fs_info->tree_mod_seq_list)) {
321 struct seq_list *elem;
323 elem = list_first_entry(&fs_info->tree_mod_seq_list,
324 struct seq_list, list);
325 seq = elem->seq;
327 spin_unlock(&fs_info->tree_mod_seq_lock);
329 again:
330 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
331 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
332 if (seq && ref->seq >= seq)
333 continue;
334 if (merge_ref(trans, delayed_refs, head, ref, seq))
335 goto again;
339 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
340 struct btrfs_delayed_ref_root *delayed_refs,
341 u64 seq)
343 struct seq_list *elem;
344 int ret = 0;
346 spin_lock(&fs_info->tree_mod_seq_lock);
347 if (!list_empty(&fs_info->tree_mod_seq_list)) {
348 elem = list_first_entry(&fs_info->tree_mod_seq_list,
349 struct seq_list, list);
350 if (seq >= elem->seq) {
351 btrfs_debug(fs_info,
352 "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
353 (u32)(seq >> 32), (u32)seq,
354 (u32)(elem->seq >> 32), (u32)elem->seq,
355 delayed_refs);
356 ret = 1;
360 spin_unlock(&fs_info->tree_mod_seq_lock);
361 return ret;
364 struct btrfs_delayed_ref_head *
365 btrfs_select_ref_head(struct btrfs_trans_handle *trans)
367 struct btrfs_delayed_ref_root *delayed_refs;
368 struct btrfs_delayed_ref_head *head;
369 u64 start;
370 bool loop = false;
372 delayed_refs = &trans->transaction->delayed_refs;
374 again:
375 start = delayed_refs->run_delayed_start;
376 head = find_ref_head(&delayed_refs->href_root, start, 1);
377 if (!head && !loop) {
378 delayed_refs->run_delayed_start = 0;
379 start = 0;
380 loop = true;
381 head = find_ref_head(&delayed_refs->href_root, start, 1);
382 if (!head)
383 return NULL;
384 } else if (!head && loop) {
385 return NULL;
388 while (head->processing) {
389 struct rb_node *node;
391 node = rb_next(&head->href_node);
392 if (!node) {
393 if (loop)
394 return NULL;
395 delayed_refs->run_delayed_start = 0;
396 start = 0;
397 loop = true;
398 goto again;
400 head = rb_entry(node, struct btrfs_delayed_ref_head,
401 href_node);
404 head->processing = 1;
405 WARN_ON(delayed_refs->num_heads_ready == 0);
406 delayed_refs->num_heads_ready--;
407 delayed_refs->run_delayed_start = head->bytenr +
408 head->num_bytes;
409 return head;
413 * Helper to insert the ref_node to the tail or merge with tail.
415 * Return 0 for insert.
416 * Return >0 for merge.
418 static int insert_delayed_ref(struct btrfs_trans_handle *trans,
419 struct btrfs_delayed_ref_root *root,
420 struct btrfs_delayed_ref_head *href,
421 struct btrfs_delayed_ref_node *ref)
423 struct btrfs_delayed_ref_node *exist;
424 int mod;
425 int ret = 0;
427 spin_lock(&href->lock);
428 exist = tree_insert(&href->ref_tree, ref);
429 if (!exist)
430 goto inserted;
432 /* Now we are sure we can merge */
433 ret = 1;
434 if (exist->action == ref->action) {
435 mod = ref->ref_mod;
436 } else {
437 /* Need to change action */
438 if (exist->ref_mod < ref->ref_mod) {
439 exist->action = ref->action;
440 mod = -exist->ref_mod;
441 exist->ref_mod = ref->ref_mod;
442 if (ref->action == BTRFS_ADD_DELAYED_REF)
443 list_add_tail(&exist->add_list,
444 &href->ref_add_list);
445 else if (ref->action == BTRFS_DROP_DELAYED_REF) {
446 ASSERT(!list_empty(&exist->add_list));
447 list_del(&exist->add_list);
448 } else {
449 ASSERT(0);
451 } else
452 mod = -ref->ref_mod;
454 exist->ref_mod += mod;
456 /* remove existing tail if its ref_mod is zero */
457 if (exist->ref_mod == 0)
458 drop_delayed_ref(trans, root, href, exist);
459 spin_unlock(&href->lock);
460 return ret;
461 inserted:
462 if (ref->action == BTRFS_ADD_DELAYED_REF)
463 list_add_tail(&ref->add_list, &href->ref_add_list);
464 atomic_inc(&root->num_entries);
465 trans->delayed_ref_updates++;
466 spin_unlock(&href->lock);
467 return ret;
471 * helper function to update the accounting in the head ref
472 * existing and update must have the same bytenr
474 static noinline void
475 update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
476 struct btrfs_delayed_ref_head *existing,
477 struct btrfs_delayed_ref_head *update,
478 int *old_ref_mod_ret)
480 int old_ref_mod;
482 BUG_ON(existing->is_data != update->is_data);
484 spin_lock(&existing->lock);
485 if (update->must_insert_reserved) {
486 /* if the extent was freed and then
487 * reallocated before the delayed ref
488 * entries were processed, we can end up
489 * with an existing head ref without
490 * the must_insert_reserved flag set.
491 * Set it again here
493 existing->must_insert_reserved = update->must_insert_reserved;
496 * update the num_bytes so we make sure the accounting
497 * is done correctly
499 existing->num_bytes = update->num_bytes;
503 if (update->extent_op) {
504 if (!existing->extent_op) {
505 existing->extent_op = update->extent_op;
506 } else {
507 if (update->extent_op->update_key) {
508 memcpy(&existing->extent_op->key,
509 &update->extent_op->key,
510 sizeof(update->extent_op->key));
511 existing->extent_op->update_key = true;
513 if (update->extent_op->update_flags) {
514 existing->extent_op->flags_to_set |=
515 update->extent_op->flags_to_set;
516 existing->extent_op->update_flags = true;
518 btrfs_free_delayed_extent_op(update->extent_op);
522 * update the reference mod on the head to reflect this new operation,
523 * only need the lock for this case cause we could be processing it
524 * currently, for refs we just added we know we're a-ok.
526 old_ref_mod = existing->total_ref_mod;
527 if (old_ref_mod_ret)
528 *old_ref_mod_ret = old_ref_mod;
529 existing->ref_mod += update->ref_mod;
530 existing->total_ref_mod += update->ref_mod;
533 * If we are going to from a positive ref mod to a negative or vice
534 * versa we need to make sure to adjust pending_csums accordingly.
536 if (existing->is_data) {
537 if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
538 delayed_refs->pending_csums -= existing->num_bytes;
539 if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
540 delayed_refs->pending_csums += existing->num_bytes;
542 spin_unlock(&existing->lock);
546 * helper function to actually insert a head node into the rbtree.
547 * this does all the dirty work in terms of maintaining the correct
548 * overall modification count.
550 static noinline struct btrfs_delayed_ref_head *
551 add_delayed_ref_head(struct btrfs_fs_info *fs_info,
552 struct btrfs_trans_handle *trans,
553 struct btrfs_delayed_ref_head *head_ref,
554 struct btrfs_qgroup_extent_record *qrecord,
555 u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
556 int action, int is_data, int is_system,
557 int *qrecord_inserted_ret,
558 int *old_ref_mod, int *new_ref_mod)
561 struct btrfs_delayed_ref_head *existing;
562 struct btrfs_delayed_ref_root *delayed_refs;
563 int count_mod = 1;
564 int must_insert_reserved = 0;
565 int qrecord_inserted = 0;
567 /* If reserved is provided, it must be a data extent. */
568 BUG_ON(!is_data && reserved);
571 * the head node stores the sum of all the mods, so dropping a ref
572 * should drop the sum in the head node by one.
574 if (action == BTRFS_UPDATE_DELAYED_HEAD)
575 count_mod = 0;
576 else if (action == BTRFS_DROP_DELAYED_REF)
577 count_mod = -1;
580 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
581 * the reserved accounting when the extent is finally added, or
582 * if a later modification deletes the delayed ref without ever
583 * inserting the extent into the extent allocation tree.
584 * ref->must_insert_reserved is the flag used to record
585 * that accounting mods are required.
587 * Once we record must_insert_reserved, switch the action to
588 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
590 if (action == BTRFS_ADD_DELAYED_EXTENT)
591 must_insert_reserved = 1;
592 else
593 must_insert_reserved = 0;
595 delayed_refs = &trans->transaction->delayed_refs;
597 refcount_set(&head_ref->refs, 1);
598 head_ref->bytenr = bytenr;
599 head_ref->num_bytes = num_bytes;
600 head_ref->ref_mod = count_mod;
601 head_ref->must_insert_reserved = must_insert_reserved;
602 head_ref->is_data = is_data;
603 head_ref->is_system = is_system;
604 head_ref->ref_tree = RB_ROOT;
605 INIT_LIST_HEAD(&head_ref->ref_add_list);
606 RB_CLEAR_NODE(&head_ref->href_node);
607 head_ref->processing = 0;
608 head_ref->total_ref_mod = count_mod;
609 head_ref->qgroup_reserved = 0;
610 head_ref->qgroup_ref_root = 0;
611 spin_lock_init(&head_ref->lock);
612 mutex_init(&head_ref->mutex);
614 /* Record qgroup extent info if provided */
615 if (qrecord) {
616 if (ref_root && reserved) {
617 head_ref->qgroup_ref_root = ref_root;
618 head_ref->qgroup_reserved = reserved;
621 qrecord->bytenr = bytenr;
622 qrecord->num_bytes = num_bytes;
623 qrecord->old_roots = NULL;
625 if(btrfs_qgroup_trace_extent_nolock(fs_info,
626 delayed_refs, qrecord))
627 kfree(qrecord);
628 else
629 qrecord_inserted = 1;
632 trace_add_delayed_ref_head(fs_info, head_ref, action);
634 existing = htree_insert(&delayed_refs->href_root,
635 &head_ref->href_node);
636 if (existing) {
637 WARN_ON(ref_root && reserved && existing->qgroup_ref_root
638 && existing->qgroup_reserved);
639 update_existing_head_ref(delayed_refs, existing, head_ref,
640 old_ref_mod);
642 * we've updated the existing ref, free the newly
643 * allocated ref
645 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
646 head_ref = existing;
647 } else {
648 if (old_ref_mod)
649 *old_ref_mod = 0;
650 if (is_data && count_mod < 0)
651 delayed_refs->pending_csums += num_bytes;
652 delayed_refs->num_heads++;
653 delayed_refs->num_heads_ready++;
654 atomic_inc(&delayed_refs->num_entries);
655 trans->delayed_ref_updates++;
657 if (qrecord_inserted_ret)
658 *qrecord_inserted_ret = qrecord_inserted;
659 if (new_ref_mod)
660 *new_ref_mod = head_ref->total_ref_mod;
661 return head_ref;
665 * helper to insert a delayed tree ref into the rbtree.
667 static noinline void
668 add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
669 struct btrfs_trans_handle *trans,
670 struct btrfs_delayed_ref_head *head_ref,
671 struct btrfs_delayed_ref_node *ref, u64 bytenr,
672 u64 num_bytes, u64 parent, u64 ref_root, int level,
673 int action)
675 struct btrfs_delayed_tree_ref *full_ref;
676 struct btrfs_delayed_ref_root *delayed_refs;
677 u64 seq = 0;
678 int ret;
680 if (action == BTRFS_ADD_DELAYED_EXTENT)
681 action = BTRFS_ADD_DELAYED_REF;
683 if (is_fstree(ref_root))
684 seq = atomic64_read(&fs_info->tree_mod_seq);
685 delayed_refs = &trans->transaction->delayed_refs;
687 /* first set the basic ref node struct up */
688 refcount_set(&ref->refs, 1);
689 ref->bytenr = bytenr;
690 ref->num_bytes = num_bytes;
691 ref->ref_mod = 1;
692 ref->action = action;
693 ref->is_head = 0;
694 ref->in_tree = 1;
695 ref->seq = seq;
696 RB_CLEAR_NODE(&ref->ref_node);
697 INIT_LIST_HEAD(&ref->add_list);
699 full_ref = btrfs_delayed_node_to_tree_ref(ref);
700 full_ref->parent = parent;
701 full_ref->root = ref_root;
702 if (parent)
703 ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
704 else
705 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
706 full_ref->level = level;
708 trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
710 ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
713 * XXX: memory should be freed at the same level allocated.
714 * But bad practice is anywhere... Follow it now. Need cleanup.
716 if (ret > 0)
717 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
721 * helper to insert a delayed data ref into the rbtree.
723 static noinline void
724 add_delayed_data_ref(struct btrfs_fs_info *fs_info,
725 struct btrfs_trans_handle *trans,
726 struct btrfs_delayed_ref_head *head_ref,
727 struct btrfs_delayed_ref_node *ref, u64 bytenr,
728 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
729 u64 offset, int action)
731 struct btrfs_delayed_data_ref *full_ref;
732 struct btrfs_delayed_ref_root *delayed_refs;
733 u64 seq = 0;
734 int ret;
736 if (action == BTRFS_ADD_DELAYED_EXTENT)
737 action = BTRFS_ADD_DELAYED_REF;
739 delayed_refs = &trans->transaction->delayed_refs;
741 if (is_fstree(ref_root))
742 seq = atomic64_read(&fs_info->tree_mod_seq);
744 /* first set the basic ref node struct up */
745 refcount_set(&ref->refs, 1);
746 ref->bytenr = bytenr;
747 ref->num_bytes = num_bytes;
748 ref->ref_mod = 1;
749 ref->action = action;
750 ref->is_head = 0;
751 ref->in_tree = 1;
752 ref->seq = seq;
753 RB_CLEAR_NODE(&ref->ref_node);
754 INIT_LIST_HEAD(&ref->add_list);
756 full_ref = btrfs_delayed_node_to_data_ref(ref);
757 full_ref->parent = parent;
758 full_ref->root = ref_root;
759 if (parent)
760 ref->type = BTRFS_SHARED_DATA_REF_KEY;
761 else
762 ref->type = BTRFS_EXTENT_DATA_REF_KEY;
764 full_ref->objectid = owner;
765 full_ref->offset = offset;
767 trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
769 ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
770 if (ret > 0)
771 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
775 * add a delayed tree ref. This does all of the accounting required
776 * to make sure the delayed ref is eventually processed before this
777 * transaction commits.
779 int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
780 struct btrfs_trans_handle *trans,
781 u64 bytenr, u64 num_bytes, u64 parent,
782 u64 ref_root, int level, int action,
783 struct btrfs_delayed_extent_op *extent_op,
784 int *old_ref_mod, int *new_ref_mod)
786 struct btrfs_delayed_tree_ref *ref;
787 struct btrfs_delayed_ref_head *head_ref;
788 struct btrfs_delayed_ref_root *delayed_refs;
789 struct btrfs_qgroup_extent_record *record = NULL;
790 int qrecord_inserted;
791 int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
793 BUG_ON(extent_op && extent_op->is_data);
794 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
795 if (!ref)
796 return -ENOMEM;
798 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
799 if (!head_ref)
800 goto free_ref;
802 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
803 is_fstree(ref_root)) {
804 record = kmalloc(sizeof(*record), GFP_NOFS);
805 if (!record)
806 goto free_head_ref;
809 head_ref->extent_op = extent_op;
811 delayed_refs = &trans->transaction->delayed_refs;
812 spin_lock(&delayed_refs->lock);
815 * insert both the head node and the new ref without dropping
816 * the spin lock
818 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
819 bytenr, num_bytes, 0, 0, action, 0,
820 is_system, &qrecord_inserted,
821 old_ref_mod, new_ref_mod);
823 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
824 num_bytes, parent, ref_root, level, action);
825 spin_unlock(&delayed_refs->lock);
827 if (qrecord_inserted)
828 btrfs_qgroup_trace_extent_post(fs_info, record);
830 return 0;
832 free_head_ref:
833 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
834 free_ref:
835 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
837 return -ENOMEM;
841 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
843 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
844 struct btrfs_trans_handle *trans,
845 u64 bytenr, u64 num_bytes,
846 u64 parent, u64 ref_root,
847 u64 owner, u64 offset, u64 reserved, int action,
848 int *old_ref_mod, int *new_ref_mod)
850 struct btrfs_delayed_data_ref *ref;
851 struct btrfs_delayed_ref_head *head_ref;
852 struct btrfs_delayed_ref_root *delayed_refs;
853 struct btrfs_qgroup_extent_record *record = NULL;
854 int qrecord_inserted;
856 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
857 if (!ref)
858 return -ENOMEM;
860 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
861 if (!head_ref) {
862 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
863 return -ENOMEM;
866 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
867 is_fstree(ref_root)) {
868 record = kmalloc(sizeof(*record), GFP_NOFS);
869 if (!record) {
870 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
871 kmem_cache_free(btrfs_delayed_ref_head_cachep,
872 head_ref);
873 return -ENOMEM;
877 head_ref->extent_op = NULL;
879 delayed_refs = &trans->transaction->delayed_refs;
880 spin_lock(&delayed_refs->lock);
883 * insert both the head node and the new ref without dropping
884 * the spin lock
886 head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
887 bytenr, num_bytes, ref_root, reserved,
888 action, 1, 0, &qrecord_inserted,
889 old_ref_mod, new_ref_mod);
891 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
892 num_bytes, parent, ref_root, owner, offset,
893 action);
894 spin_unlock(&delayed_refs->lock);
896 if (qrecord_inserted)
897 return btrfs_qgroup_trace_extent_post(fs_info, record);
898 return 0;
901 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
902 struct btrfs_trans_handle *trans,
903 u64 bytenr, u64 num_bytes,
904 struct btrfs_delayed_extent_op *extent_op)
906 struct btrfs_delayed_ref_head *head_ref;
907 struct btrfs_delayed_ref_root *delayed_refs;
909 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
910 if (!head_ref)
911 return -ENOMEM;
913 head_ref->extent_op = extent_op;
915 delayed_refs = &trans->transaction->delayed_refs;
916 spin_lock(&delayed_refs->lock);
919 * extent_ops just modify the flags of an extent and they don't result
920 * in ref count changes, hence it's safe to pass false/0 for is_system
921 * argument
923 add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
924 num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
925 extent_op->is_data, 0, NULL, NULL, NULL);
927 spin_unlock(&delayed_refs->lock);
928 return 0;
932 * this does a simple search for the head node for a given extent.
933 * It must be called with the delayed ref spinlock held, and it returns
934 * the head node if any where found, or NULL if not.
936 struct btrfs_delayed_ref_head *
937 btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
939 return find_ref_head(&delayed_refs->href_root, bytenr, 0);
942 void btrfs_delayed_ref_exit(void)
944 kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
945 kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
946 kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
947 kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
950 int __init btrfs_delayed_ref_init(void)
952 btrfs_delayed_ref_head_cachep = kmem_cache_create(
953 "btrfs_delayed_ref_head",
954 sizeof(struct btrfs_delayed_ref_head), 0,
955 SLAB_MEM_SPREAD, NULL);
956 if (!btrfs_delayed_ref_head_cachep)
957 goto fail;
959 btrfs_delayed_tree_ref_cachep = kmem_cache_create(
960 "btrfs_delayed_tree_ref",
961 sizeof(struct btrfs_delayed_tree_ref), 0,
962 SLAB_MEM_SPREAD, NULL);
963 if (!btrfs_delayed_tree_ref_cachep)
964 goto fail;
966 btrfs_delayed_data_ref_cachep = kmem_cache_create(
967 "btrfs_delayed_data_ref",
968 sizeof(struct btrfs_delayed_data_ref), 0,
969 SLAB_MEM_SPREAD, NULL);
970 if (!btrfs_delayed_data_ref_cachep)
971 goto fail;
973 btrfs_delayed_extent_op_cachep = kmem_cache_create(
974 "btrfs_delayed_extent_op",
975 sizeof(struct btrfs_delayed_extent_op), 0,
976 SLAB_MEM_SPREAD, NULL);
977 if (!btrfs_delayed_extent_op_cachep)
978 goto fail;
980 return 0;
981 fail:
982 btrfs_delayed_ref_exit();
983 return -ENOMEM;