2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
23 #include "transaction.h"
/*
 * Tunables for the delayed-item machinery (all counted in items/nodes):
 *
 * BTRFS_DELAYED_WRITEBACK  - upper bound on the number of nodes one async
 *                            worker processes when no explicit count was
 *                            requested (nr == 0).
 * BTRFS_DELAYED_BACKGROUND - pending-item count below which no async work is
 *                            queued and below which waiters are woken up.
 * BTRFS_DELAYED_BATCH      - waiters are also woken every time the item
 *                            sequence number is a multiple of this value.
 */
#define BTRFS_DELAYED_WRITEBACK		512
#define BTRFS_DELAYED_BACKGROUND	128
#define BTRFS_DELAYED_BATCH		16

/* Slab cache all struct btrfs_delayed_node objects are allocated from. */
static struct kmem_cache *delayed_node_cache;
32 int __init
btrfs_delayed_inode_init(void)
34 delayed_node_cache
= kmem_cache_create("btrfs_delayed_node",
35 sizeof(struct btrfs_delayed_node
),
37 SLAB_RECLAIM_ACCOUNT
| SLAB_MEM_SPREAD
,
39 if (!delayed_node_cache
)
44 void btrfs_delayed_inode_exit(void)
46 if (delayed_node_cache
)
47 kmem_cache_destroy(delayed_node_cache
);
50 static inline void btrfs_init_delayed_node(
51 struct btrfs_delayed_node
*delayed_node
,
52 struct btrfs_root
*root
, u64 inode_id
)
54 delayed_node
->root
= root
;
55 delayed_node
->inode_id
= inode_id
;
56 atomic_set(&delayed_node
->refs
, 0);
57 delayed_node
->count
= 0;
58 delayed_node
->flags
= 0;
59 delayed_node
->ins_root
= RB_ROOT
;
60 delayed_node
->del_root
= RB_ROOT
;
61 mutex_init(&delayed_node
->mutex
);
62 delayed_node
->index_cnt
= 0;
63 INIT_LIST_HEAD(&delayed_node
->n_list
);
64 INIT_LIST_HEAD(&delayed_node
->p_list
);
65 delayed_node
->bytes_reserved
= 0;
66 memset(&delayed_node
->inode_item
, 0, sizeof(delayed_node
->inode_item
));
69 static inline int btrfs_is_continuous_delayed_item(
70 struct btrfs_delayed_item
*item1
,
71 struct btrfs_delayed_item
*item2
)
73 if (item1
->key
.type
== BTRFS_DIR_INDEX_KEY
&&
74 item1
->key
.objectid
== item2
->key
.objectid
&&
75 item1
->key
.type
== item2
->key
.type
&&
76 item1
->key
.offset
+ 1 == item2
->key
.offset
)
81 static inline struct btrfs_delayed_root
*btrfs_get_delayed_root(
82 struct btrfs_root
*root
)
84 return root
->fs_info
->delayed_root
;
87 static struct btrfs_delayed_node
*btrfs_get_delayed_node(struct inode
*inode
)
89 struct btrfs_inode
*btrfs_inode
= BTRFS_I(inode
);
90 struct btrfs_root
*root
= btrfs_inode
->root
;
91 u64 ino
= btrfs_ino(inode
);
92 struct btrfs_delayed_node
*node
;
94 node
= ACCESS_ONCE(btrfs_inode
->delayed_node
);
96 atomic_inc(&node
->refs
);
100 spin_lock(&root
->inode_lock
);
101 node
= radix_tree_lookup(&root
->delayed_nodes_tree
, ino
);
103 if (btrfs_inode
->delayed_node
) {
104 atomic_inc(&node
->refs
); /* can be accessed */
105 BUG_ON(btrfs_inode
->delayed_node
!= node
);
106 spin_unlock(&root
->inode_lock
);
109 btrfs_inode
->delayed_node
= node
;
110 /* can be accessed and cached in the inode */
111 atomic_add(2, &node
->refs
);
112 spin_unlock(&root
->inode_lock
);
115 spin_unlock(&root
->inode_lock
);
120 /* Will return either the node or PTR_ERR(-ENOMEM) */
121 static struct btrfs_delayed_node
*btrfs_get_or_create_delayed_node(
124 struct btrfs_delayed_node
*node
;
125 struct btrfs_inode
*btrfs_inode
= BTRFS_I(inode
);
126 struct btrfs_root
*root
= btrfs_inode
->root
;
127 u64 ino
= btrfs_ino(inode
);
131 node
= btrfs_get_delayed_node(inode
);
135 node
= kmem_cache_alloc(delayed_node_cache
, GFP_NOFS
);
137 return ERR_PTR(-ENOMEM
);
138 btrfs_init_delayed_node(node
, root
, ino
);
140 /* cached in the btrfs inode and can be accessed */
141 atomic_add(2, &node
->refs
);
143 ret
= radix_tree_preload(GFP_NOFS
& ~__GFP_HIGHMEM
);
145 kmem_cache_free(delayed_node_cache
, node
);
149 spin_lock(&root
->inode_lock
);
150 ret
= radix_tree_insert(&root
->delayed_nodes_tree
, ino
, node
);
151 if (ret
== -EEXIST
) {
152 spin_unlock(&root
->inode_lock
);
153 kmem_cache_free(delayed_node_cache
, node
);
154 radix_tree_preload_end();
157 btrfs_inode
->delayed_node
= node
;
158 spin_unlock(&root
->inode_lock
);
159 radix_tree_preload_end();
165 * Call it when holding delayed_node->mutex
167 * If mod = 1, add this node into the prepared list.
169 static void btrfs_queue_delayed_node(struct btrfs_delayed_root
*root
,
170 struct btrfs_delayed_node
*node
,
173 spin_lock(&root
->lock
);
174 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST
, &node
->flags
)) {
175 if (!list_empty(&node
->p_list
))
176 list_move_tail(&node
->p_list
, &root
->prepare_list
);
178 list_add_tail(&node
->p_list
, &root
->prepare_list
);
180 list_add_tail(&node
->n_list
, &root
->node_list
);
181 list_add_tail(&node
->p_list
, &root
->prepare_list
);
182 atomic_inc(&node
->refs
); /* inserted into list */
184 set_bit(BTRFS_DELAYED_NODE_IN_LIST
, &node
->flags
);
186 spin_unlock(&root
->lock
);
189 /* Call it when holding delayed_node->mutex */
190 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root
*root
,
191 struct btrfs_delayed_node
*node
)
193 spin_lock(&root
->lock
);
194 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST
, &node
->flags
)) {
196 atomic_dec(&node
->refs
); /* not in the list */
197 list_del_init(&node
->n_list
);
198 if (!list_empty(&node
->p_list
))
199 list_del_init(&node
->p_list
);
200 clear_bit(BTRFS_DELAYED_NODE_IN_LIST
, &node
->flags
);
202 spin_unlock(&root
->lock
);
205 static struct btrfs_delayed_node
*btrfs_first_delayed_node(
206 struct btrfs_delayed_root
*delayed_root
)
209 struct btrfs_delayed_node
*node
= NULL
;
211 spin_lock(&delayed_root
->lock
);
212 if (list_empty(&delayed_root
->node_list
))
215 p
= delayed_root
->node_list
.next
;
216 node
= list_entry(p
, struct btrfs_delayed_node
, n_list
);
217 atomic_inc(&node
->refs
);
219 spin_unlock(&delayed_root
->lock
);
224 static struct btrfs_delayed_node
*btrfs_next_delayed_node(
225 struct btrfs_delayed_node
*node
)
227 struct btrfs_delayed_root
*delayed_root
;
229 struct btrfs_delayed_node
*next
= NULL
;
231 delayed_root
= node
->root
->fs_info
->delayed_root
;
232 spin_lock(&delayed_root
->lock
);
233 if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST
, &node
->flags
)) {
234 /* not in the list */
235 if (list_empty(&delayed_root
->node_list
))
237 p
= delayed_root
->node_list
.next
;
238 } else if (list_is_last(&node
->n_list
, &delayed_root
->node_list
))
241 p
= node
->n_list
.next
;
243 next
= list_entry(p
, struct btrfs_delayed_node
, n_list
);
244 atomic_inc(&next
->refs
);
246 spin_unlock(&delayed_root
->lock
);
251 static void __btrfs_release_delayed_node(
252 struct btrfs_delayed_node
*delayed_node
,
255 struct btrfs_delayed_root
*delayed_root
;
260 delayed_root
= delayed_node
->root
->fs_info
->delayed_root
;
262 mutex_lock(&delayed_node
->mutex
);
263 if (delayed_node
->count
)
264 btrfs_queue_delayed_node(delayed_root
, delayed_node
, mod
);
266 btrfs_dequeue_delayed_node(delayed_root
, delayed_node
);
267 mutex_unlock(&delayed_node
->mutex
);
269 if (atomic_dec_and_test(&delayed_node
->refs
)) {
271 struct btrfs_root
*root
= delayed_node
->root
;
272 spin_lock(&root
->inode_lock
);
273 if (atomic_read(&delayed_node
->refs
) == 0) {
274 radix_tree_delete(&root
->delayed_nodes_tree
,
275 delayed_node
->inode_id
);
278 spin_unlock(&root
->inode_lock
);
280 kmem_cache_free(delayed_node_cache
, delayed_node
);
/* Drop a reference on @node without forcing it onto the prepare list. */
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 0);
}
289 static struct btrfs_delayed_node
*btrfs_first_prepared_delayed_node(
290 struct btrfs_delayed_root
*delayed_root
)
293 struct btrfs_delayed_node
*node
= NULL
;
295 spin_lock(&delayed_root
->lock
);
296 if (list_empty(&delayed_root
->prepare_list
))
299 p
= delayed_root
->prepare_list
.next
;
301 node
= list_entry(p
, struct btrfs_delayed_node
, p_list
);
302 atomic_inc(&node
->refs
);
304 spin_unlock(&delayed_root
->lock
);
/*
 * Drop a reference on @node; if it still has pending items it is put back
 * on the prepare list (mod = 1).
 */
static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 1);
}
315 static struct btrfs_delayed_item
*btrfs_alloc_delayed_item(u32 data_len
)
317 struct btrfs_delayed_item
*item
;
318 item
= kmalloc(sizeof(*item
) + data_len
, GFP_NOFS
);
320 item
->data_len
= data_len
;
321 item
->ins_or_del
= 0;
322 item
->bytes_reserved
= 0;
323 item
->delayed_node
= NULL
;
324 atomic_set(&item
->refs
, 1);
330 * __btrfs_lookup_delayed_item - look up the delayed item by key
331 * @delayed_node: pointer to the delayed node
332 * @key: the key to look up
333 * @prev: used to store the prev item if the right item isn't found
334 * @next: used to store the next item if the right item isn't found
336 * Note: if we don't find the right item, we will return the prev item and
339 static struct btrfs_delayed_item
*__btrfs_lookup_delayed_item(
340 struct rb_root
*root
,
341 struct btrfs_key
*key
,
342 struct btrfs_delayed_item
**prev
,
343 struct btrfs_delayed_item
**next
)
345 struct rb_node
*node
, *prev_node
= NULL
;
346 struct btrfs_delayed_item
*delayed_item
= NULL
;
349 node
= root
->rb_node
;
352 delayed_item
= rb_entry(node
, struct btrfs_delayed_item
,
355 ret
= btrfs_comp_cpu_keys(&delayed_item
->key
, key
);
357 node
= node
->rb_right
;
359 node
= node
->rb_left
;
368 *prev
= delayed_item
;
369 else if ((node
= rb_prev(prev_node
)) != NULL
) {
370 *prev
= rb_entry(node
, struct btrfs_delayed_item
,
380 *next
= delayed_item
;
381 else if ((node
= rb_next(prev_node
)) != NULL
) {
382 *next
= rb_entry(node
, struct btrfs_delayed_item
,
390 static struct btrfs_delayed_item
*__btrfs_lookup_delayed_insertion_item(
391 struct btrfs_delayed_node
*delayed_node
,
392 struct btrfs_key
*key
)
394 struct btrfs_delayed_item
*item
;
396 item
= __btrfs_lookup_delayed_item(&delayed_node
->ins_root
, key
,
401 static int __btrfs_add_delayed_item(struct btrfs_delayed_node
*delayed_node
,
402 struct btrfs_delayed_item
*ins
,
405 struct rb_node
**p
, *node
;
406 struct rb_node
*parent_node
= NULL
;
407 struct rb_root
*root
;
408 struct btrfs_delayed_item
*item
;
411 if (action
== BTRFS_DELAYED_INSERTION_ITEM
)
412 root
= &delayed_node
->ins_root
;
413 else if (action
== BTRFS_DELAYED_DELETION_ITEM
)
414 root
= &delayed_node
->del_root
;
418 node
= &ins
->rb_node
;
422 item
= rb_entry(parent_node
, struct btrfs_delayed_item
,
425 cmp
= btrfs_comp_cpu_keys(&item
->key
, &ins
->key
);
434 rb_link_node(node
, parent_node
, p
);
435 rb_insert_color(node
, root
);
436 ins
->delayed_node
= delayed_node
;
437 ins
->ins_or_del
= action
;
439 if (ins
->key
.type
== BTRFS_DIR_INDEX_KEY
&&
440 action
== BTRFS_DELAYED_INSERTION_ITEM
&&
441 ins
->key
.offset
>= delayed_node
->index_cnt
)
442 delayed_node
->index_cnt
= ins
->key
.offset
+ 1;
444 delayed_node
->count
++;
445 atomic_inc(&delayed_node
->root
->fs_info
->delayed_root
->items
);
449 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node
*node
,
450 struct btrfs_delayed_item
*item
)
452 return __btrfs_add_delayed_item(node
, item
,
453 BTRFS_DELAYED_INSERTION_ITEM
);
456 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node
*node
,
457 struct btrfs_delayed_item
*item
)
459 return __btrfs_add_delayed_item(node
, item
,
460 BTRFS_DELAYED_DELETION_ITEM
);
463 static void finish_one_item(struct btrfs_delayed_root
*delayed_root
)
465 int seq
= atomic_inc_return(&delayed_root
->items_seq
);
466 if ((atomic_dec_return(&delayed_root
->items
) <
467 BTRFS_DELAYED_BACKGROUND
|| seq
% BTRFS_DELAYED_BATCH
== 0) &&
468 waitqueue_active(&delayed_root
->wait
))
469 wake_up(&delayed_root
->wait
);
472 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item
*delayed_item
)
474 struct rb_root
*root
;
475 struct btrfs_delayed_root
*delayed_root
;
477 delayed_root
= delayed_item
->delayed_node
->root
->fs_info
->delayed_root
;
479 BUG_ON(!delayed_root
);
480 BUG_ON(delayed_item
->ins_or_del
!= BTRFS_DELAYED_DELETION_ITEM
&&
481 delayed_item
->ins_or_del
!= BTRFS_DELAYED_INSERTION_ITEM
);
483 if (delayed_item
->ins_or_del
== BTRFS_DELAYED_INSERTION_ITEM
)
484 root
= &delayed_item
->delayed_node
->ins_root
;
486 root
= &delayed_item
->delayed_node
->del_root
;
488 rb_erase(&delayed_item
->rb_node
, root
);
489 delayed_item
->delayed_node
->count
--;
491 finish_one_item(delayed_root
);
494 static void btrfs_release_delayed_item(struct btrfs_delayed_item
*item
)
497 __btrfs_remove_delayed_item(item
);
498 if (atomic_dec_and_test(&item
->refs
))
503 static struct btrfs_delayed_item
*__btrfs_first_delayed_insertion_item(
504 struct btrfs_delayed_node
*delayed_node
)
507 struct btrfs_delayed_item
*item
= NULL
;
509 p
= rb_first(&delayed_node
->ins_root
);
511 item
= rb_entry(p
, struct btrfs_delayed_item
, rb_node
);
516 static struct btrfs_delayed_item
*__btrfs_first_delayed_deletion_item(
517 struct btrfs_delayed_node
*delayed_node
)
520 struct btrfs_delayed_item
*item
= NULL
;
522 p
= rb_first(&delayed_node
->del_root
);
524 item
= rb_entry(p
, struct btrfs_delayed_item
, rb_node
);
529 static struct btrfs_delayed_item
*__btrfs_next_delayed_item(
530 struct btrfs_delayed_item
*item
)
533 struct btrfs_delayed_item
*next
= NULL
;
535 p
= rb_next(&item
->rb_node
);
537 next
= rb_entry(p
, struct btrfs_delayed_item
, rb_node
);
542 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle
*trans
,
543 struct btrfs_root
*root
,
544 struct btrfs_delayed_item
*item
)
546 struct btrfs_block_rsv
*src_rsv
;
547 struct btrfs_block_rsv
*dst_rsv
;
551 if (!trans
->bytes_reserved
)
554 src_rsv
= trans
->block_rsv
;
555 dst_rsv
= &root
->fs_info
->delayed_block_rsv
;
557 num_bytes
= btrfs_calc_trans_metadata_size(root
, 1);
558 ret
= btrfs_block_rsv_migrate(src_rsv
, dst_rsv
, num_bytes
);
560 trace_btrfs_space_reservation(root
->fs_info
, "delayed_item",
563 item
->bytes_reserved
= num_bytes
;
569 static void btrfs_delayed_item_release_metadata(struct btrfs_root
*root
,
570 struct btrfs_delayed_item
*item
)
572 struct btrfs_block_rsv
*rsv
;
574 if (!item
->bytes_reserved
)
577 rsv
= &root
->fs_info
->delayed_block_rsv
;
578 trace_btrfs_space_reservation(root
->fs_info
, "delayed_item",
579 item
->key
.objectid
, item
->bytes_reserved
,
581 btrfs_block_rsv_release(root
, rsv
,
582 item
->bytes_reserved
);
585 static int btrfs_delayed_inode_reserve_metadata(
586 struct btrfs_trans_handle
*trans
,
587 struct btrfs_root
*root
,
589 struct btrfs_delayed_node
*node
)
591 struct btrfs_block_rsv
*src_rsv
;
592 struct btrfs_block_rsv
*dst_rsv
;
595 bool release
= false;
597 src_rsv
= trans
->block_rsv
;
598 dst_rsv
= &root
->fs_info
->delayed_block_rsv
;
600 num_bytes
= btrfs_calc_trans_metadata_size(root
, 1);
603 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
604 * which doesn't reserve space for speed. This is a problem since we
605 * still need to reserve space for this update, so try to reserve the
608 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
609 * we're accounted for.
611 if (!src_rsv
|| (!trans
->bytes_reserved
&&
612 src_rsv
->type
!= BTRFS_BLOCK_RSV_DELALLOC
)) {
613 ret
= btrfs_block_rsv_add(root
, dst_rsv
, num_bytes
,
614 BTRFS_RESERVE_NO_FLUSH
);
616 * Since we're under a transaction reserve_metadata_bytes could
617 * try to commit the transaction which will make it return
618 * EAGAIN to make us stop the transaction we have, so return
619 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
624 node
->bytes_reserved
= num_bytes
;
625 trace_btrfs_space_reservation(root
->fs_info
,
631 } else if (src_rsv
->type
== BTRFS_BLOCK_RSV_DELALLOC
) {
632 spin_lock(&BTRFS_I(inode
)->lock
);
633 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED
,
634 &BTRFS_I(inode
)->runtime_flags
)) {
635 spin_unlock(&BTRFS_I(inode
)->lock
);
639 spin_unlock(&BTRFS_I(inode
)->lock
);
641 /* Ok we didn't have space pre-reserved. This shouldn't happen
642 * too often but it can happen if we do delalloc to an existing
643 * inode which gets dirtied because of the time update, and then
644 * isn't touched again until after the transaction commits and
645 * then we try to write out the data. First try to be nice and
646 * reserve something strictly for us. If not be a pain and try
647 * to steal from the delalloc block rsv.
649 ret
= btrfs_block_rsv_add(root
, dst_rsv
, num_bytes
,
650 BTRFS_RESERVE_NO_FLUSH
);
654 ret
= btrfs_block_rsv_migrate(src_rsv
, dst_rsv
, num_bytes
);
659 * Ok this is a problem, let's just steal from the global rsv
660 * since this really shouldn't happen that often.
662 ret
= btrfs_block_rsv_migrate(&root
->fs_info
->global_block_rsv
,
668 ret
= btrfs_block_rsv_migrate(src_rsv
, dst_rsv
, num_bytes
);
672 * Migrate only takes a reservation, it doesn't touch the size of the
673 * block_rsv. This is to simplify people who don't normally have things
674 * migrated from their block rsv. If they go to release their
675 * reservation, that will decrease the size as well, so if migrate
676 * reduced size we'd end up with a negative size. But for the
677 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
678 * but we could in fact do this reserve/migrate dance several times
679 * between the time we did the original reservation and we'd clean it
680 * up. So to take care of this, release the space for the meta
681 * reservation here. I think it may be time for a documentation page on
682 * how block rsvs. work.
685 trace_btrfs_space_reservation(root
->fs_info
, "delayed_inode",
686 btrfs_ino(inode
), num_bytes
, 1);
687 node
->bytes_reserved
= num_bytes
;
691 trace_btrfs_space_reservation(root
->fs_info
, "delalloc",
692 btrfs_ino(inode
), num_bytes
, 0);
693 btrfs_block_rsv_release(root
, src_rsv
, num_bytes
);
699 static void btrfs_delayed_inode_release_metadata(struct btrfs_root
*root
,
700 struct btrfs_delayed_node
*node
)
702 struct btrfs_block_rsv
*rsv
;
704 if (!node
->bytes_reserved
)
707 rsv
= &root
->fs_info
->delayed_block_rsv
;
708 trace_btrfs_space_reservation(root
->fs_info
, "delayed_inode",
709 node
->inode_id
, node
->bytes_reserved
, 0);
710 btrfs_block_rsv_release(root
, rsv
,
711 node
->bytes_reserved
);
712 node
->bytes_reserved
= 0;
716 * This helper will insert some continuous items into the same leaf according
717 * to the free space of the leaf.
719 static int btrfs_batch_insert_items(struct btrfs_root
*root
,
720 struct btrfs_path
*path
,
721 struct btrfs_delayed_item
*item
)
723 struct btrfs_delayed_item
*curr
, *next
;
725 int total_data_size
= 0, total_size
= 0;
726 struct extent_buffer
*leaf
;
728 struct btrfs_key
*keys
;
730 struct list_head head
;
736 BUG_ON(!path
->nodes
[0]);
738 leaf
= path
->nodes
[0];
739 free_space
= btrfs_leaf_free_space(root
, leaf
);
740 INIT_LIST_HEAD(&head
);
746 * count the number of the continuous items that we can insert in batch
748 while (total_size
+ next
->data_len
+ sizeof(struct btrfs_item
) <=
750 total_data_size
+= next
->data_len
;
751 total_size
+= next
->data_len
+ sizeof(struct btrfs_item
);
752 list_add_tail(&next
->tree_list
, &head
);
756 next
= __btrfs_next_delayed_item(curr
);
760 if (!btrfs_is_continuous_delayed_item(curr
, next
))
770 * we need allocate some memory space, but it might cause the task
771 * to sleep, so we set all locked nodes in the path to blocking locks
774 btrfs_set_path_blocking(path
);
776 keys
= kmalloc_array(nitems
, sizeof(struct btrfs_key
), GFP_NOFS
);
782 data_size
= kmalloc_array(nitems
, sizeof(u32
), GFP_NOFS
);
788 /* get keys of all the delayed items */
790 list_for_each_entry(next
, &head
, tree_list
) {
792 data_size
[i
] = next
->data_len
;
796 /* reset all the locked nodes in the patch to spinning locks. */
797 btrfs_clear_path_blocking(path
, NULL
, 0);
799 /* insert the keys of the items */
800 setup_items_for_insert(root
, path
, keys
, data_size
,
801 total_data_size
, total_size
, nitems
);
803 /* insert the dir index items */
804 slot
= path
->slots
[0];
805 list_for_each_entry_safe(curr
, next
, &head
, tree_list
) {
806 data_ptr
= btrfs_item_ptr(leaf
, slot
, char);
807 write_extent_buffer(leaf
, &curr
->data
,
808 (unsigned long)data_ptr
,
812 btrfs_delayed_item_release_metadata(root
, curr
);
814 list_del(&curr
->tree_list
);
815 btrfs_release_delayed_item(curr
);
826 * This helper can just do simple insertion that needn't extend item for new
827 * data, such as directory name index insertion, inode insertion.
829 static int btrfs_insert_delayed_item(struct btrfs_trans_handle
*trans
,
830 struct btrfs_root
*root
,
831 struct btrfs_path
*path
,
832 struct btrfs_delayed_item
*delayed_item
)
834 struct extent_buffer
*leaf
;
838 ret
= btrfs_insert_empty_item(trans
, root
, path
, &delayed_item
->key
,
839 delayed_item
->data_len
);
840 if (ret
< 0 && ret
!= -EEXIST
)
843 leaf
= path
->nodes
[0];
845 ptr
= btrfs_item_ptr(leaf
, path
->slots
[0], char);
847 write_extent_buffer(leaf
, delayed_item
->data
, (unsigned long)ptr
,
848 delayed_item
->data_len
);
849 btrfs_mark_buffer_dirty(leaf
);
851 btrfs_delayed_item_release_metadata(root
, delayed_item
);
856 * we insert an item first, then if there are some continuous items, we try
857 * to insert those items into the same leaf.
859 static int btrfs_insert_delayed_items(struct btrfs_trans_handle
*trans
,
860 struct btrfs_path
*path
,
861 struct btrfs_root
*root
,
862 struct btrfs_delayed_node
*node
)
864 struct btrfs_delayed_item
*curr
, *prev
;
868 mutex_lock(&node
->mutex
);
869 curr
= __btrfs_first_delayed_insertion_item(node
);
873 ret
= btrfs_insert_delayed_item(trans
, root
, path
, curr
);
875 btrfs_release_path(path
);
880 curr
= __btrfs_next_delayed_item(prev
);
881 if (curr
&& btrfs_is_continuous_delayed_item(prev
, curr
)) {
882 /* insert the continuous items into the same leaf */
884 btrfs_batch_insert_items(root
, path
, curr
);
886 btrfs_release_delayed_item(prev
);
887 btrfs_mark_buffer_dirty(path
->nodes
[0]);
889 btrfs_release_path(path
);
890 mutex_unlock(&node
->mutex
);
894 mutex_unlock(&node
->mutex
);
898 static int btrfs_batch_delete_items(struct btrfs_trans_handle
*trans
,
899 struct btrfs_root
*root
,
900 struct btrfs_path
*path
,
901 struct btrfs_delayed_item
*item
)
903 struct btrfs_delayed_item
*curr
, *next
;
904 struct extent_buffer
*leaf
;
905 struct btrfs_key key
;
906 struct list_head head
;
907 int nitems
, i
, last_item
;
910 BUG_ON(!path
->nodes
[0]);
912 leaf
= path
->nodes
[0];
915 last_item
= btrfs_header_nritems(leaf
) - 1;
917 return -ENOENT
; /* FIXME: Is errno suitable? */
920 INIT_LIST_HEAD(&head
);
921 btrfs_item_key_to_cpu(leaf
, &key
, i
);
924 * count the number of the dir index items that we can delete in batch
926 while (btrfs_comp_cpu_keys(&next
->key
, &key
) == 0) {
927 list_add_tail(&next
->tree_list
, &head
);
931 next
= __btrfs_next_delayed_item(curr
);
935 if (!btrfs_is_continuous_delayed_item(curr
, next
))
941 btrfs_item_key_to_cpu(leaf
, &key
, i
);
947 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nitems
);
951 list_for_each_entry_safe(curr
, next
, &head
, tree_list
) {
952 btrfs_delayed_item_release_metadata(root
, curr
);
953 list_del(&curr
->tree_list
);
954 btrfs_release_delayed_item(curr
);
961 static int btrfs_delete_delayed_items(struct btrfs_trans_handle
*trans
,
962 struct btrfs_path
*path
,
963 struct btrfs_root
*root
,
964 struct btrfs_delayed_node
*node
)
966 struct btrfs_delayed_item
*curr
, *prev
;
970 mutex_lock(&node
->mutex
);
971 curr
= __btrfs_first_delayed_deletion_item(node
);
975 ret
= btrfs_search_slot(trans
, root
, &curr
->key
, path
, -1, 1);
980 * can't find the item which the node points to, so this node
981 * is invalid, just drop it.
984 curr
= __btrfs_next_delayed_item(prev
);
985 btrfs_release_delayed_item(prev
);
987 btrfs_release_path(path
);
989 mutex_unlock(&node
->mutex
);
995 btrfs_batch_delete_items(trans
, root
, path
, curr
);
996 btrfs_release_path(path
);
997 mutex_unlock(&node
->mutex
);
1001 btrfs_release_path(path
);
1002 mutex_unlock(&node
->mutex
);
1006 static void btrfs_release_delayed_inode(struct btrfs_delayed_node
*delayed_node
)
1008 struct btrfs_delayed_root
*delayed_root
;
1011 test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
)) {
1012 BUG_ON(!delayed_node
->root
);
1013 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
);
1014 delayed_node
->count
--;
1016 delayed_root
= delayed_node
->root
->fs_info
->delayed_root
;
1017 finish_one_item(delayed_root
);
1021 static void btrfs_release_delayed_iref(struct btrfs_delayed_node
*delayed_node
)
1023 struct btrfs_delayed_root
*delayed_root
;
1025 ASSERT(delayed_node
->root
);
1026 clear_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &delayed_node
->flags
);
1027 delayed_node
->count
--;
1029 delayed_root
= delayed_node
->root
->fs_info
->delayed_root
;
1030 finish_one_item(delayed_root
);
1033 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle
*trans
,
1034 struct btrfs_root
*root
,
1035 struct btrfs_path
*path
,
1036 struct btrfs_delayed_node
*node
)
1038 struct btrfs_key key
;
1039 struct btrfs_inode_item
*inode_item
;
1040 struct extent_buffer
*leaf
;
1044 key
.objectid
= node
->inode_id
;
1045 key
.type
= BTRFS_INODE_ITEM_KEY
;
1048 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &node
->flags
))
1053 ret
= btrfs_lookup_inode(trans
, root
, path
, &key
, mod
);
1055 btrfs_release_path(path
);
1057 } else if (ret
< 0) {
1061 leaf
= path
->nodes
[0];
1062 inode_item
= btrfs_item_ptr(leaf
, path
->slots
[0],
1063 struct btrfs_inode_item
);
1064 write_extent_buffer(leaf
, &node
->inode_item
, (unsigned long)inode_item
,
1065 sizeof(struct btrfs_inode_item
));
1066 btrfs_mark_buffer_dirty(leaf
);
1068 if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &node
->flags
))
1072 if (path
->slots
[0] >= btrfs_header_nritems(leaf
))
1075 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
1076 if (key
.objectid
!= node
->inode_id
)
1079 if (key
.type
!= BTRFS_INODE_REF_KEY
&&
1080 key
.type
!= BTRFS_INODE_EXTREF_KEY
)
1084 * Delayed iref deletion is for the inode who has only one link,
1085 * so there is only one iref. The case that several irefs are
1086 * in the same item doesn't exist.
1088 btrfs_del_item(trans
, root
, path
);
1090 btrfs_release_delayed_iref(node
);
1092 btrfs_release_path(path
);
1094 btrfs_delayed_inode_release_metadata(root
, node
);
1095 btrfs_release_delayed_inode(node
);
1100 btrfs_release_path(path
);
1102 key
.type
= BTRFS_INODE_EXTREF_KEY
;
1104 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1110 leaf
= path
->nodes
[0];
1115 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle
*trans
,
1116 struct btrfs_root
*root
,
1117 struct btrfs_path
*path
,
1118 struct btrfs_delayed_node
*node
)
1122 mutex_lock(&node
->mutex
);
1123 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &node
->flags
)) {
1124 mutex_unlock(&node
->mutex
);
1128 ret
= __btrfs_update_delayed_inode(trans
, root
, path
, node
);
1129 mutex_unlock(&node
->mutex
);
1134 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle
*trans
,
1135 struct btrfs_path
*path
,
1136 struct btrfs_delayed_node
*node
)
1140 ret
= btrfs_insert_delayed_items(trans
, path
, node
->root
, node
);
1144 ret
= btrfs_delete_delayed_items(trans
, path
, node
->root
, node
);
1148 ret
= btrfs_update_delayed_inode(trans
, node
->root
, path
, node
);
1153 * Called when committing the transaction.
1154 * Returns 0 on success.
1155 * Returns < 0 on error and returns with an aborted transaction with any
1156 * outstanding delayed items cleaned up.
1158 static int __btrfs_run_delayed_items(struct btrfs_trans_handle
*trans
,
1159 struct btrfs_root
*root
, int nr
)
1161 struct btrfs_delayed_root
*delayed_root
;
1162 struct btrfs_delayed_node
*curr_node
, *prev_node
;
1163 struct btrfs_path
*path
;
1164 struct btrfs_block_rsv
*block_rsv
;
1166 bool count
= (nr
> 0);
1171 path
= btrfs_alloc_path();
1174 path
->leave_spinning
= 1;
1176 block_rsv
= trans
->block_rsv
;
1177 trans
->block_rsv
= &root
->fs_info
->delayed_block_rsv
;
1179 delayed_root
= btrfs_get_delayed_root(root
);
1181 curr_node
= btrfs_first_delayed_node(delayed_root
);
1182 while (curr_node
&& (!count
|| (count
&& nr
--))) {
1183 ret
= __btrfs_commit_inode_delayed_items(trans
, path
,
1186 btrfs_release_delayed_node(curr_node
);
1188 btrfs_abort_transaction(trans
, root
, ret
);
1192 prev_node
= curr_node
;
1193 curr_node
= btrfs_next_delayed_node(curr_node
);
1194 btrfs_release_delayed_node(prev_node
);
1198 btrfs_release_delayed_node(curr_node
);
1199 btrfs_free_path(path
);
1200 trans
->block_rsv
= block_rsv
;
/* Flush the delayed items of every delayed node; see __btrfs_run_delayed_items(). */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	return __btrfs_run_delayed_items(trans, root, -1);
}
/*
 * Flush the delayed items of at most @nr delayed nodes (all of them if @nr
 * is not positive); see __btrfs_run_delayed_items().
 */
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, int nr)
{
	return __btrfs_run_delayed_items(trans, root, nr);
}
1217 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle
*trans
,
1218 struct inode
*inode
)
1220 struct btrfs_delayed_node
*delayed_node
= btrfs_get_delayed_node(inode
);
1221 struct btrfs_path
*path
;
1222 struct btrfs_block_rsv
*block_rsv
;
1228 mutex_lock(&delayed_node
->mutex
);
1229 if (!delayed_node
->count
) {
1230 mutex_unlock(&delayed_node
->mutex
);
1231 btrfs_release_delayed_node(delayed_node
);
1234 mutex_unlock(&delayed_node
->mutex
);
1236 path
= btrfs_alloc_path();
1238 btrfs_release_delayed_node(delayed_node
);
1241 path
->leave_spinning
= 1;
1243 block_rsv
= trans
->block_rsv
;
1244 trans
->block_rsv
= &delayed_node
->root
->fs_info
->delayed_block_rsv
;
1246 ret
= __btrfs_commit_inode_delayed_items(trans
, path
, delayed_node
);
1248 btrfs_release_delayed_node(delayed_node
);
1249 btrfs_free_path(path
);
1250 trans
->block_rsv
= block_rsv
;
1255 int btrfs_commit_inode_delayed_inode(struct inode
*inode
)
1257 struct btrfs_trans_handle
*trans
;
1258 struct btrfs_delayed_node
*delayed_node
= btrfs_get_delayed_node(inode
);
1259 struct btrfs_path
*path
;
1260 struct btrfs_block_rsv
*block_rsv
;
1266 mutex_lock(&delayed_node
->mutex
);
1267 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
)) {
1268 mutex_unlock(&delayed_node
->mutex
);
1269 btrfs_release_delayed_node(delayed_node
);
1272 mutex_unlock(&delayed_node
->mutex
);
1274 trans
= btrfs_join_transaction(delayed_node
->root
);
1275 if (IS_ERR(trans
)) {
1276 ret
= PTR_ERR(trans
);
1280 path
= btrfs_alloc_path();
1285 path
->leave_spinning
= 1;
1287 block_rsv
= trans
->block_rsv
;
1288 trans
->block_rsv
= &delayed_node
->root
->fs_info
->delayed_block_rsv
;
1290 mutex_lock(&delayed_node
->mutex
);
1291 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
))
1292 ret
= __btrfs_update_delayed_inode(trans
, delayed_node
->root
,
1293 path
, delayed_node
);
1296 mutex_unlock(&delayed_node
->mutex
);
1298 btrfs_free_path(path
);
1299 trans
->block_rsv
= block_rsv
;
1301 btrfs_end_transaction(trans
, delayed_node
->root
);
1302 btrfs_btree_balance_dirty(delayed_node
->root
);
1304 btrfs_release_delayed_node(delayed_node
);
1309 void btrfs_remove_delayed_node(struct inode
*inode
)
1311 struct btrfs_delayed_node
*delayed_node
;
1313 delayed_node
= ACCESS_ONCE(BTRFS_I(inode
)->delayed_node
);
1317 BTRFS_I(inode
)->delayed_node
= NULL
;
1318 btrfs_release_delayed_node(delayed_node
);
1321 struct btrfs_async_delayed_work
{
1322 struct btrfs_delayed_root
*delayed_root
;
1324 struct btrfs_work work
;
/*
 * Work callback that commits the delayed items of prepared delayed nodes.
 *
 * @work: the btrfs_work embedded in a struct btrfs_async_delayed_work.
 *
 * For a prepared delayed node the function joins a transaction on the
 * node's root, temporarily points trans->block_rsv at
 * fs_info->delayed_block_rsv, calls __btrfs_commit_inode_delayed_items(),
 * restores the previous reservation and ends the transaction. Before
 * returning it frees the path and wakes up waiters on delayed_root->wait.
 *
 * NOTE(review): the loop and branch structure, the error handling after
 * btrfs_alloc_path()/btrfs_join_transaction() and the declaration of
 * total_done are not visible in this listing - confirm against the full
 * source.
 */
1327 static void btrfs_async_run_delayed_root(struct btrfs_work
*work
)
1329 struct btrfs_async_delayed_work
*async_work
;
1330 struct btrfs_delayed_root
*delayed_root
;
1331 struct btrfs_trans_handle
*trans
;
1332 struct btrfs_path
*path
;
1333 struct btrfs_delayed_node
*delayed_node
= NULL
;
1334 struct btrfs_root
*root
;
1335 struct btrfs_block_rsv
*block_rsv
;
/* Recover the enclosing async work structure from the embedded work. */
1338 async_work
= container_of(work
, struct btrfs_async_delayed_work
, work
);
1339 delayed_root
= async_work
->delayed_root
;
1341 path
= btrfs_alloc_path();
/*
 * Compare the number of pending items with half of
 * BTRFS_DELAYED_BACKGROUND; the statement guarded by this test is not
 * visible here.
 */
1346 if (atomic_read(&delayed_root
->items
) < BTRFS_DELAYED_BACKGROUND
/ 2)
1349 delayed_node
= btrfs_first_prepared_delayed_node(delayed_root
);
1353 path
->leave_spinning
= 1;
1354 root
= delayed_node
->root
;
1356 trans
= btrfs_join_transaction(root
);
/* Remember the transaction's reservation; it is restored after the commit. */
1360 block_rsv
= trans
->block_rsv
;
1361 trans
->block_rsv
= &root
->fs_info
->delayed_block_rsv
;
1363 __btrfs_commit_inode_delayed_items(trans
, path
, delayed_node
);
1365 trans
->block_rsv
= block_rsv
;
1366 btrfs_end_transaction(trans
, root
);
1367 btrfs_btree_balance_dirty_nodelay(root
);
1370 btrfs_release_path(path
);
1373 btrfs_release_prepared_delayed_node(delayed_node
);
/*
 * With nr == 0 the bound on total_done is BTRFS_DELAYED_WRITEBACK,
 * otherwise it is async_work->nr. The statement guarded by this test is
 * not visible here (presumably it continues the processing - confirm).
 */
1374 if ((async_work
->nr
== 0 && total_done
< BTRFS_DELAYED_WRITEBACK
) ||
1375 total_done
< async_work
->nr
)
1379 btrfs_free_path(path
);
1381 wake_up(&delayed_root
->wait
);
/*
 * Queue an asynchronous run of btrfs_async_run_delayed_root().
 *
 * @delayed_root: delayed root stored in the work item.
 * @fs_info:      supplies the delayed_workers workqueue.
 * @nr:           stored in async_work->nr for the worker.
 *
 * The work item is allocated with kmalloc(GFP_NOFS), initialised with
 * btrfs_init_work() and queued on fs_info->delayed_workers.
 *
 * NOTE(review): the return statements and the check of the kmalloc()
 * result are not visible in this listing - confirm the return values
 * against the full source.
 */
1386 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root
*delayed_root
,
1387 struct btrfs_fs_info
*fs_info
, int nr
)
1389 struct btrfs_async_delayed_work
*async_work
;
/*
 * Test for fewer than BTRFS_DELAYED_BACKGROUND pending items or a
 * congested workqueue; the guarded statement is not visible here
 * (presumably an early return - confirm).
 */
1391 if (atomic_read(&delayed_root
->items
) < BTRFS_DELAYED_BACKGROUND
||
1392 btrfs_workqueue_normal_congested(fs_info
->delayed_workers
))
1395 async_work
= kmalloc(sizeof(*async_work
), GFP_NOFS
);
1399 async_work
->delayed_root
= delayed_root
;
1400 btrfs_init_work(&async_work
->work
, btrfs_delayed_meta_helper
,
1401 btrfs_async_run_delayed_root
, NULL
, NULL
);
1402 async_work
->nr
= nr
;
1404 btrfs_queue_work(fs_info
->delayed_workers
, &async_work
->work
);
/*
 * Warn if the delayed root of @root still has a delayed node queued.
 *
 * @root: root whose delayed root is looked up with btrfs_get_delayed_root().
 *
 * WARN_ON() fires when btrfs_first_delayed_node() returns a node.
 */
void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
{
	WARN_ON(btrfs_first_delayed_node(btrfs_get_delayed_root(root)));
}
/*
 * Wait condition used by btrfs_balance_delayed_items().
 *
 * @delayed_root: delayed root whose counters are inspected.
 * @seq:          value of items_seq sampled by the waiter before sleeping.
 *
 * Two tests are made: whether items_seq has left the window
 * [seq, seq + BTRFS_DELAYED_BATCH), and whether the number of pending
 * items is below BTRFS_DELAYED_BACKGROUND.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably either test ends the wait - confirm against the full source.
 */
1415 static int could_end_wait(struct btrfs_delayed_root
*delayed_root
, int seq
)
1417 int val
= atomic_read(&delayed_root
->items_seq
);
1419 if (val
< seq
|| val
>= seq
+ BTRFS_DELAYED_BATCH
)
1422 if (atomic_read(&delayed_root
->items
) < BTRFS_DELAYED_BACKGROUND
)
/*
 * Start asynchronous processing of delayed items when enough are pending.
 *
 * @root: root used to find the delayed root and fs_info.
 *
 * The pending item count is compared with BTRFS_DELAYED_BACKGROUND and
 * BTRFS_DELAYED_WRITEBACK. In the WRITEBACK branch the current items_seq
 * is sampled, work is queued with nr == 0 and the caller sleeps
 * interruptibly on delayed_root->wait until could_end_wait() holds. The
 * last call queues work with nr == BTRFS_DELAYED_BATCH.
 *
 * NOTE(review): the declarations of seq and ret, the return statements
 * and the closing of the WRITEBACK branch are not visible in this
 * listing - confirm the exact control flow against the full source.
 */
1428 void btrfs_balance_delayed_items(struct btrfs_root
*root
)
1430 struct btrfs_delayed_root
*delayed_root
;
1431 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
1433 delayed_root
= btrfs_get_delayed_root(root
);
1435 if (atomic_read(&delayed_root
->items
) < BTRFS_DELAYED_BACKGROUND
)
1438 if (atomic_read(&delayed_root
->items
) >= BTRFS_DELAYED_WRITEBACK
) {
/* Sample the sequence before queueing so could_end_wait() can see progress. */
1442 seq
= atomic_read(&delayed_root
->items_seq
);
1444 ret
= btrfs_wq_run_delayed_node(delayed_root
, fs_info
, 0);
1448 wait_event_interruptible(delayed_root
->wait
,
1449 could_end_wait(delayed_root
, seq
));
1453 btrfs_wq_run_delayed_node(delayed_root
, fs_info
, BTRFS_DELAYED_BATCH
);
1456 /* Will return 0 or -ENOMEM */
/*
 * Queue the insertion of a directory index item in the delayed node of
 * @dir.
 *
 * @trans:    transaction; its transid is stored in the dir item.
 * @root:     root used for the metadata reservation and error reporting.
 * @name:     entry name, @name_len bytes long.
 * @dir:      directory inode that owns the delayed node.
 * @disk_key: copied into the dir item's location.
 * @type:     stored as the dir item type.
 *
 * The delayed item is keyed (btrfs_ino(dir), BTRFS_DIR_INDEX_KEY, index)
 * and its data is a struct btrfs_dir_item followed by the name. After the
 * metadata reservation the item is added to the insertion tree under
 * delayed_node->mutex; a failure there is reported with btrfs_err().
 * Returns PTR_ERR() of the node when it cannot be obtained.
 *
 * NOTE(review): the declaration of index (presumably the last parameter),
 * the declaration of ret, the allocation-failure branch and the final
 * return are not visible in this listing - confirm against the full
 * source.
 */
1457 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle
*trans
,
1458 struct btrfs_root
*root
, const char *name
,
1459 int name_len
, struct inode
*dir
,
1460 struct btrfs_disk_key
*disk_key
, u8 type
,
1463 struct btrfs_delayed_node
*delayed_node
;
1464 struct btrfs_delayed_item
*delayed_item
;
1465 struct btrfs_dir_item
*dir_item
;
1468 delayed_node
= btrfs_get_or_create_delayed_node(dir
);
1469 if (IS_ERR(delayed_node
))
1470 return PTR_ERR(delayed_node
);
/* Room for the fixed dir item header plus the name that follows it. */
1472 delayed_item
= btrfs_alloc_delayed_item(sizeof(*dir_item
) + name_len
);
1473 if (!delayed_item
) {
1478 delayed_item
->key
.objectid
= btrfs_ino(dir
);
1479 delayed_item
->key
.type
= BTRFS_DIR_INDEX_KEY
;
1480 delayed_item
->key
.offset
= index
;
1482 dir_item
= (struct btrfs_dir_item
*)delayed_item
->data
;
1483 dir_item
->location
= *disk_key
;
1484 btrfs_set_stack_dir_transid(dir_item
, trans
->transid
);
1485 btrfs_set_stack_dir_data_len(dir_item
, 0);
1486 btrfs_set_stack_dir_name_len(dir_item
, name_len
);
1487 btrfs_set_stack_dir_type(dir_item
, type
);
/* The name is stored immediately after the dir item header. */
1488 memcpy((char *)(dir_item
+ 1), name
, name_len
);
1490 ret
= btrfs_delayed_item_reserve_metadata(trans
, root
, delayed_item
);
1492 * we have reserved enough space when we start a new transaction,
1493 * so reserving metadata failure is impossible
1498 mutex_lock(&delayed_node
->mutex
);
1499 ret
= __btrfs_add_delayed_insertion_item(delayed_node
, delayed_item
);
1500 if (unlikely(ret
)) {
1501 btrfs_err(root
->fs_info
, "err add delayed dir index item(name: %.*s) "
1502 "into the insertion tree of the delayed node"
1503 "(root id: %llu, inode id: %llu, errno: %d)",
1504 name_len
, name
, delayed_node
->root
->objectid
,
1505 delayed_node
->inode_id
, ret
);
1508 mutex_unlock(&delayed_node
->mutex
);
1511 btrfs_release_delayed_node(delayed_node
);
/*
 * Drop a pending insertion item from a delayed node.
 *
 * @root: root passed to btrfs_delayed_item_release_metadata().
 * @node: delayed node whose insertion items are searched.
 * @key:  key of the item to look up.
 *
 * The lookup and the release run under node->mutex. For the item that is
 * found, the reserved metadata is released and then the item itself.
 *
 * NOTE(review): the test on the lookup result that selects between the
 * two mutex_unlock() sites, and the return statements, are not visible
 * in this listing - confirm against the full source.
 */
1515 static int btrfs_delete_delayed_insertion_item(struct btrfs_root
*root
,
1516 struct btrfs_delayed_node
*node
,
1517 struct btrfs_key
*key
)
1519 struct btrfs_delayed_item
*item
;
1521 mutex_lock(&node
->mutex
);
1522 item
= __btrfs_lookup_delayed_insertion_item(node
, key
);
1524 mutex_unlock(&node
->mutex
);
1528 btrfs_delayed_item_release_metadata(root
, item
);
1529 btrfs_release_delayed_item(item
);
1530 mutex_unlock(&node
->mutex
);
/*
 * Queue the deletion of a directory index item in the delayed node of
 * @dir.
 *
 * @trans: transaction used for the metadata reservation.
 * @root:  root used for the reservation and error reporting.
 * @dir:   directory inode that owns the delayed node.
 *
 * The key (btrfs_ino(dir), BTRFS_DIR_INDEX_KEY, index) is first passed to
 * btrfs_delete_delayed_insertion_item(). A delayed item without data
 * (btrfs_alloc_delayed_item(0)) carrying the same key then gets a
 * metadata reservation and is added to the deletion tree under
 * node->mutex; a failure there is reported with btrfs_err().
 *
 * NOTE(review): the declaration of index (presumably the last parameter),
 * the IS_ERR() test before the first return, and the way the result of
 * btrfs_delete_delayed_insertion_item() steers the remaining steps are
 * not visible in this listing - confirm against the full source.
 */
1534 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle
*trans
,
1535 struct btrfs_root
*root
, struct inode
*dir
,
1538 struct btrfs_delayed_node
*node
;
1539 struct btrfs_delayed_item
*item
;
1540 struct btrfs_key item_key
;
1543 node
= btrfs_get_or_create_delayed_node(dir
);
1545 return PTR_ERR(node
);
1547 item_key
.objectid
= btrfs_ino(dir
);
1548 item_key
.type
= BTRFS_DIR_INDEX_KEY
;
1549 item_key
.offset
= index
;
1551 ret
= btrfs_delete_delayed_insertion_item(root
, node
, &item_key
);
/* A deletion item carries only its key, hence zero bytes of data. */
1555 item
= btrfs_alloc_delayed_item(0);
1561 item
->key
= item_key
;
1563 ret
= btrfs_delayed_item_reserve_metadata(trans
, root
, item
);
1565 * we have reserved enough space when we start a new transaction,
1566 * so reserving metadata failure is impossible.
1570 mutex_lock(&node
->mutex
);
1571 ret
= __btrfs_add_delayed_deletion_item(node
, item
);
1572 if (unlikely(ret
)) {
1573 btrfs_err(root
->fs_info
, "err add delayed dir index item(index: %llu) "
1574 "into the deletion tree of the delayed node"
1575 "(root id: %llu, inode id: %llu, errno: %d)",
1576 index
, node
->root
->objectid
, node
->inode_id
,
1580 mutex_unlock(&node
->mutex
);
1582 btrfs_release_delayed_node(node
);
/*
 * Copy the directory index counter of the delayed node into the inode.
 *
 * @inode: directory inode; BTRFS_I(inode)->index_cnt is written.
 *
 * When delayed_node->index_cnt is zero the node is released without
 * copying. Otherwise the counter is copied into the btrfs inode and the
 * node is released. The delayed node mutex is not taken; the in-code
 * comment fragment below gives the reason (i_mutex of the directory is
 * held).
 *
 * NOTE(review): the handling of a missing delayed node and the return
 * statements are not visible in this listing - confirm the return values
 * against the full source.
 */
1586 int btrfs_inode_delayed_dir_index_count(struct inode
*inode
)
1588 struct btrfs_delayed_node
*delayed_node
= btrfs_get_delayed_node(inode
);
1594 * Since we have held i_mutex of this directory, it is impossible that
1595 * a new directory index is added into the delayed node and index_cnt
1596 * is updated now. So we needn't lock the delayed node.
1598 if (!delayed_node
->index_cnt
) {
1599 btrfs_release_delayed_node(delayed_node
);
1603 BTRFS_I(inode
)->index_cnt
= delayed_node
->index_cnt
;
1604 btrfs_release_delayed_node(delayed_node
);
/*
 * Collect the delayed items of an inode onto two caller-provided lists.
 *
 * @inode:    inode whose delayed node is inspected.
 * @ins_list: receives the delayed insertion items.
 * @del_list: receives the delayed deletion items.
 *
 * Under delayed_node->mutex, items are taken starting from
 * __btrfs_first_delayed_insertion_item() and
 * __btrfs_first_delayed_deletion_item() and advanced with
 * __btrfs_next_delayed_item(). Each collected item gets an extra
 * reference (atomic_inc() on item->refs) and is linked through its
 * readdir_list member. The reference on the delayed node is dropped with
 * a plain atomic_dec(); the in-code comment fragment below explains why.
 *
 * NOTE(review): the loop headers and the handling of a missing delayed
 * node are not visible in this listing - confirm against the full source.
 */
1608 void btrfs_get_delayed_items(struct inode
*inode
, struct list_head
*ins_list
,
1609 struct list_head
*del_list
)
1611 struct btrfs_delayed_node
*delayed_node
;
1612 struct btrfs_delayed_item
*item
;
1614 delayed_node
= btrfs_get_delayed_node(inode
);
1618 mutex_lock(&delayed_node
->mutex
);
1619 item
= __btrfs_first_delayed_insertion_item(delayed_node
);
1621 atomic_inc(&item
->refs
);
1622 list_add_tail(&item
->readdir_list
, ins_list
);
1623 item
= __btrfs_next_delayed_item(item
);
1626 item
= __btrfs_first_delayed_deletion_item(delayed_node
);
1628 atomic_inc(&item
->refs
);
1629 list_add_tail(&item
->readdir_list
, del_list
);
1630 item
= __btrfs_next_delayed_item(item
);
1632 mutex_unlock(&delayed_node
->mutex
);
1634 * This delayed node is still cached in the btrfs inode, so refs
1635 * must be > 1 now, and we needn't check it is going to be freed
1638 * Besides that, this function is used to read dir, we do not
1639 * insert/delete delayed items in this period. So we also needn't
1640 * requeue or dequeue this delayed node.
1642 atomic_dec(&delayed_node
->refs
);
/*
 * Empty the two lists of delayed items and drop the references on them.
 *
 * @ins_list: list of delayed insertion items, linked by readdir_list.
 * @del_list: list of delayed deletion items, linked by readdir_list.
 *
 * Every item is unlinked from its list and its reference count is
 * decremented with atomic_dec_and_test(). The _safe list iterator is
 * used because entries are removed while walking.
 *
 * NOTE(review): the statement executed when the count reaches zero is
 * not visible in this listing (presumably the item is freed) - confirm
 * against the full source.
 */
1645 void btrfs_put_delayed_items(struct list_head
*ins_list
,
1646 struct list_head
*del_list
)
1648 struct btrfs_delayed_item
*curr
, *next
;
1650 list_for_each_entry_safe(curr
, next
, ins_list
, readdir_list
) {
1651 list_del(&curr
->readdir_list
);
1652 if (atomic_dec_and_test(&curr
->refs
))
1656 list_for_each_entry_safe(curr
, next
, del_list
, readdir_list
) {
1657 list_del(&curr
->readdir_list
);
1658 if (atomic_dec_and_test(&curr
->refs
))
/*
 * Check a directory index against the list of delayed deletion items.
 *
 * @del_list: list of delayed deletion items, linked by readdir_list.
 *
 * The list is walked from the front. An item whose key.offset is greater
 * than index triggers a branch that is not visible here. Any other item
 * is unlinked from the list, ret records whether its key.offset equals
 * index, and its reference is dropped with atomic_dec_and_test().
 *
 * NOTE(review): the declaration of index (presumably the last parameter),
 * the declaration of ret, the statements guarded by the visible tests
 * and the return statements are not visible in this listing - confirm
 * against the full source.
 */
1663 int btrfs_should_delete_dir_index(struct list_head
*del_list
,
1666 struct btrfs_delayed_item
*curr
, *next
;
1669 if (list_empty(del_list
))
1672 list_for_each_entry_safe(curr
, next
, del_list
, readdir_list
) {
1673 if (curr
->key
.offset
> index
)
1676 list_del(&curr
->readdir_list
);
1677 ret
= (curr
->key
.offset
== index
);
1679 if (atomic_dec_and_test(&curr
->refs
))
/* btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree */
/*
 * Emit the directory entries held in a list of delayed insertion items.
 *
 * @ctx:      readdir context; ctx->pos is compared with and updated from
 *            the key offset of each item.
 * @ins_list: list of delayed insertion items, linked by readdir_list.
 * @emitted:  not referenced in the lines visible here.
 *
 * Each item is unlinked from the list. Items whose key.offset is below
 * ctx->pos take a branch that drops their reference. For the others
 * ctx->pos is set to the key offset and the entry is passed to
 * dir_emit() with the name stored after the dir item header, the inode
 * number from the item's location and a d_type looked up in
 * btrfs_filetype_table. The reference on the item is then dropped.
 *
 * NOTE(review): the declarations of name, name_len and over, the handling
 * of over, the statements run when a reference count reaches zero and
 * the return statements are not visible in this listing - confirm
 * against the full source.
 */
1694 int btrfs_readdir_delayed_dir_index(struct dir_context
*ctx
,
1695 struct list_head
*ins_list
, bool *emitted
)
1697 struct btrfs_dir_item
*di
;
1698 struct btrfs_delayed_item
*curr
, *next
;
1699 struct btrfs_key location
;
1703 unsigned char d_type
;
1705 if (list_empty(ins_list
))
1709 * Changing the data of the delayed item is impossible. So
1710 * we needn't lock them. And we have held i_mutex of the
1711 * directory, nobody can delete any directory indexes now.
1713 list_for_each_entry_safe(curr
, next
, ins_list
, readdir_list
) {
1714 list_del(&curr
->readdir_list
);
/* Items positioned before ctx->pos take this branch. */
1716 if (curr
->key
.offset
< ctx
->pos
) {
1717 if (atomic_dec_and_test(&curr
->refs
))
1722 ctx
->pos
= curr
->key
.offset
;
1724 di
= (struct btrfs_dir_item
*)curr
->data
;
/* The name follows the dir item header inside the item data. */
1725 name
= (char *)(di
+ 1);
1726 name_len
= btrfs_stack_dir_name_len(di
);
1728 d_type
= btrfs_filetype_table
[di
->type
];
1729 btrfs_disk_key_to_cpu(&location
, &di
->location
);
1731 over
= !dir_emit(ctx
, name
, name_len
,
1732 location
.objectid
, d_type
);
1734 if (atomic_dec_and_test(&curr
->refs
))
1744 static void fill_stack_inode_item(struct btrfs_trans_handle
*trans
,
1745 struct btrfs_inode_item
*inode_item
,
1746 struct inode
*inode
)
1748 btrfs_set_stack_inode_uid(inode_item
, i_uid_read(inode
));
1749 btrfs_set_stack_inode_gid(inode_item
, i_gid_read(inode
));
1750 btrfs_set_stack_inode_size(inode_item
, BTRFS_I(inode
)->disk_i_size
);
1751 btrfs_set_stack_inode_mode(inode_item
, inode
->i_mode
);
1752 btrfs_set_stack_inode_nlink(inode_item
, inode
->i_nlink
);
1753 btrfs_set_stack_inode_nbytes(inode_item
, inode_get_bytes(inode
));
1754 btrfs_set_stack_inode_generation(inode_item
,
1755 BTRFS_I(inode
)->generation
);
1756 btrfs_set_stack_inode_sequence(inode_item
, inode
->i_version
);
1757 btrfs_set_stack_inode_transid(inode_item
, trans
->transid
);
1758 btrfs_set_stack_inode_rdev(inode_item
, inode
->i_rdev
);
1759 btrfs_set_stack_inode_flags(inode_item
, BTRFS_I(inode
)->flags
);
1760 btrfs_set_stack_inode_block_group(inode_item
, 0);
1762 btrfs_set_stack_timespec_sec(&inode_item
->atime
,
1763 inode
->i_atime
.tv_sec
);
1764 btrfs_set_stack_timespec_nsec(&inode_item
->atime
,
1765 inode
->i_atime
.tv_nsec
);
1767 btrfs_set_stack_timespec_sec(&inode_item
->mtime
,
1768 inode
->i_mtime
.tv_sec
);
1769 btrfs_set_stack_timespec_nsec(&inode_item
->mtime
,
1770 inode
->i_mtime
.tv_nsec
);
1772 btrfs_set_stack_timespec_sec(&inode_item
->ctime
,
1773 inode
->i_ctime
.tv_sec
);
1774 btrfs_set_stack_timespec_nsec(&inode_item
->ctime
,
1775 inode
->i_ctime
.tv_nsec
);
1777 btrfs_set_stack_timespec_sec(&inode_item
->otime
,
1778 BTRFS_I(inode
)->i_otime
.tv_sec
);
1779 btrfs_set_stack_timespec_nsec(&inode_item
->otime
,
1780 BTRFS_I(inode
)->i_otime
.tv_nsec
);
/*
 * Load an inode from the inode item cached in its delayed node.
 *
 * @inode: inode to fill in.
 * @rdev:  receives the device number stored in the inode item.
 *
 * Under delayed_node->mutex the BTRFS_DELAYED_NODE_INODE_DIRTY flag is
 * tested; when it is clear the node is unlocked and released without
 * touching the inode. Otherwise the fields of delayed_node->inode_item
 * are copied into the VFS inode and the btrfs inode, i_generation is set
 * from the btrfs generation and index_cnt is reset to (u64)-1.
 *
 * NOTE(review): the handling of a missing delayed node and the return
 * statements are not visible in this listing - confirm the return values
 * against the full source.
 */
1783 int btrfs_fill_inode(struct inode
*inode
, u32
*rdev
)
1785 struct btrfs_delayed_node
*delayed_node
;
1786 struct btrfs_inode_item
*inode_item
;
1788 delayed_node
= btrfs_get_delayed_node(inode
);
1792 mutex_lock(&delayed_node
->mutex
);
/* Nothing is copied unless the delayed node is flagged as dirty. */
1793 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
)) {
1794 mutex_unlock(&delayed_node
->mutex
);
1795 btrfs_release_delayed_node(delayed_node
);
1799 inode_item
= &delayed_node
->inode_item
;
1801 i_uid_write(inode
, btrfs_stack_inode_uid(inode_item
));
1802 i_gid_write(inode
, btrfs_stack_inode_gid(inode_item
));
1803 btrfs_i_size_write(inode
, btrfs_stack_inode_size(inode_item
));
1804 inode
->i_mode
= btrfs_stack_inode_mode(inode_item
);
1805 set_nlink(inode
, btrfs_stack_inode_nlink(inode_item
));
1806 inode_set_bytes(inode
, btrfs_stack_inode_nbytes(inode_item
));
1807 BTRFS_I(inode
)->generation
= btrfs_stack_inode_generation(inode_item
);
1808 BTRFS_I(inode
)->last_trans
= btrfs_stack_inode_transid(inode_item
);
1810 inode
->i_version
= btrfs_stack_inode_sequence(inode_item
);
1812 *rdev
= btrfs_stack_inode_rdev(inode_item
);
1813 BTRFS_I(inode
)->flags
= btrfs_stack_inode_flags(inode_item
);
1815 inode
->i_atime
.tv_sec
= btrfs_stack_timespec_sec(&inode_item
->atime
);
1816 inode
->i_atime
.tv_nsec
= btrfs_stack_timespec_nsec(&inode_item
->atime
);
1818 inode
->i_mtime
.tv_sec
= btrfs_stack_timespec_sec(&inode_item
->mtime
);
1819 inode
->i_mtime
.tv_nsec
= btrfs_stack_timespec_nsec(&inode_item
->mtime
);
1821 inode
->i_ctime
.tv_sec
= btrfs_stack_timespec_sec(&inode_item
->ctime
);
1822 inode
->i_ctime
.tv_nsec
= btrfs_stack_timespec_nsec(&inode_item
->ctime
);
1824 BTRFS_I(inode
)->i_otime
.tv_sec
=
1825 btrfs_stack_timespec_sec(&inode_item
->otime
);
1826 BTRFS_I(inode
)->i_otime
.tv_nsec
=
1827 btrfs_stack_timespec_nsec(&inode_item
->otime
);
1829 inode
->i_generation
= BTRFS_I(inode
)->generation
;
1830 BTRFS_I(inode
)->index_cnt
= (u64
)-1;
1832 mutex_unlock(&delayed_node
->mutex
);
1833 btrfs_release_delayed_node(delayed_node
);
/*
 * Record the current state of an inode in its delayed node.
 *
 * @trans: transaction used for the reservation and stamped into the item.
 * @root:  root used for the reservation and to reach the delayed root.
 * @inode: inode whose state is copied with fill_stack_inode_item().
 *
 * Returns PTR_ERR() of the node when it cannot be obtained. Under
 * delayed_node->mutex: if BTRFS_DELAYED_NODE_INODE_DIRTY is already set,
 * the cached inode item is refreshed. The remaining statements reserve
 * metadata, fill the inode item, set the dirty flag and increment both
 * delayed_node->count and the delayed root's item counter.
 *
 * NOTE(review): the declaration of ret, the end of the already-dirty
 * branch (presumably it skips the reservation), the check of the
 * reservation result and the final return are not visible in this
 * listing - confirm against the full source.
 */
1837 int btrfs_delayed_update_inode(struct btrfs_trans_handle
*trans
,
1838 struct btrfs_root
*root
, struct inode
*inode
)
1840 struct btrfs_delayed_node
*delayed_node
;
1843 delayed_node
= btrfs_get_or_create_delayed_node(inode
);
1844 if (IS_ERR(delayed_node
))
1845 return PTR_ERR(delayed_node
);
1847 mutex_lock(&delayed_node
->mutex
);
1848 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
)) {
1849 fill_stack_inode_item(trans
, &delayed_node
->inode_item
, inode
);
1853 ret
= btrfs_delayed_inode_reserve_metadata(trans
, root
, inode
,
1858 fill_stack_inode_item(trans
, &delayed_node
->inode_item
, inode
);
1859 set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
);
1860 delayed_node
->count
++;
1861 atomic_inc(&root
->fs_info
->delayed_root
->items
);
1863 mutex_unlock(&delayed_node
->mutex
);
1864 btrfs_release_delayed_node(delayed_node
);
/*
 * Flag the delayed node of an inode for deferred inode ref deletion.
 *
 * @inode: inode whose delayed node gets BTRFS_DELAYED_NODE_DEL_IREF.
 *
 * fs_info->log_root_recovering is tested first; the in-code comment
 * fragment says delayed inode refs are not done during log recovery.
 * Returns PTR_ERR() of the node when it cannot be obtained. Under
 * delayed_node->mutex the flag is tested, then set, and both
 * delayed_node->count and the delayed root's item counter are
 * incremented. No metadata is reserved; the in-code comment fragment
 * lists the reasons.
 *
 * NOTE(review): the statements guarded by the log_root_recovering and
 * DEL_IREF tests, and the final return, are not visible in this listing -
 * confirm against the full source.
 */
1868 int btrfs_delayed_delete_inode_ref(struct inode
*inode
)
1870 struct btrfs_delayed_node
*delayed_node
;
1873 * we don't do delayed inode updates during log recovery because it
1874 * leads to enospc problems. This means we also can't do
1875 * delayed inode refs
1877 if (BTRFS_I(inode
)->root
->fs_info
->log_root_recovering
)
1880 delayed_node
= btrfs_get_or_create_delayed_node(inode
);
1881 if (IS_ERR(delayed_node
))
1882 return PTR_ERR(delayed_node
);
1885 * We don't reserve space for inode ref deletion is because:
1886 * - We ONLY do async inode ref deletion for the inode who has only
1887 * one link(i_nlink == 1), it means there is only one inode ref.
1888 * And in most case, the inode ref and the inode item are in the
1889 * same leaf, and we will deal with them at the same time.
1890 * Since we are sure we will reserve the space for the inode item,
1891 * it is unnecessary to reserve space for inode ref deletion.
1892 * - If the inode ref and the inode item are not in the same leaf,
1893 * We also needn't worry about enospc problem, because we reserve
1894 * much more space for the inode update than it needs.
1895 * - At the worst, we can steal some space from the global reservation.
1898 mutex_lock(&delayed_node
->mutex
);
1899 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &delayed_node
->flags
))
1902 set_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &delayed_node
->flags
);
1903 delayed_node
->count
++;
1904 atomic_inc(&BTRFS_I(inode
)->root
->fs_info
->delayed_root
->items
);
1906 mutex_unlock(&delayed_node
->mutex
);
1907 btrfs_release_delayed_node(delayed_node
);
/*
 * Discard everything that is queued in a delayed node.
 *
 * @delayed_node: node to empty; its root is used for releasing metadata.
 *
 * Under delayed_node->mutex, for insertion items and then for deletion
 * items: the reserved metadata is released, the next item is fetched and
 * the previous one is released. If BTRFS_DELAYED_NODE_DEL_IREF is set the
 * delayed iref is released. If BTRFS_DELAYED_NODE_INODE_DIRTY is set the
 * inode's metadata reservation and the delayed inode are released.
 *
 * NOTE(review): the loop headers around the two item walks are not
 * visible in this listing - confirm against the full source.
 */
1911 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node
*delayed_node
)
1913 struct btrfs_root
*root
= delayed_node
->root
;
1914 struct btrfs_delayed_item
*curr_item
, *prev_item
;
1916 mutex_lock(&delayed_node
->mutex
);
1917 curr_item
= __btrfs_first_delayed_insertion_item(delayed_node
);
1919 btrfs_delayed_item_release_metadata(root
, curr_item
);
/* Fetch the successor before the current item is released. */
1920 prev_item
= curr_item
;
1921 curr_item
= __btrfs_next_delayed_item(prev_item
);
1922 btrfs_release_delayed_item(prev_item
);
1925 curr_item
= __btrfs_first_delayed_deletion_item(delayed_node
);
1927 btrfs_delayed_item_release_metadata(root
, curr_item
);
1928 prev_item
= curr_item
;
1929 curr_item
= __btrfs_next_delayed_item(prev_item
);
1930 btrfs_release_delayed_item(prev_item
);
1933 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF
, &delayed_node
->flags
))
1934 btrfs_release_delayed_iref(delayed_node
);
1936 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY
, &delayed_node
->flags
)) {
1937 btrfs_delayed_inode_release_metadata(root
, delayed_node
);
1938 btrfs_release_delayed_inode(delayed_node
);
1940 mutex_unlock(&delayed_node
->mutex
);
/*
 * Discard all delayed items of one inode.
 *
 * @inode: inode whose delayed node is looked up.
 *
 * The node returned by btrfs_get_delayed_node() is emptied with
 * __btrfs_kill_delayed_node() and then released.
 *
 * NOTE(review): the embedded line numbers jump from 1947 to 1951, so a
 * check for a missing delayed node is presumably absent from this
 * listing - confirm against the full source.
 */
1943 void btrfs_kill_delayed_inode_items(struct inode
*inode
)
1945 struct btrfs_delayed_node
*delayed_node
;
1947 delayed_node
= btrfs_get_delayed_node(inode
);
1951 __btrfs_kill_delayed_node(delayed_node
);
1952 btrfs_release_delayed_node(delayed_node
);
/*
 * Discard the delayed items of the delayed nodes found in a root.
 *
 * @root: root whose delayed_nodes_tree is scanned.
 *
 * Nodes are fetched in batches of up to ARRAY_SIZE(delayed_nodes) (8)
 * with radix_tree_gang_lookup() under root->inode_lock. The next lookup
 * starts at the inode id of the last node found plus one. A reference is
 * taken on every node of the batch before the lock is dropped; each node
 * is then emptied with __btrfs_kill_delayed_node() and released.
 *
 * NOTE(review): the declarations of inode_id, i and n, the enclosing
 * loop and the test that leads to the first spin_unlock() are not
 * visible in this listing - confirm against the full source.
 */
1955 void btrfs_kill_all_delayed_nodes(struct btrfs_root
*root
)
1958 struct btrfs_delayed_node
*delayed_nodes
[8];
1962 spin_lock(&root
->inode_lock
);
1963 n
= radix_tree_gang_lookup(&root
->delayed_nodes_tree
,
1964 (void **)delayed_nodes
, inode_id
,
1965 ARRAY_SIZE(delayed_nodes
));
1967 spin_unlock(&root
->inode_lock
);
/* Continue the next lookup after the last node of this batch. */
1971 inode_id
= delayed_nodes
[n
- 1]->inode_id
+ 1;
/* Take the references while root->inode_lock is still held. */
1973 for (i
= 0; i
< n
; i
++)
1974 atomic_inc(&delayed_nodes
[i
]->refs
);
1975 spin_unlock(&root
->inode_lock
);
1977 for (i
= 0; i
< n
; i
++) {
1978 __btrfs_kill_delayed_node(delayed_nodes
[i
]);
1979 btrfs_release_delayed_node(delayed_nodes
[i
]);
/*
 * Discard the delayed items of the nodes queued on a delayed root.
 *
 * @root: root used to find the delayed root.
 *
 * Starting from btrfs_first_delayed_node(), a node is emptied with
 * __btrfs_kill_delayed_node(), its successor is fetched with
 * btrfs_next_delayed_node() and the node is then released.
 *
 * NOTE(review): the loop header and the closing lines of the function
 * are not visible in this listing - confirm against the full source.
 */
1984 void btrfs_destroy_delayed_inodes(struct btrfs_root
*root
)
1986 struct btrfs_delayed_root
*delayed_root
;
1987 struct btrfs_delayed_node
*curr_node
, *prev_node
;
1989 delayed_root
= btrfs_get_delayed_root(root
);
1991 curr_node
= btrfs_first_delayed_node(delayed_root
);
1993 __btrfs_kill_delayed_node(curr_node
);
/* Fetch the successor before the current node is released. */
1995 prev_node
= curr_node
;
1996 curr_node
= btrfs_next_delayed_node(curr_node
);
1997 btrfs_release_delayed_node(prev_node
);