/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "ref-cache.h"
#include "tree-log.h"

#define BTRFS_ROOT_TRANS_TAG 0
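
/*
 * drop a reference on a transaction.  The transaction is removed from the
 * global list and freed once its use_count hits zero.
 */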
static noinline void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(transaction->use_count == 0);
	transaction->use_count--;
	if (transaction->use_count == 0) {
		list_del_init(&transaction->list);
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	cur_trans = root->fs_info->running_transaction;
	if (!cur_trans) {
		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
					     GFP_NOFS);
		BUG_ON(!cur_trans);
		root->fs_info->generation++;
		root->fs_info->last_alloc = 0;
		root->fs_info->last_data_alloc = 0;
		cur_trans->num_writers = 1;
		cur_trans->num_joined = 0;
		cur_trans->transid = root->fs_info->generation;
		init_waitqueue_head(&cur_trans->writer_wait);
		init_waitqueue_head(&cur_trans->commit_wait);
		cur_trans->in_commit = 0;
		cur_trans->blocked = 0;
		cur_trans->use_count = 1;
		cur_trans->commit_done = 0;
		cur_trans->start_time = get_seconds();

		cur_trans->delayed_refs.root.rb_node = NULL;
		cur_trans->delayed_refs.num_entries = 0;
		cur_trans->delayed_refs.num_heads_ready = 0;
		cur_trans->delayed_refs.num_heads = 0;
		cur_trans->delayed_refs.flushing = 0;
		cur_trans->delayed_refs.run_delayed_start = 0;
		spin_lock_init(&cur_trans->delayed_refs.lock);

		INIT_LIST_HEAD(&cur_trans->pending_snapshots);
		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
		extent_io_tree_init(&cur_trans->dirty_pages,
				    root->fs_info->btree_inode->i_mapping,
				    GFP_NOFS);
		spin_lock(&root->fs_info->new_trans_lock);
		root->fs_info->running_transaction = cur_trans;
		spin_unlock(&root->fs_info->new_trans_lock);
	} else {
		cur_trans->num_writers++;
		cur_trans->num_joined++;
	}

	return 0;
}

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
{
	struct btrfs_dirty_root *dirty;
	u64 running_trans_id = root->fs_info->running_transaction->transid;

	if (root->ref_cows && root->last_trans < running_trans_id) {
		WARN_ON(root == root->fs_info->extent_root);
		if (root->root_item.refs != 0) {
			radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);

			dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
			BUG_ON(!dirty);
			dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
			BUG_ON(!dirty->root);
			dirty->latest_root = root;
			INIT_LIST_HEAD(&dirty->list);

			root->commit_root = btrfs_root_node(root);

			memcpy(dirty->root, root, sizeof(*root));
			spin_lock_init(&dirty->root->node_lock);
			spin_lock_init(&dirty->root->list_lock);
			mutex_init(&dirty->root->objectid_mutex);
			mutex_init(&dirty->root->log_mutex);
			INIT_LIST_HEAD(&dirty->root->dead_list);
			dirty->root->node = root->commit_root;
			dirty->root->commit_root = NULL;

			spin_lock(&root->list_lock);
			list_add(&dirty->root->dead_list, &root->dead_list);
			spin_unlock(&root->list_lock);

			root->dirty_root = dirty;
		}
		root->last_trans = running_trans_id;
	}
	return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		DEFINE_WAIT(wait);
		cur_trans->use_count++;
		while (1) {
			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (cur_trans->blocked) {
				mutex_unlock(&root->fs_info->trans_mutex);
				schedule();
				mutex_lock(&root->fs_info->trans_mutex);
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
			} else {
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
				break;
			}
		}
		put_transaction(cur_trans);
	}
}
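
/*
 * common transaction start helper.  The wait argument controls whether we
 * block on a blocked (committing) transaction before joining: 0 never waits,
 * 1 waits unless an ioctl transaction is open, 2 always waits.
 */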
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
					     int num_blocks, int wait)
{
	struct btrfs_trans_handle *h =
		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	int ret;

	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->log_root_recovering &&
	    ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
		wait_current_trans(root);
	ret = join_transaction(root);
	BUG_ON(ret);

	btrfs_record_root_in_trans(root);
	h->transid = root->fs_info->running_transaction->transid;
	h->transaction = root->fs_info->running_transaction;
	h->blocks_reserved = num_blocks;
	h->blocks_used = 0;
	h->block_group = 0;
	h->alloc_exclude_nr = 0;
	h->alloc_exclude_start = 0;
	h->delayed_ref_updates = 0;

	root->fs_info->running_transaction->use_count++;
	mutex_unlock(&root->fs_info->trans_mutex);
	return h;
}
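
/*
 * thin wrappers around start_transaction that pick the wait behaviour
 * described above.
 */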
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							  int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}

/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
				    struct btrfs_transaction *commit)
{
	DEFINE_WAIT(wait);

	mutex_lock(&root->fs_info->trans_mutex);
	while (!commit->commit_done) {
		prepare_to_wait(&commit->commit_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit->commit_done)
			break;
		mutex_unlock(&root->fs_info->trans_mutex);
		schedule();
		mutex_lock(&root->fs_info->trans_mutex);
	}
	mutex_unlock(&root->fs_info->trans_mutex);
	finish_wait(&commit->commit_wait, &wait);
	return 0;
}

/*
 * rate limit against the drop_snapshot code.  This helps to slow down new
 * operations if the drop_snapshot code isn't able to keep up.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	int harder_count = 0;

harder:
	if (atomic_read(&info->throttles)) {
		DEFINE_WAIT(wait);
		int thr;
		thr = atomic_read(&info->throttle_gen);

		do {
			prepare_to_wait(&info->transaction_throttle,
					&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&info->throttles)) {
				finish_wait(&info->transaction_throttle, &wait);
				break;
			}
			schedule();
			finish_wait(&info->transaction_throttle, &wait);
		} while (thr == atomic_read(&info->throttle_gen));
		harder_count++;

		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
		    harder_count < 2)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
		    harder_count < 10)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
		    harder_count < 20)
			goto harder;
	}
}
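
/*
 * throttle a caller: optionally wait for the running transaction to unblock,
 * then rate limit against the snapshot dropping code.
 */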
void btrfs_throttle(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->open_ioctl_trans)
		wait_current_trans(root);
	mutex_unlock(&root->fs_info->trans_mutex);
	throttle_on_drops(root);
}
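
/*
 * common end-of-transaction handling: flush a batch of delayed refs if a
 * backlog has built up, drop our writer count and free the handle.
 */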
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, int throttle)
{
	struct btrfs_transaction *cur_trans;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;

	while (count < 4) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;

			/*
			 * do a full flush if the transaction is trying
			 * to close
			 */
			if (trans->transaction->delayed_refs.flushing)
				cur = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}

	mutex_lock(&info->trans_mutex);
	cur_trans = info->running_transaction;
	WARN_ON(cur_trans != trans->transaction);
	WARN_ON(cur_trans->num_writers < 1);
	cur_trans->num_writers--;

	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);
	mutex_unlock(&info->trans_mutex);
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (throttle)
		throttle_on_drops(root);

	return 0;
}
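
/*
 * the two exported variants only differ in whether they throttle against
 * snapshot deletion on the way out.
 */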
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	while (1) {
		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}
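
/*
 * write and wait on all the dirty btree pages for this transaction; with no
 * transaction handle this falls back to flushing the whole btree inode.
 */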
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					   &trans->transaction->dirty_pages);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	btrfs_write_dirty_block_groups(trans, root);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start)
			break;
		btrfs_set_root_bytenr(&root->root_item,
				      root->node->start);
		btrfs_set_root_level(&root->root_item,
				     btrfs_header_level(root->node));
		btrfs_set_root_generation(&root->root_item, trans->transid);

		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		BUG_ON(ret);
		btrfs_write_dirty_block_groups(trans, root);

		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		BUG_ON(ret);
	}
	return 0;
}

/*
 * update all the cowonly tree roots on disk
 */
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	eb = btrfs_lock_root_node(fs_info->tree_root);
	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		update_cowonly_root(trans, root);

		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		BUG_ON(ret);
	}
	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
{
	struct btrfs_dirty_root *dirty;

	dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
	if (!dirty)
		return -ENOMEM;
	dirty->root = root;
	dirty->latest_root = latest;

	mutex_lock(&root->fs_info->trans_mutex);
	list_add(&dirty->list, &latest->fs_info->dead_roots);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/*
 * at transaction commit time we need to schedule the old roots for
 * deletion via btrfs_drop_snapshot.  This runs through all the
 * reference counted roots that were modified in the current
 * transaction and puts them into the drop list
 */
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
				    struct radix_tree_root *radix,
				    struct list_head *list)
{
	struct btrfs_dirty_root *dirty;
	struct btrfs_root *gang[8];
	struct btrfs_root *root;
	int i;
	int ret;
	int err = 0;
	u32 refs;

	while (1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_TRANS_TAG);

			BUG_ON(!root->ref_tree);
			dirty = root->dirty_root;

			btrfs_free_log(trans, root);
			btrfs_free_reloc_root(trans, root);

			if (root->commit_root == root->node) {
				WARN_ON(root->node->start !=
					btrfs_root_bytenr(&root->root_item));

				free_extent_buffer(root->commit_root);
				root->commit_root = NULL;
				root->dirty_root = NULL;

				spin_lock(&root->list_lock);
				list_del_init(&dirty->root->dead_list);
				spin_unlock(&root->list_lock);

				kfree(dirty->root);
				kfree(dirty);

				/* make sure to update the root on disk
				 * so we get any updates to the block used
				 * counts
				 */
				err = btrfs_update_root(trans,
						root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
				continue;
			}

			memset(&root->root_item.drop_progress, 0,
			       sizeof(struct btrfs_disk_key));
			root->root_item.drop_level = 0;
			root->commit_root = NULL;
			root->dirty_root = NULL;
			root->root_key.offset = root->fs_info->generation;
			btrfs_set_root_bytenr(&root->root_item,
					      root->node->start);
			btrfs_set_root_level(&root->root_item,
					     btrfs_header_level(root->node));
			btrfs_set_root_generation(&root->root_item,
						  root->root_key.offset);

			err = btrfs_insert_root(trans, root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;

			refs = btrfs_root_refs(&dirty->root->root_item);
			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
			err = btrfs_update_root(trans, root->fs_info->tree_root,
						&dirty->root->root_key,
						&dirty->root->root_item);
			BUG_ON(err);

			if (refs == 1) {
				list_add(&dirty->list, list);
			} else {
				free_extent_buffer(dirty->root->node);
				kfree(dirty->root);
				kfree(dirty);
			}
		}
	}
	return err;
}

/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	int ret;
	struct btrfs_trans_handle *trans;
	unsigned long nr;

	if (root->defrag_running)
		return 0;
	trans = btrfs_start_transaction(root, 1);
	while (1) {
		root->defrag_running = 1;
		ret = btrfs_defrag_leaves(trans, root, cacheonly);
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		trans = btrfs_start_transaction(root, 1);
		if (root->fs_info->closing || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	btrfs_end_transaction(trans, root);
	return 0;
}

/*
 * when dropping snapshots, we generate a ton of delayed refs, and it makes
 * sense not to join the transaction while it is trying to flush the current
 * queue of delayed refs out.
 *
 * This is used by the drop snapshot code only
 */
static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
{
	DEFINE_WAIT(wait);

	mutex_lock(&info->trans_mutex);
	while (info->running_transaction &&
	       info->running_transaction->delayed_refs.flushing) {
		prepare_to_wait(&info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		mutex_unlock(&info->trans_mutex);
		schedule();
		mutex_lock(&info->trans_mutex);
		finish_wait(&info->transaction_wait, &wait);
	}
	mutex_unlock(&info->trans_mutex);
	return 0;
}

/*
 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 * all of them
 */
static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
				     struct list_head *list)
{
	struct btrfs_dirty_root *dirty;
	struct btrfs_trans_handle *trans;
	unsigned long nr;
	u64 num_bytes;
	u64 bytes_used;
	u64 max_useless;
	int ret = 0;
	int err;

	while (!list_empty(list)) {
		struct btrfs_root *root;

		dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
		list_del_init(&dirty->list);

		num_bytes = btrfs_root_used(&dirty->root->root_item);
		root = dirty->latest_root;
		atomic_inc(&root->fs_info->throttles);

		while (1) {
			/*
			 * we don't want to jump in and create a bunch of
			 * delayed refs if the transaction is starting to close
			 */
			wait_transaction_pre_flush(tree_root->fs_info);
			trans = btrfs_start_transaction(tree_root, 1);

			/*
			 * we've joined a transaction, make sure it isn't
			 * closing right now
			 */
			if (trans->transaction->delayed_refs.flushing) {
				btrfs_end_transaction(trans, tree_root);
				continue;
			}

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, dirty->root);
			if (ret != -EAGAIN)
				break;
			mutex_unlock(&root->fs_info->drop_mutex);

			err = btrfs_update_root(trans,
					tree_root,
					&dirty->root->root_key,
					&dirty->root->root_item);
			if (err)
				ret = err;
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, tree_root);
			BUG_ON(ret);

			btrfs_btree_balance_dirty(tree_root, nr);
			cond_resched();
		}
		atomic_dec(&root->fs_info->throttles);
		wake_up(&root->fs_info->transaction_throttle);

		num_bytes -= btrfs_root_used(&dirty->root->root_item);
		bytes_used = btrfs_root_used(&root->root_item);
		if (num_bytes) {
			mutex_lock(&root->fs_info->trans_mutex);
			btrfs_record_root_in_trans(root);
			mutex_unlock(&root->fs_info->trans_mutex);
			btrfs_set_root_used(&root->root_item,
					    bytes_used - num_bytes);
		}

		ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
		BUG_ON(ret);

		mutex_unlock(&root->fs_info->drop_mutex);

		spin_lock(&root->list_lock);
		list_del_init(&dirty->root->dead_list);
		if (!list_empty(&root->dead_list)) {
			struct btrfs_root *oldest;
			oldest = list_entry(root->dead_list.prev,
					    struct btrfs_root, dead_list);
			max_useless = oldest->root_key.offset - 1;
		} else {
			max_useless = root->root_key.offset - 1;
		}
		spin_unlock(&root->list_lock);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, tree_root);
		BUG_ON(ret);

		ret = btrfs_remove_leaf_refs(root, max_useless, 0);
		BUG_ON(ret);

		free_extent_buffer(dirty->root->node);
		kfree(dirty->root);
		kfree(dirty);

		btrfs_btree_balance_dirty(tree_root, nr);
		cond_resched();
	}
	return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	u64 objectid;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = -ENOMEM;
		goto fail;
	}
	ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
	if (ret)
		goto fail;

	btrfs_record_root_in_trans(root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));

	key.objectid = objectid;
	key.offset = trans->transid;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);

	old = btrfs_lock_root_node(root);
	btrfs_cow_block(trans, root, old, NULL, 0, &old);

	btrfs_copy_root(trans, root, old, &tmp, objectid);
	btrfs_tree_unlock(old);
	free_extent_buffer(old);

	btrfs_set_root_bytenr(new_root_item, tmp->start);
	btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
	btrfs_set_root_generation(new_root_item, trans->transid);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	memcpy(&pending->root_key, &key, sizeof(key));
fail:
	kfree(new_root_item);
	return ret;
}
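
/*
 * the snapshot root item was inserted during the commit; this adds the
 * directory item and the root back references that make it visible in the
 * parent directory.
 */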
static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	int ret;
	int namelen;
	u64 index = 0;
	struct btrfs_trans_handle *trans;
	struct inode *parent_inode;
	struct inode *inode;
	struct btrfs_root *parent_root;

	parent_inode = pending->dentry->d_parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	trans = btrfs_join_transaction(parent_root, 1);

	/*
	 * insert the directory item
	 */
	namelen = strlen(pending->name);
	ret = btrfs_set_inode_index(parent_inode, &index);
	ret = btrfs_insert_dir_item(trans, parent_root,
			    pending->name, namelen,
			    parent_inode->i_ino,
			    &pending->root_key, BTRFS_FT_DIR, index);
	if (ret)
		goto fail;

	btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	BUG_ON(ret);

	/* add the backref first */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 pending->root_key.objectid,
				 BTRFS_ROOT_BACKREF_KEY,
				 parent_root->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);
	BUG_ON(ret);

	/* now add the forward ref */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 parent_root->root_key.objectid,
				 BTRFS_ROOT_REF_KEY,
				 pending->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);

	inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
	d_instantiate(pending->dentry, inode);
fail:
	btrfs_end_transaction(trans, fs_info->fs_root);
	return ret;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	list_for_each_entry(pending, head, list) {
		ret = create_pending_snapshot(trans, fs_info, pending);
		BUG_ON(ret);
	}
	return 0;
}
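
/*
 * run the directory and back reference insertion for every pending snapshot
 * and free the pending records.
 */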
static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	while (!list_empty(head)) {
		pending = list_entry(head->next,
				     struct btrfs_pending_snapshot, list);
		ret = finish_pending_snapshot(fs_info, pending);
		BUG_ON(ret);
		list_del(&pending->list);
		kfree(pending->name);
		kfree(pending);
	}
	return 0;
}
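
/*
 * commit the running transaction: flush delayed refs, let other writers
 * finish or join, create pending snapshots, write the tree roots and the
 * super block, and finally wake anyone waiting on the commit.
 */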
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	unsigned long timeout = 1;
	struct btrfs_transaction *cur_trans;
	struct btrfs_transaction *prev_trans = NULL;
	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
	struct list_head dirty_fs_roots;
	struct extent_io_tree *pinned_copy;
	DEFINE_WAIT(wait);
	int ret;
	int should_grow = 0;
	unsigned long now = get_seconds();

	btrfs_run_ordered_operations(root, 0);

	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	cur_trans = trans->transaction;
	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	mutex_lock(&root->fs_info->trans_mutex);
	INIT_LIST_HEAD(&dirty_fs_roots);
	if (cur_trans->in_commit) {
		cur_trans->use_count++;
		mutex_unlock(&root->fs_info->trans_mutex);
		btrfs_end_transaction(trans, root);

		ret = wait_for_commit(root, cur_trans);
		BUG_ON(ret);

		mutex_lock(&root->fs_info->trans_mutex);
		put_transaction(cur_trans);
		mutex_unlock(&root->fs_info->trans_mutex);

		return 0;
	}

	pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
	if (!pinned_copy)
		return -ENOMEM;

	extent_io_tree_init(pinned_copy,
			    root->fs_info->btree_inode->i_mapping, GFP_NOFS);

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			prev_trans->use_count++;
			mutex_unlock(&root->fs_info->trans_mutex);

			wait_for_commit(root, prev_trans);

			mutex_lock(&root->fs_info->trans_mutex);
			put_transaction(prev_trans);
		}
	}

	if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
		should_grow = 1;

	do {
		int snap_pending = 0;
		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);
		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (cur_trans->num_writers > 1)
			timeout = MAX_SCHEDULE_TIMEOUT;
		else if (should_grow)
			timeout = 1;

		mutex_unlock(&root->fs_info->trans_mutex);

		if (snap_pending) {
			ret = btrfs_wait_ordered_extents(root, 1);
			BUG_ON(ret);
		}

		/*
		 * rename doesn't use btrfs_join_transaction, so, once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations.  We can safely run
		 * it here and know for sure that nothing new will be added
		 * to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		if (cur_trans->num_writers > 1 || should_grow)
			schedule_timeout(timeout);

		mutex_lock(&root->fs_info->trans_mutex);
		finish_wait(&cur_trans->writer_wait, &wait);
	} while (cur_trans->num_writers > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	ret = create_pending_snapshots(trans, root->fs_info);
	BUG_ON(ret);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	WARN_ON(cur_trans != trans->transaction);

	/* btrfs_commit_tree_roots is responsible for getting the
	 * various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree. So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);
	/*
	 * keep tree reloc code from adding new reloc trees
	 */
	mutex_lock(&root->fs_info->tree_reloc_mutex);

	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
			      &dirty_fs_roots);
	BUG_ON(ret);

	/* add_dirty_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = btrfs_commit_tree_roots(trans, root);
	BUG_ON(ret);

	cur_trans = root->fs_info->running_transaction;
	spin_lock(&root->fs_info->new_trans_lock);
	root->fs_info->running_transaction = NULL;
	spin_unlock(&root->fs_info->new_trans_lock);
	btrfs_set_super_generation(&root->fs_info->super_copy,
				   cur_trans->transid);
	btrfs_set_super_root(&root->fs_info->super_copy,
			     root->fs_info->tree_root->node->start);
	btrfs_set_super_root_level(&root->fs_info->super_copy,
			   btrfs_header_level(root->fs_info->tree_root->node));

	btrfs_set_super_chunk_root(&root->fs_info->super_copy,
				   chunk_root->node->start);
	btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
					 btrfs_header_level(chunk_root->node));
	btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
				btrfs_header_generation(chunk_root->node));

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
	}

	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
	       sizeof(root->fs_info->super_copy));

	btrfs_copy_pinned(root, pinned_copy);

	trans->transaction->blocked = 0;

	wake_up(&root->fs_info->transaction_throttle);
	wake_up(&root->fs_info->transaction_wait);

	mutex_unlock(&root->fs_info->trans_mutex);
	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root, 0);

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root, pinned_copy);
	kfree(pinned_copy);

	btrfs_drop_dead_reloc_roots(root);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	/* do the directory inserts of any pending snapshot creations */
	finish_pending_snapshots(trans, root->fs_info);

	mutex_lock(&root->fs_info->trans_mutex);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;
	wake_up(&cur_trans->commit_wait);

	put_transaction(cur_trans);
	put_transaction(cur_trans);

	list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
	if (root->fs_info->closing)
		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);

	mutex_unlock(&root->fs_info->trans_mutex);

	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (root->fs_info->closing)
		drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
	return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	struct list_head dirty_roots;
	INIT_LIST_HEAD(&dirty_roots);
again:
	mutex_lock(&root->fs_info->trans_mutex);
	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
	mutex_unlock(&root->fs_info->trans_mutex);

	if (!list_empty(&dirty_roots)) {
		drop_dirty_roots(root, &dirty_roots);
		goto again;
	}
	return 0;
}