/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"

#define BTRFS_ROOT_TRANS_TAG 0
static noinline void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
		BUG_ON(!list_empty(&transaction->list));
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}
static noinline void switch_commit_root(struct btrfs_root *root)
{
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);
}
/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&root->fs_info->trans_lock);
	if (root->fs_info->trans_no_join) {
		if (!nofail) {
			spin_unlock(&root->fs_info->trans_lock);
			return -EBUSY;
		}
	}

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans) {
		atomic_inc(&cur_trans->use_count);
		atomic_inc(&cur_trans->num_writers);
		cur_trans->num_joined++;
		spin_unlock(&root->fs_info->trans_lock);
		return 0;
	}
	spin_unlock(&root->fs_info->trans_lock);

	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;

	spin_lock(&root->fs_info->trans_lock);
	if (root->fs_info->running_transaction) {
		/* someone else started a transaction while we allocated */
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		cur_trans = root->fs_info->running_transaction;
		atomic_inc(&cur_trans->use_count);
		atomic_inc(&cur_trans->num_writers);
		cur_trans->num_joined++;
		spin_unlock(&root->fs_info->trans_lock);
		return 0;
	}

	atomic_set(&cur_trans->num_writers, 1);
	cur_trans->num_joined = 0;
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
	cur_trans->in_commit = 0;
	cur_trans->blocked = 0;
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
	cur_trans->commit_done = 0;
	cur_trans->start_time = get_seconds();

	cur_trans->delayed_refs.root = RB_ROOT;
	cur_trans->delayed_refs.num_entries = 0;
	cur_trans->delayed_refs.num_heads_ready = 0;
	cur_trans->delayed_refs.num_heads = 0;
	cur_trans->delayed_refs.flushing = 0;
	cur_trans->delayed_refs.run_delayed_start = 0;
	spin_lock_init(&cur_trans->commit_lock);
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
	list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
	extent_io_tree_init(&cur_trans->dirty_pages,
			    root->fs_info->btree_inode->i_mapping);
	root->fs_info->generation++;
	cur_trans->transid = root->fs_info->generation;
	root->fs_info->running_transaction = cur_trans;
	spin_unlock(&root->fs_info->trans_lock);

	return 0;
}
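/*
 * Illustrative sketch (added note, not original text): join_transaction()
 * returns -EBUSY when trans_no_join is set and nofail is zero, and
 * start_transaction() below handles that by waiting for the running
 * transaction and retrying:
 *
 *	do {
 *		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
 *		if (ret == -EBUSY)
 *			wait_current_trans(root);
 *	} while (ret == -EBUSY);
 */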
/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (root->ref_cows && root->last_trans < trans->transid) {
		WARN_ON(root == root->fs_info->extent_root);
		WARN_ON(root->commit_root != root->node);

		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
		root->last_trans = trans->transid;
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
		btrfs_init_reloc_root(trans, root);
	}
	return 0;
}
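/*
 * Sketch of how this is used elsewhere in this file (added for orientation,
 * not original text): start_transaction() records the root of the handle it
 * hands out, and create_pending_snapshot() records both the parent root and
 * the root being snapshotted before it touches either tree:
 *
 *	btrfs_record_root_in_trans(trans, parent_root);
 *	...
 *	btrfs_record_root_in_trans(trans, root);
 */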
/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		DEFINE_WAIT(wait);
		atomic_inc(&cur_trans->use_count);
		spin_unlock(&root->fs_info->trans_lock);
		while (1) {
			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (!cur_trans->blocked)
				break;
			schedule();
		}
		finish_wait(&root->fs_info->transaction_wait, &wait);
		put_transaction(cur_trans);
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}
}
enum btrfs_trans_type {
	TRANS_START,
	TRANS_JOIN,
	TRANS_USERSPACE,
	TRANS_JOIN_NOLOCK,
};

static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    u64 num_items, int type)
{
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
	int retries = 0;
	int ret;

	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
		return ERR_PTR(-EROFS);

	if (current->journal_info) {
		/* nested joins reuse the handle already on this task */
		WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
		h = current->journal_info;
		h->use_count++;
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}
again:
	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	if (!h)
		return ERR_PTR(-ENOMEM);

	if (may_wait_transaction(root, type))
		wait_current_trans(root);

	do {
		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
		if (ret == -EBUSY)
			wait_current_trans(root);
	} while (ret == -EBUSY);

	if (ret < 0) {
		kmem_cache_free(btrfs_trans_handle_cachep, h);
		return ERR_PTR(ret);
	}

	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
	h->blocks_used = 0;
	h->bytes_reserved = 0;
	h->delayed_ref_updates = 0;
	h->use_count = 1;
	h->block_rsv = NULL;
	h->orig_rsv = NULL;

	if (cur_trans->blocked && may_wait_transaction(root, type)) {
		btrfs_commit_transaction(h, root);
		goto again;
	}

	if (num_items > 0) {
		ret = btrfs_trans_reserve_metadata(h, root, num_items);
		if (ret == -EAGAIN && !retries) {
			retries++;
			btrfs_commit_transaction(h, root);
			goto again;
		} else if (ret == -EAGAIN) {
			/*
			 * We have already retried and got EAGAIN, so really we
			 * don't have space, so set ret to -ENOSPC.
			 */
			ret = -ENOSPC;
		}

		if (ret < 0) {
			btrfs_end_transaction(h, root);
			return ERR_PTR(ret);
		}
	}

got_it:
	btrfs_record_root_in_trans(h, root);

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
	return h;
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_items)
{
	return start_transaction(root, num_items, TRANS_START);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_USERSPACE);
}
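/*
 * Illustrative usage (a sketch, not part of the original file): callers
 * reserve space for the number of items they expect to modify and always
 * pair the start with btrfs_end_transaction().  The handle is an ERR_PTR
 * on failure, never NULL:
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	if (IS_ERR(trans))
 *		return PTR_ERR(trans);
 *	...modify one metadata item...
 *	btrfs_end_transaction(trans, root);
 */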
/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
				    struct btrfs_transaction *commit)
{
	DEFINE_WAIT(wait);

	while (!commit->commit_done) {
		prepare_to_wait(&commit->commit_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit->commit_done)
			break;
		schedule();
	}
	finish_wait(&commit->commit_wait, &wait);
	return 0;
}
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret;

	ret = 0;
	if (transid) {
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
			if (t->transid > transid)
				break;
		}
		spin_unlock(&root->fs_info->trans_lock);
		ret = -EINVAL;
		if (!cur_trans)
			goto out;  /* bad transid */
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->in_commit) {
				if (t->commit_done)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	wait_for_commit(root, cur_trans);

	put_transaction(cur_trans);
	ret = 0;
out:
	return ret;
}
void btrfs_throttle(struct btrfs_root *root)
{
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
		wait_current_trans(root);
}
static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	int ret;

	ret = btrfs_block_rsv_check(trans, root,
				    &root->fs_info->global_block_rsv, 0, 5);
	return ret ? 1 : 0;
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;

	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
	if (updates)
		btrfs_run_delayed_refs(trans, root, updates);

	return should_end_transaction(trans, root);
}
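/*
 * Sketch of the intended calling pattern (an illustration, not original
 * code): long-running operations poll btrfs_should_end_transaction() and
 * restart their transaction so a pending commit is never held off
 * indefinitely.  The loop below is hypothetical:
 *
 *	while (more_work) {
 *		...do a bounded chunk of work under "trans"...
 *		if (btrfs_should_end_transaction(trans, root)) {
 *			btrfs_end_transaction(trans, root);
 *			trans = btrfs_start_transaction(root, 1);
 *			if (IS_ERR(trans))
 *				return PTR_ERR(trans);
 *		}
 *	}
 */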
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle, int lock)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;

	if (--trans->use_count) {
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

	while (count < 4) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;

			/*
			 * do a full flush if the transaction is trying
			 * to close
			 */
			if (trans->transaction->delayed_refs.flushing)
				cur = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}

	btrfs_trans_release_metadata(trans, root);

	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
	    should_end_transaction(trans, root)) {
		trans->transaction->blocked = 1;
	}

	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
		if (throttle)
			return btrfs_commit_transaction(trans, root);
		else
			wake_up_process(info->transaction_kthread);
	}

	WARN_ON(cur_trans != info->running_transaction);
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);

	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);

	if (current->journal_info == trans)
		current->journal_info = NULL;
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (throttle)
		btrfs_run_delayed_iputs(root);

	return 0;
}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 0, 1);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 1, 1);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 0, 0);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1, 1);
}
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
			       struct extent_io_tree *dirty_pages, int mark)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    mark);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	if (err)
		werr = err;
	return werr;
}
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit.  We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
			      struct extent_io_tree *dirty_pages, int mark)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    mark);
		if (ret)
			break;

		clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages, int mark)
{
	int ret;
	int ret2;

	ret = btrfs_write_marked_extents(root, dirty_pages, mark);
	ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
	return ret || ret2;
}
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					&trans->transaction->dirty_pages,
					EXTENT_DIRTY);
}
/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	u64 old_root_used;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	old_root_used = btrfs_root_used(&root->root_item);
	btrfs_write_dirty_block_groups(trans, root);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start &&
		    old_root_used == btrfs_root_used(&root->root_item))
			break;

		btrfs_set_root_node(&root->root_item, root->node);
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		BUG_ON(ret);

		old_root_used = btrfs_root_used(&root->root_item);
		ret = btrfs_write_dirty_block_groups(trans, root);
		BUG_ON(ret);
	}

	if (root != root->fs_info->extent_root)
		switch_commit_root(root);

	return 0;
}
/*
 * update all the cowonly tree roots on disk
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	eb = btrfs_lock_root_node(fs_info->tree_root);
	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		update_cowonly_root(trans, root);
	}

	down_write(&fs_info->extent_commit_sem);
	switch_commit_root(fs_info->extent_root);
	up_write(&fs_info->extent_commit_sem);

	return 0;
}
/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
	spin_lock(&root->fs_info->trans_lock);
	list_add(&root->root_list, &root->fs_info->dead_roots);
	spin_unlock(&root->fs_info->trans_lock);
	return 0;
}
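/*
 * Orientation note (added, not original text): roots queued here on
 * fs_info->dead_roots are reaped later by btrfs_clean_old_snapshots() at
 * the bottom of this file, which splices the list and calls
 * btrfs_drop_snapshot() on each entry.
 */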
/*
 * update all the fs/subvolume tree roots on disk
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	spin_lock(&fs_info->fs_roots_radix_lock);
	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);
			spin_unlock(&fs_info->fs_roots_radix_lock);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);
			btrfs_orphan_commit_root(trans, root);

			btrfs_save_ino_cache(root, trans);

			if (root->commit_root != root->node) {
				mutex_lock(&root->fs_commit_mutex);
				switch_commit_root(root);
				btrfs_unpin_free_ino(root);
				mutex_unlock(&root->fs_commit_mutex);

				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			spin_lock(&fs_info->fs_roots_radix_lock);
			if (err)
				break;
		}
	}
	spin_unlock(&fs_info->fs_roots_radix_lock);
	return err;
}
/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_trans_handle *trans;
	int ret;
	unsigned long nr;

	if (xchg(&root->defrag_running, 1))
		return 0;

	while (1) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		ret = btrfs_defrag_leaves(trans, root, cacheonly);

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	return ret;
}
/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
					    struct btrfs_fs_info *fs_info,
					    struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct btrfs_root *parent_root;
	struct inode *parent_inode;
	struct dentry *parent;
	struct dentry *dentry;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	u64 to_reserve = 0;
	u64 index = 0;
	u64 objectid;
	u64 root_flags;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		pending->error = -ENOMEM;
		goto fail;
	}

	ret = btrfs_find_free_objectid(tree_root, &objectid);
	if (ret) {
		pending->error = ret;
		goto fail;
	}

	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
	btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);

	if (to_reserve > 0) {
		ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
					  to_reserve);
		if (ret) {
			pending->error = ret;
			goto fail;
		}
	}

	key.objectid = objectid;
	key.offset = (u64)-1;
	key.type = BTRFS_ROOT_ITEM_KEY;

	trans->block_rsv = &pending->block_rsv;

	dentry = pending->dentry;
	parent = dget_parent(dentry);
	parent_inode = parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	btrfs_record_root_in_trans(trans, parent_root);

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(parent_inode, &index);
	BUG_ON(ret);
	ret = btrfs_insert_dir_item(trans, parent_root,
				    dentry->d_name.name, dentry->d_name.len,
				    parent_inode, &key,
				    BTRFS_FT_DIR, index);
	BUG_ON(ret);

	btrfs_i_size_write(parent_inode, parent_inode->i_size +
			   dentry->d_name.len * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	BUG_ON(ret);

	btrfs_record_root_in_trans(trans, root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
	btrfs_check_and_init_root_item(new_root_item);

	root_flags = btrfs_root_flags(new_root_item);
	if (pending->readonly)
		root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
	else
		root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
	btrfs_set_root_flags(new_root_item, root_flags);

	old = btrfs_lock_root_node(root);
	btrfs_cow_block(trans, root, old, NULL, 0, &old);
	btrfs_set_lock_blocking(old);

	btrfs_copy_root(trans, root, old, &tmp, objectid);
	btrfs_tree_unlock(old);
	free_extent_buffer(old);

	btrfs_set_root_node(new_root_item, tmp);
	/* record when the snapshot was created in key.offset */
	key.offset = trans->transid;
	ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	BUG_ON(ret);

	/*
	 * insert root back/forward references
	 */
	ret = btrfs_add_root_ref(trans, tree_root, objectid,
				 parent_root->root_key.objectid,
				 btrfs_ino(parent_inode), index,
				 dentry->d_name.name, dentry->d_name.len);
	BUG_ON(ret);
	dput(parent);

	key.offset = (u64)-1;
	pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
	BUG_ON(IS_ERR(pending->snap));

	btrfs_reloc_post_snapshot(trans, pending);
	btrfs_orphan_post_snapshot(trans, pending);
fail:
	kfree(new_root_item);
	btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
	return 0;
}
/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	list_for_each_entry(pending, head, list) {
		/*
		 * We must deal with the delayed items before creating
		 * snapshots, or we will create a snapshot with inconsistent
		 * committed data.
		 */
		ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
		BUG_ON(ret);

		ret = create_pending_snapshot(trans, fs_info, pending);
		BUG_ON(ret);
	}
	return 0;
}
static void update_super_roots(struct btrfs_root *root)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = &root->fs_info->super_copy;

	root_item = &root->fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &root->fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
	if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
		super->cache_generation = root_item->generation;
}
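/*
 * Orientation note (added, not original text): the in-memory super_copy
 * updated here is what btrfs_commit_transaction() later memcpy()s into
 * super_for_commit and hands to write_ctree_super(), so this must run after
 * the tree root and chunk root have been switched to their new nodes.
 */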
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
	int ret = 0;

	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->in_commit;
	spin_unlock(&info->trans_lock);
	return ret;
}

int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
	int ret = 0;

	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->blocked;
	spin_unlock(&info->trans_lock);
	return ret;
}
/*
 * wait for the current transaction commit to start and block subsequent
 * transaction joins
 */
static void wait_current_trans_commit_start(struct btrfs_root *root,
					    struct btrfs_transaction *trans)
{
	DEFINE_WAIT(wait);

	if (trans->in_commit)
		return;

	while (1) {
		prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (trans->in_commit) {
			finish_wait(&root->fs_info->transaction_blocked_wait,
				    &wait);
			break;
		}
		schedule();
		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
	}
}

/*
 * wait for the current transaction to start and then become unblocked.
 * caller holds a reference on trans
 */
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
							 struct btrfs_transaction *trans)
{
	DEFINE_WAIT(wait);

	if (trans->commit_done || (trans->in_commit && !trans->blocked))
		return;

	while (1) {
		prepare_to_wait(&root->fs_info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (trans->commit_done ||
		    (trans->in_commit && !trans->blocked)) {
			finish_wait(&root->fs_info->transaction_wait,
				    &wait);
			break;
		}
		schedule();
		finish_wait(&root->fs_info->transaction_wait,
			    &wait);
	}
}
/*
 * commit transactions asynchronously. once btrfs_commit_transaction_async
 * returns, any subsequent transaction will not be allowed to join.
 */
struct btrfs_async_commit {
	struct btrfs_trans_handle *newtrans;
	struct btrfs_root *root;
	struct delayed_work work;
};

static void do_async_commit(struct work_struct *work)
{
	struct btrfs_async_commit *ac =
		container_of(work, struct btrfs_async_commit, work.work);

	btrfs_commit_transaction(ac->newtrans, ac->root);
	kfree(ac);
}

int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   int wait_for_unblock)
{
	struct btrfs_async_commit *ac;
	struct btrfs_transaction *cur_trans;

	ac = kmalloc(sizeof(*ac), GFP_NOFS);
	if (!ac)
		return -ENOMEM;

	INIT_DELAYED_WORK(&ac->work, do_async_commit);
	ac->root = root;
	ac->newtrans = btrfs_join_transaction(root);
	if (IS_ERR(ac->newtrans)) {
		int err = PTR_ERR(ac->newtrans);
		kfree(ac);
		return err;
	}

	/* take transaction reference */
	cur_trans = trans->transaction;
	atomic_inc(&cur_trans->use_count);

	btrfs_end_transaction(trans, root);
	schedule_delayed_work(&ac->work, 0);

	/* wait for transaction to start and unblock */
	if (wait_for_unblock)
		wait_current_trans_commit_start_and_unblock(root, cur_trans);
	else
		wait_current_trans_commit_start(root, cur_trans);

	put_transaction(cur_trans);

	return 0;
}
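/*
 * Usage note (added, not original text): on success this path calls
 * btrfs_end_transaction() on the handle that was passed in (see above), so
 * the caller must not end it again; on failure the handle is untouched and
 * the caller still owns it.  A hypothetical caller, e.g. an async snapshot
 * path, might do:
 *
 *	ret = btrfs_commit_transaction_async(trans, root, 1);
 *	if (ret)
 *		btrfs_end_transaction(trans, root);
 */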
/*
 * btrfs_transaction state sequence:
 * in_commit = 0, blocked = 0  (initial)
 * in_commit = 1, blocked = 1
 * blocked = 0
 * commit_done = 1
 */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	struct btrfs_transaction *cur_trans;
	struct btrfs_transaction *prev_trans = NULL;
	DEFINE_WAIT(wait);
	int ret;
	int should_grow = 0;
	unsigned long now = get_seconds();
	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

	btrfs_run_ordered_operations(root, 0);

	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	btrfs_trans_release_metadata(trans, root);

	cur_trans = trans->transaction;
	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	spin_lock(&cur_trans->commit_lock);
	if (cur_trans->in_commit) {
		spin_unlock(&cur_trans->commit_lock);
		atomic_inc(&cur_trans->use_count);
		btrfs_end_transaction(trans, root);

		ret = wait_for_commit(root, cur_trans);
		BUG_ON(ret);

		put_transaction(cur_trans);

		return 0;
	}

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	spin_unlock(&cur_trans->commit_lock);
	wake_up(&root->fs_info->transaction_blocked_wait);

	spin_lock(&root->fs_info->trans_lock);
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			atomic_inc(&prev_trans->use_count);
			spin_unlock(&root->fs_info->trans_lock);

			wait_for_commit(root, prev_trans);

			put_transaction(prev_trans);
		} else {
			spin_unlock(&root->fs_info->trans_lock);
		}
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}

	if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
		should_grow = 1;

	do {
		int snap_pending = 0;

		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);

		if (flush_on_commit || snap_pending) {
			btrfs_start_delalloc_inodes(root, 1);
			ret = btrfs_wait_ordered_extents(root, 0, 1);
			BUG_ON(ret);
		}

		ret = btrfs_run_delayed_items(trans, root);
		BUG_ON(ret);

		/*
		 * rename doesn't use btrfs_join_transaction, so, once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations.  We can safely run
		 * it here and know for sure that nothing new will be added
		 * to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (atomic_read(&cur_trans->num_writers) > 1)
			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
		else if (should_grow)
			schedule_timeout(1);

		finish_wait(&cur_trans->writer_wait, &wait);
		spin_lock(&root->fs_info->trans_lock);
		root->fs_info->trans_no_join = 1;
		spin_unlock(&root->fs_info->trans_lock);
	} while (atomic_read(&cur_trans->num_writers) > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	ret = create_pending_snapshots(trans, root->fs_info);
	BUG_ON(ret);

	ret = btrfs_run_delayed_items(trans, root);
	BUG_ON(ret);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	WARN_ON(cur_trans != trans->transaction);

	btrfs_scrub_pause(root);
	/* btrfs_commit_tree_roots is responsible for getting the
	 * various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree.  So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans, root);
	BUG_ON(ret);

	/* commit_fs_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = commit_cowonly_roots(trans, root);
	BUG_ON(ret);

	btrfs_prepare_extent_commit(trans, root);

	cur_trans = root->fs_info->running_transaction;

	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
			    root->fs_info->tree_root->node);
	switch_commit_root(root->fs_info->tree_root);

	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
			    root->fs_info->chunk_root->node);
	switch_commit_root(root->fs_info->chunk_root);

	update_super_roots(root);

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
	}

	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
	       sizeof(root->fs_info->super_copy));

	trans->transaction->blocked = 0;
	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->running_transaction = NULL;
	root->fs_info->trans_no_join = 0;
	spin_unlock(&root->fs_info->trans_lock);

	wake_up(&root->fs_info->transaction_wait);

	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root, 0);

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;

	wake_up(&cur_trans->commit_wait);

	spin_lock(&root->fs_info->trans_lock);
	list_del_init(&cur_trans->list);
	spin_unlock(&root->fs_info->trans_lock);

	/* drop this handle's reference and the one taken at join time */
	put_transaction(cur_trans);
	put_transaction(cur_trans);

	trace_btrfs_transaction_commit(root);

	btrfs_scrub_continue(root);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (current != root->fs_info->transaction_kthread)
		btrfs_run_delayed_iputs(root);

	return ret;
}
/*
 * interface function to delete all the snapshots we have scheduled for deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->trans_lock);
	list_splice_init(&fs_info->dead_roots, &list);
	spin_unlock(&fs_info->trans_lock);

	while (!list_empty(&list)) {
		root = list_entry(list.next, struct btrfs_root, root_list);
		list_del(&root->root_list);

		btrfs_kill_all_delayed_nodes(root);

		if (btrfs_header_backref_rev(root->node) <
		    BTRFS_MIXED_BACKREF_REV)
			btrfs_drop_snapshot(root, NULL, 0);
		else
			btrfs_drop_snapshot(root, NULL, 1);
	}
	return 0;
}