/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
19 #include <linux/sched.h>
22 #include "transaction.h"
23 #include "print-tree.h"
/* forward declarations for helpers defined later in this file */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans,
		   struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
41 inline void btrfs_init_path(struct btrfs_path
*p
)
43 memset(p
, 0, sizeof(*p
));
46 struct btrfs_path
*btrfs_alloc_path(void)
48 struct btrfs_path
*path
;
49 path
= kmem_cache_alloc(btrfs_path_cachep
, GFP_NOFS
);
51 btrfs_init_path(path
);
57 /* this also releases the path */
58 void btrfs_free_path(struct btrfs_path
*p
)
60 btrfs_release_path(NULL
, p
);
61 kmem_cache_free(btrfs_path_cachep
, p
);
65 * path release drops references on the extent buffers in the path
66 * and it drops any locks held by this path
68 * It is safe to call this on paths that no locks or extent buffers held.
70 noinline
void btrfs_release_path(struct btrfs_root
*root
, struct btrfs_path
*p
)
74 for (i
= 0; i
< BTRFS_MAX_LEVEL
; i
++) {
79 btrfs_tree_unlock(p
->nodes
[i
]);
82 free_extent_buffer(p
->nodes
[i
]);
88 * safely gets a reference on the root node of a tree. A lock
89 * is not taken, so a concurrent writer may put a different node
90 * at the root of the tree. See btrfs_lock_root_node for the
93 * The extent buffer returned by this has a reference taken, so
94 * it won't disappear. It may stop being the root of the tree
95 * at any time because there are no locks held.
97 struct extent_buffer
*btrfs_root_node(struct btrfs_root
*root
)
99 struct extent_buffer
*eb
;
100 spin_lock(&root
->node_lock
);
102 extent_buffer_get(eb
);
103 spin_unlock(&root
->node_lock
);
107 /* loop around taking references on and locking the root node of the
108 * tree until you end up with a lock on the root. A locked buffer
109 * is returned, with a reference held.
111 struct extent_buffer
*btrfs_lock_root_node(struct btrfs_root
*root
)
113 struct extent_buffer
*eb
;
116 eb
= btrfs_root_node(root
);
119 spin_lock(&root
->node_lock
);
120 if (eb
== root
->node
) {
121 spin_unlock(&root
->node_lock
);
124 spin_unlock(&root
->node_lock
);
126 btrfs_tree_unlock(eb
);
127 free_extent_buffer(eb
);
132 /* cowonly root (everything not a reference counted cow subvolume), just get
133 * put onto a simple dirty list. transaction.c walks this to make sure they
134 * get properly updated on disk.
136 static void add_root_to_dirty_list(struct btrfs_root
*root
)
138 if (root
->track_dirty
&& list_empty(&root
->dirty_list
)) {
139 list_add(&root
->dirty_list
,
140 &root
->fs_info
->dirty_cowonly_roots
);
145 * used by snapshot creation to make a copy of a root for a tree with
146 * a given objectid. The buffer with the new root node is returned in
147 * cow_ret, and this func returns zero on success or a negative error code.
149 int btrfs_copy_root(struct btrfs_trans_handle
*trans
,
150 struct btrfs_root
*root
,
151 struct extent_buffer
*buf
,
152 struct extent_buffer
**cow_ret
, u64 new_root_objectid
)
154 struct extent_buffer
*cow
;
158 struct btrfs_root
*new_root
;
160 new_root
= kmalloc(sizeof(*new_root
), GFP_NOFS
);
164 memcpy(new_root
, root
, sizeof(*new_root
));
165 new_root
->root_key
.objectid
= new_root_objectid
;
167 WARN_ON(root
->ref_cows
&& trans
->transid
!=
168 root
->fs_info
->running_transaction
->transid
);
169 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
171 level
= btrfs_header_level(buf
);
172 nritems
= btrfs_header_nritems(buf
);
174 cow
= btrfs_alloc_free_block(trans
, new_root
, buf
->len
, 0,
175 new_root_objectid
, trans
->transid
,
176 level
, buf
->start
, 0);
182 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
183 btrfs_set_header_bytenr(cow
, cow
->start
);
184 btrfs_set_header_generation(cow
, trans
->transid
);
185 btrfs_set_header_owner(cow
, new_root_objectid
);
186 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
);
188 write_extent_buffer(cow
, root
->fs_info
->fsid
,
189 (unsigned long)btrfs_header_fsid(cow
),
192 WARN_ON(btrfs_header_generation(buf
) > trans
->transid
);
193 ret
= btrfs_inc_ref(trans
, new_root
, buf
, cow
, NULL
);
199 btrfs_mark_buffer_dirty(cow
);
205 * does the dirty work in cow of a single block. The parent block (if
206 * supplied) is updated to point to the new cow copy. The new buffer is marked
207 * dirty and returned locked. If you modify the block it needs to be marked
210 * search_start -- an allocation hint for the new block
212 * empty_size -- a hint that you plan on doing more cow. This is the size in
213 * bytes the allocator should try to find free next to the block it returns.
214 * This is just a hint and may be ignored by the allocator.
216 * prealloc_dest -- if you have already reserved a destination for the cow,
217 * this uses that block instead of allocating a new one.
218 * btrfs_alloc_reserved_extent is used to finish the allocation.
220 static noinline
int __btrfs_cow_block(struct btrfs_trans_handle
*trans
,
221 struct btrfs_root
*root
,
222 struct extent_buffer
*buf
,
223 struct extent_buffer
*parent
, int parent_slot
,
224 struct extent_buffer
**cow_ret
,
225 u64 search_start
, u64 empty_size
,
229 struct extent_buffer
*cow
;
238 WARN_ON(!btrfs_tree_locked(buf
));
241 parent_start
= parent
->start
;
245 WARN_ON(root
->ref_cows
&& trans
->transid
!=
246 root
->fs_info
->running_transaction
->transid
);
247 WARN_ON(root
->ref_cows
&& trans
->transid
!= root
->last_trans
);
249 level
= btrfs_header_level(buf
);
250 nritems
= btrfs_header_nritems(buf
);
253 struct btrfs_key ins
;
255 ins
.objectid
= prealloc_dest
;
256 ins
.offset
= buf
->len
;
257 ins
.type
= BTRFS_EXTENT_ITEM_KEY
;
259 ret
= btrfs_alloc_reserved_extent(trans
, root
, parent_start
,
260 root
->root_key
.objectid
,
261 trans
->transid
, level
, &ins
);
263 cow
= btrfs_init_new_buffer(trans
, root
, prealloc_dest
,
266 cow
= btrfs_alloc_free_block(trans
, root
, buf
->len
,
268 root
->root_key
.objectid
,
269 trans
->transid
, level
,
270 search_start
, empty_size
);
275 copy_extent_buffer(cow
, buf
, 0, 0, cow
->len
);
276 btrfs_set_header_bytenr(cow
, cow
->start
);
277 btrfs_set_header_generation(cow
, trans
->transid
);
278 btrfs_set_header_owner(cow
, root
->root_key
.objectid
);
279 btrfs_clear_header_flag(cow
, BTRFS_HEADER_FLAG_WRITTEN
);
281 write_extent_buffer(cow
, root
->fs_info
->fsid
,
282 (unsigned long)btrfs_header_fsid(cow
),
285 WARN_ON(btrfs_header_generation(buf
) > trans
->transid
);
286 if (btrfs_header_generation(buf
) != trans
->transid
) {
288 ret
= btrfs_inc_ref(trans
, root
, buf
, cow
, &nr_extents
);
292 ret
= btrfs_cache_ref(trans
, root
, buf
, nr_extents
);
294 } else if (btrfs_header_owner(buf
) == BTRFS_TREE_RELOC_OBJECTID
) {
296 * There are only two places that can drop reference to
297 * tree blocks owned by living reloc trees, one is here,
298 * the other place is btrfs_drop_subtree. In both places,
299 * we check reference count while tree block is locked.
300 * Furthermore, if reference count is one, it won't get
301 * increased by someone else.
304 ret
= btrfs_lookup_extent_ref(trans
, root
, buf
->start
,
308 ret
= btrfs_update_ref(trans
, root
, buf
, cow
,
310 clean_tree_block(trans
, root
, buf
);
312 ret
= btrfs_inc_ref(trans
, root
, buf
, cow
, NULL
);
316 ret
= btrfs_update_ref(trans
, root
, buf
, cow
, 0, nritems
);
319 clean_tree_block(trans
, root
, buf
);
322 if (root
->root_key
.objectid
== BTRFS_TREE_RELOC_OBJECTID
) {
323 ret
= btrfs_reloc_tree_cache_ref(trans
, root
, cow
, buf
->start
);
327 if (buf
== root
->node
) {
328 WARN_ON(parent
&& parent
!= buf
);
330 spin_lock(&root
->node_lock
);
332 extent_buffer_get(cow
);
333 spin_unlock(&root
->node_lock
);
335 if (buf
!= root
->commit_root
) {
336 btrfs_free_extent(trans
, root
, buf
->start
,
337 buf
->len
, buf
->start
,
338 root
->root_key
.objectid
,
339 btrfs_header_generation(buf
),
342 free_extent_buffer(buf
);
343 add_root_to_dirty_list(root
);
345 btrfs_set_node_blockptr(parent
, parent_slot
,
347 WARN_ON(trans
->transid
== 0);
348 btrfs_set_node_ptr_generation(parent
, parent_slot
,
350 btrfs_mark_buffer_dirty(parent
);
351 WARN_ON(btrfs_header_generation(parent
) != trans
->transid
);
352 btrfs_free_extent(trans
, root
, buf
->start
, buf
->len
,
353 parent_start
, btrfs_header_owner(parent
),
354 btrfs_header_generation(parent
), level
, 1);
357 btrfs_tree_unlock(buf
);
358 free_extent_buffer(buf
);
359 btrfs_mark_buffer_dirty(cow
);
365 * cows a single block, see __btrfs_cow_block for the real work.
366 * This version of it has extra checks so that a block isn't cow'd more than
367 * once per transaction, as long as it hasn't been written yet
369 noinline
int btrfs_cow_block(struct btrfs_trans_handle
*trans
,
370 struct btrfs_root
*root
, struct extent_buffer
*buf
,
371 struct extent_buffer
*parent
, int parent_slot
,
372 struct extent_buffer
**cow_ret
, u64 prealloc_dest
)
377 if (trans
->transaction
!= root
->fs_info
->running_transaction
) {
378 printk(KERN_CRIT
"trans %llu running %llu\n",
379 (unsigned long long)trans
->transid
,
381 root
->fs_info
->running_transaction
->transid
);
384 if (trans
->transid
!= root
->fs_info
->generation
) {
385 printk(KERN_CRIT
"trans %llu running %llu\n",
386 (unsigned long long)trans
->transid
,
387 (unsigned long long)root
->fs_info
->generation
);
391 spin_lock(&root
->fs_info
->hash_lock
);
392 if (btrfs_header_generation(buf
) == trans
->transid
&&
393 btrfs_header_owner(buf
) == root
->root_key
.objectid
&&
394 !btrfs_header_flag(buf
, BTRFS_HEADER_FLAG_WRITTEN
)) {
396 spin_unlock(&root
->fs_info
->hash_lock
);
397 WARN_ON(prealloc_dest
);
400 spin_unlock(&root
->fs_info
->hash_lock
);
401 search_start
= buf
->start
& ~((u64
)(1024 * 1024 * 1024) - 1);
402 ret
= __btrfs_cow_block(trans
, root
, buf
, parent
,
403 parent_slot
, cow_ret
, search_start
, 0,
409 * helper function for defrag to decide if two blocks pointed to by a
410 * node are actually close by
412 static int close_blocks(u64 blocknr
, u64 other
, u32 blocksize
)
414 if (blocknr
< other
&& other
- (blocknr
+ blocksize
) < 32768)
416 if (blocknr
> other
&& blocknr
- (other
+ blocksize
) < 32768)
422 * compare two keys in a memcmp fashion
424 static int comp_keys(struct btrfs_disk_key
*disk
, struct btrfs_key
*k2
)
428 btrfs_disk_key_to_cpu(&k1
, disk
);
430 if (k1
.objectid
> k2
->objectid
)
432 if (k1
.objectid
< k2
->objectid
)
434 if (k1
.type
> k2
->type
)
436 if (k1
.type
< k2
->type
)
438 if (k1
.offset
> k2
->offset
)
440 if (k1
.offset
< k2
->offset
)
446 * same as comp_keys only with two btrfs_key's
448 static int comp_cpu_keys(struct btrfs_key
*k1
, struct btrfs_key
*k2
)
450 if (k1
->objectid
> k2
->objectid
)
452 if (k1
->objectid
< k2
->objectid
)
454 if (k1
->type
> k2
->type
)
456 if (k1
->type
< k2
->type
)
458 if (k1
->offset
> k2
->offset
)
460 if (k1
->offset
< k2
->offset
)
466 * this is used by the defrag code to go through all the
467 * leaves pointed to by a node and reallocate them so that
468 * disk order is close to key order
470 int btrfs_realloc_node(struct btrfs_trans_handle
*trans
,
471 struct btrfs_root
*root
, struct extent_buffer
*parent
,
472 int start_slot
, int cache_only
, u64
*last_ret
,
473 struct btrfs_key
*progress
)
475 struct extent_buffer
*cur
;
478 u64 search_start
= *last_ret
;
488 int progress_passed
= 0;
489 struct btrfs_disk_key disk_key
;
491 parent_level
= btrfs_header_level(parent
);
492 if (cache_only
&& parent_level
!= 1)
495 if (trans
->transaction
!= root
->fs_info
->running_transaction
)
497 if (trans
->transid
!= root
->fs_info
->generation
)
500 parent_nritems
= btrfs_header_nritems(parent
);
501 blocksize
= btrfs_level_size(root
, parent_level
- 1);
502 end_slot
= parent_nritems
;
504 if (parent_nritems
== 1)
507 for (i
= start_slot
; i
< end_slot
; i
++) {
510 if (!parent
->map_token
) {
511 map_extent_buffer(parent
,
512 btrfs_node_key_ptr_offset(i
),
513 sizeof(struct btrfs_key_ptr
),
514 &parent
->map_token
, &parent
->kaddr
,
515 &parent
->map_start
, &parent
->map_len
,
518 btrfs_node_key(parent
, &disk_key
, i
);
519 if (!progress_passed
&& comp_keys(&disk_key
, progress
) < 0)
523 blocknr
= btrfs_node_blockptr(parent
, i
);
524 gen
= btrfs_node_ptr_generation(parent
, i
);
526 last_block
= blocknr
;
529 other
= btrfs_node_blockptr(parent
, i
- 1);
530 close
= close_blocks(blocknr
, other
, blocksize
);
532 if (!close
&& i
< end_slot
- 2) {
533 other
= btrfs_node_blockptr(parent
, i
+ 1);
534 close
= close_blocks(blocknr
, other
, blocksize
);
537 last_block
= blocknr
;
540 if (parent
->map_token
) {
541 unmap_extent_buffer(parent
, parent
->map_token
,
543 parent
->map_token
= NULL
;
546 cur
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
548 uptodate
= btrfs_buffer_uptodate(cur
, gen
);
551 if (!cur
|| !uptodate
) {
553 free_extent_buffer(cur
);
557 cur
= read_tree_block(root
, blocknr
,
559 } else if (!uptodate
) {
560 btrfs_read_buffer(cur
, gen
);
563 if (search_start
== 0)
564 search_start
= last_block
;
566 btrfs_tree_lock(cur
);
567 err
= __btrfs_cow_block(trans
, root
, cur
, parent
, i
,
570 (end_slot
- i
) * blocksize
), 0);
572 btrfs_tree_unlock(cur
);
573 free_extent_buffer(cur
);
576 search_start
= cur
->start
;
577 last_block
= cur
->start
;
578 *last_ret
= search_start
;
579 btrfs_tree_unlock(cur
);
580 free_extent_buffer(cur
);
582 if (parent
->map_token
) {
583 unmap_extent_buffer(parent
, parent
->map_token
,
585 parent
->map_token
= NULL
;
591 * The leaf data grows from end-to-front in the node.
592 * this returns the address of the start of the last item,
593 * which is the stop of the leaf data stack
595 static inline unsigned int leaf_data_end(struct btrfs_root
*root
,
596 struct extent_buffer
*leaf
)
598 u32 nr
= btrfs_header_nritems(leaf
);
600 return BTRFS_LEAF_DATA_SIZE(root
);
601 return btrfs_item_offset_nr(leaf
, nr
- 1);
605 * extra debugging checks to make sure all the items in a key are
606 * well formed and in the proper order
608 static int check_node(struct btrfs_root
*root
, struct btrfs_path
*path
,
611 struct extent_buffer
*parent
= NULL
;
612 struct extent_buffer
*node
= path
->nodes
[level
];
613 struct btrfs_disk_key parent_key
;
614 struct btrfs_disk_key node_key
;
617 struct btrfs_key cpukey
;
618 u32 nritems
= btrfs_header_nritems(node
);
620 if (path
->nodes
[level
+ 1])
621 parent
= path
->nodes
[level
+ 1];
623 slot
= path
->slots
[level
];
624 BUG_ON(nritems
== 0);
626 parent_slot
= path
->slots
[level
+ 1];
627 btrfs_node_key(parent
, &parent_key
, parent_slot
);
628 btrfs_node_key(node
, &node_key
, 0);
629 BUG_ON(memcmp(&parent_key
, &node_key
,
630 sizeof(struct btrfs_disk_key
)));
631 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
632 btrfs_header_bytenr(node
));
634 BUG_ON(nritems
> BTRFS_NODEPTRS_PER_BLOCK(root
));
636 btrfs_node_key_to_cpu(node
, &cpukey
, slot
- 1);
637 btrfs_node_key(node
, &node_key
, slot
);
638 BUG_ON(comp_keys(&node_key
, &cpukey
) <= 0);
640 if (slot
< nritems
- 1) {
641 btrfs_node_key_to_cpu(node
, &cpukey
, slot
+ 1);
642 btrfs_node_key(node
, &node_key
, slot
);
643 BUG_ON(comp_keys(&node_key
, &cpukey
) >= 0);
649 * extra checking to make sure all the items in a leaf are
650 * well formed and in the proper order
652 static int check_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
,
655 struct extent_buffer
*leaf
= path
->nodes
[level
];
656 struct extent_buffer
*parent
= NULL
;
658 struct btrfs_key cpukey
;
659 struct btrfs_disk_key parent_key
;
660 struct btrfs_disk_key leaf_key
;
661 int slot
= path
->slots
[0];
663 u32 nritems
= btrfs_header_nritems(leaf
);
665 if (path
->nodes
[level
+ 1])
666 parent
= path
->nodes
[level
+ 1];
672 parent_slot
= path
->slots
[level
+ 1];
673 btrfs_node_key(parent
, &parent_key
, parent_slot
);
674 btrfs_item_key(leaf
, &leaf_key
, 0);
676 BUG_ON(memcmp(&parent_key
, &leaf_key
,
677 sizeof(struct btrfs_disk_key
)));
678 BUG_ON(btrfs_node_blockptr(parent
, parent_slot
) !=
679 btrfs_header_bytenr(leaf
));
681 if (slot
!= 0 && slot
< nritems
- 1) {
682 btrfs_item_key(leaf
, &leaf_key
, slot
);
683 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
- 1);
684 if (comp_keys(&leaf_key
, &cpukey
) <= 0) {
685 btrfs_print_leaf(root
, leaf
);
686 printk(KERN_CRIT
"slot %d offset bad key\n", slot
);
689 if (btrfs_item_offset_nr(leaf
, slot
- 1) !=
690 btrfs_item_end_nr(leaf
, slot
)) {
691 btrfs_print_leaf(root
, leaf
);
692 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
696 if (slot
< nritems
- 1) {
697 btrfs_item_key(leaf
, &leaf_key
, slot
);
698 btrfs_item_key_to_cpu(leaf
, &cpukey
, slot
+ 1);
699 BUG_ON(comp_keys(&leaf_key
, &cpukey
) >= 0);
700 if (btrfs_item_offset_nr(leaf
, slot
) !=
701 btrfs_item_end_nr(leaf
, slot
+ 1)) {
702 btrfs_print_leaf(root
, leaf
);
703 printk(KERN_CRIT
"slot %d offset bad\n", slot
);
707 BUG_ON(btrfs_item_offset_nr(leaf
, 0) +
708 btrfs_item_size_nr(leaf
, 0) != BTRFS_LEAF_DATA_SIZE(root
));
712 static noinline
int check_block(struct btrfs_root
*root
,
713 struct btrfs_path
*path
, int level
)
717 return check_leaf(root
, path
, level
);
718 return check_node(root
, path
, level
);
722 * search for key in the extent_buffer. The items start at offset p,
723 * and they are item_size apart. There are 'max' items in p.
725 * the slot in the array is returned via slot, and it points to
726 * the place where you would insert key if it is not found in
729 * slot may point to max if the key is bigger than all of the keys
731 static noinline
int generic_bin_search(struct extent_buffer
*eb
,
733 int item_size
, struct btrfs_key
*key
,
740 struct btrfs_disk_key
*tmp
= NULL
;
741 struct btrfs_disk_key unaligned
;
742 unsigned long offset
;
743 char *map_token
= NULL
;
745 unsigned long map_start
= 0;
746 unsigned long map_len
= 0;
750 mid
= (low
+ high
) / 2;
751 offset
= p
+ mid
* item_size
;
753 if (!map_token
|| offset
< map_start
||
754 (offset
+ sizeof(struct btrfs_disk_key
)) >
755 map_start
+ map_len
) {
757 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
761 err
= map_private_extent_buffer(eb
, offset
,
762 sizeof(struct btrfs_disk_key
),
764 &map_start
, &map_len
, KM_USER0
);
767 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
770 read_extent_buffer(eb
, &unaligned
,
771 offset
, sizeof(unaligned
));
776 tmp
= (struct btrfs_disk_key
*)(kaddr
+ offset
-
779 ret
= comp_keys(tmp
, key
);
788 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
794 unmap_extent_buffer(eb
, map_token
, KM_USER0
);
799 * simple bin_search frontend that does the right thing for
802 static int bin_search(struct extent_buffer
*eb
, struct btrfs_key
*key
,
803 int level
, int *slot
)
806 return generic_bin_search(eb
,
807 offsetof(struct btrfs_leaf
, items
),
808 sizeof(struct btrfs_item
),
809 key
, btrfs_header_nritems(eb
),
812 return generic_bin_search(eb
,
813 offsetof(struct btrfs_node
, ptrs
),
814 sizeof(struct btrfs_key_ptr
),
815 key
, btrfs_header_nritems(eb
),
821 /* given a node and slot number, this reads the blocks it points to. The
822 * extent buffer is returned with a reference taken (but unlocked).
823 * NULL is returned on error.
825 static noinline
struct extent_buffer
*read_node_slot(struct btrfs_root
*root
,
826 struct extent_buffer
*parent
, int slot
)
828 int level
= btrfs_header_level(parent
);
831 if (slot
>= btrfs_header_nritems(parent
))
836 return read_tree_block(root
, btrfs_node_blockptr(parent
, slot
),
837 btrfs_level_size(root
, level
- 1),
838 btrfs_node_ptr_generation(parent
, slot
));
842 * node level balancing, used to make sure nodes are in proper order for
843 * item deletion. We balance from the top down, so we have to make sure
844 * that a deletion won't leave an node completely empty later on.
846 static noinline
int balance_level(struct btrfs_trans_handle
*trans
,
847 struct btrfs_root
*root
,
848 struct btrfs_path
*path
, int level
)
850 struct extent_buffer
*right
= NULL
;
851 struct extent_buffer
*mid
;
852 struct extent_buffer
*left
= NULL
;
853 struct extent_buffer
*parent
= NULL
;
857 int orig_slot
= path
->slots
[level
];
858 int err_on_enospc
= 0;
864 mid
= path
->nodes
[level
];
865 WARN_ON(!path
->locks
[level
]);
866 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
868 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
870 if (level
< BTRFS_MAX_LEVEL
- 1)
871 parent
= path
->nodes
[level
+ 1];
872 pslot
= path
->slots
[level
+ 1];
875 * deal with the case where there is only one pointer in the root
876 * by promoting the node below to a root
879 struct extent_buffer
*child
;
881 if (btrfs_header_nritems(mid
) != 1)
884 /* promote the child to a root */
885 child
= read_node_slot(root
, mid
, 0);
886 btrfs_tree_lock(child
);
888 ret
= btrfs_cow_block(trans
, root
, child
, mid
, 0, &child
, 0);
891 spin_lock(&root
->node_lock
);
893 spin_unlock(&root
->node_lock
);
895 ret
= btrfs_update_extent_ref(trans
, root
, child
->start
,
896 mid
->start
, child
->start
,
897 root
->root_key
.objectid
,
898 trans
->transid
, level
- 1);
901 add_root_to_dirty_list(root
);
902 btrfs_tree_unlock(child
);
903 path
->locks
[level
] = 0;
904 path
->nodes
[level
] = NULL
;
905 clean_tree_block(trans
, root
, mid
);
906 btrfs_tree_unlock(mid
);
907 /* once for the path */
908 free_extent_buffer(mid
);
909 ret
= btrfs_free_extent(trans
, root
, mid
->start
, mid
->len
,
910 mid
->start
, root
->root_key
.objectid
,
911 btrfs_header_generation(mid
),
913 /* once for the root ptr */
914 free_extent_buffer(mid
);
917 if (btrfs_header_nritems(mid
) >
918 BTRFS_NODEPTRS_PER_BLOCK(root
) / 4)
921 if (btrfs_header_nritems(mid
) < 2)
924 left
= read_node_slot(root
, parent
, pslot
- 1);
926 btrfs_tree_lock(left
);
927 wret
= btrfs_cow_block(trans
, root
, left
,
928 parent
, pslot
- 1, &left
, 0);
934 right
= read_node_slot(root
, parent
, pslot
+ 1);
936 btrfs_tree_lock(right
);
937 wret
= btrfs_cow_block(trans
, root
, right
,
938 parent
, pslot
+ 1, &right
, 0);
945 /* first, try to make some room in the middle buffer */
947 orig_slot
+= btrfs_header_nritems(left
);
948 wret
= push_node_left(trans
, root
, left
, mid
, 1);
951 if (btrfs_header_nritems(mid
) < 2)
956 * then try to empty the right most buffer into the middle
959 wret
= push_node_left(trans
, root
, mid
, right
, 1);
960 if (wret
< 0 && wret
!= -ENOSPC
)
962 if (btrfs_header_nritems(right
) == 0) {
963 u64 bytenr
= right
->start
;
964 u64 generation
= btrfs_header_generation(parent
);
965 u32 blocksize
= right
->len
;
967 clean_tree_block(trans
, root
, right
);
968 btrfs_tree_unlock(right
);
969 free_extent_buffer(right
);
971 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
+
975 wret
= btrfs_free_extent(trans
, root
, bytenr
,
976 blocksize
, parent
->start
,
977 btrfs_header_owner(parent
),
978 generation
, level
, 1);
982 struct btrfs_disk_key right_key
;
983 btrfs_node_key(right
, &right_key
, 0);
984 btrfs_set_node_key(parent
, &right_key
, pslot
+ 1);
985 btrfs_mark_buffer_dirty(parent
);
988 if (btrfs_header_nritems(mid
) == 1) {
990 * we're not allowed to leave a node with one item in the
991 * tree during a delete. A deletion from lower in the tree
992 * could try to delete the only pointer in this node.
993 * So, pull some keys from the left.
994 * There has to be a left pointer at this point because
995 * otherwise we would have pulled some pointers from the
999 wret
= balance_node_right(trans
, root
, mid
, left
);
1005 wret
= push_node_left(trans
, root
, left
, mid
, 1);
1011 if (btrfs_header_nritems(mid
) == 0) {
1012 /* we've managed to empty the middle node, drop it */
1013 u64 root_gen
= btrfs_header_generation(parent
);
1014 u64 bytenr
= mid
->start
;
1015 u32 blocksize
= mid
->len
;
1017 clean_tree_block(trans
, root
, mid
);
1018 btrfs_tree_unlock(mid
);
1019 free_extent_buffer(mid
);
1021 wret
= del_ptr(trans
, root
, path
, level
+ 1, pslot
);
1024 wret
= btrfs_free_extent(trans
, root
, bytenr
, blocksize
,
1026 btrfs_header_owner(parent
),
1027 root_gen
, level
, 1);
1031 /* update the parent key to reflect our changes */
1032 struct btrfs_disk_key mid_key
;
1033 btrfs_node_key(mid
, &mid_key
, 0);
1034 btrfs_set_node_key(parent
, &mid_key
, pslot
);
1035 btrfs_mark_buffer_dirty(parent
);
1038 /* update the path */
1040 if (btrfs_header_nritems(left
) > orig_slot
) {
1041 extent_buffer_get(left
);
1042 /* left was locked after cow */
1043 path
->nodes
[level
] = left
;
1044 path
->slots
[level
+ 1] -= 1;
1045 path
->slots
[level
] = orig_slot
;
1047 btrfs_tree_unlock(mid
);
1048 free_extent_buffer(mid
);
1051 orig_slot
-= btrfs_header_nritems(left
);
1052 path
->slots
[level
] = orig_slot
;
1055 /* double check we haven't messed things up */
1056 check_block(root
, path
, level
);
1058 btrfs_node_blockptr(path
->nodes
[level
], path
->slots
[level
]))
1062 btrfs_tree_unlock(right
);
1063 free_extent_buffer(right
);
1066 if (path
->nodes
[level
] != left
)
1067 btrfs_tree_unlock(left
);
1068 free_extent_buffer(left
);
1073 /* Node balancing for insertion. Here we only split or push nodes around
1074 * when they are completely full. This is also done top down, so we
1075 * have to be pessimistic.
1077 static noinline
int push_nodes_for_insert(struct btrfs_trans_handle
*trans
,
1078 struct btrfs_root
*root
,
1079 struct btrfs_path
*path
, int level
)
1081 struct extent_buffer
*right
= NULL
;
1082 struct extent_buffer
*mid
;
1083 struct extent_buffer
*left
= NULL
;
1084 struct extent_buffer
*parent
= NULL
;
1088 int orig_slot
= path
->slots
[level
];
1094 mid
= path
->nodes
[level
];
1095 WARN_ON(btrfs_header_generation(mid
) != trans
->transid
);
1096 orig_ptr
= btrfs_node_blockptr(mid
, orig_slot
);
1098 if (level
< BTRFS_MAX_LEVEL
- 1)
1099 parent
= path
->nodes
[level
+ 1];
1100 pslot
= path
->slots
[level
+ 1];
1105 left
= read_node_slot(root
, parent
, pslot
- 1);
1107 /* first, try to make some room in the middle buffer */
1111 btrfs_tree_lock(left
);
1112 left_nr
= btrfs_header_nritems(left
);
1113 if (left_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1116 ret
= btrfs_cow_block(trans
, root
, left
, parent
,
1117 pslot
- 1, &left
, 0);
1121 wret
= push_node_left(trans
, root
,
1128 struct btrfs_disk_key disk_key
;
1129 orig_slot
+= left_nr
;
1130 btrfs_node_key(mid
, &disk_key
, 0);
1131 btrfs_set_node_key(parent
, &disk_key
, pslot
);
1132 btrfs_mark_buffer_dirty(parent
);
1133 if (btrfs_header_nritems(left
) > orig_slot
) {
1134 path
->nodes
[level
] = left
;
1135 path
->slots
[level
+ 1] -= 1;
1136 path
->slots
[level
] = orig_slot
;
1137 btrfs_tree_unlock(mid
);
1138 free_extent_buffer(mid
);
1141 btrfs_header_nritems(left
);
1142 path
->slots
[level
] = orig_slot
;
1143 btrfs_tree_unlock(left
);
1144 free_extent_buffer(left
);
1148 btrfs_tree_unlock(left
);
1149 free_extent_buffer(left
);
1151 right
= read_node_slot(root
, parent
, pslot
+ 1);
1154 * then try to empty the right most buffer into the middle
1158 btrfs_tree_lock(right
);
1159 right_nr
= btrfs_header_nritems(right
);
1160 if (right_nr
>= BTRFS_NODEPTRS_PER_BLOCK(root
) - 1) {
1163 ret
= btrfs_cow_block(trans
, root
, right
,
1169 wret
= balance_node_right(trans
, root
,
1176 struct btrfs_disk_key disk_key
;
1178 btrfs_node_key(right
, &disk_key
, 0);
1179 btrfs_set_node_key(parent
, &disk_key
, pslot
+ 1);
1180 btrfs_mark_buffer_dirty(parent
);
1182 if (btrfs_header_nritems(mid
) <= orig_slot
) {
1183 path
->nodes
[level
] = right
;
1184 path
->slots
[level
+ 1] += 1;
1185 path
->slots
[level
] = orig_slot
-
1186 btrfs_header_nritems(mid
);
1187 btrfs_tree_unlock(mid
);
1188 free_extent_buffer(mid
);
1190 btrfs_tree_unlock(right
);
1191 free_extent_buffer(right
);
1195 btrfs_tree_unlock(right
);
1196 free_extent_buffer(right
);
1202 * readahead one full node of leaves, finding things that are close
1203 * to the block in 'slot', and triggering ra on them.
1205 static noinline
void reada_for_search(struct btrfs_root
*root
,
1206 struct btrfs_path
*path
,
1207 int level
, int slot
, u64 objectid
)
1209 struct extent_buffer
*node
;
1210 struct btrfs_disk_key disk_key
;
1216 int direction
= path
->reada
;
1217 struct extent_buffer
*eb
;
1225 if (!path
->nodes
[level
])
1228 node
= path
->nodes
[level
];
1230 search
= btrfs_node_blockptr(node
, slot
);
1231 blocksize
= btrfs_level_size(root
, level
- 1);
1232 eb
= btrfs_find_tree_block(root
, search
, blocksize
);
1234 free_extent_buffer(eb
);
1238 highest_read
= search
;
1239 lowest_read
= search
;
1241 nritems
= btrfs_header_nritems(node
);
1244 if (direction
< 0) {
1248 } else if (direction
> 0) {
1253 if (path
->reada
< 0 && objectid
) {
1254 btrfs_node_key(node
, &disk_key
, nr
);
1255 if (btrfs_disk_key_objectid(&disk_key
) != objectid
)
1258 search
= btrfs_node_blockptr(node
, nr
);
1259 if ((search
>= lowest_read
&& search
<= highest_read
) ||
1260 (search
< lowest_read
&& lowest_read
- search
<= 16384) ||
1261 (search
> highest_read
&& search
- highest_read
<= 16384)) {
1262 readahead_tree_block(root
, search
, blocksize
,
1263 btrfs_node_ptr_generation(node
, nr
));
1267 if (path
->reada
< 2 && (nread
> (64 * 1024) || nscan
> 32))
1270 if (nread
> (256 * 1024) || nscan
> 128)
1273 if (search
< lowest_read
)
1274 lowest_read
= search
;
1275 if (search
> highest_read
)
1276 highest_read
= search
;
1281 * when we walk down the tree, it is usually safe to unlock the higher layers
1282 * in the tree. The exceptions are when our path goes through slot 0, because
1283 * operations on the tree might require changing key pointers higher up in the
1286 * callers might also have set path->keep_locks, which tells this code to keep
1287 * the lock if the path points to the last slot in the block. This is part of
1288 * walking through the tree, and selecting the next slot in the higher block.
1290 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1291 * if lowest_unlock is 1, level 0 won't be unlocked
1293 static noinline
void unlock_up(struct btrfs_path
*path
, int level
,
1297 int skip_level
= level
;
1299 struct extent_buffer
*t
;
1301 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1302 if (!path
->nodes
[i
])
1304 if (!path
->locks
[i
])
1306 if (!no_skips
&& path
->slots
[i
] == 0) {
1310 if (!no_skips
&& path
->keep_locks
) {
1313 nritems
= btrfs_header_nritems(t
);
1314 if (nritems
< 1 || path
->slots
[i
] >= nritems
- 1) {
1319 if (skip_level
< i
&& i
>= lowest_unlock
)
1323 if (i
>= lowest_unlock
&& i
> skip_level
&& path
->locks
[i
]) {
1324 btrfs_tree_unlock(t
);
1331 * look for key in the tree. path is filled in with nodes along the way
1332 * if key is found, we return zero and you can find the item in the leaf
1333 * level of the path (level 0)
1335 * If the key isn't found, the path points to the slot where it should
1336 * be inserted, and 1 is returned. If there are other errors during the
1337 * search a negative error number is returned.
1339 * if ins_len > 0, nodes and leaves will be split as we walk down the
1340 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
1343 int btrfs_search_slot(struct btrfs_trans_handle
*trans
, struct btrfs_root
1344 *root
, struct btrfs_key
*key
, struct btrfs_path
*p
, int
1347 struct extent_buffer
*b
;
1348 struct extent_buffer
*tmp
;
1352 int should_reada
= p
->reada
;
1353 int lowest_unlock
= 1;
1355 u8 lowest_level
= 0;
1358 struct btrfs_key prealloc_block
;
1360 lowest_level
= p
->lowest_level
;
1361 WARN_ON(lowest_level
&& ins_len
> 0);
1362 WARN_ON(p
->nodes
[0] != NULL
);
1367 prealloc_block
.objectid
= 0;
1370 if (p
->skip_locking
)
1371 b
= btrfs_root_node(root
);
1373 b
= btrfs_lock_root_node(root
);
1376 level
= btrfs_header_level(b
);
1379 * setup the path here so we can release it under lock
1380 * contention with the cow code
1382 p
->nodes
[level
] = b
;
1383 if (!p
->skip_locking
)
1384 p
->locks
[level
] = 1;
1389 /* is a cow on this block not required */
1390 spin_lock(&root
->fs_info
->hash_lock
);
1391 if (btrfs_header_generation(b
) == trans
->transid
&&
1392 btrfs_header_owner(b
) == root
->root_key
.objectid
&&
1393 !btrfs_header_flag(b
, BTRFS_HEADER_FLAG_WRITTEN
)) {
1394 spin_unlock(&root
->fs_info
->hash_lock
);
1397 spin_unlock(&root
->fs_info
->hash_lock
);
1399 /* ok, we have to cow, is our old prealloc the right
1402 if (prealloc_block
.objectid
&&
1403 prealloc_block
.offset
!= b
->len
) {
1404 btrfs_free_reserved_extent(root
,
1405 prealloc_block
.objectid
,
1406 prealloc_block
.offset
);
1407 prealloc_block
.objectid
= 0;
1411 * for higher level blocks, try not to allocate blocks
1412 * with the block and the parent locks held.
1414 if (level
> 1 && !prealloc_block
.objectid
&&
1415 btrfs_path_lock_waiting(p
, level
)) {
1417 u64 hint
= b
->start
;
1419 btrfs_release_path(root
, p
);
1420 ret
= btrfs_reserve_extent(trans
, root
,
1423 &prealloc_block
, 0);
1428 wret
= btrfs_cow_block(trans
, root
, b
,
1429 p
->nodes
[level
+ 1],
1430 p
->slots
[level
+ 1],
1431 &b
, prealloc_block
.objectid
);
1432 prealloc_block
.objectid
= 0;
1434 free_extent_buffer(b
);
1440 BUG_ON(!cow
&& ins_len
);
1441 if (level
!= btrfs_header_level(b
))
1443 level
= btrfs_header_level(b
);
1445 p
->nodes
[level
] = b
;
1446 if (!p
->skip_locking
)
1447 p
->locks
[level
] = 1;
1449 ret
= check_block(root
, p
, level
);
1455 ret
= bin_search(b
, key
, level
, &slot
);
1457 if (ret
&& slot
> 0)
1459 p
->slots
[level
] = slot
;
1460 if ((p
->search_for_split
|| ins_len
> 0) &&
1461 btrfs_header_nritems(b
) >=
1462 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3) {
1463 int sret
= split_node(trans
, root
, p
, level
);
1469 b
= p
->nodes
[level
];
1470 slot
= p
->slots
[level
];
1471 } else if (ins_len
< 0) {
1472 int sret
= balance_level(trans
, root
, p
,
1478 b
= p
->nodes
[level
];
1480 btrfs_release_path(NULL
, p
);
1483 slot
= p
->slots
[level
];
1484 BUG_ON(btrfs_header_nritems(b
) == 1);
1486 unlock_up(p
, level
, lowest_unlock
);
1488 /* this is only true while dropping a snapshot */
1489 if (level
== lowest_level
) {
1494 blocknr
= btrfs_node_blockptr(b
, slot
);
1495 gen
= btrfs_node_ptr_generation(b
, slot
);
1496 blocksize
= btrfs_level_size(root
, level
- 1);
1498 tmp
= btrfs_find_tree_block(root
, blocknr
, blocksize
);
1499 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
1503 * reduce lock contention at high levels
1504 * of the btree by dropping locks before
1508 btrfs_release_path(NULL
, p
);
1510 free_extent_buffer(tmp
);
1512 reada_for_search(root
, p
,
1516 tmp
= read_tree_block(root
, blocknr
,
1519 free_extent_buffer(tmp
);
1523 free_extent_buffer(tmp
);
1525 reada_for_search(root
, p
,
1528 b
= read_node_slot(root
, b
, slot
);
1531 if (!p
->skip_locking
)
1534 p
->slots
[level
] = slot
;
1536 btrfs_leaf_free_space(root
, b
) < ins_len
) {
1537 int sret
= split_leaf(trans
, root
, key
,
1538 p
, ins_len
, ret
== 0);
1545 if (!p
->search_for_split
)
1546 unlock_up(p
, level
, lowest_unlock
);
1552 if (prealloc_block
.objectid
) {
1553 btrfs_free_reserved_extent(root
,
1554 prealloc_block
.objectid
,
1555 prealloc_block
.offset
);
1561 int btrfs_merge_path(struct btrfs_trans_handle
*trans
,
1562 struct btrfs_root
*root
,
1563 struct btrfs_key
*node_keys
,
1564 u64
*nodes
, int lowest_level
)
1566 struct extent_buffer
*eb
;
1567 struct extent_buffer
*parent
;
1568 struct btrfs_key key
;
1577 eb
= btrfs_lock_root_node(root
);
1578 ret
= btrfs_cow_block(trans
, root
, eb
, NULL
, 0, &eb
, 0);
1583 level
= btrfs_header_level(parent
);
1584 if (level
== 0 || level
<= lowest_level
)
1587 ret
= bin_search(parent
, &node_keys
[lowest_level
], level
,
1589 if (ret
&& slot
> 0)
1592 bytenr
= btrfs_node_blockptr(parent
, slot
);
1593 if (nodes
[level
- 1] == bytenr
)
1596 blocksize
= btrfs_level_size(root
, level
- 1);
1597 generation
= btrfs_node_ptr_generation(parent
, slot
);
1598 btrfs_node_key_to_cpu(eb
, &key
, slot
);
1599 key_match
= !memcmp(&key
, &node_keys
[level
- 1], sizeof(key
));
1601 if (generation
== trans
->transid
) {
1602 eb
= read_tree_block(root
, bytenr
, blocksize
,
1604 btrfs_tree_lock(eb
);
1608 * if node keys match and node pointer hasn't been modified
1609 * in the running transaction, we can merge the path. for
1610 * blocks owned by reloc trees, the node pointer check is
1611 * skipped, this is because these blocks are fully controlled
1612 * by the space balance code, no one else can modify them.
1614 if (!nodes
[level
- 1] || !key_match
||
1615 (generation
== trans
->transid
&&
1616 btrfs_header_owner(eb
) != BTRFS_TREE_RELOC_OBJECTID
)) {
1617 if (level
== 1 || level
== lowest_level
+ 1) {
1618 if (generation
== trans
->transid
) {
1619 btrfs_tree_unlock(eb
);
1620 free_extent_buffer(eb
);
1625 if (generation
!= trans
->transid
) {
1626 eb
= read_tree_block(root
, bytenr
, blocksize
,
1628 btrfs_tree_lock(eb
);
1631 ret
= btrfs_cow_block(trans
, root
, eb
, parent
, slot
,
1635 if (root
->root_key
.objectid
==
1636 BTRFS_TREE_RELOC_OBJECTID
) {
1637 if (!nodes
[level
- 1]) {
1638 nodes
[level
- 1] = eb
->start
;
1639 memcpy(&node_keys
[level
- 1], &key
,
1640 sizeof(node_keys
[0]));
1646 btrfs_tree_unlock(parent
);
1647 free_extent_buffer(parent
);
1652 btrfs_set_node_blockptr(parent
, slot
, nodes
[level
- 1]);
1653 btrfs_set_node_ptr_generation(parent
, slot
, trans
->transid
);
1654 btrfs_mark_buffer_dirty(parent
);
1656 ret
= btrfs_inc_extent_ref(trans
, root
,
1658 blocksize
, parent
->start
,
1659 btrfs_header_owner(parent
),
1660 btrfs_header_generation(parent
),
1665 * If the block was created in the running transaction,
1666 * it's possible this is the last reference to it, so we
1667 * should drop the subtree.
1669 if (generation
== trans
->transid
) {
1670 ret
= btrfs_drop_subtree(trans
, root
, eb
, parent
);
1672 btrfs_tree_unlock(eb
);
1673 free_extent_buffer(eb
);
1675 ret
= btrfs_free_extent(trans
, root
, bytenr
,
1676 blocksize
, parent
->start
,
1677 btrfs_header_owner(parent
),
1678 btrfs_header_generation(parent
),
1684 btrfs_tree_unlock(parent
);
1685 free_extent_buffer(parent
);
1690 * adjust the pointers going up the tree, starting at level
1691 * making sure the right key of each node points to 'key'.
1692 * This is used after shifting pointers to the left, so it stops
1693 * fixing up pointers when a given leaf/node is not in slot 0 of the
1696 * If this fails to write a tree block, it returns -1, but continues
1697 * fixing up the blocks in ram so the tree is consistent.
1699 static int fixup_low_keys(struct btrfs_trans_handle
*trans
,
1700 struct btrfs_root
*root
, struct btrfs_path
*path
,
1701 struct btrfs_disk_key
*key
, int level
)
1705 struct extent_buffer
*t
;
1707 for (i
= level
; i
< BTRFS_MAX_LEVEL
; i
++) {
1708 int tslot
= path
->slots
[i
];
1709 if (!path
->nodes
[i
])
1712 btrfs_set_node_key(t
, key
, tslot
);
1713 btrfs_mark_buffer_dirty(path
->nodes
[i
]);
1723 * This function isn't completely safe. It's the caller's responsibility
1724 * that the new key won't break the order
1726 int btrfs_set_item_key_safe(struct btrfs_trans_handle
*trans
,
1727 struct btrfs_root
*root
, struct btrfs_path
*path
,
1728 struct btrfs_key
*new_key
)
1730 struct btrfs_disk_key disk_key
;
1731 struct extent_buffer
*eb
;
1734 eb
= path
->nodes
[0];
1735 slot
= path
->slots
[0];
1737 btrfs_item_key(eb
, &disk_key
, slot
- 1);
1738 if (comp_keys(&disk_key
, new_key
) >= 0)
1741 if (slot
< btrfs_header_nritems(eb
) - 1) {
1742 btrfs_item_key(eb
, &disk_key
, slot
+ 1);
1743 if (comp_keys(&disk_key
, new_key
) <= 0)
1747 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
1748 btrfs_set_item_key(eb
, &disk_key
, slot
);
1749 btrfs_mark_buffer_dirty(eb
);
1751 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
1756 * try to push data from one node into the next node left in the
1759 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1760 * error, and > 0 if there was no room in the left hand block.
1762 static int push_node_left(struct btrfs_trans_handle
*trans
,
1763 struct btrfs_root
*root
, struct extent_buffer
*dst
,
1764 struct extent_buffer
*src
, int empty
)
1771 src_nritems
= btrfs_header_nritems(src
);
1772 dst_nritems
= btrfs_header_nritems(dst
);
1773 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
1774 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
1775 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
1777 if (!empty
&& src_nritems
<= 8)
1780 if (push_items
<= 0)
1784 push_items
= min(src_nritems
, push_items
);
1785 if (push_items
< src_nritems
) {
1786 /* leave at least 8 pointers in the node if
1787 * we aren't going to empty it
1789 if (src_nritems
- push_items
< 8) {
1790 if (push_items
<= 8)
1796 push_items
= min(src_nritems
- 8, push_items
);
1798 copy_extent_buffer(dst
, src
,
1799 btrfs_node_key_ptr_offset(dst_nritems
),
1800 btrfs_node_key_ptr_offset(0),
1801 push_items
* sizeof(struct btrfs_key_ptr
));
1803 if (push_items
< src_nritems
) {
1804 memmove_extent_buffer(src
, btrfs_node_key_ptr_offset(0),
1805 btrfs_node_key_ptr_offset(push_items
),
1806 (src_nritems
- push_items
) *
1807 sizeof(struct btrfs_key_ptr
));
1809 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
1810 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
1811 btrfs_mark_buffer_dirty(src
);
1812 btrfs_mark_buffer_dirty(dst
);
1814 ret
= btrfs_update_ref(trans
, root
, src
, dst
, dst_nritems
, push_items
);
1821 * try to push data from one node into the next node right in the
1824 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1825 * error, and > 0 if there was no room in the right hand block.
1827 * this will only push up to 1/2 the contents of the left node over
1829 static int balance_node_right(struct btrfs_trans_handle
*trans
,
1830 struct btrfs_root
*root
,
1831 struct extent_buffer
*dst
,
1832 struct extent_buffer
*src
)
1840 WARN_ON(btrfs_header_generation(src
) != trans
->transid
);
1841 WARN_ON(btrfs_header_generation(dst
) != trans
->transid
);
1843 src_nritems
= btrfs_header_nritems(src
);
1844 dst_nritems
= btrfs_header_nritems(dst
);
1845 push_items
= BTRFS_NODEPTRS_PER_BLOCK(root
) - dst_nritems
;
1846 if (push_items
<= 0)
1849 if (src_nritems
< 4)
1852 max_push
= src_nritems
/ 2 + 1;
1853 /* don't try to empty the node */
1854 if (max_push
>= src_nritems
)
1857 if (max_push
< push_items
)
1858 push_items
= max_push
;
1860 memmove_extent_buffer(dst
, btrfs_node_key_ptr_offset(push_items
),
1861 btrfs_node_key_ptr_offset(0),
1863 sizeof(struct btrfs_key_ptr
));
1865 copy_extent_buffer(dst
, src
,
1866 btrfs_node_key_ptr_offset(0),
1867 btrfs_node_key_ptr_offset(src_nritems
- push_items
),
1868 push_items
* sizeof(struct btrfs_key_ptr
));
1870 btrfs_set_header_nritems(src
, src_nritems
- push_items
);
1871 btrfs_set_header_nritems(dst
, dst_nritems
+ push_items
);
1873 btrfs_mark_buffer_dirty(src
);
1874 btrfs_mark_buffer_dirty(dst
);
1876 ret
= btrfs_update_ref(trans
, root
, src
, dst
, 0, push_items
);
1883 * helper function to insert a new root level in the tree.
1884 * A new node is allocated, and a single item is inserted to
1885 * point to the existing root
1887 * returns zero on success or < 0 on failure.
1889 static noinline
int insert_new_root(struct btrfs_trans_handle
*trans
,
1890 struct btrfs_root
*root
,
1891 struct btrfs_path
*path
, int level
)
1894 struct extent_buffer
*lower
;
1895 struct extent_buffer
*c
;
1896 struct extent_buffer
*old
;
1897 struct btrfs_disk_key lower_key
;
1900 BUG_ON(path
->nodes
[level
]);
1901 BUG_ON(path
->nodes
[level
-1] != root
->node
);
1903 lower
= path
->nodes
[level
-1];
1905 btrfs_item_key(lower
, &lower_key
, 0);
1907 btrfs_node_key(lower
, &lower_key
, 0);
1909 c
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
, 0,
1910 root
->root_key
.objectid
, trans
->transid
,
1911 level
, root
->node
->start
, 0);
1915 memset_extent_buffer(c
, 0, 0, root
->nodesize
);
1916 btrfs_set_header_nritems(c
, 1);
1917 btrfs_set_header_level(c
, level
);
1918 btrfs_set_header_bytenr(c
, c
->start
);
1919 btrfs_set_header_generation(c
, trans
->transid
);
1920 btrfs_set_header_owner(c
, root
->root_key
.objectid
);
1922 write_extent_buffer(c
, root
->fs_info
->fsid
,
1923 (unsigned long)btrfs_header_fsid(c
),
1926 write_extent_buffer(c
, root
->fs_info
->chunk_tree_uuid
,
1927 (unsigned long)btrfs_header_chunk_tree_uuid(c
),
1930 btrfs_set_node_key(c
, &lower_key
, 0);
1931 btrfs_set_node_blockptr(c
, 0, lower
->start
);
1932 lower_gen
= btrfs_header_generation(lower
);
1933 WARN_ON(lower_gen
!= trans
->transid
);
1935 btrfs_set_node_ptr_generation(c
, 0, lower_gen
);
1937 btrfs_mark_buffer_dirty(c
);
1939 spin_lock(&root
->node_lock
);
1942 spin_unlock(&root
->node_lock
);
1944 ret
= btrfs_update_extent_ref(trans
, root
, lower
->start
,
1945 lower
->start
, c
->start
,
1946 root
->root_key
.objectid
,
1947 trans
->transid
, level
- 1);
1950 /* the super has an extra ref to root->node */
1951 free_extent_buffer(old
);
1953 add_root_to_dirty_list(root
);
1954 extent_buffer_get(c
);
1955 path
->nodes
[level
] = c
;
1956 path
->locks
[level
] = 1;
1957 path
->slots
[level
] = 0;
1962 * worker function to insert a single pointer in a node.
1963 * the node should have enough room for the pointer already
1965 * slot and level indicate where you want the key to go, and
1966 * blocknr is the block the key points to.
1968 * returns zero on success and < 0 on any error
1970 static int insert_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
1971 *root
, struct btrfs_path
*path
, struct btrfs_disk_key
1972 *key
, u64 bytenr
, int slot
, int level
)
1974 struct extent_buffer
*lower
;
1977 BUG_ON(!path
->nodes
[level
]);
1978 lower
= path
->nodes
[level
];
1979 nritems
= btrfs_header_nritems(lower
);
1982 if (nritems
== BTRFS_NODEPTRS_PER_BLOCK(root
))
1984 if (slot
!= nritems
) {
1985 memmove_extent_buffer(lower
,
1986 btrfs_node_key_ptr_offset(slot
+ 1),
1987 btrfs_node_key_ptr_offset(slot
),
1988 (nritems
- slot
) * sizeof(struct btrfs_key_ptr
));
1990 btrfs_set_node_key(lower
, key
, slot
);
1991 btrfs_set_node_blockptr(lower
, slot
, bytenr
);
1992 WARN_ON(trans
->transid
== 0);
1993 btrfs_set_node_ptr_generation(lower
, slot
, trans
->transid
);
1994 btrfs_set_header_nritems(lower
, nritems
+ 1);
1995 btrfs_mark_buffer_dirty(lower
);
2000 * split the node at the specified level in path in two.
2001 * The path is corrected to point to the appropriate node after the split
2003 * Before splitting this tries to make some room in the node by pushing
2004 * left and right, if either one works, it returns right away.
2006 * returns 0 on success and < 0 on failure
2008 static noinline
int split_node(struct btrfs_trans_handle
*trans
,
2009 struct btrfs_root
*root
,
2010 struct btrfs_path
*path
, int level
)
2012 struct extent_buffer
*c
;
2013 struct extent_buffer
*split
;
2014 struct btrfs_disk_key disk_key
;
2020 c
= path
->nodes
[level
];
2021 WARN_ON(btrfs_header_generation(c
) != trans
->transid
);
2022 if (c
== root
->node
) {
2023 /* trying to split the root, let's make a new one */
2024 ret
= insert_new_root(trans
, root
, path
, level
+ 1);
2028 ret
= push_nodes_for_insert(trans
, root
, path
, level
);
2029 c
= path
->nodes
[level
];
2030 if (!ret
&& btrfs_header_nritems(c
) <
2031 BTRFS_NODEPTRS_PER_BLOCK(root
) - 3)
2037 c_nritems
= btrfs_header_nritems(c
);
2039 split
= btrfs_alloc_free_block(trans
, root
, root
->nodesize
,
2040 path
->nodes
[level
+ 1]->start
,
2041 root
->root_key
.objectid
,
2042 trans
->transid
, level
, c
->start
, 0);
2044 return PTR_ERR(split
);
2046 btrfs_set_header_flags(split
, btrfs_header_flags(c
));
2047 btrfs_set_header_level(split
, btrfs_header_level(c
));
2048 btrfs_set_header_bytenr(split
, split
->start
);
2049 btrfs_set_header_generation(split
, trans
->transid
);
2050 btrfs_set_header_owner(split
, root
->root_key
.objectid
);
2051 btrfs_set_header_flags(split
, 0);
2052 write_extent_buffer(split
, root
->fs_info
->fsid
,
2053 (unsigned long)btrfs_header_fsid(split
),
2055 write_extent_buffer(split
, root
->fs_info
->chunk_tree_uuid
,
2056 (unsigned long)btrfs_header_chunk_tree_uuid(split
),
2059 mid
= (c_nritems
+ 1) / 2;
2061 copy_extent_buffer(split
, c
,
2062 btrfs_node_key_ptr_offset(0),
2063 btrfs_node_key_ptr_offset(mid
),
2064 (c_nritems
- mid
) * sizeof(struct btrfs_key_ptr
));
2065 btrfs_set_header_nritems(split
, c_nritems
- mid
);
2066 btrfs_set_header_nritems(c
, mid
);
2069 btrfs_mark_buffer_dirty(c
);
2070 btrfs_mark_buffer_dirty(split
);
2072 btrfs_node_key(split
, &disk_key
, 0);
2073 wret
= insert_ptr(trans
, root
, path
, &disk_key
, split
->start
,
2074 path
->slots
[level
+ 1] + 1,
2079 ret
= btrfs_update_ref(trans
, root
, c
, split
, 0, c_nritems
- mid
);
2082 if (path
->slots
[level
] >= mid
) {
2083 path
->slots
[level
] -= mid
;
2084 btrfs_tree_unlock(c
);
2085 free_extent_buffer(c
);
2086 path
->nodes
[level
] = split
;
2087 path
->slots
[level
+ 1] += 1;
2089 btrfs_tree_unlock(split
);
2090 free_extent_buffer(split
);
2096 * how many bytes are required to store the items in a leaf. start
2097 * and nr indicate which items in the leaf to check. This totals up the
2098 * space used both by the item structs and the item data
2100 static int leaf_space_used(struct extent_buffer
*l
, int start
, int nr
)
2103 int nritems
= btrfs_header_nritems(l
);
2104 int end
= min(nritems
, start
+ nr
) - 1;
2108 data_len
= btrfs_item_end_nr(l
, start
);
2109 data_len
= data_len
- btrfs_item_offset_nr(l
, end
);
2110 data_len
+= sizeof(struct btrfs_item
) * nr
;
2111 WARN_ON(data_len
< 0);
2116 * The space between the end of the leaf items and
2117 * the start of the leaf data. IOW, how much room
2118 * the leaf has left for both items and data
2120 noinline
int btrfs_leaf_free_space(struct btrfs_root
*root
,
2121 struct extent_buffer
*leaf
)
2123 int nritems
= btrfs_header_nritems(leaf
);
2125 ret
= BTRFS_LEAF_DATA_SIZE(root
) - leaf_space_used(leaf
, 0, nritems
);
2127 printk(KERN_CRIT
"leaf free space ret %d, leaf data size %lu, "
2128 "used %d nritems %d\n",
2129 ret
, (unsigned long) BTRFS_LEAF_DATA_SIZE(root
),
2130 leaf_space_used(leaf
, 0, nritems
), nritems
);
2136 * push some data in the path leaf to the right, trying to free up at
2137 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2139 * returns 1 if the push failed because the other node didn't have enough
2140 * room, 0 if everything worked out and < 0 if there were major errors.
2142 static int push_leaf_right(struct btrfs_trans_handle
*trans
, struct btrfs_root
2143 *root
, struct btrfs_path
*path
, int data_size
,
2146 struct extent_buffer
*left
= path
->nodes
[0];
2147 struct extent_buffer
*right
;
2148 struct extent_buffer
*upper
;
2149 struct btrfs_disk_key disk_key
;
2155 struct btrfs_item
*item
;
2163 slot
= path
->slots
[1];
2164 if (!path
->nodes
[1])
2167 upper
= path
->nodes
[1];
2168 if (slot
>= btrfs_header_nritems(upper
) - 1)
2171 WARN_ON(!btrfs_tree_locked(path
->nodes
[1]));
2173 right
= read_node_slot(root
, upper
, slot
+ 1);
2174 btrfs_tree_lock(right
);
2175 free_space
= btrfs_leaf_free_space(root
, right
);
2176 if (free_space
< data_size
)
2179 /* cow and double check */
2180 ret
= btrfs_cow_block(trans
, root
, right
, upper
,
2181 slot
+ 1, &right
, 0);
2185 free_space
= btrfs_leaf_free_space(root
, right
);
2186 if (free_space
< data_size
)
2189 left_nritems
= btrfs_header_nritems(left
);
2190 if (left_nritems
== 0)
2198 if (path
->slots
[0] >= left_nritems
)
2199 push_space
+= data_size
;
2201 i
= left_nritems
- 1;
2203 item
= btrfs_item_nr(left
, i
);
2205 if (!empty
&& push_items
> 0) {
2206 if (path
->slots
[0] > i
)
2208 if (path
->slots
[0] == i
) {
2209 int space
= btrfs_leaf_free_space(root
, left
);
2210 if (space
+ push_space
* 2 > free_space
)
2215 if (path
->slots
[0] == i
)
2216 push_space
+= data_size
;
2218 if (!left
->map_token
) {
2219 map_extent_buffer(left
, (unsigned long)item
,
2220 sizeof(struct btrfs_item
),
2221 &left
->map_token
, &left
->kaddr
,
2222 &left
->map_start
, &left
->map_len
,
2226 this_item_size
= btrfs_item_size(left
, item
);
2227 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2231 push_space
+= this_item_size
+ sizeof(*item
);
2236 if (left
->map_token
) {
2237 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2238 left
->map_token
= NULL
;
2241 if (push_items
== 0)
2244 if (!empty
&& push_items
== left_nritems
)
2247 /* push left to right */
2248 right_nritems
= btrfs_header_nritems(right
);
2250 push_space
= btrfs_item_end_nr(left
, left_nritems
- push_items
);
2251 push_space
-= leaf_data_end(root
, left
);
2253 /* make room in the right data area */
2254 data_end
= leaf_data_end(root
, right
);
2255 memmove_extent_buffer(right
,
2256 btrfs_leaf_data(right
) + data_end
- push_space
,
2257 btrfs_leaf_data(right
) + data_end
,
2258 BTRFS_LEAF_DATA_SIZE(root
) - data_end
);
2260 /* copy from the left data area */
2261 copy_extent_buffer(right
, left
, btrfs_leaf_data(right
) +
2262 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2263 btrfs_leaf_data(left
) + leaf_data_end(root
, left
),
2266 memmove_extent_buffer(right
, btrfs_item_nr_offset(push_items
),
2267 btrfs_item_nr_offset(0),
2268 right_nritems
* sizeof(struct btrfs_item
));
2270 /* copy the items from left to right */
2271 copy_extent_buffer(right
, left
, btrfs_item_nr_offset(0),
2272 btrfs_item_nr_offset(left_nritems
- push_items
),
2273 push_items
* sizeof(struct btrfs_item
));
2275 /* update the item pointers */
2276 right_nritems
+= push_items
;
2277 btrfs_set_header_nritems(right
, right_nritems
);
2278 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2279 for (i
= 0; i
< right_nritems
; i
++) {
2280 item
= btrfs_item_nr(right
, i
);
2281 if (!right
->map_token
) {
2282 map_extent_buffer(right
, (unsigned long)item
,
2283 sizeof(struct btrfs_item
),
2284 &right
->map_token
, &right
->kaddr
,
2285 &right
->map_start
, &right
->map_len
,
2288 push_space
-= btrfs_item_size(right
, item
);
2289 btrfs_set_item_offset(right
, item
, push_space
);
2292 if (right
->map_token
) {
2293 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2294 right
->map_token
= NULL
;
2296 left_nritems
-= push_items
;
2297 btrfs_set_header_nritems(left
, left_nritems
);
2300 btrfs_mark_buffer_dirty(left
);
2301 btrfs_mark_buffer_dirty(right
);
2303 ret
= btrfs_update_ref(trans
, root
, left
, right
, 0, push_items
);
2306 btrfs_item_key(right
, &disk_key
, 0);
2307 btrfs_set_node_key(upper
, &disk_key
, slot
+ 1);
2308 btrfs_mark_buffer_dirty(upper
);
2310 /* then fixup the leaf pointer in the path */
2311 if (path
->slots
[0] >= left_nritems
) {
2312 path
->slots
[0] -= left_nritems
;
2313 if (btrfs_header_nritems(path
->nodes
[0]) == 0)
2314 clean_tree_block(trans
, root
, path
->nodes
[0]);
2315 btrfs_tree_unlock(path
->nodes
[0]);
2316 free_extent_buffer(path
->nodes
[0]);
2317 path
->nodes
[0] = right
;
2318 path
->slots
[1] += 1;
2320 btrfs_tree_unlock(right
);
2321 free_extent_buffer(right
);
2326 btrfs_tree_unlock(right
);
2327 free_extent_buffer(right
);
2332 * push some data in the path leaf to the left, trying to free up at
2333 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2335 static int push_leaf_left(struct btrfs_trans_handle
*trans
, struct btrfs_root
2336 *root
, struct btrfs_path
*path
, int data_size
,
2339 struct btrfs_disk_key disk_key
;
2340 struct extent_buffer
*right
= path
->nodes
[0];
2341 struct extent_buffer
*left
;
2347 struct btrfs_item
*item
;
2348 u32 old_left_nritems
;
2354 u32 old_left_item_size
;
2356 slot
= path
->slots
[1];
2359 if (!path
->nodes
[1])
2362 right_nritems
= btrfs_header_nritems(right
);
2363 if (right_nritems
== 0)
2366 WARN_ON(!btrfs_tree_locked(path
->nodes
[1]));
2368 left
= read_node_slot(root
, path
->nodes
[1], slot
- 1);
2369 btrfs_tree_lock(left
);
2370 free_space
= btrfs_leaf_free_space(root
, left
);
2371 if (free_space
< data_size
) {
2376 /* cow and double check */
2377 ret
= btrfs_cow_block(trans
, root
, left
,
2378 path
->nodes
[1], slot
- 1, &left
, 0);
2380 /* we hit -ENOSPC, but it isn't fatal here */
2385 free_space
= btrfs_leaf_free_space(root
, left
);
2386 if (free_space
< data_size
) {
2394 nr
= right_nritems
- 1;
2396 for (i
= 0; i
< nr
; i
++) {
2397 item
= btrfs_item_nr(right
, i
);
2398 if (!right
->map_token
) {
2399 map_extent_buffer(right
, (unsigned long)item
,
2400 sizeof(struct btrfs_item
),
2401 &right
->map_token
, &right
->kaddr
,
2402 &right
->map_start
, &right
->map_len
,
2406 if (!empty
&& push_items
> 0) {
2407 if (path
->slots
[0] < i
)
2409 if (path
->slots
[0] == i
) {
2410 int space
= btrfs_leaf_free_space(root
, right
);
2411 if (space
+ push_space
* 2 > free_space
)
2416 if (path
->slots
[0] == i
)
2417 push_space
+= data_size
;
2419 this_item_size
= btrfs_item_size(right
, item
);
2420 if (this_item_size
+ sizeof(*item
) + push_space
> free_space
)
2424 push_space
+= this_item_size
+ sizeof(*item
);
2427 if (right
->map_token
) {
2428 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2429 right
->map_token
= NULL
;
2432 if (push_items
== 0) {
2436 if (!empty
&& push_items
== btrfs_header_nritems(right
))
2439 /* push data from right to left */
2440 copy_extent_buffer(left
, right
,
2441 btrfs_item_nr_offset(btrfs_header_nritems(left
)),
2442 btrfs_item_nr_offset(0),
2443 push_items
* sizeof(struct btrfs_item
));
2445 push_space
= BTRFS_LEAF_DATA_SIZE(root
) -
2446 btrfs_item_offset_nr(right
, push_items
- 1);
2448 copy_extent_buffer(left
, right
, btrfs_leaf_data(left
) +
2449 leaf_data_end(root
, left
) - push_space
,
2450 btrfs_leaf_data(right
) +
2451 btrfs_item_offset_nr(right
, push_items
- 1),
2453 old_left_nritems
= btrfs_header_nritems(left
);
2454 BUG_ON(old_left_nritems
<= 0);
2456 old_left_item_size
= btrfs_item_offset_nr(left
, old_left_nritems
- 1);
2457 for (i
= old_left_nritems
; i
< old_left_nritems
+ push_items
; i
++) {
2460 item
= btrfs_item_nr(left
, i
);
2461 if (!left
->map_token
) {
2462 map_extent_buffer(left
, (unsigned long)item
,
2463 sizeof(struct btrfs_item
),
2464 &left
->map_token
, &left
->kaddr
,
2465 &left
->map_start
, &left
->map_len
,
2469 ioff
= btrfs_item_offset(left
, item
);
2470 btrfs_set_item_offset(left
, item
,
2471 ioff
- (BTRFS_LEAF_DATA_SIZE(root
) - old_left_item_size
));
2473 btrfs_set_header_nritems(left
, old_left_nritems
+ push_items
);
2474 if (left
->map_token
) {
2475 unmap_extent_buffer(left
, left
->map_token
, KM_USER1
);
2476 left
->map_token
= NULL
;
2479 /* fixup right node */
2480 if (push_items
> right_nritems
) {
2481 printk(KERN_CRIT
"push items %d nr %u\n", push_items
,
2486 if (push_items
< right_nritems
) {
2487 push_space
= btrfs_item_offset_nr(right
, push_items
- 1) -
2488 leaf_data_end(root
, right
);
2489 memmove_extent_buffer(right
, btrfs_leaf_data(right
) +
2490 BTRFS_LEAF_DATA_SIZE(root
) - push_space
,
2491 btrfs_leaf_data(right
) +
2492 leaf_data_end(root
, right
), push_space
);
2494 memmove_extent_buffer(right
, btrfs_item_nr_offset(0),
2495 btrfs_item_nr_offset(push_items
),
2496 (btrfs_header_nritems(right
) - push_items
) *
2497 sizeof(struct btrfs_item
));
2499 right_nritems
-= push_items
;
2500 btrfs_set_header_nritems(right
, right_nritems
);
2501 push_space
= BTRFS_LEAF_DATA_SIZE(root
);
2502 for (i
= 0; i
< right_nritems
; i
++) {
2503 item
= btrfs_item_nr(right
, i
);
2505 if (!right
->map_token
) {
2506 map_extent_buffer(right
, (unsigned long)item
,
2507 sizeof(struct btrfs_item
),
2508 &right
->map_token
, &right
->kaddr
,
2509 &right
->map_start
, &right
->map_len
,
2513 push_space
= push_space
- btrfs_item_size(right
, item
);
2514 btrfs_set_item_offset(right
, item
, push_space
);
2516 if (right
->map_token
) {
2517 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2518 right
->map_token
= NULL
;
2521 btrfs_mark_buffer_dirty(left
);
2523 btrfs_mark_buffer_dirty(right
);
2525 ret
= btrfs_update_ref(trans
, root
, right
, left
,
2526 old_left_nritems
, push_items
);
2529 btrfs_item_key(right
, &disk_key
, 0);
2530 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
2534 /* then fixup the leaf pointer in the path */
2535 if (path
->slots
[0] < push_items
) {
2536 path
->slots
[0] += old_left_nritems
;
2537 if (btrfs_header_nritems(path
->nodes
[0]) == 0)
2538 clean_tree_block(trans
, root
, path
->nodes
[0]);
2539 btrfs_tree_unlock(path
->nodes
[0]);
2540 free_extent_buffer(path
->nodes
[0]);
2541 path
->nodes
[0] = left
;
2542 path
->slots
[1] -= 1;
2544 btrfs_tree_unlock(left
);
2545 free_extent_buffer(left
);
2546 path
->slots
[0] -= push_items
;
2548 BUG_ON(path
->slots
[0] < 0);
2551 btrfs_tree_unlock(left
);
2552 free_extent_buffer(left
);
2557 * split the path's leaf in two, making sure there is at least data_size
2558 * available for the resulting leaf level of the path.
2560 * returns 0 if all went well and < 0 on failure.
2562 static noinline
int split_leaf(struct btrfs_trans_handle
*trans
,
2563 struct btrfs_root
*root
,
2564 struct btrfs_key
*ins_key
,
2565 struct btrfs_path
*path
, int data_size
,
2568 struct extent_buffer
*l
;
2572 struct extent_buffer
*right
;
2579 int num_doubles
= 0;
2580 struct btrfs_disk_key disk_key
;
2582 /* first try to make some room by pushing left and right */
2583 if (data_size
&& ins_key
->type
!= BTRFS_DIR_ITEM_KEY
) {
2584 wret
= push_leaf_right(trans
, root
, path
, data_size
, 0);
2588 wret
= push_leaf_left(trans
, root
, path
, data_size
, 0);
2594 /* did the pushes work? */
2595 if (btrfs_leaf_free_space(root
, l
) >= data_size
)
2599 if (!path
->nodes
[1]) {
2600 ret
= insert_new_root(trans
, root
, path
, 1);
2607 slot
= path
->slots
[0];
2608 nritems
= btrfs_header_nritems(l
);
2609 mid
= (nritems
+ 1) / 2;
2611 right
= btrfs_alloc_free_block(trans
, root
, root
->leafsize
,
2612 path
->nodes
[1]->start
,
2613 root
->root_key
.objectid
,
2614 trans
->transid
, 0, l
->start
, 0);
2615 if (IS_ERR(right
)) {
2617 return PTR_ERR(right
);
2620 memset_extent_buffer(right
, 0, 0, sizeof(struct btrfs_header
));
2621 btrfs_set_header_bytenr(right
, right
->start
);
2622 btrfs_set_header_generation(right
, trans
->transid
);
2623 btrfs_set_header_owner(right
, root
->root_key
.objectid
);
2624 btrfs_set_header_level(right
, 0);
2625 write_extent_buffer(right
, root
->fs_info
->fsid
,
2626 (unsigned long)btrfs_header_fsid(right
),
2629 write_extent_buffer(right
, root
->fs_info
->chunk_tree_uuid
,
2630 (unsigned long)btrfs_header_chunk_tree_uuid(right
),
2634 leaf_space_used(l
, mid
, nritems
- mid
) + data_size
>
2635 BTRFS_LEAF_DATA_SIZE(root
)) {
2636 if (slot
>= nritems
) {
2637 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
2638 btrfs_set_header_nritems(right
, 0);
2639 wret
= insert_ptr(trans
, root
, path
,
2640 &disk_key
, right
->start
,
2641 path
->slots
[1] + 1, 1);
2645 btrfs_tree_unlock(path
->nodes
[0]);
2646 free_extent_buffer(path
->nodes
[0]);
2647 path
->nodes
[0] = right
;
2649 path
->slots
[1] += 1;
2650 btrfs_mark_buffer_dirty(right
);
2654 if (mid
!= nritems
&&
2655 leaf_space_used(l
, mid
, nritems
- mid
) +
2656 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
2661 if (leaf_space_used(l
, 0, mid
) + data_size
>
2662 BTRFS_LEAF_DATA_SIZE(root
)) {
2663 if (!extend
&& data_size
&& slot
== 0) {
2664 btrfs_cpu_key_to_disk(&disk_key
, ins_key
);
2665 btrfs_set_header_nritems(right
, 0);
2666 wret
= insert_ptr(trans
, root
, path
,
2672 btrfs_tree_unlock(path
->nodes
[0]);
2673 free_extent_buffer(path
->nodes
[0]);
2674 path
->nodes
[0] = right
;
2676 if (path
->slots
[1] == 0) {
2677 wret
= fixup_low_keys(trans
, root
,
2678 path
, &disk_key
, 1);
2682 btrfs_mark_buffer_dirty(right
);
2684 } else if ((extend
|| !data_size
) && slot
== 0) {
2688 if (mid
!= nritems
&&
2689 leaf_space_used(l
, mid
, nritems
- mid
) +
2690 data_size
> BTRFS_LEAF_DATA_SIZE(root
)) {
2696 nritems
= nritems
- mid
;
2697 btrfs_set_header_nritems(right
, nritems
);
2698 data_copy_size
= btrfs_item_end_nr(l
, mid
) - leaf_data_end(root
, l
);
2700 copy_extent_buffer(right
, l
, btrfs_item_nr_offset(0),
2701 btrfs_item_nr_offset(mid
),
2702 nritems
* sizeof(struct btrfs_item
));
2704 copy_extent_buffer(right
, l
,
2705 btrfs_leaf_data(right
) + BTRFS_LEAF_DATA_SIZE(root
) -
2706 data_copy_size
, btrfs_leaf_data(l
) +
2707 leaf_data_end(root
, l
), data_copy_size
);
2709 rt_data_off
= BTRFS_LEAF_DATA_SIZE(root
) -
2710 btrfs_item_end_nr(l
, mid
);
2712 for (i
= 0; i
< nritems
; i
++) {
2713 struct btrfs_item
*item
= btrfs_item_nr(right
, i
);
2716 if (!right
->map_token
) {
2717 map_extent_buffer(right
, (unsigned long)item
,
2718 sizeof(struct btrfs_item
),
2719 &right
->map_token
, &right
->kaddr
,
2720 &right
->map_start
, &right
->map_len
,
2724 ioff
= btrfs_item_offset(right
, item
);
2725 btrfs_set_item_offset(right
, item
, ioff
+ rt_data_off
);
2728 if (right
->map_token
) {
2729 unmap_extent_buffer(right
, right
->map_token
, KM_USER1
);
2730 right
->map_token
= NULL
;
2733 btrfs_set_header_nritems(l
, mid
);
2735 btrfs_item_key(right
, &disk_key
, 0);
2736 wret
= insert_ptr(trans
, root
, path
, &disk_key
, right
->start
,
2737 path
->slots
[1] + 1, 1);
2741 btrfs_mark_buffer_dirty(right
);
2742 btrfs_mark_buffer_dirty(l
);
2743 BUG_ON(path
->slots
[0] != slot
);
2745 ret
= btrfs_update_ref(trans
, root
, l
, right
, 0, nritems
);
2749 btrfs_tree_unlock(path
->nodes
[0]);
2750 free_extent_buffer(path
->nodes
[0]);
2751 path
->nodes
[0] = right
;
2752 path
->slots
[0] -= mid
;
2753 path
->slots
[1] += 1;
2755 btrfs_tree_unlock(right
);
2756 free_extent_buffer(right
);
2759 BUG_ON(path
->slots
[0] < 0);
2762 BUG_ON(num_doubles
!= 0);
2770 * This function splits a single item into two items,
2771 * giving 'new_key' to the new item and splitting the
2772 * old one at split_offset (from the start of the item).
2774 * The path may be released by this operation. After
2775 * the split, the path is pointing to the old item. The
2776 * new item is going to be in the same node as the old one.
2778 * Note, the item being split must be smaller enough to live alone on
2779 * a tree block with room for one extra struct btrfs_item
2781 * This allows us to split the item in place, keeping a lock on the
2782 * leaf the entire time.
2784 int btrfs_split_item(struct btrfs_trans_handle
*trans
,
2785 struct btrfs_root
*root
,
2786 struct btrfs_path
*path
,
2787 struct btrfs_key
*new_key
,
2788 unsigned long split_offset
)
2791 struct extent_buffer
*leaf
;
2792 struct btrfs_key orig_key
;
2793 struct btrfs_item
*item
;
2794 struct btrfs_item
*new_item
;
2799 struct btrfs_disk_key disk_key
;
2802 leaf
= path
->nodes
[0];
2803 btrfs_item_key_to_cpu(leaf
, &orig_key
, path
->slots
[0]);
2804 if (btrfs_leaf_free_space(root
, leaf
) >= sizeof(struct btrfs_item
))
2807 item_size
= btrfs_item_size_nr(leaf
, path
->slots
[0]);
2808 btrfs_release_path(root
, path
);
2810 path
->search_for_split
= 1;
2811 path
->keep_locks
= 1;
2813 ret
= btrfs_search_slot(trans
, root
, &orig_key
, path
, 0, 1);
2814 path
->search_for_split
= 0;
2816 /* if our item isn't there or got smaller, return now */
2817 if (ret
!= 0 || item_size
!= btrfs_item_size_nr(path
->nodes
[0],
2819 path
->keep_locks
= 0;
2823 ret
= split_leaf(trans
, root
, &orig_key
, path
,
2824 sizeof(struct btrfs_item
), 1);
2825 path
->keep_locks
= 0;
2828 leaf
= path
->nodes
[0];
2829 BUG_ON(btrfs_leaf_free_space(root
, leaf
) < sizeof(struct btrfs_item
));
2832 item
= btrfs_item_nr(leaf
, path
->slots
[0]);
2833 orig_offset
= btrfs_item_offset(leaf
, item
);
2834 item_size
= btrfs_item_size(leaf
, item
);
2837 buf
= kmalloc(item_size
, GFP_NOFS
);
2838 read_extent_buffer(leaf
, buf
, btrfs_item_ptr_offset(leaf
,
2839 path
->slots
[0]), item_size
);
2840 slot
= path
->slots
[0] + 1;
2841 leaf
= path
->nodes
[0];
2843 nritems
= btrfs_header_nritems(leaf
);
2845 if (slot
!= nritems
) {
2846 /* shift the items */
2847 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ 1),
2848 btrfs_item_nr_offset(slot
),
2849 (nritems
- slot
) * sizeof(struct btrfs_item
));
2853 btrfs_cpu_key_to_disk(&disk_key
, new_key
);
2854 btrfs_set_item_key(leaf
, &disk_key
, slot
);
2856 new_item
= btrfs_item_nr(leaf
, slot
);
2858 btrfs_set_item_offset(leaf
, new_item
, orig_offset
);
2859 btrfs_set_item_size(leaf
, new_item
, item_size
- split_offset
);
2861 btrfs_set_item_offset(leaf
, item
,
2862 orig_offset
+ item_size
- split_offset
);
2863 btrfs_set_item_size(leaf
, item
, split_offset
);
2865 btrfs_set_header_nritems(leaf
, nritems
+ 1);
2867 /* write the data for the start of the original item */
2868 write_extent_buffer(leaf
, buf
,
2869 btrfs_item_ptr_offset(leaf
, path
->slots
[0]),
2872 /* write the data for the new item */
2873 write_extent_buffer(leaf
, buf
+ split_offset
,
2874 btrfs_item_ptr_offset(leaf
, slot
),
2875 item_size
- split_offset
);
2876 btrfs_mark_buffer_dirty(leaf
);
2879 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
2880 btrfs_print_leaf(root
, leaf
);
2888 * make the item pointed to by the path smaller. new_size indicates
2889 * how small to make it, and from_end tells us if we just chop bytes
2890 * off the end of the item or if we shift the item to chop bytes off
2893 int btrfs_truncate_item(struct btrfs_trans_handle
*trans
,
2894 struct btrfs_root
*root
,
2895 struct btrfs_path
*path
,
2896 u32 new_size
, int from_end
)
2901 struct extent_buffer
*leaf
;
2902 struct btrfs_item
*item
;
2904 unsigned int data_end
;
2905 unsigned int old_data_start
;
2906 unsigned int old_size
;
2907 unsigned int size_diff
;
2910 slot_orig
= path
->slots
[0];
2911 leaf
= path
->nodes
[0];
2912 slot
= path
->slots
[0];
2914 old_size
= btrfs_item_size_nr(leaf
, slot
);
2915 if (old_size
== new_size
)
2918 nritems
= btrfs_header_nritems(leaf
);
2919 data_end
= leaf_data_end(root
, leaf
);
2921 old_data_start
= btrfs_item_offset_nr(leaf
, slot
);
2923 size_diff
= old_size
- new_size
;
2926 BUG_ON(slot
>= nritems
);
2929 * item0..itemN ... dataN.offset..dataN.size .. data0.size
2931 /* first correct the data pointers */
2932 for (i
= slot
; i
< nritems
; i
++) {
2934 item
= btrfs_item_nr(leaf
, i
);
2936 if (!leaf
->map_token
) {
2937 map_extent_buffer(leaf
, (unsigned long)item
,
2938 sizeof(struct btrfs_item
),
2939 &leaf
->map_token
, &leaf
->kaddr
,
2940 &leaf
->map_start
, &leaf
->map_len
,
2944 ioff
= btrfs_item_offset(leaf
, item
);
2945 btrfs_set_item_offset(leaf
, item
, ioff
+ size_diff
);
2948 if (leaf
->map_token
) {
2949 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
2950 leaf
->map_token
= NULL
;
2953 /* shift the data */
2955 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
2956 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
2957 data_end
, old_data_start
+ new_size
- data_end
);
2959 struct btrfs_disk_key disk_key
;
2962 btrfs_item_key(leaf
, &disk_key
, slot
);
2964 if (btrfs_disk_key_type(&disk_key
) == BTRFS_EXTENT_DATA_KEY
) {
2966 struct btrfs_file_extent_item
*fi
;
2968 fi
= btrfs_item_ptr(leaf
, slot
,
2969 struct btrfs_file_extent_item
);
2970 fi
= (struct btrfs_file_extent_item
*)(
2971 (unsigned long)fi
- size_diff
);
2973 if (btrfs_file_extent_type(leaf
, fi
) ==
2974 BTRFS_FILE_EXTENT_INLINE
) {
2975 ptr
= btrfs_item_ptr_offset(leaf
, slot
);
2976 memmove_extent_buffer(leaf
, ptr
,
2978 offsetof(struct btrfs_file_extent_item
,
2983 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
2984 data_end
+ size_diff
, btrfs_leaf_data(leaf
) +
2985 data_end
, old_data_start
- data_end
);
2987 offset
= btrfs_disk_key_offset(&disk_key
);
2988 btrfs_set_disk_key_offset(&disk_key
, offset
+ size_diff
);
2989 btrfs_set_item_key(leaf
, &disk_key
, slot
);
2991 fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
2994 item
= btrfs_item_nr(leaf
, slot
);
2995 btrfs_set_item_size(leaf
, item
, new_size
);
2996 btrfs_mark_buffer_dirty(leaf
);
2999 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3000 btrfs_print_leaf(root
, leaf
);
3007 * make the item pointed to by the path bigger, data_size is the new size.
3009 int btrfs_extend_item(struct btrfs_trans_handle
*trans
,
3010 struct btrfs_root
*root
, struct btrfs_path
*path
,
3016 struct extent_buffer
*leaf
;
3017 struct btrfs_item
*item
;
3019 unsigned int data_end
;
3020 unsigned int old_data
;
3021 unsigned int old_size
;
3024 slot_orig
= path
->slots
[0];
3025 leaf
= path
->nodes
[0];
3027 nritems
= btrfs_header_nritems(leaf
);
3028 data_end
= leaf_data_end(root
, leaf
);
3030 if (btrfs_leaf_free_space(root
, leaf
) < data_size
) {
3031 btrfs_print_leaf(root
, leaf
);
3034 slot
= path
->slots
[0];
3035 old_data
= btrfs_item_end_nr(leaf
, slot
);
3038 if (slot
>= nritems
) {
3039 btrfs_print_leaf(root
, leaf
);
3040 printk(KERN_CRIT
"slot %d too large, nritems %d\n",
3046 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3048 /* first correct the data pointers */
3049 for (i
= slot
; i
< nritems
; i
++) {
3051 item
= btrfs_item_nr(leaf
, i
);
3053 if (!leaf
->map_token
) {
3054 map_extent_buffer(leaf
, (unsigned long)item
,
3055 sizeof(struct btrfs_item
),
3056 &leaf
->map_token
, &leaf
->kaddr
,
3057 &leaf
->map_start
, &leaf
->map_len
,
3060 ioff
= btrfs_item_offset(leaf
, item
);
3061 btrfs_set_item_offset(leaf
, item
, ioff
- data_size
);
3064 if (leaf
->map_token
) {
3065 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3066 leaf
->map_token
= NULL
;
3069 /* shift the data */
3070 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3071 data_end
- data_size
, btrfs_leaf_data(leaf
) +
3072 data_end
, old_data
- data_end
);
3074 data_end
= old_data
;
3075 old_size
= btrfs_item_size_nr(leaf
, slot
);
3076 item
= btrfs_item_nr(leaf
, slot
);
3077 btrfs_set_item_size(leaf
, item
, old_size
+ data_size
);
3078 btrfs_mark_buffer_dirty(leaf
);
3081 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3082 btrfs_print_leaf(root
, leaf
);
3089 * Given a key and some data, insert items into the tree.
3090 * This does all the path init required, making room in the tree if needed.
3091 * Returns the number of keys that were inserted.
3093 int btrfs_insert_some_items(struct btrfs_trans_handle
*trans
,
3094 struct btrfs_root
*root
,
3095 struct btrfs_path
*path
,
3096 struct btrfs_key
*cpu_key
, u32
*data_size
,
3099 struct extent_buffer
*leaf
;
3100 struct btrfs_item
*item
;
3107 unsigned int data_end
;
3108 struct btrfs_disk_key disk_key
;
3109 struct btrfs_key found_key
;
3111 for (i
= 0; i
< nr
; i
++) {
3112 if (total_size
+ data_size
[i
] + sizeof(struct btrfs_item
) >
3113 BTRFS_LEAF_DATA_SIZE(root
)) {
3117 total_data
+= data_size
[i
];
3118 total_size
+= data_size
[i
] + sizeof(struct btrfs_item
);
3122 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3128 leaf
= path
->nodes
[0];
3130 nritems
= btrfs_header_nritems(leaf
);
3131 data_end
= leaf_data_end(root
, leaf
);
3133 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3134 for (i
= nr
; i
>= 0; i
--) {
3135 total_data
-= data_size
[i
];
3136 total_size
-= data_size
[i
] + sizeof(struct btrfs_item
);
3137 if (total_size
< btrfs_leaf_free_space(root
, leaf
))
3143 slot
= path
->slots
[0];
3146 if (slot
!= nritems
) {
3147 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3149 item
= btrfs_item_nr(leaf
, slot
);
3150 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
3152 /* figure out how many keys we can insert in here */
3153 total_data
= data_size
[0];
3154 for (i
= 1; i
< nr
; i
++) {
3155 if (comp_cpu_keys(&found_key
, cpu_key
+ i
) <= 0)
3157 total_data
+= data_size
[i
];
3161 if (old_data
< data_end
) {
3162 btrfs_print_leaf(root
, leaf
);
3163 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3164 slot
, old_data
, data_end
);
3168 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3170 /* first correct the data pointers */
3171 WARN_ON(leaf
->map_token
);
3172 for (i
= slot
; i
< nritems
; i
++) {
3175 item
= btrfs_item_nr(leaf
, i
);
3176 if (!leaf
->map_token
) {
3177 map_extent_buffer(leaf
, (unsigned long)item
,
3178 sizeof(struct btrfs_item
),
3179 &leaf
->map_token
, &leaf
->kaddr
,
3180 &leaf
->map_start
, &leaf
->map_len
,
3184 ioff
= btrfs_item_offset(leaf
, item
);
3185 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3187 if (leaf
->map_token
) {
3188 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3189 leaf
->map_token
= NULL
;
3192 /* shift the items */
3193 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3194 btrfs_item_nr_offset(slot
),
3195 (nritems
- slot
) * sizeof(struct btrfs_item
));
3197 /* shift the data */
3198 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3199 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3200 data_end
, old_data
- data_end
);
3201 data_end
= old_data
;
3204 * this sucks but it has to be done, if we are inserting at
3205 * the end of the leaf only insert 1 of the items, since we
3206 * have no way of knowing whats on the next leaf and we'd have
3207 * to drop our current locks to figure it out
3212 /* setup the item for the new data */
3213 for (i
= 0; i
< nr
; i
++) {
3214 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3215 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3216 item
= btrfs_item_nr(leaf
, slot
+ i
);
3217 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3218 data_end
-= data_size
[i
];
3219 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3221 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3222 btrfs_mark_buffer_dirty(leaf
);
3226 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3227 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3230 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3231 btrfs_print_leaf(root
, leaf
);
3241 * Given a key and some data, insert items into the tree.
3242 * This does all the path init required, making room in the tree if needed.
3244 int btrfs_insert_empty_items(struct btrfs_trans_handle
*trans
,
3245 struct btrfs_root
*root
,
3246 struct btrfs_path
*path
,
3247 struct btrfs_key
*cpu_key
, u32
*data_size
,
3250 struct extent_buffer
*leaf
;
3251 struct btrfs_item
*item
;
3259 unsigned int data_end
;
3260 struct btrfs_disk_key disk_key
;
3262 for (i
= 0; i
< nr
; i
++)
3263 total_data
+= data_size
[i
];
3265 total_size
= total_data
+ (nr
* sizeof(struct btrfs_item
));
3266 ret
= btrfs_search_slot(trans
, root
, cpu_key
, path
, total_size
, 1);
3272 slot_orig
= path
->slots
[0];
3273 leaf
= path
->nodes
[0];
3275 nritems
= btrfs_header_nritems(leaf
);
3276 data_end
= leaf_data_end(root
, leaf
);
3278 if (btrfs_leaf_free_space(root
, leaf
) < total_size
) {
3279 btrfs_print_leaf(root
, leaf
);
3280 printk(KERN_CRIT
"not enough freespace need %u have %d\n",
3281 total_size
, btrfs_leaf_free_space(root
, leaf
));
3285 slot
= path
->slots
[0];
3288 if (slot
!= nritems
) {
3289 unsigned int old_data
= btrfs_item_end_nr(leaf
, slot
);
3291 if (old_data
< data_end
) {
3292 btrfs_print_leaf(root
, leaf
);
3293 printk(KERN_CRIT
"slot %d old_data %d data_end %d\n",
3294 slot
, old_data
, data_end
);
3298 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3300 /* first correct the data pointers */
3301 WARN_ON(leaf
->map_token
);
3302 for (i
= slot
; i
< nritems
; i
++) {
3305 item
= btrfs_item_nr(leaf
, i
);
3306 if (!leaf
->map_token
) {
3307 map_extent_buffer(leaf
, (unsigned long)item
,
3308 sizeof(struct btrfs_item
),
3309 &leaf
->map_token
, &leaf
->kaddr
,
3310 &leaf
->map_start
, &leaf
->map_len
,
3314 ioff
= btrfs_item_offset(leaf
, item
);
3315 btrfs_set_item_offset(leaf
, item
, ioff
- total_data
);
3317 if (leaf
->map_token
) {
3318 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3319 leaf
->map_token
= NULL
;
3322 /* shift the items */
3323 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
+ nr
),
3324 btrfs_item_nr_offset(slot
),
3325 (nritems
- slot
) * sizeof(struct btrfs_item
));
3327 /* shift the data */
3328 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3329 data_end
- total_data
, btrfs_leaf_data(leaf
) +
3330 data_end
, old_data
- data_end
);
3331 data_end
= old_data
;
3334 /* setup the item for the new data */
3335 for (i
= 0; i
< nr
; i
++) {
3336 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
+ i
);
3337 btrfs_set_item_key(leaf
, &disk_key
, slot
+ i
);
3338 item
= btrfs_item_nr(leaf
, slot
+ i
);
3339 btrfs_set_item_offset(leaf
, item
, data_end
- data_size
[i
]);
3340 data_end
-= data_size
[i
];
3341 btrfs_set_item_size(leaf
, item
, data_size
[i
]);
3343 btrfs_set_header_nritems(leaf
, nritems
+ nr
);
3344 btrfs_mark_buffer_dirty(leaf
);
3348 btrfs_cpu_key_to_disk(&disk_key
, cpu_key
);
3349 ret
= fixup_low_keys(trans
, root
, path
, &disk_key
, 1);
3352 if (btrfs_leaf_free_space(root
, leaf
) < 0) {
3353 btrfs_print_leaf(root
, leaf
);
3361 * Given a key and some data, insert an item into the tree.
3362 * This does all the path init required, making room in the tree if needed.
3364 int btrfs_insert_item(struct btrfs_trans_handle
*trans
, struct btrfs_root
3365 *root
, struct btrfs_key
*cpu_key
, void *data
, u32
3369 struct btrfs_path
*path
;
3370 struct extent_buffer
*leaf
;
3373 path
= btrfs_alloc_path();
3375 ret
= btrfs_insert_empty_item(trans
, root
, path
, cpu_key
, data_size
);
3377 leaf
= path
->nodes
[0];
3378 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
3379 write_extent_buffer(leaf
, data
, ptr
, data_size
);
3380 btrfs_mark_buffer_dirty(leaf
);
3382 btrfs_free_path(path
);
3387 * delete the pointer from a given node.
3389 * the tree should have been previously balanced so the deletion does not
3392 static int del_ptr(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3393 struct btrfs_path
*path
, int level
, int slot
)
3395 struct extent_buffer
*parent
= path
->nodes
[level
];
3400 nritems
= btrfs_header_nritems(parent
);
3401 if (slot
!= nritems
- 1) {
3402 memmove_extent_buffer(parent
,
3403 btrfs_node_key_ptr_offset(slot
),
3404 btrfs_node_key_ptr_offset(slot
+ 1),
3405 sizeof(struct btrfs_key_ptr
) *
3406 (nritems
- slot
- 1));
3409 btrfs_set_header_nritems(parent
, nritems
);
3410 if (nritems
== 0 && parent
== root
->node
) {
3411 BUG_ON(btrfs_header_level(root
->node
) != 1);
3412 /* just turn the root into a leaf and break */
3413 btrfs_set_header_level(root
->node
, 0);
3414 } else if (slot
== 0) {
3415 struct btrfs_disk_key disk_key
;
3417 btrfs_node_key(parent
, &disk_key
, 0);
3418 wret
= fixup_low_keys(trans
, root
, path
, &disk_key
, level
+ 1);
3422 btrfs_mark_buffer_dirty(parent
);
3427 * a helper function to delete the leaf pointed to by path->slots[1] and
3428 * path->nodes[1]. bytenr is the node block pointer, but since the callers
3429 * already know it, it is faster to have them pass it down than to
3430 * read it out of the node again.
3432 * This deletes the pointer in path->nodes[1] and frees the leaf
3433 * block extent. zero is returned if it all worked out, < 0 otherwise.
3435 * The path must have already been setup for deleting the leaf, including
3436 * all the proper balancing. path->nodes[1] must be locked.
3438 noinline
int btrfs_del_leaf(struct btrfs_trans_handle
*trans
,
3439 struct btrfs_root
*root
,
3440 struct btrfs_path
*path
, u64 bytenr
)
3443 u64 root_gen
= btrfs_header_generation(path
->nodes
[1]);
3445 ret
= del_ptr(trans
, root
, path
, 1, path
->slots
[1]);
3449 ret
= btrfs_free_extent(trans
, root
, bytenr
,
3450 btrfs_level_size(root
, 0),
3451 path
->nodes
[1]->start
,
3452 btrfs_header_owner(path
->nodes
[1]),
3457 * delete the item at the leaf level in path. If that empties
3458 * the leaf, remove it from the tree
3460 int btrfs_del_items(struct btrfs_trans_handle
*trans
, struct btrfs_root
*root
,
3461 struct btrfs_path
*path
, int slot
, int nr
)
3463 struct extent_buffer
*leaf
;
3464 struct btrfs_item
*item
;
3472 leaf
= path
->nodes
[0];
3473 last_off
= btrfs_item_offset_nr(leaf
, slot
+ nr
- 1);
3475 for (i
= 0; i
< nr
; i
++)
3476 dsize
+= btrfs_item_size_nr(leaf
, slot
+ i
);
3478 nritems
= btrfs_header_nritems(leaf
);
3480 if (slot
+ nr
!= nritems
) {
3481 int data_end
= leaf_data_end(root
, leaf
);
3483 memmove_extent_buffer(leaf
, btrfs_leaf_data(leaf
) +
3485 btrfs_leaf_data(leaf
) + data_end
,
3486 last_off
- data_end
);
3488 for (i
= slot
+ nr
; i
< nritems
; i
++) {
3491 item
= btrfs_item_nr(leaf
, i
);
3492 if (!leaf
->map_token
) {
3493 map_extent_buffer(leaf
, (unsigned long)item
,
3494 sizeof(struct btrfs_item
),
3495 &leaf
->map_token
, &leaf
->kaddr
,
3496 &leaf
->map_start
, &leaf
->map_len
,
3499 ioff
= btrfs_item_offset(leaf
, item
);
3500 btrfs_set_item_offset(leaf
, item
, ioff
+ dsize
);
3503 if (leaf
->map_token
) {
3504 unmap_extent_buffer(leaf
, leaf
->map_token
, KM_USER1
);
3505 leaf
->map_token
= NULL
;
3508 memmove_extent_buffer(leaf
, btrfs_item_nr_offset(slot
),
3509 btrfs_item_nr_offset(slot
+ nr
),
3510 sizeof(struct btrfs_item
) *
3511 (nritems
- slot
- nr
));
3513 btrfs_set_header_nritems(leaf
, nritems
- nr
);
3516 /* delete the leaf if we've emptied it */
3518 if (leaf
== root
->node
) {
3519 btrfs_set_header_level(leaf
, 0);
3521 ret
= btrfs_del_leaf(trans
, root
, path
, leaf
->start
);
3525 int used
= leaf_space_used(leaf
, 0, nritems
);
3527 struct btrfs_disk_key disk_key
;
3529 btrfs_item_key(leaf
, &disk_key
, 0);
3530 wret
= fixup_low_keys(trans
, root
, path
,
3536 /* delete the leaf if it is mostly empty */
3537 if (used
< BTRFS_LEAF_DATA_SIZE(root
) / 4) {
3538 /* push_leaf_left fixes the path.
3539 * make sure the path still points to our leaf
3540 * for possible call to del_ptr below
3542 slot
= path
->slots
[1];
3543 extent_buffer_get(leaf
);
3545 wret
= push_leaf_left(trans
, root
, path
, 1, 1);
3546 if (wret
< 0 && wret
!= -ENOSPC
)
3549 if (path
->nodes
[0] == leaf
&&
3550 btrfs_header_nritems(leaf
)) {
3551 wret
= push_leaf_right(trans
, root
, path
, 1, 1);
3552 if (wret
< 0 && wret
!= -ENOSPC
)
3556 if (btrfs_header_nritems(leaf
) == 0) {
3557 path
->slots
[1] = slot
;
3558 ret
= btrfs_del_leaf(trans
, root
, path
,
3561 free_extent_buffer(leaf
);
3563 /* if we're still in the path, make sure
3564 * we're dirty. Otherwise, one of the
3565 * push_leaf functions must have already
3566 * dirtied this buffer
3568 if (path
->nodes
[0] == leaf
)
3569 btrfs_mark_buffer_dirty(leaf
);
3570 free_extent_buffer(leaf
);
3573 btrfs_mark_buffer_dirty(leaf
);
3580 * search the tree again to find a leaf with lesser keys
3581 * returns 0 if it found something or 1 if there are no lesser leaves.
3582 * returns < 0 on io errors.
3584 * This may release the path, and so you may lose any locks held at the
3587 int btrfs_prev_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
3589 struct btrfs_key key
;
3590 struct btrfs_disk_key found_key
;
3593 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, 0);
3597 else if (key
.type
> 0)
3599 else if (key
.objectid
> 0)
3604 btrfs_release_path(root
, path
);
3605 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3608 btrfs_item_key(path
->nodes
[0], &found_key
, 0);
3609 ret
= comp_keys(&found_key
, &key
);
3616 * A helper function to walk down the tree starting at min_key, and looking
3617 * for nodes or leaves that are either in cache or have a minimum
3618 * transaction id. This is used by the btree defrag code, and tree logging
3620 * This does not cow, but it does stuff the starting key it finds back
3621 * into min_key, so you can call btrfs_search_slot with cow=1 on the
3622 * key and get a writable path.
3624 * This does lock as it descends, and path->keep_locks should be set
3625 * to 1 by the caller.
3627 * This honors path->lowest_level to prevent descent past a given level
3630 * min_trans indicates the oldest transaction that you are interested
3631 * in walking through. Any nodes or leaves older than min_trans are
3632 * skipped over (without reading them).
3634 * returns zero if something useful was found, < 0 on error and 1 if there
3635 * was nothing in the tree that matched the search criteria.
3637 int btrfs_search_forward(struct btrfs_root
*root
, struct btrfs_key
*min_key
,
3638 struct btrfs_key
*max_key
,
3639 struct btrfs_path
*path
, int cache_only
,
3642 struct extent_buffer
*cur
;
3643 struct btrfs_key found_key
;
3650 WARN_ON(!path
->keep_locks
);
3652 cur
= btrfs_lock_root_node(root
);
3653 level
= btrfs_header_level(cur
);
3654 WARN_ON(path
->nodes
[level
]);
3655 path
->nodes
[level
] = cur
;
3656 path
->locks
[level
] = 1;
3658 if (btrfs_header_generation(cur
) < min_trans
) {
3663 nritems
= btrfs_header_nritems(cur
);
3664 level
= btrfs_header_level(cur
);
3665 sret
= bin_search(cur
, min_key
, level
, &slot
);
3667 /* at the lowest level, we're done, setup the path and exit */
3668 if (level
== path
->lowest_level
) {
3669 if (slot
>= nritems
)
3672 path
->slots
[level
] = slot
;
3673 btrfs_item_key_to_cpu(cur
, &found_key
, slot
);
3676 if (sret
&& slot
> 0)
3679 * check this node pointer against the cache_only and
3680 * min_trans parameters. If it isn't in cache or is too
3681 * old, skip to the next one.
3683 while (slot
< nritems
) {
3686 struct extent_buffer
*tmp
;
3687 struct btrfs_disk_key disk_key
;
3689 blockptr
= btrfs_node_blockptr(cur
, slot
);
3690 gen
= btrfs_node_ptr_generation(cur
, slot
);
3691 if (gen
< min_trans
) {
3699 btrfs_node_key(cur
, &disk_key
, slot
);
3700 if (comp_keys(&disk_key
, max_key
) >= 0) {
3706 tmp
= btrfs_find_tree_block(root
, blockptr
,
3707 btrfs_level_size(root
, level
- 1));
3709 if (tmp
&& btrfs_buffer_uptodate(tmp
, gen
)) {
3710 free_extent_buffer(tmp
);
3714 free_extent_buffer(tmp
);
3719 * we didn't find a candidate key in this node, walk forward
3720 * and find another one
3722 if (slot
>= nritems
) {
3723 path
->slots
[level
] = slot
;
3724 sret
= btrfs_find_next_key(root
, path
, min_key
, level
,
3725 cache_only
, min_trans
);
3727 btrfs_release_path(root
, path
);
3733 /* save our key for returning back */
3734 btrfs_node_key_to_cpu(cur
, &found_key
, slot
);
3735 path
->slots
[level
] = slot
;
3736 if (level
== path
->lowest_level
) {
3738 unlock_up(path
, level
, 1);
3741 cur
= read_node_slot(root
, cur
, slot
);
3743 btrfs_tree_lock(cur
);
3744 path
->locks
[level
- 1] = 1;
3745 path
->nodes
[level
- 1] = cur
;
3746 unlock_up(path
, level
, 1);
3750 memcpy(min_key
, &found_key
, sizeof(found_key
));
3755 * this is similar to btrfs_next_leaf, but does not try to preserve
3756 * and fixup the path. It looks for and returns the next key in the
3757 * tree based on the current path and the cache_only and min_trans
3760 * 0 is returned if another key is found, < 0 if there are any errors
3761 * and 1 is returned if there are no higher keys in the tree
3763 * path->keep_locks should be set to 1 on the search made before
3764 * calling this function.
3766 int btrfs_find_next_key(struct btrfs_root
*root
, struct btrfs_path
*path
,
3767 struct btrfs_key
*key
, int lowest_level
,
3768 int cache_only
, u64 min_trans
)
3770 int level
= lowest_level
;
3772 struct extent_buffer
*c
;
3774 WARN_ON(!path
->keep_locks
);
3775 while (level
< BTRFS_MAX_LEVEL
) {
3776 if (!path
->nodes
[level
])
3779 slot
= path
->slots
[level
] + 1;
3780 c
= path
->nodes
[level
];
3782 if (slot
>= btrfs_header_nritems(c
)) {
3784 if (level
== BTRFS_MAX_LEVEL
)
3789 btrfs_item_key_to_cpu(c
, key
, slot
);
3791 u64 blockptr
= btrfs_node_blockptr(c
, slot
);
3792 u64 gen
= btrfs_node_ptr_generation(c
, slot
);
3795 struct extent_buffer
*cur
;
3796 cur
= btrfs_find_tree_block(root
, blockptr
,
3797 btrfs_level_size(root
, level
- 1));
3798 if (!cur
|| !btrfs_buffer_uptodate(cur
, gen
)) {
3801 free_extent_buffer(cur
);
3804 free_extent_buffer(cur
);
3806 if (gen
< min_trans
) {
3810 btrfs_node_key_to_cpu(c
, key
, slot
);
3818 * search the tree again to find a leaf with greater keys
3819 * returns 0 if it found something or 1 if there are no greater leaves.
3820 * returns < 0 on io errors.
3822 int btrfs_next_leaf(struct btrfs_root
*root
, struct btrfs_path
*path
)
3826 struct extent_buffer
*c
;
3827 struct extent_buffer
*next
= NULL
;
3828 struct btrfs_key key
;
3832 nritems
= btrfs_header_nritems(path
->nodes
[0]);
3836 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, nritems
- 1);
3838 btrfs_release_path(root
, path
);
3839 path
->keep_locks
= 1;
3840 ret
= btrfs_search_slot(NULL
, root
, &key
, path
, 0, 0);
3841 path
->keep_locks
= 0;
3846 nritems
= btrfs_header_nritems(path
->nodes
[0]);
3848 * by releasing the path above we dropped all our locks. A balance
3849 * could have added more items next to the key that used to be
3850 * at the very end of the block. So, check again here and
3851 * advance the path if there are now more items available.
3853 if (nritems
> 0 && path
->slots
[0] < nritems
- 1) {
3858 while (level
< BTRFS_MAX_LEVEL
) {
3859 if (!path
->nodes
[level
])
3862 slot
= path
->slots
[level
] + 1;
3863 c
= path
->nodes
[level
];
3864 if (slot
>= btrfs_header_nritems(c
)) {
3866 if (level
== BTRFS_MAX_LEVEL
)
3872 btrfs_tree_unlock(next
);
3873 free_extent_buffer(next
);
3876 if (level
== 1 && (path
->locks
[1] || path
->skip_locking
) &&
3878 reada_for_search(root
, path
, level
, slot
, 0);
3880 next
= read_node_slot(root
, c
, slot
);
3881 if (!path
->skip_locking
) {
3882 WARN_ON(!btrfs_tree_locked(c
));
3883 btrfs_tree_lock(next
);
3887 path
->slots
[level
] = slot
;
3890 c
= path
->nodes
[level
];
3891 if (path
->locks
[level
])
3892 btrfs_tree_unlock(c
);
3893 free_extent_buffer(c
);
3894 path
->nodes
[level
] = next
;
3895 path
->slots
[level
] = 0;
3896 if (!path
->skip_locking
)
3897 path
->locks
[level
] = 1;
3900 if (level
== 1 && path
->locks
[1] && path
->reada
)
3901 reada_for_search(root
, path
, level
, slot
, 0);
3902 next
= read_node_slot(root
, next
, 0);
3903 if (!path
->skip_locking
) {
3904 WARN_ON(!btrfs_tree_locked(path
->nodes
[level
]));
3905 btrfs_tree_lock(next
);
3909 unlock_up(path
, 0, 1);
3914 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
3915 * searching until it gets past min_objectid or finds an item of 'type'
3917 * returns 0 if something is found, 1 if nothing was found and < 0 on error
3919 int btrfs_previous_item(struct btrfs_root
*root
,
3920 struct btrfs_path
*path
, u64 min_objectid
,
3923 struct btrfs_key found_key
;
3924 struct extent_buffer
*leaf
;
3929 if (path
->slots
[0] == 0) {
3930 ret
= btrfs_prev_leaf(root
, path
);
3936 leaf
= path
->nodes
[0];
3937 nritems
= btrfs_header_nritems(leaf
);
3940 if (path
->slots
[0] == nritems
)
3943 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
3944 if (found_key
.type
== type
)
3946 if (found_key
.objectid
< min_objectid
)
3948 if (found_key
.objectid
== min_objectid
&&
3949 found_key
.type
< type
)