1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2015 Facebook. All rights reserved.
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
11 #include "free-space-tree.h"
12 #include "transaction.h"
13 #include "block-group.h"
15 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
16 struct btrfs_block_group
*block_group
,
17 struct btrfs_path
*path
);
19 void set_free_space_tree_thresholds(struct btrfs_block_group
*cache
)
23 u64 num_bitmaps
, total_bitmap_size
;
26 * We convert to bitmaps when the disk space required for using extents
27 * exceeds that required for using bitmaps.
29 bitmap_range
= cache
->fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
30 num_bitmaps
= div_u64(cache
->length
+ bitmap_range
- 1, bitmap_range
);
31 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
32 total_bitmap_size
= num_bitmaps
* bitmap_size
;
33 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
34 sizeof(struct btrfs_item
));
37 * We allow for a small buffer between the high threshold and low
38 * threshold to avoid thrashing back and forth between the two formats.
40 if (cache
->bitmap_high_thresh
> 100)
41 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
43 cache
->bitmap_low_thresh
= 0;
/*
 * Insert a new free space info item for @block_group into the free space
 * root (trans->fs_info->free_space_root).
 *
 * The item key is (block_group->start, BTRFS_FREE_SPACE_INFO_KEY,
 * block_group->length) and the item is sized as a struct
 * btrfs_free_space_info. After insertion the extent count and the flags are
 * both set to 0, the leaf is marked dirty and @path is released.
 *
 * NOTE(review): the declarations of key/ret, the check of the insert result
 * and the return statement are not visible in this view.
 */
46 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
47 struct btrfs_block_group
*block_group
,
48 struct btrfs_path
*path
)
50 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
51 struct btrfs_free_space_info
*info
;
53 struct extent_buffer
*leaf
;
56 key
.objectid
= block_group
->start
;
57 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
58 key
.offset
= block_group
->length
;
60 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
64 leaf
= path
->nodes
[0];
65 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
66 struct btrfs_free_space_info
);
67 btrfs_set_free_space_extent_count(leaf
, info
, 0);
68 btrfs_set_free_space_flags(leaf
, info
, 0);
69 btrfs_mark_buffer_dirty(leaf
);
73 btrfs_release_path(path
);
/*
 * Look up the free space info item of @block_group in
 * fs_info->free_space_root.
 *
 * The search key is (block_group->start, BTRFS_FREE_SPACE_INFO_KEY,
 * block_group->length); @trans and @cow are passed straight through to
 * btrfs_search_slot() with an ins_len of 0.
 *
 * On the visible success path the return value points at the item at
 * path->nodes[0] / path->slots[0], so @path is left holding the leaf. One
 * visible failure path logs "missing free space info" and returns
 * ERR_PTR(-ENOENT).
 *
 * NOTE(review): the conditions that select the failure paths are not visible
 * in this view.
 */
78 struct btrfs_free_space_info
*search_free_space_info(
79 struct btrfs_trans_handle
*trans
,
80 struct btrfs_block_group
*block_group
,
81 struct btrfs_path
*path
, int cow
)
83 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
84 struct btrfs_root
*root
= fs_info
->free_space_root
;
88 key
.objectid
= block_group
->start
;
89 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
90 key
.offset
= block_group
->length
;
92 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
96 btrfs_warn(fs_info
, "missing free space info for %llu",
99 return ERR_PTR(-ENOENT
);
102 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
103 struct btrfs_free_space_info
);
/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 */
/*
 * Thin wrapper around btrfs_search_slot(): @trans, @root, @key, @p, @ins_len
 * and @cow are forwarded unchanged, and the result is post-processed.
 *
 * NOTE(review): only the search call and a test for p->slots[0] == 0 are
 * visible in this view; what is done with the search result and in that
 * branch cannot be confirmed from here.
 */
110 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
111 struct btrfs_root
*root
,
112 struct btrfs_key
*key
, struct btrfs_path
*p
,
113 int ins_len
, int cow
)
117 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
126 if (p
->slots
[0] == 0) {
135 static inline u32
free_space_bitmap_size(u64 size
, u32 sectorsize
)
137 return DIV_ROUND_UP((u32
)div_u64(size
, sectorsize
), BITS_PER_BYTE
);
140 static unsigned long *alloc_bitmap(u32 bitmap_size
)
143 unsigned int nofs_flag
;
144 u32 bitmap_rounded_size
= round_up(bitmap_size
, sizeof(unsigned long));
147 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
148 * into the filesystem as the free space bitmap can be modified in the
149 * critical section of a transaction commit.
151 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
152 * know that recursion is unsafe.
154 nofs_flag
= memalloc_nofs_save();
155 ret
= kvzalloc(bitmap_rounded_size
, GFP_KERNEL
);
156 memalloc_nofs_restore(nofs_flag
);
/*
 * Operate on @len bits of @map starting at bit index @start, addressing the
 * map one byte (u8) at a time.
 *
 * The first byte uses BITMAP_FIRST_BYTE_MASK(start) and covers the bits from
 * @start up to the next byte boundary. The loop continues while at least
 * bits_to_set bits remain, switching to whole bytes (BITS_PER_BYTE) after the
 * first one. A trailing partial byte is additionally limited with
 * BITMAP_LAST_BYTE_MASK(start + len).
 *
 * NOTE(review): the statements that apply the mask to *p, advance p and
 * decrement len are not visible in this view; the name suggests the masked
 * bits are set.
 */
160 static void le_bitmap_set(unsigned long *map
, unsigned int start
, int len
)
162 u8
*p
= ((u8
*)map
) + BIT_BYTE(start
);
163 const unsigned int size
= start
+ len
;
164 int bits_to_set
= BITS_PER_BYTE
- (start
% BITS_PER_BYTE
);
165 u8 mask_to_set
= BITMAP_FIRST_BYTE_MASK(start
);
167 while (len
- bits_to_set
>= 0) {
170 bits_to_set
= BITS_PER_BYTE
;
175 mask_to_set
&= BITMAP_LAST_BYTE_MASK(size
);
/*
 * Convert the free space items of @block_group from extent items to bitmap
 * items.
 *
 * Visible steps:
 *  1. Allocate an in-memory bitmap sized for the whole block group
 *     (free_space_bitmap_size() + alloc_bitmap()).
 *  2. Search backwards from the key (end - 1, ..., (u64)-1) and walk the
 *     leaf slots towards slot 0. A FREE_SPACE_INFO key is asserted to match
 *     the block group; for each FREE_SPACE_EXTENT key the covered sector
 *     range is passed to le_bitmap_set(). Items are removed with
 *     btrfs_del_items() and the path is released.
 *  3. Re-find the info item with cow, OR BTRFS_FREE_SPACE_USING_BITMAPS into
 *     its flags and read the stored extent count.
 *  4. Compare the counted extents against the stored count; a mismatch is
 *     reported with "incorrect extent count ...".
 *  5. Walk the in-memory bitmap in chunks of
 *     sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS bytes of block group space,
 *     inserting one FREE_SPACE_BITMAP item per chunk and copying the
 *     corresponding bitmap bytes in with write_extent_buffer().
 *
 * btrfs_abort_transaction() is called on a visible error path.
 *
 * NOTE(review): loop headers, error checks, the bitmap free and the return
 * statement are not visible in this view.
 */
181 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
182 struct btrfs_block_group
*block_group
,
183 struct btrfs_path
*path
)
185 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
186 struct btrfs_root
*root
= fs_info
->free_space_root
;
187 struct btrfs_free_space_info
*info
;
188 struct btrfs_key key
, found_key
;
189 struct extent_buffer
*leaf
;
190 unsigned long *bitmap
;
194 u32 bitmap_size
, flags
, expected_extent_count
;
195 u32 extent_count
= 0;
199 bitmap_size
= free_space_bitmap_size(block_group
->length
,
200 fs_info
->sectorsize
);
201 bitmap
= alloc_bitmap(bitmap_size
);
207 start
= block_group
->start
;
208 end
= block_group
->start
+ block_group
->length
;
210 key
.objectid
= end
- 1;
212 key
.offset
= (u64
)-1;
215 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
219 leaf
= path
->nodes
[0];
222 while (path
->slots
[0] > 0) {
223 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
225 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
226 ASSERT(found_key
.objectid
== block_group
->start
);
227 ASSERT(found_key
.offset
== block_group
->length
);
230 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
233 ASSERT(found_key
.objectid
>= start
);
234 ASSERT(found_key
.objectid
< end
);
235 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
237 first
= div_u64(found_key
.objectid
- start
,
238 fs_info
->sectorsize
);
239 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
240 fs_info
->sectorsize
);
241 le_bitmap_set(bitmap
, first
, last
- first
);
251 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
254 btrfs_release_path(path
);
257 info
= search_free_space_info(trans
, block_group
, path
, 1);
262 leaf
= path
->nodes
[0];
263 flags
= btrfs_free_space_flags(leaf
, info
);
264 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
265 btrfs_set_free_space_flags(leaf
, info
, flags
);
266 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
267 btrfs_mark_buffer_dirty(leaf
);
268 btrfs_release_path(path
);
270 if (extent_count
!= expected_extent_count
) {
272 "incorrect extent count for %llu; counted %u, expected %u",
273 block_group
->start
, extent_count
,
274 expected_extent_count
);
280 bitmap_cursor
= (char *)bitmap
;
281 bitmap_range
= fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
288 extent_size
= min(end
- i
, bitmap_range
);
289 data_size
= free_space_bitmap_size(extent_size
,
290 fs_info
->sectorsize
);
293 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
294 key
.offset
= extent_size
;
296 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
301 leaf
= path
->nodes
[0];
302 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
303 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
305 btrfs_mark_buffer_dirty(leaf
);
306 btrfs_release_path(path
);
309 bitmap_cursor
+= data_size
;
316 btrfs_abort_transaction(trans
, ret
);
/*
 * Convert the free space items of @block_group from bitmap items to extent
 * items.
 *
 * Visible steps:
 *  1. Allocate an in-memory bitmap sized for the whole block group.
 *  2. Search backwards from the key (end - 1, ..., (u64)-1) and walk the
 *     leaf slots towards slot 0. A FREE_SPACE_INFO key is asserted to match
 *     the block group; each FREE_SPACE_BITMAP item is copied with
 *     read_extent_buffer() into the in-memory bitmap at a byte position
 *     derived from (found_key.objectid - start). Items are removed with
 *     btrfs_del_items() and the path is released.
 *  3. Re-find the info item with cow, clear BTRFS_FREE_SPACE_USING_BITMAPS
 *     in its flags and read the stored extent count.
 *  4. Scan the bitmap with find_next_bit_le() / find_next_zero_bit_le() over
 *     length / sectorsize bits; each run of set bits becomes one zero-sized
 *     FREE_SPACE_EXTENT item keyed by (start + start_bit * sectorsize,
 *     (end_bit - start_bit) * sectorsize).
 *  5. Compare the counted extents against the stored count; a mismatch is
 *     reported with "incorrect extent count ...".
 *
 * btrfs_abort_transaction() is called on a visible error path.
 *
 * NOTE(review): error checks, the extent counter increment, the bitmap free
 * and the return statement are not visible in this view.
 */
321 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
322 struct btrfs_block_group
*block_group
,
323 struct btrfs_path
*path
)
325 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
326 struct btrfs_root
*root
= fs_info
->free_space_root
;
327 struct btrfs_free_space_info
*info
;
328 struct btrfs_key key
, found_key
;
329 struct extent_buffer
*leaf
;
330 unsigned long *bitmap
;
332 u32 bitmap_size
, flags
, expected_extent_count
;
333 unsigned long nrbits
, start_bit
, end_bit
;
334 u32 extent_count
= 0;
338 bitmap_size
= free_space_bitmap_size(block_group
->length
,
339 fs_info
->sectorsize
);
340 bitmap
= alloc_bitmap(bitmap_size
);
346 start
= block_group
->start
;
347 end
= block_group
->start
+ block_group
->length
;
349 key
.objectid
= end
- 1;
351 key
.offset
= (u64
)-1;
354 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
358 leaf
= path
->nodes
[0];
361 while (path
->slots
[0] > 0) {
362 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
364 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
365 ASSERT(found_key
.objectid
== block_group
->start
);
366 ASSERT(found_key
.offset
== block_group
->length
);
369 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
372 u32 bitmap_pos
, data_size
;
374 ASSERT(found_key
.objectid
>= start
);
375 ASSERT(found_key
.objectid
< end
);
376 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
378 bitmap_pos
= div_u64(found_key
.objectid
- start
,
379 fs_info
->sectorsize
*
381 bitmap_cursor
= ((char *)bitmap
) + bitmap_pos
;
382 data_size
= free_space_bitmap_size(found_key
.offset
,
383 fs_info
->sectorsize
);
385 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
386 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
396 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
399 btrfs_release_path(path
);
402 info
= search_free_space_info(trans
, block_group
, path
, 1);
407 leaf
= path
->nodes
[0];
408 flags
= btrfs_free_space_flags(leaf
, info
);
409 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
410 btrfs_set_free_space_flags(leaf
, info
, flags
);
411 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
412 btrfs_mark_buffer_dirty(leaf
);
413 btrfs_release_path(path
);
415 nrbits
= div_u64(block_group
->length
, block_group
->fs_info
->sectorsize
);
416 start_bit
= find_next_bit_le(bitmap
, nrbits
, 0);
418 while (start_bit
< nrbits
) {
419 end_bit
= find_next_zero_bit_le(bitmap
, nrbits
, start_bit
);
420 ASSERT(start_bit
< end_bit
);
422 key
.objectid
= start
+ start_bit
* block_group
->fs_info
->sectorsize
;
423 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
424 key
.offset
= (end_bit
- start_bit
) * block_group
->fs_info
->sectorsize
;
426 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
429 btrfs_release_path(path
);
433 start_bit
= find_next_bit_le(bitmap
, nrbits
, end_bit
);
436 if (extent_count
!= expected_extent_count
) {
438 "incorrect extent count for %llu; counted %u, expected %u",
439 block_group
->start
, extent_count
,
440 expected_extent_count
);
450 btrfs_abort_transaction(trans
, ret
);
/*
 * Apply a delta to the extent count stored in @block_group's free space info
 * item and convert the representation if a threshold is crossed.
 *
 * A new_extents value of 0 is special-cased before any tree access.
 * Otherwise the info item is looked up with cow, new_extents is added to its
 * extent count, the leaf is marked dirty and @path is released. Then:
 *  - not using bitmaps and count > block_group->bitmap_high_thresh:
 *    convert_free_space_to_bitmaps();
 *  - using bitmaps and count < block_group->bitmap_low_thresh:
 *    convert_free_space_to_extents().
 *
 * NOTE(review): the new_extents parameter declaration, the body of the
 * zero-delta branch, the error checks and the return statement are not
 * visible in this view.
 */
454 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
455 struct btrfs_block_group
*block_group
,
456 struct btrfs_path
*path
,
459 struct btrfs_free_space_info
*info
;
464 if (new_extents
== 0)
467 info
= search_free_space_info(trans
, block_group
, path
, 1);
472 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
473 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
475 extent_count
+= new_extents
;
476 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
477 btrfs_mark_buffer_dirty(path
->nodes
[0]);
478 btrfs_release_path(path
);
480 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
481 extent_count
> block_group
->bitmap_high_thresh
) {
482 ret
= convert_free_space_to_bitmaps(trans
, block_group
, path
);
483 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
484 extent_count
< block_group
->bitmap_low_thresh
) {
485 ret
= convert_free_space_to_extents(trans
, block_group
, path
);
493 int free_space_test_bit(struct btrfs_block_group
*block_group
,
494 struct btrfs_path
*path
, u64 offset
)
496 struct extent_buffer
*leaf
;
497 struct btrfs_key key
;
498 u64 found_start
, found_end
;
499 unsigned long ptr
, i
;
501 leaf
= path
->nodes
[0];
502 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
503 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
505 found_start
= key
.objectid
;
506 found_end
= key
.objectid
+ key
.offset
;
507 ASSERT(offset
>= found_start
&& offset
< found_end
);
509 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
510 i
= div_u64(offset
- found_start
,
511 block_group
->fs_info
->sectorsize
);
512 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
/*
 * Set or clear the bits for the range [*start, *start + *size) within the
 * single bitmap item that @path points at.
 *
 * The item at path->nodes[0] / path->slots[0] is asserted to be a
 * FREE_SPACE_BITMAP item containing *start. Bit indices are computed in
 * units of fs_info->sectorsize relative to the bitmap's objectid, and the
 * leaf is marked dirty afterwards. *size is reduced by the number of bytes
 * handled in this bitmap.
 *
 * NOTE(review): the last parameter, the condition choosing between
 * extent_buffer_bitmap_set() and extent_buffer_bitmap_clear(), any clamping
 * of end to the bitmap's end and the update of *start are not visible in
 * this view.
 */
515 static void free_space_set_bits(struct btrfs_block_group
*block_group
,
516 struct btrfs_path
*path
, u64
*start
, u64
*size
,
519 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
520 struct extent_buffer
*leaf
;
521 struct btrfs_key key
;
522 u64 end
= *start
+ *size
;
523 u64 found_start
, found_end
;
524 unsigned long ptr
, first
, last
;
526 leaf
= path
->nodes
[0];
527 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
528 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
530 found_start
= key
.objectid
;
531 found_end
= key
.objectid
+ key
.offset
;
532 ASSERT(*start
>= found_start
&& *start
< found_end
);
533 ASSERT(end
> found_start
);
538 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
539 first
= div_u64(*start
- found_start
, fs_info
->sectorsize
);
540 last
= div_u64(end
- found_start
, fs_info
->sectorsize
);
542 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
544 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
545 btrfs_mark_buffer_dirty(leaf
);
547 *size
-= end
- *start
;
/*
 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * looking for.
 */
/*
 * Move @p to the next bitmap item.
 *
 * If the current slot is not the last one in the leaf
 * (slots[0] + 1 < nritems) a fast path is taken. Otherwise the key of the
 * current item is read, @p is released, and a fresh
 * btrfs_search_prev_slot() is done (ins_len 0, cow 1) for the key
 * (objectid + offset, ..., (u64)-1), i.e. starting right after the range of
 * the current item.
 *
 * NOTE(review): the body of the fast path and the assignment of key.type are
 * not visible in this view.
 */
557 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
558 struct btrfs_root
*root
, struct btrfs_path
*p
)
560 struct btrfs_key key
;
562 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
567 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
568 btrfs_release_path(p
);
570 key
.objectid
+= key
.offset
;
572 key
.offset
= (u64
)-1;
574 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
/*
 * If remove is 1, then we are removing free space, thus clearing bits in the
 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 * the bitmap.
 */
/*
 * Update the bitmap items of @block_group for the byte range
 * [start, start + size) and adjust the stored extent count.
 *
 * Visible steps:
 *  1. If @start is past the beginning of the block group, locate the bitmap
 *     holding the block just before @start and record its bit (prev_bit);
 *     step to the next bitmap if @start lies beyond that bitmap. Otherwise
 *     search directly for the bitmap at @start.
 *  2. Repeatedly call free_space_set_bits() and free_space_next_bitmap() to
 *     cover every bitmap the range overlaps.
 *  3. If the range ends before the end of the block group, record the bit of
 *     the block right after it (next_bit), stepping to the next bitmap if
 *     needed.
 *  4. Release @path and call update_free_space_extent_count(); the inline
 *     comments indicate the delta is derived from leftovers or merges with
 *     the left and right neighbours.
 *
 * NOTE(review): the arithmetic on the extent-count delta, the use of
 * @remove, the loop header and the error handling are not visible in this
 * view.
 */
582 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
583 struct btrfs_block_group
*block_group
,
584 struct btrfs_path
*path
,
585 u64 start
, u64 size
, int remove
)
587 struct btrfs_root
*root
= block_group
->fs_info
->free_space_root
;
588 struct btrfs_key key
;
589 u64 end
= start
+ size
;
590 u64 cur_start
, cur_size
;
591 int prev_bit
, next_bit
;
596 * Read the bit for the block immediately before the extent of space if
597 * that block is within the block group.
599 if (start
> block_group
->start
) {
600 u64 prev_block
= start
- block_group
->fs_info
->sectorsize
;
602 key
.objectid
= prev_block
;
604 key
.offset
= (u64
)-1;
606 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
610 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
612 /* The previous block may have been in the previous bitmap. */
613 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
614 if (start
>= key
.objectid
+ key
.offset
) {
615 ret
= free_space_next_bitmap(trans
, root
, path
);
620 key
.objectid
= start
;
622 key
.offset
= (u64
)-1;
624 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
632 * Iterate over all of the bitmaps overlapped by the extent of space,
633 * clearing/setting bits as required.
638 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
642 ret
= free_space_next_bitmap(trans
, root
, path
);
648 * Read the bit for the block immediately after the extent of space if
649 * that block is within the block group.
651 if (end
< block_group
->start
+ block_group
->length
) {
652 /* The next block may be in the next bitmap. */
653 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
654 if (end
>= key
.objectid
+ key
.offset
) {
655 ret
= free_space_next_bitmap(trans
, root
, path
);
660 next_bit
= free_space_test_bit(block_group
, path
, end
);
668 /* Leftover on the left. */
672 /* Leftover on the right. */
678 /* Merging with neighbor on the left. */
682 /* Merging with neighbor on the right. */
687 btrfs_release_path(path
);
688 ret
= update_free_space_extent_count(trans
, block_group
, path
,
/*
 * Remove the byte range [start, start + size) from the extent-format free
 * space items of @block_group.
 *
 * The free space extent containing the range is found with
 * btrfs_search_prev_slot() and asserted to be a FREE_SPACE_EXTENT item that
 * fully covers the range. That item is deleted, then up to two zero-sized
 * replacement items are inserted: one for leftover space before @start and
 * one for leftover space after the end of the range. Finally @path is
 * released and update_free_space_extent_count() is called; new_extents
 * starts at -1 for the deleted item.
 *
 * NOTE(review): the start/size parameter declarations, the increments of
 * new_extents, the error checks and the return statement are not visible in
 * this view.
 */
695 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
696 struct btrfs_block_group
*block_group
,
697 struct btrfs_path
*path
,
700 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
701 struct btrfs_key key
;
702 u64 found_start
, found_end
;
703 u64 end
= start
+ size
;
704 int new_extents
= -1;
707 key
.objectid
= start
;
709 key
.offset
= (u64
)-1;
711 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
715 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
717 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
719 found_start
= key
.objectid
;
720 found_end
= key
.objectid
+ key
.offset
;
721 ASSERT(start
>= found_start
&& end
<= found_end
);
724 * Okay, now that we've found the free space extent which contains the
725 * free space that we are removing, there are four cases:
727 * 1. We're using the whole extent: delete the key we found and
728 * decrement the free space extent count.
729 * 2. We are using part of the extent starting at the beginning: delete
730 * the key we found and insert a new key representing the leftover at
731 * the end. There is no net change in the number of extents.
732 * 3. We are using part of the extent ending at the end: delete the key
733 * we found and insert a new key representing the leftover at the
734 * beginning. There is no net change in the number of extents.
735 * 4. We are using part of the extent in the middle: delete the key we
736 * found and insert two new keys representing the leftovers on each
737 * side. Where we used to have one extent, we now have two, so increment
738 * the extent count. We may need to convert the block group to bitmaps
742 /* Delete the existing key (cases 1-4). */
743 ret
= btrfs_del_item(trans
, root
, path
);
747 /* Add a key for leftovers at the beginning (cases 3 and 4). */
748 if (start
> found_start
) {
749 key
.objectid
= found_start
;
750 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
751 key
.offset
= start
- found_start
;
753 btrfs_release_path(path
);
754 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
760 /* Add a key for leftovers at the end (cases 2 and 4). */
761 if (end
< found_end
) {
763 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
764 key
.offset
= found_end
- end
;
766 btrfs_release_path(path
);
767 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
773 btrfs_release_path(path
);
774 ret
= update_free_space_extent_count(trans
, block_group
, path
,
/*
 * Remove [start, start + size) from the free space tree entries of
 * @block_group, using whichever format the block group currently has.
 *
 * If block_group->needs_free_space is set, the block group is first added to
 * the tree via __add_block_group_free_space(). The info item is then looked
 * up read-only (NULL trans, cow 0) just to read its flags, and @path is
 * released. With BTRFS_FREE_SPACE_USING_BITMAPS set the work is delegated to
 * modify_free_space_bitmap(), otherwise to remove_free_space_extent().
 *
 * Returns PTR_ERR(info) on a failed lookup, otherwise the delegate's result.
 *
 * NOTE(review): the error checks and the trailing call arguments are not
 * visible in this view.
 */
782 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
783 struct btrfs_block_group
*block_group
,
784 struct btrfs_path
*path
, u64 start
, u64 size
)
786 struct btrfs_free_space_info
*info
;
790 if (block_group
->needs_free_space
) {
791 ret
= __add_block_group_free_space(trans
, block_group
, path
);
796 info
= search_free_space_info(NULL
, block_group
, path
, 0);
798 return PTR_ERR(info
);
799 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
800 btrfs_release_path(path
);
802 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
803 return modify_free_space_bitmap(trans
, block_group
, path
,
806 return remove_free_space_extent(trans
, block_group
, path
,
/*
 * Entry point for removing a byte range from the free space tree.
 *
 * The FREE_SPACE_TREE compat_ro flag is tested first. A path is allocated,
 * the block group containing @start is looked up, and
 * __remove_from_free_space_tree() is run under block_group->free_space_lock.
 * The block group reference and the path are dropped afterwards, and
 * btrfs_abort_transaction() is called on a visible error path.
 *
 * NOTE(review): the start/size parameter declarations, the NULL checks on
 * the path and block group, and the return statements are not visible in
 * this view.
 */
811 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
814 struct btrfs_block_group
*block_group
;
815 struct btrfs_path
*path
;
818 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
821 path
= btrfs_alloc_path();
827 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
834 mutex_lock(&block_group
->free_space_lock
);
835 ret
= __remove_from_free_space_tree(trans
, block_group
, path
, start
,
837 mutex_unlock(&block_group
->free_space_lock
);
839 btrfs_put_block_group(block_group
);
841 btrfs_free_path(path
);
843 btrfs_abort_transaction(trans
, ret
);
/*
 * Add the byte range [start, start + size) as a free space extent item of
 * @block_group, merging with directly adjacent extents.
 *
 * new_key starts as (start, BTRFS_FREE_SPACE_EXTENT_KEY, size).
 *  - Left neighbour: skipped when @start is the block group start.
 *    Otherwise the item preceding start - 1 is located; if it is an extent
 *    ending exactly at @start it is deleted and absorbed (new_key.objectid
 *    moves to its start, new_key.offset grows by its length).
 *  - Right neighbour: skipped when the range ends at the block group end.
 *    Otherwise an item is located; if it is an extent starting exactly at
 *    the end of the range it is deleted and its length is added to
 *    new_key.offset.
 * The merged key is then inserted as a zero-sized item, @path is released
 * and update_free_space_extent_count() is called.
 *
 * NOTE(review): the start/size parameter declarations, the new_extents
 * bookkeeping, the goto targets and the error checks are not visible in this
 * view.
 */
847 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
848 struct btrfs_block_group
*block_group
,
849 struct btrfs_path
*path
,
852 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
853 struct btrfs_key key
, new_key
;
854 u64 found_start
, found_end
;
855 u64 end
= start
+ size
;
860 * We are adding a new extent of free space, but we need to merge
861 * extents. There are four cases here:
863 * 1. The new extent does not have any immediate neighbors to merge
864 * with: add the new key and increment the free space extent count. We
865 * may need to convert the block group to bitmaps as a result.
866 * 2. The new extent has an immediate neighbor before it: remove the
867 * previous key and insert a new key combining both of them. There is no
868 * net change in the number of extents.
869 * 3. The new extent has an immediate neighbor after it: remove the next
870 * key and insert a new key combining both of them. There is no net
871 * change in the number of extents.
872 * 4. The new extent has immediate neighbors on both sides: remove both
873 * of the keys and insert a new key combining all of them. Where we used
874 * to have two extents, we now have one, so decrement the extent count.
877 new_key
.objectid
= start
;
878 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
879 new_key
.offset
= size
;
881 /* Search for a neighbor on the left. */
882 if (start
== block_group
->start
)
884 key
.objectid
= start
- 1;
886 key
.offset
= (u64
)-1;
888 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
892 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
894 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
895 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
896 btrfs_release_path(path
);
900 found_start
= key
.objectid
;
901 found_end
= key
.objectid
+ key
.offset
;
902 ASSERT(found_start
>= block_group
->start
&&
903 found_end
> block_group
->start
);
904 ASSERT(found_start
< start
&& found_end
<= start
);
907 * Delete the neighbor on the left and absorb it into the new key (cases
910 if (found_end
== start
) {
911 ret
= btrfs_del_item(trans
, root
, path
);
914 new_key
.objectid
= found_start
;
915 new_key
.offset
+= key
.offset
;
918 btrfs_release_path(path
);
921 /* Search for a neighbor on the right. */
922 if (end
== block_group
->start
+ block_group
->length
)
926 key
.offset
= (u64
)-1;
928 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
932 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
934 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
935 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
936 btrfs_release_path(path
);
940 found_start
= key
.objectid
;
941 found_end
= key
.objectid
+ key
.offset
;
942 ASSERT(found_start
>= block_group
->start
&&
943 found_end
> block_group
->start
);
944 ASSERT((found_start
< start
&& found_end
<= start
) ||
945 (found_start
>= end
&& found_end
> end
));
948 * Delete the neighbor on the right and absorb it into the new key
951 if (found_start
== end
) {
952 ret
= btrfs_del_item(trans
, root
, path
);
955 new_key
.offset
+= key
.offset
;
958 btrfs_release_path(path
);
961 /* Insert the new key (cases 1-4). */
962 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
966 btrfs_release_path(path
);
967 ret
= update_free_space_extent_count(trans
, block_group
, path
,
/*
 * Add [start, start + size) to the free space tree entries of @block_group,
 * using whichever format the block group currently has.
 *
 * If block_group->needs_free_space is set, the block group is first added to
 * the tree via __add_block_group_free_space(). The info item is then looked
 * up read-only (NULL trans, cow 0) just to read its flags, and @path is
 * released. With BTRFS_FREE_SPACE_USING_BITMAPS set the work is delegated to
 * modify_free_space_bitmap(), otherwise to add_free_space_extent().
 *
 * Returns PTR_ERR(info) on a failed lookup, otherwise the delegate's result.
 *
 * NOTE(review): the error checks and the trailing call arguments are not
 * visible in this view.
 */
975 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
976 struct btrfs_block_group
*block_group
,
977 struct btrfs_path
*path
, u64 start
, u64 size
)
979 struct btrfs_free_space_info
*info
;
983 if (block_group
->needs_free_space
) {
984 ret
= __add_block_group_free_space(trans
, block_group
, path
);
989 info
= search_free_space_info(NULL
, block_group
, path
, 0);
991 return PTR_ERR(info
);
992 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
993 btrfs_release_path(path
);
995 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
996 return modify_free_space_bitmap(trans
, block_group
, path
,
999 return add_free_space_extent(trans
, block_group
, path
, start
,
/*
 * Entry point for adding the byte range [start, start + size) to the free
 * space tree.
 *
 * The FREE_SPACE_TREE compat_ro flag is tested first. A path is allocated,
 * the block group containing @start is looked up, and
 * __add_to_free_space_tree() is run under block_group->free_space_lock. The
 * block group reference and the path are dropped afterwards, and
 * btrfs_abort_transaction() is called on a visible error path.
 *
 * NOTE(review): the NULL checks on the path and block group and the return
 * statements are not visible in this view.
 */
1004 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1005 u64 start
, u64 size
)
1007 struct btrfs_block_group
*block_group
;
1008 struct btrfs_path
*path
;
1011 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1014 path
= btrfs_alloc_path();
1020 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
1027 mutex_lock(&block_group
->free_space_lock
);
1028 ret
= __add_to_free_space_tree(trans
, block_group
, path
, start
, size
);
1029 mutex_unlock(&block_group
->free_space_lock
);
1031 btrfs_put_block_group(block_group
);
1033 btrfs_free_path(path
);
1035 btrfs_abort_transaction(trans
, ret
);
/*
 * Populate the free space tree by walking the extent tree. Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
/*
 * Build the free space tree entries for one block group from the extent
 * tree (trans->fs_info->extent_root).
 *
 * Two paths are used: @path (with READA_FORWARD) iterates the extent tree,
 * @path2 is handed to the free space tree helpers. A new info item is
 * inserted with add_new_free_space_info(), then, under
 * block_group->free_space_lock, extent tree items are walked starting at
 * (block_group->start, BTRFS_EXTENT_ITEM_KEY):
 *  - EXTENT_ITEM / METADATA_ITEM: the walk stops once key.objectid reaches
 *    the block group end; a gap between the running start and key.objectid
 *    is added as free space; start then moves past the item (nodesize for
 *    METADATA_ITEM, key.offset otherwise).
 *  - BLOCK_GROUP_ITEM: compared against block_group->start.
 * After the walk a final __add_to_free_space_tree() call covers
 * [start, end). Both paths are freed on exit.
 *
 * NOTE(review): the loop header, the conditions around the final add and the
 * error checks are not visible in this view.
 */
1044 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1045 struct btrfs_block_group
*block_group
)
1047 struct btrfs_root
*extent_root
= trans
->fs_info
->extent_root
;
1048 struct btrfs_path
*path
, *path2
;
1049 struct btrfs_key key
;
1053 path
= btrfs_alloc_path();
1056 path
->reada
= READA_FORWARD
;
1058 path2
= btrfs_alloc_path();
1060 btrfs_free_path(path
);
1064 ret
= add_new_free_space_info(trans
, block_group
, path2
);
1068 mutex_lock(&block_group
->free_space_lock
);
1071 * Iterate through all of the extent and metadata items in this block
1072 * group, adding the free space between them and the free space at the
1073 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1074 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1077 key
.objectid
= block_group
->start
;
1078 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1081 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1086 start
= block_group
->start
;
1087 end
= block_group
->start
+ block_group
->length
;
1089 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1091 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1092 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1093 if (key
.objectid
>= end
)
1096 if (start
< key
.objectid
) {
1097 ret
= __add_to_free_space_tree(trans
,
1105 start
= key
.objectid
;
1106 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1107 start
+= trans
->fs_info
->nodesize
;
1109 start
+= key
.offset
;
1110 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1111 if (key
.objectid
!= block_group
->start
)
1115 ret
= btrfs_next_item(extent_root
, path
);
1122 ret
= __add_to_free_space_tree(trans
, block_group
, path2
,
1123 start
, end
- start
);
1130 mutex_unlock(&block_group
->free_space_lock
);
1132 btrfs_free_path(path2
);
1133 btrfs_free_path(path
);
/*
 * Create the free space tree for a filesystem and populate it for every
 * block group.
 *
 * A transaction is started on the tree root (PTR_ERR is returned if that
 * fails). With BTRFS_FS_CREATING_FREE_SPACE_TREE set in fs_info->flags, a
 * new tree with objectid BTRFS_FREE_SPACE_TREE_OBJECTID is created and
 * stored in fs_info->free_space_root. The block groups in
 * fs_info->block_group_cache_tree are then visited in rb-tree order and
 * populate_free_space_tree() is called for each one.
 *
 * On success the FREE_SPACE_TREE and FREE_SPACE_TREE_VALID compat_ro flags
 * are set, the creating bit is cleared and the transaction is committed. On
 * the visible error path the creating bit is cleared and the transaction is
 * aborted and ended.
 *
 * NOTE(review): the loop header and the error checks are not visible in this
 * view.
 */
1137 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1139 struct btrfs_trans_handle
*trans
;
1140 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1141 struct btrfs_root
*free_space_root
;
1142 struct btrfs_block_group
*block_group
;
1143 struct rb_node
*node
;
1146 trans
= btrfs_start_transaction(tree_root
, 0);
1148 return PTR_ERR(trans
);
1150 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1151 free_space_root
= btrfs_create_tree(trans
,
1152 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1153 if (IS_ERR(free_space_root
)) {
1154 ret
= PTR_ERR(free_space_root
);
1157 fs_info
->free_space_root
= free_space_root
;
1159 node
= rb_first(&fs_info
->block_group_cache_tree
);
1161 block_group
= rb_entry(node
, struct btrfs_block_group
,
1163 ret
= populate_free_space_tree(trans
, block_group
);
1166 node
= rb_next(node
);
1169 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1170 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1171 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1173 return btrfs_commit_transaction(trans
);
1176 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1177 btrfs_abort_transaction(trans
, ret
);
1178 btrfs_end_transaction(trans
);
/*
 * Delete items from @root leaf by leaf.
 *
 * A path is allocated with leave_spinning set. Each visible iteration
 * searches with ins_len -1 and cow 1, reads the number of items in the leaf
 * found, deletes all of them starting at slot 0 with btrfs_del_items() and
 * releases the path. The path is freed on exit.
 *
 * NOTE(review): the search key setup, the loop header and its termination
 * condition, and the error checks are not visible in this view.
 */
1182 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1183 struct btrfs_root
*root
)
1185 struct btrfs_path
*path
;
1186 struct btrfs_key key
;
1190 path
= btrfs_alloc_path();
1194 path
->leave_spinning
= 1;
1201 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1205 nr
= btrfs_header_nritems(path
->nodes
[0]);
1210 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1214 btrfs_release_path(path
);
1219 btrfs_free_path(path
);
/*
 * Remove the free space tree from a filesystem.
 *
 * A transaction is started on the tree root (PTR_ERR is returned if that
 * fails). The FREE_SPACE_TREE and FREE_SPACE_TREE_VALID compat_ro flags are
 * cleared and fs_info->free_space_root is set to NULL before the tree
 * contents are deleted with clear_free_space_tree(). The root item is then
 * deleted, the root is unlinked from its dirty list, its root node is
 * cleaned under the tree lock and freed, and the root reference is dropped.
 * The transaction is committed on success; on the visible error path it is
 * aborted and ended.
 *
 * NOTE(review): the error checks and the trailing arguments of
 * btrfs_free_tree_block() are not visible in this view.
 */
1223 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1225 struct btrfs_trans_handle
*trans
;
1226 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1227 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1230 trans
= btrfs_start_transaction(tree_root
, 0);
1232 return PTR_ERR(trans
);
1234 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1235 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1236 fs_info
->free_space_root
= NULL
;
1238 ret
= clear_free_space_tree(trans
, free_space_root
);
1242 ret
= btrfs_del_root(trans
, &free_space_root
->root_key
);
1246 list_del(&free_space_root
->dirty_list
);
1248 btrfs_tree_lock(free_space_root
->node
);
1249 btrfs_clean_tree_block(free_space_root
->node
);
1250 btrfs_tree_unlock(free_space_root
->node
);
1251 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1254 btrfs_put_root(free_space_root
);
1256 return btrfs_commit_transaction(trans
);
1259 btrfs_abort_transaction(trans
, ret
);
1260 btrfs_end_transaction(trans
);
/*
 * Add a block group to the free space tree: clear
 * block_group->needs_free_space, insert its info item with
 * add_new_free_space_info(), then hand a range of the block group (length
 * block_group->length) to __add_to_free_space_tree() and return its result.
 *
 * needs_free_space is cleared before the calls, so the nested
 * __add_to_free_space_tree() does not re-enter this function.
 *
 * NOTE(review): the check of the add_new_free_space_info() result and the
 * start argument of the final call are not visible in this view.
 */
1264 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1265 struct btrfs_block_group
*block_group
,
1266 struct btrfs_path
*path
)
1270 block_group
->needs_free_space
= 0;
1272 ret
= add_new_free_space_info(trans
, block_group
, path
);
1276 return __add_to_free_space_tree(trans
, block_group
, path
,
1278 block_group
->length
);
/*
 * Locked entry point for adding a block group to the free space tree.
 *
 * The FREE_SPACE_TREE compat_ro flag is tested first. Under
 * block_group->free_space_lock, block_group->needs_free_space is tested; a
 * path is then allocated and __add_block_group_free_space() is called. The
 * path is freed (it is initialised to NULL, so this also covers paths where
 * it was never allocated) and the lock is dropped. btrfs_abort_transaction()
 * is called on a visible error path.
 *
 * NOTE(review): the bodies of the early-exit branches, the allocation
 * failure handling and the return statement are not visible in this view.
 */
1281 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1282 struct btrfs_block_group
*block_group
)
1284 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
1285 struct btrfs_path
*path
= NULL
;
1288 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1291 mutex_lock(&block_group
->free_space_lock
);
1292 if (!block_group
->needs_free_space
)
1295 path
= btrfs_alloc_path();
1301 ret
= __add_block_group_free_space(trans
, block_group
, path
);
1304 btrfs_free_path(path
);
1305 mutex_unlock(&block_group
->free_space_lock
);
1307 btrfs_abort_transaction(trans
, ret
);
/*
 * Delete all free space tree items that belong to @block_group.
 *
 * The FREE_SPACE_TREE compat_ro flag is tested first, and a block group with
 * needs_free_space still set is special-cased because it was never added to
 * the tree. Otherwise the tree is searched backwards from the key
 * (end - 1, ..., (u64)-1) and the leaf slots are walked towards slot 0:
 * a FREE_SPACE_INFO key is asserted to match the block group, and
 * FREE_SPACE_EXTENT / FREE_SPACE_BITMAP keys are asserted to lie inside
 * [start, end). Items are removed with btrfs_del_items() and the path is
 * released. The path is freed on exit and btrfs_abort_transaction() is
 * called on a visible error path.
 *
 * NOTE(review): the outer loop, the slot/nr bookkeeping, the error checks
 * and the return statements are not visible in this view.
 */
1311 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1312 struct btrfs_block_group
*block_group
)
1314 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
1315 struct btrfs_path
*path
;
1316 struct btrfs_key key
, found_key
;
1317 struct extent_buffer
*leaf
;
1322 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1325 if (block_group
->needs_free_space
) {
1326 /* We never added this block group to the free space tree. */
1330 path
= btrfs_alloc_path();
1336 start
= block_group
->start
;
1337 end
= block_group
->start
+ block_group
->length
;
1339 key
.objectid
= end
- 1;
1341 key
.offset
= (u64
)-1;
1344 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1348 leaf
= path
->nodes
[0];
1351 while (path
->slots
[0] > 0) {
1352 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1354 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1355 ASSERT(found_key
.objectid
== block_group
->start
);
1356 ASSERT(found_key
.offset
== block_group
->length
);
1361 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1362 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1363 ASSERT(found_key
.objectid
>= start
);
1364 ASSERT(found_key
.objectid
< end
);
1365 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1373 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1376 btrfs_release_path(path
);
1381 btrfs_free_path(path
);
1383 btrfs_abort_transaction(trans
, ret
);
/*
 * Load the free space of a bitmap-format block group into the in-memory
 * free space cache.
 *
 * @path is advanced with btrfs_next_item() over the items in
 * fs_info->free_space_root; reaching a FREE_SPACE_INFO key is
 * special-cased, and every other item is asserted to be a FREE_SPACE_BITMAP
 * inside the block group. caching_ctl->progress is updated to each bitmap's
 * objectid. Within a bitmap the bits are tested one sector at a time:
 * a 0 -> 1 transition records extent_start, and a 1 -> 0 transition passes
 * the finished run to add_new_free_space(). Waiters on caching_ctl->wait are
 * woken when total_found exceeds CACHING_CTL_WAKE_UP. A run still open after
 * the last bitmap (prev_bit == 1) is added as well.
 *
 * The number of extents found is compared with @expected_extent_count; a
 * mismatch is reported with "incorrect extent count ...". Finally
 * caching_ctl->progress is set to (u64)-1.
 *
 * NOTE(review): the loop header, the extent counter increment, the error
 * checks and the return statement are not visible in this view.
 */
1387 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1388 struct btrfs_path
*path
,
1389 u32 expected_extent_count
)
1391 struct btrfs_block_group
*block_group
;
1392 struct btrfs_fs_info
*fs_info
;
1393 struct btrfs_root
*root
;
1394 struct btrfs_key key
;
1395 int prev_bit
= 0, bit
;
1396 /* Initialize to silence GCC. */
1397 u64 extent_start
= 0;
1399 u64 total_found
= 0;
1400 u32 extent_count
= 0;
1403 block_group
= caching_ctl
->block_group
;
1404 fs_info
= block_group
->fs_info
;
1405 root
= fs_info
->free_space_root
;
1407 end
= block_group
->start
+ block_group
->length
;
1410 ret
= btrfs_next_item(root
, path
);
1416 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1418 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1421 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1422 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1424 caching_ctl
->progress
= key
.objectid
;
1426 offset
= key
.objectid
;
1427 while (offset
< key
.objectid
+ key
.offset
) {
1428 bit
= free_space_test_bit(block_group
, path
, offset
);
1429 if (prev_bit
== 0 && bit
== 1) {
1430 extent_start
= offset
;
1431 } else if (prev_bit
== 1 && bit
== 0) {
1432 total_found
+= add_new_free_space(block_group
,
1435 if (total_found
> CACHING_CTL_WAKE_UP
) {
1437 wake_up(&caching_ctl
->wait
);
1442 offset
+= fs_info
->sectorsize
;
1445 if (prev_bit
== 1) {
1446 total_found
+= add_new_free_space(block_group
, extent_start
,
1451 if (extent_count
!= expected_extent_count
) {
1453 "incorrect extent count for %llu; counted %u, expected %u",
1454 block_group
->start
, extent_count
,
1455 expected_extent_count
);
1461 caching_ctl
->progress
= (u64
)-1;
/*
 * Load the free space of an extent-format block group into the in-memory
 * free space cache.
 *
 * @path is advanced with btrfs_next_item() over the items in
 * fs_info->free_space_root; reaching a FREE_SPACE_INFO key is
 * special-cased, and every other item is asserted to be a FREE_SPACE_EXTENT
 * inside the block group. For each extent caching_ctl->progress is set to
 * its objectid and [objectid, objectid + offset) is passed to
 * add_new_free_space(). Waiters on caching_ctl->wait are woken when
 * total_found exceeds CACHING_CTL_WAKE_UP.
 *
 * The number of extents found is compared with @expected_extent_count; a
 * mismatch is reported with "incorrect extent count ...". Finally
 * caching_ctl->progress is set to (u64)-1.
 *
 * NOTE(review): the loop header, the extent counter increment, the error
 * checks and the return statement are not visible in this view.
 */
1468 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1469 struct btrfs_path
*path
,
1470 u32 expected_extent_count
)
1472 struct btrfs_block_group
*block_group
;
1473 struct btrfs_fs_info
*fs_info
;
1474 struct btrfs_root
*root
;
1475 struct btrfs_key key
;
1477 u64 total_found
= 0;
1478 u32 extent_count
= 0;
1481 block_group
= caching_ctl
->block_group
;
1482 fs_info
= block_group
->fs_info
;
1483 root
= fs_info
->free_space_root
;
1485 end
= block_group
->start
+ block_group
->length
;
1488 ret
= btrfs_next_item(root
, path
);
1494 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1496 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1499 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1500 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1502 caching_ctl
->progress
= key
.objectid
;
1504 total_found
+= add_new_free_space(block_group
, key
.objectid
,
1505 key
.objectid
+ key
.offset
);
1506 if (total_found
> CACHING_CTL_WAKE_UP
) {
1508 wake_up(&caching_ctl
->wait
);
1513 if (extent_count
!= expected_extent_count
) {
1515 "incorrect extent count for %llu; counted %u, expected %u",
1516 block_group
->start
, extent_count
,
1517 expected_extent_count
);
1523 caching_ctl
->progress
= (u64
)-1;
/*
 * Load the free space of caching_ctl->block_group from the free space tree.
 *
 * A path is allocated and configured with skip_locking,
 * search_commit_root and READA_FORWARD. The block group's info item is
 * looked up read-only (NULL trans, cow 0) and its extent count and flags are
 * read. The path is left pointing at the info item, and the work is then
 * delegated to load_free_space_bitmaps() when
 * BTRFS_FREE_SPACE_USING_BITMAPS is set, or to load_free_space_extents()
 * otherwise, passing the stored extent count as the expected count. The
 * path is freed on exit.
 *
 * NOTE(review): the allocation failure handling, the IS_ERR check on the
 * lookup and the return statement are not visible in this view.
 */
1530 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1532 struct btrfs_block_group
*block_group
;
1533 struct btrfs_free_space_info
*info
;
1534 struct btrfs_path
*path
;
1535 u32 extent_count
, flags
;
1538 block_group
= caching_ctl
->block_group
;
1540 path
= btrfs_alloc_path();
1545 * Just like caching_thread() doesn't want to deadlock on the extent
1546 * tree, we don't want to deadlock on the free space tree.
1548 path
->skip_locking
= 1;
1549 path
->search_commit_root
= 1;
1550 path
->reada
= READA_FORWARD
;
1552 info
= search_free_space_info(NULL
, block_group
, path
, 0);
1554 ret
= PTR_ERR(info
);
1557 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1558 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1561 * We left path pointing to the free space info item, so now
1562 * load_free_space_foo can just iterate through the free space tree from
1565 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1566 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1568 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1571 btrfs_free_path(path
);