// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook.  All rights reserved.
 */
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
11 #include "free-space-tree.h"
12 #include "transaction.h"
14 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
15 struct btrfs_block_group_cache
*block_group
,
16 struct btrfs_path
*path
);
18 void set_free_space_tree_thresholds(struct btrfs_block_group_cache
*cache
)
22 u64 num_bitmaps
, total_bitmap_size
;
25 * We convert to bitmaps when the disk space required for using extents
26 * exceeds that required for using bitmaps.
28 bitmap_range
= cache
->fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
29 num_bitmaps
= div_u64(cache
->key
.offset
+ bitmap_range
- 1,
31 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
32 total_bitmap_size
= num_bitmaps
* bitmap_size
;
33 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
34 sizeof(struct btrfs_item
));
37 * We allow for a small buffer between the high threshold and low
38 * threshold to avoid thrashing back and forth between the two formats.
40 if (cache
->bitmap_high_thresh
> 100)
41 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
43 cache
->bitmap_low_thresh
= 0;
46 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
47 struct btrfs_block_group_cache
*block_group
,
48 struct btrfs_path
*path
)
50 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
51 struct btrfs_free_space_info
*info
;
53 struct extent_buffer
*leaf
;
56 key
.objectid
= block_group
->key
.objectid
;
57 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
58 key
.offset
= block_group
->key
.offset
;
60 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
64 leaf
= path
->nodes
[0];
65 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
66 struct btrfs_free_space_info
);
67 btrfs_set_free_space_extent_count(leaf
, info
, 0);
68 btrfs_set_free_space_flags(leaf
, info
, 0);
69 btrfs_mark_buffer_dirty(leaf
);
73 btrfs_release_path(path
);
78 struct btrfs_free_space_info
*search_free_space_info(
79 struct btrfs_trans_handle
*trans
,
80 struct btrfs_block_group_cache
*block_group
,
81 struct btrfs_path
*path
, int cow
)
83 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
84 struct btrfs_root
*root
= fs_info
->free_space_root
;
88 key
.objectid
= block_group
->key
.objectid
;
89 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
90 key
.offset
= block_group
->key
.offset
;
92 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
96 btrfs_warn(fs_info
, "missing free space info for %llu",
97 block_group
->key
.objectid
);
99 return ERR_PTR(-ENOENT
);
102 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
103 struct btrfs_free_space_info
);
107 * btrfs_search_slot() but we're looking for the greatest key less than the
110 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
111 struct btrfs_root
*root
,
112 struct btrfs_key
*key
, struct btrfs_path
*p
,
113 int ins_len
, int cow
)
117 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
126 if (p
->slots
[0] == 0) {
135 static inline u32
free_space_bitmap_size(u64 size
, u32 sectorsize
)
137 return DIV_ROUND_UP((u32
)div_u64(size
, sectorsize
), BITS_PER_BYTE
);
140 static unsigned long *alloc_bitmap(u32 bitmap_size
)
143 unsigned int nofs_flag
;
144 u32 bitmap_rounded_size
= round_up(bitmap_size
, sizeof(unsigned long));
147 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
148 * into the filesystem as the free space bitmap can be modified in the
149 * critical section of a transaction commit.
151 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
152 * know that recursion is unsafe.
154 nofs_flag
= memalloc_nofs_save();
155 ret
= kvzalloc(bitmap_rounded_size
, GFP_KERNEL
);
156 memalloc_nofs_restore(nofs_flag
);
160 static void le_bitmap_set(unsigned long *map
, unsigned int start
, int len
)
162 u8
*p
= ((u8
*)map
) + BIT_BYTE(start
);
163 const unsigned int size
= start
+ len
;
164 int bits_to_set
= BITS_PER_BYTE
- (start
% BITS_PER_BYTE
);
165 u8 mask_to_set
= BITMAP_FIRST_BYTE_MASK(start
);
167 while (len
- bits_to_set
>= 0) {
170 bits_to_set
= BITS_PER_BYTE
;
175 mask_to_set
&= BITMAP_LAST_BYTE_MASK(size
);
181 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
182 struct btrfs_block_group_cache
*block_group
,
183 struct btrfs_path
*path
)
185 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
186 struct btrfs_root
*root
= fs_info
->free_space_root
;
187 struct btrfs_free_space_info
*info
;
188 struct btrfs_key key
, found_key
;
189 struct extent_buffer
*leaf
;
190 unsigned long *bitmap
;
194 u32 bitmap_size
, flags
, expected_extent_count
;
195 u32 extent_count
= 0;
199 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
200 fs_info
->sectorsize
);
201 bitmap
= alloc_bitmap(bitmap_size
);
207 start
= block_group
->key
.objectid
;
208 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
210 key
.objectid
= end
- 1;
212 key
.offset
= (u64
)-1;
215 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
219 leaf
= path
->nodes
[0];
222 while (path
->slots
[0] > 0) {
223 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
225 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
226 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
227 ASSERT(found_key
.offset
== block_group
->key
.offset
);
230 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
233 ASSERT(found_key
.objectid
>= start
);
234 ASSERT(found_key
.objectid
< end
);
235 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
237 first
= div_u64(found_key
.objectid
- start
,
238 fs_info
->sectorsize
);
239 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
240 fs_info
->sectorsize
);
241 le_bitmap_set(bitmap
, first
, last
- first
);
251 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
254 btrfs_release_path(path
);
257 info
= search_free_space_info(trans
, block_group
, path
, 1);
262 leaf
= path
->nodes
[0];
263 flags
= btrfs_free_space_flags(leaf
, info
);
264 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
265 btrfs_set_free_space_flags(leaf
, info
, flags
);
266 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
267 btrfs_mark_buffer_dirty(leaf
);
268 btrfs_release_path(path
);
270 if (extent_count
!= expected_extent_count
) {
272 "incorrect extent count for %llu; counted %u, expected %u",
273 block_group
->key
.objectid
, extent_count
,
274 expected_extent_count
);
280 bitmap_cursor
= (char *)bitmap
;
281 bitmap_range
= fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
288 extent_size
= min(end
- i
, bitmap_range
);
289 data_size
= free_space_bitmap_size(extent_size
,
290 fs_info
->sectorsize
);
293 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
294 key
.offset
= extent_size
;
296 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
301 leaf
= path
->nodes
[0];
302 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
303 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
305 btrfs_mark_buffer_dirty(leaf
);
306 btrfs_release_path(path
);
309 bitmap_cursor
+= data_size
;
316 btrfs_abort_transaction(trans
, ret
);
321 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
322 struct btrfs_block_group_cache
*block_group
,
323 struct btrfs_path
*path
)
325 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
326 struct btrfs_root
*root
= fs_info
->free_space_root
;
327 struct btrfs_free_space_info
*info
;
328 struct btrfs_key key
, found_key
;
329 struct extent_buffer
*leaf
;
330 unsigned long *bitmap
;
332 u32 bitmap_size
, flags
, expected_extent_count
;
333 unsigned long nrbits
, start_bit
, end_bit
;
334 u32 extent_count
= 0;
338 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
339 fs_info
->sectorsize
);
340 bitmap
= alloc_bitmap(bitmap_size
);
346 start
= block_group
->key
.objectid
;
347 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
349 key
.objectid
= end
- 1;
351 key
.offset
= (u64
)-1;
354 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
358 leaf
= path
->nodes
[0];
361 while (path
->slots
[0] > 0) {
362 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
364 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
365 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
366 ASSERT(found_key
.offset
== block_group
->key
.offset
);
369 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
372 u32 bitmap_pos
, data_size
;
374 ASSERT(found_key
.objectid
>= start
);
375 ASSERT(found_key
.objectid
< end
);
376 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
378 bitmap_pos
= div_u64(found_key
.objectid
- start
,
379 fs_info
->sectorsize
*
381 bitmap_cursor
= ((char *)bitmap
) + bitmap_pos
;
382 data_size
= free_space_bitmap_size(found_key
.offset
,
383 fs_info
->sectorsize
);
385 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
386 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
396 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
399 btrfs_release_path(path
);
402 info
= search_free_space_info(trans
, block_group
, path
, 1);
407 leaf
= path
->nodes
[0];
408 flags
= btrfs_free_space_flags(leaf
, info
);
409 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
410 btrfs_set_free_space_flags(leaf
, info
, flags
);
411 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
412 btrfs_mark_buffer_dirty(leaf
);
413 btrfs_release_path(path
);
415 nrbits
= div_u64(block_group
->key
.offset
, block_group
->fs_info
->sectorsize
);
416 start_bit
= find_next_bit_le(bitmap
, nrbits
, 0);
418 while (start_bit
< nrbits
) {
419 end_bit
= find_next_zero_bit_le(bitmap
, nrbits
, start_bit
);
420 ASSERT(start_bit
< end_bit
);
422 key
.objectid
= start
+ start_bit
* block_group
->fs_info
->sectorsize
;
423 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
424 key
.offset
= (end_bit
- start_bit
) * block_group
->fs_info
->sectorsize
;
426 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
429 btrfs_release_path(path
);
433 start_bit
= find_next_bit_le(bitmap
, nrbits
, end_bit
);
436 if (extent_count
!= expected_extent_count
) {
438 "incorrect extent count for %llu; counted %u, expected %u",
439 block_group
->key
.objectid
, extent_count
,
440 expected_extent_count
);
450 btrfs_abort_transaction(trans
, ret
);
454 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
455 struct btrfs_block_group_cache
*block_group
,
456 struct btrfs_path
*path
,
459 struct btrfs_free_space_info
*info
;
464 if (new_extents
== 0)
467 info
= search_free_space_info(trans
, block_group
, path
, 1);
472 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
473 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
475 extent_count
+= new_extents
;
476 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
477 btrfs_mark_buffer_dirty(path
->nodes
[0]);
478 btrfs_release_path(path
);
480 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
481 extent_count
> block_group
->bitmap_high_thresh
) {
482 ret
= convert_free_space_to_bitmaps(trans
, block_group
, path
);
483 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
484 extent_count
< block_group
->bitmap_low_thresh
) {
485 ret
= convert_free_space_to_extents(trans
, block_group
, path
);
493 int free_space_test_bit(struct btrfs_block_group_cache
*block_group
,
494 struct btrfs_path
*path
, u64 offset
)
496 struct extent_buffer
*leaf
;
497 struct btrfs_key key
;
498 u64 found_start
, found_end
;
499 unsigned long ptr
, i
;
501 leaf
= path
->nodes
[0];
502 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
503 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
505 found_start
= key
.objectid
;
506 found_end
= key
.objectid
+ key
.offset
;
507 ASSERT(offset
>= found_start
&& offset
< found_end
);
509 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
510 i
= div_u64(offset
- found_start
,
511 block_group
->fs_info
->sectorsize
);
512 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
515 static void free_space_set_bits(struct btrfs_block_group_cache
*block_group
,
516 struct btrfs_path
*path
, u64
*start
, u64
*size
,
519 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
520 struct extent_buffer
*leaf
;
521 struct btrfs_key key
;
522 u64 end
= *start
+ *size
;
523 u64 found_start
, found_end
;
524 unsigned long ptr
, first
, last
;
526 leaf
= path
->nodes
[0];
527 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
528 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
530 found_start
= key
.objectid
;
531 found_end
= key
.objectid
+ key
.offset
;
532 ASSERT(*start
>= found_start
&& *start
< found_end
);
533 ASSERT(end
> found_start
);
538 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
539 first
= div_u64(*start
- found_start
, fs_info
->sectorsize
);
540 last
= div_u64(end
- found_start
, fs_info
->sectorsize
);
542 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
544 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
545 btrfs_mark_buffer_dirty(leaf
);
547 *size
-= end
- *start
;
552 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
553 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
554 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
557 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
558 struct btrfs_root
*root
, struct btrfs_path
*p
)
560 struct btrfs_key key
;
562 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
567 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
568 btrfs_release_path(p
);
570 key
.objectid
+= key
.offset
;
572 key
.offset
= (u64
)-1;
574 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
578 * If remove is 1, then we are removing free space, thus clearing bits in the
579 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
582 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
583 struct btrfs_block_group_cache
*block_group
,
584 struct btrfs_path
*path
,
585 u64 start
, u64 size
, int remove
)
587 struct btrfs_root
*root
= block_group
->fs_info
->free_space_root
;
588 struct btrfs_key key
;
589 u64 end
= start
+ size
;
590 u64 cur_start
, cur_size
;
591 int prev_bit
, next_bit
;
596 * Read the bit for the block immediately before the extent of space if
597 * that block is within the block group.
599 if (start
> block_group
->key
.objectid
) {
600 u64 prev_block
= start
- block_group
->fs_info
->sectorsize
;
602 key
.objectid
= prev_block
;
604 key
.offset
= (u64
)-1;
606 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
610 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
612 /* The previous block may have been in the previous bitmap. */
613 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
614 if (start
>= key
.objectid
+ key
.offset
) {
615 ret
= free_space_next_bitmap(trans
, root
, path
);
620 key
.objectid
= start
;
622 key
.offset
= (u64
)-1;
624 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
632 * Iterate over all of the bitmaps overlapped by the extent of space,
633 * clearing/setting bits as required.
638 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
642 ret
= free_space_next_bitmap(trans
, root
, path
);
648 * Read the bit for the block immediately after the extent of space if
649 * that block is within the block group.
651 if (end
< block_group
->key
.objectid
+ block_group
->key
.offset
) {
652 /* The next block may be in the next bitmap. */
653 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
654 if (end
>= key
.objectid
+ key
.offset
) {
655 ret
= free_space_next_bitmap(trans
, root
, path
);
660 next_bit
= free_space_test_bit(block_group
, path
, end
);
668 /* Leftover on the left. */
672 /* Leftover on the right. */
678 /* Merging with neighbor on the left. */
682 /* Merging with neighbor on the right. */
687 btrfs_release_path(path
);
688 ret
= update_free_space_extent_count(trans
, block_group
, path
,
695 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
696 struct btrfs_block_group_cache
*block_group
,
697 struct btrfs_path
*path
,
700 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
701 struct btrfs_key key
;
702 u64 found_start
, found_end
;
703 u64 end
= start
+ size
;
704 int new_extents
= -1;
707 key
.objectid
= start
;
709 key
.offset
= (u64
)-1;
711 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
715 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
717 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
719 found_start
= key
.objectid
;
720 found_end
= key
.objectid
+ key
.offset
;
721 ASSERT(start
>= found_start
&& end
<= found_end
);
724 * Okay, now that we've found the free space extent which contains the
725 * free space that we are removing, there are four cases:
727 * 1. We're using the whole extent: delete the key we found and
728 * decrement the free space extent count.
729 * 2. We are using part of the extent starting at the beginning: delete
730 * the key we found and insert a new key representing the leftover at
731 * the end. There is no net change in the number of extents.
732 * 3. We are using part of the extent ending at the end: delete the key
733 * we found and insert a new key representing the leftover at the
734 * beginning. There is no net change in the number of extents.
735 * 4. We are using part of the extent in the middle: delete the key we
736 * found and insert two new keys representing the leftovers on each
737 * side. Where we used to have one extent, we now have two, so increment
738 * the extent count. We may need to convert the block group to bitmaps
742 /* Delete the existing key (cases 1-4). */
743 ret
= btrfs_del_item(trans
, root
, path
);
747 /* Add a key for leftovers at the beginning (cases 3 and 4). */
748 if (start
> found_start
) {
749 key
.objectid
= found_start
;
750 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
751 key
.offset
= start
- found_start
;
753 btrfs_release_path(path
);
754 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
760 /* Add a key for leftovers at the end (cases 2 and 4). */
761 if (end
< found_end
) {
763 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
764 key
.offset
= found_end
- end
;
766 btrfs_release_path(path
);
767 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
773 btrfs_release_path(path
);
774 ret
= update_free_space_extent_count(trans
, block_group
, path
,
782 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
783 struct btrfs_block_group_cache
*block_group
,
784 struct btrfs_path
*path
, u64 start
, u64 size
)
786 struct btrfs_free_space_info
*info
;
790 if (block_group
->needs_free_space
) {
791 ret
= __add_block_group_free_space(trans
, block_group
, path
);
796 info
= search_free_space_info(NULL
, block_group
, path
, 0);
798 return PTR_ERR(info
);
799 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
800 btrfs_release_path(path
);
802 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
803 return modify_free_space_bitmap(trans
, block_group
, path
,
806 return remove_free_space_extent(trans
, block_group
, path
,
811 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
814 struct btrfs_block_group_cache
*block_group
;
815 struct btrfs_path
*path
;
818 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
821 path
= btrfs_alloc_path();
827 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
834 mutex_lock(&block_group
->free_space_lock
);
835 ret
= __remove_from_free_space_tree(trans
, block_group
, path
, start
,
837 mutex_unlock(&block_group
->free_space_lock
);
839 btrfs_put_block_group(block_group
);
841 btrfs_free_path(path
);
843 btrfs_abort_transaction(trans
, ret
);
847 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
848 struct btrfs_block_group_cache
*block_group
,
849 struct btrfs_path
*path
,
852 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
853 struct btrfs_key key
, new_key
;
854 u64 found_start
, found_end
;
855 u64 end
= start
+ size
;
860 * We are adding a new extent of free space, but we need to merge
861 * extents. There are four cases here:
863 * 1. The new extent does not have any immediate neighbors to merge
864 * with: add the new key and increment the free space extent count. We
865 * may need to convert the block group to bitmaps as a result.
866 * 2. The new extent has an immediate neighbor before it: remove the
867 * previous key and insert a new key combining both of them. There is no
868 * net change in the number of extents.
869 * 3. The new extent has an immediate neighbor after it: remove the next
870 * key and insert a new key combining both of them. There is no net
871 * change in the number of extents.
872 * 4. The new extent has immediate neighbors on both sides: remove both
873 * of the keys and insert a new key combining all of them. Where we used
874 * to have two extents, we now have one, so decrement the extent count.
877 new_key
.objectid
= start
;
878 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
879 new_key
.offset
= size
;
881 /* Search for a neighbor on the left. */
882 if (start
== block_group
->key
.objectid
)
884 key
.objectid
= start
- 1;
886 key
.offset
= (u64
)-1;
888 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
892 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
894 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
895 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
896 btrfs_release_path(path
);
900 found_start
= key
.objectid
;
901 found_end
= key
.objectid
+ key
.offset
;
902 ASSERT(found_start
>= block_group
->key
.objectid
&&
903 found_end
> block_group
->key
.objectid
);
904 ASSERT(found_start
< start
&& found_end
<= start
);
907 * Delete the neighbor on the left and absorb it into the new key (cases
910 if (found_end
== start
) {
911 ret
= btrfs_del_item(trans
, root
, path
);
914 new_key
.objectid
= found_start
;
915 new_key
.offset
+= key
.offset
;
918 btrfs_release_path(path
);
921 /* Search for a neighbor on the right. */
922 if (end
== block_group
->key
.objectid
+ block_group
->key
.offset
)
926 key
.offset
= (u64
)-1;
928 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
932 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
934 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
935 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
936 btrfs_release_path(path
);
940 found_start
= key
.objectid
;
941 found_end
= key
.objectid
+ key
.offset
;
942 ASSERT(found_start
>= block_group
->key
.objectid
&&
943 found_end
> block_group
->key
.objectid
);
944 ASSERT((found_start
< start
&& found_end
<= start
) ||
945 (found_start
>= end
&& found_end
> end
));
948 * Delete the neighbor on the right and absorb it into the new key
951 if (found_start
== end
) {
952 ret
= btrfs_del_item(trans
, root
, path
);
955 new_key
.offset
+= key
.offset
;
958 btrfs_release_path(path
);
961 /* Insert the new key (cases 1-4). */
962 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
966 btrfs_release_path(path
);
967 ret
= update_free_space_extent_count(trans
, block_group
, path
,
975 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
976 struct btrfs_block_group_cache
*block_group
,
977 struct btrfs_path
*path
, u64 start
, u64 size
)
979 struct btrfs_free_space_info
*info
;
983 if (block_group
->needs_free_space
) {
984 ret
= __add_block_group_free_space(trans
, block_group
, path
);
989 info
= search_free_space_info(NULL
, block_group
, path
, 0);
991 return PTR_ERR(info
);
992 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
993 btrfs_release_path(path
);
995 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
996 return modify_free_space_bitmap(trans
, block_group
, path
,
999 return add_free_space_extent(trans
, block_group
, path
, start
,
1004 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1005 u64 start
, u64 size
)
1007 struct btrfs_block_group_cache
*block_group
;
1008 struct btrfs_path
*path
;
1011 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1014 path
= btrfs_alloc_path();
1020 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
1027 mutex_lock(&block_group
->free_space_lock
);
1028 ret
= __add_to_free_space_tree(trans
, block_group
, path
, start
, size
);
1029 mutex_unlock(&block_group
->free_space_lock
);
1031 btrfs_put_block_group(block_group
);
1033 btrfs_free_path(path
);
1035 btrfs_abort_transaction(trans
, ret
);
1040 * Populate the free space tree by walking the extent tree. Operations on the
1041 * extent tree that happen as a result of writes to the free space tree will go
1042 * through the normal add/remove hooks.
1044 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1045 struct btrfs_block_group_cache
*block_group
)
1047 struct btrfs_root
*extent_root
= trans
->fs_info
->extent_root
;
1048 struct btrfs_path
*path
, *path2
;
1049 struct btrfs_key key
;
1053 path
= btrfs_alloc_path();
1056 path
->reada
= READA_FORWARD
;
1058 path2
= btrfs_alloc_path();
1060 btrfs_free_path(path
);
1064 ret
= add_new_free_space_info(trans
, block_group
, path2
);
1068 mutex_lock(&block_group
->free_space_lock
);
1071 * Iterate through all of the extent and metadata items in this block
1072 * group, adding the free space between them and the free space at the
1073 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1074 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1077 key
.objectid
= block_group
->key
.objectid
;
1078 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1081 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1086 start
= block_group
->key
.objectid
;
1087 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1089 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1091 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1092 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1093 if (key
.objectid
>= end
)
1096 if (start
< key
.objectid
) {
1097 ret
= __add_to_free_space_tree(trans
,
1105 start
= key
.objectid
;
1106 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1107 start
+= trans
->fs_info
->nodesize
;
1109 start
+= key
.offset
;
1110 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1111 if (key
.objectid
!= block_group
->key
.objectid
)
1115 ret
= btrfs_next_item(extent_root
, path
);
1122 ret
= __add_to_free_space_tree(trans
, block_group
, path2
,
1123 start
, end
- start
);
1130 mutex_unlock(&block_group
->free_space_lock
);
1132 btrfs_free_path(path2
);
1133 btrfs_free_path(path
);
1137 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1139 struct btrfs_trans_handle
*trans
;
1140 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1141 struct btrfs_root
*free_space_root
;
1142 struct btrfs_block_group_cache
*block_group
;
1143 struct rb_node
*node
;
1146 trans
= btrfs_start_transaction(tree_root
, 0);
1148 return PTR_ERR(trans
);
1150 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1151 free_space_root
= btrfs_create_tree(trans
,
1152 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1153 if (IS_ERR(free_space_root
)) {
1154 ret
= PTR_ERR(free_space_root
);
1157 fs_info
->free_space_root
= free_space_root
;
1159 node
= rb_first(&fs_info
->block_group_cache_tree
);
1161 block_group
= rb_entry(node
, struct btrfs_block_group_cache
,
1163 ret
= populate_free_space_tree(trans
, block_group
);
1166 node
= rb_next(node
);
1169 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1170 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1171 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1173 return btrfs_commit_transaction(trans
);
1176 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1177 btrfs_abort_transaction(trans
, ret
);
1178 btrfs_end_transaction(trans
);
1182 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1183 struct btrfs_root
*root
)
1185 struct btrfs_path
*path
;
1186 struct btrfs_key key
;
1190 path
= btrfs_alloc_path();
1194 path
->leave_spinning
= 1;
1201 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1205 nr
= btrfs_header_nritems(path
->nodes
[0]);
1210 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1214 btrfs_release_path(path
);
1219 btrfs_free_path(path
);
1223 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1225 struct btrfs_trans_handle
*trans
;
1226 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1227 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1230 trans
= btrfs_start_transaction(tree_root
, 0);
1232 return PTR_ERR(trans
);
1234 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1235 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1236 fs_info
->free_space_root
= NULL
;
1238 ret
= clear_free_space_tree(trans
, free_space_root
);
1242 ret
= btrfs_del_root(trans
, &free_space_root
->root_key
);
1246 list_del(&free_space_root
->dirty_list
);
1248 btrfs_tree_lock(free_space_root
->node
);
1249 btrfs_clean_tree_block(free_space_root
->node
);
1250 btrfs_tree_unlock(free_space_root
->node
);
1251 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1254 free_extent_buffer(free_space_root
->node
);
1255 free_extent_buffer(free_space_root
->commit_root
);
1256 kfree(free_space_root
);
1258 return btrfs_commit_transaction(trans
);
1261 btrfs_abort_transaction(trans
, ret
);
1262 btrfs_end_transaction(trans
);
1266 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1267 struct btrfs_block_group_cache
*block_group
,
1268 struct btrfs_path
*path
)
1272 block_group
->needs_free_space
= 0;
1274 ret
= add_new_free_space_info(trans
, block_group
, path
);
1278 return __add_to_free_space_tree(trans
, block_group
, path
,
1279 block_group
->key
.objectid
,
1280 block_group
->key
.offset
);
1283 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1284 struct btrfs_block_group_cache
*block_group
)
1286 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
1287 struct btrfs_path
*path
= NULL
;
1290 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1293 mutex_lock(&block_group
->free_space_lock
);
1294 if (!block_group
->needs_free_space
)
1297 path
= btrfs_alloc_path();
1303 ret
= __add_block_group_free_space(trans
, block_group
, path
);
1306 btrfs_free_path(path
);
1307 mutex_unlock(&block_group
->free_space_lock
);
1309 btrfs_abort_transaction(trans
, ret
);
1313 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1314 struct btrfs_block_group_cache
*block_group
)
1316 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
1317 struct btrfs_path
*path
;
1318 struct btrfs_key key
, found_key
;
1319 struct extent_buffer
*leaf
;
1324 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1327 if (block_group
->needs_free_space
) {
1328 /* We never added this block group to the free space tree. */
1332 path
= btrfs_alloc_path();
1338 start
= block_group
->key
.objectid
;
1339 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1341 key
.objectid
= end
- 1;
1343 key
.offset
= (u64
)-1;
1346 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1350 leaf
= path
->nodes
[0];
1353 while (path
->slots
[0] > 0) {
1354 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1356 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1357 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
1358 ASSERT(found_key
.offset
== block_group
->key
.offset
);
1363 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1364 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1365 ASSERT(found_key
.objectid
>= start
);
1366 ASSERT(found_key
.objectid
< end
);
1367 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1375 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1378 btrfs_release_path(path
);
1383 btrfs_free_path(path
);
1385 btrfs_abort_transaction(trans
, ret
);
1389 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1390 struct btrfs_path
*path
,
1391 u32 expected_extent_count
)
1393 struct btrfs_block_group_cache
*block_group
;
1394 struct btrfs_fs_info
*fs_info
;
1395 struct btrfs_root
*root
;
1396 struct btrfs_key key
;
1397 int prev_bit
= 0, bit
;
1398 /* Initialize to silence GCC. */
1399 u64 extent_start
= 0;
1401 u64 total_found
= 0;
1402 u32 extent_count
= 0;
1405 block_group
= caching_ctl
->block_group
;
1406 fs_info
= block_group
->fs_info
;
1407 root
= fs_info
->free_space_root
;
1409 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1412 ret
= btrfs_next_item(root
, path
);
1418 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1420 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1423 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1424 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1426 caching_ctl
->progress
= key
.objectid
;
1428 offset
= key
.objectid
;
1429 while (offset
< key
.objectid
+ key
.offset
) {
1430 bit
= free_space_test_bit(block_group
, path
, offset
);
1431 if (prev_bit
== 0 && bit
== 1) {
1432 extent_start
= offset
;
1433 } else if (prev_bit
== 1 && bit
== 0) {
1434 total_found
+= add_new_free_space(block_group
,
1437 if (total_found
> CACHING_CTL_WAKE_UP
) {
1439 wake_up(&caching_ctl
->wait
);
1444 offset
+= fs_info
->sectorsize
;
1447 if (prev_bit
== 1) {
1448 total_found
+= add_new_free_space(block_group
, extent_start
,
1453 if (extent_count
!= expected_extent_count
) {
1455 "incorrect extent count for %llu; counted %u, expected %u",
1456 block_group
->key
.objectid
, extent_count
,
1457 expected_extent_count
);
1463 caching_ctl
->progress
= (u64
)-1;
1470 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1471 struct btrfs_path
*path
,
1472 u32 expected_extent_count
)
1474 struct btrfs_block_group_cache
*block_group
;
1475 struct btrfs_fs_info
*fs_info
;
1476 struct btrfs_root
*root
;
1477 struct btrfs_key key
;
1479 u64 total_found
= 0;
1480 u32 extent_count
= 0;
1483 block_group
= caching_ctl
->block_group
;
1484 fs_info
= block_group
->fs_info
;
1485 root
= fs_info
->free_space_root
;
1487 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1490 ret
= btrfs_next_item(root
, path
);
1496 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1498 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1501 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1502 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1504 caching_ctl
->progress
= key
.objectid
;
1506 total_found
+= add_new_free_space(block_group
, key
.objectid
,
1507 key
.objectid
+ key
.offset
);
1508 if (total_found
> CACHING_CTL_WAKE_UP
) {
1510 wake_up(&caching_ctl
->wait
);
1515 if (extent_count
!= expected_extent_count
) {
1517 "incorrect extent count for %llu; counted %u, expected %u",
1518 block_group
->key
.objectid
, extent_count
,
1519 expected_extent_count
);
1525 caching_ctl
->progress
= (u64
)-1;
1532 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1534 struct btrfs_block_group_cache
*block_group
;
1535 struct btrfs_free_space_info
*info
;
1536 struct btrfs_path
*path
;
1537 u32 extent_count
, flags
;
1540 block_group
= caching_ctl
->block_group
;
1542 path
= btrfs_alloc_path();
1547 * Just like caching_thread() doesn't want to deadlock on the extent
1548 * tree, we don't want to deadlock on the free space tree.
1550 path
->skip_locking
= 1;
1551 path
->search_commit_root
= 1;
1552 path
->reada
= READA_FORWARD
;
1554 info
= search_free_space_info(NULL
, block_group
, path
, 0);
1556 ret
= PTR_ERR(info
);
1559 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1560 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1563 * We left path pointing to the free space info item, so now
1564 * load_free_space_foo can just iterate through the free space tree from
1567 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1568 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1570 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1573 btrfs_free_path(path
);