// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "free-space-tree.h"
#include "transaction.h"

static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path);

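/*
 * Work out the extent counts at which a block group's free space should be
 * converted between the extent and bitmap representations.
 */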
void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
{
        u32 bitmap_range;
        size_t bitmap_size;
        u64 num_bitmaps, total_bitmap_size;

        /*
         * We convert to bitmaps when the disk space required for using extents
         * exceeds that required for using bitmaps.
         */
        bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
        num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
                              bitmap_range);
        bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
        total_bitmap_size = num_bitmaps * bitmap_size;
        cache->bitmap_high_thresh = div_u64(total_bitmap_size,
                                            sizeof(struct btrfs_item));

        /*
         * We allow for a small buffer between the high threshold and low
         * threshold to avoid thrashing back and forth between the two formats.
         */
        if (cache->bitmap_high_thresh > 100)
                cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
        else
                cache->bitmap_low_thresh = 0;
}

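/*
 * Insert the FREE_SPACE_INFO item that tracks a block group's free space
 * extent count and flags, initialized to zero.
 */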
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_free_space_info *info;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int ret;

        key.objectid = block_group->key.objectid;
        key.type = BTRFS_FREE_SPACE_INFO_KEY;
        key.offset = block_group->key.offset;

        ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
        if (ret)
                goto out;

        leaf = path->nodes[0];
        info = btrfs_item_ptr(leaf, path->slots[0],
                              struct btrfs_free_space_info);
        btrfs_set_free_space_extent_count(leaf, info, 0);
        btrfs_set_free_space_flags(leaf, info, 0);
        btrfs_mark_buffer_dirty(leaf);

        ret = 0;
out:
        btrfs_release_path(path);
        return ret;
}

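/*
 * Look up the FREE_SPACE_INFO item for a block group and leave the path
 * pointing at it, or return an ERR_PTR if it is missing.
 */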
struct btrfs_free_space_info *
search_free_space_info(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info,
                       struct btrfs_block_group_cache *block_group,
                       struct btrfs_path *path, int cow)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_key key;
        int ret;

        key.objectid = block_group->key.objectid;
        key.type = BTRFS_FREE_SPACE_INFO_KEY;
        key.offset = block_group->key.offset;

        ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
        if (ret < 0)
                return ERR_PTR(ret);
        if (ret != 0) {
                btrfs_warn(fs_info, "missing free space info for %llu",
                           block_group->key.objectid);
                ASSERT(0);
                return ERR_PTR(-ENOENT);
        }

        return btrfs_item_ptr(path->nodes[0], path->slots[0],
                              struct btrfs_free_space_info);
}

/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 */
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
                struct btrfs_root *root,
                struct btrfs_key *key, struct btrfs_path *p,
                int ins_len, int cow)
{
        int ret;

        ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
        if (ret < 0)
                return ret;

        if (ret == 0) {
                ASSERT(0);
                return -EIO;
        }

        if (p->slots[0] == 0) {
                ASSERT(0);
                return -EIO;
        }
        p->slots[0]--;

        return 0;
}

static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
{
        return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
}

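/*
 * Allocate the in-memory scratch bitmap used while converting a block group
 * between the extent and bitmap representations.
 */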
static u8 *alloc_bitmap(u32 bitmap_size)
{
        u8 *ret;
        unsigned int nofs_flag;

        /*
         * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
         * into the filesystem as the free space bitmap can be modified in the
         * critical section of a transaction commit.
         *
         * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
         * know that recursion is unsafe.
         */
        nofs_flag = memalloc_nofs_save();
        ret = kvzalloc(bitmap_size, GFP_KERNEL);
        memalloc_nofs_restore(nofs_flag);
        return ret;
}

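/*
 * Convert a block group's free space from FREE_SPACE_EXTENT items to
 * FREE_SPACE_BITMAP items: collect the existing extents into an in-memory
 * bitmap, delete them, and write the bitmap back out as bitmap items.
 */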
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_free_space_info *info;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
        u8 *bitmap, *bitmap_cursor;
        u64 start, end;
        u64 bitmap_range, i;
        u32 bitmap_size, flags, expected_extent_count;
        u32 extent_count = 0;
        int done = 0, nr;
        int ret;

        bitmap_size = free_space_bitmap_size(block_group->key.offset,
                                             fs_info->sectorsize);
        bitmap = alloc_bitmap(bitmap_size);
        if (!bitmap) {
                ret = -ENOMEM;
                goto out;
        }

        start = block_group->key.objectid;
        end = block_group->key.objectid + block_group->key.offset;

        key.objectid = end - 1;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        while (!done) {
                ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
                if (ret)
                        goto out;

                leaf = path->nodes[0];
                nr = 0;
                path->slots[0]++;
                while (path->slots[0] > 0) {
                        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

                        if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
                                ASSERT(found_key.objectid == block_group->key.objectid);
                                ASSERT(found_key.offset == block_group->key.offset);
                                done = 1;
                                break;
                        } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
                                u64 first, last;

                                ASSERT(found_key.objectid >= start);
                                ASSERT(found_key.objectid < end);
                                ASSERT(found_key.objectid + found_key.offset <= end);

                                first = div_u64(found_key.objectid - start,
                                                fs_info->sectorsize);
                                last = div_u64(found_key.objectid + found_key.offset - start,
                                               fs_info->sectorsize);
                                le_bitmap_set(bitmap, first, last - first);

                                extent_count++;
                                nr++;
                                path->slots[0]--;
                        } else {
                                ASSERT(0);
                        }
                }

                ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
                if (ret)
                        goto out;
                btrfs_release_path(path);
        }

        info = search_free_space_info(trans, fs_info, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
        }
        leaf = path->nodes[0];
        flags = btrfs_free_space_flags(leaf, info);
        flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
        btrfs_set_free_space_flags(leaf, info, flags);
        expected_extent_count = btrfs_free_space_extent_count(leaf, info);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);

        if (extent_count != expected_extent_count) {
                btrfs_err(fs_info,
                          "incorrect extent count for %llu; counted %u, expected %u",
                          block_group->key.objectid, extent_count,
                          expected_extent_count);
                ASSERT(0);
                ret = -EIO;
                goto out;
        }

        bitmap_cursor = bitmap;
        bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
        i = start;
        while (i < end) {
                unsigned long ptr;
                u64 extent_size;
                u32 data_size;

                extent_size = min(end - i, bitmap_range);
                data_size = free_space_bitmap_size(extent_size,
                                                   fs_info->sectorsize);

                key.objectid = i;
                key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
                key.offset = extent_size;

                ret = btrfs_insert_empty_item(trans, root, path, &key,
                                              data_size);
                if (ret)
                        goto out;

                leaf = path->nodes[0];
                ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
                write_extent_buffer(leaf, bitmap_cursor, ptr,
                                    data_size);
                btrfs_mark_buffer_dirty(leaf);
                btrfs_release_path(path);

                i += extent_size;
                bitmap_cursor += data_size;
        }

        ret = 0;
out:
        kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

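/*
 * Convert a block group's free space from FREE_SPACE_BITMAP items back to
 * FREE_SPACE_EXTENT items: read the bitmaps into memory, delete them, and
 * insert one extent item per run of set bits.
 */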
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_free_space_info *info;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
        u8 *bitmap;
        u64 start, end;
        /* Initialize to silence GCC. */
        u64 extent_start = 0;
        u64 offset;
        u32 bitmap_size, flags, expected_extent_count;
        int prev_bit = 0, bit, bitnr;
        u32 extent_count = 0;
        int done = 0, nr;
        int ret;

        bitmap_size = free_space_bitmap_size(block_group->key.offset,
                                             fs_info->sectorsize);
        bitmap = alloc_bitmap(bitmap_size);
        if (!bitmap) {
                ret = -ENOMEM;
                goto out;
        }

        start = block_group->key.objectid;
        end = block_group->key.objectid + block_group->key.offset;

        key.objectid = end - 1;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        while (!done) {
                ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
                if (ret)
                        goto out;

                leaf = path->nodes[0];
                nr = 0;
                path->slots[0]++;
                while (path->slots[0] > 0) {
                        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

                        if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
                                ASSERT(found_key.objectid == block_group->key.objectid);
                                ASSERT(found_key.offset == block_group->key.offset);
                                done = 1;
                                break;
                        } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
                                unsigned long ptr;
                                u8 *bitmap_cursor;
                                u32 bitmap_pos, data_size;

                                ASSERT(found_key.objectid >= start);
                                ASSERT(found_key.objectid < end);
                                ASSERT(found_key.objectid + found_key.offset <= end);

                                bitmap_pos = div_u64(found_key.objectid - start,
                                                     fs_info->sectorsize *
                                                     BITS_PER_BYTE);
                                bitmap_cursor = bitmap + bitmap_pos;
                                data_size = free_space_bitmap_size(found_key.offset,
                                                                   fs_info->sectorsize);

                                ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
                                read_extent_buffer(leaf, bitmap_cursor, ptr,
                                                   data_size);

                                nr++;
                                path->slots[0]--;
                        } else {
                                ASSERT(0);
                        }
                }

                ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
                if (ret)
                        goto out;
                btrfs_release_path(path);
        }

        info = search_free_space_info(trans, fs_info, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
        }
        leaf = path->nodes[0];
        flags = btrfs_free_space_flags(leaf, info);
        flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
        btrfs_set_free_space_flags(leaf, info, flags);
        expected_extent_count = btrfs_free_space_extent_count(leaf, info);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);

        offset = start;
        bitnr = 0;
        while (offset < end) {
                bit = !!le_test_bit(bitnr, bitmap);
                if (prev_bit == 0 && bit == 1) {
                        extent_start = offset;
                } else if (prev_bit == 1 && bit == 0) {
                        key.objectid = extent_start;
                        key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
                        key.offset = offset - extent_start;

                        ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
                        if (ret)
                                goto out;
                        btrfs_release_path(path);

                        extent_count++;
                }
                prev_bit = bit;
                offset += fs_info->sectorsize;
                bitnr++;
        }
        if (prev_bit == 1) {
                key.objectid = extent_start;
                key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
                key.offset = end - extent_start;

                ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
                if (ret)
                        goto out;
                btrfs_release_path(path);

                extent_count++;
        }

        if (extent_count != expected_extent_count) {
                btrfs_err(fs_info,
                          "incorrect extent count for %llu; counted %u, expected %u",
                          block_group->key.objectid, extent_count,
                          expected_extent_count);
                ASSERT(0);
                ret = -EIO;
                goto out;
        }

        ret = 0;
out:
        kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

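/*
 * Apply a delta to the block group's free space extent count and, if the new
 * count crosses a threshold, switch the block group between the extent and
 * bitmap representations.
 */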
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path,
                int new_extents)
{
        struct btrfs_free_space_info *info;
        u32 flags;
        u32 extent_count;
        int ret = 0;

        if (new_extents == 0)
                return 0;

        info = search_free_space_info(trans, fs_info, block_group, path, 1);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
        }
        flags = btrfs_free_space_flags(path->nodes[0], info);
        extent_count = btrfs_free_space_extent_count(path->nodes[0], info);

        extent_count += new_extents;
        btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_release_path(path);

        if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
            extent_count > block_group->bitmap_high_thresh) {
                ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
                                                    path);
        } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
                   extent_count < block_group->bitmap_low_thresh) {
                ret = convert_free_space_to_extents(trans, fs_info, block_group,
                                                    path);
        }

out:
        return ret;
}

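/*
 * Return the bit for the given byte offset in the bitmap item the path
 * currently points to.
 */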
int free_space_test_bit(struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path, u64 offset)
{
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 found_start, found_end;
        unsigned long ptr, i;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

        found_start = key.objectid;
        found_end = key.objectid + key.offset;
        ASSERT(offset >= found_start && offset < found_end);

        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
        i = div_u64(offset - found_start,
                    block_group->fs_info->sectorsize);
        return !!extent_buffer_test_bit(leaf, ptr, i);
}

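/*
 * Set or clear the bits for [*start, *start + *size) that fall inside the
 * bitmap item the path points to, and advance *start/*size past the range
 * that was handled.
 */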
static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path, u64 *start, u64 *size,
                int bit)
{
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 end = *start + *size;
        u64 found_start, found_end;
        unsigned long ptr, first, last;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

        found_start = key.objectid;
        found_end = key.objectid + key.offset;
        ASSERT(*start >= found_start && *start < found_end);
        ASSERT(end > found_start);

        if (end > found_end)
                end = found_end;

        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
        first = div_u64(*start - found_start, fs_info->sectorsize);
        last = div_u64(end - found_start, fs_info->sectorsize);
        if (bit)
                extent_buffer_bitmap_set(leaf, ptr, first, last - first);
        else
                extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
        btrfs_mark_buffer_dirty(leaf);

        *size -= end - *start;
        *start = end;
}

/*
 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * looking for.
 */
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
                struct btrfs_root *root, struct btrfs_path *p)
{
        struct btrfs_key key;

        if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
                p->slots[0]++;
                return 0;
        }

        btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
        btrfs_release_path(p);

        key.objectid += key.offset;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}

/*
 * If remove is 1, then we are removing free space, thus clearing bits in the
 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 * the bitmap.
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path,
                u64 start, u64 size, int remove)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_key key;
        u64 end = start + size;
        u64 cur_start, cur_size;
        int prev_bit, next_bit;
        int new_extents;
        int ret;

        /*
         * Read the bit for the block immediately before the extent of space if
         * that block is within the block group.
         */
        if (start > block_group->key.objectid) {
                u64 prev_block = start - block_group->fs_info->sectorsize;

                key.objectid = prev_block;
                key.type = (u8)-1;
                key.offset = (u64)-1;

                ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
                if (ret)
                        goto out;

                prev_bit = free_space_test_bit(block_group, path, prev_block);

                /* The previous block may have been in the previous bitmap. */
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
                if (start >= key.objectid + key.offset) {
                        ret = free_space_next_bitmap(trans, root, path);
                        if (ret)
                                goto out;
                }
        } else {
                key.objectid = start;
                key.type = (u8)-1;
                key.offset = (u64)-1;

                ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
                if (ret)
                        goto out;

                prev_bit = -1;
        }

        /*
         * Iterate over all of the bitmaps overlapped by the extent of space,
         * clearing/setting bits as required.
         */
        cur_start = start;
        cur_size = size;
        while (1) {
                free_space_set_bits(block_group, path, &cur_start, &cur_size,
                                    !remove);
                if (cur_size == 0)
                        break;
                ret = free_space_next_bitmap(trans, root, path);
                if (ret)
                        goto out;
        }

        /*
         * Read the bit for the block immediately after the extent of space if
         * that block is within the block group.
         */
        if (end < block_group->key.objectid + block_group->key.offset) {
                /* The next block may be in the next bitmap. */
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
                if (end >= key.objectid + key.offset) {
                        ret = free_space_next_bitmap(trans, root, path);
                        if (ret)
                                goto out;
                }

                next_bit = free_space_test_bit(block_group, path, end);
        } else {
                next_bit = -1;
        }

        if (remove) {
                new_extents = -1;
                if (prev_bit == 1) {
                        /* Leftover on the left. */
                        new_extents++;
                }
                if (next_bit == 1) {
                        /* Leftover on the right. */
                        new_extents++;
                }
        } else {
                new_extents = 1;
                if (prev_bit == 1) {
                        /* Merging with neighbor on the left. */
                        new_extents--;
                }
                if (next_bit == 1) {
                        /* Merging with neighbor on the right. */
                        new_extents--;
                }
        }

        btrfs_release_path(path);
        ret = update_free_space_extent_count(trans, fs_info, block_group, path,
                                             new_extents);

out:
        return ret;
}

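/*
 * Remove [start, start + size) from a block group that is using the extent
 * representation, splitting the containing free space extent as needed.
 */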
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path,
                u64 start, u64 size)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_key key;
        u64 found_start, found_end;
        u64 end = start + size;
        int new_extents = -1;
        int ret;

        key.objectid = start;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
        if (ret)
                goto out;

        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

        ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

        found_start = key.objectid;
        found_end = key.objectid + key.offset;
        ASSERT(start >= found_start && end <= found_end);

        /*
         * Okay, now that we've found the free space extent which contains the
         * free space that we are removing, there are four cases:
         *
         * 1. We're using the whole extent: delete the key we found and
         * decrement the free space extent count.
         * 2. We are using part of the extent starting at the beginning: delete
         * the key we found and insert a new key representing the leftover at
         * the end. There is no net change in the number of extents.
         * 3. We are using part of the extent ending at the end: delete the key
         * we found and insert a new key representing the leftover at the
         * beginning. There is no net change in the number of extents.
         * 4. We are using part of the extent in the middle: delete the key we
         * found and insert two new keys representing the leftovers on each
         * side. Where we used to have one extent, we now have two, so increment
         * the extent count. We may need to convert the block group to bitmaps
         * as a result.
         */

        /* Delete the existing key (cases 1-4). */
        ret = btrfs_del_item(trans, root, path);
        if (ret)
                goto out;

        /* Add a key for leftovers at the beginning (cases 3 and 4). */
        if (start > found_start) {
                key.objectid = found_start;
                key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
                key.offset = start - found_start;

                btrfs_release_path(path);
                ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
                if (ret)
                        goto out;
                new_extents++;
        }

        /* Add a key for leftovers at the end (cases 2 and 4). */
        if (end < found_end) {
                key.objectid = end;
                key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
                key.offset = found_end - end;

                btrfs_release_path(path);
                ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
                if (ret)
                        goto out;
                new_extents++;
        }

        btrfs_release_path(path);
        ret = update_free_space_extent_count(trans, fs_info, block_group, path,
                                             new_extents);

out:
        return ret;
}

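/*
 * Remove a range of free space from the tree, dispatching to the bitmap or
 * extent implementation based on the block group's current format.
 */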
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path, u64 start, u64 size)
{
        struct btrfs_free_space_info *info;
        u32 flags;
        int ret;

        if (block_group->needs_free_space) {
                ret = __add_block_group_free_space(trans, fs_info, block_group,
                                                   path);
                if (ret)
                        return ret;
        }

        info = search_free_space_info(NULL, fs_info, block_group, path, 0);
        if (IS_ERR(info))
                return PTR_ERR(info);
        flags = btrfs_free_space_flags(path->nodes[0], info);
        btrfs_release_path(path);

        if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
                return modify_free_space_bitmap(trans, fs_info, block_group,
                                                path, start, size, 1);
        } else {
                return remove_free_space_extent(trans, fs_info, block_group,
                                                path, start, size);
        }
}

int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                u64 start, u64 size)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_path *path;
        int ret;

        if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                return 0;

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        block_group = btrfs_lookup_block_group(fs_info, start);
        if (!block_group) {
                ASSERT(0);
                ret = -ENOENT;
                goto out;
        }

        mutex_lock(&block_group->free_space_lock);
        ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
                                            start, size);
        mutex_unlock(&block_group->free_space_lock);

        btrfs_put_block_group(block_group);
out:
        btrfs_free_path(path);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

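/*
 * Add [start, start + size) to a block group that is using the extent
 * representation, merging with any adjacent free space extents.
 */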
static int add_free_space_extent(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path,
                u64 start, u64 size)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_key key, new_key;
        u64 found_start, found_end;
        u64 end = start + size;
        int new_extents = 1;
        int ret;

        /*
         * We are adding a new extent of free space, but we need to merge
         * extents. There are four cases here:
         *
         * 1. The new extent does not have any immediate neighbors to merge
         * with: add the new key and increment the free space extent count. We
         * may need to convert the block group to bitmaps as a result.
         * 2. The new extent has an immediate neighbor before it: remove the
         * previous key and insert a new key combining both of them. There is no
         * net change in the number of extents.
         * 3. The new extent has an immediate neighbor after it: remove the next
         * key and insert a new key combining both of them. There is no net
         * change in the number of extents.
         * 4. The new extent has immediate neighbors on both sides: remove both
         * of the keys and insert a new key combining all of them. Where we used
         * to have two extents, we now have one, so decrement the extent count.
         */

        new_key.objectid = start;
        new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
        new_key.offset = size;

        /* Search for a neighbor on the left. */
        if (start == block_group->key.objectid)
                goto right;
        key.objectid = start - 1;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
        if (ret)
                goto out;

        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

        if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
                ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
                btrfs_release_path(path);
                goto right;
        }

        found_start = key.objectid;
        found_end = key.objectid + key.offset;
        ASSERT(found_start >= block_group->key.objectid &&
               found_end > block_group->key.objectid);
        ASSERT(found_start < start && found_end <= start);

        /*
         * Delete the neighbor on the left and absorb it into the new key (cases
         * 2 and 4).
         */
        if (found_end == start) {
                ret = btrfs_del_item(trans, root, path);
                if (ret)
                        goto out;

                new_key.objectid = found_start;
                new_key.offset += key.offset;
                new_extents--;
        }
        btrfs_release_path(path);

right:
        /* Search for a neighbor on the right. */
        if (end == block_group->key.objectid + block_group->key.offset)
                goto insert;
        key.objectid = end;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
        if (ret)
                goto out;

        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

        if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
                ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
                btrfs_release_path(path);
                goto insert;
        }

        found_start = key.objectid;
        found_end = key.objectid + key.offset;
        ASSERT(found_start >= block_group->key.objectid &&
               found_end > block_group->key.objectid);
        ASSERT((found_start < start && found_end <= start) ||
               (found_start >= end && found_end > end));

        /*
         * Delete the neighbor on the right and absorb it into the new key
         * (cases 3 and 4).
         */
        if (found_start == end) {
                ret = btrfs_del_item(trans, root, path);
                if (ret)
                        goto out;

                new_key.offset += key.offset;
                new_extents--;
        }
        btrfs_release_path(path);

insert:
        /* Insert the new key (cases 1-4). */
        ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
        if (ret)
                goto out;

        btrfs_release_path(path);
        ret = update_free_space_extent_count(trans, fs_info, block_group, path,
                                             new_extents);

out:
        return ret;
}

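/*
 * Add a range of free space to the tree, dispatching to the bitmap or extent
 * implementation based on the block group's current format.
 */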
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path, u64 start, u64 size)
{
        struct btrfs_free_space_info *info;
        u32 flags;
        int ret;

        if (block_group->needs_free_space) {
                ret = __add_block_group_free_space(trans, fs_info, block_group,
                                                   path);
                if (ret)
                        return ret;
        }

        info = search_free_space_info(NULL, fs_info, block_group, path, 0);
        if (IS_ERR(info))
                return PTR_ERR(info);
        flags = btrfs_free_space_flags(path->nodes[0], info);
        btrfs_release_path(path);

        if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
                return modify_free_space_bitmap(trans, fs_info, block_group,
                                                path, start, size, 0);
        } else {
                return add_free_space_extent(trans, fs_info, block_group, path,
                                             start, size);
        }
}

int add_to_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                u64 start, u64 size)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_path *path;
        int ret;

        if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                return 0;

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        block_group = btrfs_lookup_block_group(fs_info, start);
        if (!block_group) {
                ASSERT(0);
                ret = -ENOENT;
                goto out;
        }

        mutex_lock(&block_group->free_space_lock);
        ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
                                       size);
        mutex_unlock(&block_group->free_space_lock);

        btrfs_put_block_group(block_group);
out:
        btrfs_free_path(path);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

/*
 * Populate the free space tree by walking the extent tree. Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group)
{
        struct btrfs_root *extent_root = fs_info->extent_root;
        struct btrfs_path *path, *path2;
        struct btrfs_key key;
        u64 start, end;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = READA_FORWARD;

        path2 = btrfs_alloc_path();
        if (!path2) {
                btrfs_free_path(path);
                return -ENOMEM;
        }

        ret = add_new_free_space_info(trans, fs_info, block_group, path2);
        if (ret)
                goto out;

        mutex_lock(&block_group->free_space_lock);

        /*
         * Iterate through all of the extent and metadata items in this block
         * group, adding the free space between them and the free space at the
         * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
         * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
         * contained in.
         */
        key.objectid = block_group->key.objectid;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
        if (ret < 0)
                goto out_locked;
        ASSERT(ret == 0);

        start = block_group->key.objectid;
        end = block_group->key.objectid + block_group->key.offset;
        while (1) {
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

                if (key.type == BTRFS_EXTENT_ITEM_KEY ||
                    key.type == BTRFS_METADATA_ITEM_KEY) {
                        if (key.objectid >= end)
                                break;

                        if (start < key.objectid) {
                                ret = __add_to_free_space_tree(trans, fs_info,
                                                               block_group,
                                                               path2, start,
                                                               key.objectid -
                                                               start);
                                if (ret)
                                        goto out_locked;
                        }
                        start = key.objectid;
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                start += fs_info->nodesize;
                        else
                                start += key.offset;
                } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
                        if (key.objectid != block_group->key.objectid)
                                break;
                }

                ret = btrfs_next_item(extent_root, path);
                if (ret < 0)
                        goto out_locked;
                if (ret)
                        break;
        }
        if (start < end) {
                ret = __add_to_free_space_tree(trans, fs_info, block_group,
                                               path2, start, end - start);
                if (ret)
                        goto out_locked;
        }

        ret = 0;
out_locked:
        mutex_unlock(&block_group->free_space_lock);
out:
        btrfs_free_path(path2);
        btrfs_free_path(path);
        return ret;
}

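/*
 * Create the free space tree root and populate it from every existing block
 * group, setting the FREE_SPACE_TREE compat_ro bits once it is valid.
 */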
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *free_space_root;
        struct btrfs_block_group_cache *block_group;
        struct rb_node *node;
        int ret;

        trans = btrfs_start_transaction(tree_root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
        free_space_root = btrfs_create_tree(trans, fs_info,
                                            BTRFS_FREE_SPACE_TREE_OBJECTID);
        if (IS_ERR(free_space_root)) {
                ret = PTR_ERR(free_space_root);
                goto abort;
        }
        fs_info->free_space_root = free_space_root;

        node = rb_first(&fs_info->block_group_cache_tree);
        while (node) {
                block_group = rb_entry(node, struct btrfs_block_group_cache,
                                       cache_node);
                ret = populate_free_space_tree(trans, fs_info, block_group);
                if (ret)
                        goto abort;
                node = rb_next(node);
        }

        btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
        btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
        clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

        return btrfs_commit_transaction(trans);

abort:
        clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
        btrfs_abort_transaction(trans, ret);
        btrfs_end_transaction(trans);
        return ret;
}

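/* Delete every item in the free space tree, one leaf at a time. */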
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
                struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        int nr;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->leave_spinning = 1;

        key.objectid = 0;
        key.type = 0;
        key.offset = 0;

        while (1) {
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0)
                        goto out;

                nr = btrfs_header_nritems(path->nodes[0]);
                if (!nr)
                        break;

                path->slots[0] = 0;
                ret = btrfs_del_items(trans, root, path, 0, nr);
                if (ret)
                        goto out;

                btrfs_release_path(path);
        }

        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}

int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *free_space_root = fs_info->free_space_root;
        int ret;

        trans = btrfs_start_transaction(tree_root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
        btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
        fs_info->free_space_root = NULL;

        ret = clear_free_space_tree(trans, free_space_root);
        if (ret)
                goto abort;

        ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key);
        if (ret)
                goto abort;

        list_del(&free_space_root->dirty_list);

        btrfs_tree_lock(free_space_root->node);
        clean_tree_block(fs_info, free_space_root->node);
        btrfs_tree_unlock(free_space_root->node);
        btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
                              0, 1);

        free_extent_buffer(free_space_root->node);
        free_extent_buffer(free_space_root->commit_root);
        kfree(free_space_root);

        return btrfs_commit_transaction(trans);

abort:
        btrfs_abort_transaction(trans, ret);
        btrfs_end_transaction(trans);
        return ret;
}

static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group,
                struct btrfs_path *path)
{
        int ret;

        block_group->needs_free_space = 0;

        ret = add_new_free_space_info(trans, fs_info, block_group, path);
        if (ret)
                return ret;

        return __add_to_free_space_tree(trans, fs_info, block_group, path,
                                        block_group->key.objectid,
                                        block_group->key.offset);
}

int add_block_group_free_space(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group)
{
        struct btrfs_path *path = NULL;
        int ret = 0;

        if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                return 0;

        mutex_lock(&block_group->free_space_lock);
        if (!block_group->needs_free_space)
                goto out;

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        ret = __add_block_group_free_space(trans, fs_info, block_group, path);

out:
        btrfs_free_path(path);
        mutex_unlock(&block_group->free_space_lock);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

int remove_block_group_free_space(struct btrfs_trans_handle *trans,
                struct btrfs_fs_info *fs_info,
                struct btrfs_block_group_cache *block_group)
{
        struct btrfs_root *root = fs_info->free_space_root;
        struct btrfs_path *path;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
        u64 start, end;
        int done = 0, nr;
        int ret;

        if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                return 0;

        if (block_group->needs_free_space) {
                /* We never added this block group to the free space tree. */
                return 0;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        start = block_group->key.objectid;
        end = block_group->key.objectid + block_group->key.offset;

        key.objectid = end - 1;
        key.type = (u8)-1;
        key.offset = (u64)-1;

        while (!done) {
                ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
                if (ret)
                        goto out;

                leaf = path->nodes[0];
                nr = 0;
                path->slots[0]++;
                while (path->slots[0] > 0) {
                        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

                        if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
                                ASSERT(found_key.objectid == block_group->key.objectid);
                                ASSERT(found_key.offset == block_group->key.offset);
                                done = 1;
                                nr++;
                                path->slots[0]--;
                        } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
                                   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
                                ASSERT(found_key.objectid >= start);
                                ASSERT(found_key.objectid < end);
                                ASSERT(found_key.objectid + found_key.offset <= end);
                                nr++;
                                path->slots[0]--;
                        } else {
                                ASSERT(0);
                        }
                }

                ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
                if (ret)
                        goto out;
                btrfs_release_path(path);
        }

        ret = 0;
out:
        btrfs_free_path(path);
        if (ret)
                btrfs_abort_transaction(trans, ret);
        return ret;
}

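/*
 * Populate the in-memory free space cache for a block group from its
 * FREE_SPACE_BITMAP items while the caching thread runs.
 */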
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
                struct btrfs_path *path,
                u32 expected_extent_count)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_fs_info *fs_info;
        struct btrfs_root *root;
        struct btrfs_key key;
        int prev_bit = 0, bit;
        /* Initialize to silence GCC. */
        u64 extent_start = 0;
        u64 end, offset;
        u64 total_found = 0;
        u32 extent_count = 0;
        int ret;

        block_group = caching_ctl->block_group;
        fs_info = block_group->fs_info;
        root = fs_info->free_space_root;

        end = block_group->key.objectid + block_group->key.offset;

        while (1) {
                ret = btrfs_next_item(root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;

                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

                if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
                        break;

                ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
                ASSERT(key.objectid < end && key.objectid + key.offset <= end);

                caching_ctl->progress = key.objectid;

                offset = key.objectid;
                while (offset < key.objectid + key.offset) {
                        bit = free_space_test_bit(block_group, path, offset);
                        if (prev_bit == 0 && bit == 1) {
                                extent_start = offset;
                        } else if (prev_bit == 1 && bit == 0) {
                                total_found += add_new_free_space(block_group,
                                                                  fs_info,
                                                                  extent_start,
                                                                  offset);
                                if (total_found > CACHING_CTL_WAKE_UP) {
                                        total_found = 0;
                                        wake_up(&caching_ctl->wait);
                                }
                                extent_count++;
                        }
                        prev_bit = bit;
                        offset += fs_info->sectorsize;
                }
        }
        if (prev_bit == 1) {
                total_found += add_new_free_space(block_group, fs_info,
                                                  extent_start, end);
                extent_count++;
        }

        if (extent_count != expected_extent_count) {
                btrfs_err(fs_info,
                          "incorrect extent count for %llu; counted %u, expected %u",
                          block_group->key.objectid, extent_count,
                          expected_extent_count);
                ASSERT(0);
                ret = -EIO;
                goto out;
        }

        caching_ctl->progress = (u64)-1;

        ret = 0;
out:
        return ret;
}

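/*
 * Populate the in-memory free space cache for a block group from its
 * FREE_SPACE_EXTENT items while the caching thread runs.
 */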
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
                struct btrfs_path *path,
                u32 expected_extent_count)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_fs_info *fs_info;
        struct btrfs_root *root;
        struct btrfs_key key;
        u64 end;
        u64 total_found = 0;
        u32 extent_count = 0;
        int ret;

        block_group = caching_ctl->block_group;
        fs_info = block_group->fs_info;
        root = fs_info->free_space_root;

        end = block_group->key.objectid + block_group->key.offset;

        while (1) {
                ret = btrfs_next_item(root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;

                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

                if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
                        break;

                ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
                ASSERT(key.objectid < end && key.objectid + key.offset <= end);

                caching_ctl->progress = key.objectid;

                total_found += add_new_free_space(block_group, fs_info,
                                                  key.objectid,
                                                  key.objectid + key.offset);
                if (total_found > CACHING_CTL_WAKE_UP) {
                        total_found = 0;
                        wake_up(&caching_ctl->wait);
                }
                extent_count++;
        }

        if (extent_count != expected_extent_count) {
                btrfs_err(fs_info,
                          "incorrect extent count for %llu; counted %u, expected %u",
                          block_group->key.objectid, extent_count,
                          expected_extent_count);
                ASSERT(0);
                ret = -EIO;
                goto out;
        }

        caching_ctl->progress = (u64)-1;

        ret = 0;
out:
        return ret;
}

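/*
 * Entry point for the caching thread: read a block group's free space from
 * the free space tree, using either the bitmap or extent loader depending on
 * the flags in its FREE_SPACE_INFO item.
 */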
int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_fs_info *fs_info;
        struct btrfs_free_space_info *info;
        struct btrfs_path *path;
        u32 extent_count, flags;
        int ret;

        block_group = caching_ctl->block_group;
        fs_info = block_group->fs_info;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /*
         * Just like caching_thread() doesn't want to deadlock on the extent
         * tree, we don't want to deadlock on the free space tree.
         */
        path->skip_locking = 1;
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;

        info = search_free_space_info(NULL, fs_info, block_group, path, 0);
        if (IS_ERR(info)) {
                ret = PTR_ERR(info);
                goto out;
        }
        extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
        flags = btrfs_free_space_flags(path->nodes[0], info);

        /*
         * We left path pointing to the free space info item, so now
         * load_free_space_foo can just iterate through the free space tree from
         * there.
         */
        if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
                ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
        else
                ret = load_free_space_extents(caching_ctl, path, extent_count);

out:
        btrfs_free_path(path);
        return ret;
}