// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook.  All rights reserved.
 */
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
11 #include "free-space-tree.h"
12 #include "transaction.h"
13 #include "block-group.h"
/*
 * Forward declaration: the definition appears later in this file, but
 * __remove_from_free_space_tree() and __add_to_free_space_tree() call it
 * first.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path);
19 void set_free_space_tree_thresholds(struct btrfs_block_group
*cache
)
23 u64 num_bitmaps
, total_bitmap_size
;
25 if (WARN_ON(cache
->length
== 0))
26 btrfs_warn(cache
->fs_info
, "block group %llu length is zero",
30 * We convert to bitmaps when the disk space required for using extents
31 * exceeds that required for using bitmaps.
33 bitmap_range
= cache
->fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
34 num_bitmaps
= div_u64(cache
->length
+ bitmap_range
- 1, bitmap_range
);
35 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
36 total_bitmap_size
= num_bitmaps
* bitmap_size
;
37 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
38 sizeof(struct btrfs_item
));
41 * We allow for a small buffer between the high threshold and low
42 * threshold to avoid thrashing back and forth between the two formats.
44 if (cache
->bitmap_high_thresh
> 100)
45 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
47 cache
->bitmap_low_thresh
= 0;
50 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
51 struct btrfs_block_group
*block_group
,
52 struct btrfs_path
*path
)
54 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
55 struct btrfs_free_space_info
*info
;
57 struct extent_buffer
*leaf
;
60 key
.objectid
= block_group
->start
;
61 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
62 key
.offset
= block_group
->length
;
64 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
68 leaf
= path
->nodes
[0];
69 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
70 struct btrfs_free_space_info
);
71 btrfs_set_free_space_extent_count(leaf
, info
, 0);
72 btrfs_set_free_space_flags(leaf
, info
, 0);
73 btrfs_mark_buffer_dirty(leaf
);
77 btrfs_release_path(path
);
82 struct btrfs_free_space_info
*search_free_space_info(
83 struct btrfs_trans_handle
*trans
,
84 struct btrfs_block_group
*block_group
,
85 struct btrfs_path
*path
, int cow
)
87 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
88 struct btrfs_root
*root
= fs_info
->free_space_root
;
92 key
.objectid
= block_group
->start
;
93 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
94 key
.offset
= block_group
->length
;
96 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
100 btrfs_warn(fs_info
, "missing free space info for %llu",
103 return ERR_PTR(-ENOENT
);
106 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
107 struct btrfs_free_space_info
);
/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 */
114 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
115 struct btrfs_root
*root
,
116 struct btrfs_key
*key
, struct btrfs_path
*p
,
117 int ins_len
, int cow
)
121 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
130 if (p
->slots
[0] == 0) {
139 static inline u32
free_space_bitmap_size(const struct btrfs_fs_info
*fs_info
,
142 return DIV_ROUND_UP(size
>> fs_info
->sectorsize_bits
, BITS_PER_BYTE
);
145 static unsigned long *alloc_bitmap(u32 bitmap_size
)
148 unsigned int nofs_flag
;
149 u32 bitmap_rounded_size
= round_up(bitmap_size
, sizeof(unsigned long));
152 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
153 * into the filesystem as the free space bitmap can be modified in the
154 * critical section of a transaction commit.
156 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
157 * know that recursion is unsafe.
159 nofs_flag
= memalloc_nofs_save();
160 ret
= kvzalloc(bitmap_rounded_size
, GFP_KERNEL
);
161 memalloc_nofs_restore(nofs_flag
);
165 static void le_bitmap_set(unsigned long *map
, unsigned int start
, int len
)
167 u8
*p
= ((u8
*)map
) + BIT_BYTE(start
);
168 const unsigned int size
= start
+ len
;
169 int bits_to_set
= BITS_PER_BYTE
- (start
% BITS_PER_BYTE
);
170 u8 mask_to_set
= BITMAP_FIRST_BYTE_MASK(start
);
172 while (len
- bits_to_set
>= 0) {
175 bits_to_set
= BITS_PER_BYTE
;
180 mask_to_set
&= BITMAP_LAST_BYTE_MASK(size
);
186 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
187 struct btrfs_block_group
*block_group
,
188 struct btrfs_path
*path
)
190 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
191 struct btrfs_root
*root
= fs_info
->free_space_root
;
192 struct btrfs_free_space_info
*info
;
193 struct btrfs_key key
, found_key
;
194 struct extent_buffer
*leaf
;
195 unsigned long *bitmap
;
199 u32 bitmap_size
, flags
, expected_extent_count
;
200 u32 extent_count
= 0;
204 bitmap_size
= free_space_bitmap_size(fs_info
, block_group
->length
);
205 bitmap
= alloc_bitmap(bitmap_size
);
211 start
= block_group
->start
;
212 end
= block_group
->start
+ block_group
->length
;
214 key
.objectid
= end
- 1;
216 key
.offset
= (u64
)-1;
219 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
223 leaf
= path
->nodes
[0];
226 while (path
->slots
[0] > 0) {
227 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
229 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
230 ASSERT(found_key
.objectid
== block_group
->start
);
231 ASSERT(found_key
.offset
== block_group
->length
);
234 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
237 ASSERT(found_key
.objectid
>= start
);
238 ASSERT(found_key
.objectid
< end
);
239 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
241 first
= div_u64(found_key
.objectid
- start
,
242 fs_info
->sectorsize
);
243 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
244 fs_info
->sectorsize
);
245 le_bitmap_set(bitmap
, first
, last
- first
);
255 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
258 btrfs_release_path(path
);
261 info
= search_free_space_info(trans
, block_group
, path
, 1);
266 leaf
= path
->nodes
[0];
267 flags
= btrfs_free_space_flags(leaf
, info
);
268 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
269 btrfs_set_free_space_flags(leaf
, info
, flags
);
270 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
271 btrfs_mark_buffer_dirty(leaf
);
272 btrfs_release_path(path
);
274 if (extent_count
!= expected_extent_count
) {
276 "incorrect extent count for %llu; counted %u, expected %u",
277 block_group
->start
, extent_count
,
278 expected_extent_count
);
284 bitmap_cursor
= (char *)bitmap
;
285 bitmap_range
= fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
292 extent_size
= min(end
- i
, bitmap_range
);
293 data_size
= free_space_bitmap_size(fs_info
, extent_size
);
296 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
297 key
.offset
= extent_size
;
299 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
304 leaf
= path
->nodes
[0];
305 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
306 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
308 btrfs_mark_buffer_dirty(leaf
);
309 btrfs_release_path(path
);
312 bitmap_cursor
+= data_size
;
319 btrfs_abort_transaction(trans
, ret
);
324 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
325 struct btrfs_block_group
*block_group
,
326 struct btrfs_path
*path
)
328 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
329 struct btrfs_root
*root
= fs_info
->free_space_root
;
330 struct btrfs_free_space_info
*info
;
331 struct btrfs_key key
, found_key
;
332 struct extent_buffer
*leaf
;
333 unsigned long *bitmap
;
335 u32 bitmap_size
, flags
, expected_extent_count
;
336 unsigned long nrbits
, start_bit
, end_bit
;
337 u32 extent_count
= 0;
341 bitmap_size
= free_space_bitmap_size(fs_info
, block_group
->length
);
342 bitmap
= alloc_bitmap(bitmap_size
);
348 start
= block_group
->start
;
349 end
= block_group
->start
+ block_group
->length
;
351 key
.objectid
= end
- 1;
353 key
.offset
= (u64
)-1;
356 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
360 leaf
= path
->nodes
[0];
363 while (path
->slots
[0] > 0) {
364 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
366 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
367 ASSERT(found_key
.objectid
== block_group
->start
);
368 ASSERT(found_key
.offset
== block_group
->length
);
371 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
374 u32 bitmap_pos
, data_size
;
376 ASSERT(found_key
.objectid
>= start
);
377 ASSERT(found_key
.objectid
< end
);
378 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
380 bitmap_pos
= div_u64(found_key
.objectid
- start
,
381 fs_info
->sectorsize
*
383 bitmap_cursor
= ((char *)bitmap
) + bitmap_pos
;
384 data_size
= free_space_bitmap_size(fs_info
,
387 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
388 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
398 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
401 btrfs_release_path(path
);
404 info
= search_free_space_info(trans
, block_group
, path
, 1);
409 leaf
= path
->nodes
[0];
410 flags
= btrfs_free_space_flags(leaf
, info
);
411 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
412 btrfs_set_free_space_flags(leaf
, info
, flags
);
413 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
414 btrfs_mark_buffer_dirty(leaf
);
415 btrfs_release_path(path
);
417 nrbits
= block_group
->length
>> block_group
->fs_info
->sectorsize_bits
;
418 start_bit
= find_next_bit_le(bitmap
, nrbits
, 0);
420 while (start_bit
< nrbits
) {
421 end_bit
= find_next_zero_bit_le(bitmap
, nrbits
, start_bit
);
422 ASSERT(start_bit
< end_bit
);
424 key
.objectid
= start
+ start_bit
* block_group
->fs_info
->sectorsize
;
425 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
426 key
.offset
= (end_bit
- start_bit
) * block_group
->fs_info
->sectorsize
;
428 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
431 btrfs_release_path(path
);
435 start_bit
= find_next_bit_le(bitmap
, nrbits
, end_bit
);
438 if (extent_count
!= expected_extent_count
) {
440 "incorrect extent count for %llu; counted %u, expected %u",
441 block_group
->start
, extent_count
,
442 expected_extent_count
);
452 btrfs_abort_transaction(trans
, ret
);
456 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
457 struct btrfs_block_group
*block_group
,
458 struct btrfs_path
*path
,
461 struct btrfs_free_space_info
*info
;
466 if (new_extents
== 0)
469 info
= search_free_space_info(trans
, block_group
, path
, 1);
474 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
475 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
477 extent_count
+= new_extents
;
478 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
479 btrfs_mark_buffer_dirty(path
->nodes
[0]);
480 btrfs_release_path(path
);
482 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
483 extent_count
> block_group
->bitmap_high_thresh
) {
484 ret
= convert_free_space_to_bitmaps(trans
, block_group
, path
);
485 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
486 extent_count
< block_group
->bitmap_low_thresh
) {
487 ret
= convert_free_space_to_extents(trans
, block_group
, path
);
495 int free_space_test_bit(struct btrfs_block_group
*block_group
,
496 struct btrfs_path
*path
, u64 offset
)
498 struct extent_buffer
*leaf
;
499 struct btrfs_key key
;
500 u64 found_start
, found_end
;
501 unsigned long ptr
, i
;
503 leaf
= path
->nodes
[0];
504 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
505 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
507 found_start
= key
.objectid
;
508 found_end
= key
.objectid
+ key
.offset
;
509 ASSERT(offset
>= found_start
&& offset
< found_end
);
511 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
512 i
= div_u64(offset
- found_start
,
513 block_group
->fs_info
->sectorsize
);
514 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
517 static void free_space_set_bits(struct btrfs_block_group
*block_group
,
518 struct btrfs_path
*path
, u64
*start
, u64
*size
,
521 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
522 struct extent_buffer
*leaf
;
523 struct btrfs_key key
;
524 u64 end
= *start
+ *size
;
525 u64 found_start
, found_end
;
526 unsigned long ptr
, first
, last
;
528 leaf
= path
->nodes
[0];
529 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
530 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
532 found_start
= key
.objectid
;
533 found_end
= key
.objectid
+ key
.offset
;
534 ASSERT(*start
>= found_start
&& *start
< found_end
);
535 ASSERT(end
> found_start
);
540 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
541 first
= (*start
- found_start
) >> fs_info
->sectorsize_bits
;
542 last
= (end
- found_start
) >> fs_info
->sectorsize_bits
;
544 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
546 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
547 btrfs_mark_buffer_dirty(leaf
);
549 *size
-= end
- *start
;
/*
 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * doing.
 */
559 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
560 struct btrfs_root
*root
, struct btrfs_path
*p
)
562 struct btrfs_key key
;
564 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
569 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
570 btrfs_release_path(p
);
572 key
.objectid
+= key
.offset
;
574 key
.offset
= (u64
)-1;
576 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
/*
 * If remove is 1, then we are removing free space, thus clearing bits in the
 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 * the bitmap.
 */
584 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
585 struct btrfs_block_group
*block_group
,
586 struct btrfs_path
*path
,
587 u64 start
, u64 size
, int remove
)
589 struct btrfs_root
*root
= block_group
->fs_info
->free_space_root
;
590 struct btrfs_key key
;
591 u64 end
= start
+ size
;
592 u64 cur_start
, cur_size
;
593 int prev_bit
, next_bit
;
598 * Read the bit for the block immediately before the extent of space if
599 * that block is within the block group.
601 if (start
> block_group
->start
) {
602 u64 prev_block
= start
- block_group
->fs_info
->sectorsize
;
604 key
.objectid
= prev_block
;
606 key
.offset
= (u64
)-1;
608 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
612 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
614 /* The previous block may have been in the previous bitmap. */
615 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
616 if (start
>= key
.objectid
+ key
.offset
) {
617 ret
= free_space_next_bitmap(trans
, root
, path
);
622 key
.objectid
= start
;
624 key
.offset
= (u64
)-1;
626 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
634 * Iterate over all of the bitmaps overlapped by the extent of space,
635 * clearing/setting bits as required.
640 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
644 ret
= free_space_next_bitmap(trans
, root
, path
);
650 * Read the bit for the block immediately after the extent of space if
651 * that block is within the block group.
653 if (end
< block_group
->start
+ block_group
->length
) {
654 /* The next block may be in the next bitmap. */
655 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
656 if (end
>= key
.objectid
+ key
.offset
) {
657 ret
= free_space_next_bitmap(trans
, root
, path
);
662 next_bit
= free_space_test_bit(block_group
, path
, end
);
670 /* Leftover on the left. */
674 /* Leftover on the right. */
680 /* Merging with neighbor on the left. */
684 /* Merging with neighbor on the right. */
689 btrfs_release_path(path
);
690 ret
= update_free_space_extent_count(trans
, block_group
, path
,
697 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
698 struct btrfs_block_group
*block_group
,
699 struct btrfs_path
*path
,
702 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
703 struct btrfs_key key
;
704 u64 found_start
, found_end
;
705 u64 end
= start
+ size
;
706 int new_extents
= -1;
709 key
.objectid
= start
;
711 key
.offset
= (u64
)-1;
713 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
717 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
719 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
721 found_start
= key
.objectid
;
722 found_end
= key
.objectid
+ key
.offset
;
723 ASSERT(start
>= found_start
&& end
<= found_end
);
/*
 * Okay, now that we've found the free space extent which contains the
 * free space that we are removing, there are four cases:
 *
 * 1. We're using the whole extent: delete the key we found and
 * decrement the free space extent count.
 * 2. We are using part of the extent starting at the beginning: delete
 * the key we found and insert a new key representing the leftover at
 * the end. There is no net change in the number of extents.
 * 3. We are using part of the extent ending at the end: delete the key
 * we found and insert a new key representing the leftover at the
 * beginning. There is no net change in the number of extents.
 * 4. We are using part of the extent in the middle: delete the key we
 * found and insert two new keys representing the leftovers on each
 * side. Where we used to have one extent, we now have two, so increment
 * the extent count. We may need to convert the block group to bitmaps
 * as a result.
 */
744 /* Delete the existing key (cases 1-4). */
745 ret
= btrfs_del_item(trans
, root
, path
);
749 /* Add a key for leftovers at the beginning (cases 3 and 4). */
750 if (start
> found_start
) {
751 key
.objectid
= found_start
;
752 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
753 key
.offset
= start
- found_start
;
755 btrfs_release_path(path
);
756 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
762 /* Add a key for leftovers at the end (cases 2 and 4). */
763 if (end
< found_end
) {
765 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
766 key
.offset
= found_end
- end
;
768 btrfs_release_path(path
);
769 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
775 btrfs_release_path(path
);
776 ret
= update_free_space_extent_count(trans
, block_group
, path
,
784 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
785 struct btrfs_block_group
*block_group
,
786 struct btrfs_path
*path
, u64 start
, u64 size
)
788 struct btrfs_free_space_info
*info
;
792 if (block_group
->needs_free_space
) {
793 ret
= __add_block_group_free_space(trans
, block_group
, path
);
798 info
= search_free_space_info(NULL
, block_group
, path
, 0);
800 return PTR_ERR(info
);
801 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
802 btrfs_release_path(path
);
804 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
805 return modify_free_space_bitmap(trans
, block_group
, path
,
808 return remove_free_space_extent(trans
, block_group
, path
,
813 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
816 struct btrfs_block_group
*block_group
;
817 struct btrfs_path
*path
;
820 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
823 path
= btrfs_alloc_path();
829 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
836 mutex_lock(&block_group
->free_space_lock
);
837 ret
= __remove_from_free_space_tree(trans
, block_group
, path
, start
,
839 mutex_unlock(&block_group
->free_space_lock
);
841 btrfs_put_block_group(block_group
);
843 btrfs_free_path(path
);
845 btrfs_abort_transaction(trans
, ret
);
849 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
850 struct btrfs_block_group
*block_group
,
851 struct btrfs_path
*path
,
854 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
855 struct btrfs_key key
, new_key
;
856 u64 found_start
, found_end
;
857 u64 end
= start
+ size
;
/*
 * We are adding a new extent of free space, but we need to merge
 * extents. There are four cases here:
 *
 * 1. The new extent does not have any immediate neighbors to merge
 * with: add the new key and increment the free space extent count. We
 * may need to convert the block group to bitmaps as a result.
 * 2. The new extent has an immediate neighbor before it: remove the
 * previous key and insert a new key combining both of them. There is no
 * net change in the number of extents.
 * 3. The new extent has an immediate neighbor after it: remove the next
 * key and insert a new key combining both of them. There is no net
 * change in the number of extents.
 * 4. The new extent has immediate neighbors on both sides: remove both
 * of the keys and insert a new key combining all of them. Where we used
 * to have two extents, we now have one, so decrement the extent count.
 */
879 new_key
.objectid
= start
;
880 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
881 new_key
.offset
= size
;
883 /* Search for a neighbor on the left. */
884 if (start
== block_group
->start
)
886 key
.objectid
= start
- 1;
888 key
.offset
= (u64
)-1;
890 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
894 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
896 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
897 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
898 btrfs_release_path(path
);
902 found_start
= key
.objectid
;
903 found_end
= key
.objectid
+ key
.offset
;
904 ASSERT(found_start
>= block_group
->start
&&
905 found_end
> block_group
->start
);
906 ASSERT(found_start
< start
&& found_end
<= start
);
909 * Delete the neighbor on the left and absorb it into the new key (cases
912 if (found_end
== start
) {
913 ret
= btrfs_del_item(trans
, root
, path
);
916 new_key
.objectid
= found_start
;
917 new_key
.offset
+= key
.offset
;
920 btrfs_release_path(path
);
923 /* Search for a neighbor on the right. */
924 if (end
== block_group
->start
+ block_group
->length
)
928 key
.offset
= (u64
)-1;
930 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
934 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
936 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
937 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
938 btrfs_release_path(path
);
942 found_start
= key
.objectid
;
943 found_end
= key
.objectid
+ key
.offset
;
944 ASSERT(found_start
>= block_group
->start
&&
945 found_end
> block_group
->start
);
946 ASSERT((found_start
< start
&& found_end
<= start
) ||
947 (found_start
>= end
&& found_end
> end
));
950 * Delete the neighbor on the right and absorb it into the new key
953 if (found_start
== end
) {
954 ret
= btrfs_del_item(trans
, root
, path
);
957 new_key
.offset
+= key
.offset
;
960 btrfs_release_path(path
);
963 /* Insert the new key (cases 1-4). */
964 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
968 btrfs_release_path(path
);
969 ret
= update_free_space_extent_count(trans
, block_group
, path
,
977 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
978 struct btrfs_block_group
*block_group
,
979 struct btrfs_path
*path
, u64 start
, u64 size
)
981 struct btrfs_free_space_info
*info
;
985 if (block_group
->needs_free_space
) {
986 ret
= __add_block_group_free_space(trans
, block_group
, path
);
991 info
= search_free_space_info(NULL
, block_group
, path
, 0);
993 return PTR_ERR(info
);
994 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
995 btrfs_release_path(path
);
997 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
998 return modify_free_space_bitmap(trans
, block_group
, path
,
1001 return add_free_space_extent(trans
, block_group
, path
, start
,
1006 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1007 u64 start
, u64 size
)
1009 struct btrfs_block_group
*block_group
;
1010 struct btrfs_path
*path
;
1013 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1016 path
= btrfs_alloc_path();
1022 block_group
= btrfs_lookup_block_group(trans
->fs_info
, start
);
1029 mutex_lock(&block_group
->free_space_lock
);
1030 ret
= __add_to_free_space_tree(trans
, block_group
, path
, start
, size
);
1031 mutex_unlock(&block_group
->free_space_lock
);
1033 btrfs_put_block_group(block_group
);
1035 btrfs_free_path(path
);
1037 btrfs_abort_transaction(trans
, ret
);
/*
 * Populate the free space tree by walking the extent tree. Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
1046 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1047 struct btrfs_block_group
*block_group
)
1049 struct btrfs_root
*extent_root
= trans
->fs_info
->extent_root
;
1050 struct btrfs_path
*path
, *path2
;
1051 struct btrfs_key key
;
1055 path
= btrfs_alloc_path();
1058 path
->reada
= READA_FORWARD
;
1060 path2
= btrfs_alloc_path();
1062 btrfs_free_path(path
);
1066 ret
= add_new_free_space_info(trans
, block_group
, path2
);
1070 mutex_lock(&block_group
->free_space_lock
);
1073 * Iterate through all of the extent and metadata items in this block
1074 * group, adding the free space between them and the free space at the
1075 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1076 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1079 key
.objectid
= block_group
->start
;
1080 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1083 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1088 start
= block_group
->start
;
1089 end
= block_group
->start
+ block_group
->length
;
1091 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1093 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1094 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1095 if (key
.objectid
>= end
)
1098 if (start
< key
.objectid
) {
1099 ret
= __add_to_free_space_tree(trans
,
1107 start
= key
.objectid
;
1108 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1109 start
+= trans
->fs_info
->nodesize
;
1111 start
+= key
.offset
;
1112 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1113 if (key
.objectid
!= block_group
->start
)
1117 ret
= btrfs_next_item(extent_root
, path
);
1124 ret
= __add_to_free_space_tree(trans
, block_group
, path2
,
1125 start
, end
- start
);
1132 mutex_unlock(&block_group
->free_space_lock
);
1134 btrfs_free_path(path2
);
1135 btrfs_free_path(path
);
1139 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1141 struct btrfs_trans_handle
*trans
;
1142 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1143 struct btrfs_root
*free_space_root
;
1144 struct btrfs_block_group
*block_group
;
1145 struct rb_node
*node
;
1148 trans
= btrfs_start_transaction(tree_root
, 0);
1150 return PTR_ERR(trans
);
1152 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1153 free_space_root
= btrfs_create_tree(trans
,
1154 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1155 if (IS_ERR(free_space_root
)) {
1156 ret
= PTR_ERR(free_space_root
);
1159 fs_info
->free_space_root
= free_space_root
;
1161 node
= rb_first(&fs_info
->block_group_cache_tree
);
1163 block_group
= rb_entry(node
, struct btrfs_block_group
,
1165 ret
= populate_free_space_tree(trans
, block_group
);
1168 node
= rb_next(node
);
1171 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1172 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1173 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1175 return btrfs_commit_transaction(trans
);
1178 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1179 btrfs_abort_transaction(trans
, ret
);
1180 btrfs_end_transaction(trans
);
1184 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1185 struct btrfs_root
*root
)
1187 struct btrfs_path
*path
;
1188 struct btrfs_key key
;
1192 path
= btrfs_alloc_path();
1201 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1205 nr
= btrfs_header_nritems(path
->nodes
[0]);
1210 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1214 btrfs_release_path(path
);
1219 btrfs_free_path(path
);
1223 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1225 struct btrfs_trans_handle
*trans
;
1226 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1227 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1230 trans
= btrfs_start_transaction(tree_root
, 0);
1232 return PTR_ERR(trans
);
1234 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1235 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1236 fs_info
->free_space_root
= NULL
;
1238 ret
= clear_free_space_tree(trans
, free_space_root
);
1242 ret
= btrfs_del_root(trans
, &free_space_root
->root_key
);
1246 list_del(&free_space_root
->dirty_list
);
1248 btrfs_tree_lock(free_space_root
->node
);
1249 btrfs_clean_tree_block(free_space_root
->node
);
1250 btrfs_tree_unlock(free_space_root
->node
);
1251 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1254 btrfs_put_root(free_space_root
);
1256 return btrfs_commit_transaction(trans
);
1259 btrfs_abort_transaction(trans
, ret
);
1260 btrfs_end_transaction(trans
);
1264 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1265 struct btrfs_block_group
*block_group
,
1266 struct btrfs_path
*path
)
1270 block_group
->needs_free_space
= 0;
1272 ret
= add_new_free_space_info(trans
, block_group
, path
);
1276 return __add_to_free_space_tree(trans
, block_group
, path
,
1278 block_group
->length
);
1281 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1282 struct btrfs_block_group
*block_group
)
1284 struct btrfs_fs_info
*fs_info
= trans
->fs_info
;
1285 struct btrfs_path
*path
= NULL
;
1288 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1291 mutex_lock(&block_group
->free_space_lock
);
1292 if (!block_group
->needs_free_space
)
1295 path
= btrfs_alloc_path();
1301 ret
= __add_block_group_free_space(trans
, block_group
, path
);
1304 btrfs_free_path(path
);
1305 mutex_unlock(&block_group
->free_space_lock
);
1307 btrfs_abort_transaction(trans
, ret
);
1311 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1312 struct btrfs_block_group
*block_group
)
1314 struct btrfs_root
*root
= trans
->fs_info
->free_space_root
;
1315 struct btrfs_path
*path
;
1316 struct btrfs_key key
, found_key
;
1317 struct extent_buffer
*leaf
;
1322 if (!btrfs_fs_compat_ro(trans
->fs_info
, FREE_SPACE_TREE
))
1325 if (block_group
->needs_free_space
) {
1326 /* We never added this block group to the free space tree. */
1330 path
= btrfs_alloc_path();
1336 start
= block_group
->start
;
1337 end
= block_group
->start
+ block_group
->length
;
1339 key
.objectid
= end
- 1;
1341 key
.offset
= (u64
)-1;
1344 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1348 leaf
= path
->nodes
[0];
1351 while (path
->slots
[0] > 0) {
1352 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1354 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1355 ASSERT(found_key
.objectid
== block_group
->start
);
1356 ASSERT(found_key
.offset
== block_group
->length
);
1361 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1362 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1363 ASSERT(found_key
.objectid
>= start
);
1364 ASSERT(found_key
.objectid
< end
);
1365 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1373 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1376 btrfs_release_path(path
);
1381 btrfs_free_path(path
);
1383 btrfs_abort_transaction(trans
, ret
);
1387 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1388 struct btrfs_path
*path
,
1389 u32 expected_extent_count
)
1391 struct btrfs_block_group
*block_group
;
1392 struct btrfs_fs_info
*fs_info
;
1393 struct btrfs_root
*root
;
1394 struct btrfs_key key
;
1395 int prev_bit
= 0, bit
;
1396 /* Initialize to silence GCC. */
1397 u64 extent_start
= 0;
1399 u64 total_found
= 0;
1400 u32 extent_count
= 0;
1403 block_group
= caching_ctl
->block_group
;
1404 fs_info
= block_group
->fs_info
;
1405 root
= fs_info
->free_space_root
;
1407 end
= block_group
->start
+ block_group
->length
;
1410 ret
= btrfs_next_item(root
, path
);
1416 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1418 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1421 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1422 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1424 caching_ctl
->progress
= key
.objectid
;
1426 offset
= key
.objectid
;
1427 while (offset
< key
.objectid
+ key
.offset
) {
1428 bit
= free_space_test_bit(block_group
, path
, offset
);
1429 if (prev_bit
== 0 && bit
== 1) {
1430 extent_start
= offset
;
1431 } else if (prev_bit
== 1 && bit
== 0) {
1432 total_found
+= add_new_free_space(block_group
,
1435 if (total_found
> CACHING_CTL_WAKE_UP
) {
1437 wake_up(&caching_ctl
->wait
);
1442 offset
+= fs_info
->sectorsize
;
1445 if (prev_bit
== 1) {
1446 total_found
+= add_new_free_space(block_group
, extent_start
,
1451 if (extent_count
!= expected_extent_count
) {
1453 "incorrect extent count for %llu; counted %u, expected %u",
1454 block_group
->start
, extent_count
,
1455 expected_extent_count
);
1461 caching_ctl
->progress
= (u64
)-1;
1468 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1469 struct btrfs_path
*path
,
1470 u32 expected_extent_count
)
1472 struct btrfs_block_group
*block_group
;
1473 struct btrfs_fs_info
*fs_info
;
1474 struct btrfs_root
*root
;
1475 struct btrfs_key key
;
1477 u64 total_found
= 0;
1478 u32 extent_count
= 0;
1481 block_group
= caching_ctl
->block_group
;
1482 fs_info
= block_group
->fs_info
;
1483 root
= fs_info
->free_space_root
;
1485 end
= block_group
->start
+ block_group
->length
;
1488 ret
= btrfs_next_item(root
, path
);
1494 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1496 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1499 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1500 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1502 caching_ctl
->progress
= key
.objectid
;
1504 total_found
+= add_new_free_space(block_group
, key
.objectid
,
1505 key
.objectid
+ key
.offset
);
1506 if (total_found
> CACHING_CTL_WAKE_UP
) {
1508 wake_up(&caching_ctl
->wait
);
1513 if (extent_count
!= expected_extent_count
) {
1515 "incorrect extent count for %llu; counted %u, expected %u",
1516 block_group
->start
, extent_count
,
1517 expected_extent_count
);
1523 caching_ctl
->progress
= (u64
)-1;
1530 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1532 struct btrfs_block_group
*block_group
;
1533 struct btrfs_free_space_info
*info
;
1534 struct btrfs_path
*path
;
1535 u32 extent_count
, flags
;
1538 block_group
= caching_ctl
->block_group
;
1540 path
= btrfs_alloc_path();
1545 * Just like caching_thread() doesn't want to deadlock on the extent
1546 * tree, we don't want to deadlock on the free space tree.
1548 path
->skip_locking
= 1;
1549 path
->search_commit_root
= 1;
1550 path
->reada
= READA_FORWARD
;
1552 info
= search_free_space_info(NULL
, block_group
, path
, 0);
1554 ret
= PTR_ERR(info
);
1557 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1558 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1561 * We left path pointing to the free space info item, so now
1562 * load_free_space_foo can just iterate through the free space tree from
1565 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1566 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1568 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1571 btrfs_free_path(path
);