2 * Copyright (C) 2015 Facebook. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02110-1301, USA.
19 #include <linux/kernel.h>
20 #include <linux/sched/mm.h>
24 #include "free-space-tree.h"
25 #include "transaction.h"
/* Forward declaration: used by the add/remove paths before its definition. */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *block_group,
					struct btrfs_path *path);
32 void set_free_space_tree_thresholds(struct btrfs_block_group_cache
*cache
)
36 u64 num_bitmaps
, total_bitmap_size
;
39 * We convert to bitmaps when the disk space required for using extents
40 * exceeds that required for using bitmaps.
42 bitmap_range
= cache
->fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
43 num_bitmaps
= div_u64(cache
->key
.offset
+ bitmap_range
- 1,
45 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
46 total_bitmap_size
= num_bitmaps
* bitmap_size
;
47 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
48 sizeof(struct btrfs_item
));
51 * We allow for a small buffer between the high threshold and low
52 * threshold to avoid thrashing back and forth between the two formats.
54 if (cache
->bitmap_high_thresh
> 100)
55 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
57 cache
->bitmap_low_thresh
= 0;
60 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
61 struct btrfs_fs_info
*fs_info
,
62 struct btrfs_block_group_cache
*block_group
,
63 struct btrfs_path
*path
)
65 struct btrfs_root
*root
= fs_info
->free_space_root
;
66 struct btrfs_free_space_info
*info
;
68 struct extent_buffer
*leaf
;
71 key
.objectid
= block_group
->key
.objectid
;
72 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
73 key
.offset
= block_group
->key
.offset
;
75 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
79 leaf
= path
->nodes
[0];
80 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
81 struct btrfs_free_space_info
);
82 btrfs_set_free_space_extent_count(leaf
, info
, 0);
83 btrfs_set_free_space_flags(leaf
, info
, 0);
84 btrfs_mark_buffer_dirty(leaf
);
88 btrfs_release_path(path
);
92 struct btrfs_free_space_info
*
93 search_free_space_info(struct btrfs_trans_handle
*trans
,
94 struct btrfs_fs_info
*fs_info
,
95 struct btrfs_block_group_cache
*block_group
,
96 struct btrfs_path
*path
, int cow
)
98 struct btrfs_root
*root
= fs_info
->free_space_root
;
102 key
.objectid
= block_group
->key
.objectid
;
103 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
104 key
.offset
= block_group
->key
.offset
;
106 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
110 btrfs_warn(fs_info
, "missing free space info for %llu",
111 block_group
->key
.objectid
);
113 return ERR_PTR(-ENOENT
);
116 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
117 struct btrfs_free_space_info
);
121 * btrfs_search_slot() but we're looking for the greatest key less than the
124 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
125 struct btrfs_root
*root
,
126 struct btrfs_key
*key
, struct btrfs_path
*p
,
127 int ins_len
, int cow
)
131 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
140 if (p
->slots
[0] == 0) {
149 static inline u32
free_space_bitmap_size(u64 size
, u32 sectorsize
)
151 return DIV_ROUND_UP((u32
)div_u64(size
, sectorsize
), BITS_PER_BYTE
);
154 static u8
*alloc_bitmap(u32 bitmap_size
)
157 unsigned int nofs_flag
;
160 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
161 * into the filesystem as the free space bitmap can be modified in the
162 * critical section of a transaction commit.
164 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
165 * know that recursion is unsafe.
167 nofs_flag
= memalloc_nofs_save();
168 ret
= kvzalloc(bitmap_size
, GFP_KERNEL
);
169 memalloc_nofs_restore(nofs_flag
);
173 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
174 struct btrfs_fs_info
*fs_info
,
175 struct btrfs_block_group_cache
*block_group
,
176 struct btrfs_path
*path
)
178 struct btrfs_root
*root
= fs_info
->free_space_root
;
179 struct btrfs_free_space_info
*info
;
180 struct btrfs_key key
, found_key
;
181 struct extent_buffer
*leaf
;
182 u8
*bitmap
, *bitmap_cursor
;
185 u32 bitmap_size
, flags
, expected_extent_count
;
186 u32 extent_count
= 0;
190 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
191 fs_info
->sectorsize
);
192 bitmap
= alloc_bitmap(bitmap_size
);
198 start
= block_group
->key
.objectid
;
199 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
201 key
.objectid
= end
- 1;
203 key
.offset
= (u64
)-1;
206 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
210 leaf
= path
->nodes
[0];
213 while (path
->slots
[0] > 0) {
214 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
216 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
217 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
218 ASSERT(found_key
.offset
== block_group
->key
.offset
);
221 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
224 ASSERT(found_key
.objectid
>= start
);
225 ASSERT(found_key
.objectid
< end
);
226 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
228 first
= div_u64(found_key
.objectid
- start
,
229 fs_info
->sectorsize
);
230 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
231 fs_info
->sectorsize
);
232 le_bitmap_set(bitmap
, first
, last
- first
);
242 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
245 btrfs_release_path(path
);
248 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
253 leaf
= path
->nodes
[0];
254 flags
= btrfs_free_space_flags(leaf
, info
);
255 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
256 btrfs_set_free_space_flags(leaf
, info
, flags
);
257 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
258 btrfs_mark_buffer_dirty(leaf
);
259 btrfs_release_path(path
);
261 if (extent_count
!= expected_extent_count
) {
263 "incorrect extent count for %llu; counted %u, expected %u",
264 block_group
->key
.objectid
, extent_count
,
265 expected_extent_count
);
271 bitmap_cursor
= bitmap
;
272 bitmap_range
= fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
279 extent_size
= min(end
- i
, bitmap_range
);
280 data_size
= free_space_bitmap_size(extent_size
,
281 fs_info
->sectorsize
);
284 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
285 key
.offset
= extent_size
;
287 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
292 leaf
= path
->nodes
[0];
293 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
294 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
296 btrfs_mark_buffer_dirty(leaf
);
297 btrfs_release_path(path
);
300 bitmap_cursor
+= data_size
;
307 btrfs_abort_transaction(trans
, ret
);
311 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
312 struct btrfs_fs_info
*fs_info
,
313 struct btrfs_block_group_cache
*block_group
,
314 struct btrfs_path
*path
)
316 struct btrfs_root
*root
= fs_info
->free_space_root
;
317 struct btrfs_free_space_info
*info
;
318 struct btrfs_key key
, found_key
;
319 struct extent_buffer
*leaf
;
322 /* Initialize to silence GCC. */
323 u64 extent_start
= 0;
325 u32 bitmap_size
, flags
, expected_extent_count
;
326 int prev_bit
= 0, bit
, bitnr
;
327 u32 extent_count
= 0;
331 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
332 fs_info
->sectorsize
);
333 bitmap
= alloc_bitmap(bitmap_size
);
339 start
= block_group
->key
.objectid
;
340 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
342 key
.objectid
= end
- 1;
344 key
.offset
= (u64
)-1;
347 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
351 leaf
= path
->nodes
[0];
354 while (path
->slots
[0] > 0) {
355 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
357 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
358 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
359 ASSERT(found_key
.offset
== block_group
->key
.offset
);
362 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
365 u32 bitmap_pos
, data_size
;
367 ASSERT(found_key
.objectid
>= start
);
368 ASSERT(found_key
.objectid
< end
);
369 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
371 bitmap_pos
= div_u64(found_key
.objectid
- start
,
372 fs_info
->sectorsize
*
374 bitmap_cursor
= bitmap
+ bitmap_pos
;
375 data_size
= free_space_bitmap_size(found_key
.offset
,
376 fs_info
->sectorsize
);
378 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
379 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
389 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
392 btrfs_release_path(path
);
395 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
400 leaf
= path
->nodes
[0];
401 flags
= btrfs_free_space_flags(leaf
, info
);
402 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
403 btrfs_set_free_space_flags(leaf
, info
, flags
);
404 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
405 btrfs_mark_buffer_dirty(leaf
);
406 btrfs_release_path(path
);
410 while (offset
< end
) {
411 bit
= !!le_test_bit(bitnr
, bitmap
);
412 if (prev_bit
== 0 && bit
== 1) {
413 extent_start
= offset
;
414 } else if (prev_bit
== 1 && bit
== 0) {
415 key
.objectid
= extent_start
;
416 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
417 key
.offset
= offset
- extent_start
;
419 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
422 btrfs_release_path(path
);
427 offset
+= fs_info
->sectorsize
;
431 key
.objectid
= extent_start
;
432 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
433 key
.offset
= end
- extent_start
;
435 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
438 btrfs_release_path(path
);
443 if (extent_count
!= expected_extent_count
) {
445 "incorrect extent count for %llu; counted %u, expected %u",
446 block_group
->key
.objectid
, extent_count
,
447 expected_extent_count
);
457 btrfs_abort_transaction(trans
, ret
);
461 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
462 struct btrfs_fs_info
*fs_info
,
463 struct btrfs_block_group_cache
*block_group
,
464 struct btrfs_path
*path
,
467 struct btrfs_free_space_info
*info
;
472 if (new_extents
== 0)
475 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
480 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
481 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
483 extent_count
+= new_extents
;
484 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
485 btrfs_mark_buffer_dirty(path
->nodes
[0]);
486 btrfs_release_path(path
);
488 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
489 extent_count
> block_group
->bitmap_high_thresh
) {
490 ret
= convert_free_space_to_bitmaps(trans
, fs_info
, block_group
,
492 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
493 extent_count
< block_group
->bitmap_low_thresh
) {
494 ret
= convert_free_space_to_extents(trans
, fs_info
, block_group
,
502 int free_space_test_bit(struct btrfs_block_group_cache
*block_group
,
503 struct btrfs_path
*path
, u64 offset
)
505 struct extent_buffer
*leaf
;
506 struct btrfs_key key
;
507 u64 found_start
, found_end
;
508 unsigned long ptr
, i
;
510 leaf
= path
->nodes
[0];
511 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
512 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
514 found_start
= key
.objectid
;
515 found_end
= key
.objectid
+ key
.offset
;
516 ASSERT(offset
>= found_start
&& offset
< found_end
);
518 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
519 i
= div_u64(offset
- found_start
,
520 block_group
->fs_info
->sectorsize
);
521 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
524 static void free_space_set_bits(struct btrfs_block_group_cache
*block_group
,
525 struct btrfs_path
*path
, u64
*start
, u64
*size
,
528 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
529 struct extent_buffer
*leaf
;
530 struct btrfs_key key
;
531 u64 end
= *start
+ *size
;
532 u64 found_start
, found_end
;
533 unsigned long ptr
, first
, last
;
535 leaf
= path
->nodes
[0];
536 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
537 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
539 found_start
= key
.objectid
;
540 found_end
= key
.objectid
+ key
.offset
;
541 ASSERT(*start
>= found_start
&& *start
< found_end
);
542 ASSERT(end
> found_start
);
547 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
548 first
= div_u64(*start
- found_start
, fs_info
->sectorsize
);
549 last
= div_u64(end
- found_start
, fs_info
->sectorsize
);
551 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
553 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
554 btrfs_mark_buffer_dirty(leaf
);
556 *size
-= end
- *start
;
561 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
562 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
563 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
566 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
567 struct btrfs_root
*root
, struct btrfs_path
*p
)
569 struct btrfs_key key
;
571 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
576 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
577 btrfs_release_path(p
);
579 key
.objectid
+= key
.offset
;
581 key
.offset
= (u64
)-1;
583 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
587 * If remove is 1, then we are removing free space, thus clearing bits in the
588 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
591 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
592 struct btrfs_fs_info
*fs_info
,
593 struct btrfs_block_group_cache
*block_group
,
594 struct btrfs_path
*path
,
595 u64 start
, u64 size
, int remove
)
597 struct btrfs_root
*root
= fs_info
->free_space_root
;
598 struct btrfs_key key
;
599 u64 end
= start
+ size
;
600 u64 cur_start
, cur_size
;
601 int prev_bit
, next_bit
;
606 * Read the bit for the block immediately before the extent of space if
607 * that block is within the block group.
609 if (start
> block_group
->key
.objectid
) {
610 u64 prev_block
= start
- block_group
->fs_info
->sectorsize
;
612 key
.objectid
= prev_block
;
614 key
.offset
= (u64
)-1;
616 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
620 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
622 /* The previous block may have been in the previous bitmap. */
623 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
624 if (start
>= key
.objectid
+ key
.offset
) {
625 ret
= free_space_next_bitmap(trans
, root
, path
);
630 key
.objectid
= start
;
632 key
.offset
= (u64
)-1;
634 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
642 * Iterate over all of the bitmaps overlapped by the extent of space,
643 * clearing/setting bits as required.
648 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
652 ret
= free_space_next_bitmap(trans
, root
, path
);
658 * Read the bit for the block immediately after the extent of space if
659 * that block is within the block group.
661 if (end
< block_group
->key
.objectid
+ block_group
->key
.offset
) {
662 /* The next block may be in the next bitmap. */
663 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
664 if (end
>= key
.objectid
+ key
.offset
) {
665 ret
= free_space_next_bitmap(trans
, root
, path
);
670 next_bit
= free_space_test_bit(block_group
, path
, end
);
678 /* Leftover on the left. */
682 /* Leftover on the right. */
688 /* Merging with neighbor on the left. */
692 /* Merging with neighbor on the right. */
697 btrfs_release_path(path
);
698 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
705 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
706 struct btrfs_fs_info
*fs_info
,
707 struct btrfs_block_group_cache
*block_group
,
708 struct btrfs_path
*path
,
711 struct btrfs_root
*root
= fs_info
->free_space_root
;
712 struct btrfs_key key
;
713 u64 found_start
, found_end
;
714 u64 end
= start
+ size
;
715 int new_extents
= -1;
718 key
.objectid
= start
;
720 key
.offset
= (u64
)-1;
722 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
726 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
728 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
730 found_start
= key
.objectid
;
731 found_end
= key
.objectid
+ key
.offset
;
732 ASSERT(start
>= found_start
&& end
<= found_end
);
735 * Okay, now that we've found the free space extent which contains the
736 * free space that we are removing, there are four cases:
738 * 1. We're using the whole extent: delete the key we found and
739 * decrement the free space extent count.
740 * 2. We are using part of the extent starting at the beginning: delete
741 * the key we found and insert a new key representing the leftover at
742 * the end. There is no net change in the number of extents.
743 * 3. We are using part of the extent ending at the end: delete the key
744 * we found and insert a new key representing the leftover at the
745 * beginning. There is no net change in the number of extents.
746 * 4. We are using part of the extent in the middle: delete the key we
747 * found and insert two new keys representing the leftovers on each
748 * side. Where we used to have one extent, we now have two, so increment
749 * the extent count. We may need to convert the block group to bitmaps
753 /* Delete the existing key (cases 1-4). */
754 ret
= btrfs_del_item(trans
, root
, path
);
758 /* Add a key for leftovers at the beginning (cases 3 and 4). */
759 if (start
> found_start
) {
760 key
.objectid
= found_start
;
761 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
762 key
.offset
= start
- found_start
;
764 btrfs_release_path(path
);
765 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
771 /* Add a key for leftovers at the end (cases 2 and 4). */
772 if (end
< found_end
) {
774 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
775 key
.offset
= found_end
- end
;
777 btrfs_release_path(path
);
778 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
784 btrfs_release_path(path
);
785 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
792 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
793 struct btrfs_fs_info
*fs_info
,
794 struct btrfs_block_group_cache
*block_group
,
795 struct btrfs_path
*path
, u64 start
, u64 size
)
797 struct btrfs_free_space_info
*info
;
801 if (block_group
->needs_free_space
) {
802 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
808 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
810 return PTR_ERR(info
);
811 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
812 btrfs_release_path(path
);
814 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
815 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
816 path
, start
, size
, 1);
818 return remove_free_space_extent(trans
, fs_info
, block_group
,
823 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
824 struct btrfs_fs_info
*fs_info
,
827 struct btrfs_block_group_cache
*block_group
;
828 struct btrfs_path
*path
;
831 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
834 path
= btrfs_alloc_path();
840 block_group
= btrfs_lookup_block_group(fs_info
, start
);
847 mutex_lock(&block_group
->free_space_lock
);
848 ret
= __remove_from_free_space_tree(trans
, fs_info
, block_group
, path
,
850 mutex_unlock(&block_group
->free_space_lock
);
852 btrfs_put_block_group(block_group
);
854 btrfs_free_path(path
);
856 btrfs_abort_transaction(trans
, ret
);
860 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
861 struct btrfs_fs_info
*fs_info
,
862 struct btrfs_block_group_cache
*block_group
,
863 struct btrfs_path
*path
,
866 struct btrfs_root
*root
= fs_info
->free_space_root
;
867 struct btrfs_key key
, new_key
;
868 u64 found_start
, found_end
;
869 u64 end
= start
+ size
;
874 * We are adding a new extent of free space, but we need to merge
875 * extents. There are four cases here:
877 * 1. The new extent does not have any immediate neighbors to merge
878 * with: add the new key and increment the free space extent count. We
879 * may need to convert the block group to bitmaps as a result.
880 * 2. The new extent has an immediate neighbor before it: remove the
881 * previous key and insert a new key combining both of them. There is no
882 * net change in the number of extents.
883 * 3. The new extent has an immediate neighbor after it: remove the next
884 * key and insert a new key combining both of them. There is no net
885 * change in the number of extents.
886 * 4. The new extent has immediate neighbors on both sides: remove both
887 * of the keys and insert a new key combining all of them. Where we used
888 * to have two extents, we now have one, so decrement the extent count.
891 new_key
.objectid
= start
;
892 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
893 new_key
.offset
= size
;
895 /* Search for a neighbor on the left. */
896 if (start
== block_group
->key
.objectid
)
898 key
.objectid
= start
- 1;
900 key
.offset
= (u64
)-1;
902 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
906 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
908 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
909 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
910 btrfs_release_path(path
);
914 found_start
= key
.objectid
;
915 found_end
= key
.objectid
+ key
.offset
;
916 ASSERT(found_start
>= block_group
->key
.objectid
&&
917 found_end
> block_group
->key
.objectid
);
918 ASSERT(found_start
< start
&& found_end
<= start
);
921 * Delete the neighbor on the left and absorb it into the new key (cases
924 if (found_end
== start
) {
925 ret
= btrfs_del_item(trans
, root
, path
);
928 new_key
.objectid
= found_start
;
929 new_key
.offset
+= key
.offset
;
932 btrfs_release_path(path
);
935 /* Search for a neighbor on the right. */
936 if (end
== block_group
->key
.objectid
+ block_group
->key
.offset
)
940 key
.offset
= (u64
)-1;
942 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
946 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
948 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
949 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
950 btrfs_release_path(path
);
954 found_start
= key
.objectid
;
955 found_end
= key
.objectid
+ key
.offset
;
956 ASSERT(found_start
>= block_group
->key
.objectid
&&
957 found_end
> block_group
->key
.objectid
);
958 ASSERT((found_start
< start
&& found_end
<= start
) ||
959 (found_start
>= end
&& found_end
> end
));
962 * Delete the neighbor on the right and absorb it into the new key
965 if (found_start
== end
) {
966 ret
= btrfs_del_item(trans
, root
, path
);
969 new_key
.offset
+= key
.offset
;
972 btrfs_release_path(path
);
975 /* Insert the new key (cases 1-4). */
976 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
980 btrfs_release_path(path
);
981 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
988 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
989 struct btrfs_fs_info
*fs_info
,
990 struct btrfs_block_group_cache
*block_group
,
991 struct btrfs_path
*path
, u64 start
, u64 size
)
993 struct btrfs_free_space_info
*info
;
997 if (block_group
->needs_free_space
) {
998 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
1004 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
1006 return PTR_ERR(info
);
1007 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1008 btrfs_release_path(path
);
1010 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
1011 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
1012 path
, start
, size
, 0);
1014 return add_free_space_extent(trans
, fs_info
, block_group
, path
,
1019 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1020 struct btrfs_fs_info
*fs_info
,
1021 u64 start
, u64 size
)
1023 struct btrfs_block_group_cache
*block_group
;
1024 struct btrfs_path
*path
;
1027 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1030 path
= btrfs_alloc_path();
1036 block_group
= btrfs_lookup_block_group(fs_info
, start
);
1043 mutex_lock(&block_group
->free_space_lock
);
1044 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
, path
, start
,
1046 mutex_unlock(&block_group
->free_space_lock
);
1048 btrfs_put_block_group(block_group
);
1050 btrfs_free_path(path
);
1052 btrfs_abort_transaction(trans
, ret
);
1057 * Populate the free space tree by walking the extent tree. Operations on the
1058 * extent tree that happen as a result of writes to the free space tree will go
1059 * through the normal add/remove hooks.
1061 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1062 struct btrfs_fs_info
*fs_info
,
1063 struct btrfs_block_group_cache
*block_group
)
1065 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
1066 struct btrfs_path
*path
, *path2
;
1067 struct btrfs_key key
;
1071 path
= btrfs_alloc_path();
1076 path2
= btrfs_alloc_path();
1078 btrfs_free_path(path
);
1082 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path2
);
1086 mutex_lock(&block_group
->free_space_lock
);
1089 * Iterate through all of the extent and metadata items in this block
1090 * group, adding the free space between them and the free space at the
1091 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1092 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1095 key
.objectid
= block_group
->key
.objectid
;
1096 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1099 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1104 start
= block_group
->key
.objectid
;
1105 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1107 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1109 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1110 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1111 if (key
.objectid
>= end
)
1114 if (start
< key
.objectid
) {
1115 ret
= __add_to_free_space_tree(trans
, fs_info
,
1123 start
= key
.objectid
;
1124 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1125 start
+= fs_info
->nodesize
;
1127 start
+= key
.offset
;
1128 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1129 if (key
.objectid
!= block_group
->key
.objectid
)
1133 ret
= btrfs_next_item(extent_root
, path
);
1140 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
,
1141 path2
, start
, end
- start
);
1148 mutex_unlock(&block_group
->free_space_lock
);
1150 btrfs_free_path(path2
);
1151 btrfs_free_path(path
);
1155 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1157 struct btrfs_trans_handle
*trans
;
1158 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1159 struct btrfs_root
*free_space_root
;
1160 struct btrfs_block_group_cache
*block_group
;
1161 struct rb_node
*node
;
1164 trans
= btrfs_start_transaction(tree_root
, 0);
1166 return PTR_ERR(trans
);
1168 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1169 free_space_root
= btrfs_create_tree(trans
, fs_info
,
1170 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1171 if (IS_ERR(free_space_root
)) {
1172 ret
= PTR_ERR(free_space_root
);
1175 fs_info
->free_space_root
= free_space_root
;
1177 node
= rb_first(&fs_info
->block_group_cache_tree
);
1179 block_group
= rb_entry(node
, struct btrfs_block_group_cache
,
1181 ret
= populate_free_space_tree(trans
, fs_info
, block_group
);
1184 node
= rb_next(node
);
1187 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1188 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1189 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1191 return btrfs_commit_transaction(trans
);
1194 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1195 btrfs_abort_transaction(trans
, ret
);
1196 btrfs_end_transaction(trans
);
1200 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1201 struct btrfs_root
*root
)
1203 struct btrfs_path
*path
;
1204 struct btrfs_key key
;
1208 path
= btrfs_alloc_path();
1212 path
->leave_spinning
= 1;
1219 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1223 nr
= btrfs_header_nritems(path
->nodes
[0]);
1228 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1232 btrfs_release_path(path
);
1237 btrfs_free_path(path
);
1241 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1243 struct btrfs_trans_handle
*trans
;
1244 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1245 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1248 trans
= btrfs_start_transaction(tree_root
, 0);
1250 return PTR_ERR(trans
);
1252 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1253 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1254 fs_info
->free_space_root
= NULL
;
1256 ret
= clear_free_space_tree(trans
, free_space_root
);
1260 ret
= btrfs_del_root(trans
, fs_info
, &free_space_root
->root_key
);
1264 list_del(&free_space_root
->dirty_list
);
1266 btrfs_tree_lock(free_space_root
->node
);
1267 clean_tree_block(fs_info
, free_space_root
->node
);
1268 btrfs_tree_unlock(free_space_root
->node
);
1269 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1272 free_extent_buffer(free_space_root
->node
);
1273 free_extent_buffer(free_space_root
->commit_root
);
1274 kfree(free_space_root
);
1276 return btrfs_commit_transaction(trans
);
1279 btrfs_abort_transaction(trans
, ret
);
1280 btrfs_end_transaction(trans
);
1284 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1285 struct btrfs_fs_info
*fs_info
,
1286 struct btrfs_block_group_cache
*block_group
,
1287 struct btrfs_path
*path
)
1292 start
= block_group
->key
.objectid
;
1293 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1295 block_group
->needs_free_space
= 0;
1297 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path
);
1301 return __add_to_free_space_tree(trans
, fs_info
, block_group
, path
,
1302 block_group
->key
.objectid
,
1303 block_group
->key
.offset
);
1306 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1307 struct btrfs_fs_info
*fs_info
,
1308 struct btrfs_block_group_cache
*block_group
)
1310 struct btrfs_path
*path
= NULL
;
1313 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1316 mutex_lock(&block_group
->free_space_lock
);
1317 if (!block_group
->needs_free_space
)
1320 path
= btrfs_alloc_path();
1326 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
, path
);
1329 btrfs_free_path(path
);
1330 mutex_unlock(&block_group
->free_space_lock
);
1332 btrfs_abort_transaction(trans
, ret
);
1336 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1337 struct btrfs_fs_info
*fs_info
,
1338 struct btrfs_block_group_cache
*block_group
)
1340 struct btrfs_root
*root
= fs_info
->free_space_root
;
1341 struct btrfs_path
*path
;
1342 struct btrfs_key key
, found_key
;
1343 struct extent_buffer
*leaf
;
1348 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1351 if (block_group
->needs_free_space
) {
1352 /* We never added this block group to the free space tree. */
1356 path
= btrfs_alloc_path();
1362 start
= block_group
->key
.objectid
;
1363 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1365 key
.objectid
= end
- 1;
1367 key
.offset
= (u64
)-1;
1370 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1374 leaf
= path
->nodes
[0];
1377 while (path
->slots
[0] > 0) {
1378 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1380 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1381 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
1382 ASSERT(found_key
.offset
== block_group
->key
.offset
);
1387 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1388 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1389 ASSERT(found_key
.objectid
>= start
);
1390 ASSERT(found_key
.objectid
< end
);
1391 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1399 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1402 btrfs_release_path(path
);
1407 btrfs_free_path(path
);
1409 btrfs_abort_transaction(trans
, ret
);
1413 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1414 struct btrfs_path
*path
,
1415 u32 expected_extent_count
)
1417 struct btrfs_block_group_cache
*block_group
;
1418 struct btrfs_fs_info
*fs_info
;
1419 struct btrfs_root
*root
;
1420 struct btrfs_key key
;
1421 int prev_bit
= 0, bit
;
1422 /* Initialize to silence GCC. */
1423 u64 extent_start
= 0;
1425 u64 total_found
= 0;
1426 u32 extent_count
= 0;
1429 block_group
= caching_ctl
->block_group
;
1430 fs_info
= block_group
->fs_info
;
1431 root
= fs_info
->free_space_root
;
1433 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1436 ret
= btrfs_next_item(root
, path
);
1442 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1444 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1447 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1448 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1450 caching_ctl
->progress
= key
.objectid
;
1452 offset
= key
.objectid
;
1453 while (offset
< key
.objectid
+ key
.offset
) {
1454 bit
= free_space_test_bit(block_group
, path
, offset
);
1455 if (prev_bit
== 0 && bit
== 1) {
1456 extent_start
= offset
;
1457 } else if (prev_bit
== 1 && bit
== 0) {
1458 total_found
+= add_new_free_space(block_group
,
1462 if (total_found
> CACHING_CTL_WAKE_UP
) {
1464 wake_up(&caching_ctl
->wait
);
1469 offset
+= fs_info
->sectorsize
;
1472 if (prev_bit
== 1) {
1473 total_found
+= add_new_free_space(block_group
, fs_info
,
1478 if (extent_count
!= expected_extent_count
) {
1480 "incorrect extent count for %llu; counted %u, expected %u",
1481 block_group
->key
.objectid
, extent_count
,
1482 expected_extent_count
);
1488 caching_ctl
->progress
= (u64
)-1;
1495 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1496 struct btrfs_path
*path
,
1497 u32 expected_extent_count
)
1499 struct btrfs_block_group_cache
*block_group
;
1500 struct btrfs_fs_info
*fs_info
;
1501 struct btrfs_root
*root
;
1502 struct btrfs_key key
;
1504 u64 total_found
= 0;
1505 u32 extent_count
= 0;
1508 block_group
= caching_ctl
->block_group
;
1509 fs_info
= block_group
->fs_info
;
1510 root
= fs_info
->free_space_root
;
1512 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1515 ret
= btrfs_next_item(root
, path
);
1521 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1523 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1526 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1527 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1529 caching_ctl
->progress
= key
.objectid
;
1531 total_found
+= add_new_free_space(block_group
, fs_info
,
1533 key
.objectid
+ key
.offset
);
1534 if (total_found
> CACHING_CTL_WAKE_UP
) {
1536 wake_up(&caching_ctl
->wait
);
1541 if (extent_count
!= expected_extent_count
) {
1543 "incorrect extent count for %llu; counted %u, expected %u",
1544 block_group
->key
.objectid
, extent_count
,
1545 expected_extent_count
);
1551 caching_ctl
->progress
= (u64
)-1;
1558 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1560 struct btrfs_block_group_cache
*block_group
;
1561 struct btrfs_fs_info
*fs_info
;
1562 struct btrfs_free_space_info
*info
;
1563 struct btrfs_path
*path
;
1564 u32 extent_count
, flags
;
1567 block_group
= caching_ctl
->block_group
;
1568 fs_info
= block_group
->fs_info
;
1570 path
= btrfs_alloc_path();
1575 * Just like caching_thread() doesn't want to deadlock on the extent
1576 * tree, we don't want to deadlock on the free space tree.
1578 path
->skip_locking
= 1;
1579 path
->search_commit_root
= 1;
1582 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
1584 ret
= PTR_ERR(info
);
1587 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1588 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1591 * We left path pointing to the free space info item, so now
1592 * load_free_space_foo can just iterate through the free space tree from
1595 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1596 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1598 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1601 btrfs_free_path(path
);