2 * Copyright (C) 2015 Facebook. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/kernel.h>
20 #include <linux/vmalloc.h>
24 #include "free-space-tree.h"
25 #include "transaction.h"
27 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
28 struct btrfs_fs_info
*fs_info
,
29 struct btrfs_block_group_cache
*block_group
,
30 struct btrfs_path
*path
);
32 void set_free_space_tree_thresholds(struct btrfs_block_group_cache
*cache
)
36 u64 num_bitmaps
, total_bitmap_size
;
39 * We convert to bitmaps when the disk space required for using extents
40 * exceeds that required for using bitmaps.
42 bitmap_range
= cache
->fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
43 num_bitmaps
= div_u64(cache
->key
.offset
+ bitmap_range
- 1,
45 bitmap_size
= sizeof(struct btrfs_item
) + BTRFS_FREE_SPACE_BITMAP_SIZE
;
46 total_bitmap_size
= num_bitmaps
* bitmap_size
;
47 cache
->bitmap_high_thresh
= div_u64(total_bitmap_size
,
48 sizeof(struct btrfs_item
));
51 * We allow for a small buffer between the high threshold and low
52 * threshold to avoid thrashing back and forth between the two formats.
54 if (cache
->bitmap_high_thresh
> 100)
55 cache
->bitmap_low_thresh
= cache
->bitmap_high_thresh
- 100;
57 cache
->bitmap_low_thresh
= 0;
60 static int add_new_free_space_info(struct btrfs_trans_handle
*trans
,
61 struct btrfs_fs_info
*fs_info
,
62 struct btrfs_block_group_cache
*block_group
,
63 struct btrfs_path
*path
)
65 struct btrfs_root
*root
= fs_info
->free_space_root
;
66 struct btrfs_free_space_info
*info
;
68 struct extent_buffer
*leaf
;
71 key
.objectid
= block_group
->key
.objectid
;
72 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
73 key
.offset
= block_group
->key
.offset
;
75 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, sizeof(*info
));
79 leaf
= path
->nodes
[0];
80 info
= btrfs_item_ptr(leaf
, path
->slots
[0],
81 struct btrfs_free_space_info
);
82 btrfs_set_free_space_extent_count(leaf
, info
, 0);
83 btrfs_set_free_space_flags(leaf
, info
, 0);
84 btrfs_mark_buffer_dirty(leaf
);
88 btrfs_release_path(path
);
92 struct btrfs_free_space_info
*
93 search_free_space_info(struct btrfs_trans_handle
*trans
,
94 struct btrfs_fs_info
*fs_info
,
95 struct btrfs_block_group_cache
*block_group
,
96 struct btrfs_path
*path
, int cow
)
98 struct btrfs_root
*root
= fs_info
->free_space_root
;
102 key
.objectid
= block_group
->key
.objectid
;
103 key
.type
= BTRFS_FREE_SPACE_INFO_KEY
;
104 key
.offset
= block_group
->key
.offset
;
106 ret
= btrfs_search_slot(trans
, root
, &key
, path
, 0, cow
);
110 btrfs_warn(fs_info
, "missing free space info for %llu",
111 block_group
->key
.objectid
);
113 return ERR_PTR(-ENOENT
);
116 return btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
117 struct btrfs_free_space_info
);
121 * btrfs_search_slot() but we're looking for the greatest key less than the
124 static int btrfs_search_prev_slot(struct btrfs_trans_handle
*trans
,
125 struct btrfs_root
*root
,
126 struct btrfs_key
*key
, struct btrfs_path
*p
,
127 int ins_len
, int cow
)
131 ret
= btrfs_search_slot(trans
, root
, key
, p
, ins_len
, cow
);
140 if (p
->slots
[0] == 0) {
149 static inline u32
free_space_bitmap_size(u64 size
, u32 sectorsize
)
151 return DIV_ROUND_UP((u32
)div_u64(size
, sectorsize
), BITS_PER_BYTE
);
154 static u8
*alloc_bitmap(u32 bitmap_size
)
159 * The allocation size varies, observed numbers were < 4K up to 16K.
160 * Using vmalloc unconditionally would be too heavy, we'll try
161 * contiguous allocations first.
163 if (bitmap_size
<= PAGE_SIZE
)
164 return kzalloc(bitmap_size
, GFP_NOFS
);
166 mem
= kzalloc(bitmap_size
, GFP_NOFS
| __GFP_NOWARN
);
170 return __vmalloc(bitmap_size
, GFP_NOFS
| __GFP_HIGHMEM
| __GFP_ZERO
,
174 int convert_free_space_to_bitmaps(struct btrfs_trans_handle
*trans
,
175 struct btrfs_fs_info
*fs_info
,
176 struct btrfs_block_group_cache
*block_group
,
177 struct btrfs_path
*path
)
179 struct btrfs_root
*root
= fs_info
->free_space_root
;
180 struct btrfs_free_space_info
*info
;
181 struct btrfs_key key
, found_key
;
182 struct extent_buffer
*leaf
;
183 u8
*bitmap
, *bitmap_cursor
;
186 u32 bitmap_size
, flags
, expected_extent_count
;
187 u32 extent_count
= 0;
191 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
192 fs_info
->sectorsize
);
193 bitmap
= alloc_bitmap(bitmap_size
);
199 start
= block_group
->key
.objectid
;
200 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
202 key
.objectid
= end
- 1;
204 key
.offset
= (u64
)-1;
207 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
211 leaf
= path
->nodes
[0];
214 while (path
->slots
[0] > 0) {
215 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
217 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
218 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
219 ASSERT(found_key
.offset
== block_group
->key
.offset
);
222 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
) {
225 ASSERT(found_key
.objectid
>= start
);
226 ASSERT(found_key
.objectid
< end
);
227 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
229 first
= div_u64(found_key
.objectid
- start
,
230 fs_info
->sectorsize
);
231 last
= div_u64(found_key
.objectid
+ found_key
.offset
- start
,
232 fs_info
->sectorsize
);
233 le_bitmap_set(bitmap
, first
, last
- first
);
243 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
246 btrfs_release_path(path
);
249 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
254 leaf
= path
->nodes
[0];
255 flags
= btrfs_free_space_flags(leaf
, info
);
256 flags
|= BTRFS_FREE_SPACE_USING_BITMAPS
;
257 btrfs_set_free_space_flags(leaf
, info
, flags
);
258 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
259 btrfs_mark_buffer_dirty(leaf
);
260 btrfs_release_path(path
);
262 if (extent_count
!= expected_extent_count
) {
264 "incorrect extent count for %llu; counted %u, expected %u",
265 block_group
->key
.objectid
, extent_count
,
266 expected_extent_count
);
272 bitmap_cursor
= bitmap
;
273 bitmap_range
= fs_info
->sectorsize
* BTRFS_FREE_SPACE_BITMAP_BITS
;
280 extent_size
= min(end
- i
, bitmap_range
);
281 data_size
= free_space_bitmap_size(extent_size
,
282 fs_info
->sectorsize
);
285 key
.type
= BTRFS_FREE_SPACE_BITMAP_KEY
;
286 key
.offset
= extent_size
;
288 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
293 leaf
= path
->nodes
[0];
294 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
295 write_extent_buffer(leaf
, bitmap_cursor
, ptr
,
297 btrfs_mark_buffer_dirty(leaf
);
298 btrfs_release_path(path
);
301 bitmap_cursor
+= data_size
;
308 btrfs_abort_transaction(trans
, ret
);
312 int convert_free_space_to_extents(struct btrfs_trans_handle
*trans
,
313 struct btrfs_fs_info
*fs_info
,
314 struct btrfs_block_group_cache
*block_group
,
315 struct btrfs_path
*path
)
317 struct btrfs_root
*root
= fs_info
->free_space_root
;
318 struct btrfs_free_space_info
*info
;
319 struct btrfs_key key
, found_key
;
320 struct extent_buffer
*leaf
;
323 /* Initialize to silence GCC. */
324 u64 extent_start
= 0;
326 u32 bitmap_size
, flags
, expected_extent_count
;
327 int prev_bit
= 0, bit
, bitnr
;
328 u32 extent_count
= 0;
332 bitmap_size
= free_space_bitmap_size(block_group
->key
.offset
,
333 fs_info
->sectorsize
);
334 bitmap
= alloc_bitmap(bitmap_size
);
340 start
= block_group
->key
.objectid
;
341 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
343 key
.objectid
= end
- 1;
345 key
.offset
= (u64
)-1;
348 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
352 leaf
= path
->nodes
[0];
355 while (path
->slots
[0] > 0) {
356 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
358 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
359 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
360 ASSERT(found_key
.offset
== block_group
->key
.offset
);
363 } else if (found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
366 u32 bitmap_pos
, data_size
;
368 ASSERT(found_key
.objectid
>= start
);
369 ASSERT(found_key
.objectid
< end
);
370 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
372 bitmap_pos
= div_u64(found_key
.objectid
- start
,
373 fs_info
->sectorsize
*
375 bitmap_cursor
= bitmap
+ bitmap_pos
;
376 data_size
= free_space_bitmap_size(found_key
.offset
,
377 fs_info
->sectorsize
);
379 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0] - 1);
380 read_extent_buffer(leaf
, bitmap_cursor
, ptr
,
390 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
393 btrfs_release_path(path
);
396 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
401 leaf
= path
->nodes
[0];
402 flags
= btrfs_free_space_flags(leaf
, info
);
403 flags
&= ~BTRFS_FREE_SPACE_USING_BITMAPS
;
404 btrfs_set_free_space_flags(leaf
, info
, flags
);
405 expected_extent_count
= btrfs_free_space_extent_count(leaf
, info
);
406 btrfs_mark_buffer_dirty(leaf
);
407 btrfs_release_path(path
);
411 while (offset
< end
) {
412 bit
= !!le_test_bit(bitnr
, bitmap
);
413 if (prev_bit
== 0 && bit
== 1) {
414 extent_start
= offset
;
415 } else if (prev_bit
== 1 && bit
== 0) {
416 key
.objectid
= extent_start
;
417 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
418 key
.offset
= offset
- extent_start
;
420 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
423 btrfs_release_path(path
);
428 offset
+= fs_info
->sectorsize
;
432 key
.objectid
= extent_start
;
433 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
434 key
.offset
= end
- extent_start
;
436 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
439 btrfs_release_path(path
);
444 if (extent_count
!= expected_extent_count
) {
446 "incorrect extent count for %llu; counted %u, expected %u",
447 block_group
->key
.objectid
, extent_count
,
448 expected_extent_count
);
458 btrfs_abort_transaction(trans
, ret
);
462 static int update_free_space_extent_count(struct btrfs_trans_handle
*trans
,
463 struct btrfs_fs_info
*fs_info
,
464 struct btrfs_block_group_cache
*block_group
,
465 struct btrfs_path
*path
,
468 struct btrfs_free_space_info
*info
;
473 if (new_extents
== 0)
476 info
= search_free_space_info(trans
, fs_info
, block_group
, path
, 1);
481 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
482 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
484 extent_count
+= new_extents
;
485 btrfs_set_free_space_extent_count(path
->nodes
[0], info
, extent_count
);
486 btrfs_mark_buffer_dirty(path
->nodes
[0]);
487 btrfs_release_path(path
);
489 if (!(flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
490 extent_count
> block_group
->bitmap_high_thresh
) {
491 ret
= convert_free_space_to_bitmaps(trans
, fs_info
, block_group
,
493 } else if ((flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) &&
494 extent_count
< block_group
->bitmap_low_thresh
) {
495 ret
= convert_free_space_to_extents(trans
, fs_info
, block_group
,
503 int free_space_test_bit(struct btrfs_block_group_cache
*block_group
,
504 struct btrfs_path
*path
, u64 offset
)
506 struct extent_buffer
*leaf
;
507 struct btrfs_key key
;
508 u64 found_start
, found_end
;
509 unsigned long ptr
, i
;
511 leaf
= path
->nodes
[0];
512 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
513 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
515 found_start
= key
.objectid
;
516 found_end
= key
.objectid
+ key
.offset
;
517 ASSERT(offset
>= found_start
&& offset
< found_end
);
519 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
520 i
= div_u64(offset
- found_start
,
521 block_group
->fs_info
->sectorsize
);
522 return !!extent_buffer_test_bit(leaf
, ptr
, i
);
525 static void free_space_set_bits(struct btrfs_block_group_cache
*block_group
,
526 struct btrfs_path
*path
, u64
*start
, u64
*size
,
529 struct btrfs_fs_info
*fs_info
= block_group
->fs_info
;
530 struct extent_buffer
*leaf
;
531 struct btrfs_key key
;
532 u64 end
= *start
+ *size
;
533 u64 found_start
, found_end
;
534 unsigned long ptr
, first
, last
;
536 leaf
= path
->nodes
[0];
537 btrfs_item_key_to_cpu(leaf
, &key
, path
->slots
[0]);
538 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
540 found_start
= key
.objectid
;
541 found_end
= key
.objectid
+ key
.offset
;
542 ASSERT(*start
>= found_start
&& *start
< found_end
);
543 ASSERT(end
> found_start
);
548 ptr
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
549 first
= div_u64(*start
- found_start
, fs_info
->sectorsize
);
550 last
= div_u64(end
- found_start
, fs_info
->sectorsize
);
552 extent_buffer_bitmap_set(leaf
, ptr
, first
, last
- first
);
554 extent_buffer_bitmap_clear(leaf
, ptr
, first
, last
- first
);
555 btrfs_mark_buffer_dirty(leaf
);
557 *size
-= end
- *start
;
562 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
563 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
564 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
567 static int free_space_next_bitmap(struct btrfs_trans_handle
*trans
,
568 struct btrfs_root
*root
, struct btrfs_path
*p
)
570 struct btrfs_key key
;
572 if (p
->slots
[0] + 1 < btrfs_header_nritems(p
->nodes
[0])) {
577 btrfs_item_key_to_cpu(p
->nodes
[0], &key
, p
->slots
[0]);
578 btrfs_release_path(p
);
580 key
.objectid
+= key
.offset
;
582 key
.offset
= (u64
)-1;
584 return btrfs_search_prev_slot(trans
, root
, &key
, p
, 0, 1);
588 * If remove is 1, then we are removing free space, thus clearing bits in the
589 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
592 static int modify_free_space_bitmap(struct btrfs_trans_handle
*trans
,
593 struct btrfs_fs_info
*fs_info
,
594 struct btrfs_block_group_cache
*block_group
,
595 struct btrfs_path
*path
,
596 u64 start
, u64 size
, int remove
)
598 struct btrfs_root
*root
= fs_info
->free_space_root
;
599 struct btrfs_key key
;
600 u64 end
= start
+ size
;
601 u64 cur_start
, cur_size
;
602 int prev_bit
, next_bit
;
607 * Read the bit for the block immediately before the extent of space if
608 * that block is within the block group.
610 if (start
> block_group
->key
.objectid
) {
611 u64 prev_block
= start
- block_group
->fs_info
->sectorsize
;
613 key
.objectid
= prev_block
;
615 key
.offset
= (u64
)-1;
617 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
621 prev_bit
= free_space_test_bit(block_group
, path
, prev_block
);
623 /* The previous block may have been in the previous bitmap. */
624 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
625 if (start
>= key
.objectid
+ key
.offset
) {
626 ret
= free_space_next_bitmap(trans
, root
, path
);
631 key
.objectid
= start
;
633 key
.offset
= (u64
)-1;
635 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, 0, 1);
643 * Iterate over all of the bitmaps overlapped by the extent of space,
644 * clearing/setting bits as required.
649 free_space_set_bits(block_group
, path
, &cur_start
, &cur_size
,
653 ret
= free_space_next_bitmap(trans
, root
, path
);
659 * Read the bit for the block immediately after the extent of space if
660 * that block is within the block group.
662 if (end
< block_group
->key
.objectid
+ block_group
->key
.offset
) {
663 /* The next block may be in the next bitmap. */
664 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
665 if (end
>= key
.objectid
+ key
.offset
) {
666 ret
= free_space_next_bitmap(trans
, root
, path
);
671 next_bit
= free_space_test_bit(block_group
, path
, end
);
679 /* Leftover on the left. */
683 /* Leftover on the right. */
689 /* Merging with neighbor on the left. */
693 /* Merging with neighbor on the right. */
698 btrfs_release_path(path
);
699 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
706 static int remove_free_space_extent(struct btrfs_trans_handle
*trans
,
707 struct btrfs_fs_info
*fs_info
,
708 struct btrfs_block_group_cache
*block_group
,
709 struct btrfs_path
*path
,
712 struct btrfs_root
*root
= fs_info
->free_space_root
;
713 struct btrfs_key key
;
714 u64 found_start
, found_end
;
715 u64 end
= start
+ size
;
716 int new_extents
= -1;
719 key
.objectid
= start
;
721 key
.offset
= (u64
)-1;
723 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
727 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
729 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
731 found_start
= key
.objectid
;
732 found_end
= key
.objectid
+ key
.offset
;
733 ASSERT(start
>= found_start
&& end
<= found_end
);
736 * Okay, now that we've found the free space extent which contains the
737 * free space that we are removing, there are four cases:
739 * 1. We're using the whole extent: delete the key we found and
740 * decrement the free space extent count.
741 * 2. We are using part of the extent starting at the beginning: delete
742 * the key we found and insert a new key representing the leftover at
743 * the end. There is no net change in the number of extents.
744 * 3. We are using part of the extent ending at the end: delete the key
745 * we found and insert a new key representing the leftover at the
746 * beginning. There is no net change in the number of extents.
747 * 4. We are using part of the extent in the middle: delete the key we
748 * found and insert two new keys representing the leftovers on each
749 * side. Where we used to have one extent, we now have two, so increment
750 * the extent count. We may need to convert the block group to bitmaps
754 /* Delete the existing key (cases 1-4). */
755 ret
= btrfs_del_item(trans
, root
, path
);
759 /* Add a key for leftovers at the beginning (cases 3 and 4). */
760 if (start
> found_start
) {
761 key
.objectid
= found_start
;
762 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
763 key
.offset
= start
- found_start
;
765 btrfs_release_path(path
);
766 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
772 /* Add a key for leftovers at the end (cases 2 and 4). */
773 if (end
< found_end
) {
775 key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
776 key
.offset
= found_end
- end
;
778 btrfs_release_path(path
);
779 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
, 0);
785 btrfs_release_path(path
);
786 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
793 int __remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
794 struct btrfs_fs_info
*fs_info
,
795 struct btrfs_block_group_cache
*block_group
,
796 struct btrfs_path
*path
, u64 start
, u64 size
)
798 struct btrfs_free_space_info
*info
;
802 if (block_group
->needs_free_space
) {
803 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
809 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
811 return PTR_ERR(info
);
812 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
813 btrfs_release_path(path
);
815 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
816 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
817 path
, start
, size
, 1);
819 return remove_free_space_extent(trans
, fs_info
, block_group
,
824 int remove_from_free_space_tree(struct btrfs_trans_handle
*trans
,
825 struct btrfs_fs_info
*fs_info
,
828 struct btrfs_block_group_cache
*block_group
;
829 struct btrfs_path
*path
;
832 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
835 path
= btrfs_alloc_path();
841 block_group
= btrfs_lookup_block_group(fs_info
, start
);
848 mutex_lock(&block_group
->free_space_lock
);
849 ret
= __remove_from_free_space_tree(trans
, fs_info
, block_group
, path
,
851 mutex_unlock(&block_group
->free_space_lock
);
853 btrfs_put_block_group(block_group
);
855 btrfs_free_path(path
);
857 btrfs_abort_transaction(trans
, ret
);
861 static int add_free_space_extent(struct btrfs_trans_handle
*trans
,
862 struct btrfs_fs_info
*fs_info
,
863 struct btrfs_block_group_cache
*block_group
,
864 struct btrfs_path
*path
,
867 struct btrfs_root
*root
= fs_info
->free_space_root
;
868 struct btrfs_key key
, new_key
;
869 u64 found_start
, found_end
;
870 u64 end
= start
+ size
;
875 * We are adding a new extent of free space, but we need to merge
876 * extents. There are four cases here:
878 * 1. The new extent does not have any immediate neighbors to merge
879 * with: add the new key and increment the free space extent count. We
880 * may need to convert the block group to bitmaps as a result.
881 * 2. The new extent has an immediate neighbor before it: remove the
882 * previous key and insert a new key combining both of them. There is no
883 * net change in the number of extents.
884 * 3. The new extent has an immediate neighbor after it: remove the next
885 * key and insert a new key combining both of them. There is no net
886 * change in the number of extents.
887 * 4. The new extent has immediate neighbors on both sides: remove both
888 * of the keys and insert a new key combining all of them. Where we used
889 * to have two extents, we now have one, so decrement the extent count.
892 new_key
.objectid
= start
;
893 new_key
.type
= BTRFS_FREE_SPACE_EXTENT_KEY
;
894 new_key
.offset
= size
;
896 /* Search for a neighbor on the left. */
897 if (start
== block_group
->key
.objectid
)
899 key
.objectid
= start
- 1;
901 key
.offset
= (u64
)-1;
903 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
907 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
909 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
910 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
911 btrfs_release_path(path
);
915 found_start
= key
.objectid
;
916 found_end
= key
.objectid
+ key
.offset
;
917 ASSERT(found_start
>= block_group
->key
.objectid
&&
918 found_end
> block_group
->key
.objectid
);
919 ASSERT(found_start
< start
&& found_end
<= start
);
922 * Delete the neighbor on the left and absorb it into the new key (cases
925 if (found_end
== start
) {
926 ret
= btrfs_del_item(trans
, root
, path
);
929 new_key
.objectid
= found_start
;
930 new_key
.offset
+= key
.offset
;
933 btrfs_release_path(path
);
936 /* Search for a neighbor on the right. */
937 if (end
== block_group
->key
.objectid
+ block_group
->key
.offset
)
941 key
.offset
= (u64
)-1;
943 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
947 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
949 if (key
.type
!= BTRFS_FREE_SPACE_EXTENT_KEY
) {
950 ASSERT(key
.type
== BTRFS_FREE_SPACE_INFO_KEY
);
951 btrfs_release_path(path
);
955 found_start
= key
.objectid
;
956 found_end
= key
.objectid
+ key
.offset
;
957 ASSERT(found_start
>= block_group
->key
.objectid
&&
958 found_end
> block_group
->key
.objectid
);
959 ASSERT((found_start
< start
&& found_end
<= start
) ||
960 (found_start
>= end
&& found_end
> end
));
963 * Delete the neighbor on the right and absorb it into the new key
966 if (found_start
== end
) {
967 ret
= btrfs_del_item(trans
, root
, path
);
970 new_key
.offset
+= key
.offset
;
973 btrfs_release_path(path
);
976 /* Insert the new key (cases 1-4). */
977 ret
= btrfs_insert_empty_item(trans
, root
, path
, &new_key
, 0);
981 btrfs_release_path(path
);
982 ret
= update_free_space_extent_count(trans
, fs_info
, block_group
, path
,
989 int __add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
990 struct btrfs_fs_info
*fs_info
,
991 struct btrfs_block_group_cache
*block_group
,
992 struct btrfs_path
*path
, u64 start
, u64 size
)
994 struct btrfs_free_space_info
*info
;
998 if (block_group
->needs_free_space
) {
999 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
,
1005 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
1007 return PTR_ERR(info
);
1008 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1009 btrfs_release_path(path
);
1011 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
) {
1012 return modify_free_space_bitmap(trans
, fs_info
, block_group
,
1013 path
, start
, size
, 0);
1015 return add_free_space_extent(trans
, fs_info
, block_group
, path
,
1020 int add_to_free_space_tree(struct btrfs_trans_handle
*trans
,
1021 struct btrfs_fs_info
*fs_info
,
1022 u64 start
, u64 size
)
1024 struct btrfs_block_group_cache
*block_group
;
1025 struct btrfs_path
*path
;
1028 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1031 path
= btrfs_alloc_path();
1037 block_group
= btrfs_lookup_block_group(fs_info
, start
);
1044 mutex_lock(&block_group
->free_space_lock
);
1045 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
, path
, start
,
1047 mutex_unlock(&block_group
->free_space_lock
);
1049 btrfs_put_block_group(block_group
);
1051 btrfs_free_path(path
);
1053 btrfs_abort_transaction(trans
, ret
);
1058 * Populate the free space tree by walking the extent tree. Operations on the
1059 * extent tree that happen as a result of writes to the free space tree will go
1060 * through the normal add/remove hooks.
1062 static int populate_free_space_tree(struct btrfs_trans_handle
*trans
,
1063 struct btrfs_fs_info
*fs_info
,
1064 struct btrfs_block_group_cache
*block_group
)
1066 struct btrfs_root
*extent_root
= fs_info
->extent_root
;
1067 struct btrfs_path
*path
, *path2
;
1068 struct btrfs_key key
;
1072 path
= btrfs_alloc_path();
1077 path2
= btrfs_alloc_path();
1079 btrfs_free_path(path
);
1083 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path2
);
1087 mutex_lock(&block_group
->free_space_lock
);
1090 * Iterate through all of the extent and metadata items in this block
1091 * group, adding the free space between them and the free space at the
1092 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1093 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1096 key
.objectid
= block_group
->key
.objectid
;
1097 key
.type
= BTRFS_EXTENT_ITEM_KEY
;
1100 ret
= btrfs_search_slot_for_read(extent_root
, &key
, path
, 1, 0);
1105 start
= block_group
->key
.objectid
;
1106 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1108 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1110 if (key
.type
== BTRFS_EXTENT_ITEM_KEY
||
1111 key
.type
== BTRFS_METADATA_ITEM_KEY
) {
1112 if (key
.objectid
>= end
)
1115 if (start
< key
.objectid
) {
1116 ret
= __add_to_free_space_tree(trans
, fs_info
,
1124 start
= key
.objectid
;
1125 if (key
.type
== BTRFS_METADATA_ITEM_KEY
)
1126 start
+= fs_info
->nodesize
;
1128 start
+= key
.offset
;
1129 } else if (key
.type
== BTRFS_BLOCK_GROUP_ITEM_KEY
) {
1130 if (key
.objectid
!= block_group
->key
.objectid
)
1134 ret
= btrfs_next_item(extent_root
, path
);
1141 ret
= __add_to_free_space_tree(trans
, fs_info
, block_group
,
1142 path2
, start
, end
- start
);
1149 mutex_unlock(&block_group
->free_space_lock
);
1151 btrfs_free_path(path2
);
1152 btrfs_free_path(path
);
1156 int btrfs_create_free_space_tree(struct btrfs_fs_info
*fs_info
)
1158 struct btrfs_trans_handle
*trans
;
1159 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1160 struct btrfs_root
*free_space_root
;
1161 struct btrfs_block_group_cache
*block_group
;
1162 struct rb_node
*node
;
1165 trans
= btrfs_start_transaction(tree_root
, 0);
1167 return PTR_ERR(trans
);
1169 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1170 free_space_root
= btrfs_create_tree(trans
, fs_info
,
1171 BTRFS_FREE_SPACE_TREE_OBJECTID
);
1172 if (IS_ERR(free_space_root
)) {
1173 ret
= PTR_ERR(free_space_root
);
1176 fs_info
->free_space_root
= free_space_root
;
1178 node
= rb_first(&fs_info
->block_group_cache_tree
);
1180 block_group
= rb_entry(node
, struct btrfs_block_group_cache
,
1182 ret
= populate_free_space_tree(trans
, fs_info
, block_group
);
1185 node
= rb_next(node
);
1188 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1189 btrfs_set_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1190 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1192 ret
= btrfs_commit_transaction(trans
);
1199 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE
, &fs_info
->flags
);
1200 btrfs_abort_transaction(trans
, ret
);
1201 btrfs_end_transaction(trans
);
1205 static int clear_free_space_tree(struct btrfs_trans_handle
*trans
,
1206 struct btrfs_root
*root
)
1208 struct btrfs_path
*path
;
1209 struct btrfs_key key
;
1213 path
= btrfs_alloc_path();
1217 path
->leave_spinning
= 1;
1224 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
1228 nr
= btrfs_header_nritems(path
->nodes
[0]);
1233 ret
= btrfs_del_items(trans
, root
, path
, 0, nr
);
1237 btrfs_release_path(path
);
1242 btrfs_free_path(path
);
1246 int btrfs_clear_free_space_tree(struct btrfs_fs_info
*fs_info
)
1248 struct btrfs_trans_handle
*trans
;
1249 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
1250 struct btrfs_root
*free_space_root
= fs_info
->free_space_root
;
1253 trans
= btrfs_start_transaction(tree_root
, 0);
1255 return PTR_ERR(trans
);
1257 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE
);
1258 btrfs_clear_fs_compat_ro(fs_info
, FREE_SPACE_TREE_VALID
);
1259 fs_info
->free_space_root
= NULL
;
1261 ret
= clear_free_space_tree(trans
, free_space_root
);
1265 ret
= btrfs_del_root(trans
, tree_root
, &free_space_root
->root_key
);
1269 list_del(&free_space_root
->dirty_list
);
1271 btrfs_tree_lock(free_space_root
->node
);
1272 clean_tree_block(trans
, fs_info
, free_space_root
->node
);
1273 btrfs_tree_unlock(free_space_root
->node
);
1274 btrfs_free_tree_block(trans
, free_space_root
, free_space_root
->node
,
1277 free_extent_buffer(free_space_root
->node
);
1278 free_extent_buffer(free_space_root
->commit_root
);
1279 kfree(free_space_root
);
1281 ret
= btrfs_commit_transaction(trans
);
1288 btrfs_abort_transaction(trans
, ret
);
1289 btrfs_end_transaction(trans
);
1293 static int __add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1294 struct btrfs_fs_info
*fs_info
,
1295 struct btrfs_block_group_cache
*block_group
,
1296 struct btrfs_path
*path
)
1301 start
= block_group
->key
.objectid
;
1302 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1304 block_group
->needs_free_space
= 0;
1306 ret
= add_new_free_space_info(trans
, fs_info
, block_group
, path
);
1310 return __add_to_free_space_tree(trans
, fs_info
, block_group
, path
,
1311 block_group
->key
.objectid
,
1312 block_group
->key
.offset
);
1315 int add_block_group_free_space(struct btrfs_trans_handle
*trans
,
1316 struct btrfs_fs_info
*fs_info
,
1317 struct btrfs_block_group_cache
*block_group
)
1319 struct btrfs_path
*path
= NULL
;
1322 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1325 mutex_lock(&block_group
->free_space_lock
);
1326 if (!block_group
->needs_free_space
)
1329 path
= btrfs_alloc_path();
1335 ret
= __add_block_group_free_space(trans
, fs_info
, block_group
, path
);
1338 btrfs_free_path(path
);
1339 mutex_unlock(&block_group
->free_space_lock
);
1341 btrfs_abort_transaction(trans
, ret
);
1345 int remove_block_group_free_space(struct btrfs_trans_handle
*trans
,
1346 struct btrfs_fs_info
*fs_info
,
1347 struct btrfs_block_group_cache
*block_group
)
1349 struct btrfs_root
*root
= fs_info
->free_space_root
;
1350 struct btrfs_path
*path
;
1351 struct btrfs_key key
, found_key
;
1352 struct extent_buffer
*leaf
;
1357 if (!btrfs_fs_compat_ro(fs_info
, FREE_SPACE_TREE
))
1360 if (block_group
->needs_free_space
) {
1361 /* We never added this block group to the free space tree. */
1365 path
= btrfs_alloc_path();
1371 start
= block_group
->key
.objectid
;
1372 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1374 key
.objectid
= end
- 1;
1376 key
.offset
= (u64
)-1;
1379 ret
= btrfs_search_prev_slot(trans
, root
, &key
, path
, -1, 1);
1383 leaf
= path
->nodes
[0];
1386 while (path
->slots
[0] > 0) {
1387 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0] - 1);
1389 if (found_key
.type
== BTRFS_FREE_SPACE_INFO_KEY
) {
1390 ASSERT(found_key
.objectid
== block_group
->key
.objectid
);
1391 ASSERT(found_key
.offset
== block_group
->key
.offset
);
1396 } else if (found_key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
||
1397 found_key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
) {
1398 ASSERT(found_key
.objectid
>= start
);
1399 ASSERT(found_key
.objectid
< end
);
1400 ASSERT(found_key
.objectid
+ found_key
.offset
<= end
);
1408 ret
= btrfs_del_items(trans
, root
, path
, path
->slots
[0], nr
);
1411 btrfs_release_path(path
);
1416 btrfs_free_path(path
);
1418 btrfs_abort_transaction(trans
, ret
);
1422 static int load_free_space_bitmaps(struct btrfs_caching_control
*caching_ctl
,
1423 struct btrfs_path
*path
,
1424 u32 expected_extent_count
)
1426 struct btrfs_block_group_cache
*block_group
;
1427 struct btrfs_fs_info
*fs_info
;
1428 struct btrfs_root
*root
;
1429 struct btrfs_key key
;
1430 int prev_bit
= 0, bit
;
1431 /* Initialize to silence GCC. */
1432 u64 extent_start
= 0;
1434 u64 total_found
= 0;
1435 u32 extent_count
= 0;
1438 block_group
= caching_ctl
->block_group
;
1439 fs_info
= block_group
->fs_info
;
1440 root
= fs_info
->free_space_root
;
1442 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1445 ret
= btrfs_next_item(root
, path
);
1451 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1453 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1456 ASSERT(key
.type
== BTRFS_FREE_SPACE_BITMAP_KEY
);
1457 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1459 caching_ctl
->progress
= key
.objectid
;
1461 offset
= key
.objectid
;
1462 while (offset
< key
.objectid
+ key
.offset
) {
1463 bit
= free_space_test_bit(block_group
, path
, offset
);
1464 if (prev_bit
== 0 && bit
== 1) {
1465 extent_start
= offset
;
1466 } else if (prev_bit
== 1 && bit
== 0) {
1467 total_found
+= add_new_free_space(block_group
,
1471 if (total_found
> CACHING_CTL_WAKE_UP
) {
1473 wake_up(&caching_ctl
->wait
);
1478 offset
+= fs_info
->sectorsize
;
1481 if (prev_bit
== 1) {
1482 total_found
+= add_new_free_space(block_group
, fs_info
,
1487 if (extent_count
!= expected_extent_count
) {
1489 "incorrect extent count for %llu; counted %u, expected %u",
1490 block_group
->key
.objectid
, extent_count
,
1491 expected_extent_count
);
1497 caching_ctl
->progress
= (u64
)-1;
1504 static int load_free_space_extents(struct btrfs_caching_control
*caching_ctl
,
1505 struct btrfs_path
*path
,
1506 u32 expected_extent_count
)
1508 struct btrfs_block_group_cache
*block_group
;
1509 struct btrfs_fs_info
*fs_info
;
1510 struct btrfs_root
*root
;
1511 struct btrfs_key key
;
1513 u64 total_found
= 0;
1514 u32 extent_count
= 0;
1517 block_group
= caching_ctl
->block_group
;
1518 fs_info
= block_group
->fs_info
;
1519 root
= fs_info
->free_space_root
;
1521 end
= block_group
->key
.objectid
+ block_group
->key
.offset
;
1524 ret
= btrfs_next_item(root
, path
);
1530 btrfs_item_key_to_cpu(path
->nodes
[0], &key
, path
->slots
[0]);
1532 if (key
.type
== BTRFS_FREE_SPACE_INFO_KEY
)
1535 ASSERT(key
.type
== BTRFS_FREE_SPACE_EXTENT_KEY
);
1536 ASSERT(key
.objectid
< end
&& key
.objectid
+ key
.offset
<= end
);
1538 caching_ctl
->progress
= key
.objectid
;
1540 total_found
+= add_new_free_space(block_group
, fs_info
,
1542 key
.objectid
+ key
.offset
);
1543 if (total_found
> CACHING_CTL_WAKE_UP
) {
1545 wake_up(&caching_ctl
->wait
);
1550 if (extent_count
!= expected_extent_count
) {
1552 "incorrect extent count for %llu; counted %u, expected %u",
1553 block_group
->key
.objectid
, extent_count
,
1554 expected_extent_count
);
1560 caching_ctl
->progress
= (u64
)-1;
1567 int load_free_space_tree(struct btrfs_caching_control
*caching_ctl
)
1569 struct btrfs_block_group_cache
*block_group
;
1570 struct btrfs_fs_info
*fs_info
;
1571 struct btrfs_free_space_info
*info
;
1572 struct btrfs_path
*path
;
1573 u32 extent_count
, flags
;
1576 block_group
= caching_ctl
->block_group
;
1577 fs_info
= block_group
->fs_info
;
1579 path
= btrfs_alloc_path();
1584 * Just like caching_thread() doesn't want to deadlock on the extent
1585 * tree, we don't want to deadlock on the free space tree.
1587 path
->skip_locking
= 1;
1588 path
->search_commit_root
= 1;
1591 info
= search_free_space_info(NULL
, fs_info
, block_group
, path
, 0);
1593 ret
= PTR_ERR(info
);
1596 extent_count
= btrfs_free_space_extent_count(path
->nodes
[0], info
);
1597 flags
= btrfs_free_space_flags(path
->nodes
[0], info
);
1600 * We left path pointing to the free space info item, so now
1601 * load_free_space_foo can just iterate through the free space tree from
1604 if (flags
& BTRFS_FREE_SPACE_USING_BITMAPS
)
1605 ret
= load_free_space_bitmaps(caching_ctl
, path
, extent_count
);
1607 ret
= load_free_space_extents(caching_ctl
, path
, extent_count
);
1610 btrfs_free_path(path
);