/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
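
/*
 * Helpers for converting between byte offsets and bit indexes inside a
 * bitmap entry.  Each bit stands for one sector, so a single bitmap page
 * covers BITS_PER_BITMAP * sectorsize bytes of the block group (roughly
 * 128MB when both pages and sectors are 4k).
 */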
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}
static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}
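
/*
 * Round an offset down to the start of the bitmap window that covers it,
 * measured from the start of the block group (key.objectid).  For example,
 * with 128MB windows an offset 200MB into the block group maps to the
 * window that starts at the 128MB boundary.
 */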
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bytes_per_bitmap;
	u64 bitmap_start;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
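
/*
 * Insert a free space entry into an rbtree indexed by offset.  The same
 * routine serves both the per-block-group cache tree and the cluster
 * rbtrees, which is why the root is passed in explicitly.
 */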
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
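
/*
 * link_free_space/unlink_free_space keep the block group's free_space byte
 * count and free_extents entry count in sync with the rbtree.  Callers hold
 * block_group->tree_lock.
 */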
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}
static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}
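
/*
 * extents_thresh is the number of extent entries we allow before new free
 * space starts going into bitmaps.  It is recalculated whenever the number
 * of bitmaps changes, so that bitmap pages plus extent entries stay within
 * the per-gigabyte budget (e.g. with 4k pages, a 1GiB block group holding
 * one bitmap leaves 32k - 4k = 28k of the budget for extent entries).
 */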
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes, possible_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
		(sizeof(struct btrfs_free_space) *
		 block_group->extents_thresh);

	if (possible_bytes > max_bytes) {
		int extent_bytes = max_bytes -
			(block_group->total_bitmaps * PAGE_CACHE_SIZE);

		if (extent_bytes <= 0) {
			block_group->extents_thresh = 0;
			return;
		}

		block_group->extents_thresh = extent_bytes /
			(sizeof(struct btrfs_free_space));
	}
}
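
/*
 * Set or clear the run of bits covering [offset, offset + bytes) in a
 * bitmap entry and adjust the cached byte counts to match.  The range is
 * expected to fall entirely within this bitmap.
 */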
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
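
/*
 * Scan a bitmap entry for a run of set bits at least *bytes long, starting
 * at or after *offset.  On success the start and size of the run found are
 * passed back through *offset and *bytes.
 */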
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
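
/*
 * Walk the free space tree looking for the first entry (extent or bitmap)
 * that can satisfy an allocation of *bytes at or after *offset.  Bitmap
 * entries are searched with search_bitmap() before being returned.
 */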
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
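
/*
 * Turn a pre-allocated btrfs_free_space into a bitmap entry covering the
 * window that contains 'offset' and link it into the tree.  The caller
 * provides 'info' with info->bitmap already allocated.
 */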
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
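
/*
 * Clear 'bytes' of free space starting at 'offset' out of one or more
 * bitmap entries, dropping bitmaps that end up empty.  If the range runs
 * past the end of a bitmap we advance *offset/*bytes and move on to the
 * next one.
 */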
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *bitmap_info,
			    u64 *offset, u64 *bytes)
{
	u64 end;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		bitmap_info = tree_search_offset(block_group,
						 offset_to_bitmap(block_group,
								  *offset),
						 1, 0);
		if (!bitmap_info)
			return -EINVAL;

		if (!bitmap_info->bitmap)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
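
/*
 * Try to record the free space described by 'info' in a bitmap instead of
 * as an extent entry.  If we are still under the extent threshold, or the
 * block group is too small to be worth a bitmap, nothing is done and the
 * caller adds a normal extent entry instead.
 */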
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
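
/*
 * Add [offset, offset + bytes) of free space to the cache, merging with any
 * neighbouring extent entries, or falling back to a bitmap when the cache
 * already has too many small extent entries.
 */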
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
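
/*
 * Remove [offset, offset + bytes) from the cache.  The range may hit an
 * extent entry exactly, punch a hole in the middle of one (which can happen
 * during tree log replay), or span one or more bitmap entries.
 */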
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		WARN_ON(1);
		goto out_lock;
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}
/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
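
/*
 * Find and remove space for an allocation: search for at least
 * bytes + empty_size of free space, but only consume 'bytes' of it.
 * Returns the logical start of the space handed out, or 0 if nothing
 * suitable was found.
 */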
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
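
/*
 * Allocate 'bytes' from a cluster that points at a bitmap rather than at
 * extent entries: look up the bitmap entry at the cluster's window_start
 * and carve the bits out of it directly.
 */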
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	entry = tree_search_offset(block_group, search_start, 0, 0);

	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
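
/*
 * Try to build a cluster out of a single bitmap entry: find runs of set
 * bits of at least min_bytes until 'bytes' has been covered in total, and
 * point the cluster at the bitmap window that was found.
 */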
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* out window is just right, lets fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = 0;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}