/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "kerncompat.h"
#include "extent_io.h"
#include "list.h"
#include "ctree.h"
#include "volumes.h"
#include "internal.h"
#include "utils.h"

void extent_io_tree_init(struct extent_io_tree *tree)
{
	cache_tree_init(&tree->state);
	cache_tree_init(&tree->cache);
	INIT_LIST_HEAD(&tree->lru);
	tree->cache_size = 0;
	tree->max_cache_size = (u64)total_memory() / 4;
}

void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
				   u64 max_cache_size)
{
	extent_io_tree_init(tree);
	tree->max_cache_size = max_cache_size;
}

static struct extent_state *alloc_extent_state(void)
{
	struct extent_state *state;

	state = malloc(sizeof(*state));
	if (!state)
		return NULL;
	state->cache_node.objectid = 0;
	state->refs = 1;
	state->state = 0;
	state->xprivate = 0;
	return state;
}

static void btrfs_free_extent_state(struct extent_state *state)
{
	state->refs--;
	BUG_ON(state->refs < 0);
	if (state->refs == 0)
		free(state);
}

static void free_extent_state_func(struct cache_extent *cache)
{
	struct extent_state *es;

	es = container_of(cache, struct extent_state, cache_node);
	btrfs_free_extent_state(es);
}

static void free_extent_buffer_final(struct extent_buffer *eb);

void extent_io_tree_cleanup(struct extent_io_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->lru)) {
		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
		if (eb->refs) {
			fprintf(stderr,
				"extent buffer leak: start %llu len %u\n",
				(unsigned long long)eb->start, eb->len);
			free_extent_buffer_nocache(eb);
		} else {
			free_extent_buffer_final(eb);
		}
	}

	cache_tree_free_extents(&tree->state, free_extent_state_func);
}

static inline void update_extent_state(struct extent_state *state)
{
	state->cache_node.start = state->start;
	state->cache_node.size = state->end + 1 - state->start;
}

/*
 * Utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field are
 * not merged.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct cache_extent *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = prev_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			update_extent_state(state);
			remove_cache_extent(&tree->state, &other->cache_node);
			btrfs_free_extent_state(other);
		}
	}
	other_node = next_cache_extent(&state->cache_node);
	if (other_node) {
		other = container_of(other_node, struct extent_state,
				     cache_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			update_extent_state(other);
			remove_cache_extent(&tree->state, &state->cache_node);
			btrfs_free_extent_state(state);
		}
	}
	return 0;
}

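/*
 * Example of the merge behavior (illustrative sketch; 'tree' is assumed
 * to point at an initialized extent_io_tree):
 *
 *	set_extent_dirty(tree, 0, 4095);
 *	set_extent_dirty(tree, 4096, 8191);
 *
 * The second range is adjacent to the first and carries identical state
 * bits, so merge_state() collapses both into a single [0, 8191] record
 * in tree->state.
 */
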
/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	int ret;

	BUG_ON(end < start);
	state->state |= bits;
	state->start = start;
	state->end = end;
	update_extent_state(state);
	ret = insert_cache_extent(&tree->state, &state->cache_node);
	BUG_ON(ret);
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	int ret;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	update_extent_state(prealloc);
	orig->start = split;
	update_extent_state(orig);
	ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
	BUG_ON(ret);
	return 0;
}

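/*
 * Example of a split (illustrative sketch): if 'orig' covers [0, 8191]
 * and split == 4096, then after split_state() 'prealloc' holds
 * [0, 4095] and 'orig' is trimmed to [4096, 8191], with both records
 * indexed in tree->state.
 */
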
/*
 * clear some bits on a range in the tree.
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (state->state == 0) {
		remove_cache_extent(&tree->state, &state->cache_node);
		btrfs_free_extent_state(state);
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.
 */
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	u64 last_end;
	int err;
	int set = 0;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;
	state = container_of(node, struct extent_state, cache_node);
	if (state->start > end)
		goto out;
	last_end = state->end;

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, bits);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set |= clear_state_bit(tree, prealloc, bits);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	goto search_again;

out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return set;

search_again:
	if (start > end)
		goto out;
	goto again;
}

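/*
 * Example of a partial clear (illustrative sketch): with a single
 * [0, 8191] EXTENT_DIRTY record in the tree, the call
 *
 *	clear_extent_bits(tree, 4096, 8191, EXTENT_DIRTY);
 *
 * first splits the record at 4096, then drops the now state-less
 * [4096, 8191] half from the tree, leaving only [0, 4095] dirty.
 */
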
/*
 * set some bits on a range in the tree.
 */
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct cache_extent *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc) {
		prealloc = alloc_extent_state();
		if (!prealloc)
			return -ENOMEM;
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		goto out;
	}

	state = container_of(node, struct extent_state, cache_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		state->state |= bits;
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		goto search_again;
	}
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *   | state |      or      | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 * | ---------- state ---------- |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	err = split_state(tree, state, prealloc, end + 1);
	BUG_ON(err == -EEXIST);

	prealloc->state |= bits;
	merge_state(tree, prealloc);
	prealloc = NULL;

out:
	if (prealloc)
		btrfs_free_extent_state(prealloc);
	return err;

search_again:
	if (start > end)
		goto out;
	goto again;
}

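/*
 * Example of hole filling (illustrative sketch): with only [0, 4095]
 * tracked, the call
 *
 *	set_extent_bits(tree, 0, 12287, EXTENT_DIRTY);
 *
 * sets the bit on the existing [0, 4095] record, then inserts a fresh
 * [4096, 12287] record for the hole; merge_state() collapses both into
 * one [0, 12287] extent on the way out.
 */
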
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return set_extent_bits(tree, start, end, EXTENT_DIRTY);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
}

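/*
 * Typical round trip (illustrative sketch; error handling omitted):
 *
 *	struct extent_io_tree tree;
 *
 *	extent_io_tree_init(&tree);
 *	set_extent_dirty(&tree, 0, 4095);
 *	if (test_range_bit(&tree, 0, 4095, EXTENT_DIRTY, 1))
 *		clear_extent_dirty(&tree, 0, 4095);
 *	extent_io_tree_cleanup(&tree);
 */
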
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 1;

	/*
	 * this search will find all the extents that end after
	 * our range starts
	 */
	node = search_cache_extent(&tree->state, start);
	if (!node)
		goto out;

	while (1) {
		state = container_of(node, struct extent_state, cache_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = next_cache_extent(node);
		if (!node)
			break;
	}
out:
	return ret;
}

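/*
 * Scanning sketch: find_first_extent_bit() can be used as an iterator
 * over every range carrying a bit (illustrative; 'tree' as above):
 *
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(&tree, start, &found_start,
 *				      &found_end, EXTENT_DIRTY)) {
 *		// process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 */
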
/*
 * search a range in the state tree for a given mask.  If 'filled' == 1,
 * this returns 1 only if every extent in the range has the bits set;
 * otherwise, 1 is returned if any bit in the range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct cache_extent *node;
	int bitset = 0;

	node = search_cache_extent(&tree->state, start);
	while (node && start <= end) {
		state = container_of(node, struct extent_state, cache_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = next_cache_extent(node);
		if (!node) {
			if (filled && bitset && start <= end)
				bitset = 0;
			break;
		}
	}
	return bitset;
}

int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->xprivate = private;
out:
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct cache_extent *node;
	struct extent_state *state;
	int ret = 0;

	node = search_cache_extent(&tree->state, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = container_of(node, struct extent_state, cache_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->xprivate;
out:
	return ret;
}

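/*
 * Usage sketch for the private value (illustrative): callers stash a
 * u64 alongside a tracked offset and fetch it back later:
 *
 *	u64 val;
 *
 *	set_extent_dirty(&tree, 8192, 12287);
 *	set_state_private(&tree, 8192, 0xdeadbeef);
 *	if (!get_state_private(&tree, 8192, &val))
 *		// val == 0xdeadbeef
 *
 * Both calls return -ENOENT unless a state record starts exactly at
 * 'start'.
 */
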
static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
						   u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;

	eb = calloc(1, sizeof(struct extent_buffer) + blocksize);
	if (!eb)
		return NULL;

	eb->start = bytenr;
	eb->len = blocksize;
	eb->refs = 1;
	eb->flags = 0;
	eb->fd = -1;
	eb->dev_bytenr = (u64)-1;
	eb->cache_node.start = bytenr;
	eb->cache_node.size = blocksize;
	eb->fs_info = info;
	eb->tree = &info->extent_cache;
	INIT_LIST_HEAD(&eb->recow);
	INIT_LIST_HEAD(&eb->lru);

	return eb;
}

struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
	struct extent_buffer *new;

	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
	if (!new)
		return NULL;
	/* cloned eb is not linked into fs_info->extent_cache */
	new->tree = NULL;

	copy_extent_buffer(new, src, 0, 0, src->len);
	new->flags |= EXTENT_BUFFER_DUMMY;

	return new;
}

static void free_extent_buffer_final(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	BUG_ON(eb->refs);
	BUG_ON(tree && tree->cache_size < eb->len);
	list_del_init(&eb->lru);
	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
		remove_cache_extent(&tree->cache, &eb->cache_node);
		tree->cache_size -= eb->len;
	}
	free(eb);
}

static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
{
	if (!eb || IS_ERR(eb))
		return;

	eb->refs--;
	BUG_ON(eb->refs < 0);
	if (eb->refs == 0) {
		BUG_ON(eb->flags & EXTENT_DIRTY);
		list_del_init(&eb->recow);
		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
			free_extent_buffer_final(eb);
	}
}

void free_extent_buffer(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 0);
}

void free_extent_buffer_nocache(struct extent_buffer *eb)
{
	free_extent_buffer_internal(eb, 1);
}

struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
					 u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
					       u64 start)
{
	struct extent_buffer *eb = NULL;
	struct cache_extent *cache;

	cache = search_cache_extent(&tree->cache, start);
	if (cache) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	}
	return eb;
}

static void trim_extent_buffer_cache(struct extent_io_tree *tree)
{
	struct extent_buffer *eb, *tmp;

	list_for_each_entry_safe(eb, tmp, &tree->lru, lru) {
		if (eb->refs == 0)
			free_extent_buffer_final(eb);
		if (tree->cache_size <= ((tree->max_cache_size * 9) / 10))
			break;
	}
}

struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 bytenr, u32 blocksize)
{
	struct extent_buffer *eb;
	struct extent_io_tree *tree = &fs_info->extent_cache;
	struct cache_extent *cache;

	cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
	if (cache && cache->start == bytenr &&
	    cache->size == blocksize) {
		eb = container_of(cache, struct extent_buffer, cache_node);
		list_move_tail(&eb->lru, &tree->lru);
		eb->refs++;
	} else {
		int ret;

		if (cache) {
			eb = container_of(cache, struct extent_buffer,
					  cache_node);
			free_extent_buffer(eb);
		}
		eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
		if (!eb)
			return NULL;
		ret = insert_cache_extent(&tree->cache, &eb->cache_node);
		if (ret) {
			free(eb);
			return NULL;
		}
		list_add_tail(&eb->lru, &tree->lru);
		tree->cache_size += blocksize;
		if (tree->cache_size >= tree->max_cache_size)
			trim_extent_buffer_cache(tree);
	}
	return eb;
}

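/*
 * Cache behavior sketch (illustrative; bytenr/blocksize stand for some
 * valid pair): a second alloc_extent_buffer() call for the same block
 * returns the cached buffer with an extra reference instead of
 * allocating:
 *
 *	struct extent_buffer *eb1, *eb2;
 *
 *	eb1 = alloc_extent_buffer(fs_info, bytenr, blocksize);
 *	eb2 = alloc_extent_buffer(fs_info, bytenr, blocksize);
 *	// eb1 == eb2, eb1->refs == 2
 *	free_extent_buffer(eb2);
 *	free_extent_buffer(eb1);
 */
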
int read_extent_from_disk(struct extent_buffer *eb,
			  unsigned long offset, unsigned long len)
{
	int ret;

	ret = pread(eb->fd, eb->data + offset, len, eb->dev_bytenr);
	if (ret < 0) {
		ret = -errno;
		goto out;
	}
	if (ret != len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

int write_extent_to_disk(struct extent_buffer *eb)
{
	int ret;

	ret = pwrite(eb->fd, eb->data, eb->len, eb->dev_bytenr);
	if (ret < 0)
		goto out;
	if (ret != eb->len) {
		ret = -EIO;
		goto out;
	}
	ret = 0;
out:
	return ret;
}

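/*
 * I/O sketch (illustrative): both helpers address the backing device
 * through eb->fd and eb->dev_bytenr, which the caller sets up before
 * issuing I/O ('device_fd' and 'physical_offset' are hypothetical):
 *
 *	eb->fd = device_fd;
 *	eb->dev_bytenr = physical_offset;
 *	if (read_extent_from_disk(eb, 0, eb->len))
 *		// handle read error
 */
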
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
			u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 read_len;
	u64 total_read = 0;
	int ret;

	while (bytes_left) {
		read_len = bytes_left;
		ret = btrfs_map_block(info, READ, offset, &read_len, &multi,
				      mirror, NULL);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}
		device = multi->stripes[0].dev;

		read_len = min(bytes_left, read_len);
		if (device->fd <= 0) {
			kfree(multi);
			return -EIO;
		}

		ret = pread(device->fd, buf + total_read, read_len,
			    multi->stripes[0].physical);
		kfree(multi);
		if (ret < 0) {
			fprintf(stderr, "Error reading %Lu, %d\n", offset,
				ret);
			return ret;
		}
		if (ret != read_len) {
			fprintf(stderr, "Short read for %Lu, read %d, "
				"read_len %Lu\n", offset, ret, read_len);
			return -EIO;
		}

		bytes_left -= read_len;
		offset += read_len;
		total_read += read_len;
	}

	return 0;
}

int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(info, WRITE, offset, &this_len, &multi,
				      mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->nodesize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			if (!eb) {
				fprintf(stderr, "cannot allocate memory for eb\n");
				ret = -ENOMEM;
				goto out;
			}

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd <= 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					fprintf(stderr, "Error writing to "
						"device %d\n", errno);
					ret = errno;
					kfree(multi);
					return ret;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;

out:
	kfree(raid_map);
	return ret;
}

int set_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (!(eb->flags & EXTENT_DIRTY)) {
		eb->flags |= EXTENT_DIRTY;
		set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
		extent_buffer_get(eb);
	}
	return 0;
}

int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	struct extent_io_tree *tree = eb->tree;

	if (eb->flags & EXTENT_DIRTY) {
		eb->flags &= ~EXTENT_DIRTY;
		clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
		free_extent_buffer(eb);
	}
	return 0;
}

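/*
 * Lifecycle sketch (illustrative): dirtying pins the buffer with an
 * extra reference until it is cleaned again:
 *
 *	set_extent_buffer_dirty(eb);	// range marked, eb->refs bumped
 *	// ... modify eb->data and write it back ...
 *	clear_extent_buffer_dirty(eb);	// range cleared, reference dropped
 */
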
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	return memcmp(eb->data + start, ptrv, len);
}

void read_extent_buffer(struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len)
{
	memcpy(dst, eb->data + start, len);
}

void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len)
{
	memcpy(eb->data + start, src, len);
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	memcpy(dst->data + dst_offset, src->data + src_offset, len);
}

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	memmove(dst->data + dst_offset, dst->data + src_offset, len);
}

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	memset(eb->data + start, c, len);
}

int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	return le_test_bit(nr, (u8 *)eb->data + start);
}
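
/*
 * Accessor sketch (illustrative): all reads and writes go through
 * eb->data with explicit byte offsets, e.g. copying a header out of a
 * block and back in:
 *
 *	struct btrfs_header hdr;
 *
 *	read_extent_buffer(eb, &hdr, 0, sizeof(hdr));
 *	write_extent_buffer(eb, &hdr, 0, sizeof(hdr));
 */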