3 * Copyright (C) 2007 Oracle. All rights reserved.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
19 #define _XOPEN_SOURCE 600
23 #include <sys/types.h>
27 #include "kerncompat.h"
28 #include "extent_io.h"
31 u64 cache_max
= 1024 * 1024 * 32;
33 void extent_io_tree_init(struct extent_io_tree
*tree
)
35 cache_tree_init(&tree
->state
);
36 cache_tree_init(&tree
->cache
);
37 INIT_LIST_HEAD(&tree
->lru
);
41 static struct extent_state
*alloc_extent_state(void)
43 struct extent_state
*state
;
45 state
= malloc(sizeof(*state
));
54 static void free_extent_state(struct extent_state
*state
)
57 BUG_ON(state
->refs
< 0);
62 void extent_io_tree_cleanup(struct extent_io_tree
*tree
)
64 struct extent_state
*es
;
65 struct extent_buffer
*eb
;
66 struct cache_extent
*cache
;
68 while(!list_empty(&tree
->lru
)) {
69 eb
= list_entry(tree
->lru
.next
, struct extent_buffer
, lru
);
71 fprintf(stderr
, "extent buffer leak: "
72 "start %llu len %u\n",
73 (unsigned long long)eb
->start
, eb
->len
);
76 free_extent_buffer(eb
);
79 cache
= find_first_cache_extent(&tree
->state
, 0);
82 es
= container_of(cache
, struct extent_state
, cache_node
);
83 remove_cache_extent(&tree
->state
, &es
->cache_node
);
84 free_extent_state(es
);
88 static inline void update_extent_state(struct extent_state
*state
)
90 state
->cache_node
.start
= state
->start
;
91 state
->cache_node
.size
= state
->end
+ 1 - state
->start
;
95 * Utility function to look for merge candidates inside a given range.
96 * Any extents with matching state are merged together into a single
97 * extent in the tree. Extents with EXTENT_IO in their state field are
100 static int merge_state(struct extent_io_tree
*tree
,
101 struct extent_state
*state
)
103 struct extent_state
*other
;
104 struct cache_extent
*other_node
;
106 if (state
->state
& EXTENT_IOBITS
)
109 other_node
= prev_cache_extent(&state
->cache_node
);
111 other
= container_of(other_node
, struct extent_state
,
113 if (other
->end
== state
->start
- 1 &&
114 other
->state
== state
->state
) {
115 state
->start
= other
->start
;
116 update_extent_state(state
);
117 remove_cache_extent(&tree
->state
, &other
->cache_node
);
118 free_extent_state(other
);
121 other_node
= next_cache_extent(&state
->cache_node
);
123 other
= container_of(other_node
, struct extent_state
,
125 if (other
->start
== state
->end
+ 1 &&
126 other
->state
== state
->state
) {
127 other
->start
= state
->start
;
128 update_extent_state(other
);
129 remove_cache_extent(&tree
->state
, &state
->cache_node
);
130 free_extent_state(state
);
137 * insert an extent_state struct into the tree. 'bits' are set on the
138 * struct before it is inserted.
140 static int insert_state(struct extent_io_tree
*tree
,
141 struct extent_state
*state
, u64 start
, u64 end
,
147 state
->state
|= bits
;
148 state
->start
= start
;
150 update_extent_state(state
);
151 ret
= insert_existing_cache_extent(&tree
->state
, &state
->cache_node
);
153 merge_state(tree
, state
);
158 * split a given extent state struct in two, inserting the preallocated
159 * struct 'prealloc' as the newly created second half. 'split' indicates an
160 * offset inside 'orig' where it should be split.
162 static int split_state(struct extent_io_tree
*tree
, struct extent_state
*orig
,
163 struct extent_state
*prealloc
, u64 split
)
166 prealloc
->start
= orig
->start
;
167 prealloc
->end
= split
- 1;
168 prealloc
->state
= orig
->state
;
169 update_extent_state(prealloc
);
171 update_extent_state(orig
);
172 ret
= insert_existing_cache_extent(&tree
->state
,
173 &prealloc
->cache_node
);
179 * clear some bits on a range in the tree.
181 static int clear_state_bit(struct extent_io_tree
*tree
,
182 struct extent_state
*state
, int bits
)
184 int ret
= state
->state
& bits
;
186 state
->state
&= ~bits
;
187 if (state
->state
== 0) {
188 remove_cache_extent(&tree
->state
, &state
->cache_node
);
189 free_extent_state(state
);
191 merge_state(tree
, state
);
197 * clear some bits on a range in the tree.
199 int clear_extent_bits(struct extent_io_tree
*tree
, u64 start
,
200 u64 end
, int bits
, gfp_t mask
)
202 struct extent_state
*state
;
203 struct extent_state
*prealloc
= NULL
;
204 struct cache_extent
*node
;
210 prealloc
= alloc_extent_state();
215 * this search will find the extents that end after
218 node
= find_first_cache_extent(&tree
->state
, start
);
221 state
= container_of(node
, struct extent_state
, cache_node
);
222 if (state
->start
> end
)
224 last_end
= state
->end
;
227 * | ---- desired range ---- |
229 * | ------------- state -------------- |
231 * We need to split the extent we found, and may flip
232 * bits on second half.
234 * If the extent we found extends past our range, we
235 * just split and search again. It'll get split again
236 * the next time through.
238 * If the extent we found is inside our range, we clear
239 * the desired bit on it.
241 if (state
->start
< start
) {
242 err
= split_state(tree
, state
, prealloc
, start
);
243 BUG_ON(err
== -EEXIST
);
247 if (state
->end
<= end
) {
248 set
|= clear_state_bit(tree
, state
, bits
);
249 if (last_end
== (u64
)-1)
251 start
= last_end
+ 1;
253 start
= state
->start
;
258 * | ---- desired range ---- |
260 * We need to split the extent, and clear the bit
263 if (state
->start
<= end
&& state
->end
> end
) {
264 err
= split_state(tree
, state
, prealloc
, end
+ 1);
265 BUG_ON(err
== -EEXIST
);
267 set
|= clear_state_bit(tree
, prealloc
, bits
);
272 start
= state
->end
+ 1;
273 set
|= clear_state_bit(tree
, state
, bits
);
274 if (last_end
== (u64
)-1)
276 start
= last_end
+ 1;
280 free_extent_state(prealloc
);
290 * set some bits on a range in the tree.
292 int set_extent_bits(struct extent_io_tree
*tree
, u64 start
,
293 u64 end
, int bits
, gfp_t mask
)
295 struct extent_state
*state
;
296 struct extent_state
*prealloc
= NULL
;
297 struct cache_extent
*node
;
303 prealloc
= alloc_extent_state();
308 * this search will find the extents that end after
311 node
= find_first_cache_extent(&tree
->state
, start
);
313 err
= insert_state(tree
, prealloc
, start
, end
, bits
);
314 BUG_ON(err
== -EEXIST
);
319 state
= container_of(node
, struct extent_state
, cache_node
);
320 last_start
= state
->start
;
321 last_end
= state
->end
;
324 * | ---- desired range ---- |
327 * Just lock what we found and keep going
329 if (state
->start
== start
&& state
->end
<= end
) {
330 set
= state
->state
& bits
;
331 state
->state
|= bits
;
332 merge_state(tree
, state
);
333 if (last_end
== (u64
)-1)
335 start
= last_end
+ 1;
339 * | ---- desired range ---- |
342 * | ------------- state -------------- |
344 * We need to split the extent we found, and may flip bits on
347 * If the extent we found extends past our
348 * range, we just split and search again. It'll get split
349 * again the next time through.
351 * If the extent we found is inside our range, we set the
354 if (state
->start
< start
) {
355 set
= state
->state
& bits
;
356 err
= split_state(tree
, state
, prealloc
, start
);
357 BUG_ON(err
== -EEXIST
);
361 if (state
->end
<= end
) {
362 state
->state
|= bits
;
363 start
= state
->end
+ 1;
364 merge_state(tree
, state
);
365 if (last_end
== (u64
)-1)
367 start
= last_end
+ 1;
369 start
= state
->start
;
374 * | ---- desired range ---- |
375 * | state | or | state |
377 * There's a hole, we need to insert something in it and
378 * ignore the extent we found.
380 if (state
->start
> start
) {
382 if (end
< last_start
)
385 this_end
= last_start
-1;
386 err
= insert_state(tree
, prealloc
, start
, this_end
,
388 BUG_ON(err
== -EEXIST
);
392 start
= this_end
+ 1;
396 * | ---- desired range ---- |
397 * | ---------- state ---------- |
398 * We need to split the extent, and set the bit
401 set
= state
->state
& bits
;
402 err
= split_state(tree
, state
, prealloc
, end
+ 1);
403 BUG_ON(err
== -EEXIST
);
405 state
->state
|= bits
;
406 merge_state(tree
, prealloc
);
410 free_extent_state(prealloc
);
418 int set_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
421 return set_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
424 int clear_extent_dirty(struct extent_io_tree
*tree
, u64 start
, u64 end
,
427 return clear_extent_bits(tree
, start
, end
, EXTENT_DIRTY
, mask
);
430 int find_first_extent_bit(struct extent_io_tree
*tree
, u64 start
,
431 u64
*start_ret
, u64
*end_ret
, int bits
)
433 struct cache_extent
*node
;
434 struct extent_state
*state
;
438 * this search will find all the extents that end after
441 node
= find_first_cache_extent(&tree
->state
, start
);
446 state
= container_of(node
, struct extent_state
, cache_node
);
447 if (state
->end
>= start
&& (state
->state
& bits
)) {
448 *start_ret
= state
->start
;
449 *end_ret
= state
->end
;
453 node
= next_cache_extent(node
);
461 int test_range_bit(struct extent_io_tree
*tree
, u64 start
, u64 end
,
462 int bits
, int filled
)
464 struct extent_state
*state
= NULL
;
465 struct cache_extent
*node
;
468 node
= find_first_cache_extent(&tree
->state
, start
);
469 while (node
&& start
<= end
) {
470 state
= container_of(node
, struct extent_state
, cache_node
);
472 if (filled
&& state
->start
> start
) {
476 if (state
->start
> end
)
478 if (state
->state
& bits
) {
486 start
= state
->end
+ 1;
489 node
= next_cache_extent(node
);
499 int set_state_private(struct extent_io_tree
*tree
, u64 start
, u64
private)
501 struct cache_extent
*node
;
502 struct extent_state
*state
;
505 node
= find_first_cache_extent(&tree
->state
, start
);
510 state
= container_of(node
, struct extent_state
, cache_node
);
511 if (state
->start
!= start
) {
515 state
->private = private;
520 int get_state_private(struct extent_io_tree
*tree
, u64 start
, u64
*private)
522 struct cache_extent
*node
;
523 struct extent_state
*state
;
526 node
= find_first_cache_extent(&tree
->state
, start
);
531 state
= container_of(node
, struct extent_state
, cache_node
);
532 if (state
->start
!= start
) {
536 *private = state
->private;
541 static int free_some_buffers(struct extent_io_tree
*tree
)
544 struct extent_buffer
*eb
;
545 struct list_head
*node
, *next
;
547 if (tree
->cache_size
< cache_max
)
549 list_for_each_safe(node
, next
, &tree
->lru
) {
550 eb
= list_entry(node
, struct extent_buffer
, lru
);
552 free_extent_buffer(eb
);
553 if (tree
->cache_size
< cache_max
)
556 list_move_tail(&eb
->lru
, &tree
->lru
);
564 static struct extent_buffer
*__alloc_extent_buffer(struct extent_io_tree
*tree
,
565 u64 bytenr
, u32 blocksize
)
567 struct extent_buffer
*eb
;
570 eb
= malloc(sizeof(struct extent_buffer
) + blocksize
);
582 eb
->dev_bytenr
= (u64
)-1;
583 eb
->cache_node
.start
= bytenr
;
584 eb
->cache_node
.size
= blocksize
;
586 free_some_buffers(tree
);
587 ret
= insert_existing_cache_extent(&tree
->cache
, &eb
->cache_node
);
592 list_add_tail(&eb
->lru
, &tree
->lru
);
593 tree
->cache_size
+= blocksize
;
597 void free_extent_buffer(struct extent_buffer
*eb
)
603 BUG_ON(eb
->refs
< 0);
605 struct extent_io_tree
*tree
= eb
->tree
;
606 BUG_ON(eb
->flags
& EXTENT_DIRTY
);
607 list_del_init(&eb
->lru
);
608 remove_cache_extent(&tree
->cache
, &eb
->cache_node
);
609 BUG_ON(tree
->cache_size
< eb
->len
);
610 tree
->cache_size
-= eb
->len
;
615 struct extent_buffer
*find_extent_buffer(struct extent_io_tree
*tree
,
616 u64 bytenr
, u32 blocksize
)
618 struct extent_buffer
*eb
= NULL
;
619 struct cache_extent
*cache
;
621 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
622 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
623 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
624 list_move_tail(&eb
->lru
, &tree
->lru
);
630 struct extent_buffer
*find_first_extent_buffer(struct extent_io_tree
*tree
,
633 struct extent_buffer
*eb
= NULL
;
634 struct cache_extent
*cache
;
636 cache
= find_first_cache_extent(&tree
->cache
, start
);
638 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
639 list_move_tail(&eb
->lru
, &tree
->lru
);
645 struct extent_buffer
*alloc_extent_buffer(struct extent_io_tree
*tree
,
646 u64 bytenr
, u32 blocksize
)
648 struct extent_buffer
*eb
;
649 struct cache_extent
*cache
;
651 cache
= find_cache_extent(&tree
->cache
, bytenr
, blocksize
);
652 if (cache
&& cache
->start
== bytenr
&& cache
->size
== blocksize
) {
653 eb
= container_of(cache
, struct extent_buffer
, cache_node
);
654 list_move_tail(&eb
->lru
, &tree
->lru
);
658 eb
= container_of(cache
, struct extent_buffer
,
660 BUG_ON(eb
->refs
!= 1);
661 free_extent_buffer(eb
);
663 eb
= __alloc_extent_buffer(tree
, bytenr
, blocksize
);
668 int read_extent_from_disk(struct extent_buffer
*eb
)
671 ret
= pread(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
674 if (ret
!= eb
->len
) {
683 int write_extent_to_disk(struct extent_buffer
*eb
)
686 ret
= pwrite(eb
->fd
, eb
->data
, eb
->len
, eb
->dev_bytenr
);
689 if (ret
!= eb
->len
) {
698 int set_extent_buffer_uptodate(struct extent_buffer
*eb
)
700 eb
->flags
|= EXTENT_UPTODATE
;
704 int clear_extent_buffer_uptodate(struct extent_io_tree
*tree
,
705 struct extent_buffer
*eb
)
707 eb
->flags
&= ~EXTENT_UPTODATE
;
711 int extent_buffer_uptodate(struct extent_buffer
*eb
)
713 if (eb
->flags
& EXTENT_UPTODATE
)
718 int set_extent_buffer_dirty(struct extent_buffer
*eb
)
720 struct extent_io_tree
*tree
= eb
->tree
;
721 if (!(eb
->flags
& EXTENT_DIRTY
)) {
722 eb
->flags
|= EXTENT_DIRTY
;
723 set_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
724 extent_buffer_get(eb
);
729 int clear_extent_buffer_dirty(struct extent_buffer
*eb
)
731 struct extent_io_tree
*tree
= eb
->tree
;
732 if (eb
->flags
& EXTENT_DIRTY
) {
733 eb
->flags
&= ~EXTENT_DIRTY
;
734 clear_extent_dirty(tree
, eb
->start
, eb
->start
+ eb
->len
- 1, 0);
735 free_extent_buffer(eb
);
740 int memcmp_extent_buffer(struct extent_buffer
*eb
, const void *ptrv
,
741 unsigned long start
, unsigned long len
)
743 return memcmp(eb
->data
+ start
, ptrv
, len
);
746 void read_extent_buffer(struct extent_buffer
*eb
, void *dst
,
747 unsigned long start
, unsigned long len
)
749 memcpy(dst
, eb
->data
+ start
, len
);
752 void write_extent_buffer(struct extent_buffer
*eb
, const void *src
,
753 unsigned long start
, unsigned long len
)
755 memcpy(eb
->data
+ start
, src
, len
);
758 void copy_extent_buffer(struct extent_buffer
*dst
, struct extent_buffer
*src
,
759 unsigned long dst_offset
, unsigned long src_offset
,
762 memcpy(dst
->data
+ dst_offset
, src
->data
+ src_offset
, len
);
765 void memcpy_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
766 unsigned long src_offset
, unsigned long len
)
768 memcpy(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
771 void memmove_extent_buffer(struct extent_buffer
*dst
, unsigned long dst_offset
,
772 unsigned long src_offset
, unsigned long len
)
774 memmove(dst
->data
+ dst_offset
, dst
->data
+ src_offset
, len
);
777 void memset_extent_buffer(struct extent_buffer
*eb
, char c
,
778 unsigned long start
, unsigned long len
)
780 memset(eb
->data
+ start
, c
, len
);