2 * f2fs extent cache support
4 * Copyright (c) 2015 Motorola Mobility
5 * Copyright (c) 2015 Samsung Electronics
6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
7 * Chao Yu <chao2.yu@samsung.com>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
15 #include <linux/f2fs_fs.h>
19 #include <trace/events/f2fs.h>
/*
 * Slab caches backing the two extent-cache object types:
 * extent_tree (one per inode) and extent_node (one per cached extent).
 * Created in create_extent_cache(), destroyed in destroy_extent_cache().
 */
21 static struct kmem_cache
*extent_tree_slab
;
22 static struct kmem_cache
*extent_node_slab
;
/*
 * __attach_extent_node - allocate an extent_node carrying @ei and link
 * it into @et's rbtree at the position given by (@parent, @p).
 * Uses GFP_ATOMIC because the caller holds et->lock (a rwlock).
 * Bumps the per-tree node count and the filesystem-wide node count.
 * NOTE(review): this chunk is missing several original lines (e.g. the
 * allocation-failure check and the final return of en) — behavior of
 * the omitted parts cannot be confirmed from here.
 */
24 static struct extent_node
*__attach_extent_node(struct f2fs_sb_info
*sbi
,
25 struct extent_tree
*et
, struct extent_info
*ei
,
26 struct rb_node
*parent
, struct rb_node
**p
)
28 struct extent_node
*en
;
/* atomic allocation: caller context cannot sleep (holds et->lock) */
30 en
= kmem_cache_alloc(extent_node_slab
, GFP_ATOMIC
);
/* initialize the LRU list hook (sbi->extent_list linkage) */
35 INIT_LIST_HEAD(&en
->list
);
/* link into the rbtree at the precomputed slot, then rebalance */
37 rb_link_node(&en
->rb_node
, parent
, p
);
38 rb_insert_color(&en
->rb_node
, &et
->root
);
/* accounting: per-tree and filesystem-wide extent node counters */
39 atomic_inc(&et
->node_cnt
);
40 atomic_inc(&sbi
->total_ext_node
);
/*
 * __detach_extent_node - unlink @en from @et's rbtree and drop both
 * node counters.  Also invalidates et->cached_en when it points at @en
 * (the body of that branch is missing from this extracted chunk).
 * Caller must hold et->lock for writing.
 */
44 static void __detach_extent_node(struct f2fs_sb_info
*sbi
,
45 struct extent_tree
*et
, struct extent_node
*en
)
47 rb_erase(&en
->rb_node
, &et
->root
);
48 atomic_dec(&et
->node_cnt
);
49 atomic_dec(&sbi
->total_ext_node
);
/* if the single-entry lookup cache referenced this node, drop it */
51 if (et
->cached_en
== en
)
/*
 * __grab_extent_tree - return the inode's extent_tree, creating and
 * registering it in sbi->extent_tree_root (radix tree keyed by ino)
 * on first use.  Serialized by sbi->extent_tree_lock.
 * NOTE(review): the branch structure (lookup-miss vs. lookup-hit paths)
 * is partially missing from this chunk; the zombie-list removal below
 * presumably runs on the lookup-hit path — confirm against upstream.
 */
55 static struct extent_tree
*__grab_extent_tree(struct inode
*inode
)
57 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
58 struct extent_tree
*et
;
59 nid_t ino
= inode
->i_ino
;
61 down_write(&sbi
->extent_tree_lock
);
/* look up an existing tree for this inode number */
62 et
= radix_tree_lookup(&sbi
->extent_tree_root
, ino
);
/* miss path: allocate, insert into the radix tree, and initialize */
64 et
= f2fs_kmem_cache_alloc(extent_tree_slab
, GFP_NOFS
);
65 f2fs_radix_tree_insert(&sbi
->extent_tree_root
, ino
, et
);
66 memset(et
, 0, sizeof(struct extent_tree
));
70 rwlock_init(&et
->lock
);
71 INIT_LIST_HEAD(&et
->list
);
72 atomic_set(&et
->node_cnt
, 0);
73 atomic_inc(&sbi
->total_ext_tree
);
/* tree was found on the zombie list: resurrect it */
75 atomic_dec(&sbi
->total_zombie_tree
);
76 list_del_init(&et
->list
);
78 up_write(&sbi
->extent_tree_lock
);
80 /* never dies until evict_inode */
81 F2FS_I(inode
)->extent_tree
= et
;
/*
 * __lookup_extent_tree - find the extent_node in @et covering file
 * offset @fofs.  Checks the one-entry cache (et->cached_en) first,
 * then walks the rbtree.  Stat counters distinguish cached hits from
 * rbtree hits.  Caller must hold et->lock (read is sufficient).
 * NOTE(review): return statements and the cached_en NULL guard are
 * missing from this extracted chunk.
 */
86 static struct extent_node
*__lookup_extent_tree(struct f2fs_sb_info
*sbi
,
87 struct extent_tree
*et
, unsigned int fofs
)
89 struct rb_node
*node
= et
->root
.rb_node
;
90 struct extent_node
*en
= et
->cached_en
;
/* fast path: last-hit extent may cover fofs already */
93 struct extent_info
*cei
= &en
->ei
;
95 if (cei
->fofs
<= fofs
&& cei
->fofs
+ cei
->len
> fofs
) {
96 stat_inc_cached_node_hit(sbi
);
/* slow path: binary search in the rbtree ordered by ei.fofs */
102 en
= rb_entry(node
, struct extent_node
, rb_node
);
104 if (fofs
< en
->ei
.fofs
) {
105 node
= node
->rb_left
;
106 } else if (fofs
>= en
->ei
.fofs
+ en
->ei
.len
) {
107 node
= node
->rb_right
;
/* fofs falls inside [en->ei.fofs, en->ei.fofs + en->ei.len) — hit */
109 stat_inc_rbtree_node_hit(sbi
);
/*
 * __init_extent_tree - seed an empty tree with its first node built
 * from @ei, and record that extent as the tree's largest.
 * Caller must hold et->lock for writing.
 */
116 static struct extent_node
*__init_extent_tree(struct f2fs_sb_info
*sbi
,
117 struct extent_tree
*et
, struct extent_info
*ei
)
/* insert at the (empty) root slot */
119 struct rb_node
**p
= &et
->root
.rb_node
;
120 struct extent_node
*en
;
122 en
= __attach_extent_node(sbi
, et
, ei
, NULL
, p
);
/* the only extent is trivially the largest one */
126 et
->largest
= en
->ei
;
/*
 * __free_extent_tree - release extent nodes of @et and return how many
 * were freed (count delta of et->node_cnt).  With @free_all, every node
 * goes; otherwise only nodes already off the global LRU list are freed.
 * Caller must hold et->lock for writing; the LRU manipulation takes
 * sbi->extent_lock separately.
 * NOTE(review): the loop header iterating from rb_first() is missing
 * from this extracted chunk.
 */
131 static unsigned int __free_extent_tree(struct f2fs_sb_info
*sbi
,
132 struct extent_tree
*et
, bool free_all
)
134 struct rb_node
*node
, *next
;
135 struct extent_node
*en
;
136 unsigned int count
= atomic_read(&et
->node_cnt
);
138 node
= rb_first(&et
->root
);
/* fetch successor before the current node may be erased */
140 next
= rb_next(node
);
141 en
= rb_entry(node
, struct extent_node
, rb_node
);
/* drop the node from the global LRU under the list spinlock */
144 spin_lock(&sbi
->extent_lock
);
145 if (!list_empty(&en
->list
))
146 list_del_init(&en
->list
);
147 spin_unlock(&sbi
->extent_lock
);
150 if (free_all
|| list_empty(&en
->list
)) {
151 __detach_extent_node(sbi
, et
, en
);
152 kmem_cache_free(extent_node_slab
, en
);
/* freed = nodes present on entry minus nodes remaining */
157 return count
- atomic_read(&et
->node_cnt
);
/*
 * __drop_largest_extent - invalidate the cached "largest extent" if the
 * range [fofs, fofs + len) overlaps it (the actual reset statement is
 * missing from this extracted chunk).
 */
160 static void __drop_largest_extent(struct inode
*inode
,
161 pgoff_t fofs
, unsigned int len
)
163 struct extent_info
*largest
= &F2FS_I(inode
)->extent_tree
->largest
;
/* standard half-open interval overlap test */
165 if (fofs
< largest
->fofs
+ largest
->len
&& fofs
+ len
> largest
->fofs
)
169 /* return true, if inode page is changed */
/*
 * f2fs_init_extent_tree - build the in-memory extent tree for @inode
 * from the on-disk inline extent @i_ext when the extent cache is
 * enabled; otherwise drop the on-disk largest extent.
 * NOTE(review): several guard returns and the early-out when the tree
 * already has nodes are missing from this extracted chunk.
 */
170 bool f2fs_init_extent_tree(struct inode
*inode
, struct f2fs_extent
*i_ext
)
172 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
173 struct extent_tree
*et
;
174 struct extent_node
*en
;
175 struct extent_info ei
;
/* extent cache disabled for this inode */
177 if (!f2fs_may_extent_tree(inode
)) {
178 /* drop largest extent */
179 if (i_ext
&& i_ext
->len
) {
/* get (or create) the per-inode tree */
186 et
= __grab_extent_tree(inode
);
/* nothing to import from disk */
188 if (!i_ext
|| !i_ext
->len
)
/* convert the little-endian on-disk extent to in-memory form */
191 set_extent_info(&ei
, le32_to_cpu(i_ext
->fofs
),
192 le32_to_cpu(i_ext
->blk
), le32_to_cpu(i_ext
->len
));
194 write_lock(&et
->lock
);
/* tree already populated: skip re-initialization */
195 if (atomic_read(&et
->node_cnt
))
198 en
= __init_extent_tree(sbi
, et
, &ei
);
/* publish the new node on the global LRU list */
200 spin_lock(&sbi
->extent_lock
);
201 list_add_tail(&en
->list
, &sbi
->extent_list
);
202 spin_unlock(&sbi
->extent_lock
);
205 write_unlock(&et
->lock
);
/*
 * f2fs_lookup_extent_tree - look up @pgofs in @inode's extent tree and,
 * on a hit, fill @ei with the covering extent.  Checks the cached
 * "largest" extent first, then the rbtree; a hit refreshes the node's
 * position on the global LRU list.
 * NOTE(review): the assignments of *ei and the final return of the
 * hit/miss result are missing from this extracted chunk.
 */
209 static bool f2fs_lookup_extent_tree(struct inode
*inode
, pgoff_t pgofs
,
210 struct extent_info
*ei
)
212 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
213 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
214 struct extent_node
*en
;
/* the tree must exist — created at inode init when cache is enabled */
217 f2fs_bug_on(sbi
, !et
);
219 trace_f2fs_lookup_extent_tree_start(inode
, pgofs
);
221 read_lock(&et
->lock
);
/* fast path: does the cached largest extent cover pgofs? */
223 if (et
->largest
.fofs
<= pgofs
&&
224 et
->largest
.fofs
+ et
->largest
.len
> pgofs
) {
227 stat_inc_largest_node_hit(sbi
);
/* slow path: rbtree lookup */
231 en
= __lookup_extent_tree(sbi
, et
, pgofs
);
/* hit: move the node to the LRU tail (most recently used) */
234 spin_lock(&sbi
->extent_lock
);
235 if (!list_empty(&en
->list
))
236 list_move_tail(&en
->list
, &sbi
->extent_list
);
238 spin_unlock(&sbi
->extent_lock
);
242 stat_inc_total_hit(sbi
);
243 read_unlock(&et
->lock
);
245 trace_f2fs_lookup_extent_tree_end(inode
, pgofs
, ei
);
251 * lookup extent at @fofs, if hit, return the extent
252 * if not, return NULL and
253 * @prev_ex: extent before fofs
254 * @next_ex: extent after fofs
255 * @insert_p: insert point for new extent at fofs
256 * in order to simplify the insertion after.
257 * tree must stay unchanged between lookup and insertion.
/*
 * NOTE(review): this chunk omits the fofs parameter line, the early
 * return for an empty tree, and the lookup_neighbors label plus final
 * return — confirm against the upstream kernel source.
 */
259 static struct extent_node
*__lookup_extent_tree_ret(struct extent_tree
*et
,
261 struct extent_node
**prev_ex
,
262 struct extent_node
**next_ex
,
263 struct rb_node
***insert_p
,
264 struct rb_node
**insert_parent
)
266 struct rb_node
**pnode
= &et
->root
.rb_node
;
267 struct rb_node
*parent
= NULL
, *tmp_node
;
268 struct extent_node
*en
= et
->cached_en
;
271 *insert_parent
= NULL
;
/* empty tree: nothing to find and nothing to link against */
275 if (RB_EMPTY_ROOT(&et
->root
))
/* fast path via the one-entry lookup cache */
279 struct extent_info
*cei
= &en
->ei
;
281 if (cei
->fofs
<= fofs
&& cei
->fofs
+ cei
->len
> fofs
)
282 goto lookup_neighbors
;
/* rbtree descent, remembering the parent for a later insertion */
287 en
= rb_entry(*pnode
, struct extent_node
, rb_node
);
289 if (fofs
< en
->ei
.fofs
)
290 pnode
= &(*pnode
)->rb_left
;
291 else if (fofs
>= en
->ei
.fofs
+ en
->ei
.len
)
292 pnode
= &(*pnode
)->rb_right
;
294 goto lookup_neighbors
;
/* miss: report where a new node for fofs would be attached */
298 *insert_parent
= parent
;
300 en
= rb_entry(parent
, struct extent_node
, rb_node
);
/* fofs is right of the would-be parent: successor is the next extent */
302 if (parent
&& fofs
> en
->ei
.fofs
)
303 tmp_node
= rb_next(parent
);
304 *next_ex
= tmp_node
?
305 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
/* fofs is left of the would-be parent: predecessor precedes it */
308 if (parent
&& fofs
< en
->ei
.fofs
)
309 tmp_node
= rb_prev(parent
);
310 *prev_ex
= tmp_node
?
311 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
/* hit at the extent's first block: expose the predecessor too */
315 if (fofs
== en
->ei
.fofs
) {
316 /* lookup prev node for merging backward later */
317 tmp_node
= rb_prev(&en
->rb_node
);
318 *prev_ex
= tmp_node
?
319 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
/* hit at the extent's last block: expose the successor too */
321 if (fofs
== en
->ei
.fofs
+ en
->ei
.len
- 1) {
322 /* lookup next node for merging frontward later */
323 tmp_node
= rb_next(&en
->rb_node
);
324 *next_ex
= tmp_node
?
325 rb_entry(tmp_node
, struct extent_node
, rb_node
) : NULL
;
/*
 * __try_merge_extent_node - try to absorb @ei into its rbtree
 * neighbors: extend @prev_ex backward and/or @next_ex frontward when
 * contiguous.  When both merges apply, @prev_ex is detached (its
 * coverage is folded into @next_ex) and, per the **den out-parameter,
 * presumably handed back for deferred freeing — the *den assignment is
 * missing from this extracted chunk; verify against upstream.
 * Returns the surviving merged node via en (return statement missing
 * here as well).  Caller must hold et->lock for writing.
 */
330 static struct extent_node
*__try_merge_extent_node(struct f2fs_sb_info
*sbi
,
331 struct extent_tree
*et
, struct extent_info
*ei
,
332 struct extent_node
**den
,
333 struct extent_node
*prev_ex
,
334 struct extent_node
*next_ex
)
336 struct extent_node
*en
= NULL
;
/* backward merge: ei starts exactly where prev_ex ends */
338 if (prev_ex
&& __is_back_mergeable(ei
, &prev_ex
->ei
)) {
339 prev_ex
->ei
.len
+= ei
->len
;
/* frontward merge: ei ends exactly where next_ex starts */
344 if (next_ex
&& __is_front_mergeable(ei
, &next_ex
->ei
)) {
/* both merged: prev_ex is now redundant, remove it from the tree */
346 __detach_extent_node(sbi
, et
, prev_ex
);
349 next_ex
->ei
.fofs
= ei
->fofs
;
350 next_ex
->ei
.blk
= ei
->blk
;
351 next_ex
->ei
.len
+= ei
->len
;
/* the grown node may now be the largest extent in the tree */
356 __try_update_largest_extent(et
, en
);
/*
 * __insert_extent_tree - insert a new node for @ei into @et.  When the
 * caller supplies a precomputed slot (@insert_p, @insert_parent) from
 * __lookup_extent_tree_ret, the rbtree descent is skipped; otherwise
 * the slot is found by walking from the root.  Updates the largest-
 * extent cache.  Caller must hold et->lock for writing.
 * NOTE(review): the goto into the attach step and the descent-loop
 * braces are missing from this extracted chunk.
 */
362 static struct extent_node
*__insert_extent_tree(struct f2fs_sb_info
*sbi
,
363 struct extent_tree
*et
, struct extent_info
*ei
,
364 struct rb_node
**insert_p
,
365 struct rb_node
*insert_parent
)
367 struct rb_node
**p
= &et
->root
.rb_node
;
368 struct rb_node
*parent
= NULL
;
369 struct extent_node
*en
= NULL
;
/* fast path: reuse the slot computed by the preceding lookup */
371 if (insert_p
&& insert_parent
) {
372 parent
= insert_parent
;
/* slow path: standard rbtree descent keyed by ei->fofs */
379 en
= rb_entry(parent
, struct extent_node
, rb_node
);
381 if (ei
->fofs
< en
->ei
.fofs
)
383 else if (ei
->fofs
>= en
->ei
.fofs
+ en
->ei
.len
)
389 en
= __attach_extent_node(sbi
, et
, ei
, parent
, p
);
393 __try_update_largest_extent(et
, en
);
/*
 * f2fs_update_extent_tree_range - record that file range
 * [fofs, fofs + len) of @inode now maps to blocks starting at
 * @blkaddr.  Invalidates / splits any overlapping cached extents,
 * then merges or inserts the new mapping, maintaining the global LRU
 * and the per-tree largest extent.  Returns nonzero when the cached
 * largest extent changed (so the caller can mark the inode dirty).
 * NOTE(review): many control-flow lines (loop advance, parts++ sites,
 * dei initialization from en->ei, several returns) are missing from
 * this extracted chunk; the annotations below describe only what is
 * visible.
 */
398 static unsigned int f2fs_update_extent_tree_range(struct inode
*inode
,
399 pgoff_t fofs
, block_t blkaddr
, unsigned int len
)
401 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
402 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
403 struct extent_node
*en
= NULL
, *en1
= NULL
;
404 struct extent_node
*prev_en
= NULL
, *next_en
= NULL
;
405 struct extent_info ei
, dei
, prev
;
406 struct rb_node
**insert_p
= NULL
, *insert_parent
= NULL
;
/* half-open end of the updated range, and its split position */
407 unsigned int end
= fofs
+ len
;
408 unsigned int pos
= (unsigned int)fofs
;
413 trace_f2fs_update_extent_tree_range(inode
, fofs
, blkaddr
, len
);
415 write_lock(&et
->lock
);
/* inode opted out of extent caching: nothing to update */
417 if (is_inode_flag_set(F2FS_I(inode
), FI_NO_EXTENT
)) {
418 write_unlock(&et
->lock
);
426 * drop largest extent before lookup, in case it's already
427 * been shrunk from extent tree
429 __drop_largest_extent(inode
, fofs
, len
);
431 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
432 en
= __lookup_extent_tree_ret(et
, fofs
, &prev_en
, &next_en
,
433 &insert_p
, &insert_parent
);
437 /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
438 while (en
&& en
->ei
.fofs
< end
) {
439 unsigned int org_end
;
440 int parts
= 0; /* # of parts current extent split into */
442 next_en
= en1
= NULL
;
445 org_end
= dei
.fofs
+ dei
.len
;
446 f2fs_bug_on(sbi
, pos
>= org_end
);
/* keep a left remainder only if it is at least the minimum length */
448 if (pos
> dei
.fofs
&& pos
- dei
.fofs
>= F2FS_MIN_EXTENT_LEN
) {
449 en
->ei
.len
= pos
- en
->ei
.fofs
;
/* keep a right remainder only if it is at least the minimum length */
454 if (end
< org_end
&& org_end
- end
>= F2FS_MIN_EXTENT_LEN
) {
456 set_extent_info(&ei
, end
,
457 end
- dei
.fofs
+ dei
.blk
,
459 en1
= __insert_extent_tree(sbi
, et
, &ei
,
/* shrink the current node in place to hold the right remainder */
464 en
->ei
.blk
+= end
- dei
.fofs
;
465 en
->ei
.len
-= end
- dei
.fofs
;
/* advance to the next overlapping node before possibly freeing en */
472 struct rb_node
*node
= rb_next(&en
->rb_node
);
475 rb_entry(node
, struct extent_node
, rb_node
)
480 __try_update_largest_extent(et
, en
);
/* fully-covered node: remove it from the tree */
482 __detach_extent_node(sbi
, et
, en
);
485 * if original extent is split into zero or two parts, extent
486 * tree has been altered by deletion or insertion, therefore
487 * invalidate pointers regard to tree.
491 insert_parent
= NULL
;
494 /* update in global extent list */
495 spin_lock(&sbi
->extent_lock
);
496 if (!parts
&& !list_empty(&en
->list
))
499 list_add_tail(&en1
->list
, &sbi
->extent_list
);
500 spin_unlock(&sbi
->extent_lock
);
502 /* release extent node */
504 kmem_cache_free(extent_node_slab
, en
);
509 /* 3. update extent in extent cache */
/* den collects a node made redundant by a double merge, freed below */
511 struct extent_node
*den
= NULL
;
513 set_extent_info(&ei
, fofs
, blkaddr
, len
);
514 en1
= __try_merge_extent_node(sbi
, et
, &ei
, &den
,
/* no merge possible: insert a fresh node at the remembered slot */
517 en1
= __insert_extent_tree(sbi
, et
, &ei
,
518 insert_p
, insert_parent
);
520 /* give up extent_cache, if split and small updates happen */
522 prev
.len
< F2FS_MIN_EXTENT_LEN
&&
523 et
->largest
.len
< F2FS_MIN_EXTENT_LEN
) {
525 set_inode_flag(F2FS_I(inode
), FI_NO_EXTENT
);
/* refresh LRU position of the surviving node; unlink the dead one */
528 spin_lock(&sbi
->extent_lock
);
530 if (list_empty(&en1
->list
))
531 list_add_tail(&en1
->list
, &sbi
->extent_list
);
533 list_move_tail(&en1
->list
, &sbi
->extent_list
);
535 if (den
&& !list_empty(&den
->list
))
536 list_del(&den
->list
);
537 spin_unlock(&sbi
->extent_lock
);
540 kmem_cache_free(extent_node_slab
, den
);
/* caching was just disabled above: drop everything in the tree */
543 if (is_inode_flag_set(F2FS_I(inode
), FI_NO_EXTENT
))
544 __free_extent_tree(sbi
, et
, true);
546 write_unlock(&et
->lock
);
/* nonzero iff the cached largest extent changed */
548 return !__is_extent_same(&prev
, &et
->largest
);
/*
 * f2fs_shrink_extent_tree - memory-pressure shrinker entry: reclaim up
 * to @nr_shrink extent objects.  Pass 1 frees zombie trees (inodes
 * already evicted); pass 2 walks the global LRU and then the radix
 * tree to free least-recently-used nodes.  Uses trylock variants
 * throughout so it never blocks reclaim.  Returns the number of
 * objects (nodes + trees) freed.
 * NOTE(review): declarations of `found`, `i`, `remained` and several
 * loop/branch lines are missing from this extracted chunk.
 */
551 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info
*sbi
, int nr_shrink
)
553 struct extent_tree
*treevec
[EXT_TREE_VEC_SIZE
];
554 struct extent_tree
*et
, *next
;
555 struct extent_node
*en
, *tmp
;
556 unsigned long ino
= F2FS_ROOT_INO(sbi
);
558 unsigned int node_cnt
= 0, tree_cnt
= 0;
560 bool do_free
= false;
/* extent cache disabled by mount option: nothing to shrink */
562 if (!test_opt(sbi
, EXTENT_CACHE
))
565 if (!atomic_read(&sbi
->total_zombie_tree
))
/* never block reclaim: bail out if the lock is contended */
568 if (!down_write_trylock(&sbi
->extent_tree_lock
))
571 /* 1. remove unreferenced extent tree */
572 list_for_each_entry_safe(et
, next
, &sbi
->zombie_list
, list
) {
573 if (atomic_read(&et
->node_cnt
)) {
574 write_lock(&et
->lock
);
575 node_cnt
+= __free_extent_tree(sbi
, et
, true);
576 write_unlock(&et
->lock
);
/* tree is empty now: drop it from the radix tree and free it */
579 list_del_init(&et
->list
);
580 radix_tree_delete(&sbi
->extent_tree_root
, et
->ino
);
581 kmem_cache_free(extent_tree_slab
, et
);
582 atomic_dec(&sbi
->total_ext_tree
);
583 atomic_dec(&sbi
->total_zombie_tree
);
/* reclaimed enough already? */
586 if (node_cnt
+ tree_cnt
>= nr_shrink
)
589 up_write(&sbi
->extent_tree_lock
);
592 /* 2. remove LRU extent entries */
593 if (!down_write_trylock(&sbi
->extent_tree_lock
))
/* how many objects are still needed to meet the target */
596 remained
= nr_shrink
- (node_cnt
+ tree_cnt
);
/* detach victims from the LRU head (least recently used first) */
598 spin_lock(&sbi
->extent_lock
);
599 list_for_each_entry_safe(en
, tmp
, &sbi
->extent_list
, list
) {
602 list_del_init(&en
->list
);
605 spin_unlock(&sbi
->extent_lock
);
607 if (do_free
== false)
611 * reset ino for searching victims from beginning of global extent tree.
613 ino
= F2FS_ROOT_INO(sbi
);
/* gang-scan all trees; detached nodes are freed by __free_extent_tree */
615 while ((found
= radix_tree_gang_lookup(&sbi
->extent_tree_root
,
616 (void **)treevec
, ino
, EXT_TREE_VEC_SIZE
))) {
/* continue the scan after the last tree returned */
619 ino
= treevec
[found
- 1]->ino
+ 1;
620 for (i
= 0; i
< found
; i
++) {
621 struct extent_tree
*et
= treevec
[i
];
623 if (!atomic_read(&et
->node_cnt
))
/* skip busy trees rather than waiting on them */
626 if (write_trylock(&et
->lock
)) {
627 node_cnt
+= __free_extent_tree(sbi
, et
, false);
628 write_unlock(&et
->lock
);
631 if (node_cnt
+ tree_cnt
>= nr_shrink
)
636 up_write(&sbi
->extent_tree_lock
);
638 trace_f2fs_shrink_extent_tree(sbi
, node_cnt
, tree_cnt
);
640 return node_cnt
+ tree_cnt
;
/*
 * f2fs_destroy_extent_node - free every extent node of @inode's tree
 * (the tree itself survives).  Returns the number of nodes freed; the
 * early-return value for the no-tree/empty-tree case is missing from
 * this extracted chunk.
 */
643 unsigned int f2fs_destroy_extent_node(struct inode
*inode
)
645 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
646 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
647 unsigned int node_cnt
= 0;
/* nothing allocated for this inode */
649 if (!et
|| !atomic_read(&et
->node_cnt
))
652 write_lock(&et
->lock
);
653 node_cnt
= __free_extent_tree(sbi
, et
, true);
654 write_unlock(&et
->lock
);
/*
 * f2fs_destroy_extent_tree - called at inode eviction.  A still-linked,
 * healthy inode with cached nodes gets its tree parked on the zombie
 * list (to be reclaimed lazily by the shrinker); otherwise the nodes
 * are freed now and the tree is removed from the radix tree and freed.
 * NOTE(review): the early return when the inode has no tree, and the
 * return after parking on the zombie list, are missing from this
 * extracted chunk.
 */
659 void f2fs_destroy_extent_tree(struct inode
*inode
)
661 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
662 struct extent_tree
*et
= F2FS_I(inode
)->extent_tree
;
663 unsigned int node_cnt
= 0;
/* keep the cache for live inodes: defer reclaim to the shrinker */
668 if (inode
->i_nlink
&& !is_bad_inode(inode
) &&
669 atomic_read(&et
->node_cnt
)) {
670 down_write(&sbi
->extent_tree_lock
);
671 list_add_tail(&et
->list
, &sbi
->zombie_list
);
672 atomic_inc(&sbi
->total_zombie_tree
);
673 up_write(&sbi
->extent_tree_lock
);
677 /* free all extent info belong to this extent tree */
678 node_cnt
= f2fs_destroy_extent_node(inode
);
680 /* delete extent tree entry in radix tree */
681 down_write(&sbi
->extent_tree_lock
);
/* the tree must be empty before it can be destroyed */
682 f2fs_bug_on(sbi
, atomic_read(&et
->node_cnt
));
683 radix_tree_delete(&sbi
->extent_tree_root
, inode
->i_ino
);
684 kmem_cache_free(extent_tree_slab
, et
);
685 atomic_dec(&sbi
->total_ext_tree
);
686 up_write(&sbi
->extent_tree_lock
);
688 F2FS_I(inode
)->extent_tree
= NULL
;
690 trace_f2fs_destroy_extent_tree(inode
, node_cnt
);
/*
 * f2fs_lookup_extent_cache - public lookup wrapper: bail out when the
 * extent cache is disabled for @inode, otherwise delegate to the tree
 * lookup (the early-return value for the disabled case is missing from
 * this extracted chunk).
 */
693 bool f2fs_lookup_extent_cache(struct inode
*inode
, pgoff_t pgofs
,
694 struct extent_info
*ei
)
696 if (!f2fs_may_extent_tree(inode
))
699 return f2fs_lookup_extent_tree(inode
, pgofs
, ei
);
/*
 * f2fs_update_extent_cache - record the single-block mapping held in
 * @dn (dn->data_blkaddr) at the file offset derived from the node
 * page.  Presumably marks the inode dirty when the update changed the
 * largest extent — the if-body is missing from this extracted chunk.
 */
702 void f2fs_update_extent_cache(struct dnode_of_data
*dn
)
704 struct f2fs_inode_info
*fi
= F2FS_I(dn
->inode
);
707 if (!f2fs_may_extent_tree(dn
->inode
))
/* NEW_ADDR is a reservation, never a cacheable physical block */
710 f2fs_bug_on(F2FS_I_SB(dn
->inode
), dn
->data_blkaddr
== NEW_ADDR
);
/* file offset of the block addressed by dn (plus in-node offset,
 * whose addend line is missing from this chunk) */
713 fofs
= start_bidx_of_node(ofs_of_node(dn
->node_page
), fi
) +
716 if (f2fs_update_extent_tree_range(dn
->inode
, fofs
, dn
->data_blkaddr
, 1))
/*
 * f2fs_update_extent_cache_range - multi-block variant of
 * f2fs_update_extent_cache: map [fofs, fofs + len) to blocks starting
 * at @blkaddr.  The if-body (presumably marking the inode dirty) is
 * missing from this extracted chunk.
 */
720 void f2fs_update_extent_cache_range(struct dnode_of_data
*dn
,
721 pgoff_t fofs
, block_t blkaddr
, unsigned int len
)
724 if (!f2fs_may_extent_tree(dn
->inode
))
727 if (f2fs_update_extent_tree_range(dn
->inode
, fofs
, blkaddr
, len
))
/*
 * init_extent_cache_info - initialize all per-superblock extent cache
 * state: the ino-keyed radix tree of trees, its rwsem, the global LRU
 * list and its spinlock, the zombie list, and the object counters.
 */
731 void init_extent_cache_info(struct f2fs_sb_info
*sbi
)
733 INIT_RADIX_TREE(&sbi
->extent_tree_root
, GFP_NOIO
);
734 init_rwsem(&sbi
->extent_tree_lock
);
735 INIT_LIST_HEAD(&sbi
->extent_list
);
736 spin_lock_init(&sbi
->extent_lock
);
737 atomic_set(&sbi
->total_ext_tree
, 0);
738 INIT_LIST_HEAD(&sbi
->zombie_list
);
739 atomic_set(&sbi
->total_zombie_tree
, 0);
740 atomic_set(&sbi
->total_ext_node
, 0);
/*
 * create_extent_cache - module init: create both slab caches.  On
 * failure of the second, the first is destroyed before erroring out
 * (the return statements are missing from this extracted chunk).
 */
743 int __init
create_extent_cache(void)
745 extent_tree_slab
= f2fs_kmem_cache_create("f2fs_extent_tree",
746 sizeof(struct extent_tree
));
747 if (!extent_tree_slab
)
749 extent_node_slab
= f2fs_kmem_cache_create("f2fs_extent_node",
750 sizeof(struct extent_node
));
/* unwind the tree-slab creation so no cache leaks on failure */
751 if (!extent_node_slab
) {
752 kmem_cache_destroy(extent_tree_slab
);
/*
 * destroy_extent_cache - module exit: tear down both slab caches
 * (nodes first, then trees — reverse of creation order).
 */
758 void destroy_extent_cache(void)
760 kmem_cache_destroy(extent_node_slab
);
761 kmem_cache_destroy(extent_tree_slab
);