/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

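/*
 * Allocate an extent_node covering @ei and link it into @et's rbtree at
 * the slot described by @parent/@p. GFP_ATOMIC is used because every
 * caller holds et->lock for writing. Returns NULL on allocation failure.
 */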
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p)
{
	struct extent_node *en;

	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color(&en->rb_node, &et->root);
	atomic_inc(&et->node_cnt);
	atomic_inc(&sbi->total_ext_node);
	return en;
}

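/*
 * Unlink @en from the rbtree and drop the per-tree and per-sb node
 * counts, invalidating the one-entry lookup cache if it points at @en.
 * The caller must hold et->lock for writing and must already have
 * removed @en from the global LRU list (sbi->extent_list).
 */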
static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	rb_erase(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&sbi->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	spin_lock(&sbi->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&sbi->extent_lock);

	__detach_extent_node(sbi, et, en);
}

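/*
 * Return the inode's extent_tree, creating and registering a new one in
 * sbi->extent_tree_root on first use. A tree found on the zombie list
 * (its inode was evicted earlier and is being set up again) is revived
 * by unhooking it from that list.
 */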
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	down_write(&sbi->extent_tree_lock);
	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->root = RB_ROOT;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&sbi->total_ext_tree);
	} else {
		atomic_dec(&sbi->total_zombie_tree);
		list_del_init(&et->list);
	}
	up_write(&sbi->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree = et;

	return et;
}

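/*
 * Point lookup of @fofs: check the one-entry cache (et->cached_en)
 * first, then fall back to an rbtree descent keyed on the interval
 * [fofs, fofs + len). Both hit paths feed the cache statistics.
 */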
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, unsigned int fofs)
{
	struct rb_node *node = et->root.rb_node;
	struct extent_node *en = et->cached_en;

	if (en) {
		struct extent_info *cei = &en->ei;

		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
			stat_inc_cached_node_hit(sbi);
			return en;
		}
	}

	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			node = node->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			node = node->rb_right;
		} else {
			stat_inc_rbtree_node_hit(sbi);
			return en;
		}
	}
	return NULL;
}

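/*
 * Seed an empty tree with a single node built from @ei; the new node
 * becomes both the largest extent and the cached lookup entry.
 */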
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei)
{
	struct rb_node **p = &et->root.rb_node;
	struct extent_node *en;

	en = __attach_extent_node(sbi, et, ei, NULL, p);
	if (!en)
		return NULL;

	et->largest = en->ei;
	et->cached_en = en;
	return en;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

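/*
 * Invalidate the cached largest extent if it overlaps
 * [fofs, fofs + len). The inode is marked dirty so the shrunk extent
 * also reaches the on-disk inode.
 */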
static void __drop_largest_extent(struct inode *inode,
					pgoff_t fofs, unsigned int len)
{
	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;

	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
		largest->len = 0;
		f2fs_mark_inode_dirty_sync(inode);
	}
}

/* return true, if inode page is changed */
static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!f2fs_may_extent_tree(inode)) {
		/* drop largest extent */
		if (i_ext && i_ext->len) {
			i_ext->len = 0;
			return true;
		}
		return false;
	}

	et = __grab_extent_tree(inode);

	if (!i_ext || !i_ext->len)
		return false;

	get_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto out;

	en = __init_extent_tree(sbi, et, &ei);
	if (en) {
		spin_lock(&sbi->extent_lock);
		list_add_tail(&en->list, &sbi->extent_list);
		spin_unlock(&sbi->extent_lock);
	}
out:
	write_unlock(&et->lock);
	return false;
}

bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
	bool ret = __f2fs_init_extent_tree(inode, i_ext);

	if (!F2FS_I(inode)->extent_tree)
		set_inode_flag(inode, FI_NO_EXTENT);

	return ret;
}

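/*
 * Read-side lookup. The cached largest extent is tried before the
 * rbtree; on an rbtree hit the node is moved to the tail of the LRU
 * list and becomes the tree's cached entry.
 */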
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
					struct extent_info *ei)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	struct extent_node *en;
	bool ret = false;

	f2fs_bug_on(sbi, !et);

	trace_f2fs_lookup_extent_tree_start(inode, pgofs);

	read_lock(&et->lock);

	if (et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_tree(sbi, et, pgofs);
	if (en) {
		*ei = en->ei;
		spin_lock(&sbi->extent_lock);
		if (!list_empty(&en->list)) {
			list_move_tail(&en->list, &sbi->extent_list);
			et->cached_en = en;
		}
		spin_unlock(&sbi->extent_lock);
		ret = true;
	}
out:
	stat_inc_total_hit(sbi);
	read_unlock(&et->lock);

	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
	return ret;
}

/*
 * Lookup the extent at @fofs; if hit, return the extent.
 * If not, return NULL and set:
 * @prev_ex: extent before fofs
 * @next_ex: extent after fofs
 * @insert_p: insert point for a new extent at fofs
 * @insert_parent: parent node of the insert point
 * in order to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
				unsigned int fofs,
				struct extent_node **prev_ex,
				struct extent_node **next_ex,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent)
{
	struct rb_node **pnode = &et->root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = et->cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_ex = NULL;
	*next_ex = NULL;

	if (RB_EMPTY_ROOT(&et->root))
		return NULL;

	if (en) {
		struct extent_info *cei = &en->ei;

		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
			goto lookup_neighbors;
	}

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			pnode = &(*pnode)->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			pnode = &(*pnode)->rb_right;
		else
			goto lookup_neighbors;
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_ex = tmp_node ?
		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_ex = tmp_node ?
		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_ex = tmp_node ?
			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_ex = tmp_node ?
			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
	}
	return en;
}

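/*
 * Try to extend @prev_ex forward and/or @next_ex backward so that one
 * of them covers @ei. When both merges apply, the two nodes have become
 * contiguous: @prev_ex is released and @next_ex absorbs the whole
 * range. Returns the merged node, or NULL if nothing was mergeable.
 */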
static struct extent_node *__try_merge_extent_node(struct inode *inode,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.blk = ei->blk;
		next_ex->ei.len += ei->len;
		if (en)
			__release_extent_node(sbi, et, prev_ex);
		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(inode, et, en);

	spin_lock(&sbi->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &sbi->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&sbi->extent_lock);
	return en;
}

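/*
 * Insert @ei as a new node. If the caller still holds a valid insertion
 * point from __lookup_extent_tree_ret (the tree must not have changed
 * in between), the rbtree descent is skipped entirely.
 */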
static struct extent_node *__insert_extent_tree(struct inode *inode,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct rb_node **p = &et->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs)
			p = &(*p)->rb_left;
		else if (ei->fofs >= en->ei.fofs + en->ei.len)
			p = &(*p)->rb_right;
		else
			f2fs_bug_on(sbi, 1);
	}
do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p);
	if (!en)
		return NULL;

	__try_update_largest_extent(inode, et, en);

	/* update in global extent list */
	spin_lock(&sbi->extent_lock);
	list_add_tail(&en->list, &sbi->extent_list);
	et->cached_en = en;
	spin_unlock(&sbi->extent_lock);
	return en;
}

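/*
 * Core update path, in three steps: (1) find the first extent node
 * overlapping [fofs, fofs + len - 1], (2) trim, split, or remove every
 * overlapping node, (3) if @blkaddr is valid, insert the new mapping,
 * merging it with its neighbors where possible. Returns true when the
 * cached largest extent changed.
 */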
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int end = fofs + len;
	unsigned int pos = (unsigned int)fofs;

	if (!et)
		return false;

	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);

	write_lock(&et->lock);

	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
		write_unlock(&et->lock);
		return false;
	}

	prev = et->largest;
	dei.len = 0;

	/*
	 * drop largest extent before lookup, in case it's already
	 * been shrunk from extent tree
	 */
	__drop_largest_extent(inode, fofs, len);

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
					&insert_p, &insert_parent);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, pos >= org_end);

		if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
			en->ei.len = pos - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
			if (parts) {
				set_extent_info(&ei, end,
						end - dei.fofs + dei.blk,
						org_end - end);
				en1 = __insert_extent_tree(inode, et, &ei,
							NULL, NULL);
				next_en = en1;
			} else {
				en->ei.fofs = end;
				en->ei.blk += end - dei.fofs;
				en->ei.len -= end - dei.fofs;
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = node ?
				rb_entry(node, struct extent_node, rb_node)
				: NULL;
		}

		if (parts)
			__try_update_largest_extent(inode, et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * if original extent is split into zero or two parts, extent
		 * tree has been altered by deletion or insertion, therefore
		 * invalidate pointers regarding the tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	/* 3. update extent in extent cache */
	if (blkaddr) {
		set_extent_info(&ei, fofs, blkaddr, len);
		if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
			__insert_extent_tree(inode, et, &ei,
						insert_p, insert_parent);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			__drop_largest_extent(inode, 0, UINT_MAX);
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	write_unlock(&et->lock);

	return !__is_extent_same(&prev, &et->largest);
}

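/*
 * Shrinker entry point: first reap whole zombie trees (trees whose
 * inodes have been evicted), then evict individual extent nodes in LRU
 * order from sbi->extent_list until nr_shrink entries are freed. Both
 * phases use trylocks so that shrinking never blocks hot paths.
 */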
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!test_opt(sbi, EXTENT_CACHE))
		return 0;

	if (!atomic_read(&sbi->total_zombie_tree))
		goto free_node;

	if (!down_write_trylock(&sbi->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent tree */
	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&sbi->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&sbi->total_ext_tree);
		atomic_dec(&sbi->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
	}
	up_write(&sbi->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!down_write_trylock(&sbi->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&sbi->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&sbi->extent_list))
			break;
		en = list_first_entry(&sbi->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &sbi->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&sbi->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&sbi->extent_lock);
	}
	spin_unlock(&sbi->extent_lock);

unlock_out:
	up_write(&sbi->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);

	return node_cnt + tree_cnt;
}

unsigned int f2fs_destroy_extent_node(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;

	if (!f2fs_may_extent_tree(inode))
		return;

	set_inode_flag(inode, FI_NO_EXTENT);

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	__drop_largest_extent(inode, 0, UINT_MAX);
	write_unlock(&et->lock);
}

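/*
 * Called on inode eviction. A tree that still has nodes and belongs to
 * a live, healthy inode is parked on the zombie list for the shrinker
 * to reap lazily; otherwise the tree is torn down immediately.
 */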
void f2fs_destroy_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		down_write(&sbi->extent_tree_lock);
		list_add_tail(&et->list, &sbi->zombie_list);
		atomic_inc(&sbi->total_zombie_tree);
		up_write(&sbi->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = f2fs_destroy_extent_node(inode);

	/* delete extent tree entry in radix tree */
	down_write(&sbi->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&sbi->total_ext_tree);
	up_write(&sbi->extent_tree_lock);

	F2FS_I(inode)->extent_tree = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt);
}

bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
					struct extent_info *ei)
{
	if (!f2fs_may_extent_tree(inode))
		return false;

	return f2fs_lookup_extent_tree(inode, pgofs, ei);
}

void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
	pgoff_t fofs;
	block_t blkaddr;

	if (!f2fs_may_extent_tree(dn->inode))
		return;

	if (dn->data_blkaddr == NEW_ADDR)
		blkaddr = NULL_ADDR;
	else
		blkaddr = dn->data_blkaddr;

	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
							dn->ofs_in_node;
	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
}

void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	if (!f2fs_may_extent_tree(dn->inode))
		return;

	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
}

void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
	init_rwsem(&sbi->extent_tree_lock);
	INIT_LIST_HEAD(&sbi->extent_list);
	spin_lock_init(&sbi->extent_lock);
	atomic_set(&sbi->total_ext_tree, 0);
	INIT_LIST_HEAD(&sbi->zombie_list);
	atomic_set(&sbi->total_zombie_tree, 0);
	atomic_set(&sbi->total_ext_node, 0);
}

int __init create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}