4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/f2fs_fs.h>
13 #include <linux/mpage.h>
14 #include <linux/backing-dev.h>
15 #include <linux/blkdev.h>
16 #include <linux/pagevec.h>
17 #include <linux/swap.h>
24 #include <trace/events/f2fs.h>
26 #define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
28 static struct kmem_cache
*nat_entry_slab
;
29 static struct kmem_cache
*free_nid_slab
;
30 static struct kmem_cache
*nat_entry_set_slab
;
31 static struct kmem_cache
*fsync_node_entry_slab
;
34 * Check whether the given nid is within node id range.
36 int f2fs_check_nid_range(struct f2fs_sb_info
*sbi
, nid_t nid
)
38 if (unlikely(nid
< F2FS_ROOT_INO(sbi
) || nid
>= NM_I(sbi
)->max_nid
)) {
39 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
40 f2fs_msg(sbi
->sb
, KERN_WARNING
,
41 "%s: out-of-range nid=%x, run fsck to fix.",
48 bool f2fs_available_free_memory(struct f2fs_sb_info
*sbi
, int type
)
50 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
52 unsigned long avail_ram
;
53 unsigned long mem_size
= 0;
58 /* only uses low memory */
59 avail_ram
= val
.totalram
- val
.totalhigh
;
62 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
64 if (type
== FREE_NIDS
) {
65 mem_size
= (nm_i
->nid_cnt
[FREE_NID
] *
66 sizeof(struct free_nid
)) >> PAGE_SHIFT
;
67 res
= mem_size
< ((avail_ram
* nm_i
->ram_thresh
/ 100) >> 2);
68 } else if (type
== NAT_ENTRIES
) {
69 mem_size
= (nm_i
->nat_cnt
* sizeof(struct nat_entry
)) >>
71 res
= mem_size
< ((avail_ram
* nm_i
->ram_thresh
/ 100) >> 2);
72 if (excess_cached_nats(sbi
))
74 } else if (type
== DIRTY_DENTS
) {
75 if (sbi
->sb
->s_bdi
->wb
.dirty_exceeded
)
77 mem_size
= get_pages(sbi
, F2FS_DIRTY_DENTS
);
78 res
= mem_size
< ((avail_ram
* nm_i
->ram_thresh
/ 100) >> 1);
79 } else if (type
== INO_ENTRIES
) {
82 for (i
= 0; i
< MAX_INO_ENTRY
; i
++)
83 mem_size
+= sbi
->im
[i
].ino_num
*
84 sizeof(struct ino_entry
);
85 mem_size
>>= PAGE_SHIFT
;
86 res
= mem_size
< ((avail_ram
* nm_i
->ram_thresh
/ 100) >> 1);
87 } else if (type
== EXTENT_CACHE
) {
88 mem_size
= (atomic_read(&sbi
->total_ext_tree
) *
89 sizeof(struct extent_tree
) +
90 atomic_read(&sbi
->total_ext_node
) *
91 sizeof(struct extent_node
)) >> PAGE_SHIFT
;
92 res
= mem_size
< ((avail_ram
* nm_i
->ram_thresh
/ 100) >> 1);
93 } else if (type
== INMEM_PAGES
) {
94 /* it allows 20% / total_ram for inmemory pages */
95 mem_size
= get_pages(sbi
, F2FS_INMEM_PAGES
);
96 res
= mem_size
< (val
.totalram
/ 5);
98 if (!sbi
->sb
->s_bdi
->wb
.dirty_exceeded
)
104 static void clear_node_page_dirty(struct page
*page
)
106 if (PageDirty(page
)) {
107 f2fs_clear_radix_tree_dirty_tag(page
);
108 clear_page_dirty_for_io(page
);
109 dec_page_count(F2FS_P_SB(page
), F2FS_DIRTY_NODES
);
111 ClearPageUptodate(page
);
114 static struct page
*get_current_nat_page(struct f2fs_sb_info
*sbi
, nid_t nid
)
116 return f2fs_get_meta_page_nofail(sbi
, current_nat_addr(sbi
, nid
));
119 static struct page
*get_next_nat_page(struct f2fs_sb_info
*sbi
, nid_t nid
)
121 struct page
*src_page
;
122 struct page
*dst_page
;
126 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
128 dst_off
= next_nat_addr(sbi
, current_nat_addr(sbi
, nid
));
130 /* get current nat block page with lock */
131 src_page
= get_current_nat_page(sbi
, nid
);
132 dst_page
= f2fs_grab_meta_page(sbi
, dst_off
);
133 f2fs_bug_on(sbi
, PageDirty(src_page
));
135 src_addr
= page_address(src_page
);
136 dst_addr
= page_address(dst_page
);
137 memcpy(dst_addr
, src_addr
, PAGE_SIZE
);
138 set_page_dirty(dst_page
);
139 f2fs_put_page(src_page
, 1);
141 set_to_next_nat(nm_i
, nid
);
146 static struct nat_entry
*__alloc_nat_entry(nid_t nid
, bool no_fail
)
148 struct nat_entry
*new;
151 new = f2fs_kmem_cache_alloc(nat_entry_slab
, GFP_F2FS_ZERO
);
153 new = kmem_cache_alloc(nat_entry_slab
, GFP_F2FS_ZERO
);
155 nat_set_nid(new, nid
);
161 static void __free_nat_entry(struct nat_entry
*e
)
163 kmem_cache_free(nat_entry_slab
, e
);
166 /* must be locked by nat_tree_lock */
167 static struct nat_entry
*__init_nat_entry(struct f2fs_nm_info
*nm_i
,
168 struct nat_entry
*ne
, struct f2fs_nat_entry
*raw_ne
, bool no_fail
)
171 f2fs_radix_tree_insert(&nm_i
->nat_root
, nat_get_nid(ne
), ne
);
172 else if (radix_tree_insert(&nm_i
->nat_root
, nat_get_nid(ne
), ne
))
176 node_info_from_raw_nat(&ne
->ni
, raw_ne
);
178 spin_lock(&nm_i
->nat_list_lock
);
179 list_add_tail(&ne
->list
, &nm_i
->nat_entries
);
180 spin_unlock(&nm_i
->nat_list_lock
);
186 static struct nat_entry
*__lookup_nat_cache(struct f2fs_nm_info
*nm_i
, nid_t n
)
188 struct nat_entry
*ne
;
190 ne
= radix_tree_lookup(&nm_i
->nat_root
, n
);
192 /* for recent accessed nat entry, move it to tail of lru list */
193 if (ne
&& !get_nat_flag(ne
, IS_DIRTY
)) {
194 spin_lock(&nm_i
->nat_list_lock
);
195 if (!list_empty(&ne
->list
))
196 list_move_tail(&ne
->list
, &nm_i
->nat_entries
);
197 spin_unlock(&nm_i
->nat_list_lock
);
203 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info
*nm_i
,
204 nid_t start
, unsigned int nr
, struct nat_entry
**ep
)
206 return radix_tree_gang_lookup(&nm_i
->nat_root
, (void **)ep
, start
, nr
);
209 static void __del_from_nat_cache(struct f2fs_nm_info
*nm_i
, struct nat_entry
*e
)
211 radix_tree_delete(&nm_i
->nat_root
, nat_get_nid(e
));
216 static struct nat_entry_set
*__grab_nat_entry_set(struct f2fs_nm_info
*nm_i
,
217 struct nat_entry
*ne
)
219 nid_t set
= NAT_BLOCK_OFFSET(ne
->ni
.nid
);
220 struct nat_entry_set
*head
;
222 head
= radix_tree_lookup(&nm_i
->nat_set_root
, set
);
224 head
= f2fs_kmem_cache_alloc(nat_entry_set_slab
, GFP_NOFS
);
226 INIT_LIST_HEAD(&head
->entry_list
);
227 INIT_LIST_HEAD(&head
->set_list
);
230 f2fs_radix_tree_insert(&nm_i
->nat_set_root
, set
, head
);
235 static void __set_nat_cache_dirty(struct f2fs_nm_info
*nm_i
,
236 struct nat_entry
*ne
)
238 struct nat_entry_set
*head
;
239 bool new_ne
= nat_get_blkaddr(ne
) == NEW_ADDR
;
242 head
= __grab_nat_entry_set(nm_i
, ne
);
245 * update entry_cnt in below condition:
246 * 1. update NEW_ADDR to valid block address;
247 * 2. update old block address to new one;
249 if (!new_ne
&& (get_nat_flag(ne
, IS_PREALLOC
) ||
250 !get_nat_flag(ne
, IS_DIRTY
)))
253 set_nat_flag(ne
, IS_PREALLOC
, new_ne
);
255 if (get_nat_flag(ne
, IS_DIRTY
))
258 nm_i
->dirty_nat_cnt
++;
259 set_nat_flag(ne
, IS_DIRTY
, true);
261 spin_lock(&nm_i
->nat_list_lock
);
263 list_del_init(&ne
->list
);
265 list_move_tail(&ne
->list
, &head
->entry_list
);
266 spin_unlock(&nm_i
->nat_list_lock
);
269 static void __clear_nat_cache_dirty(struct f2fs_nm_info
*nm_i
,
270 struct nat_entry_set
*set
, struct nat_entry
*ne
)
272 spin_lock(&nm_i
->nat_list_lock
);
273 list_move_tail(&ne
->list
, &nm_i
->nat_entries
);
274 spin_unlock(&nm_i
->nat_list_lock
);
276 set_nat_flag(ne
, IS_DIRTY
, false);
278 nm_i
->dirty_nat_cnt
--;
281 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info
*nm_i
,
282 nid_t start
, unsigned int nr
, struct nat_entry_set
**ep
)
284 return radix_tree_gang_lookup(&nm_i
->nat_set_root
, (void **)ep
,
288 bool f2fs_in_warm_node_list(struct f2fs_sb_info
*sbi
, struct page
*page
)
290 return NODE_MAPPING(sbi
) == page
->mapping
&&
291 IS_DNODE(page
) && is_cold_node(page
);
294 void f2fs_init_fsync_node_info(struct f2fs_sb_info
*sbi
)
296 spin_lock_init(&sbi
->fsync_node_lock
);
297 INIT_LIST_HEAD(&sbi
->fsync_node_list
);
298 sbi
->fsync_seg_id
= 0;
299 sbi
->fsync_node_num
= 0;
302 static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info
*sbi
,
305 struct fsync_node_entry
*fn
;
309 fn
= f2fs_kmem_cache_alloc(fsync_node_entry_slab
, GFP_NOFS
);
313 INIT_LIST_HEAD(&fn
->list
);
315 spin_lock_irqsave(&sbi
->fsync_node_lock
, flags
);
316 list_add_tail(&fn
->list
, &sbi
->fsync_node_list
);
317 fn
->seq_id
= sbi
->fsync_seg_id
++;
319 sbi
->fsync_node_num
++;
320 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
325 void f2fs_del_fsync_node_entry(struct f2fs_sb_info
*sbi
, struct page
*page
)
327 struct fsync_node_entry
*fn
;
330 spin_lock_irqsave(&sbi
->fsync_node_lock
, flags
);
331 list_for_each_entry(fn
, &sbi
->fsync_node_list
, list
) {
332 if (fn
->page
== page
) {
334 sbi
->fsync_node_num
--;
335 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
336 kmem_cache_free(fsync_node_entry_slab
, fn
);
341 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
345 void f2fs_reset_fsync_node_info(struct f2fs_sb_info
*sbi
)
349 spin_lock_irqsave(&sbi
->fsync_node_lock
, flags
);
350 sbi
->fsync_seg_id
= 0;
351 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
354 int f2fs_need_dentry_mark(struct f2fs_sb_info
*sbi
, nid_t nid
)
356 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
360 down_read(&nm_i
->nat_tree_lock
);
361 e
= __lookup_nat_cache(nm_i
, nid
);
363 if (!get_nat_flag(e
, IS_CHECKPOINTED
) &&
364 !get_nat_flag(e
, HAS_FSYNCED_INODE
))
367 up_read(&nm_i
->nat_tree_lock
);
371 bool f2fs_is_checkpointed_node(struct f2fs_sb_info
*sbi
, nid_t nid
)
373 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
377 down_read(&nm_i
->nat_tree_lock
);
378 e
= __lookup_nat_cache(nm_i
, nid
);
379 if (e
&& !get_nat_flag(e
, IS_CHECKPOINTED
))
381 up_read(&nm_i
->nat_tree_lock
);
385 bool f2fs_need_inode_block_update(struct f2fs_sb_info
*sbi
, nid_t ino
)
387 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
389 bool need_update
= true;
391 down_read(&nm_i
->nat_tree_lock
);
392 e
= __lookup_nat_cache(nm_i
, ino
);
393 if (e
&& get_nat_flag(e
, HAS_LAST_FSYNC
) &&
394 (get_nat_flag(e
, IS_CHECKPOINTED
) ||
395 get_nat_flag(e
, HAS_FSYNCED_INODE
)))
397 up_read(&nm_i
->nat_tree_lock
);
401 /* must be locked by nat_tree_lock */
402 static void cache_nat_entry(struct f2fs_sb_info
*sbi
, nid_t nid
,
403 struct f2fs_nat_entry
*ne
)
405 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
406 struct nat_entry
*new, *e
;
408 new = __alloc_nat_entry(nid
, false);
412 down_write(&nm_i
->nat_tree_lock
);
413 e
= __lookup_nat_cache(nm_i
, nid
);
415 e
= __init_nat_entry(nm_i
, new, ne
, false);
417 f2fs_bug_on(sbi
, nat_get_ino(e
) != le32_to_cpu(ne
->ino
) ||
418 nat_get_blkaddr(e
) !=
419 le32_to_cpu(ne
->block_addr
) ||
420 nat_get_version(e
) != ne
->version
);
421 up_write(&nm_i
->nat_tree_lock
);
423 __free_nat_entry(new);
426 static void set_node_addr(struct f2fs_sb_info
*sbi
, struct node_info
*ni
,
427 block_t new_blkaddr
, bool fsync_done
)
429 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
431 struct nat_entry
*new = __alloc_nat_entry(ni
->nid
, true);
433 down_write(&nm_i
->nat_tree_lock
);
434 e
= __lookup_nat_cache(nm_i
, ni
->nid
);
436 e
= __init_nat_entry(nm_i
, new, NULL
, true);
437 copy_node_info(&e
->ni
, ni
);
438 f2fs_bug_on(sbi
, ni
->blk_addr
== NEW_ADDR
);
439 } else if (new_blkaddr
== NEW_ADDR
) {
441 * when nid is reallocated,
442 * previous nat entry can be remained in nat cache.
443 * So, reinitialize it with new information.
445 copy_node_info(&e
->ni
, ni
);
446 f2fs_bug_on(sbi
, ni
->blk_addr
!= NULL_ADDR
);
448 /* let's free early to reduce memory consumption */
450 __free_nat_entry(new);
453 f2fs_bug_on(sbi
, nat_get_blkaddr(e
) != ni
->blk_addr
);
454 f2fs_bug_on(sbi
, nat_get_blkaddr(e
) == NULL_ADDR
&&
455 new_blkaddr
== NULL_ADDR
);
456 f2fs_bug_on(sbi
, nat_get_blkaddr(e
) == NEW_ADDR
&&
457 new_blkaddr
== NEW_ADDR
);
458 f2fs_bug_on(sbi
, is_valid_data_blkaddr(sbi
, nat_get_blkaddr(e
)) &&
459 new_blkaddr
== NEW_ADDR
);
461 /* increment version no as node is removed */
462 if (nat_get_blkaddr(e
) != NEW_ADDR
&& new_blkaddr
== NULL_ADDR
) {
463 unsigned char version
= nat_get_version(e
);
464 nat_set_version(e
, inc_node_version(version
));
468 nat_set_blkaddr(e
, new_blkaddr
);
469 if (!is_valid_data_blkaddr(sbi
, new_blkaddr
))
470 set_nat_flag(e
, IS_CHECKPOINTED
, false);
471 __set_nat_cache_dirty(nm_i
, e
);
473 /* update fsync_mark if its inode nat entry is still alive */
474 if (ni
->nid
!= ni
->ino
)
475 e
= __lookup_nat_cache(nm_i
, ni
->ino
);
477 if (fsync_done
&& ni
->nid
== ni
->ino
)
478 set_nat_flag(e
, HAS_FSYNCED_INODE
, true);
479 set_nat_flag(e
, HAS_LAST_FSYNC
, fsync_done
);
481 up_write(&nm_i
->nat_tree_lock
);
484 int f2fs_try_to_free_nats(struct f2fs_sb_info
*sbi
, int nr_shrink
)
486 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
489 if (!down_write_trylock(&nm_i
->nat_tree_lock
))
492 spin_lock(&nm_i
->nat_list_lock
);
494 struct nat_entry
*ne
;
496 if (list_empty(&nm_i
->nat_entries
))
499 ne
= list_first_entry(&nm_i
->nat_entries
,
500 struct nat_entry
, list
);
502 spin_unlock(&nm_i
->nat_list_lock
);
504 __del_from_nat_cache(nm_i
, ne
);
507 spin_lock(&nm_i
->nat_list_lock
);
509 spin_unlock(&nm_i
->nat_list_lock
);
511 up_write(&nm_i
->nat_tree_lock
);
512 return nr
- nr_shrink
;
516 * This function always returns success
518 int f2fs_get_node_info(struct f2fs_sb_info
*sbi
, nid_t nid
,
519 struct node_info
*ni
)
521 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
522 struct curseg_info
*curseg
= CURSEG_I(sbi
, CURSEG_HOT_DATA
);
523 struct f2fs_journal
*journal
= curseg
->journal
;
524 nid_t start_nid
= START_NID(nid
);
525 struct f2fs_nat_block
*nat_blk
;
526 struct page
*page
= NULL
;
527 struct f2fs_nat_entry ne
;
534 /* Check nat cache */
535 down_read(&nm_i
->nat_tree_lock
);
536 e
= __lookup_nat_cache(nm_i
, nid
);
538 ni
->ino
= nat_get_ino(e
);
539 ni
->blk_addr
= nat_get_blkaddr(e
);
540 ni
->version
= nat_get_version(e
);
541 up_read(&nm_i
->nat_tree_lock
);
545 memset(&ne
, 0, sizeof(struct f2fs_nat_entry
));
547 /* Check current segment summary */
548 down_read(&curseg
->journal_rwsem
);
549 i
= f2fs_lookup_journal_in_cursum(journal
, NAT_JOURNAL
, nid
, 0);
551 ne
= nat_in_journal(journal
, i
);
552 node_info_from_raw_nat(ni
, &ne
);
554 up_read(&curseg
->journal_rwsem
);
556 up_read(&nm_i
->nat_tree_lock
);
560 /* Fill node_info from nat page */
561 index
= current_nat_addr(sbi
, nid
);
562 up_read(&nm_i
->nat_tree_lock
);
564 page
= f2fs_get_meta_page(sbi
, index
);
566 return PTR_ERR(page
);
568 nat_blk
= (struct f2fs_nat_block
*)page_address(page
);
569 ne
= nat_blk
->entries
[nid
- start_nid
];
570 node_info_from_raw_nat(ni
, &ne
);
571 f2fs_put_page(page
, 1);
573 /* cache nat entry */
574 cache_nat_entry(sbi
, nid
, &ne
);
579 * readahead MAX_RA_NODE number of node pages.
581 static void f2fs_ra_node_pages(struct page
*parent
, int start
, int n
)
583 struct f2fs_sb_info
*sbi
= F2FS_P_SB(parent
);
584 struct blk_plug plug
;
588 blk_start_plug(&plug
);
590 /* Then, try readahead for siblings of the desired node */
592 end
= min(end
, NIDS_PER_BLOCK
);
593 for (i
= start
; i
< end
; i
++) {
594 nid
= get_nid(parent
, i
, false);
595 f2fs_ra_node_page(sbi
, nid
);
598 blk_finish_plug(&plug
);
601 pgoff_t
f2fs_get_next_page_offset(struct dnode_of_data
*dn
, pgoff_t pgofs
)
603 const long direct_index
= ADDRS_PER_INODE(dn
->inode
);
604 const long direct_blks
= ADDRS_PER_BLOCK
;
605 const long indirect_blks
= ADDRS_PER_BLOCK
* NIDS_PER_BLOCK
;
606 unsigned int skipped_unit
= ADDRS_PER_BLOCK
;
607 int cur_level
= dn
->cur_level
;
608 int max_level
= dn
->max_level
;
614 while (max_level
-- > cur_level
)
615 skipped_unit
*= NIDS_PER_BLOCK
;
617 switch (dn
->max_level
) {
619 base
+= 2 * indirect_blks
;
621 base
+= 2 * direct_blks
;
623 base
+= direct_index
;
626 f2fs_bug_on(F2FS_I_SB(dn
->inode
), 1);
629 return ((pgofs
- base
) / skipped_unit
+ 1) * skipped_unit
+ base
;
633 * The maximum depth is four.
634 * Offset[0] will have raw inode offset.
636 static int get_node_path(struct inode
*inode
, long block
,
637 int offset
[4], unsigned int noffset
[4])
639 const long direct_index
= ADDRS_PER_INODE(inode
);
640 const long direct_blks
= ADDRS_PER_BLOCK
;
641 const long dptrs_per_blk
= NIDS_PER_BLOCK
;
642 const long indirect_blks
= ADDRS_PER_BLOCK
* NIDS_PER_BLOCK
;
643 const long dindirect_blks
= indirect_blks
* NIDS_PER_BLOCK
;
649 if (block
< direct_index
) {
653 block
-= direct_index
;
654 if (block
< direct_blks
) {
655 offset
[n
++] = NODE_DIR1_BLOCK
;
661 block
-= direct_blks
;
662 if (block
< direct_blks
) {
663 offset
[n
++] = NODE_DIR2_BLOCK
;
669 block
-= direct_blks
;
670 if (block
< indirect_blks
) {
671 offset
[n
++] = NODE_IND1_BLOCK
;
673 offset
[n
++] = block
/ direct_blks
;
674 noffset
[n
] = 4 + offset
[n
- 1];
675 offset
[n
] = block
% direct_blks
;
679 block
-= indirect_blks
;
680 if (block
< indirect_blks
) {
681 offset
[n
++] = NODE_IND2_BLOCK
;
682 noffset
[n
] = 4 + dptrs_per_blk
;
683 offset
[n
++] = block
/ direct_blks
;
684 noffset
[n
] = 5 + dptrs_per_blk
+ offset
[n
- 1];
685 offset
[n
] = block
% direct_blks
;
689 block
-= indirect_blks
;
690 if (block
< dindirect_blks
) {
691 offset
[n
++] = NODE_DIND_BLOCK
;
692 noffset
[n
] = 5 + (dptrs_per_blk
* 2);
693 offset
[n
++] = block
/ indirect_blks
;
694 noffset
[n
] = 6 + (dptrs_per_blk
* 2) +
695 offset
[n
- 1] * (dptrs_per_blk
+ 1);
696 offset
[n
++] = (block
/ direct_blks
) % dptrs_per_blk
;
697 noffset
[n
] = 7 + (dptrs_per_blk
* 2) +
698 offset
[n
- 2] * (dptrs_per_blk
+ 1) +
700 offset
[n
] = block
% direct_blks
;
711 * Caller should call f2fs_put_dnode(dn).
712 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
713 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
714 * In the case of RDONLY_NODE, we don't need to care about mutex.
716 int f2fs_get_dnode_of_data(struct dnode_of_data
*dn
, pgoff_t index
, int mode
)
718 struct f2fs_sb_info
*sbi
= F2FS_I_SB(dn
->inode
);
719 struct page
*npage
[4];
720 struct page
*parent
= NULL
;
722 unsigned int noffset
[4];
727 level
= get_node_path(dn
->inode
, index
, offset
, noffset
);
731 nids
[0] = dn
->inode
->i_ino
;
732 npage
[0] = dn
->inode_page
;
735 npage
[0] = f2fs_get_node_page(sbi
, nids
[0]);
736 if (IS_ERR(npage
[0]))
737 return PTR_ERR(npage
[0]);
740 /* if inline_data is set, should not report any block indices */
741 if (f2fs_has_inline_data(dn
->inode
) && index
) {
743 f2fs_put_page(npage
[0], 1);
749 nids
[1] = get_nid(parent
, offset
[0], true);
750 dn
->inode_page
= npage
[0];
751 dn
->inode_page_locked
= true;
753 /* get indirect or direct nodes */
754 for (i
= 1; i
<= level
; i
++) {
757 if (!nids
[i
] && mode
== ALLOC_NODE
) {
759 if (!f2fs_alloc_nid(sbi
, &(nids
[i
]))) {
765 npage
[i
] = f2fs_new_node_page(dn
, noffset
[i
]);
766 if (IS_ERR(npage
[i
])) {
767 f2fs_alloc_nid_failed(sbi
, nids
[i
]);
768 err
= PTR_ERR(npage
[i
]);
772 set_nid(parent
, offset
[i
- 1], nids
[i
], i
== 1);
773 f2fs_alloc_nid_done(sbi
, nids
[i
]);
775 } else if (mode
== LOOKUP_NODE_RA
&& i
== level
&& level
> 1) {
776 npage
[i
] = f2fs_get_node_page_ra(parent
, offset
[i
- 1]);
777 if (IS_ERR(npage
[i
])) {
778 err
= PTR_ERR(npage
[i
]);
784 dn
->inode_page_locked
= false;
787 f2fs_put_page(parent
, 1);
791 npage
[i
] = f2fs_get_node_page(sbi
, nids
[i
]);
792 if (IS_ERR(npage
[i
])) {
793 err
= PTR_ERR(npage
[i
]);
794 f2fs_put_page(npage
[0], 0);
800 nids
[i
+ 1] = get_nid(parent
, offset
[i
], false);
803 dn
->nid
= nids
[level
];
804 dn
->ofs_in_node
= offset
[level
];
805 dn
->node_page
= npage
[level
];
806 dn
->data_blkaddr
= datablock_addr(dn
->inode
,
807 dn
->node_page
, dn
->ofs_in_node
);
811 f2fs_put_page(parent
, 1);
813 f2fs_put_page(npage
[0], 0);
815 dn
->inode_page
= NULL
;
816 dn
->node_page
= NULL
;
817 if (err
== -ENOENT
) {
819 dn
->max_level
= level
;
820 dn
->ofs_in_node
= offset
[level
];
825 static int truncate_node(struct dnode_of_data
*dn
)
827 struct f2fs_sb_info
*sbi
= F2FS_I_SB(dn
->inode
);
831 err
= f2fs_get_node_info(sbi
, dn
->nid
, &ni
);
835 /* Deallocate node address */
836 f2fs_invalidate_blocks(sbi
, ni
.blk_addr
);
837 dec_valid_node_count(sbi
, dn
->inode
, dn
->nid
== dn
->inode
->i_ino
);
838 set_node_addr(sbi
, &ni
, NULL_ADDR
, false);
840 if (dn
->nid
== dn
->inode
->i_ino
) {
841 f2fs_remove_orphan_inode(sbi
, dn
->nid
);
842 dec_valid_inode_count(sbi
);
843 f2fs_inode_synced(dn
->inode
);
846 clear_node_page_dirty(dn
->node_page
);
847 set_sbi_flag(sbi
, SBI_IS_DIRTY
);
849 f2fs_put_page(dn
->node_page
, 1);
851 invalidate_mapping_pages(NODE_MAPPING(sbi
),
852 dn
->node_page
->index
, dn
->node_page
->index
);
854 dn
->node_page
= NULL
;
855 trace_f2fs_truncate_node(dn
->inode
, dn
->nid
, ni
.blk_addr
);
860 static int truncate_dnode(struct dnode_of_data
*dn
)
868 /* get direct node */
869 page
= f2fs_get_node_page(F2FS_I_SB(dn
->inode
), dn
->nid
);
870 if (IS_ERR(page
) && PTR_ERR(page
) == -ENOENT
)
872 else if (IS_ERR(page
))
873 return PTR_ERR(page
);
875 /* Make dnode_of_data for parameter */
876 dn
->node_page
= page
;
878 f2fs_truncate_data_blocks(dn
);
879 err
= truncate_node(dn
);
886 static int truncate_nodes(struct dnode_of_data
*dn
, unsigned int nofs
,
889 struct dnode_of_data rdn
= *dn
;
891 struct f2fs_node
*rn
;
893 unsigned int child_nofs
;
898 return NIDS_PER_BLOCK
+ 1;
900 trace_f2fs_truncate_nodes_enter(dn
->inode
, dn
->nid
, dn
->data_blkaddr
);
902 page
= f2fs_get_node_page(F2FS_I_SB(dn
->inode
), dn
->nid
);
904 trace_f2fs_truncate_nodes_exit(dn
->inode
, PTR_ERR(page
));
905 return PTR_ERR(page
);
908 f2fs_ra_node_pages(page
, ofs
, NIDS_PER_BLOCK
);
910 rn
= F2FS_NODE(page
);
912 for (i
= ofs
; i
< NIDS_PER_BLOCK
; i
++, freed
++) {
913 child_nid
= le32_to_cpu(rn
->in
.nid
[i
]);
917 ret
= truncate_dnode(&rdn
);
920 if (set_nid(page
, i
, 0, false))
921 dn
->node_changed
= true;
924 child_nofs
= nofs
+ ofs
* (NIDS_PER_BLOCK
+ 1) + 1;
925 for (i
= ofs
; i
< NIDS_PER_BLOCK
; i
++) {
926 child_nid
= le32_to_cpu(rn
->in
.nid
[i
]);
927 if (child_nid
== 0) {
928 child_nofs
+= NIDS_PER_BLOCK
+ 1;
932 ret
= truncate_nodes(&rdn
, child_nofs
, 0, depth
- 1);
933 if (ret
== (NIDS_PER_BLOCK
+ 1)) {
934 if (set_nid(page
, i
, 0, false))
935 dn
->node_changed
= true;
937 } else if (ret
< 0 && ret
!= -ENOENT
) {
945 /* remove current indirect node */
946 dn
->node_page
= page
;
947 ret
= truncate_node(dn
);
952 f2fs_put_page(page
, 1);
954 trace_f2fs_truncate_nodes_exit(dn
->inode
, freed
);
958 f2fs_put_page(page
, 1);
959 trace_f2fs_truncate_nodes_exit(dn
->inode
, ret
);
963 static int truncate_partial_nodes(struct dnode_of_data
*dn
,
964 struct f2fs_inode
*ri
, int *offset
, int depth
)
966 struct page
*pages
[2];
973 nid
[0] = le32_to_cpu(ri
->i_nid
[offset
[0] - NODE_DIR1_BLOCK
]);
977 /* get indirect nodes in the path */
978 for (i
= 0; i
< idx
+ 1; i
++) {
979 /* reference count'll be increased */
980 pages
[i
] = f2fs_get_node_page(F2FS_I_SB(dn
->inode
), nid
[i
]);
981 if (IS_ERR(pages
[i
])) {
982 err
= PTR_ERR(pages
[i
]);
986 nid
[i
+ 1] = get_nid(pages
[i
], offset
[i
+ 1], false);
989 f2fs_ra_node_pages(pages
[idx
], offset
[idx
+ 1], NIDS_PER_BLOCK
);
991 /* free direct nodes linked to a partial indirect node */
992 for (i
= offset
[idx
+ 1]; i
< NIDS_PER_BLOCK
; i
++) {
993 child_nid
= get_nid(pages
[idx
], i
, false);
997 err
= truncate_dnode(dn
);
1000 if (set_nid(pages
[idx
], i
, 0, false))
1001 dn
->node_changed
= true;
1004 if (offset
[idx
+ 1] == 0) {
1005 dn
->node_page
= pages
[idx
];
1007 err
= truncate_node(dn
);
1011 f2fs_put_page(pages
[idx
], 1);
1014 offset
[idx
+ 1] = 0;
1017 for (i
= idx
; i
>= 0; i
--)
1018 f2fs_put_page(pages
[i
], 1);
1020 trace_f2fs_truncate_partial_nodes(dn
->inode
, nid
, depth
, err
);
1026 * All the block addresses of data and nodes should be nullified.
1028 int f2fs_truncate_inode_blocks(struct inode
*inode
, pgoff_t from
)
1030 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
1031 int err
= 0, cont
= 1;
1032 int level
, offset
[4], noffset
[4];
1033 unsigned int nofs
= 0;
1034 struct f2fs_inode
*ri
;
1035 struct dnode_of_data dn
;
1038 trace_f2fs_truncate_inode_blocks_enter(inode
, from
);
1040 level
= get_node_path(inode
, from
, offset
, noffset
);
1044 page
= f2fs_get_node_page(sbi
, inode
->i_ino
);
1046 trace_f2fs_truncate_inode_blocks_exit(inode
, PTR_ERR(page
));
1047 return PTR_ERR(page
);
1050 set_new_dnode(&dn
, inode
, page
, NULL
, 0);
1053 ri
= F2FS_INODE(page
);
1061 if (!offset
[level
- 1])
1063 err
= truncate_partial_nodes(&dn
, ri
, offset
, level
);
1064 if (err
< 0 && err
!= -ENOENT
)
1066 nofs
+= 1 + NIDS_PER_BLOCK
;
1069 nofs
= 5 + 2 * NIDS_PER_BLOCK
;
1070 if (!offset
[level
- 1])
1072 err
= truncate_partial_nodes(&dn
, ri
, offset
, level
);
1073 if (err
< 0 && err
!= -ENOENT
)
1082 dn
.nid
= le32_to_cpu(ri
->i_nid
[offset
[0] - NODE_DIR1_BLOCK
]);
1083 switch (offset
[0]) {
1084 case NODE_DIR1_BLOCK
:
1085 case NODE_DIR2_BLOCK
:
1086 err
= truncate_dnode(&dn
);
1089 case NODE_IND1_BLOCK
:
1090 case NODE_IND2_BLOCK
:
1091 err
= truncate_nodes(&dn
, nofs
, offset
[1], 2);
1094 case NODE_DIND_BLOCK
:
1095 err
= truncate_nodes(&dn
, nofs
, offset
[1], 3);
1102 if (err
< 0 && err
!= -ENOENT
)
1104 if (offset
[1] == 0 &&
1105 ri
->i_nid
[offset
[0] - NODE_DIR1_BLOCK
]) {
1107 BUG_ON(page
->mapping
!= NODE_MAPPING(sbi
));
1108 f2fs_wait_on_page_writeback(page
, NODE
, true);
1109 ri
->i_nid
[offset
[0] - NODE_DIR1_BLOCK
] = 0;
1110 set_page_dirty(page
);
1118 f2fs_put_page(page
, 0);
1119 trace_f2fs_truncate_inode_blocks_exit(inode
, err
);
1120 return err
> 0 ? 0 : err
;
1123 /* caller must lock inode page */
1124 int f2fs_truncate_xattr_node(struct inode
*inode
)
1126 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
1127 nid_t nid
= F2FS_I(inode
)->i_xattr_nid
;
1128 struct dnode_of_data dn
;
1135 npage
= f2fs_get_node_page(sbi
, nid
);
1137 return PTR_ERR(npage
);
1139 set_new_dnode(&dn
, inode
, NULL
, npage
, nid
);
1140 err
= truncate_node(&dn
);
1142 f2fs_put_page(npage
, 1);
1146 f2fs_i_xnid_write(inode
, 0);
1152 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
1155 int f2fs_remove_inode_page(struct inode
*inode
)
1157 struct dnode_of_data dn
;
1160 set_new_dnode(&dn
, inode
, NULL
, NULL
, inode
->i_ino
);
1161 err
= f2fs_get_dnode_of_data(&dn
, 0, LOOKUP_NODE
);
1165 err
= f2fs_truncate_xattr_node(inode
);
1167 f2fs_put_dnode(&dn
);
1171 /* remove potential inline_data blocks */
1172 if (S_ISREG(inode
->i_mode
) || S_ISDIR(inode
->i_mode
) ||
1173 S_ISLNK(inode
->i_mode
))
1174 f2fs_truncate_data_blocks_range(&dn
, 1);
1176 /* 0 is possible, after f2fs_new_inode() has failed */
1177 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode
)))) {
1178 f2fs_put_dnode(&dn
);
1181 f2fs_bug_on(F2FS_I_SB(inode
),
1182 inode
->i_blocks
!= 0 && inode
->i_blocks
!= 8);
1184 /* will put inode & node pages */
1185 err
= truncate_node(&dn
);
1187 f2fs_put_dnode(&dn
);
1193 struct page
*f2fs_new_inode_page(struct inode
*inode
)
1195 struct dnode_of_data dn
;
1197 /* allocate inode page for new inode */
1198 set_new_dnode(&dn
, inode
, NULL
, NULL
, inode
->i_ino
);
1200 /* caller should f2fs_put_page(page, 1); */
1201 return f2fs_new_node_page(&dn
, 0);
1204 struct page
*f2fs_new_node_page(struct dnode_of_data
*dn
, unsigned int ofs
)
1206 struct f2fs_sb_info
*sbi
= F2FS_I_SB(dn
->inode
);
1207 struct node_info new_ni
;
1211 if (unlikely(is_inode_flag_set(dn
->inode
, FI_NO_ALLOC
)))
1212 return ERR_PTR(-EPERM
);
1214 page
= f2fs_grab_cache_page(NODE_MAPPING(sbi
), dn
->nid
, false);
1216 return ERR_PTR(-ENOMEM
);
1218 if (unlikely((err
= inc_valid_node_count(sbi
, dn
->inode
, !ofs
))))
1221 #ifdef CONFIG_F2FS_CHECK_FS
1222 err
= f2fs_get_node_info(sbi
, dn
->nid
, &new_ni
);
1224 dec_valid_node_count(sbi
, dn
->inode
, !ofs
);
1227 f2fs_bug_on(sbi
, new_ni
.blk_addr
!= NULL_ADDR
);
1229 new_ni
.nid
= dn
->nid
;
1230 new_ni
.ino
= dn
->inode
->i_ino
;
1231 new_ni
.blk_addr
= NULL_ADDR
;
1234 set_node_addr(sbi
, &new_ni
, NEW_ADDR
, false);
1236 f2fs_wait_on_page_writeback(page
, NODE
, true);
1237 fill_node_footer(page
, dn
->nid
, dn
->inode
->i_ino
, ofs
, true);
1238 set_cold_node(page
, S_ISDIR(dn
->inode
->i_mode
));
1239 if (!PageUptodate(page
))
1240 SetPageUptodate(page
);
1241 if (set_page_dirty(page
))
1242 dn
->node_changed
= true;
1244 if (f2fs_has_xattr_block(ofs
))
1245 f2fs_i_xnid_write(dn
->inode
, dn
->nid
);
1248 inc_valid_inode_count(sbi
);
1252 clear_node_page_dirty(page
);
1253 f2fs_put_page(page
, 1);
1254 return ERR_PTR(err
);
1258 * Caller should do after getting the following values.
1259 * 0: f2fs_put_page(page, 0)
1260 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
1262 static int read_node_page(struct page
*page
, int op_flags
)
1264 struct f2fs_sb_info
*sbi
= F2FS_P_SB(page
);
1265 struct node_info ni
;
1266 struct f2fs_io_info fio
= {
1270 .op_flags
= op_flags
,
1272 .encrypted_page
= NULL
,
1276 if (PageUptodate(page
)) {
1277 #ifdef CONFIG_F2FS_CHECK_FS
1278 f2fs_bug_on(sbi
, !f2fs_inode_chksum_verify(sbi
, page
));
1283 err
= f2fs_get_node_info(sbi
, page
->index
, &ni
);
1287 if (unlikely(ni
.blk_addr
== NULL_ADDR
) ||
1288 is_sbi_flag_set(sbi
, SBI_IS_SHUTDOWN
)) {
1289 ClearPageUptodate(page
);
1293 fio
.new_blkaddr
= fio
.old_blkaddr
= ni
.blk_addr
;
1294 return f2fs_submit_page_bio(&fio
);
1298 * Readahead a node page
1300 void f2fs_ra_node_page(struct f2fs_sb_info
*sbi
, nid_t nid
)
1307 if (f2fs_check_nid_range(sbi
, nid
))
1311 apage
= radix_tree_lookup(&NODE_MAPPING(sbi
)->i_pages
, nid
);
1316 apage
= f2fs_grab_cache_page(NODE_MAPPING(sbi
), nid
, false);
1320 err
= read_node_page(apage
, REQ_RAHEAD
);
1321 f2fs_put_page(apage
, err
? 1 : 0);
1324 static struct page
*__get_node_page(struct f2fs_sb_info
*sbi
, pgoff_t nid
,
1325 struct page
*parent
, int start
)
1331 return ERR_PTR(-ENOENT
);
1332 if (f2fs_check_nid_range(sbi
, nid
))
1333 return ERR_PTR(-EINVAL
);
1335 page
= f2fs_grab_cache_page(NODE_MAPPING(sbi
), nid
, false);
1337 return ERR_PTR(-ENOMEM
);
1339 err
= read_node_page(page
, 0);
1341 f2fs_put_page(page
, 1);
1342 return ERR_PTR(err
);
1343 } else if (err
== LOCKED_PAGE
) {
1349 f2fs_ra_node_pages(parent
, start
+ 1, MAX_RA_NODE
);
1353 if (unlikely(page
->mapping
!= NODE_MAPPING(sbi
))) {
1354 f2fs_put_page(page
, 1);
1358 if (unlikely(!PageUptodate(page
))) {
1363 if (!f2fs_inode_chksum_verify(sbi
, page
)) {
1368 if(unlikely(nid
!= nid_of_node(page
))) {
1369 f2fs_msg(sbi
->sb
, KERN_WARNING
, "inconsistent node block, "
1370 "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
1371 nid
, nid_of_node(page
), ino_of_node(page
),
1372 ofs_of_node(page
), cpver_of_node(page
),
1373 next_blkaddr_of_node(page
));
1376 ClearPageUptodate(page
);
1377 f2fs_put_page(page
, 1);
1378 return ERR_PTR(err
);
1383 struct page
*f2fs_get_node_page(struct f2fs_sb_info
*sbi
, pgoff_t nid
)
1385 return __get_node_page(sbi
, nid
, NULL
, 0);
1388 struct page
*f2fs_get_node_page_ra(struct page
*parent
, int start
)
1390 struct f2fs_sb_info
*sbi
= F2FS_P_SB(parent
);
1391 nid_t nid
= get_nid(parent
, start
, false);
1393 return __get_node_page(sbi
, nid
, parent
, start
);
1396 static void flush_inline_data(struct f2fs_sb_info
*sbi
, nid_t ino
)
1398 struct inode
*inode
;
1402 /* should flush inline_data before evict_inode */
1403 inode
= ilookup(sbi
->sb
, ino
);
1407 page
= f2fs_pagecache_get_page(inode
->i_mapping
, 0,
1408 FGP_LOCK
|FGP_NOWAIT
, 0);
1412 if (!PageUptodate(page
))
1415 if (!PageDirty(page
))
1418 if (!clear_page_dirty_for_io(page
))
1421 ret
= f2fs_write_inline_data(inode
, page
);
1422 inode_dec_dirty_pages(inode
);
1423 f2fs_remove_dirty_inode(inode
);
1425 set_page_dirty(page
);
1427 f2fs_put_page(page
, 1);
1432 static struct page
*last_fsync_dnode(struct f2fs_sb_info
*sbi
, nid_t ino
)
1435 struct pagevec pvec
;
1436 struct page
*last_page
= NULL
;
1439 pagevec_init(&pvec
);
1442 while ((nr_pages
= pagevec_lookup_tag(&pvec
, NODE_MAPPING(sbi
), &index
,
1443 PAGECACHE_TAG_DIRTY
))) {
1446 for (i
= 0; i
< nr_pages
; i
++) {
1447 struct page
*page
= pvec
.pages
[i
];
1449 if (unlikely(f2fs_cp_error(sbi
))) {
1450 f2fs_put_page(last_page
, 0);
1451 pagevec_release(&pvec
);
1452 return ERR_PTR(-EIO
);
1455 if (!IS_DNODE(page
) || !is_cold_node(page
))
1457 if (ino_of_node(page
) != ino
)
1462 if (unlikely(page
->mapping
!= NODE_MAPPING(sbi
))) {
1467 if (ino_of_node(page
) != ino
)
1468 goto continue_unlock
;
1470 if (!PageDirty(page
)) {
1471 /* someone wrote it for us */
1472 goto continue_unlock
;
1476 f2fs_put_page(last_page
, 0);
1482 pagevec_release(&pvec
);
1488 static int __write_node_page(struct page
*page
, bool atomic
, bool *submitted
,
1489 struct writeback_control
*wbc
, bool do_balance
,
1490 enum iostat_type io_type
, unsigned int *seq_id
)
1492 struct f2fs_sb_info
*sbi
= F2FS_P_SB(page
);
1494 struct node_info ni
;
1495 struct f2fs_io_info fio
= {
1497 .ino
= ino_of_node(page
),
1500 .op_flags
= wbc_to_write_flags(wbc
),
1502 .encrypted_page
= NULL
,
1509 trace_f2fs_writepage(page
, NODE
);
1511 if (unlikely(f2fs_cp_error(sbi
)))
1514 if (unlikely(is_sbi_flag_set(sbi
, SBI_POR_DOING
)))
1517 if (wbc
->sync_mode
== WB_SYNC_NONE
&&
1518 IS_DNODE(page
) && is_cold_node(page
))
1521 /* get old block addr of this node page */
1522 nid
= nid_of_node(page
);
1523 f2fs_bug_on(sbi
, page
->index
!= nid
);
1525 if (f2fs_get_node_info(sbi
, nid
, &ni
))
1528 if (wbc
->for_reclaim
) {
1529 if (!down_read_trylock(&sbi
->node_write
))
1532 down_read(&sbi
->node_write
);
1535 /* This page is already truncated */
1536 if (unlikely(ni
.blk_addr
== NULL_ADDR
)) {
1537 ClearPageUptodate(page
);
1538 dec_page_count(sbi
, F2FS_DIRTY_NODES
);
1539 up_read(&sbi
->node_write
);
1544 if (__is_valid_data_blkaddr(ni
.blk_addr
) &&
1545 !f2fs_is_valid_blkaddr(sbi
, ni
.blk_addr
, DATA_GENERIC
))
1548 if (atomic
&& !test_opt(sbi
, NOBARRIER
))
1549 fio
.op_flags
|= REQ_PREFLUSH
| REQ_FUA
;
1551 set_page_writeback(page
);
1552 ClearPageError(page
);
1554 if (f2fs_in_warm_node_list(sbi
, page
)) {
1555 seq
= f2fs_add_fsync_node_entry(sbi
, page
);
1560 fio
.old_blkaddr
= ni
.blk_addr
;
1561 f2fs_do_write_node_page(nid
, &fio
);
1562 set_node_addr(sbi
, &ni
, fio
.new_blkaddr
, is_fsync_dnode(page
));
1563 dec_page_count(sbi
, F2FS_DIRTY_NODES
);
1564 up_read(&sbi
->node_write
);
1566 if (wbc
->for_reclaim
) {
1567 f2fs_submit_merged_write_cond(sbi
, page
->mapping
->host
, 0,
1574 if (unlikely(f2fs_cp_error(sbi
))) {
1575 f2fs_submit_merged_write(sbi
, NODE
);
1579 *submitted
= fio
.submitted
;
1582 f2fs_balance_fs(sbi
, false);
1586 redirty_page_for_writepage(wbc
, page
);
1587 return AOP_WRITEPAGE_ACTIVATE
;
1590 void f2fs_move_node_page(struct page
*node_page
, int gc_type
)
1592 if (gc_type
== FG_GC
) {
1593 struct writeback_control wbc
= {
1594 .sync_mode
= WB_SYNC_ALL
,
1599 set_page_dirty(node_page
);
1600 f2fs_wait_on_page_writeback(node_page
, NODE
, true);
1602 f2fs_bug_on(F2FS_P_SB(node_page
), PageWriteback(node_page
));
1603 if (!clear_page_dirty_for_io(node_page
))
1606 if (__write_node_page(node_page
, false, NULL
,
1607 &wbc
, false, FS_GC_NODE_IO
, NULL
))
1608 unlock_page(node_page
);
1611 /* set page dirty and write it */
1612 if (!PageWriteback(node_page
))
1613 set_page_dirty(node_page
);
1616 unlock_page(node_page
);
1618 f2fs_put_page(node_page
, 0);
1621 static int f2fs_write_node_page(struct page
*page
,
1622 struct writeback_control
*wbc
)
1624 return __write_node_page(page
, false, NULL
, wbc
, false,
1628 int f2fs_fsync_node_pages(struct f2fs_sb_info
*sbi
, struct inode
*inode
,
1629 struct writeback_control
*wbc
, bool atomic
,
1630 unsigned int *seq_id
)
1633 pgoff_t last_idx
= ULONG_MAX
;
1634 struct pagevec pvec
;
1636 struct page
*last_page
= NULL
;
1637 bool marked
= false;
1638 nid_t ino
= inode
->i_ino
;
1642 last_page
= last_fsync_dnode(sbi
, ino
);
1643 if (IS_ERR_OR_NULL(last_page
))
1644 return PTR_ERR_OR_ZERO(last_page
);
1647 pagevec_init(&pvec
);
1650 while ((nr_pages
= pagevec_lookup_tag(&pvec
, NODE_MAPPING(sbi
), &index
,
1651 PAGECACHE_TAG_DIRTY
))) {
1654 for (i
= 0; i
< nr_pages
; i
++) {
1655 struct page
*page
= pvec
.pages
[i
];
1656 bool submitted
= false;
1658 if (unlikely(f2fs_cp_error(sbi
))) {
1659 f2fs_put_page(last_page
, 0);
1660 pagevec_release(&pvec
);
1665 if (!IS_DNODE(page
) || !is_cold_node(page
))
1667 if (ino_of_node(page
) != ino
)
1672 if (unlikely(page
->mapping
!= NODE_MAPPING(sbi
))) {
1677 if (ino_of_node(page
) != ino
)
1678 goto continue_unlock
;
1680 if (!PageDirty(page
) && page
!= last_page
) {
1681 /* someone wrote it for us */
1682 goto continue_unlock
;
1685 f2fs_wait_on_page_writeback(page
, NODE
, true);
1686 BUG_ON(PageWriteback(page
));
1688 set_fsync_mark(page
, 0);
1689 set_dentry_mark(page
, 0);
1691 if (!atomic
|| page
== last_page
) {
1692 set_fsync_mark(page
, 1);
1693 if (IS_INODE(page
)) {
1694 if (is_inode_flag_set(inode
,
1696 f2fs_update_inode(inode
, page
);
1697 set_dentry_mark(page
,
1698 f2fs_need_dentry_mark(sbi
, ino
));
1700 /* may be written by other thread */
1701 if (!PageDirty(page
))
1702 set_page_dirty(page
);
1705 if (!clear_page_dirty_for_io(page
))
1706 goto continue_unlock
;
1708 ret
= __write_node_page(page
, atomic
&&
1710 &submitted
, wbc
, true,
1711 FS_NODE_IO
, seq_id
);
1714 f2fs_put_page(last_page
, 0);
1716 } else if (submitted
) {
1717 last_idx
= page
->index
;
1720 if (page
== last_page
) {
1721 f2fs_put_page(page
, 0);
1726 pagevec_release(&pvec
);
1732 if (!ret
&& atomic
&& !marked
) {
1733 f2fs_msg(sbi
->sb
, KERN_DEBUG
,
1734 "Retry to write fsync mark: ino=%u, idx=%lx",
1735 ino
, last_page
->index
);
1736 lock_page(last_page
);
1737 f2fs_wait_on_page_writeback(last_page
, NODE
, true);
1738 set_page_dirty(last_page
);
1739 unlock_page(last_page
);
1743 if (last_idx
!= ULONG_MAX
)
1744 f2fs_submit_merged_write_cond(sbi
, NULL
, ino
, last_idx
, NODE
);
1745 return ret
? -EIO
: 0;
1748 int f2fs_sync_node_pages(struct f2fs_sb_info
*sbi
,
1749 struct writeback_control
*wbc
,
1750 bool do_balance
, enum iostat_type io_type
)
1753 struct pagevec pvec
;
1757 int nr_pages
, done
= 0;
1759 pagevec_init(&pvec
);
1764 while (!done
&& (nr_pages
= pagevec_lookup_tag(&pvec
,
1765 NODE_MAPPING(sbi
), &index
, PAGECACHE_TAG_DIRTY
))) {
1768 for (i
= 0; i
< nr_pages
; i
++) {
1769 struct page
*page
= pvec
.pages
[i
];
1770 bool submitted
= false;
1772 /* give a priority to WB_SYNC threads */
1773 if (atomic_read(&sbi
->wb_sync_req
[NODE
]) &&
1774 wbc
->sync_mode
== WB_SYNC_NONE
) {
1780 * flushing sequence with step:
1785 if (step
== 0 && IS_DNODE(page
))
1787 if (step
== 1 && (!IS_DNODE(page
) ||
1788 is_cold_node(page
)))
1790 if (step
== 2 && (!IS_DNODE(page
) ||
1791 !is_cold_node(page
)))
1794 if (wbc
->sync_mode
== WB_SYNC_ALL
)
1796 else if (!trylock_page(page
))
1799 if (unlikely(page
->mapping
!= NODE_MAPPING(sbi
))) {
1805 if (!PageDirty(page
)) {
1806 /* someone wrote it for us */
1807 goto continue_unlock
;
1810 /* flush inline_data */
1811 if (is_inline_node(page
)) {
1812 clear_inline_node(page
);
1814 flush_inline_data(sbi
, ino_of_node(page
));
1818 f2fs_wait_on_page_writeback(page
, NODE
, true);
1820 BUG_ON(PageWriteback(page
));
1821 if (!clear_page_dirty_for_io(page
))
1822 goto continue_unlock
;
1824 set_fsync_mark(page
, 0);
1825 set_dentry_mark(page
, 0);
1827 ret
= __write_node_page(page
, false, &submitted
,
1828 wbc
, do_balance
, io_type
, NULL
);
1834 if (--wbc
->nr_to_write
== 0)
1837 pagevec_release(&pvec
);
1840 if (wbc
->nr_to_write
== 0) {
1847 if (wbc
->sync_mode
== WB_SYNC_NONE
&& step
== 1)
1854 f2fs_submit_merged_write(sbi
, NODE
);
1856 if (unlikely(f2fs_cp_error(sbi
)))
1861 int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info
*sbi
,
1862 unsigned int seq_id
)
1864 struct fsync_node_entry
*fn
;
1866 struct list_head
*head
= &sbi
->fsync_node_list
;
1867 unsigned long flags
;
1868 unsigned int cur_seq_id
= 0;
1871 while (seq_id
&& cur_seq_id
< seq_id
) {
1872 spin_lock_irqsave(&sbi
->fsync_node_lock
, flags
);
1873 if (list_empty(head
)) {
1874 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
1877 fn
= list_first_entry(head
, struct fsync_node_entry
, list
);
1878 if (fn
->seq_id
> seq_id
) {
1879 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
1882 cur_seq_id
= fn
->seq_id
;
1885 spin_unlock_irqrestore(&sbi
->fsync_node_lock
, flags
);
1887 f2fs_wait_on_page_writeback(page
, NODE
, true);
1888 if (TestClearPageError(page
))
1897 ret2
= filemap_check_errors(NODE_MAPPING(sbi
));
1904 static int f2fs_write_node_pages(struct address_space
*mapping
,
1905 struct writeback_control
*wbc
)
1907 struct f2fs_sb_info
*sbi
= F2FS_M_SB(mapping
);
1908 struct blk_plug plug
;
1911 if (unlikely(is_sbi_flag_set(sbi
, SBI_POR_DOING
)))
1914 /* balancing f2fs's metadata in background */
1915 f2fs_balance_fs_bg(sbi
);
1917 /* collect a number of dirty node pages and write together */
1918 if (get_pages(sbi
, F2FS_DIRTY_NODES
) < nr_pages_to_skip(sbi
, NODE
))
1921 if (wbc
->sync_mode
== WB_SYNC_ALL
)
1922 atomic_inc(&sbi
->wb_sync_req
[NODE
]);
1923 else if (atomic_read(&sbi
->wb_sync_req
[NODE
]))
1926 trace_f2fs_writepages(mapping
->host
, wbc
, NODE
);
1928 diff
= nr_pages_to_write(sbi
, NODE
, wbc
);
1929 blk_start_plug(&plug
);
1930 f2fs_sync_node_pages(sbi
, wbc
, true, FS_NODE_IO
);
1931 blk_finish_plug(&plug
);
1932 wbc
->nr_to_write
= max((long)0, wbc
->nr_to_write
- diff
);
1934 if (wbc
->sync_mode
== WB_SYNC_ALL
)
1935 atomic_dec(&sbi
->wb_sync_req
[NODE
]);
1939 wbc
->pages_skipped
+= get_pages(sbi
, F2FS_DIRTY_NODES
);
1940 trace_f2fs_writepages(mapping
->host
, wbc
, NODE
);
1944 static int f2fs_set_node_page_dirty(struct page
*page
)
1946 trace_f2fs_set_page_dirty(page
, NODE
);
1948 if (!PageUptodate(page
))
1949 SetPageUptodate(page
);
1950 #ifdef CONFIG_F2FS_CHECK_FS
1952 f2fs_inode_chksum_set(F2FS_P_SB(page
), page
);
1954 if (!PageDirty(page
)) {
1955 __set_page_dirty_nobuffers(page
);
1956 inc_page_count(F2FS_P_SB(page
), F2FS_DIRTY_NODES
);
1957 SetPagePrivate(page
);
1958 f2fs_trace_pid(page
);
1965 * Structure of the f2fs node operations
1967 const struct address_space_operations f2fs_node_aops
= {
1968 .writepage
= f2fs_write_node_page
,
1969 .writepages
= f2fs_write_node_pages
,
1970 .set_page_dirty
= f2fs_set_node_page_dirty
,
1971 .invalidatepage
= f2fs_invalidate_page
,
1972 .releasepage
= f2fs_release_page
,
1973 #ifdef CONFIG_MIGRATION
1974 .migratepage
= f2fs_migrate_page
,
1978 static struct free_nid
*__lookup_free_nid_list(struct f2fs_nm_info
*nm_i
,
1981 return radix_tree_lookup(&nm_i
->free_nid_root
, n
);
1984 static int __insert_free_nid(struct f2fs_sb_info
*sbi
,
1985 struct free_nid
*i
, enum nid_state state
)
1987 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
1989 int err
= radix_tree_insert(&nm_i
->free_nid_root
, i
->nid
, i
);
1993 f2fs_bug_on(sbi
, state
!= i
->state
);
1994 nm_i
->nid_cnt
[state
]++;
1995 if (state
== FREE_NID
)
1996 list_add_tail(&i
->list
, &nm_i
->free_nid_list
);
2000 static void __remove_free_nid(struct f2fs_sb_info
*sbi
,
2001 struct free_nid
*i
, enum nid_state state
)
2003 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2005 f2fs_bug_on(sbi
, state
!= i
->state
);
2006 nm_i
->nid_cnt
[state
]--;
2007 if (state
== FREE_NID
)
2009 radix_tree_delete(&nm_i
->free_nid_root
, i
->nid
);
2012 static void __move_free_nid(struct f2fs_sb_info
*sbi
, struct free_nid
*i
,
2013 enum nid_state org_state
, enum nid_state dst_state
)
2015 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2017 f2fs_bug_on(sbi
, org_state
!= i
->state
);
2018 i
->state
= dst_state
;
2019 nm_i
->nid_cnt
[org_state
]--;
2020 nm_i
->nid_cnt
[dst_state
]++;
2022 switch (dst_state
) {
2027 list_add_tail(&i
->list
, &nm_i
->free_nid_list
);
2034 static void update_free_nid_bitmap(struct f2fs_sb_info
*sbi
, nid_t nid
,
2035 bool set
, bool build
)
2037 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2038 unsigned int nat_ofs
= NAT_BLOCK_OFFSET(nid
);
2039 unsigned int nid_ofs
= nid
- START_NID(nid
);
2041 if (!test_bit_le(nat_ofs
, nm_i
->nat_block_bitmap
))
2045 if (test_bit_le(nid_ofs
, nm_i
->free_nid_bitmap
[nat_ofs
]))
2047 __set_bit_le(nid_ofs
, nm_i
->free_nid_bitmap
[nat_ofs
]);
2048 nm_i
->free_nid_count
[nat_ofs
]++;
2050 if (!test_bit_le(nid_ofs
, nm_i
->free_nid_bitmap
[nat_ofs
]))
2052 __clear_bit_le(nid_ofs
, nm_i
->free_nid_bitmap
[nat_ofs
]);
2054 nm_i
->free_nid_count
[nat_ofs
]--;
2058 /* return if the nid is recognized as free */
2059 static bool add_free_nid(struct f2fs_sb_info
*sbi
,
2060 nid_t nid
, bool build
, bool update
)
2062 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2063 struct free_nid
*i
, *e
;
2064 struct nat_entry
*ne
;
2068 /* 0 nid should not be used */
2069 if (unlikely(nid
== 0))
2072 i
= f2fs_kmem_cache_alloc(free_nid_slab
, GFP_NOFS
);
2074 i
->state
= FREE_NID
;
2076 radix_tree_preload(GFP_NOFS
| __GFP_NOFAIL
);
2078 spin_lock(&nm_i
->nid_list_lock
);
2086 * - __insert_nid_to_list(PREALLOC_NID)
2087 * - f2fs_balance_fs_bg
2088 * - f2fs_build_free_nids
2089 * - __f2fs_build_free_nids
2092 * - __lookup_nat_cache
2094 * - f2fs_init_inode_metadata
2095 * - f2fs_new_inode_page
2096 * - f2fs_new_node_page
2098 * - f2fs_alloc_nid_done
2099 * - __remove_nid_from_list(PREALLOC_NID)
2100 * - __insert_nid_to_list(FREE_NID)
2102 ne
= __lookup_nat_cache(nm_i
, nid
);
2103 if (ne
&& (!get_nat_flag(ne
, IS_CHECKPOINTED
) ||
2104 nat_get_blkaddr(ne
) != NULL_ADDR
))
2107 e
= __lookup_free_nid_list(nm_i
, nid
);
2109 if (e
->state
== FREE_NID
)
2115 err
= __insert_free_nid(sbi
, i
, FREE_NID
);
2118 update_free_nid_bitmap(sbi
, nid
, ret
, build
);
2120 nm_i
->available_nids
++;
2122 spin_unlock(&nm_i
->nid_list_lock
);
2123 radix_tree_preload_end();
2126 kmem_cache_free(free_nid_slab
, i
);
2130 static void remove_free_nid(struct f2fs_sb_info
*sbi
, nid_t nid
)
2132 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2134 bool need_free
= false;
2136 spin_lock(&nm_i
->nid_list_lock
);
2137 i
= __lookup_free_nid_list(nm_i
, nid
);
2138 if (i
&& i
->state
== FREE_NID
) {
2139 __remove_free_nid(sbi
, i
, FREE_NID
);
2142 spin_unlock(&nm_i
->nid_list_lock
);
2145 kmem_cache_free(free_nid_slab
, i
);
2148 static int scan_nat_page(struct f2fs_sb_info
*sbi
,
2149 struct page
*nat_page
, nid_t start_nid
)
2151 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2152 struct f2fs_nat_block
*nat_blk
= page_address(nat_page
);
2154 unsigned int nat_ofs
= NAT_BLOCK_OFFSET(start_nid
);
2157 __set_bit_le(nat_ofs
, nm_i
->nat_block_bitmap
);
2159 i
= start_nid
% NAT_ENTRY_PER_BLOCK
;
2161 for (; i
< NAT_ENTRY_PER_BLOCK
; i
++, start_nid
++) {
2162 if (unlikely(start_nid
>= nm_i
->max_nid
))
2165 blk_addr
= le32_to_cpu(nat_blk
->entries
[i
].block_addr
);
2167 if (blk_addr
== NEW_ADDR
)
2170 if (blk_addr
== NULL_ADDR
) {
2171 add_free_nid(sbi
, start_nid
, true, true);
2173 spin_lock(&NM_I(sbi
)->nid_list_lock
);
2174 update_free_nid_bitmap(sbi
, start_nid
, false, true);
2175 spin_unlock(&NM_I(sbi
)->nid_list_lock
);
2182 static void scan_curseg_cache(struct f2fs_sb_info
*sbi
)
2184 struct curseg_info
*curseg
= CURSEG_I(sbi
, CURSEG_HOT_DATA
);
2185 struct f2fs_journal
*journal
= curseg
->journal
;
2188 down_read(&curseg
->journal_rwsem
);
2189 for (i
= 0; i
< nats_in_cursum(journal
); i
++) {
2193 addr
= le32_to_cpu(nat_in_journal(journal
, i
).block_addr
);
2194 nid
= le32_to_cpu(nid_in_journal(journal
, i
));
2195 if (addr
== NULL_ADDR
)
2196 add_free_nid(sbi
, nid
, true, false);
2198 remove_free_nid(sbi
, nid
);
2200 up_read(&curseg
->journal_rwsem
);
2203 static void scan_free_nid_bits(struct f2fs_sb_info
*sbi
)
2205 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2206 unsigned int i
, idx
;
2209 down_read(&nm_i
->nat_tree_lock
);
2211 for (i
= 0; i
< nm_i
->nat_blocks
; i
++) {
2212 if (!test_bit_le(i
, nm_i
->nat_block_bitmap
))
2214 if (!nm_i
->free_nid_count
[i
])
2216 for (idx
= 0; idx
< NAT_ENTRY_PER_BLOCK
; idx
++) {
2217 idx
= find_next_bit_le(nm_i
->free_nid_bitmap
[i
],
2218 NAT_ENTRY_PER_BLOCK
, idx
);
2219 if (idx
>= NAT_ENTRY_PER_BLOCK
)
2222 nid
= i
* NAT_ENTRY_PER_BLOCK
+ idx
;
2223 add_free_nid(sbi
, nid
, true, false);
2225 if (nm_i
->nid_cnt
[FREE_NID
] >= MAX_FREE_NIDS
)
2230 scan_curseg_cache(sbi
);
2232 up_read(&nm_i
->nat_tree_lock
);
2235 static int __f2fs_build_free_nids(struct f2fs_sb_info
*sbi
,
2236 bool sync
, bool mount
)
2238 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2240 nid_t nid
= nm_i
->next_scan_nid
;
2242 if (unlikely(nid
>= nm_i
->max_nid
))
2245 /* Enough entries */
2246 if (nm_i
->nid_cnt
[FREE_NID
] >= NAT_ENTRY_PER_BLOCK
)
2249 if (!sync
&& !f2fs_available_free_memory(sbi
, FREE_NIDS
))
2253 /* try to find free nids in free_nid_bitmap */
2254 scan_free_nid_bits(sbi
);
2256 if (nm_i
->nid_cnt
[FREE_NID
] >= NAT_ENTRY_PER_BLOCK
)
2260 /* readahead nat pages to be scanned */
2261 f2fs_ra_meta_pages(sbi
, NAT_BLOCK_OFFSET(nid
), FREE_NID_PAGES
,
2264 down_read(&nm_i
->nat_tree_lock
);
2267 if (!test_bit_le(NAT_BLOCK_OFFSET(nid
),
2268 nm_i
->nat_block_bitmap
)) {
2269 struct page
*page
= get_current_nat_page(sbi
, nid
);
2271 ret
= scan_nat_page(sbi
, page
, nid
);
2272 f2fs_put_page(page
, 1);
2275 up_read(&nm_i
->nat_tree_lock
);
2276 f2fs_bug_on(sbi
, !mount
);
2277 f2fs_msg(sbi
->sb
, KERN_ERR
,
2278 "NAT is corrupt, run fsck to fix it");
2283 nid
+= (NAT_ENTRY_PER_BLOCK
- (nid
% NAT_ENTRY_PER_BLOCK
));
2284 if (unlikely(nid
>= nm_i
->max_nid
))
2287 if (++i
>= FREE_NID_PAGES
)
2291 /* go to the next free nat pages to find free nids abundantly */
2292 nm_i
->next_scan_nid
= nid
;
2294 /* find free nids from current sum_pages */
2295 scan_curseg_cache(sbi
);
2297 up_read(&nm_i
->nat_tree_lock
);
2299 f2fs_ra_meta_pages(sbi
, NAT_BLOCK_OFFSET(nm_i
->next_scan_nid
),
2300 nm_i
->ra_nid_pages
, META_NAT
, false);
2305 int f2fs_build_free_nids(struct f2fs_sb_info
*sbi
, bool sync
, bool mount
)
2309 mutex_lock(&NM_I(sbi
)->build_lock
);
2310 ret
= __f2fs_build_free_nids(sbi
, sync
, mount
);
2311 mutex_unlock(&NM_I(sbi
)->build_lock
);
2317 * If this function returns success, caller can obtain a new nid
2318 * from second parameter of this function.
2319 * The returned nid could be used ino as well as nid when inode is created.
2321 bool f2fs_alloc_nid(struct f2fs_sb_info
*sbi
, nid_t
*nid
)
2323 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2324 struct free_nid
*i
= NULL
;
2326 if (time_to_inject(sbi
, FAULT_ALLOC_NID
)) {
2327 f2fs_show_injection_info(FAULT_ALLOC_NID
);
2331 spin_lock(&nm_i
->nid_list_lock
);
2333 if (unlikely(nm_i
->available_nids
== 0)) {
2334 spin_unlock(&nm_i
->nid_list_lock
);
2338 /* We should not use stale free nids created by f2fs_build_free_nids */
2339 if (nm_i
->nid_cnt
[FREE_NID
] && !on_f2fs_build_free_nids(nm_i
)) {
2340 f2fs_bug_on(sbi
, list_empty(&nm_i
->free_nid_list
));
2341 i
= list_first_entry(&nm_i
->free_nid_list
,
2342 struct free_nid
, list
);
2345 __move_free_nid(sbi
, i
, FREE_NID
, PREALLOC_NID
);
2346 nm_i
->available_nids
--;
2348 update_free_nid_bitmap(sbi
, *nid
, false, false);
2350 spin_unlock(&nm_i
->nid_list_lock
);
2353 spin_unlock(&nm_i
->nid_list_lock
);
2355 /* Let's scan nat pages and its caches to get free nids */
2356 f2fs_build_free_nids(sbi
, true, false);
2361 * f2fs_alloc_nid() should be called prior to this function.
2363 void f2fs_alloc_nid_done(struct f2fs_sb_info
*sbi
, nid_t nid
)
2365 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2368 spin_lock(&nm_i
->nid_list_lock
);
2369 i
= __lookup_free_nid_list(nm_i
, nid
);
2370 f2fs_bug_on(sbi
, !i
);
2371 __remove_free_nid(sbi
, i
, PREALLOC_NID
);
2372 spin_unlock(&nm_i
->nid_list_lock
);
2374 kmem_cache_free(free_nid_slab
, i
);
2378 * f2fs_alloc_nid() should be called prior to this function.
2380 void f2fs_alloc_nid_failed(struct f2fs_sb_info
*sbi
, nid_t nid
)
2382 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2384 bool need_free
= false;
2389 spin_lock(&nm_i
->nid_list_lock
);
2390 i
= __lookup_free_nid_list(nm_i
, nid
);
2391 f2fs_bug_on(sbi
, !i
);
2393 if (!f2fs_available_free_memory(sbi
, FREE_NIDS
)) {
2394 __remove_free_nid(sbi
, i
, PREALLOC_NID
);
2397 __move_free_nid(sbi
, i
, PREALLOC_NID
, FREE_NID
);
2400 nm_i
->available_nids
++;
2402 update_free_nid_bitmap(sbi
, nid
, true, false);
2404 spin_unlock(&nm_i
->nid_list_lock
);
2407 kmem_cache_free(free_nid_slab
, i
);
2410 int f2fs_try_to_free_nids(struct f2fs_sb_info
*sbi
, int nr_shrink
)
2412 struct f2fs_nm_info
*nm_i
= NM_I(sbi
);
2413 struct free_nid
*i
, *next
;
2416 if (nm_i
->nid_cnt
[FREE_NID
] <= MAX_FREE_NIDS
)
2419 if (!mutex_trylock(&nm_i
->build_lock
))
2422 spin_lock(&nm_i
->nid_list_lock
);
2423 list_for_each_entry_safe(i
, next
, &nm_i
->free_nid_list
, list
) {
2424 if (nr_shrink
<= 0 ||
2425 nm_i
->nid_cnt
[FREE_NID
] <= MAX_FREE_NIDS
)
2428 __remove_free_nid(sbi
, i
, FREE_NID
);
2429 kmem_cache_free(free_nid_slab
, i
);
2432 spin_unlock(&nm_i
->nid_list_lock
);
2433 mutex_unlock(&nm_i
->build_lock
);
2435 return nr
- nr_shrink
;
2438 void f2fs_recover_inline_xattr(struct inode
*inode
, struct page
*page
)
2440 void *src_addr
, *dst_addr
;
2443 struct f2fs_inode
*ri
;
2445 ipage
= f2fs_get_node_page(F2FS_I_SB(inode
), inode
->i_ino
);
2446 f2fs_bug_on(F2FS_I_SB(inode
), IS_ERR(ipage
));
2448 ri
= F2FS_INODE(page
);
2449 if (ri
->i_inline
& F2FS_INLINE_XATTR
) {
2450 set_inode_flag(inode
, FI_INLINE_XATTR
);
2452 clear_inode_flag(inode
, FI_INLINE_XATTR
);
2456 dst_addr
= inline_xattr_addr(inode
, ipage
);
2457 src_addr
= inline_xattr_addr(inode
, page
);
2458 inline_size
= inline_xattr_size(inode
);
2460 f2fs_wait_on_page_writeback(ipage
, NODE
, true);
2461 memcpy(dst_addr
, src_addr
, inline_size
);
2463 f2fs_update_inode(inode
, ipage
);
2464 f2fs_put_page(ipage
, 1);
2467 int f2fs_recover_xattr_data(struct inode
*inode
, struct page
*page
)
2469 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
2470 nid_t prev_xnid
= F2FS_I(inode
)->i_xattr_nid
;
2472 struct dnode_of_data dn
;
2473 struct node_info ni
;
2480 /* 1: invalidate the previous xattr nid */
2481 err
= f2fs_get_node_info(sbi
, prev_xnid
, &ni
);
2485 f2fs_invalidate_blocks(sbi
, ni
.blk_addr
);
2486 dec_valid_node_count(sbi
, inode
, false);
2487 set_node_addr(sbi
, &ni
, NULL_ADDR
, false);
2490 /* 2: update xattr nid in inode */
2491 if (!f2fs_alloc_nid(sbi
, &new_xnid
))
2494 set_new_dnode(&dn
, inode
, NULL
, NULL
, new_xnid
);
2495 xpage
= f2fs_new_node_page(&dn
, XATTR_NODE_OFFSET
);
2496 if (IS_ERR(xpage
)) {
2497 f2fs_alloc_nid_failed(sbi
, new_xnid
);
2498 return PTR_ERR(xpage
);
2501 f2fs_alloc_nid_done(sbi
, new_xnid
);
2502 f2fs_update_inode_page(inode
);
2504 /* 3: update and set xattr node page dirty */
2505 memcpy(F2FS_NODE(xpage
), F2FS_NODE(page
), VALID_XATTR_BLOCK_SIZE
);
2507 set_page_dirty(xpage
);
2508 f2fs_put_page(xpage
, 1);
2513 int f2fs_recover_inode_page(struct f2fs_sb_info
*sbi
, struct page
*page
)
2515 struct f2fs_inode
*src
, *dst
;
2516 nid_t ino
= ino_of_node(page
);
2517 struct node_info old_ni
, new_ni
;
2521 err
= f2fs_get_node_info(sbi
, ino
, &old_ni
);
2525 if (unlikely(old_ni
.blk_addr
!= NULL_ADDR
))
2528 ipage
= f2fs_grab_cache_page(NODE_MAPPING(sbi
), ino
, false);
2530 congestion_wait(BLK_RW_ASYNC
, HZ
/50);
2534 /* Should not use this inode from free nid list */
2535 remove_free_nid(sbi
, ino
);
2537 if (!PageUptodate(ipage
))
2538 SetPageUptodate(ipage
);
2539 fill_node_footer(ipage
, ino
, ino
, 0, true);
2540 set_cold_node(page
, false);
2542 src
= F2FS_INODE(page
);
2543 dst
= F2FS_INODE(ipage
);
2545 memcpy(dst
, src
, (unsigned long)&src
->i_ext
- (unsigned long)src
);
2547 dst
->i_blocks
= cpu_to_le64(1);
2548 dst
->i_links
= cpu_to_le32(1);
2549 dst
->i_xattr_nid
= 0;
2550 dst
->i_inline
= src
->i_inline
& (F2FS_INLINE_XATTR
| F2FS_EXTRA_ATTR
);
2551 if (dst
->i_inline
& F2FS_EXTRA_ATTR
) {
2552 dst
->i_extra_isize
= src
->i_extra_isize
;
2554 if (f2fs_sb_has_flexible_inline_xattr(sbi
->sb
) &&
2555 F2FS_FITS_IN_INODE(src
, le16_to_cpu(src
->i_extra_isize
),
2556 i_inline_xattr_size
))
2557 dst
->i_inline_xattr_size
= src
->i_inline_xattr_size
;
2559 if (f2fs_sb_has_project_quota(sbi
->sb
) &&
2560 F2FS_FITS_IN_INODE(src
, le16_to_cpu(src
->i_extra_isize
),
2562 dst
->i_projid
= src
->i_projid
;
2568 if (unlikely(inc_valid_node_count(sbi
, NULL
, true)))
2570 set_node_addr(sbi
, &new_ni
, NEW_ADDR
, false);
2571 inc_valid_inode_count(sbi
);
2572 set_page_dirty(ipage
);
2573 f2fs_put_page(ipage
, 1);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	block_t addr;
	int i, idx, last_offset, nrpages;

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
		nrpages = min(last_offset - i, BIO_MAX_PAGES);

		/* readahead node pages */
		f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);

		for (idx = addr; idx < addr + nrpages; idx++) {
			struct page *page = f2fs_get_tmp_page(sbi, idx);

			if (IS_ERR(page))
				return PTR_ERR(page);

			rn = F2FS_NODE(page);
			sum_entry->nid = rn->footer.nid;
			sum_entry->version = 0;
			sum_entry->ofs_in_node = 0;
			sum_entry++;
			f2fs_put_page(page, 1);
		}

		invalidate_mapping_pages(META_MAPPING(sbi), addr,
							addr + nrpages);
	}
	return 0;
}
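/*
 * Drain the NAT journal kept in the hot data curseg: every journalled
 * entry is moved back into the in-memory NAT cache and marked dirty so
 * it will be written out through the normal NAT flush path.
 */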
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));

		raw_ne = nat_in_journal(journal, i);

		ne = __lookup_nat_cache(nm_i, nid);
		if (!ne) {
			ne = __alloc_nat_entry(nid, true);
			__init_nat_entry(nm_i, ne, &raw_ne, true);
		}

		/*
		 * if a free nat in the journal has not been used since the
		 * last checkpoint, remove it from the available nids, since
		 * it will be added again later.
		 */
		if (!get_nat_flag(ne, IS_DIRTY) &&
				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
			spin_lock(&nm_i->nid_list_lock);
			nm_i->available_nids--;
			spin_unlock(&nm_i->nid_list_lock);
		}

		__set_nat_cache_dirty(nm_i, ne);
	}
	update_nats_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}
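/*
 * Insert @nes into @head keeping the list sorted by entry count; sets
 * that can no longer fit in the journal (>= @max entries) go straight
 * to the tail so they are flushed to NAT pages last.
 */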
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
					struct list_head *head, int max)
{
	struct nat_entry_set *cur;

	if (nes->entry_cnt >= max)
		goto add_out;

	list_for_each_entry(cur, head, set_list) {
		if (cur->entry_cnt >= nes->entry_cnt) {
			list_add(&nes->set_list, cur->set_list.prev);
			return;
		}
	}
add_out:
	list_add_tail(&nes->set_list, head);
}
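/*
 * Refresh the full/empty nat_bits for the NAT block that starts at
 * @start_nid, based on how many entries in the just-written NAT page
 * still carry a valid block address.
 */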
static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
						struct page *page)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
	struct f2fs_nat_block *nat_blk = page_address(page);
	int valid = 0;
	int i = 0;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	if (nat_index == 0) {
		valid = 1;
		i = 1;
	}
	for (; i < NAT_ENTRY_PER_BLOCK; i++) {
		if (nat_blk->entries[i].block_addr != NULL_ADDR)
			valid++;
	}
	if (valid == 0) {
		__set_bit_le(nat_index, nm_i->empty_nat_bits);
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
		return;
	}

	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
	if (valid == NAT_ENTRY_PER_BLOCK)
		__set_bit_le(nat_index, nm_i->full_nat_bits);
	else
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
}
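/*
 * Write back one set of dirty NAT entries, either into the NAT journal
 * of the hot data curseg (when it still has room and nat_bits are not
 * being written) or into the next NAT page on disk.
 */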
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
		struct nat_entry_set *set, struct cp_control *cpc)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
	bool to_journal = true;
	struct f2fs_nat_block *nat_blk;
	struct nat_entry *ne, *cur;
	struct page *page = NULL;

	/*
	 * there are two steps to flush nat entries:
	 * #1, flush nat entries to the journal in the current hot data summary block.
	 * #2, flush nat entries to the nat page.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
		to_journal = false;

	if (to_journal) {
		down_write(&curseg->journal_rwsem);
	} else {
		page = get_next_nat_page(sbi, start_nid);
		nat_blk = page_address(page);
		f2fs_bug_on(sbi, !nat_blk);
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
		struct f2fs_nat_entry *raw_ne;
		nid_t nid = nat_get_nid(ne);
		int offset;

		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);

		if (to_journal) {
			offset = f2fs_lookup_journal_in_cursum(journal,
							NAT_JOURNAL, nid, 1);
			f2fs_bug_on(sbi, offset < 0);
			raw_ne = &nat_in_journal(journal, offset);
			nid_in_journal(journal, offset) = cpu_to_le32(nid);
		} else {
			raw_ne = &nat_blk->entries[nid - start_nid];
		}
		raw_nat_from_node_info(raw_ne, &ne->ni);

		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
		if (nat_get_blkaddr(ne) == NULL_ADDR) {
			add_free_nid(sbi, nid, false, true);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, nid, false, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}

	if (to_journal) {
		up_write(&curseg->journal_rwsem);
	} else {
		__update_nat_bits(sbi, start_nid, page);
		f2fs_put_page(page, 1);
	}

	/* Allow dirty nats by node block allocation in write_begin */
	if (!set->entry_cnt) {
		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
		kmem_cache_free(nat_entry_set_slab, set);
	}
}
/*
 * This function is called during the checkpointing process.
 */
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct nat_entry_set *setvec[SETVEC_SIZE];
	struct nat_entry_set *set, *tmp;
	unsigned int found;
	nid_t set_idx = 0;
	LIST_HEAD(sets);

	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
	if (enabled_nat_bits(sbi, cpc)) {
		down_write(&nm_i->nat_tree_lock);
		remove_nats_in_journal(sbi);
		up_write(&nm_i->nat_tree_lock);
	}

	if (!nm_i->dirty_nat_cnt)
		return;

	down_write(&nm_i->nat_tree_lock);

	/*
	 * if there is not enough space in the journal to store the dirty nat
	 * entries, remove all entries from the journal and merge them
	 * into the nat entry set.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
		remove_nats_in_journal(sbi);

	while ((found = __gang_lookup_nat_set(nm_i,
					set_idx, SETVEC_SIZE, setvec))) {
		unsigned int idx;

		set_idx = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++)
			__adjust_nat_entry_set(setvec[idx], &sets,
						MAX_NAT_JENTRIES(journal));
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(set, tmp, &sets, set_list)
		__flush_nat_entry_set(sbi, set, cpc);

	up_write(&nm_i->nat_tree_lock);
	/* Allow dirty nats by node block allocation in write_begin */
}
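/*
 * Load the nat_bits blocks stored at the tail of the checkpoint area
 * and split them into the full/empty bitmaps; the copy is discarded if
 * its embedded checkpoint version does not match the current one.
 */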
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
	unsigned int i;
	__u64 cp_ver = cur_cp_version(ckpt);
	block_t nat_bits_addr;

	if (!enabled_nat_bits(sbi, NULL))
		return 0;

	nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
	nm_i->nat_bits = f2fs_kzalloc(sbi,
			nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
	if (!nm_i->nat_bits)
		return -ENOMEM;

	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
						nm_i->nat_bits_blocks;
	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
		struct page *page;

		page = f2fs_get_meta_page(sbi, nat_bits_addr++);
		if (IS_ERR(page)) {
			disable_nat_bits(sbi, true);
			return PTR_ERR(page);
		}

		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
					page_address(page), F2FS_BLKSIZE);
		f2fs_put_page(page, 1);
	}

	cp_ver |= (cur_cp_crc(ckpt) << 32);
	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
		disable_nat_bits(sbi, true);
		return 0;
	}

	nm_i->full_nat_bits = nm_i->nat_bits + 8;
	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;

	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
	return 0;
}
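/*
 * Seed the free nid bitmaps from nat_bits: NAT blocks flagged as empty
 * contribute all of their nids as free, while full blocks are only
 * marked as already scanned.
 */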
static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i = 0;
	nid_t nid, last_nid;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);

		nid = i * NAT_ENTRY_PER_BLOCK;
		last_nid = nid + NAT_ENTRY_PER_BLOCK;

		spin_lock(&NM_I(sbi)->nid_list_lock);
		for (; nid < last_nid; nid++)
			update_free_nid_bitmap(sbi, nid, true, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
	}

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);
	}
}
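/*
 * Compute the NAT geometry from the raw superblock and initialize the
 * in-memory node manager state: counters, thresholds, radix trees,
 * lists, locks, and the NAT version bitmap copied from the checkpoint.
 */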
static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
	unsigned int nat_segs;
	int err;

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes the pair segment, so divide by 2 */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;

	/* unused nids: 0, node, meta (and root is counted as a valid node) */
	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
				sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
	nm_i->nid_cnt[FREE_NID] = 0;
	nm_i->nid_cnt[PREALLOC_NID] = 0;
	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;

	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->free_nid_list);
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
	INIT_LIST_HEAD(&nm_i->nat_entries);
	spin_lock_init(&nm_i->nat_list_lock);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->nid_list_lock);
	init_rwsem(&nm_i->nat_tree_lock);

	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;

	err = __get_nat_bitmaps(sbi);
	if (err)
		return err;

#ifdef CONFIG_F2FS_CHECK_FS
	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap_mir)
		return -ENOMEM;
#endif

	return 0;
}
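/*
 * Allocate the per-NAT-block free nid bitmaps plus the bitmap and
 * counter arrays used to track which NAT blocks have been scanned and
 * how many free nids each one holds.
 */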
static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int i;

	nm_i->free_nid_bitmap =
		f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
						nm_i->nat_blocks),
				GFP_KERNEL);
	if (!nm_i->free_nid_bitmap)
		return -ENOMEM;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
			f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
		if (!nm_i->free_nid_bitmap[i])
			return -ENOMEM;
	}

	nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
								GFP_KERNEL);
	if (!nm_i->nat_block_bitmap)
		return -ENOMEM;

	nm_i->free_nid_count =
		f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
						nm_i->nat_blocks),
				GFP_KERNEL);
	if (!nm_i->free_nid_count)
		return -ENOMEM;
	return 0;
}
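/*
 * Mount-time entry point: allocate the node manager, initialize it,
 * set up the free nid cache, preload the free nid bitmaps from
 * nat_bits, and start building the free nid list.
 */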
int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
{
	int err;

	sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
							GFP_KERNEL);
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

	err = init_free_nid_cache(sbi);
	if (err)
		return err;

	/* load free nid status from nat_bits table */
	load_free_nid_bitmap(sbi);

	return f2fs_build_free_nids(sbi, true, true);
}
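/*
 * Unmount-time teardown: drop every cached free nid, NAT entry and NAT
 * entry set, then free all node manager bitmaps and the nm_info itself.
 */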
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
	struct nat_entry_set *setvec[SETVEC_SIZE];
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
		__remove_free_nid(sbi, i, FREE_NID);
		spin_unlock(&nm_i->nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		spin_lock(&nm_i->nid_list_lock);
	}
	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
	spin_unlock(&nm_i->nid_list_lock);

	/* destroy nat cache */
	down_write(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
					nid, NATVEC_SIZE, natvec))) {
		unsigned int idx;

		nid = nat_get_nid(natvec[found - 1]) + 1;
		for (idx = 0; idx < found; idx++) {
			spin_lock(&nm_i->nat_list_lock);
			list_del(&natvec[idx]->list);
			spin_unlock(&nm_i->nat_list_lock);

			__del_from_nat_cache(nm_i, natvec[idx]);
		}
	}
	f2fs_bug_on(sbi, nm_i->nat_cnt);

	/* destroy nat set cache */
	nid = 0;
	while ((found = __gang_lookup_nat_set(nm_i,
					nid, SETVEC_SIZE, setvec))) {
		unsigned int idx;

		nid = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++) {
			/* entry_cnt may be non-zero when a cp_error occurred */
			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
		}
	}
	up_write(&nm_i->nat_tree_lock);

	kvfree(nm_i->nat_block_bitmap);
	if (nm_i->free_nid_bitmap) {
		int i;

		for (i = 0; i < nm_i->nat_blocks; i++)
			kvfree(nm_i->free_nid_bitmap[i]);
		kfree(nm_i->free_nid_bitmap);
	}
	kvfree(nm_i->free_nid_count);

	kfree(nm_i->nat_bitmap);
	kfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(nm_i->nat_bitmap_mir);
#endif
	sbi->nm_info = NULL;
	kfree(nm_i);
}
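/*
 * Slab caches for NAT entries, NAT entry sets, free nids and fsync
 * node entries; created once at module init and destroyed on exit.
 */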
int __init f2fs_create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
			sizeof(struct nat_entry));
	if (!nat_entry_slab)
		goto fail;

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
			sizeof(struct free_nid));
	if (!free_nid_slab)
		goto destroy_nat_entry;

	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
			sizeof(struct nat_entry_set));
	if (!nat_entry_set_slab)
		goto destroy_free_nid;

	fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
			sizeof(struct fsync_node_entry));
	if (!fsync_node_entry_slab)
		goto destroy_nat_entry_set;
	return 0;

destroy_nat_entry_set:
	kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
	kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
	kmem_cache_destroy(nat_entry_slab);
fail:
	return -ENOMEM;
}
void f2fs_destroy_node_manager_caches(void)
{
	kmem_cache_destroy(fsync_node_entry_slab);
	kmem_cache_destroy(nat_entry_set_slab);
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}