// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
 * tree when Linux VM needs more RAM.
 *
 * We do not implement any LRU lists to find oldest znodes to free because it
 * would add additional overhead to the file system fast paths. So the shrinker
 * just walks the TNC tree when searching for znodes to free.
 *
 * If the root of a TNC sub-tree is clean and old enough, then the children are
 * also clean and old enough. So the shrinker walks the TNC in level order and
 * dumps entire sub-trees.
 *
 * The age of znodes is just the time-stamp when they were last looked at.
 * The current shrinker first tries to evict old znodes, then young ones.
 *
 * Since the shrinker is global, it has to protect against races with FS
 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
 */
32 /* List of all UBIFS file-system instances */
33 LIST_HEAD(ubifs_infos
);
36 * We number each shrinker run and record the number on the ubifs_info structure
37 * so that we can easily work out which ubifs_info structures have already been
38 * done by the current run.
40 static unsigned int shrinker_run_no
;
42 /* Protects 'ubifs_infos' list */
43 DEFINE_SPINLOCK(ubifs_infos_lock
);
45 /* Global clean znode counter (for all mounted UBIFS instances) */
46 atomic_long_t ubifs_clean_zn_cnt
;
49 * shrink_tnc - shrink TNC tree.
50 * @c: UBIFS file-system description object
51 * @nr: number of znodes to free
52 * @age: the age of znodes to free
53 * @contention: if any contention, this is set to %1
55 * This function traverses TNC tree and frees clean znodes. It does not free
56 * clean znodes which younger then @age. Returns number of freed znodes.
58 static int shrink_tnc(struct ubifs_info
*c
, int nr
, int age
, int *contention
)
61 struct ubifs_znode
*znode
, *zprev
;
62 time64_t time
= ktime_get_seconds();
64 ubifs_assert(c
, mutex_is_locked(&c
->umount_mutex
));
65 ubifs_assert(c
, mutex_is_locked(&c
->tnc_mutex
));
67 if (!c
->zroot
.znode
|| atomic_long_read(&c
->clean_zn_cnt
) == 0)
71 * Traverse the TNC tree in levelorder manner, so that it is possible
72 * to destroy large sub-trees. Indeed, if a znode is old, then all its
73 * children are older or of the same age.
75 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
76 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
77 * changed only when the 'c->tnc_mutex' is held.
80 znode
= ubifs_tnc_levelorder_next(c
, c
->zroot
.znode
, NULL
);
81 while (znode
&& total_freed
< nr
&&
82 atomic_long_read(&c
->clean_zn_cnt
) > 0) {
86 * If the znode is clean, but it is in the 'c->cnext' list, this
87 * means that this znode has just been written to flash as a
88 * part of commit and was marked clean. They will be removed
89 * from the list at end commit. We cannot change the list,
90 * because it is not protected by any mutex (design decision to
91 * make commit really independent and parallel to main I/O). So
92 * we just skip these znodes.
94 * Note, the 'clean_zn_cnt' counters are not updated until
95 * after the commit, so the UBIFS shrinker does not report
96 * the znodes which are in the 'c->cnext' list as freeable.
98 * Also note, if the root of a sub-tree is not in 'c->cnext',
99 * then the whole sub-tree is not in 'c->cnext' as well, so it
100 * is safe to dump whole sub-tree.
105 * Very soon these znodes will be removed from the list
106 * and become freeable.
109 } else if (!ubifs_zn_dirty(znode
) &&
110 abs(time
- znode
->time
) >= age
) {
112 znode
->parent
->zbranch
[znode
->iip
].znode
= NULL
;
114 c
->zroot
.znode
= NULL
;
116 freed
= ubifs_destroy_tnc_subtree(c
, znode
);
117 atomic_long_sub(freed
, &ubifs_clean_zn_cnt
);
118 atomic_long_sub(freed
, &c
->clean_zn_cnt
);
119 total_freed
+= freed
;
123 if (unlikely(!c
->zroot
.znode
))
127 znode
= ubifs_tnc_levelorder_next(c
, c
->zroot
.znode
, znode
);
135 * shrink_tnc_trees - shrink UBIFS TNC trees.
136 * @nr: number of znodes to free
137 * @age: the age of znodes to free
138 * @contention: if any contention, this is set to %1
140 * This function walks the list of mounted UBIFS file-systems and frees clean
141 * znodes which are older than @age, until at least @nr znodes are freed.
142 * Returns the number of freed znodes.
144 static int shrink_tnc_trees(int nr
, int age
, int *contention
)
146 struct ubifs_info
*c
;
151 spin_lock(&ubifs_infos_lock
);
153 run_no
= ++shrinker_run_no
;
154 } while (run_no
== 0);
155 /* Iterate over all mounted UBIFS file-systems and try to shrink them */
156 p
= ubifs_infos
.next
;
157 while (p
!= &ubifs_infos
) {
158 c
= list_entry(p
, struct ubifs_info
, infos_list
);
160 * We move the ones we do to the end of the list, so we stop
161 * when we see one we have already done.
163 if (c
->shrinker_run_no
== run_no
)
165 if (!mutex_trylock(&c
->umount_mutex
)) {
166 /* Some un-mount is in progress, try next FS */
172 * We're holding 'c->umount_mutex', so the file-system won't go
175 if (!mutex_trylock(&c
->tnc_mutex
)) {
176 mutex_unlock(&c
->umount_mutex
);
181 spin_unlock(&ubifs_infos_lock
);
183 * OK, now we have TNC locked, the file-system cannot go away -
184 * it is safe to reap the cache.
186 c
->shrinker_run_no
= run_no
;
187 freed
+= shrink_tnc(c
, nr
, age
, contention
);
188 mutex_unlock(&c
->tnc_mutex
);
189 spin_lock(&ubifs_infos_lock
);
190 /* Get the next list element before we move this one */
193 * Move this one to the end of the list to provide some
196 list_move_tail(&c
->infos_list
, &ubifs_infos
);
197 mutex_unlock(&c
->umount_mutex
);
201 spin_unlock(&ubifs_infos_lock
);
206 * kick_a_thread - kick a background thread to start commit.
208 * This function kicks a background thread to start background commit. Returns
209 * %-1 if a thread was kicked or there is another reason to assume the memory
210 * will soon be freed or become freeable. If there are no dirty znodes, returns
213 static int kick_a_thread(void)
216 struct ubifs_info
*c
;
219 * Iterate over all mounted UBIFS file-systems and find out if there is
220 * already an ongoing commit operation there. If no, then iterate for
221 * the second time and initiate background commit.
223 spin_lock(&ubifs_infos_lock
);
224 for (i
= 0; i
< 2; i
++) {
225 list_for_each_entry(c
, &ubifs_infos
, infos_list
) {
228 if (!mutex_trylock(&c
->umount_mutex
)) {
230 * Some un-mount is in progress, it will
231 * certainly free memory, so just return.
233 spin_unlock(&ubifs_infos_lock
);
237 dirty_zn_cnt
= atomic_long_read(&c
->dirty_zn_cnt
);
239 if (!dirty_zn_cnt
|| c
->cmt_state
== COMMIT_BROKEN
||
240 c
->ro_mount
|| c
->ro_error
) {
241 mutex_unlock(&c
->umount_mutex
);
245 if (c
->cmt_state
!= COMMIT_RESTING
) {
246 spin_unlock(&ubifs_infos_lock
);
247 mutex_unlock(&c
->umount_mutex
);
252 list_move_tail(&c
->infos_list
, &ubifs_infos
);
253 spin_unlock(&ubifs_infos_lock
);
255 ubifs_request_bg_commit(c
);
256 mutex_unlock(&c
->umount_mutex
);
259 mutex_unlock(&c
->umount_mutex
);
262 spin_unlock(&ubifs_infos_lock
);
267 unsigned long ubifs_shrink_count(struct shrinker
*shrink
,
268 struct shrink_control
*sc
)
270 long clean_zn_cnt
= atomic_long_read(&ubifs_clean_zn_cnt
);
273 * Due to the way UBIFS updates the clean znode counter it may
274 * temporarily be negative.
276 return clean_zn_cnt
>= 0 ? clean_zn_cnt
: 1;
279 unsigned long ubifs_shrink_scan(struct shrinker
*shrink
,
280 struct shrink_control
*sc
)
282 unsigned long nr
= sc
->nr_to_scan
;
285 long clean_zn_cnt
= atomic_long_read(&ubifs_clean_zn_cnt
);
289 * No clean znodes, nothing to reap. All we can do in this case
290 * is to kick background threads to start commit, which will
291 * probably make clean znodes which, in turn, will be freeable.
292 * And we return -1 which means will make VM call us again
295 dbg_tnc("no clean znodes, kick a thread");
296 return kick_a_thread();
299 freed
= shrink_tnc_trees(nr
, OLD_ZNODE_AGE
, &contention
);
303 dbg_tnc("not enough old znodes, try to free young ones");
304 freed
+= shrink_tnc_trees(nr
- freed
, YOUNG_ZNODE_AGE
, &contention
);
308 dbg_tnc("not enough young znodes, free all");
309 freed
+= shrink_tnc_trees(nr
- freed
, 0, &contention
);
311 if (!freed
&& contention
) {
312 dbg_tnc("freed nothing, but contention");
317 dbg_tnc("%lu znodes were freed, requested %lu", freed
, nr
);