// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}
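
/*
 * Memcg-aware lrus are kept on a global list so that they can be
 * reparented when a cgroup is destroyed (see memcg_reparent_list_lrus()).
 */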
static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}
static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}
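
/*
 * Find and lock the list_lru_one holding @memcg's objects on @nid.
 * A list that has been reparented is marked dead (nr_items == LONG_MIN);
 * in that case either bail out (@skip_empty) or retry with the parent
 * memcg, whose list now holds the reparented objects.
 */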
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;
	long nr_items;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l)) {
		if (irq)
			spin_lock_irq(&l->lock);
		else
			spin_lock(&l->lock);
		nr_items = READ_ONCE(l->nr_items);
		if (likely(nr_items != LONG_MIN)) {
			WARN_ON(nr_items < 0);
			rcu_read_unlock();
			return l;
		}
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
	}
	/*
	 * The caller may simply bail out if it raced with reparenting, or
	 * it may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}
static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else /* !CONFIG_MEMCG */
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */
/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);
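
/*
 * Like list_lru_add(), but the NUMA node and the memcg are derived from
 * the object's address; the RCU read lock keeps the memcg alive while
 * the object is being added.
 */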
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);
/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
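
/*
 * Like list_lru_del(), but the NUMA node and the memcg are derived from
 * the object's address, mirroring list_lru_add_obj().
 */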
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);
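
/*
 * The isolate helpers are meant to be called from a walk callback with the
 * list_lru_one lock already held; only the per-list accounting is updated
 * here, the per-node counter is adjusted by the walker itself.
 */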
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);
void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);
unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
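
/*
 * Walk one per-memcg (or the root) list on @nid, calling @isolate on each
 * item until *nr_to_walk items have been scanned. The callback's return
 * value decides whether the item is removed, rotated, skipped, or whether
 * the walk has to be restarted or stopped.
 */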
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;

	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
		 */
		case LRU_RETRY:
			goto restart;
		case LRU_REMOVED_RETRY:
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			atomic_long_dec(&nlru->nr_items);
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_STOP:
			goto out;
		default:
			BUG();
		}
	}
	unlock_list_lru(l, irq_off);
out:
	return isolated;
}
unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, true);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one_irq);
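
/*
 * Walk every list on @nid: the root list first, then each per-memcg list,
 * taking a reference on the memcg so it cannot go away during the walk.
 */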
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			rcu_read_lock();
			memcg = mem_cgroup_from_id(index);
			if (!mem_cgroup_tryget(memcg)) {
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();
			isolated += __list_lru_walk_one(lru, nid, memcg,
							isolate, cb_arg,
							nr_to_walk, false);
			mem_cgroup_put(memcg);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
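/*
 * Allocate one list_lru_memcg, i.e. an array with one list_lru_one per
 * NUMA node, to back a single memcg in this lru's XArray.
 */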
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}
static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}
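
/*
 * Splice all objects from @src into the corresponding list of @dst_memcg
 * on @nid and mark @src dead, so that concurrent users fall back to the
 * parent via lock_list_lru_of_memcg().
 */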
static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the XArray to ensure that no list_lru_memcg allocation
		 * is ongoing and that any further allocation will see
		 * css_is_dying().
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With the XArray entry set to NULL, holding the lru lock
		 * below prevents list_lru_{add,del,isolate} from touching
		 * the lru, so it is safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * All list_lrus corresponding to the cgroup are now guaranteed
		 * to remain empty, so we can safely free this mlru; any further
		 * memcg_list_lru_alloc() call will simply bail out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}
static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}
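
/*
 * Make sure @memcg and every ancestor that is still missing one get a
 * list_lru_memcg in @lru's XArray, so that objects charged to @memcg can
 * later be reparented level by level.
 */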
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	do {
		/*
		 * Keep finding the farthest ancestor that is not yet
		 * populated, until memcg itself is reached.
		 */
		pos = memcg;
		parent = parent_mem_cgroup(pos);
		while (!memcg_list_lru_allocated(parent, lru)) {
			pos = parent;
			parent = parent_mem_cgroup(pos);
		}

		mlru = memcg_init_list_lru_one(lru, gfp);
		if (!mlru)
			return -ENOMEM;
		xas_set(&xas, pos->kmemcg_id);
		do {
			xas_lock_irqsave(&xas, flags);
			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
				xas_store(&xas, mlru);
				if (!xas_error(&xas))
					mlru = NULL;
			}
			xas_unlock_irqrestore(&xas, flags);
		} while (xas_nomem(&xas, gfp));
		if (mlru)
			kfree(mlru);
	} while (pos != memcg && !css_is_dying(&pos->css));

	return xas_error(&xas);
}
#else /* !CONFIG_MEMCG */
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */
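
/*
 * Initialise the per-node lists and, for a memcg-aware lru, record the
 * shrinker id and register the lru for reparenting.
 */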
int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;

	if (mem_cgroup_kmem_disabled())
		memcg_aware = false;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i)
		init_one_lru(lru, &lru->node[i].lru);

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
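
/*
 * Tear down an lru initialised with __list_lru_init(); safe to call on an
 * lru that was never initialised or has already been destroyed.
 */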
void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);