// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return memcg_from_slab_page(page);
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	lru->memcg_aware = memcg_aware;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}
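
/*
 * Note: memcg_update_all_list_lrus() below is driven from the memcg side when
 * the id space for kmem cgroups (memcg_nr_cache_ids) grows; every registered
 * memcg-aware lru gets its per-memcg array resized, and on failure the lrus
 * already updated are rolled back via the cancel helpers above.
 */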

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}
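
/*
 * The drain helpers below reparent per-memcg lists: when a kmem cgroup id is
 * released (its memcg is being offlined), objects still sitting on the lists
 * indexed by src_idx are spliced onto the destination memcg's lists so that
 * they stay visible to the shrinkers.
 */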

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
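
/*
 * Callers normally use the wrappers from <linux/list_lru.h> instead of
 * calling __list_lru_init() directly.  Sketch of a memcg-aware setup
 * (my_* names are hypothetical); the shrinker id must be allocated before
 * the lru is initialised, hence the prealloc/register split:
 *
 *	static struct list_lru my_lru;
 *	static struct shrinker my_shrinker = {
 *		.count_objects	= my_count,
 *		.scan_objects	= my_scan,
 *		.seeks		= DEFAULT_SEEKS,
 *		.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
 *	};
 *
 *	err = prealloc_shrinker(&my_shrinker);		// allocates .id
 *	if (!err)
 *		err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *	if (!err)
 *		register_shrinker_prepared(&my_shrinker);
 *
 * A plain, non-memcg lru only needs list_lru_init(&my_lru), and
 * list_lru_destroy() below undoes either form of initialisation.
 */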

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);