// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;
	long nr_items;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l)) {
		if (irq)
			spin_lock_irq(&l->lock);
		else
			spin_lock(&l->lock);
		nr_items = READ_ONCE(l->nr_items);
		if (likely(nr_items != LONG_MIN)) {
			rcu_read_unlock();
			return l;
		}
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
	}
	/*
	 * Caller may simply bail out if raced with reparenting or
	 * may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

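/*
 * Example (illustrative only; the names below are hypothetical and not part
 * of this file): a cache that embeds a struct list_head in its slab-allocated
 * objects can park idle objects on a list_lru with the *_obj helpers above,
 * which derive the NUMA node and memcg from the object address:
 *
 *	struct foo_object {
 *		struct list_head lru;	// linked while the object is idle
 *		...
 *	};
 *
 *	// object became unused: make it visible to the shrinker
 *	list_lru_add_obj(&foo_lru, &obj->lru);
 *
 *	// object is reused or freed: take it back off the LRU
 *	list_lru_del_obj(&foo_lru, &obj->lru);
 */
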
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * Decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items.
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
		 */
		case LRU_RETRY:
			goto restart;
		case LRU_REMOVED_RETRY:
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			atomic_long_dec(&nlru->nr_items);
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_STOP:
			goto out;
		default:
			BUG();
		}
	}
	unlock_list_lru(l, irq_off);
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, true);
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			rcu_read_lock();
			memcg = mem_cgroup_from_id(index);
			if (!mem_cgroup_tryget(memcg)) {
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();
			isolated += __list_lru_walk_one(lru, nid, memcg,
							isolate, cb_arg,
							nr_to_walk, false);
			mem_cgroup_put(memcg);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

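/*
 * Example (illustrative only; foo_isolate, foo_object and foo_lru are
 * hypothetical): a shrinker's scan callback typically drives the walkers
 * above with an isolate callback. The callback runs with the list_lru_one
 * lock held and reports what it did through enum lru_status:
 *
 *	static enum lru_status foo_isolate(struct list_head *item,
 *					   struct list_lru_one *list,
 *					   void *cb_arg)
 *	{
 *		struct foo_object *obj = container_of(item, struct foo_object, lru);
 *
 *		if (!foo_object_is_idle(obj))
 *			return LRU_ROTATE;	// keep it, move to the tail
 *
 *		list_lru_isolate_move(list, item, cb_arg);	// collect for disposal
 *		return LRU_REMOVED;
 *	}
 *
 *	unsigned long nr_to_walk = 128;
 *	LIST_HEAD(dispose);
 *
 *	list_lru_walk_one(&foo_lru, nid, memcg, foo_isolate, &dispose, &nr_to_walk);
 */
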
static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		WARN_ON(src->nr_items < 0);
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}

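/*
 * Note: src->nr_items == LONG_MIN is the "dead" marker that
 * lock_list_lru_of_memcg() tests for. A racer that locks a reparented
 * list_lru_one sees LONG_MIN, drops the lock and retries against the
 * parent memcg, so new items never land on a dead list.
 */
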
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the Xarray to ensure no ongoing list_lru_memcg
		 * allocation and that further allocations will see
		 * css_is_dying().
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With the Xarray value set to NULL, holding the lru lock below
		 * prevents list_lru_{add,del,isolate} from touching the lru,
		 * so it is safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * Here all list_lrus corresponding to the cgroup are guaranteed
		 * to remain empty, so we can safely free this lru; any further
		 * memcg_list_lru_alloc() call will simply bail out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}

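/*
 * memcg_reparent_list_lrus() is expected to be called from the memcg
 * offlining path (outside this file), i.e. while the child memcg's css is
 * dying. That is why the slow path in lock_list_lru_of_memcg() only asserts
 * css_is_dying() before falling back to the parent memcg.
 */
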
static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	do {
		/*
		 * Keep finding the farthest parent that wasn't populated
		 * until memcg itself is reached.
		 */
		pos = memcg;
		parent = parent_mem_cgroup(pos);
		while (!memcg_list_lru_allocated(parent, lru)) {
			pos = parent;
			parent = parent_mem_cgroup(pos);
		}

		mlru = memcg_init_list_lru_one(lru, gfp);
		if (!mlru)
			return -ENOMEM;
		xas_set(&xas, pos->kmemcg_id);
		do {
			xas_lock_irqsave(&xas, flags);
			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
				xas_store(&xas, mlru);
				if (!xas_error(&xas))
					mlru = NULL;
			}
			xas_unlock_irqrestore(&xas, flags);
		} while (xas_nomem(&xas, gfp));
		if (mlru)
			kfree(mlru);
	} while (pos != memcg && !css_is_dying(&pos->css));

	return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;

	if (mem_cgroup_kmem_disabled())
		memcg_aware = false;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i)
		init_one_lru(lru, &lru->node[i].lru);

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

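/*
 * Example (illustrative only): users normally reach __list_lru_init() through
 * the list_lru_init()/list_lru_init_memcg() wrappers in <linux/list_lru.h>
 * rather than calling it directly; foo_lru and foo_shrinker below are
 * hypothetical:
 *
 *	static struct list_lru foo_lru;
 *
 *	err = list_lru_init_memcg(&foo_lru, foo_shrinker);	// memcg aware
 *	// or: err = list_lru_init(&foo_lru);			// per-node only
 *	if (err)
 *		return err;
 *	...
 *	list_lru_destroy(&foo_lru);
 */
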
void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);