/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * Either lock or RCU protects the array of per cgroup lists
         * from relocation (see memcg_update_list_lru_node).
         */
        memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
                                           lockdep_is_held(&nlru->lock));
        if (memcg_lrus && idx >= 0)
                return memcg_lrus->lru[idx];
        return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
        struct page *page;

        if (!memcg_kmem_enabled())
                return NULL;
        page = virt_to_head_page(ptr);
        return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!nlru->memcg_lrus)
                goto out;

        memcg = mem_cgroup_from_kmem(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}

#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        memcg_set_shrinker_bit(memcg, nid,
                                               lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

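/*
 * Usage sketch (illustrative, not part of the original file): a cache embeds
 * a list_head in each object and calls list_lru_add()/list_lru_del() as the
 * object falls out of use or is picked up again.  The names "my_lru" and
 * "struct my_object" below are hypothetical.
 *
 *	struct my_object {
 *		struct list_head lru;	initialized with INIT_LIST_HEAD()
 *		...
 *	};
 *
 *	if (list_lru_add(&my_lru, &obj->lru))
 *		...the object was not on the LRU before...
 *
 *	if (list_lru_del(&my_lru, &obj->lru))
 *		...the object was on the LRU and has been removed...
 */
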
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
        count = l->nr_items;
        rcu_read_unlock();

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

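/*
 * Illustrative sketch (an assumption, not taken from this file): a shrinker's
 * ->count_objects() callback typically reports the LRU size for the node and
 * memcg being targeted, e.g. through list_lru_count_one() or the
 * list_lru_shrink_count() wrapper from <linux/list_lru.h>.  "my_lru" and
 * "my_shrink_count" are hypothetical names.
 *
 *	static unsigned long my_shrink_count(struct shrinker *shrink,
 *					     struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_lru, sc);
 *	}
 */
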
static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        /* fall through */
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock(&nlru->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

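/*
 * Illustrative isolate callback (hypothetical names, following the
 * list_lru_walk_cb contract): it runs with the lru spinlock held, detaches
 * the item with list_lru_isolate(), and returns an enum lru_status telling
 * the walker what happened.  "still_in_use()" and "struct my_object" are
 * assumptions made for this sketch.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *lru,
 *					  spinlock_t *lru_lock, void *arg)
 *	{
 *		struct list_head *dispose = arg;
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *
 *		if (still_in_use(obj))
 *			return LRU_SKIP;
 *
 *		list_lru_isolate(lru, item);
 *		list_add(item, dispose);
 *		return LRU_REMOVED;
 *	}
 */
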
unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock_irq(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock_irq(&nlru->lock);
        return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        struct list_lru_node *nlru = &lru->node[nid];

                        spin_lock(&nlru->lock);
                        isolated += __list_lru_walk_one(nlru, memcg_idx,
                                                        isolate, cb_arg,
                                                        nr_to_walk);
                        spin_unlock(&nlru->lock);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

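/*
 * Illustrative sketch (an assumption): a shrinker's ->scan_objects() callback
 * usually pairs an isolate callback like the one sketched above with
 * list_lru_shrink_walk() (a wrapper around list_lru_walk_one() in
 * <linux/list_lru.h>) and then disposes of the isolated objects outside the
 * lru lock.  "my_lru", "my_isolate" and "my_dispose_list" are hypothetical.
 *
 *	static unsigned long my_shrink_scan(struct shrinker *shrink,
 *					    struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &dispose);
 *		my_dispose_list(&dispose);
 *		return freed;
 *	}
 */
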
static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        int size = memcg_nr_cache_ids;

        memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
                              size * sizeof(void *), GFP_KERNEL);
        if (!memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
                kvfree(memcg_lrus);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when shrinker has already been unregistered,
         * and nobody can use it. So, there is no need to use kvfree_rcu().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
        struct list_lru_memcg *mlru;

        mlru = container_of(head, struct list_lru_memcg, rcu);
        kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(nlru->memcg_lrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kvfree(new);
                return -ENOMEM;
        }

        memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

        /*
         * The locking below allows readers that hold nlru->lock avoid taking
         * rcu_read_lock (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);

        call_rcu(&old->rcu, kvfree_rcu);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        struct list_lru_memcg *memcg_lrus;

        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
                                               lockdep_is_held(&list_lrus_mutex));
        /* do not bother shrinking the array back to the old size, because we
         * cannot handle allocation failures here */
        __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        lru->memcg_aware = memcg_aware;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
                                      int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
        bool set;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);
        set = (!dst->nr_items && src->nr_items);
        dst->nr_items += src->nr_items;
        if (set)
                memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        src->nr_items = 0;

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_memcg);
        mutex_unlock(&list_lrus_mutex);
}

#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        size_t size = sizeof(*lru->node) * nr_node_ids;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kzalloc(size, GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

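/*
 * Lifecycle sketch (illustrative): callers normally reach __list_lru_init()
 * through the wrappers in <linux/list_lru.h>, such as list_lru_init() or
 * list_lru_init_memcg(), and pair every successful init with
 * list_lru_destroy() below.  "my_lru" and "my_shrinker" are hypothetical.
 *
 *	static struct list_lru my_lru;
 *
 *	err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *	if (err)
 *		return err;
 *	...
 *	list_lru_destroy(&my_lru);
 */
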
void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);