/*
 * SLQB: A slab allocator that focuses on per-CPU scaling, and good performance
 * with order-0 allocations. Fastpath emphasis is placed on local allocation
 * and freeing, but with a secondary goal of good remote freeing (freeing on
 * another CPU from that which allocated).
 *
 * Using ideas and code from mm/slab.c, mm/slob.c, and mm/slub.c.
 */
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/fault-inject.h>
/*
 * TODO
 * - fix up releasing of offlined data structures. Not a big deal because
 *   they don't get cumulatively leaked with successive online/offline cycles.
 * - allow OOM conditions to flush back per-CPU pages to common lists to be
 *   reused by other CPUs.
 * - investigate performance with memoryless nodes. Perhaps CPUs can be given
 *   a default closest home node via which they can use fastpath functions.
 *   Perhaps it is not a big problem.
 */
/*
 * slqb_page overloads struct page, and is used to manage some slab allocation
 * aspects, however to avoid the horrible mess in include/linux/mm_types.h,
 * we'll just define our own struct slqb_page type variant here.
 */
struct slqb_page {
        union {
                struct {
                        unsigned long   flags;          /* mandatory */
                        atomic_t        _count;         /* mandatory */
                        unsigned int    inuse;          /* Nr of objects */
                        struct kmem_cache_list *list;   /* Pointer to list */
                        void            **freelist;     /* LIFO freelist */
                        union {
                                struct list_head lru;   /* misc. list */
                                struct rcu_head rcu_head; /* for rcu freeing */
                        };
                };
                struct page page;
        };
};
static inline void struct_slqb_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slqb_page) != sizeof(struct page)); }

#define PG_SLQB_BIT (1 << PG_slab)
/*
 * slqb_min_order: minimum allocation order for slabs
 */
static int slqb_min_order;

/*
 * slqb_min_objects: minimum number of objects per slab. Increasing this
 * will increase the allocation order for slabs with larger objects
 */
static int slqb_min_objects = 1;

#ifdef CONFIG_NUMA
static inline int slab_numa(struct kmem_cache *s)
{
        return s->flags & SLAB_NUMA;
}
#else
static inline int slab_numa(struct kmem_cache *s)
{
        return 0;
}
#endif

static inline int slab_hiwater(struct kmem_cache *s)
{
        return s->hiwater;
}

static inline int slab_freebatch(struct kmem_cache *s)
{
        return s->freebatch;
}
/*
 * Lock order:
 *   kmem_cache_node->list_lock
 *   kmem_cache_remote_free->lock
 *
 * SLQB is primarily per-cpu. For each kmem_cache, each CPU has:
 *
 * - A LIFO list of node-local objects. Allocation and freeing of node local
 *   objects go first to this list.
 *
 * - 2 lists of slab pages, free and partial pages. If an allocation misses
 *   the object list, it tries from the partial list, then the free list.
 *   After freeing an object to the object list, if it is over a watermark,
 *   some objects are freed back to pages. If an allocation misses these lists,
 *   a new slab page is allocated from the page allocator. If the free list
 *   reaches a watermark, some of its pages are returned to the page allocator.
 *
 * - A remote free queue, where objects freed that did not come from the local
 *   node are queued to. When this reaches a watermark, the objects are
 *   flushed out to the remotely freed queue of the CPU they were allocated
 *   from.
 *
 * - A remotely freed queue, where objects allocated from this CPU are flushed
 *   to from other CPUs' remote free queues. kmem_cache_remote_free->lock is
 *   used to protect access to this queue.
 *
 * When the remotely freed queue reaches a watermark, a flag is set to tell
 * the owner CPU to check it. The owner CPU will then check the queue on the
 * next allocation that misses the object list. It will move all objects from
 * this list onto the object list and then allocate one.
 *
 * This system of remote queueing is intended to reduce lock and remote
 * cacheline acquisitions, and give a cooling off period for remotely freed
 * objects before they are re-allocated.
 *
 * Node specific allocations from somewhere other than the local node are
 * handled by a per-node list which is the same as the above per-CPU data
 * structures except for the following differences:
 *
 * - kmem_cache_node->list_lock is used to protect access for multiple CPUs to
 *   allocate from a given node.
 *
 * - There is no remote free queue. Nodes don't free objects, CPUs do.
 */
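/*
 * Illustrative sketch, not part of the allocator: a minimal user-space model
 * of the queueing scheme described above, assuming hypothetical HIWATER and
 * FREEBATCH watermarks. Objects freed locally are pushed onto a LIFO list;
 * once the list grows past HIWATER a batch is "flushed" (here simply handed
 * back to malloc, standing in for freeing back to slab pages). The real code
 * below does the same thing per CPU, per kmem_cache, against slab pages.
 */
#if 0   /* example only, never built */
#include <stdlib.h>

struct obj { struct obj *next; };

static struct obj *freelist_head;       /* models one CPU's LIFO object list */
static unsigned long freelist_nr;

#define HIWATER         32              /* assumed watermark */
#define FREEBATCH       8               /* assumed flush batch size */

static void flush_free_list_model(void)
{
        unsigned long nr = FREEBATCH;

        while (nr-- && freelist_head) {
                struct obj *o = freelist_head;

                freelist_head = o->next;
                freelist_nr--;
                free(o);                /* stands in for freeing to a slab page */
        }
}

static void object_free_model(struct obj *o)
{
        o->next = freelist_head;        /* LIFO push, as in __slab_free() */
        freelist_head = o;
        if (++freelist_nr > HIWATER)
                flush_free_list_model();
}
#endif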
static inline void slqb_stat_inc(struct kmem_cache_list *list,
                                enum stat_item si)
{
#ifdef CONFIG_SLQB_STATS
        list->stats[si]++;
#endif
}

static inline void slqb_stat_add(struct kmem_cache_list *list,
                                enum stat_item si, unsigned long nr)
{
#ifdef CONFIG_SLQB_STATS
        list->stats[si] += nr;
#endif
}
static inline int slqb_page_to_nid(struct slqb_page *page)
{
        return page_to_nid(&page->page);
}

static inline void *slqb_page_address(struct slqb_page *page)
{
        return page_address(&page->page);
}

static inline struct zone *slqb_page_zone(struct slqb_page *page)
{
        return page_zone(&page->page);
}

static inline int virt_to_nid(const void *addr)
{
        return page_to_nid(virt_to_page(addr));
}

static inline struct slqb_page *virt_to_head_slqb_page(const void *addr)
{
        struct page *p;

        p = virt_to_head_page(addr);
        return (struct slqb_page *)p;
}
static inline void __free_slqb_pages(struct slqb_page *page, unsigned int order,
                                int pages)
{
        struct page *p = &page->page;

        reset_page_mapcount(p);

        VM_BUG_ON(!(p->flags & PG_SLQB_BIT));
        p->flags &= ~PG_SLQB_BIT;

        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += pages;
        __free_pages(p, order);
}
#ifdef CONFIG_SLQB_DEBUG
static inline int slab_debug(struct kmem_cache *s)
{
        return s->flags &
                        (SLAB_DEBUG_FREE |
                         SLAB_RED_ZONE |
                         SLAB_POISON |
                         SLAB_STORE_USER |
                         SLAB_TRACE);
}
static inline int slab_poison(struct kmem_cache *s)
{
        return s->flags & SLAB_POISON;
}
#else
static inline int slab_debug(struct kmem_cache *s)
{
        return 0;
}
static inline int slab_poison(struct kmem_cache *s)
{
        return 0;
}
#endif

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
                                SLAB_POISON | SLAB_STORE_USER)

/* Internal SLQB flags */
#define __OBJECT_POISON         0x80000000 /* Poison object */

/* Not all arches define cache_line_size */
#ifndef cache_line_size
#define cache_line_size()       L1_CACHE_BYTES
#endif
static struct notifier_block slab_notifier;

/*
 * slqb_lock protects slab_caches list and serialises hotplug operations.
 * hotplug operations take lock for write, other operations can hold off
 * hotplug by taking it for read (or write).
 */
static DECLARE_RWSEM(slqb_lock);

/*
 * A list of all slab caches on the system
 */
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
struct track {
        unsigned long addr;     /* Called from address */
        int cpu;                /* Was running on cpu */
        int pid;                /* Pid context */
        unsigned long when;     /* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

static struct kmem_cache kmem_cache_cache;
#ifdef CONFIG_SLQB_SYSFS
static int sysfs_slab_add(struct kmem_cache *s);
static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s)
{
        return 0;
}

static inline void sysfs_slab_remove(struct kmem_cache *s)
{
        kmem_cache_free(&kmem_cache_cache, s);
}
#endif
/********************************************************************
 *                      Core slab cache functions
 *******************************************************************/

static int __slab_is_available __read_mostly;
int slab_is_available(void)
{
        return __slab_is_available;
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
        VM_BUG_ON(!s->cpu_slab[cpu]);
        return s->cpu_slab[cpu];
}
static inline int check_valid_pointer(struct kmem_cache *s,
                                struct slqb_page *page, const void *object)
{
        void *base;

        base = slqb_page_address(page);
        if (object < base || object >= base + s->objects * s->size ||
                (object - base) % s->size) {
                return 0;
        }

        return 1;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
        return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
        *(void **)(object + s->offset) = fp;
}
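/*
 * Illustrative sketch, not part of the allocator: free objects store the
 * freelist linkage inside themselves, s->offset bytes into the object,
 * exactly where get_freepointer()/set_freepointer() above read and write it.
 * The stand-alone fragment below (hypothetical names, user-space malloc in
 * place of a slab page) shows the same encoding.
 */
#if 0   /* example only, never built */
#include <stdlib.h>

struct fake_cache { size_t offset; };   /* mirrors kmem_cache->offset */

static void *get_fp(struct fake_cache *s, void *object)
{
        return *(void **)((char *)object + s->offset);
}

static void set_fp(struct fake_cache *s, void *object, void *fp)
{
        *(void **)((char *)object + s->offset) = fp;
}

static void example(void)
{
        struct fake_cache s = { .offset = 0 };
        void *a = malloc(64), *b = malloc(64);

        set_fp(&s, a, b);               /* a's first word now points at b */
        set_fp(&s, b, NULL);            /* b terminates the list */
        if (get_fp(&s, a) == b && get_fp(&s, b) == NULL) {
                /* the two-object freelist round-trips as expected */
        }
        free(a);
        free(b);
}
#endif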
/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr) \
        for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
                        __p += (__s)->size)

#define for_each_free_object(__p, __s, __free) \
        for (__p = (__free); (__p) != NULL; __p = get_freepointer((__s),\
                        (__p)))
#ifdef CONFIG_SLQB_DEBUG
#ifdef CONFIG_SLQB_DEBUG_ON
static int slqb_debug __read_mostly = DEBUG_DEFAULT_FLAGS;
#else
static int slqb_debug __read_mostly;
#endif

static char *slqb_debug_slabs;

static void print_section(char *text, u8 *addr, unsigned int length)
{
        int i, offset;
        int newline = 1;
        char ascii[17];

        ascii[16] = 0;

        for (i = 0; i < length; i++) {
                if (newline) {
                        printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
                        newline = 0;
                }
                printk(KERN_CONT " %02x", addr[i]);
                offset = i % 16;
                ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
                if (offset == 15) {
                        printk(KERN_CONT " %s\n", ascii);
                        newline = 1;
                }
        }
        if (!newline) {
                i %= 16;
                while (i < 16) {
                        printk(KERN_CONT "   ");
                        ascii[i] = ' ';
                        i++;
                }
                printk(KERN_CONT " %s\n", ascii);
        }
}
379 static struct track
*get_track(struct kmem_cache
*s
, void *object
,
380 enum track_item alloc
)
385 p
= object
+ s
->offset
+ sizeof(void *);
387 p
= object
+ s
->inuse
;
392 static void set_track(struct kmem_cache
*s
, void *object
,
393 enum track_item alloc
, unsigned long addr
)
398 p
= object
+ s
->offset
+ sizeof(void *);
400 p
= object
+ s
->inuse
;
405 p
->cpu
= raw_smp_processor_id();
406 p
->pid
= current
? current
->pid
: -1;
409 memset(p
, 0, sizeof(struct track
));
412 static void init_tracking(struct kmem_cache
*s
, void *object
)
414 if (!(s
->flags
& SLAB_STORE_USER
))
417 set_track(s
, object
, TRACK_FREE
, 0UL);
418 set_track(s
, object
, TRACK_ALLOC
, 0UL);
421 static void print_track(const char *s
, struct track
*t
)
426 printk(KERN_ERR
"INFO: %s in ", s
);
427 __print_symbol("%s", (unsigned long)t
->addr
);
428 printk(" age=%lu cpu=%u pid=%d\n", jiffies
- t
->when
, t
->cpu
, t
->pid
);
431 static void print_tracking(struct kmem_cache
*s
, void *object
)
433 if (!(s
->flags
& SLAB_STORE_USER
))
436 print_track("Allocated", get_track(s
, object
, TRACK_ALLOC
));
437 print_track("Freed", get_track(s
, object
, TRACK_FREE
));
440 static void print_page_info(struct slqb_page
*page
)
442 printk(KERN_ERR
"INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
443 page
, page
->inuse
, page
->freelist
, page
->flags
);
447 #define MAX_ERR_STR 100
448 static void slab_bug(struct kmem_cache
*s
, char *fmt
, ...)
451 char buf
[MAX_ERR_STR
];
454 vsnprintf(buf
, sizeof(buf
), fmt
, args
);
456 printk(KERN_ERR
"========================================"
457 "=====================================\n");
458 printk(KERN_ERR
"BUG %s: %s\n", s
->name
, buf
);
459 printk(KERN_ERR
"----------------------------------------"
460 "-------------------------------------\n\n");
463 static void slab_fix(struct kmem_cache
*s
, char *fmt
, ...)
469 vsnprintf(buf
, sizeof(buf
), fmt
, args
);
471 printk(KERN_ERR
"FIX %s: %s\n", s
->name
, buf
);
474 static void print_trailer(struct kmem_cache
*s
, struct slqb_page
*page
, u8
*p
)
476 unsigned int off
; /* Offset of last byte */
477 u8
*addr
= slqb_page_address(page
);
479 print_tracking(s
, p
);
481 print_page_info(page
);
483 printk(KERN_ERR
"INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
484 p
, p
- addr
, get_freepointer(s
, p
));
487 print_section("Bytes b4", p
- 16, 16);
489 print_section("Object", p
, min(s
->objsize
, 128));
491 if (s
->flags
& SLAB_RED_ZONE
)
492 print_section("Redzone", p
+ s
->objsize
, s
->inuse
- s
->objsize
);
495 off
= s
->offset
+ sizeof(void *);
499 if (s
->flags
& SLAB_STORE_USER
)
500 off
+= 2 * sizeof(struct track
);
502 if (off
!= s
->size
) {
503 /* Beginning of the filler is the free pointer */
504 print_section("Padding", p
+ off
, s
->size
- off
);
510 static void object_err(struct kmem_cache
*s
, struct slqb_page
*page
,
511 u8
*object
, char *reason
)
514 print_trailer(s
, page
, object
);
517 static void slab_err(struct kmem_cache
*s
, struct slqb_page
*page
,
521 print_page_info(page
);
525 static void init_object(struct kmem_cache
*s
, void *object
, int active
)
529 if (s
->flags
& __OBJECT_POISON
) {
530 memset(p
, POISON_FREE
, s
->objsize
- 1);
531 p
[s
->objsize
- 1] = POISON_END
;
534 if (s
->flags
& SLAB_RED_ZONE
) {
535 memset(p
+ s
->objsize
,
536 active
? SLUB_RED_ACTIVE
: SLUB_RED_INACTIVE
,
537 s
->inuse
- s
->objsize
);
541 static u8
*check_bytes(u8
*start
, unsigned int value
, unsigned int bytes
)
544 if (*start
!= (u8
)value
)
552 static void restore_bytes(struct kmem_cache
*s
, char *message
, u8 data
,
553 void *from
, void *to
)
555 slab_fix(s
, "Restoring 0x%p-0x%p=0x%x\n", from
, to
- 1, data
);
556 memset(from
, data
, to
- from
);
559 static int check_bytes_and_report(struct kmem_cache
*s
, struct slqb_page
*page
,
560 u8
*object
, char *what
,
561 u8
*start
, unsigned int value
, unsigned int bytes
)
566 fault
= check_bytes(start
, value
, bytes
);
571 while (end
> fault
&& end
[-1] == value
)
574 slab_bug(s
, "%s overwritten", what
);
575 printk(KERN_ERR
"INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
576 fault
, end
- 1, fault
[0], value
);
577 print_trailer(s
, page
, object
);
579 restore_bytes(s
, what
, value
, fault
, end
);
/*
 * Object layout:
 *
 * object address
 *      Bytes of the object to be managed.
 *      If the freepointer may overlay the object then the free
 *      pointer is the first word of the object.
 *
 *      Poisoning uses 0x6b (POISON_FREE) and the last byte is
 *      0xa5 (POISON_END)
 *
 * object + s->objsize
 *      Padding to reach word boundary. This is also used for Redzoning.
 *      Padding is extended by another word if Redzoning is enabled and
 *      objsize == inuse.
 *
 *      We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 *      0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 *      Meta data starts here.
 *
 *      A. Free pointer (if we cannot overwrite object on free)
 *      B. Tracking data for SLAB_STORE_USER
 *      C. Padding to reach required alignment boundary or at minimum
 *              one word if debugging is on to be able to detect writes
 *              before the word boundary.
 *
 *      Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 *      Nothing is used beyond s->size.
 */
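/*
 * Worked example (hypothetical numbers, 64-bit pointers): for a 52-byte
 * object in a cache with SLAB_RED_ZONE, SLAB_STORE_USER and poisoning
 * enabled, calculate_sizes() below would lay the object out roughly as:
 *
 *      offset   0 ..  51       the object itself (0x6b poison, last byte
 *                              0xa5, while free)
 *      offset  52 ..  55       padding to the word boundary, doubling as
 *                              the redzone
 *      offset  56 ..  63       relocated free pointer (a poisoned object
 *                              cannot hold it in its first word)
 *      offset  64 .. 111       2 * sizeof(struct track) of alloc/free
 *                              tracking data
 *      offset 112 .. 119       one extra word of 0x5a padding so underruns
 *                              of the next object hit padding, not metadata
 *
 * and the total is then rounded up to the cache's alignment to give s->size.
 * Exact offsets depend on flags and architecture; this is only meant to make
 * the description above concrete.
 */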
617 static int check_pad_bytes(struct kmem_cache
*s
, struct slqb_page
*page
, u8
*p
)
619 unsigned long off
= s
->inuse
; /* The end of info */
622 /* Freepointer is placed after the object. */
623 off
+= sizeof(void *);
626 if (s
->flags
& SLAB_STORE_USER
) {
627 /* We also have user information there */
628 off
+= 2 * sizeof(struct track
);
634 return check_bytes_and_report(s
, page
, p
, "Object padding",
635 p
+ off
, POISON_INUSE
, s
->size
- off
);
638 static int slab_pad_check(struct kmem_cache
*s
, struct slqb_page
*page
)
646 if (!(s
->flags
& SLAB_POISON
))
649 start
= slqb_page_address(page
);
650 end
= start
+ (PAGE_SIZE
<< s
->order
);
651 length
= s
->objects
* s
->size
;
652 remainder
= end
- (start
+ length
);
656 fault
= check_bytes(start
+ length
, POISON_INUSE
, remainder
);
660 while (end
> fault
&& end
[-1] == POISON_INUSE
)
663 slab_err(s
, page
, "Padding overwritten. 0x%p-0x%p", fault
, end
- 1);
664 print_section("Padding", start
, length
);
666 restore_bytes(s
, "slab padding", POISON_INUSE
, start
, end
);
670 static int check_object(struct kmem_cache
*s
, struct slqb_page
*page
,
671 void *object
, int active
)
674 u8
*endobject
= object
+ s
->objsize
;
676 if (s
->flags
& SLAB_RED_ZONE
) {
678 active
? SLUB_RED_ACTIVE
: SLUB_RED_INACTIVE
;
680 if (!check_bytes_and_report(s
, page
, object
, "Redzone",
681 endobject
, red
, s
->inuse
- s
->objsize
))
684 if ((s
->flags
& SLAB_POISON
) && s
->objsize
< s
->inuse
) {
685 check_bytes_and_report(s
, page
, p
, "Alignment padding",
686 endobject
, POISON_INUSE
, s
->inuse
- s
->objsize
);
690 if (s
->flags
& SLAB_POISON
) {
691 if (!active
&& (s
->flags
& __OBJECT_POISON
)) {
692 if (!check_bytes_and_report(s
, page
, p
, "Poison", p
,
693 POISON_FREE
, s
->objsize
- 1))
696 if (!check_bytes_and_report(s
, page
, p
, "Poison",
697 p
+ s
->objsize
- 1, POISON_END
, 1))
702 * check_pad_bytes cleans up on its own.
704 check_pad_bytes(s
, page
, p
);
710 static int check_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
712 if (!(page
->flags
& PG_SLQB_BIT
)) {
713 slab_err(s
, page
, "Not a valid slab page");
716 if (page
->inuse
== 0) {
717 slab_err(s
, page
, "inuse before free / after alloc", s
->name
);
720 if (page
->inuse
> s
->objects
) {
721 slab_err(s
, page
, "inuse %u > max %u",
722 s
->name
, page
->inuse
, s
->objects
);
725 /* Slab_pad_check fixes things up after itself */
726 slab_pad_check(s
, page
);
730 static void trace(struct kmem_cache
*s
, struct slqb_page
*page
,
731 void *object
, int alloc
)
733 if (s
->flags
& SLAB_TRACE
) {
734 printk(KERN_INFO
"TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
736 alloc
? "alloc" : "free",
741 print_section("Object", (void *)object
, s
->objsize
);
747 static void setup_object_debug(struct kmem_cache
*s
, struct slqb_page
*page
,
753 if (!(s
->flags
& (SLAB_STORE_USER
|SLAB_RED_ZONE
|__OBJECT_POISON
)))
756 init_object(s
, object
, 0);
757 init_tracking(s
, object
);
760 static int alloc_debug_processing(struct kmem_cache
*s
,
761 void *object
, unsigned long addr
)
763 struct slqb_page
*page
;
764 page
= virt_to_head_slqb_page(object
);
766 if (!check_slab(s
, page
))
769 if (!check_valid_pointer(s
, page
, object
)) {
770 object_err(s
, page
, object
, "Freelist Pointer check fails");
774 if (object
&& !check_object(s
, page
, object
, 0))
        /* Success. Perform special debug activities for allocs */
778 if (s
->flags
& SLAB_STORE_USER
)
779 set_track(s
, object
, TRACK_ALLOC
, addr
);
780 trace(s
, page
, object
, 1);
781 init_object(s
, object
, 1);
788 static int free_debug_processing(struct kmem_cache
*s
,
789 void *object
, unsigned long addr
)
791 struct slqb_page
*page
;
792 page
= virt_to_head_slqb_page(object
);
794 if (!check_slab(s
, page
))
797 if (!check_valid_pointer(s
, page
, object
)) {
798 slab_err(s
, page
, "Invalid object pointer 0x%p", object
);
802 if (!check_object(s
, page
, object
, 1))
805 /* Special debug activities for freeing objects */
806 if (s
->flags
& SLAB_STORE_USER
)
807 set_track(s
, object
, TRACK_FREE
, addr
);
808 trace(s
, page
, object
, 0);
809 init_object(s
, object
, 0);
813 slab_fix(s
, "Object at 0x%p not freed", object
);
817 static int __init
setup_slqb_debug(char *str
)
819 slqb_debug
= DEBUG_DEFAULT_FLAGS
;
820 if (*str
++ != '=' || !*str
) {
822 * No options specified. Switch on full debugging.
829 * No options but restriction on slabs. This means full
830 * debugging for slabs matching a pattern.
838 * Switch off all debugging measures.
844 * Determine which debug features should be switched on
846 for (; *str
&& *str
!= ','; str
++) {
847 switch (tolower(*str
)) {
849 slqb_debug
|= SLAB_DEBUG_FREE
;
852 slqb_debug
|= SLAB_RED_ZONE
;
855 slqb_debug
|= SLAB_POISON
;
858 slqb_debug
|= SLAB_STORE_USER
;
861 slqb_debug
|= SLAB_TRACE
;
864 printk(KERN_ERR
"slqb_debug option '%c' "
865 "unknown. skipped\n", *str
);
871 slqb_debug_slabs
= str
+ 1;
875 __setup("slqb_debug", setup_slqb_debug
);
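/*
 * Usage note (an assumption drawn from the parser above, which mirrors the
 * SLUB-style option letters): the boot parameter takes a string of option
 * letters, optionally followed by a comma and a slab-name prefix that
 * restricts debugging to matching caches, e.g.:
 *
 *      slqb_debug              enable all debug options for all slabs
 *      slqb_debug=zpu          redzoning, poisoning and user tracking
 *      slqb_debug=t,dentry     tracing, only for caches whose names start
 *                              with "dentry"
 *
 * Each letter selects one of SLAB_DEBUG_FREE, SLAB_RED_ZONE, SLAB_POISON,
 * SLAB_STORE_USER or SLAB_TRACE, as handled by the switch in
 * setup_slqb_debug(); unknown letters are reported and skipped.
 */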
877 static int __init
setup_slqb_min_order(char *str
)
879 get_option(&str
, &slqb_min_order
);
880 slqb_min_order
= min(slqb_min_order
, MAX_ORDER
- 1);
884 __setup("slqb_min_order=", setup_slqb_min_order
);
886 static int __init
setup_slqb_min_objects(char *str
)
888 get_option(&str
, &slqb_min_objects
);
893 __setup("slqb_min_objects=", setup_slqb_min_objects
);
895 static unsigned long kmem_cache_flags(unsigned long objsize
,
896 unsigned long flags
, const char *name
,
897 void (*ctor
)(void *))
900 * Enable debugging if selected on the kernel commandline.
902 if (slqb_debug
&& (!slqb_debug_slabs
||
903 strncmp(slqb_debug_slabs
, name
,
904 strlen(slqb_debug_slabs
)) == 0))
907 if (num_possible_nodes() > 1)
913 static inline void setup_object_debug(struct kmem_cache
*s
,
914 struct slqb_page
*page
, void *object
)
918 static inline int alloc_debug_processing(struct kmem_cache
*s
,
919 void *object
, unsigned long addr
)
924 static inline int free_debug_processing(struct kmem_cache
*s
,
925 void *object
, unsigned long addr
)
930 static inline int slab_pad_check(struct kmem_cache
*s
, struct slqb_page
*page
)
935 static inline int check_object(struct kmem_cache
*s
, struct slqb_page
*page
,
936 void *object
, int active
)
941 static inline void add_full(struct kmem_cache_node
*n
, struct slqb_page
*page
)
945 static inline unsigned long kmem_cache_flags(unsigned long objsize
,
946 unsigned long flags
, const char *name
, void (*ctor
)(void *))
948 if (num_possible_nodes() > 1)
953 static const int slqb_debug
;
957 * allocate a new slab (return its corresponding struct slqb_page)
959 static struct slqb_page
*allocate_slab(struct kmem_cache
*s
,
960 gfp_t flags
, int node
)
962 struct slqb_page
*page
;
963 int pages
= 1 << s
->order
;
965 flags
|= s
->allocflags
;
967 page
= (struct slqb_page
*)alloc_pages_node(node
, flags
, s
->order
);
971 mod_zone_page_state(slqb_page_zone(page
),
972 (s
->flags
& SLAB_RECLAIM_ACCOUNT
) ?
973 NR_SLAB_RECLAIMABLE
: NR_SLAB_UNRECLAIMABLE
,
980 * Called once for each object on a new slab page
982 static void setup_object(struct kmem_cache
*s
,
983 struct slqb_page
*page
, void *object
)
985 setup_object_debug(s
, page
, object
);
986 if (unlikely(s
->ctor
))
991 * Allocate a new slab, set up its object list.
993 static struct slqb_page
*new_slab_page(struct kmem_cache
*s
,
994 gfp_t flags
, int node
, unsigned int colour
)
996 struct slqb_page
*page
;
1001 BUG_ON(flags
& GFP_SLAB_BUG_MASK
);
1003 page
= allocate_slab(s
,
1004 flags
& (GFP_RECLAIM_MASK
| GFP_CONSTRAINT_MASK
), node
);
1008 page
->flags
|= PG_SLQB_BIT
;
1010 start
= page_address(&page
->page
);
1012 if (unlikely(slab_poison(s
)))
1013 memset(start
, POISON_INUSE
, PAGE_SIZE
<< s
->order
);
1018 for_each_object(p
, s
, start
) {
1019 setup_object(s
, page
, p
);
1020 set_freepointer(s
, last
, p
);
1023 set_freepointer(s
, last
, NULL
);
1025 page
->freelist
= start
;
1032 * Free a slab page back to the page allocator
1034 static void __free_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
1036 int pages
= 1 << s
->order
;
1038 if (unlikely(slab_debug(s
))) {
1041 slab_pad_check(s
, page
);
1042 for_each_free_object(p
, s
, page
->freelist
)
1043 check_object(s
, page
, p
, 0);
1046 mod_zone_page_state(slqb_page_zone(page
),
1047 (s
->flags
& SLAB_RECLAIM_ACCOUNT
) ?
1048 NR_SLAB_RECLAIMABLE
: NR_SLAB_UNRECLAIMABLE
,
1051 __free_slqb_pages(page
, s
->order
, pages
);
1054 static void rcu_free_slab(struct rcu_head
*h
)
1056 struct slqb_page
*page
;
1058 page
= container_of(h
, struct slqb_page
, rcu_head
);
1059 __free_slab(page
->list
->cache
, page
);
1062 static void free_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
1064 VM_BUG_ON(page
->inuse
);
1065 if (unlikely(s
->flags
& SLAB_DESTROY_BY_RCU
))
1066 call_rcu(&page
->rcu_head
, rcu_free_slab
);
1068 __free_slab(s
, page
);
1072 * Return an object to its slab.
1074 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1075 * list_lock in the case of per-node list.
1077 static int free_object_to_page(struct kmem_cache
*s
,
1078 struct kmem_cache_list
*l
, struct slqb_page
*page
,
1081 VM_BUG_ON(page
->list
!= l
);
1083 set_freepointer(s
, object
, page
->freelist
);
1084 page
->freelist
= object
;
1088 if (likely(s
->objects
> 1)) {
1090 list_del(&page
->lru
);
1094 slqb_stat_inc(l
, FLUSH_SLAB_FREE
);
1097 } else if (page
->inuse
+ 1 == s
->objects
) {
1099 list_add(&page
->lru
, &l
->partial
);
1100 slqb_stat_inc(l
, FLUSH_SLAB_PARTIAL
);
1107 static void slab_free_to_remote(struct kmem_cache
*s
, struct slqb_page
*page
,
1108 void *object
, struct kmem_cache_cpu
*c
);
1112 * Flush the LIFO list of objects on a list. They are sent back to their pages
1113 * in case the pages also belong to the list, or to our CPU's remote-free list
1114 * in the case they do not.
1116 * Doesn't flush the entire list. flush_free_list_all does.
1118 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1119 * list_lock in the case of per-node list.
1121 static void flush_free_list(struct kmem_cache
*s
, struct kmem_cache_list
*l
)
1127 nr
= l
->freelist
.nr
;
1131 nr
= min(slab_freebatch(s
), nr
);
1133 slqb_stat_inc(l
, FLUSH_FREE_LIST
);
1134 slqb_stat_add(l
, FLUSH_FREE_LIST_OBJECTS
, nr
);
1136 l
->freelist
.nr
-= nr
;
1137 head
= l
->freelist
.head
;
1140 struct slqb_page
*page
;
1145 head
= get_freepointer(s
, object
);
1146 page
= virt_to_head_slqb_page(object
);
1149 if (page
->list
!= l
) {
1150 struct kmem_cache_cpu
*c
;
1153 spin_unlock(&l
->page_lock
);
1157 c
= get_cpu_slab(s
, smp_processor_id());
1159 slab_free_to_remote(s
, page
, object
, c
);
1160 slqb_stat_inc(l
, FLUSH_FREE_LIST_REMOTE
);
1165 spin_lock(&l
->page_lock
);
1168 free_object_to_page(s
, l
, page
, object
);
1175 spin_unlock(&l
->page_lock
);
1177 l
->freelist
.head
= head
;
1178 if (!l
->freelist
.nr
)
1179 l
->freelist
.tail
= NULL
;
1182 static void flush_free_list_all(struct kmem_cache
*s
, struct kmem_cache_list
*l
)
1184 while (l
->freelist
.nr
)
1185 flush_free_list(s
, l
);
1190 * If enough objects have been remotely freed back to this list,
1191 * remote_free_check will be set. In which case, we'll eventually come here
1192 * to take those objects off our remote_free list and onto our LIFO freelist.
1194 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1195 * list_lock in the case of per-node list.
1197 static void claim_remote_free_list(struct kmem_cache
*s
,
1198 struct kmem_cache_list
*l
)
1200 void **head
, **tail
;
1203 if (!l
->remote_free
.list
.nr
)
1206 spin_lock(&l
->remote_free
.lock
);
1208 l
->remote_free_check
= 0;
1209 head
= l
->remote_free
.list
.head
;
1210 l
->remote_free
.list
.head
= NULL
;
1211 tail
= l
->remote_free
.list
.tail
;
1212 l
->remote_free
.list
.tail
= NULL
;
1213 nr
= l
->remote_free
.list
.nr
;
1214 l
->remote_free
.list
.nr
= 0;
1216 spin_unlock(&l
->remote_free
.lock
);
1220 if (!l
->freelist
.nr
) {
1221 /* Get head hot for likely subsequent allocation or flush */
1223 l
->freelist
.head
= head
;
1225 set_freepointer(s
, l
->freelist
.tail
, head
);
1226 l
->freelist
.tail
= tail
;
1228 l
->freelist
.nr
+= nr
;
1230 slqb_stat_inc(l
, CLAIM_REMOTE_LIST
);
1231 slqb_stat_add(l
, CLAIM_REMOTE_LIST_OBJECTS
, nr
);
1234 static inline void claim_remote_free_list(struct kmem_cache
*s
,
1235 struct kmem_cache_list
*l
)
1241 * Allocation fastpath. Get an object from the list's LIFO freelist, or
1242 * return NULL if it is empty.
1244 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1245 * list_lock in the case of per-node list.
1247 static __always_inline
void *__cache_list_get_object(struct kmem_cache
*s
,
1248 struct kmem_cache_list
*l
)
1252 object
= l
->freelist
.head
;
1253 if (likely(object
)) {
1254 void *next
= get_freepointer(s
, object
);
1256 VM_BUG_ON(!l
->freelist
.nr
);
1258 l
->freelist
.head
= next
;
1262 VM_BUG_ON(l
->freelist
.nr
);
1265 if (unlikely(l
->remote_free_check
)) {
1266 claim_remote_free_list(s
, l
);
1268 if (l
->freelist
.nr
> slab_hiwater(s
))
1269 flush_free_list(s
, l
);
1271 /* repetition here helps gcc :( */
1272 object
= l
->freelist
.head
;
1273 if (likely(object
)) {
1274 void *next
= get_freepointer(s
, object
);
1276 VM_BUG_ON(!l
->freelist
.nr
);
1278 l
->freelist
.head
= next
;
1282 VM_BUG_ON(l
->freelist
.nr
);
1290 * Slow(er) path. Get a page from this list's existing pages. Will be a
1291 * new empty page in the case that __slab_alloc_page has just been called
1292 * (empty pages otherwise never get queued up on the lists), or a partial page
1293 * already on the list.
1295 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1296 * list_lock in the case of per-node list.
1298 static noinline
void *__cache_list_get_page(struct kmem_cache
*s
,
1299 struct kmem_cache_list
*l
)
1301 struct slqb_page
*page
;
1304 if (unlikely(!l
->nr_partial
))
1307 page
= list_first_entry(&l
->partial
, struct slqb_page
, lru
);
1308 VM_BUG_ON(page
->inuse
== s
->objects
);
1309 if (page
->inuse
+ 1 == s
->objects
) {
1311 list_del(&page
->lru
);
1314 VM_BUG_ON(!page
->freelist
);
1318 object
= page
->freelist
;
1319 page
->freelist
= get_freepointer(s
, object
);
1321 prefetchw(page
->freelist
);
1322 VM_BUG_ON((page
->inuse
== s
->objects
) != (page
->freelist
== NULL
));
1323 slqb_stat_inc(l
, ALLOC_SLAB_FILL
);
1328 static void *cache_list_get_page(struct kmem_cache
*s
,
1329 struct kmem_cache_list
*l
)
1333 if (unlikely(!l
->nr_partial
))
1336 spin_lock(&l
->page_lock
);
1337 object
= __cache_list_get_page(s
, l
);
1338 spin_unlock(&l
->page_lock
);
1344 * Allocation slowpath. Allocate a new slab page from the page allocator, and
1345 * put it on the list's partial list. Must be followed by an allocation so
1346 * that we don't have dangling empty pages on the partial list.
1348 * Returns 0 on allocation failure.
1350 * Must be called with interrupts disabled.
1352 static noinline
void *__slab_alloc_page(struct kmem_cache
*s
,
1353 gfp_t gfpflags
, int node
)
1355 struct slqb_page
*page
;
1356 struct kmem_cache_list
*l
;
1357 struct kmem_cache_cpu
*c
;
1358 unsigned int colour
;
1361 c
= get_cpu_slab(s
, smp_processor_id());
1362 colour
= c
->colour_next
;
1363 c
->colour_next
+= s
->colour_off
;
1364 if (c
->colour_next
>= s
->colour_range
)
1367 /* Caller handles __GFP_ZERO */
1368 gfpflags
&= ~__GFP_ZERO
;
1370 if (gfpflags
& __GFP_WAIT
)
1372 page
= new_slab_page(s
, gfpflags
, node
, colour
);
1373 if (gfpflags
& __GFP_WAIT
)
1374 local_irq_disable();
1375 if (unlikely(!page
))
1378 if (!NUMA_BUILD
|| likely(slqb_page_to_nid(page
) == numa_node_id())) {
1379 struct kmem_cache_cpu
*c
;
1380 int cpu
= smp_processor_id();
1382 c
= get_cpu_slab(s
, cpu
);
1386 spin_lock(&l
->page_lock
);
1389 list_add(&page
->lru
, &l
->partial
);
1390 slqb_stat_inc(l
, ALLOC
);
1391 slqb_stat_inc(l
, ALLOC_SLAB_NEW
);
1392 object
= __cache_list_get_page(s
, l
);
1393 spin_unlock(&l
->page_lock
);
1396 struct kmem_cache_node
*n
;
1398 n
= s
->node_slab
[slqb_page_to_nid(page
)];
1402 spin_lock(&n
->list_lock
);
1403 spin_lock(&l
->page_lock
);
1406 list_add(&page
->lru
, &l
->partial
);
1407 slqb_stat_inc(l
, ALLOC
);
1408 slqb_stat_inc(l
, ALLOC_SLAB_NEW
);
1409 object
= __cache_list_get_page(s
, l
);
1410 spin_unlock(&l
->page_lock
);
1411 spin_unlock(&n
->list_lock
);
1419 static noinline
int alternate_nid(struct kmem_cache
*s
,
1420 gfp_t gfpflags
, int node
)
1422 if (in_interrupt() || (gfpflags
& __GFP_THISNODE
))
1424 if (cpuset_do_slab_mem_spread() && (s
->flags
& SLAB_MEM_SPREAD
))
1425 return cpuset_mem_spread_node();
1426 else if (current
->mempolicy
)
1427 return slab_node(current
->mempolicy
);
1432 * Allocate an object from a remote node. Return NULL if none could be found
1433 * (in which case, caller should allocate a new slab)
1435 * Must be called with interrupts disabled.
1437 static void *__remote_slab_alloc_node(struct kmem_cache
*s
,
1438 gfp_t gfpflags
, int node
)
1440 struct kmem_cache_node
*n
;
1441 struct kmem_cache_list
*l
;
1444 n
= s
->node_slab
[node
];
1445 if (unlikely(!n
)) /* node has no memory */
1449 spin_lock(&n
->list_lock
);
1451 object
= __cache_list_get_object(s
, l
);
1452 if (unlikely(!object
)) {
1453 object
= cache_list_get_page(s
, l
);
1454 if (unlikely(!object
)) {
1455 spin_unlock(&n
->list_lock
);
1456 return __slab_alloc_page(s
, gfpflags
, node
);
1460 slqb_stat_inc(l
, ALLOC
);
1461 spin_unlock(&n
->list_lock
);
1465 static noinline
void *__remote_slab_alloc(struct kmem_cache
*s
,
1466 gfp_t gfpflags
, int node
)
1469 struct zonelist
*zonelist
;
1472 enum zone_type high_zoneidx
= gfp_zone(gfpflags
);
1474 object
= __remote_slab_alloc_node(s
, gfpflags
, node
);
1475 if (likely(object
|| (gfpflags
& __GFP_THISNODE
)))
1478 zonelist
= node_zonelist(slab_node(current
->mempolicy
), gfpflags
);
1479 for_each_zone_zonelist(zone
, z
, zonelist
, high_zoneidx
) {
1480 if (!cpuset_zone_allowed_hardwall(zone
, gfpflags
))
1483 node
= zone_to_nid(zone
);
1484 object
= __remote_slab_alloc_node(s
, gfpflags
, node
);
1493 * Main allocation path. Return an object, or NULL on allocation failure.
1495 * Must be called with interrupts disabled.
1497 static __always_inline
void *__slab_alloc(struct kmem_cache
*s
,
1498 gfp_t gfpflags
, int node
)
1501 struct kmem_cache_cpu
*c
;
1502 struct kmem_cache_list
*l
;
1505 if (unlikely(node
!= -1) && unlikely(node
!= numa_node_id())) {
1507 return __remote_slab_alloc(s
, gfpflags
, node
);
1511 c
= get_cpu_slab(s
, smp_processor_id());
1514 object
= __cache_list_get_object(s
, l
);
1515 if (unlikely(!object
)) {
1517 int thisnode
= numa_node_id();
1520 * If the local node is memoryless, try remote alloc before
1521 * trying the page allocator. Otherwise, what happens is
1522 * objects are always freed to remote lists but the allocation
1523 * side always allocates a new page with only one object
1526 if (unlikely(!node_state(thisnode
, N_HIGH_MEMORY
)))
1527 object
= __remote_slab_alloc(s
, gfpflags
, thisnode
);
1531 object
= cache_list_get_page(s
, l
);
1532 if (unlikely(!object
)) {
1533 object
= __slab_alloc_page(s
, gfpflags
, node
);
1535 if (unlikely(!object
)) {
1536 node
= numa_node_id();
1545 slqb_stat_inc(l
, ALLOC
);
1550 * Perform some interrupts-on processing around the main allocation path
1551 * (debug checking and memset()ing).
1553 static __always_inline
void *slab_alloc(struct kmem_cache
*s
,
1554 gfp_t gfpflags
, int node
, unsigned long addr
)
1557 unsigned long flags
;
1559 gfpflags
&= gfp_allowed_mask
;
1561 lockdep_trace_alloc(gfpflags
);
1562 might_sleep_if(gfpflags
& __GFP_WAIT
);
1564 if (should_failslab(s
->objsize
, gfpflags
))
1568 local_irq_save(flags
);
1569 object
= __slab_alloc(s
, gfpflags
, node
);
1570 local_irq_restore(flags
);
1572 if (unlikely(slab_debug(s
)) && likely(object
)) {
1573 if (unlikely(!alloc_debug_processing(s
, object
, addr
)))
1577 if (unlikely(gfpflags
& __GFP_ZERO
) && likely(object
))
1578 memset(object
, 0, s
->objsize
);
1583 static __always_inline
void *__kmem_cache_alloc(struct kmem_cache
*s
,
1584 gfp_t gfpflags
, unsigned long caller
)
1589 if (unlikely(current
->flags
& (PF_SPREAD_SLAB
| PF_MEMPOLICY
)))
1590 node
= alternate_nid(s
, gfpflags
, node
);
1592 return slab_alloc(s
, gfpflags
, node
, caller
);
1595 void *kmem_cache_alloc(struct kmem_cache
*s
, gfp_t gfpflags
)
1597 return __kmem_cache_alloc(s
, gfpflags
, _RET_IP_
);
1599 EXPORT_SYMBOL(kmem_cache_alloc
);
1602 void *kmem_cache_alloc_node(struct kmem_cache
*s
, gfp_t gfpflags
, int node
)
1604 return slab_alloc(s
, gfpflags
, node
, _RET_IP_
);
1606 EXPORT_SYMBOL(kmem_cache_alloc_node
);
/*
 * Flush this CPU's remote free list of objects back to the list from where
 * they originate. They end up on that list's remotely freed list, and
 * eventually we set its remote_free_check if there are enough objects on it.
 *
 * This seems convoluted, but it keeps us from stomping on the target CPU's
 * fastpath cachelines.
 *
 * Must be called with interrupts disabled.
 */
1620 static void flush_remote_free_cache(struct kmem_cache
*s
,
1621 struct kmem_cache_cpu
*c
)
1624 struct kmem_cache_list
*dst
;
1633 #ifdef CONFIG_SLQB_STATS
1635 struct kmem_cache_list
*l
= &c
->list
;
1637 slqb_stat_inc(l
, FLUSH_RFREE_LIST
);
1638 slqb_stat_add(l
, FLUSH_RFREE_LIST_OBJECTS
, nr
);
1642 dst
= c
->remote_cache_list
;
        /*
         * Less common case, dst is filling up so free synchronously.
         * No point in having the remote CPU free these as it will just
         * free them back to the page list anyway.
         */
1649 if (unlikely(dst
->remote_free
.list
.nr
> (slab_hiwater(s
) >> 1))) {
1653 spin_lock(&dst
->page_lock
);
1655 struct slqb_page
*page
;
1660 head
= get_freepointer(s
, object
);
1661 page
= virt_to_head_slqb_page(object
);
1663 free_object_to_page(s
, dst
, page
, object
);
1666 spin_unlock(&dst
->page_lock
);
1675 spin_lock(&dst
->remote_free
.lock
);
1677 if (!dst
->remote_free
.list
.head
)
1678 dst
->remote_free
.list
.head
= src
->head
;
1680 set_freepointer(s
, dst
->remote_free
.list
.tail
, src
->head
);
1681 dst
->remote_free
.list
.tail
= src
->tail
;
1687 if (dst
->remote_free
.list
.nr
< slab_freebatch(s
))
1692 dst
->remote_free
.list
.nr
+= nr
;
1694 if (unlikely(dst
->remote_free
.list
.nr
>= slab_freebatch(s
) && set
))
1695 dst
->remote_free_check
= 1;
1697 spin_unlock(&dst
->remote_free
.lock
);
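/*
 * Illustrative sketch, not part of the allocator: the hand-off performed by
 * flush_remote_free_cache() above, modelled in user space with a pthread
 * mutex standing in for remote_free.lock and a hypothetical watermark. The
 * producer splices its whole privately built batch into the consumer's
 * remotely-freed list in one locked operation and raises a flag; the
 * consumer only looks at the flag on its own slow path, so its fastpath
 * cachelines are left untouched.
 */
#if 0   /* example only, never built */
#include <pthread.h>
#include <stddef.h>

struct robj { struct robj *next; };

struct obj_list { struct robj *head, *tail; unsigned long nr; };

struct remote_free_model {
        pthread_mutex_t lock;
        struct obj_list list;
        int check;                      /* models remote_free_check */
};

static void flush_batch(struct remote_free_model *dst, struct obj_list *src)
{
        pthread_mutex_lock(&dst->lock);
        if (!dst->list.head)
                dst->list.head = src->head;
        else
                dst->list.tail->next = src->head;  /* chain old tail to batch */
        dst->list.tail = src->tail;
        dst->list.nr += src->nr;
        if (dst->list.nr >= 16)         /* assumed watermark, cf. slab_freebatch() */
                dst->check = 1;         /* owner notices on its next slow path */
        pthread_mutex_unlock(&dst->lock);
        src->head = src->tail = NULL;
        src->nr = 0;
}
#endif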
1701 * Free an object to this CPU's remote free list.
1703 * Must be called with interrupts disabled.
1705 static noinline
void slab_free_to_remote(struct kmem_cache
*s
,
1706 struct slqb_page
*page
, void *object
,
1707 struct kmem_cache_cpu
*c
)
1712 * Our remote free list corresponds to a different list. Must
1713 * flush it and switch.
1715 if (page
->list
!= c
->remote_cache_list
) {
1716 flush_remote_free_cache(s
, c
);
1717 c
->remote_cache_list
= page
->list
;
1724 set_freepointer(s
, r
->tail
, object
);
1725 set_freepointer(s
, object
, NULL
);
1729 if (unlikely(r
->nr
>= slab_freebatch(s
)))
1730 flush_remote_free_cache(s
, c
);
/*
 * Main freeing path.
 *
 * Must be called with interrupts disabled.
 */
1739 static __always_inline
void __slab_free(struct kmem_cache
*s
,
1740 struct slqb_page
*page
, void *object
)
1742 struct kmem_cache_cpu
*c
;
1743 struct kmem_cache_list
*l
;
1744 int thiscpu
= smp_processor_id();
1746 c
= get_cpu_slab(s
, thiscpu
);
1749 slqb_stat_inc(l
, FREE
);
1751 if (!NUMA_BUILD
|| !slab_numa(s
) ||
1752 likely(slqb_page_to_nid(page
) == numa_node_id())) {
1754 * Freeing fastpath. Collects all local-node objects, not
1755 * just those allocated from our per-CPU list. This allows
1756 * fast transfer of objects from one CPU to another within
1759 set_freepointer(s
, object
, l
->freelist
.head
);
1760 l
->freelist
.head
= object
;
1761 if (!l
->freelist
.nr
)
1762 l
->freelist
.tail
= object
;
1765 if (unlikely(l
->freelist
.nr
> slab_hiwater(s
)))
1766 flush_free_list(s
, l
);
1771 * Freeing an object that was allocated on a remote node.
1773 slab_free_to_remote(s
, page
, object
, c
);
1774 slqb_stat_inc(l
, FREE_REMOTE
);
1780 * Perform some interrupts-on processing around the main freeing path
1783 static __always_inline
void slab_free(struct kmem_cache
*s
,
1784 struct slqb_page
*page
, void *object
)
1786 unsigned long flags
;
1790 debug_check_no_locks_freed(object
, s
->objsize
);
1791 if (likely(object
) && unlikely(slab_debug(s
))) {
1792 if (unlikely(!free_debug_processing(s
, object
, _RET_IP_
)))
1796 local_irq_save(flags
);
1797 __slab_free(s
, page
, object
);
1798 local_irq_restore(flags
);
1801 void kmem_cache_free(struct kmem_cache
*s
, void *object
)
1803 struct slqb_page
*page
= NULL
;
1806 page
= virt_to_head_slqb_page(object
);
1807 slab_free(s
, page
, object
);
1809 EXPORT_SYMBOL(kmem_cache_free
);
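/*
 * Illustrative sketch, not part of the allocator: how a kernel-side user
 * exercises the allocation and freeing paths implemented above through the
 * generic slab API. "struct foo", foo_cachep and the cache name are
 * hypothetical.
 */
#if 0   /* example only, never built */
struct foo {
        int a;
        struct list_head list;
};

static struct kmem_cache *foo_cachep;

static int __init foo_init(void)
{
        /* object size, alignment and flags feed calculate_sizes() below */
        foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                        0, SLAB_HWCACHE_ALIGN, NULL);
        if (!foo_cachep)
                return -ENOMEM;
        return 0;
}

static void foo_use(void)
{
        /* hits __slab_alloc(): per-CPU LIFO list first, then slab pages */
        struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);

        if (f)
                kmem_cache_free(foo_cachep, f); /* __slab_free() fastpath */
}
#endif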
/*
 * Calculate the order of allocation given a slab object size.
 *
 * Order-0 allocations are preferred since order 0 does not cause fragmentation
 * in the page allocator, and they have fastpaths in the page allocator. But
 * also minimise external fragmentation with large objects.
 */
1818 static int slab_order(int size
, int max_order
, int frac
)
1822 if (fls(size
- 1) <= PAGE_SHIFT
)
1825 order
= fls(size
- 1) - PAGE_SHIFT
;
1826 if (order
< slqb_min_order
)
1827 order
= slqb_min_order
;
1829 while (order
<= max_order
) {
1830 unsigned long slab_size
= PAGE_SIZE
<< order
;
1831 unsigned long objects
;
1832 unsigned long waste
;
1834 objects
= slab_size
/ size
;
1838 if (order
< MAX_ORDER
&& objects
< slqb_min_objects
) {
1840 * if we don't have enough objects for min_objects,
1841 * then try the next size up. Unless we have reached
1842 * our maximum possible page size.
1847 waste
= slab_size
- (objects
* size
);
1849 if (waste
* frac
<= slab_size
)
1859 static int calculate_order(int size
)
1864 * Attempt to find best configuration for a slab. This
1865 * works by first attempting to generate a layout with
1866 * the best configuration and backing off gradually.
1868 order
= slab_order(size
, 1, 4);
1873 * This size cannot fit in order-1. Allow bigger orders, but
1874 * forget about trying to save space.
1876 order
= slab_order(size
, MAX_ORDER
- 1, 0);
1877 if (order
< MAX_ORDER
)
1884 * Figure out what the alignment of the objects will be.
1886 static unsigned long calculate_alignment(unsigned long flags
,
1887 unsigned long align
, unsigned long size
)
        /*
         * If the user wants hardware cache aligned objects then follow that
         * suggestion if the object is sufficiently large.
         *
         * The hardware cache alignment cannot override the specified
         * alignment though. If that is greater, then use it.
         */
1896 if (flags
& SLAB_HWCACHE_ALIGN
) {
1897 unsigned long ralign
= cache_line_size();
1899 while (size
<= ralign
/ 2)
1901 align
= max(align
, ralign
);
1904 if (align
< ARCH_SLAB_MINALIGN
)
1905 align
= ARCH_SLAB_MINALIGN
;
1907 return ALIGN(align
, sizeof(void *));
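/*
 * Worked example (assuming a 64-byte cache line): with SLAB_HWCACHE_ALIGN, a
 * 24-byte object has ralign halved while size <= ralign / 2 (64 -> 32), so
 * align becomes max(align, 32); a 100-byte object keeps the full 64-byte
 * alignment. The result is then raised to ARCH_SLAB_MINALIGN if needed and
 * rounded up to a multiple of sizeof(void *).
 */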
1910 static void init_kmem_cache_list(struct kmem_cache
*s
,
1911 struct kmem_cache_list
*l
)
1915 l
->freelist
.head
= NULL
;
1916 l
->freelist
.tail
= NULL
;
1919 INIT_LIST_HEAD(&l
->partial
);
1920 spin_lock_init(&l
->page_lock
);
1923 l
->remote_free_check
= 0;
1924 spin_lock_init(&l
->remote_free
.lock
);
1925 l
->remote_free
.list
.nr
= 0;
1926 l
->remote_free
.list
.head
= NULL
;
1927 l
->remote_free
.list
.tail
= NULL
;
1930 #ifdef CONFIG_SLQB_STATS
1931 memset(l
->stats
, 0, sizeof(l
->stats
));
1935 static void init_kmem_cache_cpu(struct kmem_cache
*s
,
1936 struct kmem_cache_cpu
*c
)
1938 init_kmem_cache_list(s
, &c
->list
);
1943 c
->rlist
.head
= NULL
;
1944 c
->rlist
.tail
= NULL
;
1945 c
->remote_cache_list
= NULL
;
1950 static void init_kmem_cache_node(struct kmem_cache
*s
,
1951 struct kmem_cache_node
*n
)
1953 spin_lock_init(&n
->list_lock
);
1954 init_kmem_cache_list(s
, &n
->list
);
1958 /* Initial slabs. */
1960 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_cache_cpus
);
1963 /* XXX: really need a DEFINE_PER_NODE for per-node data because a static
1964 * array is wasteful */
1965 static struct kmem_cache_node kmem_cache_nodes
[MAX_NUMNODES
];
1969 static struct kmem_cache kmem_cpu_cache
;
1970 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_cpu_cpus
);
1972 static struct kmem_cache_node kmem_cpu_nodes
[MAX_NUMNODES
]; /* XXX per-nid */
1977 static struct kmem_cache kmem_node_cache
;
1979 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_node_cpus
);
1981 static struct kmem_cache_node kmem_node_nodes
[MAX_NUMNODES
]; /*XXX per-nid */
1985 static struct kmem_cache_cpu
*alloc_kmem_cache_cpu(struct kmem_cache
*s
,
1988 struct kmem_cache_cpu
*c
;
1991 node
= cpu_to_node(cpu
);
1993 c
= kmem_cache_alloc_node(&kmem_cpu_cache
, GFP_KERNEL
, node
);
1997 init_kmem_cache_cpu(s
, c
);
2001 static void free_kmem_cache_cpus(struct kmem_cache
*s
)
2005 for_each_online_cpu(cpu
) {
2006 struct kmem_cache_cpu
*c
;
2008 c
= s
->cpu_slab
[cpu
];
2010 kmem_cache_free(&kmem_cpu_cache
, c
);
2011 s
->cpu_slab
[cpu
] = NULL
;
2016 static int alloc_kmem_cache_cpus(struct kmem_cache
*s
)
2020 for_each_online_cpu(cpu
) {
2021 struct kmem_cache_cpu
*c
;
2023 c
= s
->cpu_slab
[cpu
];
2027 c
= alloc_kmem_cache_cpu(s
, cpu
);
2029 free_kmem_cache_cpus(s
);
2032 s
->cpu_slab
[cpu
] = c
;
2038 static inline void free_kmem_cache_cpus(struct kmem_cache
*s
)
2042 static inline int alloc_kmem_cache_cpus(struct kmem_cache
*s
)
2044 init_kmem_cache_cpu(s
, &s
->cpu_slab
);
2050 static void free_kmem_cache_nodes(struct kmem_cache
*s
)
2054 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2055 struct kmem_cache_node
*n
;
2057 n
= s
->node_slab
[node
];
2059 kmem_cache_free(&kmem_node_cache
, n
);
2060 s
->node_slab
[node
] = NULL
;
2065 static int alloc_kmem_cache_nodes(struct kmem_cache
*s
)
2069 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2070 struct kmem_cache_node
*n
;
2072 n
= kmem_cache_alloc_node(&kmem_node_cache
, GFP_KERNEL
, node
);
2074 free_kmem_cache_nodes(s
);
2077 init_kmem_cache_node(s
, n
);
2078 s
->node_slab
[node
] = n
;
2083 static void free_kmem_cache_nodes(struct kmem_cache
*s
)
2087 static int alloc_kmem_cache_nodes(struct kmem_cache
*s
)
/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */
2097 static int calculate_sizes(struct kmem_cache
*s
)
2099 unsigned long flags
= s
->flags
;
2100 unsigned long size
= s
->objsize
;
2101 unsigned long align
= s
->align
;
2104 * Determine if we can poison the object itself. If the user of
2105 * the slab may touch the object after free or before allocation
2106 * then we should never poison the object itself.
2108 if (slab_poison(s
) && !(flags
& SLAB_DESTROY_BY_RCU
) && !s
->ctor
)
2109 s
->flags
|= __OBJECT_POISON
;
2111 s
->flags
&= ~__OBJECT_POISON
;
2114 * Round up object size to the next word boundary. We can only
2115 * place the free pointer at word boundaries and this determines
2116 * the possible location of the free pointer.
2118 size
= ALIGN(size
, sizeof(void *));
2120 #ifdef CONFIG_SLQB_DEBUG
2122 * If we are Redzoning then check if there is some space between the
2123 * end of the object and the free pointer. If not then add an
2124 * additional word to have some bytes to store Redzone information.
2126 if ((flags
& SLAB_RED_ZONE
) && size
== s
->objsize
)
2127 size
+= sizeof(void *);
2131 * With that we have determined the number of bytes in actual use
2132 * by the object. This is the potential offset to the free pointer.
2136 if (((flags
& (SLAB_DESTROY_BY_RCU
| SLAB_POISON
)) || s
->ctor
)) {
2138 * Relocate free pointer after the object if it is not
2139 * permitted to overwrite the first word of the object on
2142 * This is the case if we do RCU, have a constructor or
2143 * destructor or are poisoning the objects.
2146 size
+= sizeof(void *);
2149 #ifdef CONFIG_SLQB_DEBUG
2150 if (flags
& SLAB_STORE_USER
) {
2152 * Need to store information about allocs and frees after
2155 size
+= 2 * sizeof(struct track
);
2158 if (flags
& SLAB_RED_ZONE
) {
                /*
                 * Add some empty padding so that we can catch
                 * overwrites from earlier objects rather than let
                 * tracking information or the free pointer be
                 * corrupted if a user writes before the start
                 * of the object.
                 */
2166 size
+= sizeof(void *);
2171 * Determine the alignment based on various parameters that the
2172 * user specified and the dynamic determination of cache line size
2175 align
= calculate_alignment(flags
, align
, s
->objsize
);
2178 * SLQB stores one object immediately after another beginning from
2179 * offset 0. In order to align the objects we have to simply size
2180 * each object to conform to the alignment.
2182 size
= ALIGN(size
, align
);
2184 s
->order
= calculate_order(size
);
2191 s
->allocflags
|= __GFP_COMP
;
2193 if (s
->flags
& SLAB_CACHE_DMA
)
2194 s
->allocflags
|= SLQB_DMA
;
2196 if (s
->flags
& SLAB_RECLAIM_ACCOUNT
)
2197 s
->allocflags
|= __GFP_RECLAIMABLE
;
2200 * Determine the number of objects per slab
2202 s
->objects
= (PAGE_SIZE
<< s
->order
) / size
;
2204 s
->freebatch
= max(4UL*PAGE_SIZE
/ size
,
2205 min(256UL, 64*PAGE_SIZE
/ size
));
2208 s
->hiwater
= s
->freebatch
<< 2;
2210 return !!s
->objects
;
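/*
 * Worked example (assuming 4KB pages): for a 256-byte object on an order-0
 * slab, s->objects = 4096 / 256 = 16, s->freebatch =
 * max(4 * 4096 / 256, min(256, 64 * 4096 / 256)) = max(64, 256) = 256, and
 * s->hiwater = 256 << 2 = 1024, i.e. up to 1024 objects may sit on a per-CPU
 * freelist before a batch is flushed back to its pages.
 */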
/*
 * Per-cpu allocator can't be used because it always uses the slab allocator,
 * and it can't do per-node allocations.
 */
2219 static void *kmem_cache_dyn_array_alloc(int ids
)
2221 size_t size
= sizeof(void *) * ids
;
2225 if (unlikely(!slab_is_available())) {
2226 static void *nextmem
;
2227 static size_t nextleft
;
2231 * Special case for setting up initial caches. These will
2232 * never get freed by definition so we can do it rather
2235 if (size
> nextleft
) {
2236 nextmem
= alloc_pages_exact(size
, GFP_KERNEL
);
2239 nextleft
= roundup(size
, PAGE_SIZE
);
2245 memset(ret
, 0, size
);
2248 return kzalloc(size
, GFP_KERNEL
);
2252 static void kmem_cache_dyn_array_free(void *array
)
2254 if (unlikely(!slab_is_available()))
2255 return; /* error case without crashing here (will panic soon) */
2261 * Except in early boot, this should be called with slqb_lock held for write
2262 * to lock out hotplug, and protect list modifications.
2264 static int kmem_cache_open(struct kmem_cache
*s
,
2265 const char *name
, size_t size
, size_t align
,
2266 unsigned long flags
, void (*ctor
)(void *), int alloc
)
2268 unsigned int left_over
;
2270 memset(s
, 0, sizeof(struct kmem_cache
));
2275 s
->flags
= kmem_cache_flags(size
, flags
, name
, ctor
);
2277 if (!calculate_sizes(s
))
2280 if (!slab_debug(s
)) {
2281 left_over
= (PAGE_SIZE
<< s
->order
) - (s
->objects
* s
->size
);
2282 s
->colour_off
= max(cache_line_size(), s
->align
);
2283 s
->colour_range
= left_over
;
2286 s
->colour_range
= 0;
2290 s
->cpu_slab
= kmem_cache_dyn_array_alloc(nr_cpu_ids
);
2294 s
->node_slab
= kmem_cache_dyn_array_alloc(nr_node_ids
);
2296 goto error_cpu_array
;
2300 if (likely(alloc
)) {
2301 if (!alloc_kmem_cache_nodes(s
))
2302 goto error_node_array
;
2304 if (!alloc_kmem_cache_cpus(s
))
2309 list_add(&s
->list
, &slab_caches
);
2314 free_kmem_cache_nodes(s
);
2316 #if defined(CONFIG_NUMA) && defined(CONFIG_SMP)
2317 kmem_cache_dyn_array_free(s
->node_slab
);
2321 kmem_cache_dyn_array_free(s
->cpu_slab
);
2324 if (flags
& SLAB_PANIC
)
2325 panic("%s: failed to create slab `%s'\n", __func__
, name
);
2330 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
2331 * @s: the cache we're checking against
2332 * @ptr: pointer to validate
2334 * This verifies that the untrusted pointer looks sane;
2335 * it is _not_ a guarantee that the pointer is actually
2336 * part of the slab cache in question, but it at least
2337 * validates that the pointer can be dereferenced and
2338 * looks half-way sane.
2340 * Currently only used for dentry validation.
2342 int kmem_ptr_validate(struct kmem_cache
*s
, const void *ptr
)
2344 unsigned long addr
= (unsigned long)ptr
;
2345 struct slqb_page
*page
;
2347 if (unlikely(addr
< PAGE_OFFSET
))
2349 if (unlikely(addr
> (unsigned long)high_memory
- s
->size
))
2351 if (unlikely(!IS_ALIGNED(addr
, s
->align
)))
2353 if (unlikely(!kern_addr_valid(addr
)))
2355 if (unlikely(!kern_addr_valid(addr
+ s
->size
- 1)))
2357 if (unlikely(!pfn_valid(addr
>> PAGE_SHIFT
)))
2359 page
= virt_to_head_slqb_page(ptr
);
2360 if (unlikely(!(page
->flags
& PG_SLQB_BIT
)))
2362 if (unlikely(page
->list
->cache
!= s
)) /* XXX: ouch, racy */
2368 EXPORT_SYMBOL(kmem_ptr_validate
);
2371 * Determine the size of a slab object
2373 unsigned int kmem_cache_size(struct kmem_cache
*s
)
2377 EXPORT_SYMBOL(kmem_cache_size
);
2379 const char *kmem_cache_name(struct kmem_cache
*s
)
2383 EXPORT_SYMBOL(kmem_cache_name
);
2386 * Release all resources used by a slab cache. No more concurrency on the
2387 * slab, so we can touch remote kmem_cache_cpu structures.
2389 void kmem_cache_destroy(struct kmem_cache
*s
)
2396 down_write(&slqb_lock
);
2399 local_irq_disable();
2401 for_each_online_cpu(cpu
) {
2402 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2403 struct kmem_cache_list
*l
= &c
->list
;
2405 flush_free_list_all(s
, l
);
2406 flush_remote_free_cache(s
, c
);
2410 for_each_online_cpu(cpu
) {
2411 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2412 struct kmem_cache_list
*l
= &c
->list
;
2414 claim_remote_free_list(s
, l
);
2415 flush_free_list_all(s
, l
);
2417 WARN_ON(l
->freelist
.nr
);
2418 WARN_ON(l
->nr_slabs
);
2419 WARN_ON(l
->nr_partial
);
2422 free_kmem_cache_cpus(s
);
2425 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2426 struct kmem_cache_node
*n
;
2427 struct kmem_cache_list
*l
;
2429 n
= s
->node_slab
[node
];
2434 claim_remote_free_list(s
, l
);
2435 flush_free_list_all(s
, l
);
2437 WARN_ON(l
->freelist
.nr
);
2438 WARN_ON(l
->nr_slabs
);
2439 WARN_ON(l
->nr_partial
);
2442 free_kmem_cache_nodes(s
);
2446 sysfs_slab_remove(s
);
2447 up_write(&slqb_lock
);
2449 EXPORT_SYMBOL(kmem_cache_destroy
);
2451 /********************************************************************
2453 *******************************************************************/
2455 struct kmem_cache kmalloc_caches
[KMALLOC_SHIFT_SLQB_HIGH
+ 1] __cacheline_aligned
;
2456 EXPORT_SYMBOL(kmalloc_caches
);
2458 #ifdef CONFIG_ZONE_DMA
2459 struct kmem_cache kmalloc_caches_dma
[KMALLOC_SHIFT_SLQB_HIGH
+ 1] __cacheline_aligned
;
2460 EXPORT_SYMBOL(kmalloc_caches_dma
);
2463 #ifndef ARCH_KMALLOC_FLAGS
2464 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
2467 static struct kmem_cache
*open_kmalloc_cache(struct kmem_cache
*s
,
2468 const char *name
, int size
, gfp_t gfp_flags
)
2470 unsigned int flags
= ARCH_KMALLOC_FLAGS
| SLAB_PANIC
;
2472 if (gfp_flags
& SLQB_DMA
)
2473 flags
|= SLAB_CACHE_DMA
;
2475 kmem_cache_open(s
, name
, size
, ARCH_KMALLOC_MINALIGN
, flags
, NULL
, 1);
/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
2486 static s8 size_index
[24] __cacheline_aligned
= {
2495 #if L1_CACHE_BYTES < 64
2510 #if L1_CACHE_BYTES < 128
2531 static struct kmem_cache
*get_slab(size_t size
, gfp_t flags
)
2535 if (unlikely(size
<= KMALLOC_MIN_SIZE
)) {
2536 if (unlikely(!size
))
2537 return ZERO_SIZE_PTR
;
2539 index
= KMALLOC_SHIFT_LOW
;
2543 #if L1_CACHE_BYTES >= 128
2548 index
= size_index
[(size
- 1) / 8];
2550 if (unlikely(size
> 1UL << KMALLOC_SHIFT_SLQB_HIGH
))
2553 index
= fls(size
- 1);
2557 if (unlikely((flags
& SLQB_DMA
)))
2558 return &kmalloc_caches_dma
[index
];
2560 return &kmalloc_caches
[index
];
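/*
 * Worked example (assuming the default size_index table): kmalloc(100, ...)
 * takes the small-size path, (100 - 1) / 8 = 12, which maps to the 128-byte
 * cache; a 300-byte request is above the table's range, so it uses
 * fls(300 - 1) = 9, i.e. the 512-byte cache. Allocations with the DMA flag
 * take the same index from kmalloc_caches_dma[] instead.
 */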
2563 void *__kmalloc(size_t size
, gfp_t flags
)
2565 struct kmem_cache
*s
;
2567 s
= get_slab(size
, flags
);
2568 if (unlikely(ZERO_OR_NULL_PTR(s
)))
2571 return __kmem_cache_alloc(s
, flags
, _RET_IP_
);
2573 EXPORT_SYMBOL(__kmalloc
);
2576 void *__kmalloc_node(size_t size
, gfp_t flags
, int node
)
2578 struct kmem_cache
*s
;
2580 s
= get_slab(size
, flags
);
2581 if (unlikely(ZERO_OR_NULL_PTR(s
)))
2584 return kmem_cache_alloc_node(s
, flags
, node
);
2586 EXPORT_SYMBOL(__kmalloc_node
);
2589 size_t ksize(const void *object
)
2591 struct slqb_page
*page
;
2592 struct kmem_cache
*s
;
2595 if (unlikely(object
== ZERO_SIZE_PTR
))
2598 page
= virt_to_head_slqb_page(object
);
2599 BUG_ON(!(page
->flags
& PG_SLQB_BIT
));
2601 s
= page
->list
->cache
;
2604 * Debugging requires use of the padding between object
2605 * and whatever may come after it.
2607 if (s
->flags
& (SLAB_RED_ZONE
| SLAB_POISON
))
2611 * If we have the need to store the freelist pointer
2612 * back there or track user information then we can
2613 * only use the space before that information.
2615 if (s
->flags
& (SLAB_DESTROY_BY_RCU
| SLAB_STORE_USER
))
2619 * Else we can use all the padding etc for the allocation
2623 EXPORT_SYMBOL(ksize
);
2625 void kfree(const void *object
)
2627 struct kmem_cache
*s
;
2628 struct slqb_page
*page
;
2630 if (unlikely(ZERO_OR_NULL_PTR(object
)))
2633 page
= virt_to_head_slqb_page(object
);
2634 s
= page
->list
->cache
;
2636 slab_free(s
, page
, (void *)object
);
2638 EXPORT_SYMBOL(kfree
);
2640 static void kmem_cache_trim_percpu(void *arg
)
2642 int cpu
= smp_processor_id();
2643 struct kmem_cache
*s
= arg
;
2644 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2645 struct kmem_cache_list
*l
= &c
->list
;
2647 claim_remote_free_list(s
, l
);
2648 flush_free_list(s
, l
);
2650 flush_remote_free_cache(s
, c
);
2654 int kmem_cache_shrink(struct kmem_cache
*s
)
2660 on_each_cpu(kmem_cache_trim_percpu
, s
, 1);
2663 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2664 struct kmem_cache_node
*n
;
2665 struct kmem_cache_list
*l
;
2667 n
= s
->node_slab
[node
];
2672 spin_lock_irq(&n
->list_lock
);
2673 claim_remote_free_list(s
, l
);
2674 flush_free_list(s
, l
);
2675 spin_unlock_irq(&n
->list_lock
);
2681 EXPORT_SYMBOL(kmem_cache_shrink
);
2683 #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2684 static void kmem_cache_reap_percpu(void *arg
)
2686 int cpu
= smp_processor_id();
2687 struct kmem_cache
*s
;
2688 long phase
= (long)arg
;
2690 list_for_each_entry(s
, &slab_caches
, list
) {
2691 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2692 struct kmem_cache_list
*l
= &c
->list
;
2695 flush_free_list_all(s
, l
);
2696 flush_remote_free_cache(s
, c
);
2700 claim_remote_free_list(s
, l
);
2701 flush_free_list_all(s
, l
);
static void kmem_cache_reap(void)
{
	struct kmem_cache *s;
	int node;

	down_read(&slqb_lock);
	on_each_cpu(kmem_cache_reap_percpu, (void *)0, 1);
	on_each_cpu(kmem_cache_reap_percpu, (void *)1, 1);

	list_for_each_entry(s, &slab_caches, list) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n;
			struct kmem_cache_list *l;

			n = s->node_slab[node];
			if (!n)
				continue;
			l = &n->list;

			spin_lock_irq(&n->list_lock);
			claim_remote_free_list(s, l);
			flush_free_list_all(s, l);
			spin_unlock_irq(&n->list_lock);
		}
	}
	up_read(&slqb_lock);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */

static void cache_trim_worker(struct work_struct *w)
{
	struct delayed_work *work =
		container_of(w, struct delayed_work, work);
	struct kmem_cache *s;

	if (!down_read_trylock(&slqb_lock))
		goto out;

	list_for_each_entry(s, &slab_caches, list) {
		int node = numa_node_id();
		struct kmem_cache_node *n = s->node_slab[node];

		if (n) {
			struct kmem_cache_list *l = &n->list;

			spin_lock_irq(&n->list_lock);
			claim_remote_free_list(s, l);
			flush_free_list(s, l);
			spin_unlock_irq(&n->list_lock);
		}

		local_irq_disable();
		kmem_cache_trim_percpu(s);
		local_irq_enable();
	}

	up_read(&slqb_lock);
out:
	schedule_delayed_work(work, round_jiffies_relative(3*HZ));
}

static DEFINE_PER_CPU(struct delayed_work, cache_trim_work);

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *cache_trim_work = &per_cpu(cache_trim_work, cpu);

	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be set up
	 * at that point.
	 */
	if (keventd_up() && cache_trim_work->work.func == NULL) {
		INIT_DELAYED_WORK(cache_trim_work, cache_trim_worker);
		schedule_delayed_work_on(cpu, cache_trim_work,
					__round_jiffies_relative(HZ, cpu));
	}
}

static int __init cpucache_init(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);

	return 0;
}
device_initcall(cpucache_init);

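/*
 * Timing sketch (illustrative, derived from the code above): each CPU has
 * its own cache_trim_work, started at boot by cpucache_init() and on
 * hotplug by the CPU notifier later in this file.  The first run is
 * staggered per CPU with __round_jiffies_relative(HZ, cpu); afterwards
 * cache_trim_worker() re-arms itself roughly every three seconds:
 *
 *	schedule_delayed_work(work, round_jiffies_relative(3*HZ));
 *
 * so queued-up free objects are returned to the page allocator within a
 * few seconds even on an otherwise idle CPU.
 */
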
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
static void slab_mem_going_offline_callback(void *arg)
{
	kmem_cache_reap();
}

static void slab_mem_offline_callback(void *arg)
{
	/* XXX: should release structures, see CPU offline comment */
}

static int slab_mem_going_online_callback(void *arg)
{
	struct kmem_cache *s;
	struct kmem_cache_node *n;
	struct memory_notify *marg = arg;
	int nid = marg->status_change_nid;
	int ret = 0;

	/*
	 * If the node's memory is already available, then kmem_cache_node is
	 * already created. Nothing to do.
	 */
	if (nid < 0)
		return 0;

	/*
	 * We are bringing a node online. No memory is available yet. We must
	 * allocate a kmem_cache_node structure in order to bring the node
	 * online.
	 */
	down_write(&slqb_lock);
	list_for_each_entry(s, &slab_caches, list) {
		/*
		 * XXX: kmem_cache_alloc_node will fallback to other nodes
		 * since memory is not yet available from the node that
		 * is being brought up.
		 */
		if (s->node_slab[nid]) /* could be leftover from last online */
			continue;

		n = kmem_cache_alloc(&kmem_node_cache, GFP_KERNEL);
		if (!n) {
			ret = -ENOMEM;
			goto out;
		}
		init_kmem_cache_node(s, n);
		s->node_slab[nid] = n;
	}
out:
	up_write(&slqb_lock);
	return ret;
}

static int slab_memory_callback(struct notifier_block *self,
			unsigned long action, void *arg)
{
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		ret = slab_mem_going_online_callback(arg);
		break;
	case MEM_GOING_OFFLINE:
		slab_mem_going_offline_callback(arg);
		break;
	case MEM_CANCEL_ONLINE:
		slab_mem_offline_callback(arg);
		break;
	case MEM_CANCEL_OFFLINE:
		break;
	}

	ret = notifier_from_errno(ret);
	return ret;
}

#endif /* CONFIG_MEMORY_HOTPLUG */

/********************************************************************
 *			Basic setup of slabs
 *******************************************************************/

void __init kmem_cache_init(void)
{
	int i;
	unsigned int flags = SLAB_HWCACHE_ALIGN|SLAB_PANIC;

	/*
	 * All the ifdefs are rather ugly here, but it's just the setup code,
	 * so it doesn't have to be too readable :)
	 */

	/*
	 * No need to take slqb_lock here: there should be no concurrency
	 * anyway, and spin_unlock_irq in rwsem code could enable interrupts
	 * too early.
	 */
	kmem_cache_open(&kmem_cache_cache, "kmem_cache",
			sizeof(struct kmem_cache), 0, flags, NULL, 0);
	kmem_cache_open(&kmem_cpu_cache, "kmem_cache_cpu",
			sizeof(struct kmem_cache_cpu), 0, flags, NULL, 0);
	kmem_cache_open(&kmem_node_cache, "kmem_cache_node",
			sizeof(struct kmem_cache_node), 0, flags, NULL, 0);

#ifdef CONFIG_SMP
	for_each_possible_cpu(i) {
		struct kmem_cache_cpu *c;

		c = &per_cpu(kmem_cache_cpus, i);
		init_kmem_cache_cpu(&kmem_cache_cache, c);
		kmem_cache_cache.cpu_slab[i] = c;

		c = &per_cpu(kmem_cpu_cpus, i);
		init_kmem_cache_cpu(&kmem_cpu_cache, c);
		kmem_cpu_cache.cpu_slab[i] = c;

		c = &per_cpu(kmem_node_cpus, i);
		init_kmem_cache_cpu(&kmem_node_cache, c);
		kmem_node_cache.cpu_slab[i] = c;
	}
#else
	init_kmem_cache_cpu(&kmem_cache_cache, &kmem_cache_cache.cpu_slab);
#endif

	for_each_node_state(i, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n;

		n = &kmem_cache_nodes[i];
		init_kmem_cache_node(&kmem_cache_cache, n);
		kmem_cache_cache.node_slab[i] = n;

		n = &kmem_cpu_nodes[i];
		init_kmem_cache_node(&kmem_cpu_cache, n);
		kmem_cpu_cache.node_slab[i] = n;

		n = &kmem_node_nodes[i];
		init_kmem_cache_node(&kmem_node_cache, n);
		kmem_node_cache.node_slab[i] = n;
	}

	/* Caches that are not of the two-to-the-power-of size */
	if (L1_CACHE_BYTES < 64 && KMALLOC_MIN_SIZE <= 64) {
		open_kmalloc_cache(&kmalloc_caches[1],
				"kmalloc-96", 96, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[1],
				"kmalloc_dma-96", 96, GFP_KERNEL|SLQB_DMA);
#endif
	}
	if (L1_CACHE_BYTES < 128 && KMALLOC_MIN_SIZE <= 128) {
		open_kmalloc_cache(&kmalloc_caches[2],
				"kmalloc-192", 192, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[2],
				"kmalloc_dma-192", 192, GFP_KERNEL|SLQB_DMA);
#endif
	}

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_SLQB_HIGH; i++) {
		open_kmalloc_cache(&kmalloc_caches[i],
				"kmalloc", 1 << i, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[i],
				"kmalloc_dma", 1 << i, GFP_KERNEL|SLQB_DMA);
#endif
	}

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * mips it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
			(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;

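	/*
	 * Worked example (illustrative): on an architecture where
	 * ARCH_KMALLOC_MINALIGN forces KMALLOC_MIN_SIZE to 32, the loop
	 * above runs for i = 8, 16, 24 and rewrites size_index[0..2] to
	 * KMALLOC_SHIFT_LOW, so requests of 1-24 bytes are routed to the
	 * smallest cache that still satisfies the 32-byte minimum rather
	 * than to the (non-existent) 8- and 16-byte caches.
	 */
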
	/* Provide the correct kmalloc names now that the caches are up */
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_SLQB_HIGH; i++) {
		kmalloc_caches[i].name =
			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
#ifdef CONFIG_ZONE_DMA
		kmalloc_caches_dma[i].name =
			kasprintf(GFP_KERNEL, "kmalloc_dma-%d", 1 << i);
#endif
	}

	register_cpu_notifier(&slab_notifier);
	hotplug_memory_notifier(slab_memory_callback, 1);

	/*
	 * smp_init() has not yet been called, so no worries about memory
	 * ordering with __slab_is_available.
	 */
	__slab_is_available = 1;
}

void __init kmem_cache_init_late(void)
{
}

/*
 * Some basic slab creation sanity checks
 */
static int kmem_cache_create_ok(const char *name, size_t size,
		size_t align, unsigned long flags)
{
	struct kmem_cache *tmp;

	/*
	 * Sanity checks... these are all serious usage bugs.
	 */
	if (!name || in_interrupt() || (size < sizeof(void *))) {
		printk(KERN_ERR "kmem_cache_create(): early error in slab %s\n",
				name);
		dump_stack();
		return 0;
	}

	list_for_each_entry(tmp, &slab_caches, list) {
		char x;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(tmp->name, x);
		if (res) {
			printk(KERN_ERR
			       "SLAB: cache with size %d has lost its name\n",
			       tmp->size);
			continue;
		}

		if (!strcmp(tmp->name, name)) {
			printk(KERN_ERR
			       "SLAB: duplicate cache %s\n", name);
			dump_stack();
			return 0;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	if (flags & SLAB_DESTROY_BY_RCU)
		WARN_ON(flags & SLAB_POISON);

	return 1;
}

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;

	down_write(&slqb_lock);
	if (!kmem_cache_create_ok(name, size, align, flags))
		goto err;

	s = kmem_cache_alloc(&kmem_cache_cache, GFP_KERNEL);
	if (!s)
		goto err;

	if (kmem_cache_open(s, name, size, align, flags, ctor, 1)) {
		up_write(&slqb_lock);
		return s;
	}

	kmem_cache_free(&kmem_cache_cache, s);

err:
	up_write(&slqb_lock);
	if (flags & SLAB_PANIC)
		panic("%s: failed to create slab `%s'\n", __func__, name);

	return NULL;
}
EXPORT_SYMBOL(kmem_cache_create);

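/*
 * Illustrative usage sketch (hypothetical names, not part of this file):
 * creating and using a cache through the interface above.
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
 *					0, SLAB_HWCACHE_ALIGN, NULL);
 *	...
 *	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, f);
 *
 * Passing SLAB_PANIC lets boot-time callers skip the NULL check, since a
 * failure then panics inside kmem_cache_create() itself.
 */
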
/*
 * Use the cpu notifier to ensure that the cpu slabs are flushed when
 * necessary.
 */
static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct kmem_cache *s;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		down_write(&slqb_lock);
		list_for_each_entry(s, &slab_caches, list) {
			if (s->cpu_slab[cpu]) /* could be leftover from last online */
				continue;
			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu);
			if (!s->cpu_slab[cpu]) {
				up_write(&slqb_lock);
				return NOTIFY_BAD;
			}
		}
		up_write(&slqb_lock);
		break;

	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_rearming_delayed_work(&per_cpu(cache_trim_work, cpu));
		per_cpu(cache_trim_work, cpu).work.func = NULL;
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/*
		 * XXX: Freeing here doesn't work because objects can still be
		 * on this CPU's list. periodic timer needs to check if a CPU
		 * is offline and then try to cleanup from there. Same for node
		 * offline.
		 */
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata slab_notifier = {
	.notifier_call = slab_cpuup_callback
};

#ifdef CONFIG_SLQB_DEBUG
void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
	struct kmem_cache *s;
	int node = -1;

	s = get_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;

	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
		node = alternate_nid(s, flags, node);

	return slab_alloc(s, flags, node, caller);
}

void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
		unsigned long caller)
{
	struct kmem_cache *s;

	s = get_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;

	return slab_alloc(s, flags, node, caller);
}
#endif

#if defined(CONFIG_SLQB_SYSFS) || defined(CONFIG_SLABINFO)
struct stats_gather {
	struct kmem_cache *s;
	spinlock_t lock;
	unsigned long nr_slabs;
	unsigned long nr_partial;
	unsigned long nr_inuse;
	unsigned long nr_objects;

#ifdef CONFIG_SLQB_STATS
	unsigned long stats[NR_SLQB_STAT_ITEMS];
#endif
};

static void __gather_stats(void *arg)
{
	unsigned long nr_slabs;
	unsigned long nr_partial;
	unsigned long nr_inuse;
	struct stats_gather *gather = arg;
	int cpu = smp_processor_id();
	struct kmem_cache *s = gather->s;
	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
	struct kmem_cache_list *l = &c->list;
	struct slqb_page *page;
#ifdef CONFIG_SLQB_STATS
	int i;
#endif

	spin_lock(&l->page_lock);
	nr_slabs = l->nr_slabs;
	nr_partial = l->nr_partial;
	nr_inuse = (nr_slabs - nr_partial) * s->objects;

	list_for_each_entry(page, &l->partial, lru) {
		nr_inuse += page->inuse;
	}
	spin_unlock(&l->page_lock);

	spin_lock(&gather->lock);
	gather->nr_slabs += nr_slabs;
	gather->nr_partial += nr_partial;
	gather->nr_inuse += nr_inuse;
#ifdef CONFIG_SLQB_STATS
	for (i = 0; i < NR_SLQB_STAT_ITEMS; i++)
		gather->stats[i] += l->stats[i];
#endif
	spin_unlock(&gather->lock);
}

/* must be called with slqb_lock held */
static void gather_stats_locked(struct kmem_cache *s,
				struct stats_gather *stats)
{
	int node;

	memset(stats, 0, sizeof(struct stats_gather));
	stats->s = s;
	spin_lock_init(&stats->lock);

	on_each_cpu(__gather_stats, stats, 1);

	for_each_online_node(node) {
		struct kmem_cache_node *n = s->node_slab[node];
		struct kmem_cache_list *l = &n->list;
		struct slqb_page *page;
		unsigned long flags;
#ifdef CONFIG_SLQB_STATS
		int i;
#endif

		spin_lock_irqsave(&n->list_lock, flags);
#ifdef CONFIG_SLQB_STATS
		for (i = 0; i < NR_SLQB_STAT_ITEMS; i++)
			stats->stats[i] += l->stats[i];
#endif
		stats->nr_slabs += l->nr_slabs;
		stats->nr_partial += l->nr_partial;
		stats->nr_inuse += (l->nr_slabs - l->nr_partial) * s->objects;

		list_for_each_entry(page, &l->partial, lru) {
			stats->nr_inuse += page->inuse;
		}
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	stats->nr_objects = stats->nr_slabs * s->objects;
}

#ifdef CONFIG_SLQB_SYSFS
static void gather_stats(struct kmem_cache *s, struct stats_gather *stats)
{
	down_read(&slqb_lock); /* hold off hotplug */
	gather_stats_locked(s, stats);
	up_read(&slqb_lock);
}
#endif

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		size_t count, loff_t *ppos)
{
	return -EINVAL;
}

static void print_slabinfo_header(struct seq_file *m)
{
	seq_puts(m, "slabinfo - version: 2.1\n");
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		"<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
	seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	down_read(&slqb_lock);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

static void s_stop(struct seq_file *m, void *p)
{
	up_read(&slqb_lock);
}

static int s_show(struct seq_file *m, void *p)
{
	struct stats_gather stats;
	struct kmem_cache *s;

	s = list_entry(p, struct kmem_cache, list);

	gather_stats_locked(s, &stats);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, stats.nr_inuse,
			stats.nr_objects, s->size, s->objects, (1 << s->order));
	seq_printf(m, " : tunables %4u %4u %4u", slab_hiwater(s),
			slab_freebatch(s), 0);
	seq_printf(m, " : slabdata %6lu %6lu %6lu", stats.nr_slabs,
			stats.nr_slabs, 0UL);
	seq_putc(m, '\n');
	return 0;
}

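/*
 * Illustrative output (hypothetical numbers): one line produced by s_show()
 * above for a cache named kmalloc-64, in the slabinfo 2.1 format declared
 * by print_slabinfo_header():
 *
 *	kmalloc-64   1920   2048   64   64   1 : tunables 1024 256 0 : slabdata   32   32   0
 *
 * <active_objs>/<num_objs> come from the gathered stats; <objsize>,
 * <objperslab> and <pagesperslab> come from the kmem_cache itself; the
 * tunables column reports hiwater and freebatch (sharedfactor is always 0
 * under SLQB).
 */
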
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IWUSR|S_IRUGO, NULL,
			&proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */

#ifdef CONFIG_SLQB_SYSFS
#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)

struct slab_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};

#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)

#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0644, _name##_show, _name##_store)

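/*
 * Illustrative expansion (not additional code): SLAB_ATTR_RO(order) from
 * the macros above produces
 *
 *	static struct slab_attribute order_attr = __ATTR_RO(order);
 *
 * i.e. a read-only sysfs attribute named "order" whose ->show method is
 * order_show() and which has no ->store.  SLAB_ATTR(freebatch) likewise
 * wires freebatch_show()/freebatch_store() to a mode 0644 "freebatch" file.
 */
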
static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->size);
}
SLAB_ATTR_RO(slab_size);

static ssize_t align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->align);
}
SLAB_ATTR_RO(align);

static ssize_t object_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objsize);
}
SLAB_ATTR_RO(object_size);

static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objects);
}
SLAB_ATTR_RO(objs_per_slab);

static ssize_t order_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->order);
}
SLAB_ATTR_RO(order);

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
	if (s->ctor) {
		int n = sprint_symbol(buf, (unsigned long)s->ctor);

		return n + sprintf(buf + n, "\n");
	}
	return 0;
}
SLAB_ATTR_RO(ctor);

static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_slabs);
}
SLAB_ATTR_RO(slabs);

static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_inuse);
}
SLAB_ATTR_RO(objects);

static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_objects);
}
SLAB_ATTR_RO(total_objects);

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
}
SLAB_ATTR_RO(reclaim_account);

static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
}
SLAB_ATTR_RO(hwcache_align);

#ifdef CONFIG_ZONE_DMA
static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
}
SLAB_ATTR_RO(cache_dma);
#endif

static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);

static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}
SLAB_ATTR_RO(red_zone);

static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
}
SLAB_ATTR_RO(poison);

static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}
SLAB_ATTR_RO(store_user);

static ssize_t hiwater_store(struct kmem_cache *s,
		const char *buf, size_t length)
{
	long hiwater;
	int err;

	err = strict_strtol(buf, 10, &hiwater);
	if (err)
		return err;

	if (hiwater < 0)
		return -EINVAL;

	s->hiwater = hiwater;

	return length;
}

static ssize_t hiwater_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", slab_hiwater(s));
}
SLAB_ATTR(hiwater);

static ssize_t freebatch_store(struct kmem_cache *s,
		const char *buf, size_t length)
{
	long freebatch;
	int err;

	err = strict_strtol(buf, 10, &freebatch);
	if (err)
		return err;

	if (freebatch <= 0 || freebatch - 1 > s->hiwater)
		return -EINVAL;

	s->freebatch = freebatch;

	return length;
}

static ssize_t freebatch_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", slab_freebatch(s));
}
SLAB_ATTR(freebatch);

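/*
 * Illustrative tuning sketch (hypothetical values): with CONFIG_SLQB_SYSFS
 * enabled, each cache gets a directory under /sys/kernel/slab/, and the
 * hiwater/freebatch attributes above can be adjusted at runtime, e.g.
 *
 *	echo 2048 > /sys/kernel/slab/kmalloc-64/hiwater
 *	echo 512  > /sys/kernel/slab/kmalloc-64/freebatch
 *
 * freebatch_store() rejects values that are <= 0 or larger than
 * hiwater + 1, so hiwater should be raised before freebatch.
 */
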
#ifdef CONFIG_SLQB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	struct stats_gather stats;
	int len;
	int cpu;

	gather_stats(s, &stats);

	len = sprintf(buf, "%lu", stats.stats[si]);

	for_each_online_cpu(cpu) {
		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
		struct kmem_cache_list *l = &c->list;

		if (len < PAGE_SIZE - 20)
			len += sprintf(buf+len, " C%d=%lu", cpu, l->stats[si]);
	}

	return len + sprintf(buf + len, "\n");
}

#define STAT_ATTR(si, text) 					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
SLAB_ATTR_RO(text);						\

STAT_ATTR(ALLOC, alloc);
STAT_ATTR(ALLOC_SLAB_FILL, alloc_slab_fill);
STAT_ATTR(ALLOC_SLAB_NEW, alloc_slab_new);
STAT_ATTR(FREE, free);
STAT_ATTR(FREE_REMOTE, free_remote);
STAT_ATTR(FLUSH_FREE_LIST, flush_free_list);
STAT_ATTR(FLUSH_FREE_LIST_OBJECTS, flush_free_list_objects);
STAT_ATTR(FLUSH_FREE_LIST_REMOTE, flush_free_list_remote);
STAT_ATTR(FLUSH_SLAB_PARTIAL, flush_slab_partial);
STAT_ATTR(FLUSH_SLAB_FREE, flush_slab_free);
STAT_ATTR(FLUSH_RFREE_LIST, flush_rfree_list);
STAT_ATTR(FLUSH_RFREE_LIST_OBJECTS, flush_rfree_list_objects);
STAT_ATTR(CLAIM_REMOTE_LIST, claim_remote_list);
STAT_ATTR(CLAIM_REMOTE_LIST_OBJECTS, claim_remote_list_objects);
#endif

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&objects_attr.attr,
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&ctor_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&hiwater_attr.attr,
	&freebatch_attr.attr,
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_SLQB_STATS
	&alloc_attr.attr,
	&alloc_slab_fill_attr.attr,
	&alloc_slab_new_attr.attr,
	&free_attr.attr,
	&free_remote_attr.attr,
	&flush_free_list_attr.attr,
	&flush_free_list_objects_attr.attr,
	&flush_free_list_remote_attr.attr,
	&flush_slab_partial_attr.attr,
	&flush_slab_free_attr.attr,
	&flush_rfree_list_attr.attr,
	&flush_rfree_list_objects_attr.attr,
	&claim_remote_list_attr.attr,
	&claim_remote_list_objects_attr.attr,
#endif
	NULL
};

static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

static ssize_t slab_attr_show(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
		struct attribute *attr, const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);

	return err;
}

static void kmem_cache_release(struct kobject *kobj)
{
	struct kmem_cache *s = to_slab(kobj);

	kmem_cache_free(&kmem_cache_cache, s);
}

static struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release
};

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;

static int sysfs_available __read_mostly;

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;

	if (!sysfs_available)
		return 0;

	s->kobj.kset = slab_kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, s->name);
	if (err) {
		kobject_put(&s->kobj);
		return err;
	}

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		return err;

	kobject_uevent(&s->kobj, KOBJ_ADD);

	return 0;
}

static void sysfs_slab_remove(struct kmem_cache *s)
{
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
	kobject_put(&s->kobj);
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		printk(KERN_ERR "Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	down_write(&slqb_lock);

	sysfs_available = 1;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			printk(KERN_ERR "SLQB: Unable to add boot slab %s"
				" to sysfs\n", s->name);
	}

	up_write(&slqb_lock);

	return 0;
}
device_initcall(slab_sysfs_init);