 * SLQB: A slab allocator that focuses on per-CPU scaling and good performance
 * with order-0 allocations. Fastpath emphasis is placed on local allocation
 * and freeing, but with a secondary goal of good remote freeing (freeing on
 * another CPU from the one that allocated).
 *
 * Using ideas and code from mm/slab.c, mm/slob.c, and mm/slub.c.
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/fault-inject.h>
 * - fix up releasing of offlined data structures. Not a big deal because
 *   they don't get cumulatively leaked with successive online/offline cycles.
 * - allow OOM conditions to flush back per-CPU pages to common lists to be
 *   reused by other CPUs.
 * - investigate performance with memoryless nodes. Perhaps CPUs can be given
 *   a default closest home node via which they can use fastpath functions.
 *   Perhaps it is not a big problem.
 * struct slqb_page overloads struct page, and is used to manage some slab
 * allocation aspects. However, to avoid the horrible mess in
 * include/linux/mm_types.h, we'll just define our own struct slqb_page type
 * variant here.
	unsigned long flags;			/* mandatory */
	atomic_t _count;			/* mandatory */
	unsigned int inuse;			/* Nr of objects */
	struct kmem_cache_list *list;		/* Pointer to list */
	void **freelist;			/* LIFO freelist */
	struct list_head lru;			/* misc. list */
	struct rcu_head rcu_head;		/* for rcu freeing */
static inline void struct_slqb_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slqb_page) != sizeof(struct page)); }

#define PG_SLQB_BIT (1 << PG_slab)
/*
 * slqb_min_order: minimum allocation order for slabs
 */
static int slqb_min_order;

/*
 * slqb_min_objects: minimum number of objects per slab. Increasing this
 * will increase the allocation order for slabs with larger objects.
 */
static int slqb_min_objects = 1;
#ifdef CONFIG_NUMA
static inline int slab_numa(struct kmem_cache *s)
{
	return s->flags & SLAB_NUMA;
}
#else
static inline int slab_numa(struct kmem_cache *s)
{
	return 0;
}
#endif

static inline int slab_hiwater(struct kmem_cache *s)
{
	return s->hiwater;
}

static inline int slab_freebatch(struct kmem_cache *s)
{
	return s->freebatch;
}
 * kmem_cache_node->list_lock
 * kmem_cache_remote_free->lock
 *
 * SLQB is primarily per-cpu. For each kmem_cache, each CPU has:
 *
 * - A LIFO list of node-local objects. Allocation and freeing of node-local
 *   objects goes first to this list.
 *
 * - 2 lists of slab pages, free and partial pages. If an allocation misses
 *   the object list, it tries from the partial list, then the free list.
 *   After freeing an object to the object list, if it is over a watermark,
 *   some objects are freed back to pages. If an allocation misses these lists,
 *   a new slab page is allocated from the page allocator. If the free list
 *   reaches a watermark, some of its pages are returned to the page allocator.
 *
 * - A remote free queue, where objects freed that did not come from the local
 *   node are queued to. When this reaches a watermark, the objects are
 *   flushed to the owning CPU's remotely freed queue.
 *
 * - A remotely freed queue, where objects allocated from this CPU are flushed
 *   to from other CPUs' remote free queues. kmem_cache_remote_free->lock is
 *   used to protect access to this queue.
 *
 * When the remotely freed queue reaches a watermark, a flag is set to tell
 * the owner CPU to check it. The owner CPU will then check the queue on the
 * next allocation that misses the object list. It will move all objects from
 * this list onto the object list and then allocate one.
 *
 * This system of remote queueing is intended to reduce lock and remote
 * cacheline acquisitions, and give a cooling-off period for remotely freed
 * objects before they are re-allocated.
 *
 * Node-specific allocations from somewhere other than the local node are
 * handled by a per-node list which is the same as the above per-CPU data
 * structures except for the following differences:
 *
 * - kmem_cache_node->list_lock is used to protect access for multiple CPUs to
 *   allocate from a given node.
 *
 * - There is no remote free queue. Nodes don't free objects, CPUs do.
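/*
 * A rough, standalone userspace sketch of the queueing scheme described
 * above. It is illustrative only: these are NOT the kmem_cache_cpu /
 * kmem_cache_list structures SLQB actually uses, and the batch size of 16
 * is a made-up number. It is kept inside #if 0 so it is never compiled as
 * part of this file.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

struct demo_object {
	struct demo_object *next;		/* freelist link */
};

struct demo_list {
	struct demo_object *head;		/* LIFO object list */
	unsigned long nr;			/* objects on the list */
	unsigned long hiwater;			/* flush watermark */

	pthread_mutex_t remote_lock;		/* like kmem_cache_remote_free->lock */
	struct demo_object *remote_head;	/* remotely freed objects */
	unsigned long remote_nr;
	int remote_free_check;			/* owner polls this on an alloc miss */
};

/* Local free: push onto the LIFO list; flush if over the watermark. */
static void demo_free_local(struct demo_list *l, struct demo_object *obj)
{
	obj->next = l->head;
	l->head = obj;
	if (++l->nr > l->hiwater) {
		/* SLQB would hand excess objects back to slab pages here */
	}
}

/* Remote free: queue on the owner's remotely freed list under its lock. */
static void demo_free_remote(struct demo_list *owner, struct demo_object *obj)
{
	pthread_mutex_lock(&owner->remote_lock);
	obj->next = owner->remote_head;
	owner->remote_head = obj;
	if (++owner->remote_nr >= 16)		/* hypothetical batch size */
		owner->remote_free_check = 1;
	pthread_mutex_unlock(&owner->remote_lock);
}

/* Alloc: pop from the LIFO list; claim the remotely freed list on a miss. */
static struct demo_object *demo_alloc(struct demo_list *l)
{
	struct demo_object *obj = l->head;

	if (!obj && l->remote_free_check) {
		pthread_mutex_lock(&l->remote_lock);
		l->head = l->remote_head;
		l->nr += l->remote_nr;
		l->remote_head = NULL;
		l->remote_nr = 0;
		l->remote_free_check = 0;
		pthread_mutex_unlock(&l->remote_lock);
		obj = l->head;
	}
	if (obj) {
		l->head = obj->next;
		l->nr--;
	}
	return obj;
}
#endif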
static inline void slqb_stat_inc(struct kmem_cache_list *list,
				enum stat_item si)
{
#ifdef CONFIG_SLQB_STATS
	list->stats[si]++;
#endif
}

static inline void slqb_stat_add(struct kmem_cache_list *list,
				enum stat_item si, unsigned long nr)
{
#ifdef CONFIG_SLQB_STATS
	list->stats[si] += nr;
#endif
}
155 static inline int slqb_page_to_nid(struct slqb_page
*page
)
157 return page_to_nid(&page
->page
);
160 static inline void *slqb_page_address(struct slqb_page
*page
)
162 return page_address(&page
->page
);
165 static inline struct zone
*slqb_page_zone(struct slqb_page
*page
)
167 return page_zone(&page
->page
);
170 static inline int virt_to_nid(const void *addr
)
172 return page_to_nid(virt_to_page(addr
));
175 static inline struct slqb_page
*virt_to_head_slqb_page(const void *addr
)
179 p
= virt_to_head_page(addr
);
180 return (struct slqb_page
*)p
;
183 static inline void __free_slqb_pages(struct slqb_page
*page
, unsigned int order
,
186 struct page
*p
= &page
->page
;
188 reset_page_mapcount(p
);
190 VM_BUG_ON(!(p
->flags
& PG_SLQB_BIT
));
191 p
->flags
&= ~PG_SLQB_BIT
;
193 if (current
->reclaim_state
)
194 current
->reclaim_state
->reclaimed_slab
+= pages
;
195 __free_pages(p
, order
);
198 #ifdef CONFIG_SLQB_DEBUG
199 static inline int slab_debug(struct kmem_cache
*s
)
208 static inline int slab_poison(struct kmem_cache
*s
)
210 return s
->flags
& SLAB_POISON
;
213 static inline int slab_debug(struct kmem_cache
*s
)
217 static inline int slab_poison(struct kmem_cache
*s
)
223 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
224 SLAB_POISON | SLAB_STORE_USER)
226 /* Internal SLQB flags */
227 #define __OBJECT_POISON 0x80000000 /* Poison object */
229 /* Not all arches define cache_line_size */
230 #ifndef cache_line_size
231 #define cache_line_size() L1_CACHE_BYTES
235 static struct notifier_block slab_notifier
;
239 * slqb_lock protects slab_caches list and serialises hotplug operations.
240 * hotplug operations take lock for write, other operations can hold off
241 * hotplug by taking it for read (or write).
243 static DECLARE_RWSEM(slqb_lock
);
246 * A list of all slab caches on the system
248 static LIST_HEAD(slab_caches
);
251 * Tracking user of a slab.
254 unsigned long addr
; /* Called from address */
255 int cpu
; /* Was running on cpu */
256 int pid
; /* Pid context */
257 unsigned long when
; /* When did the operation occur */
260 enum track_item
{ TRACK_ALLOC
, TRACK_FREE
};
262 static struct kmem_cache kmem_cache_cache
;
264 #ifdef CONFIG_SLQB_SYSFS
265 static int sysfs_slab_add(struct kmem_cache
*s
);
266 static void sysfs_slab_remove(struct kmem_cache
*s
);
268 static inline int sysfs_slab_add(struct kmem_cache
*s
)
272 static inline void sysfs_slab_remove(struct kmem_cache
*s
)
274 kmem_cache_free(&kmem_cache_cache
, s
);
278 /********************************************************************
279 * Core slab cache functions
280 *******************************************************************/
282 static int __slab_is_available __read_mostly
;
283 int slab_is_available(void)
285 return __slab_is_available
;
288 static inline struct kmem_cache_cpu
*get_cpu_slab(struct kmem_cache
*s
, int cpu
)
291 VM_BUG_ON(!s
->cpu_slab
[cpu
]);
292 return s
->cpu_slab
[cpu
];
298 static inline int check_valid_pointer(struct kmem_cache
*s
,
299 struct slqb_page
*page
, const void *object
)
303 base
= slqb_page_address(page
);
304 if (object
< base
|| object
>= base
+ s
->objects
* s
->size
||
305 (object
- base
) % s
->size
) {
312 static inline void *get_freepointer(struct kmem_cache
*s
, void *object
)
314 return *(void **)(object
+ s
->offset
);
317 static inline void set_freepointer(struct kmem_cache
*s
, void *object
, void *fp
)
319 *(void **)(object
+ s
->offset
) = fp
;
322 /* Loop over all objects in a slab */
323 #define for_each_object(__p, __s, __addr) \
324 for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
328 #define for_each_free_object(__p, __s, __free) \
329 for (__p = (__free); (__p) != NULL; __p = get_freepointer((__s),\
332 #ifdef CONFIG_SLQB_DEBUG
336 #ifdef CONFIG_SLQB_DEBUG_ON
337 static int slqb_debug __read_mostly
= DEBUG_DEFAULT_FLAGS
;
339 static int slqb_debug __read_mostly
;
342 static char *slqb_debug_slabs
;
347 static void print_section(char *text
, u8
*addr
, unsigned int length
)
355 for (i
= 0; i
< length
; i
++) {
357 printk(KERN_ERR
"%8s 0x%p: ", text
, addr
+ i
);
360 printk(KERN_CONT
" %02x", addr
[i
]);
362 ascii
[offset
] = isgraph(addr
[i
]) ? addr
[i
] : '.';
364 printk(KERN_CONT
" %s\n", ascii
);
371 printk(KERN_CONT
" ");
375 printk(KERN_CONT
" %s\n", ascii
);
379 static struct track
*get_track(struct kmem_cache
*s
, void *object
,
380 enum track_item alloc
)
385 p
= object
+ s
->offset
+ sizeof(void *);
387 p
= object
+ s
->inuse
;
392 static void set_track(struct kmem_cache
*s
, void *object
,
393 enum track_item alloc
, unsigned long addr
)
398 p
= object
+ s
->offset
+ sizeof(void *);
400 p
= object
+ s
->inuse
;
405 p
->cpu
= raw_smp_processor_id();
406 p
->pid
= current
? current
->pid
: -1;
409 memset(p
, 0, sizeof(struct track
));
412 static void init_tracking(struct kmem_cache
*s
, void *object
)
414 if (!(s
->flags
& SLAB_STORE_USER
))
417 set_track(s
, object
, TRACK_FREE
, 0UL);
418 set_track(s
, object
, TRACK_ALLOC
, 0UL);
421 static void print_track(const char *s
, struct track
*t
)
426 printk(KERN_ERR
"INFO: %s in ", s
);
427 __print_symbol("%s", (unsigned long)t
->addr
);
428 printk(" age=%lu cpu=%u pid=%d\n", jiffies
- t
->when
, t
->cpu
, t
->pid
);
431 static void print_tracking(struct kmem_cache
*s
, void *object
)
433 if (!(s
->flags
& SLAB_STORE_USER
))
436 print_track("Allocated", get_track(s
, object
, TRACK_ALLOC
));
437 print_track("Freed", get_track(s
, object
, TRACK_FREE
));
440 static void print_page_info(struct slqb_page
*page
)
442 printk(KERN_ERR
"INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
443 page
, page
->inuse
, page
->freelist
, page
->flags
);
447 #define MAX_ERR_STR 100
448 static void slab_bug(struct kmem_cache
*s
, char *fmt
, ...)
451 char buf
[MAX_ERR_STR
];
454 vsnprintf(buf
, sizeof(buf
), fmt
, args
);
456 printk(KERN_ERR
"========================================"
457 "=====================================\n");
458 printk(KERN_ERR
"BUG %s: %s\n", s
->name
, buf
);
459 printk(KERN_ERR
"----------------------------------------"
460 "-------------------------------------\n\n");
463 static void slab_fix(struct kmem_cache
*s
, char *fmt
, ...)
469 vsnprintf(buf
, sizeof(buf
), fmt
, args
);
471 printk(KERN_ERR
"FIX %s: %s\n", s
->name
, buf
);
474 static void print_trailer(struct kmem_cache
*s
, struct slqb_page
*page
, u8
*p
)
476 unsigned int off
; /* Offset of last byte */
477 u8
*addr
= slqb_page_address(page
);
479 print_tracking(s
, p
);
481 print_page_info(page
);
483 printk(KERN_ERR
"INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
484 p
, p
- addr
, get_freepointer(s
, p
));
487 print_section("Bytes b4", p
- 16, 16);
489 print_section("Object", p
, min(s
->objsize
, 128));
491 if (s
->flags
& SLAB_RED_ZONE
)
492 print_section("Redzone", p
+ s
->objsize
, s
->inuse
- s
->objsize
);
495 off
= s
->offset
+ sizeof(void *);
499 if (s
->flags
& SLAB_STORE_USER
)
500 off
+= 2 * sizeof(struct track
);
502 if (off
!= s
->size
) {
503 /* Beginning of the filler is the free pointer */
504 print_section("Padding", p
+ off
, s
->size
- off
);
510 static void object_err(struct kmem_cache
*s
, struct slqb_page
*page
,
511 u8
*object
, char *reason
)
514 print_trailer(s
, page
, object
);
517 static void slab_err(struct kmem_cache
*s
, struct slqb_page
*page
,
521 print_page_info(page
);
525 static void init_object(struct kmem_cache
*s
, void *object
, int active
)
529 if (s
->flags
& __OBJECT_POISON
) {
530 memset(p
, POISON_FREE
, s
->objsize
- 1);
531 p
[s
->objsize
- 1] = POISON_END
;
534 if (s
->flags
& SLAB_RED_ZONE
) {
535 memset(p
+ s
->objsize
,
536 active
? SLUB_RED_ACTIVE
: SLUB_RED_INACTIVE
,
537 s
->inuse
- s
->objsize
);
541 static u8
*check_bytes(u8
*start
, unsigned int value
, unsigned int bytes
)
544 if (*start
!= (u8
)value
)
552 static void restore_bytes(struct kmem_cache
*s
, char *message
, u8 data
,
553 void *from
, void *to
)
555 slab_fix(s
, "Restoring 0x%p-0x%p=0x%x\n", from
, to
- 1, data
);
556 memset(from
, data
, to
- from
);
559 static int check_bytes_and_report(struct kmem_cache
*s
, struct slqb_page
*page
,
560 u8
*object
, char *what
,
561 u8
*start
, unsigned int value
, unsigned int bytes
)
566 fault
= check_bytes(start
, value
, bytes
);
571 while (end
> fault
&& end
[-1] == value
)
574 slab_bug(s
, "%s overwritten", what
);
575 printk(KERN_ERR
"INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
576 fault
, end
- 1, fault
[0], value
);
577 print_trailer(s
, page
, object
);
579 restore_bytes(s
, what
, value
, fault
, end
);
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed.
 *	If the freepointer may overlay the object then the free
 *	pointer is the first word of the object.
 *
 *	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 *	0xa5 (POISON_END)
 *
 * object + s->objsize
 *	Padding to reach word boundary. This is also used for Redzoning.
 *	Padding is extended by another word if Redzoning is enabled and
 *	objsize == inuse.
 *
 *	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 *	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 *	Meta data starts here.
 *
 *	A. Free pointer (if we cannot overwrite object on free)
 *	B. Tracking data for SLAB_STORE_USER
 *	C. Padding to reach required alignment boundary or at minimum
 *	   one word if debugging is on to be able to detect writes
 *	   before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 *	Nothing is used beyond s->size.
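/*
 * The arithmetic below is a standalone illustration of the layout rules
 * described above, worked for a hypothetical 64-bit cache with a 20-byte
 * object and SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER. The struct and
 * numbers are assumptions for the example only; the real computation is
 * done by calculate_sizes() later in this file. Kept inside #if 0 so it is
 * never compiled as part of this file.
 */
#if 0
#include <stdio.h>

/* stand-in with the same field sizes as the tracking record above */
struct track_demo { unsigned long addr; int cpu; int pid; unsigned long when; };

int main(void)
{
	unsigned long objsize = 20;			/* hypothetical object size */
	unsigned long word = sizeof(void *);		/* 8 on a 64-bit build */
	unsigned long size, inuse, offset;

	size = (objsize + word - 1) & ~(word - 1);	/* word-align: 24 */
	if (size == objsize)				/* red zone needs spare bytes */
		size += word;				/* (not needed here) */
	inuse = size;					/* red-zone fill ends at 24 */

	offset = size;					/* free pointer after the object */
	size += word;					/* running total: 32 */
	size += 2 * sizeof(struct track_demo);		/* alloc + free tracking: 80 */
	size += word;					/* trailing red-zone word: 88 */

	/* the real code finally rounds size up to the cache's alignment */
	printf("objsize=%lu inuse=%lu offset=%lu size=%lu\n",
	       objsize, inuse, offset, size);
	return 0;
}
#endif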
617 static int check_pad_bytes(struct kmem_cache
*s
, struct slqb_page
*page
, u8
*p
)
619 unsigned long off
= s
->inuse
; /* The end of info */
622 /* Freepointer is placed after the object. */
623 off
+= sizeof(void *);
626 if (s
->flags
& SLAB_STORE_USER
) {
627 /* We also have user information there */
628 off
+= 2 * sizeof(struct track
);
634 return check_bytes_and_report(s
, page
, p
, "Object padding",
635 p
+ off
, POISON_INUSE
, s
->size
- off
);
638 static int slab_pad_check(struct kmem_cache
*s
, struct slqb_page
*page
)
646 if (!(s
->flags
& SLAB_POISON
))
649 start
= slqb_page_address(page
);
650 end
= start
+ (PAGE_SIZE
<< s
->order
);
651 length
= s
->objects
* s
->size
;
652 remainder
= end
- (start
+ length
);
656 fault
= check_bytes(start
+ length
, POISON_INUSE
, remainder
);
660 while (end
> fault
&& end
[-1] == POISON_INUSE
)
663 slab_err(s
, page
, "Padding overwritten. 0x%p-0x%p", fault
, end
- 1);
664 print_section("Padding", start
, length
);
666 restore_bytes(s
, "slab padding", POISON_INUSE
, start
, end
);
670 static int check_object(struct kmem_cache
*s
, struct slqb_page
*page
,
671 void *object
, int active
)
674 u8
*endobject
= object
+ s
->objsize
;
676 if (s
->flags
& SLAB_RED_ZONE
) {
678 active
? SLUB_RED_ACTIVE
: SLUB_RED_INACTIVE
;
680 if (!check_bytes_and_report(s
, page
, object
, "Redzone",
681 endobject
, red
, s
->inuse
- s
->objsize
))
684 if ((s
->flags
& SLAB_POISON
) && s
->objsize
< s
->inuse
) {
685 check_bytes_and_report(s
, page
, p
, "Alignment padding",
686 endobject
, POISON_INUSE
, s
->inuse
- s
->objsize
);
690 if (s
->flags
& SLAB_POISON
) {
691 if (!active
&& (s
->flags
& __OBJECT_POISON
)) {
692 if (!check_bytes_and_report(s
, page
, p
, "Poison", p
,
693 POISON_FREE
, s
->objsize
- 1))
696 if (!check_bytes_and_report(s
, page
, p
, "Poison",
697 p
+ s
->objsize
- 1, POISON_END
, 1))
702 * check_pad_bytes cleans up on its own.
704 check_pad_bytes(s
, page
, p
);
710 static int check_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
712 if (!(page
->flags
& PG_SLQB_BIT
)) {
713 slab_err(s
, page
, "Not a valid slab page");
716 if (page
->inuse
== 0) {
717 slab_err(s
, page
, "inuse before free / after alloc", s
->name
);
720 if (page
->inuse
> s
->objects
) {
721 slab_err(s
, page
, "inuse %u > max %u",
722 s
->name
, page
->inuse
, s
->objects
);
	/* slab_pad_check fixes things up after itself */
726 slab_pad_check(s
, page
);
730 static void trace(struct kmem_cache
*s
, struct slqb_page
*page
,
731 void *object
, int alloc
)
733 if (s
->flags
& SLAB_TRACE
) {
734 printk(KERN_INFO
"TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
736 alloc
? "alloc" : "free",
741 print_section("Object", (void *)object
, s
->objsize
);
747 static void setup_object_debug(struct kmem_cache
*s
, struct slqb_page
*page
,
753 if (!(s
->flags
& (SLAB_STORE_USER
|SLAB_RED_ZONE
|__OBJECT_POISON
)))
756 init_object(s
, object
, 0);
757 init_tracking(s
, object
);
760 static int alloc_debug_processing(struct kmem_cache
*s
,
761 void *object
, unsigned long addr
)
763 struct slqb_page
*page
;
764 page
= virt_to_head_slqb_page(object
);
766 if (!check_slab(s
, page
))
769 if (!check_valid_pointer(s
, page
, object
)) {
770 object_err(s
, page
, object
, "Freelist Pointer check fails");
774 if (object
&& !check_object(s
, page
, object
, 0))
	/* Success. Perform special debug activities for allocs */
778 if (s
->flags
& SLAB_STORE_USER
)
779 set_track(s
, object
, TRACK_ALLOC
, addr
);
780 trace(s
, page
, object
, 1);
781 init_object(s
, object
, 1);
788 static int free_debug_processing(struct kmem_cache
*s
,
789 void *object
, unsigned long addr
)
791 struct slqb_page
*page
;
792 page
= virt_to_head_slqb_page(object
);
794 if (!check_slab(s
, page
))
797 if (!check_valid_pointer(s
, page
, object
)) {
798 slab_err(s
, page
, "Invalid object pointer 0x%p", object
);
802 if (!check_object(s
, page
, object
, 1))
805 /* Special debug activities for freeing objects */
806 if (s
->flags
& SLAB_STORE_USER
)
807 set_track(s
, object
, TRACK_FREE
, addr
);
808 trace(s
, page
, object
, 0);
809 init_object(s
, object
, 0);
813 slab_fix(s
, "Object at 0x%p not freed", object
);
817 static int __init
setup_slqb_debug(char *str
)
819 slqb_debug
= DEBUG_DEFAULT_FLAGS
;
820 if (*str
++ != '=' || !*str
) {
822 * No options specified. Switch on full debugging.
829 * No options but restriction on slabs. This means full
830 * debugging for slabs matching a pattern.
838 * Switch off all debugging measures.
844 * Determine which debug features should be switched on
846 for (; *str
&& *str
!= ','; str
++) {
847 switch (tolower(*str
)) {
849 slqb_debug
|= SLAB_DEBUG_FREE
;
852 slqb_debug
|= SLAB_RED_ZONE
;
855 slqb_debug
|= SLAB_POISON
;
858 slqb_debug
|= SLAB_STORE_USER
;
861 slqb_debug
|= SLAB_TRACE
;
864 slqb_debug
|= SLAB_FAILSLAB
;
867 printk(KERN_ERR
"slqb_debug option '%c' "
868 "unknown. skipped\n", *str
);
874 slqb_debug_slabs
= str
+ 1;
878 __setup("slqb_debug", setup_slqb_debug
);
880 static int __init
setup_slqb_min_order(char *str
)
882 get_option(&str
, &slqb_min_order
);
883 slqb_min_order
= min(slqb_min_order
, MAX_ORDER
- 1);
887 __setup("slqb_min_order=", setup_slqb_min_order
);
889 static int __init
setup_slqb_min_objects(char *str
)
891 get_option(&str
, &slqb_min_objects
);
896 __setup("slqb_min_objects=", setup_slqb_min_objects
);
898 static unsigned long kmem_cache_flags(unsigned long objsize
,
899 unsigned long flags
, const char *name
,
900 void (*ctor
)(void *))
903 * Enable debugging if selected on the kernel commandline.
905 if (slqb_debug
&& (!slqb_debug_slabs
||
906 strncmp(slqb_debug_slabs
, name
,
907 strlen(slqb_debug_slabs
)) == 0))
910 if (num_possible_nodes() > 1)
916 static inline void setup_object_debug(struct kmem_cache
*s
,
917 struct slqb_page
*page
, void *object
)
921 static inline int alloc_debug_processing(struct kmem_cache
*s
,
922 void *object
, unsigned long addr
)
927 static inline int free_debug_processing(struct kmem_cache
*s
,
928 void *object
, unsigned long addr
)
933 static inline int slab_pad_check(struct kmem_cache
*s
, struct slqb_page
*page
)
938 static inline int check_object(struct kmem_cache
*s
, struct slqb_page
*page
,
939 void *object
, int active
)
944 static inline void add_full(struct kmem_cache_node
*n
, struct slqb_page
*page
)
948 static inline unsigned long kmem_cache_flags(unsigned long objsize
,
949 unsigned long flags
, const char *name
, void (*ctor
)(void *))
951 if (num_possible_nodes() > 1)
956 static const int slqb_debug
;
960 * allocate a new slab (return its corresponding struct slqb_page)
962 static struct slqb_page
*allocate_slab(struct kmem_cache
*s
,
963 gfp_t flags
, int node
)
965 struct slqb_page
*page
;
966 int pages
= 1 << s
->order
;
968 flags
|= s
->allocflags
;
970 page
= (struct slqb_page
*)alloc_pages_node(node
, flags
, s
->order
);
974 mod_zone_page_state(slqb_page_zone(page
),
975 (s
->flags
& SLAB_RECLAIM_ACCOUNT
) ?
976 NR_SLAB_RECLAIMABLE
: NR_SLAB_UNRECLAIMABLE
,
983 * Called once for each object on a new slab page
985 static void setup_object(struct kmem_cache
*s
,
986 struct slqb_page
*page
, void *object
)
988 setup_object_debug(s
, page
, object
);
989 if (unlikely(s
->ctor
))
994 * Allocate a new slab, set up its object list.
996 static struct slqb_page
*new_slab_page(struct kmem_cache
*s
,
997 gfp_t flags
, int node
, unsigned int colour
)
999 struct slqb_page
*page
;
1004 BUG_ON(flags
& GFP_SLAB_BUG_MASK
);
1006 page
= allocate_slab(s
,
1007 flags
& (GFP_RECLAIM_MASK
| GFP_CONSTRAINT_MASK
), node
);
1011 page
->flags
|= PG_SLQB_BIT
;
1013 start
= page_address(&page
->page
);
1015 if (unlikely(slab_poison(s
)))
1016 memset(start
, POISON_INUSE
, PAGE_SIZE
<< s
->order
);
1021 for_each_object(p
, s
, start
) {
1022 setup_object(s
, page
, p
);
1023 set_freepointer(s
, last
, p
);
1026 set_freepointer(s
, last
, NULL
);
1028 page
->freelist
= start
;
1035 * Free a slab page back to the page allocator
1037 static void __free_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
1039 int pages
= 1 << s
->order
;
1041 if (unlikely(slab_debug(s
))) {
1044 slab_pad_check(s
, page
);
1045 for_each_free_object(p
, s
, page
->freelist
)
1046 check_object(s
, page
, p
, 0);
1049 mod_zone_page_state(slqb_page_zone(page
),
1050 (s
->flags
& SLAB_RECLAIM_ACCOUNT
) ?
1051 NR_SLAB_RECLAIMABLE
: NR_SLAB_UNRECLAIMABLE
,
1054 __free_slqb_pages(page
, s
->order
, pages
);
1057 static void rcu_free_slab(struct rcu_head
*h
)
1059 struct slqb_page
*page
;
1061 page
= container_of(h
, struct slqb_page
, rcu_head
);
1062 __free_slab(page
->list
->cache
, page
);
1065 static void free_slab(struct kmem_cache
*s
, struct slqb_page
*page
)
1067 VM_BUG_ON(page
->inuse
);
1068 if (unlikely(s
->flags
& SLAB_DESTROY_BY_RCU
))
1069 call_rcu(&page
->rcu_head
, rcu_free_slab
);
1071 __free_slab(s
, page
);
1075 * Return an object to its slab.
1077 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1078 * list_lock in the case of per-node list.
1080 static int free_object_to_page(struct kmem_cache
*s
,
1081 struct kmem_cache_list
*l
, struct slqb_page
*page
,
1084 VM_BUG_ON(page
->list
!= l
);
1086 set_freepointer(s
, object
, page
->freelist
);
1087 page
->freelist
= object
;
1091 if (likely(s
->objects
> 1)) {
1093 list_del(&page
->lru
);
1097 slqb_stat_inc(l
, FLUSH_SLAB_FREE
);
1100 } else if (page
->inuse
+ 1 == s
->objects
) {
1102 list_add(&page
->lru
, &l
->partial
);
1103 slqb_stat_inc(l
, FLUSH_SLAB_PARTIAL
);
1110 static void slab_free_to_remote(struct kmem_cache
*s
, struct slqb_page
*page
,
1111 void *object
, struct kmem_cache_cpu
*c
);
 * Flush the LIFO list of objects on a list. They are sent back to their pages
 * if those pages also belong to the list, or to our CPU's remote-free list
 * if they do not.
 *
 * Doesn't flush the entire list; flush_free_list_all() does that.
 *
 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
 * list_lock in the case of per-node list.
1124 static void flush_free_list(struct kmem_cache
*s
, struct kmem_cache_list
*l
)
1130 nr
= l
->freelist
.nr
;
1134 nr
= min(slab_freebatch(s
), nr
);
1136 slqb_stat_inc(l
, FLUSH_FREE_LIST
);
1137 slqb_stat_add(l
, FLUSH_FREE_LIST_OBJECTS
, nr
);
1139 l
->freelist
.nr
-= nr
;
1140 head
= l
->freelist
.head
;
1143 struct slqb_page
*page
;
1148 head
= get_freepointer(s
, object
);
1149 page
= virt_to_head_slqb_page(object
);
1152 if (page
->list
!= l
) {
1153 struct kmem_cache_cpu
*c
;
1156 spin_unlock(&l
->page_lock
);
1160 c
= get_cpu_slab(s
, smp_processor_id());
1162 slab_free_to_remote(s
, page
, object
, c
);
1163 slqb_stat_inc(l
, FLUSH_FREE_LIST_REMOTE
);
1168 spin_lock(&l
->page_lock
);
1171 free_object_to_page(s
, l
, page
, object
);
1178 spin_unlock(&l
->page_lock
);
1180 l
->freelist
.head
= head
;
1181 if (!l
->freelist
.nr
)
1182 l
->freelist
.tail
= NULL
;
1185 static void flush_free_list_all(struct kmem_cache
*s
, struct kmem_cache_list
*l
)
1187 while (l
->freelist
.nr
)
1188 flush_free_list(s
, l
);
 * If enough objects have been remotely freed back to this list,
 * remote_free_check will be set, in which case we'll eventually come here
 * to take those objects off our remote_free list and onto our LIFO freelist.
 *
 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
 * list_lock in the case of per-node list.
1200 static void claim_remote_free_list(struct kmem_cache
*s
,
1201 struct kmem_cache_list
*l
)
1203 void **head
, **tail
;
1206 if (!l
->remote_free
.list
.nr
)
1209 spin_lock(&l
->remote_free
.lock
);
1211 l
->remote_free_check
= 0;
1212 head
= l
->remote_free
.list
.head
;
1213 l
->remote_free
.list
.head
= NULL
;
1214 tail
= l
->remote_free
.list
.tail
;
1215 l
->remote_free
.list
.tail
= NULL
;
1216 nr
= l
->remote_free
.list
.nr
;
1217 l
->remote_free
.list
.nr
= 0;
1219 spin_unlock(&l
->remote_free
.lock
);
1223 if (!l
->freelist
.nr
) {
1224 /* Get head hot for likely subsequent allocation or flush */
1226 l
->freelist
.head
= head
;
1228 set_freepointer(s
, l
->freelist
.tail
, head
);
1229 l
->freelist
.tail
= tail
;
1231 l
->freelist
.nr
+= nr
;
1233 slqb_stat_inc(l
, CLAIM_REMOTE_LIST
);
1234 slqb_stat_add(l
, CLAIM_REMOTE_LIST_OBJECTS
, nr
);
1237 static inline void claim_remote_free_list(struct kmem_cache
*s
,
1238 struct kmem_cache_list
*l
)
1244 * Allocation fastpath. Get an object from the list's LIFO freelist, or
1245 * return NULL if it is empty.
1247 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1248 * list_lock in the case of per-node list.
1250 static __always_inline
void *__cache_list_get_object(struct kmem_cache
*s
,
1251 struct kmem_cache_list
*l
)
1255 object
= l
->freelist
.head
;
1256 if (likely(object
)) {
1257 void *next
= get_freepointer(s
, object
);
1259 VM_BUG_ON(!l
->freelist
.nr
);
1261 l
->freelist
.head
= next
;
1265 VM_BUG_ON(l
->freelist
.nr
);
1268 if (unlikely(l
->remote_free_check
)) {
1269 claim_remote_free_list(s
, l
);
1271 if (l
->freelist
.nr
> slab_hiwater(s
))
1272 flush_free_list(s
, l
);
1274 /* repetition here helps gcc :( */
1275 object
= l
->freelist
.head
;
1276 if (likely(object
)) {
1277 void *next
= get_freepointer(s
, object
);
1279 VM_BUG_ON(!l
->freelist
.nr
);
1281 l
->freelist
.head
= next
;
1285 VM_BUG_ON(l
->freelist
.nr
);
1293 * Slow(er) path. Get a page from this list's existing pages. Will be a
1294 * new empty page in the case that __slab_alloc_page has just been called
1295 * (empty pages otherwise never get queued up on the lists), or a partial page
1296 * already on the list.
1298 * Caller must be the owner CPU in the case of per-CPU list, or hold the node's
1299 * list_lock in the case of per-node list.
1301 static noinline
void *__cache_list_get_page(struct kmem_cache
*s
,
1302 struct kmem_cache_list
*l
)
1304 struct slqb_page
*page
;
1307 if (unlikely(!l
->nr_partial
))
1310 page
= list_first_entry(&l
->partial
, struct slqb_page
, lru
);
1311 VM_BUG_ON(page
->inuse
== s
->objects
);
1312 if (page
->inuse
+ 1 == s
->objects
) {
1314 list_del(&page
->lru
);
1317 VM_BUG_ON(!page
->freelist
);
1321 object
= page
->freelist
;
1322 page
->freelist
= get_freepointer(s
, object
);
1324 prefetchw(page
->freelist
);
1325 VM_BUG_ON((page
->inuse
== s
->objects
) != (page
->freelist
== NULL
));
1326 slqb_stat_inc(l
, ALLOC_SLAB_FILL
);
1331 static void *cache_list_get_page(struct kmem_cache
*s
,
1332 struct kmem_cache_list
*l
)
1336 if (unlikely(!l
->nr_partial
))
1339 spin_lock(&l
->page_lock
);
1340 object
= __cache_list_get_page(s
, l
);
1341 spin_unlock(&l
->page_lock
);
1347 * Allocation slowpath. Allocate a new slab page from the page allocator, and
1348 * put it on the list's partial list. Must be followed by an allocation so
1349 * that we don't have dangling empty pages on the partial list.
1351 * Returns 0 on allocation failure.
1353 * Must be called with interrupts disabled.
1355 static noinline
void *__slab_alloc_page(struct kmem_cache
*s
,
1356 gfp_t gfpflags
, int node
)
1358 struct slqb_page
*page
;
1359 struct kmem_cache_list
*l
;
1360 struct kmem_cache_cpu
*c
;
1361 unsigned int colour
;
1364 c
= get_cpu_slab(s
, smp_processor_id());
1365 colour
= c
->colour_next
;
1366 c
->colour_next
+= s
->colour_off
;
1367 if (c
->colour_next
>= s
->colour_range
)
1370 /* Caller handles __GFP_ZERO */
1371 gfpflags
&= ~__GFP_ZERO
;
1373 if (gfpflags
& __GFP_WAIT
)
1375 page
= new_slab_page(s
, gfpflags
, node
, colour
);
1376 if (gfpflags
& __GFP_WAIT
)
1377 local_irq_disable();
1378 if (unlikely(!page
))
1381 if (!NUMA_BUILD
|| likely(slqb_page_to_nid(page
) == numa_node_id())) {
1382 struct kmem_cache_cpu
*c
;
1383 int cpu
= smp_processor_id();
1385 c
= get_cpu_slab(s
, cpu
);
1389 spin_lock(&l
->page_lock
);
1392 list_add(&page
->lru
, &l
->partial
);
1393 slqb_stat_inc(l
, ALLOC
);
1394 slqb_stat_inc(l
, ALLOC_SLAB_NEW
);
1395 object
= __cache_list_get_page(s
, l
);
1396 spin_unlock(&l
->page_lock
);
1399 struct kmem_cache_node
*n
;
1401 n
= s
->node_slab
[slqb_page_to_nid(page
)];
1405 spin_lock(&n
->list_lock
);
1406 spin_lock(&l
->page_lock
);
1409 list_add(&page
->lru
, &l
->partial
);
1410 slqb_stat_inc(l
, ALLOC
);
1411 slqb_stat_inc(l
, ALLOC_SLAB_NEW
);
1412 object
= __cache_list_get_page(s
, l
);
1413 spin_unlock(&l
->page_lock
);
1414 spin_unlock(&n
->list_lock
);
1422 static noinline
int alternate_nid(struct kmem_cache
*s
,
1423 gfp_t gfpflags
, int node
)
1425 if (in_interrupt() || (gfpflags
& __GFP_THISNODE
))
1427 if (cpuset_do_slab_mem_spread() && (s
->flags
& SLAB_MEM_SPREAD
))
1428 return cpuset_mem_spread_node();
1429 else if (current
->mempolicy
)
1430 return slab_node(current
->mempolicy
);
1435 * Allocate an object from a remote node. Return NULL if none could be found
1436 * (in which case, caller should allocate a new slab)
1438 * Must be called with interrupts disabled.
1440 static void *__remote_slab_alloc_node(struct kmem_cache
*s
,
1441 gfp_t gfpflags
, int node
)
1443 struct kmem_cache_node
*n
;
1444 struct kmem_cache_list
*l
;
1447 n
= s
->node_slab
[node
];
1448 if (unlikely(!n
)) /* node has no memory */
1452 spin_lock(&n
->list_lock
);
1454 object
= __cache_list_get_object(s
, l
);
1455 if (unlikely(!object
)) {
1456 object
= cache_list_get_page(s
, l
);
1457 if (unlikely(!object
)) {
1458 spin_unlock(&n
->list_lock
);
1459 return __slab_alloc_page(s
, gfpflags
, node
);
1463 slqb_stat_inc(l
, ALLOC
);
1464 spin_unlock(&n
->list_lock
);
1468 static noinline
void *__remote_slab_alloc(struct kmem_cache
*s
,
1469 gfp_t gfpflags
, int node
)
1472 struct zonelist
*zonelist
;
1475 enum zone_type high_zoneidx
= gfp_zone(gfpflags
);
1477 object
= __remote_slab_alloc_node(s
, gfpflags
, node
);
1478 if (likely(object
|| (gfpflags
& __GFP_THISNODE
)))
1481 zonelist
= node_zonelist(slab_node(current
->mempolicy
), gfpflags
);
1482 for_each_zone_zonelist(zone
, z
, zonelist
, high_zoneidx
) {
1483 if (!cpuset_zone_allowed_hardwall(zone
, gfpflags
))
1486 node
= zone_to_nid(zone
);
1487 object
= __remote_slab_alloc_node(s
, gfpflags
, node
);
1496 * Main allocation path. Return an object, or NULL on allocation failure.
1498 * Must be called with interrupts disabled.
1500 static __always_inline
void *__slab_alloc(struct kmem_cache
*s
,
1501 gfp_t gfpflags
, int node
)
1504 struct kmem_cache_cpu
*c
;
1505 struct kmem_cache_list
*l
;
1508 if (unlikely(node
!= -1) && unlikely(node
!= numa_node_id())) {
1510 return __remote_slab_alloc(s
, gfpflags
, node
);
1514 c
= get_cpu_slab(s
, smp_processor_id());
1517 object
= __cache_list_get_object(s
, l
);
1518 if (unlikely(!object
)) {
1520 int thisnode
= numa_node_id();
1523 * If the local node is memoryless, try remote alloc before
1524 * trying the page allocator. Otherwise, what happens is
1525 * objects are always freed to remote lists but the allocation
1526 * side always allocates a new page with only one object
1529 if (unlikely(!node_state(thisnode
, N_HIGH_MEMORY
)))
1530 object
= __remote_slab_alloc(s
, gfpflags
, thisnode
);
1534 object
= cache_list_get_page(s
, l
);
1535 if (unlikely(!object
)) {
1536 object
= __slab_alloc_page(s
, gfpflags
, node
);
1538 if (unlikely(!object
)) {
1539 node
= numa_node_id();
1548 slqb_stat_inc(l
, ALLOC
);
1553 * Perform some interrupts-on processing around the main allocation path
1554 * (debug checking and memset()ing).
1556 static __always_inline
void *slab_alloc(struct kmem_cache
*s
,
1557 gfp_t gfpflags
, int node
, unsigned long addr
)
1560 unsigned long flags
;
1562 gfpflags
&= gfp_allowed_mask
;
1564 lockdep_trace_alloc(gfpflags
);
1565 might_sleep_if(gfpflags
& __GFP_WAIT
);
1567 if (should_failslab(s
->objsize
, gfpflags
, s
->flags
))
1571 local_irq_save(flags
);
1572 object
= __slab_alloc(s
, gfpflags
, node
);
1573 local_irq_restore(flags
);
1575 if (unlikely(slab_debug(s
)) && likely(object
)) {
1576 if (unlikely(!alloc_debug_processing(s
, object
, addr
)))
1580 if (unlikely(gfpflags
& __GFP_ZERO
) && likely(object
))
1581 memset(object
, 0, s
->objsize
);
1586 static __always_inline
void *__kmem_cache_alloc(struct kmem_cache
*s
,
1587 gfp_t gfpflags
, unsigned long caller
)
1592 if (unlikely(current
->flags
& (PF_SPREAD_SLAB
| PF_MEMPOLICY
)))
1593 node
= alternate_nid(s
, gfpflags
, node
);
1595 return slab_alloc(s
, gfpflags
, node
, caller
);
1598 void *kmem_cache_alloc(struct kmem_cache
*s
, gfp_t gfpflags
)
1600 return __kmem_cache_alloc(s
, gfpflags
, _RET_IP_
);
1602 EXPORT_SYMBOL(kmem_cache_alloc
);
1605 void *kmem_cache_alloc_node(struct kmem_cache
*s
, gfp_t gfpflags
, int node
)
1607 return slab_alloc(s
, gfpflags
, node
, _RET_IP_
);
1609 EXPORT_SYMBOL(kmem_cache_alloc_node
);
 * Flush this CPU's remote free list of objects back to the list from where
 * they originate. They end up on that list's remotely freed list, and
 * eventually we set its remote_free_check if there are enough objects on it.
 *
 * This seems convoluted, but it keeps us from stomping on the target CPU's
 * fastpath cachelines.
 *
 * Must be called with interrupts disabled.
1623 static void flush_remote_free_cache(struct kmem_cache
*s
,
1624 struct kmem_cache_cpu
*c
)
1627 struct kmem_cache_list
*dst
;
1636 #ifdef CONFIG_SLQB_STATS
1638 struct kmem_cache_list
*l
= &c
->list
;
1640 slqb_stat_inc(l
, FLUSH_RFREE_LIST
);
1641 slqb_stat_add(l
, FLUSH_RFREE_LIST_OBJECTS
, nr
);
1645 dst
= c
->remote_cache_list
;
	 * Less common case, dst is filling up so free synchronously.
	 * No point in having the remote CPU free these as it will just
	 * free them back to the page list anyway.
1652 if (unlikely(dst
->remote_free
.list
.nr
> (slab_hiwater(s
) >> 1))) {
1656 spin_lock(&dst
->page_lock
);
1658 struct slqb_page
*page
;
1663 head
= get_freepointer(s
, object
);
1664 page
= virt_to_head_slqb_page(object
);
1666 free_object_to_page(s
, dst
, page
, object
);
1669 spin_unlock(&dst
->page_lock
);
1678 spin_lock(&dst
->remote_free
.lock
);
1680 if (!dst
->remote_free
.list
.head
)
1681 dst
->remote_free
.list
.head
= src
->head
;
1683 set_freepointer(s
, dst
->remote_free
.list
.tail
, src
->head
);
1684 dst
->remote_free
.list
.tail
= src
->tail
;
1690 if (dst
->remote_free
.list
.nr
< slab_freebatch(s
))
1695 dst
->remote_free
.list
.nr
+= nr
;
1697 if (unlikely(dst
->remote_free
.list
.nr
>= slab_freebatch(s
) && set
))
1698 dst
->remote_free_check
= 1;
1700 spin_unlock(&dst
->remote_free
.lock
);
1704 * Free an object to this CPU's remote free list.
1706 * Must be called with interrupts disabled.
1708 static noinline
void slab_free_to_remote(struct kmem_cache
*s
,
1709 struct slqb_page
*page
, void *object
,
1710 struct kmem_cache_cpu
*c
)
1715 * Our remote free list corresponds to a different list. Must
1716 * flush it and switch.
1718 if (page
->list
!= c
->remote_cache_list
) {
1719 flush_remote_free_cache(s
, c
);
1720 c
->remote_cache_list
= page
->list
;
1727 set_freepointer(s
, r
->tail
, object
);
1728 set_freepointer(s
, object
, NULL
);
1732 if (unlikely(r
->nr
>= slab_freebatch(s
)))
1733 flush_remote_free_cache(s
, c
);
 * Main freeing path.
1740 * Must be called with interrupts disabled.
1742 static __always_inline
void __slab_free(struct kmem_cache
*s
,
1743 struct slqb_page
*page
, void *object
)
1745 struct kmem_cache_cpu
*c
;
1746 struct kmem_cache_list
*l
;
1747 int thiscpu
= smp_processor_id();
1749 c
= get_cpu_slab(s
, thiscpu
);
1752 slqb_stat_inc(l
, FREE
);
1754 if (!NUMA_BUILD
|| !slab_numa(s
) ||
1755 likely(slqb_page_to_nid(page
) == numa_node_id())) {
1757 * Freeing fastpath. Collects all local-node objects, not
1758 * just those allocated from our per-CPU list. This allows
1759 * fast transfer of objects from one CPU to another within
1762 set_freepointer(s
, object
, l
->freelist
.head
);
1763 l
->freelist
.head
= object
;
1764 if (!l
->freelist
.nr
)
1765 l
->freelist
.tail
= object
;
1768 if (unlikely(l
->freelist
.nr
> slab_hiwater(s
)))
1769 flush_free_list(s
, l
);
1774 * Freeing an object that was allocated on a remote node.
1776 slab_free_to_remote(s
, page
, object
, c
);
1777 slqb_stat_inc(l
, FREE_REMOTE
);
1783 * Perform some interrupts-on processing around the main freeing path
1786 static __always_inline
void slab_free(struct kmem_cache
*s
,
1787 struct slqb_page
*page
, void *object
)
1789 unsigned long flags
;
1793 debug_check_no_locks_freed(object
, s
->objsize
);
1794 if (likely(object
) && unlikely(slab_debug(s
))) {
1795 if (unlikely(!free_debug_processing(s
, object
, _RET_IP_
)))
1799 local_irq_save(flags
);
1800 __slab_free(s
, page
, object
);
1801 local_irq_restore(flags
);
1804 void kmem_cache_free(struct kmem_cache
*s
, void *object
)
1806 struct slqb_page
*page
= NULL
;
1809 page
= virt_to_head_slqb_page(object
);
1810 slab_free(s
, page
, object
);
1812 EXPORT_SYMBOL(kmem_cache_free
);
 * Calculate the order of allocation given a slab object size.
 *
 * Order-0 allocations are preferred since order-0 does not cause fragmentation
 * in the page allocator, and they have fastpaths in the page allocator. But
 * we also want to minimise external fragmentation with large objects.
1821 static int slab_order(int size
, int max_order
, int frac
)
1825 if (fls(size
- 1) <= PAGE_SHIFT
)
1828 order
= fls(size
- 1) - PAGE_SHIFT
;
1829 if (order
< slqb_min_order
)
1830 order
= slqb_min_order
;
1832 while (order
<= max_order
) {
1833 unsigned long slab_size
= PAGE_SIZE
<< order
;
1834 unsigned long objects
;
1835 unsigned long waste
;
1837 objects
= slab_size
/ size
;
1841 if (order
< MAX_ORDER
&& objects
< slqb_min_objects
) {
1843 * if we don't have enough objects for min_objects,
1844 * then try the next size up. Unless we have reached
1845 * our maximum possible page size.
1850 waste
= slab_size
- (objects
* size
);
1852 if (waste
* frac
<= slab_size
)
1862 static int calculate_order(int size
)
1867 * Attempt to find best configuration for a slab. This
1868 * works by first attempting to generate a layout with
1869 * the best configuration and backing off gradually.
1871 order
= slab_order(size
, 1, 4);
1876 * This size cannot fit in order-1. Allow bigger orders, but
1877 * forget about trying to save space.
1879 order
= slab_order(size
, MAX_ORDER
- 1, 0);
1880 if (order
< MAX_ORDER
)
1887 * Figure out what the alignment of the objects will be.
1889 static unsigned long calculate_alignment(unsigned long flags
,
1890 unsigned long align
, unsigned long size
)
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, use it.
1899 if (flags
& SLAB_HWCACHE_ALIGN
) {
1900 unsigned long ralign
= cache_line_size();
1902 while (size
<= ralign
/ 2)
1904 align
= max(align
, ralign
);
1907 if (align
< ARCH_SLAB_MINALIGN
)
1908 align
= ARCH_SLAB_MINALIGN
;
1910 return ALIGN(align
, sizeof(void *));
1913 static void init_kmem_cache_list(struct kmem_cache
*s
,
1914 struct kmem_cache_list
*l
)
1918 l
->freelist
.head
= NULL
;
1919 l
->freelist
.tail
= NULL
;
1922 INIT_LIST_HEAD(&l
->partial
);
1923 spin_lock_init(&l
->page_lock
);
1926 l
->remote_free_check
= 0;
1927 spin_lock_init(&l
->remote_free
.lock
);
1928 l
->remote_free
.list
.nr
= 0;
1929 l
->remote_free
.list
.head
= NULL
;
1930 l
->remote_free
.list
.tail
= NULL
;
1933 #ifdef CONFIG_SLQB_STATS
1934 memset(l
->stats
, 0, sizeof(l
->stats
));
1938 static void init_kmem_cache_cpu(struct kmem_cache
*s
,
1939 struct kmem_cache_cpu
*c
)
1941 init_kmem_cache_list(s
, &c
->list
);
1946 c
->rlist
.head
= NULL
;
1947 c
->rlist
.tail
= NULL
;
1948 c
->remote_cache_list
= NULL
;
1953 static void init_kmem_cache_node(struct kmem_cache
*s
,
1954 struct kmem_cache_node
*n
)
1956 spin_lock_init(&n
->list_lock
);
1957 init_kmem_cache_list(s
, &n
->list
);
1961 /* Initial slabs. */
1963 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_cache_cpus
);
1966 /* XXX: really need a DEFINE_PER_NODE for per-node data because a static
1967 * array is wasteful */
1968 static struct kmem_cache_node kmem_cache_nodes
[MAX_NUMNODES
];
1972 static struct kmem_cache kmem_cpu_cache
;
1973 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_cpu_cpus
);
1975 static struct kmem_cache_node kmem_cpu_nodes
[MAX_NUMNODES
]; /* XXX per-nid */
1980 static struct kmem_cache kmem_node_cache
;
1982 static DEFINE_PER_CPU(struct kmem_cache_cpu
, kmem_node_cpus
);
1984 static struct kmem_cache_node kmem_node_nodes
[MAX_NUMNODES
]; /*XXX per-nid */
1988 static struct kmem_cache_cpu
*alloc_kmem_cache_cpu(struct kmem_cache
*s
,
1991 struct kmem_cache_cpu
*c
;
1994 node
= cpu_to_node(cpu
);
1996 c
= kmem_cache_alloc_node(&kmem_cpu_cache
, GFP_KERNEL
, node
);
2000 init_kmem_cache_cpu(s
, c
);
2004 static void free_kmem_cache_cpus(struct kmem_cache
*s
)
2008 for_each_online_cpu(cpu
) {
2009 struct kmem_cache_cpu
*c
;
2011 c
= s
->cpu_slab
[cpu
];
2013 kmem_cache_free(&kmem_cpu_cache
, c
);
2014 s
->cpu_slab
[cpu
] = NULL
;
2019 static int alloc_kmem_cache_cpus(struct kmem_cache
*s
)
2023 for_each_online_cpu(cpu
) {
2024 struct kmem_cache_cpu
*c
;
2026 c
= s
->cpu_slab
[cpu
];
2030 c
= alloc_kmem_cache_cpu(s
, cpu
);
2032 free_kmem_cache_cpus(s
);
2035 s
->cpu_slab
[cpu
] = c
;
2041 static inline void free_kmem_cache_cpus(struct kmem_cache
*s
)
2045 static inline int alloc_kmem_cache_cpus(struct kmem_cache
*s
)
2047 init_kmem_cache_cpu(s
, &s
->cpu_slab
);
2053 static void free_kmem_cache_nodes(struct kmem_cache
*s
)
2057 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2058 struct kmem_cache_node
*n
;
2060 n
= s
->node_slab
[node
];
2062 kmem_cache_free(&kmem_node_cache
, n
);
2063 s
->node_slab
[node
] = NULL
;
2068 static int alloc_kmem_cache_nodes(struct kmem_cache
*s
)
2072 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2073 struct kmem_cache_node
*n
;
2075 n
= kmem_cache_alloc_node(&kmem_node_cache
, GFP_KERNEL
, node
);
2077 free_kmem_cache_nodes(s
);
2080 init_kmem_cache_node(s
, n
);
2081 s
->node_slab
[node
] = n
;
2086 static void free_kmem_cache_nodes(struct kmem_cache
*s
)
2090 static int alloc_kmem_cache_nodes(struct kmem_cache
*s
)
2097 * calculate_sizes() determines the order and the distribution of data within
2100 static int calculate_sizes(struct kmem_cache
*s
)
2102 unsigned long flags
= s
->flags
;
2103 unsigned long size
= s
->objsize
;
2104 unsigned long align
= s
->align
;
2107 * Determine if we can poison the object itself. If the user of
2108 * the slab may touch the object after free or before allocation
2109 * then we should never poison the object itself.
2111 if (slab_poison(s
) && !(flags
& SLAB_DESTROY_BY_RCU
) && !s
->ctor
)
2112 s
->flags
|= __OBJECT_POISON
;
2114 s
->flags
&= ~__OBJECT_POISON
;
2117 * Round up object size to the next word boundary. We can only
2118 * place the free pointer at word boundaries and this determines
2119 * the possible location of the free pointer.
2121 size
= ALIGN(size
, sizeof(void *));
2123 #ifdef CONFIG_SLQB_DEBUG
2125 * If we are Redzoning then check if there is some space between the
2126 * end of the object and the free pointer. If not then add an
2127 * additional word to have some bytes to store Redzone information.
2129 if ((flags
& SLAB_RED_ZONE
) && size
== s
->objsize
)
2130 size
+= sizeof(void *);
2134 * With that we have determined the number of bytes in actual use
2135 * by the object. This is the potential offset to the free pointer.
2139 if (((flags
& (SLAB_DESTROY_BY_RCU
| SLAB_POISON
)) || s
->ctor
)) {
2141 * Relocate free pointer after the object if it is not
2142 * permitted to overwrite the first word of the object on
2145 * This is the case if we do RCU, have a constructor or
2146 * destructor or are poisoning the objects.
2149 size
+= sizeof(void *);
2152 #ifdef CONFIG_SLQB_DEBUG
2153 if (flags
& SLAB_STORE_USER
) {
2155 * Need to store information about allocs and frees after
2158 size
+= 2 * sizeof(struct track
);
2161 if (flags
& SLAB_RED_ZONE
) {
		 * Add some empty padding so that we can catch
		 * overwrites from earlier objects rather than let
		 * tracking information or the free pointer be
		 * corrupted if a user writes before the start
		 * of the object.
2169 size
+= sizeof(void *);
2174 * Determine the alignment based on various parameters that the
2175 * user specified and the dynamic determination of cache line size
2178 align
= calculate_alignment(flags
, align
, s
->objsize
);
2181 * SLQB stores one object immediately after another beginning from
2182 * offset 0. In order to align the objects we have to simply size
2183 * each object to conform to the alignment.
2185 size
= ALIGN(size
, align
);
2187 s
->order
= calculate_order(size
);
2194 s
->allocflags
|= __GFP_COMP
;
2196 if (s
->flags
& SLAB_CACHE_DMA
)
2197 s
->allocflags
|= SLQB_DMA
;
2199 if (s
->flags
& SLAB_RECLAIM_ACCOUNT
)
2200 s
->allocflags
|= __GFP_RECLAIMABLE
;
2203 * Determine the number of objects per slab
2205 s
->objects
= (PAGE_SIZE
<< s
->order
) / size
;
2207 s
->freebatch
= max(4UL*PAGE_SIZE
/ size
,
2208 min(256UL, 64*PAGE_SIZE
/ size
));
2211 s
->hiwater
= s
->freebatch
<< 2;
2213 return !!s
->objects
;
 * The per-cpu allocator can't be used because it always uses the slab
 * allocator, and it can't do per-node allocations.
2222 static void *kmem_cache_dyn_array_alloc(int ids
)
2224 size_t size
= sizeof(void *) * ids
;
2228 if (unlikely(!slab_is_available())) {
2229 static void *nextmem
;
2230 static size_t nextleft
;
2234 * Special case for setting up initial caches. These will
2235 * never get freed by definition so we can do it rather
2238 if (size
> nextleft
) {
2239 nextmem
= alloc_pages_exact(size
, GFP_KERNEL
);
2242 nextleft
= roundup(size
, PAGE_SIZE
);
2248 memset(ret
, 0, size
);
2251 return kzalloc(size
, GFP_KERNEL
);
2255 static void kmem_cache_dyn_array_free(void *array
)
2257 if (unlikely(!slab_is_available()))
2258 return; /* error case without crashing here (will panic soon) */
2264 * Except in early boot, this should be called with slqb_lock held for write
2265 * to lock out hotplug, and protect list modifications.
2267 static int kmem_cache_open(struct kmem_cache
*s
,
2268 const char *name
, size_t size
, size_t align
,
2269 unsigned long flags
, void (*ctor
)(void *), int alloc
)
2271 unsigned int left_over
;
2273 memset(s
, 0, sizeof(struct kmem_cache
));
2278 s
->flags
= kmem_cache_flags(size
, flags
, name
, ctor
);
2280 if (!calculate_sizes(s
))
2283 if (!slab_debug(s
)) {
2284 left_over
= (PAGE_SIZE
<< s
->order
) - (s
->objects
* s
->size
);
2285 s
->colour_off
= max(cache_line_size(), s
->align
);
2286 s
->colour_range
= left_over
;
2289 s
->colour_range
= 0;
2293 s
->cpu_slab
= kmem_cache_dyn_array_alloc(nr_cpu_ids
);
2297 s
->node_slab
= kmem_cache_dyn_array_alloc(nr_node_ids
);
2299 goto error_cpu_array
;
2303 if (likely(alloc
)) {
2304 if (!alloc_kmem_cache_nodes(s
))
2305 goto error_node_array
;
2307 if (!alloc_kmem_cache_cpus(s
))
2312 list_add(&s
->list
, &slab_caches
);
2317 free_kmem_cache_nodes(s
);
2319 #if defined(CONFIG_NUMA) && defined(CONFIG_SMP)
2320 kmem_cache_dyn_array_free(s
->node_slab
);
2324 kmem_cache_dyn_array_free(s
->cpu_slab
);
2327 if (flags
& SLAB_PANIC
)
2328 panic("%s: failed to create slab `%s'\n", __func__
, name
);
2333 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
2334 * @s: the cache we're checking against
2335 * @ptr: pointer to validate
2337 * This verifies that the untrusted pointer looks sane;
2338 * it is _not_ a guarantee that the pointer is actually
2339 * part of the slab cache in question, but it at least
2340 * validates that the pointer can be dereferenced and
2341 * looks half-way sane.
2343 * Currently only used for dentry validation.
2345 int kmem_ptr_validate(struct kmem_cache
*s
, const void *ptr
)
2347 unsigned long addr
= (unsigned long)ptr
;
2348 struct slqb_page
*page
;
2350 if (unlikely(addr
< PAGE_OFFSET
))
2352 if (unlikely(addr
> (unsigned long)high_memory
- s
->size
))
2354 if (unlikely(!IS_ALIGNED(addr
, s
->align
)))
2356 if (unlikely(!kern_addr_valid(addr
)))
2358 if (unlikely(!kern_addr_valid(addr
+ s
->size
- 1)))
2360 if (unlikely(!pfn_valid(addr
>> PAGE_SHIFT
)))
2362 page
= virt_to_head_slqb_page(ptr
);
2363 if (unlikely(!(page
->flags
& PG_SLQB_BIT
)))
2365 if (unlikely(page
->list
->cache
!= s
)) /* XXX: ouch, racy */
2371 EXPORT_SYMBOL(kmem_ptr_validate
);
2374 * Determine the size of a slab object
2376 unsigned int kmem_cache_size(struct kmem_cache
*s
)
2380 EXPORT_SYMBOL(kmem_cache_size
);
2382 const char *kmem_cache_name(struct kmem_cache
*s
)
2386 EXPORT_SYMBOL(kmem_cache_name
);
2389 * Release all resources used by a slab cache. No more concurrency on the
2390 * slab, so we can touch remote kmem_cache_cpu structures.
2392 void kmem_cache_destroy(struct kmem_cache
*s
)
2399 down_write(&slqb_lock
);
2402 local_irq_disable();
2404 for_each_online_cpu(cpu
) {
2405 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2406 struct kmem_cache_list
*l
= &c
->list
;
2408 flush_free_list_all(s
, l
);
2409 flush_remote_free_cache(s
, c
);
2413 for_each_online_cpu(cpu
) {
2414 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2415 struct kmem_cache_list
*l
= &c
->list
;
2417 claim_remote_free_list(s
, l
);
2418 flush_free_list_all(s
, l
);
2420 WARN_ON(l
->freelist
.nr
);
2421 WARN_ON(l
->nr_slabs
);
2422 WARN_ON(l
->nr_partial
);
2425 free_kmem_cache_cpus(s
);
2428 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2429 struct kmem_cache_node
*n
;
2430 struct kmem_cache_list
*l
;
2432 n
= s
->node_slab
[node
];
2437 claim_remote_free_list(s
, l
);
2438 flush_free_list_all(s
, l
);
2440 WARN_ON(l
->freelist
.nr
);
2441 WARN_ON(l
->nr_slabs
);
2442 WARN_ON(l
->nr_partial
);
2445 free_kmem_cache_nodes(s
);
2449 sysfs_slab_remove(s
);
2450 up_write(&slqb_lock
);
2452 EXPORT_SYMBOL(kmem_cache_destroy
);
2454 /********************************************************************
2456 *******************************************************************/
2458 struct kmem_cache kmalloc_caches
[KMALLOC_SHIFT_SLQB_HIGH
+ 1] __cacheline_aligned
;
2459 EXPORT_SYMBOL(kmalloc_caches
);
2461 #ifdef CONFIG_ZONE_DMA
2462 struct kmem_cache kmalloc_caches_dma
[KMALLOC_SHIFT_SLQB_HIGH
+ 1] __cacheline_aligned
;
2463 EXPORT_SYMBOL(kmalloc_caches_dma
);
2466 #ifndef ARCH_KMALLOC_FLAGS
2467 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
2470 static struct kmem_cache
*open_kmalloc_cache(struct kmem_cache
*s
,
2471 const char *name
, int size
, gfp_t gfp_flags
)
2473 unsigned int flags
= ARCH_KMALLOC_FLAGS
| SLAB_PANIC
;
2475 if (gfp_flags
& SLQB_DMA
)
2476 flags
|= SLAB_CACHE_DMA
;
2478 kmem_cache_open(s
, name
, size
, ARCH_KMALLOC_MINALIGN
, flags
, NULL
, 1);
2484 * Conversion table for small slabs sizes / 8 to the index in the
2485 * kmalloc array. This is necessary for slabs < 192 since we have non power
2486 * of two cache sizes there. The size of larger slabs can be determined using
2489 static s8 size_index
[24] __cacheline_aligned
= {
2498 #if L1_CACHE_BYTES < 64
2513 #if L1_CACHE_BYTES < 128
2534 static struct kmem_cache
*get_slab(size_t size
, gfp_t flags
)
2538 if (unlikely(size
<= KMALLOC_MIN_SIZE
)) {
2539 if (unlikely(!size
))
2540 return ZERO_SIZE_PTR
;
2542 index
= KMALLOC_SHIFT_LOW
;
2546 #if L1_CACHE_BYTES >= 128
2551 index
= size_index
[(size
- 1) / 8];
2553 if (unlikely(size
> 1UL << KMALLOC_SHIFT_SLQB_HIGH
))
2556 index
= fls(size
- 1);
2560 if (unlikely((flags
& SLQB_DMA
)))
2561 return &kmalloc_caches_dma
[index
];
2563 return &kmalloc_caches
[index
];
2566 void *__kmalloc(size_t size
, gfp_t flags
)
2568 struct kmem_cache
*s
;
2570 s
= get_slab(size
, flags
);
2571 if (unlikely(ZERO_OR_NULL_PTR(s
)))
2574 return __kmem_cache_alloc(s
, flags
, _RET_IP_
);
2576 EXPORT_SYMBOL(__kmalloc
);
2579 void *__kmalloc_node(size_t size
, gfp_t flags
, int node
)
2581 struct kmem_cache
*s
;
2583 s
= get_slab(size
, flags
);
2584 if (unlikely(ZERO_OR_NULL_PTR(s
)))
2587 return kmem_cache_alloc_node(s
, flags
, node
);
2589 EXPORT_SYMBOL(__kmalloc_node
);
2592 size_t ksize(const void *object
)
2594 struct slqb_page
*page
;
2595 struct kmem_cache
*s
;
2598 if (unlikely(object
== ZERO_SIZE_PTR
))
2601 page
= virt_to_head_slqb_page(object
);
2602 BUG_ON(!(page
->flags
& PG_SLQB_BIT
));
2604 s
= page
->list
->cache
;
2607 * Debugging requires use of the padding between object
2608 * and whatever may come after it.
2610 if (s
->flags
& (SLAB_RED_ZONE
| SLAB_POISON
))
2614 * If we have the need to store the freelist pointer
2615 * back there or track user information then we can
2616 * only use the space before that information.
2618 if (s
->flags
& (SLAB_DESTROY_BY_RCU
| SLAB_STORE_USER
))
2622 * Else we can use all the padding etc for the allocation
2626 EXPORT_SYMBOL(ksize
);
2628 void kfree(const void *object
)
2630 struct kmem_cache
*s
;
2631 struct slqb_page
*page
;
2633 if (unlikely(ZERO_OR_NULL_PTR(object
)))
2636 page
= virt_to_head_slqb_page(object
);
2637 s
= page
->list
->cache
;
2639 slab_free(s
, page
, (void *)object
);
2641 EXPORT_SYMBOL(kfree
);
2643 static void kmem_cache_trim_percpu(void *arg
)
2645 int cpu
= smp_processor_id();
2646 struct kmem_cache
*s
= arg
;
2647 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2648 struct kmem_cache_list
*l
= &c
->list
;
2650 claim_remote_free_list(s
, l
);
2651 flush_free_list(s
, l
);
2653 flush_remote_free_cache(s
, c
);
2657 int kmem_cache_shrink(struct kmem_cache
*s
)
2663 on_each_cpu(kmem_cache_trim_percpu
, s
, 1);
2666 for_each_node_state(node
, N_NORMAL_MEMORY
) {
2667 struct kmem_cache_node
*n
;
2668 struct kmem_cache_list
*l
;
2670 n
= s
->node_slab
[node
];
2675 spin_lock_irq(&n
->list_lock
);
2676 claim_remote_free_list(s
, l
);
2677 flush_free_list(s
, l
);
2678 spin_unlock_irq(&n
->list_lock
);
2684 EXPORT_SYMBOL(kmem_cache_shrink
);
2686 #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2687 static void kmem_cache_reap_percpu(void *arg
)
2689 int cpu
= smp_processor_id();
2690 struct kmem_cache
*s
;
2691 long phase
= (long)arg
;
2693 list_for_each_entry(s
, &slab_caches
, list
) {
2694 struct kmem_cache_cpu
*c
= get_cpu_slab(s
, cpu
);
2695 struct kmem_cache_list
*l
= &c
->list
;
2698 flush_free_list_all(s
, l
);
2699 flush_remote_free_cache(s
, c
);
2703 claim_remote_free_list(s
, l
);
2704 flush_free_list_all(s
, l
);
static void kmem_cache_reap(void)
{
	struct kmem_cache *s;
	int node;

	down_read(&slqb_lock);
	on_each_cpu(kmem_cache_reap_percpu, (void *)0, 1);
	on_each_cpu(kmem_cache_reap_percpu, (void *)1, 1);

	list_for_each_entry(s, &slab_caches, list) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n;
			struct kmem_cache_list *l;

			n = s->node_slab[node];
			if (!n)
				continue;
			l = &n->list;

			spin_lock_irq(&n->list_lock);
			claim_remote_free_list(s, l);
			flush_free_list_all(s, l);
			spin_unlock_irq(&n->list_lock);
		}
	}
	up_read(&slqb_lock);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
static void cache_trim_worker(struct work_struct *w)
{
	struct delayed_work *work =
		container_of(w, struct delayed_work, work);
	struct kmem_cache *s;

	if (!down_read_trylock(&slqb_lock))
		goto out;

	list_for_each_entry(s, &slab_caches, list) {
#ifdef CONFIG_NUMA
		int node = numa_node_id();
		struct kmem_cache_node *n = s->node_slab[node];

		if (n) {
			struct kmem_cache_list *l = &n->list;

			spin_lock_irq(&n->list_lock);
			claim_remote_free_list(s, l);
			flush_free_list(s, l);
			spin_unlock_irq(&n->list_lock);
		}
#endif

		local_irq_disable();
		kmem_cache_trim_percpu(s);
		local_irq_enable();
	}

	up_read(&slqb_lock);
out:
	schedule_delayed_work(work, round_jiffies_relative(3*HZ));
}

static DEFINE_PER_CPU(struct delayed_work, slqb_cache_trim_work);
static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *cache_trim_work = &per_cpu(slqb_cache_trim_work,
							cpu);

	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be setup
	 * at that point.
	 */
	if (keventd_up() && cache_trim_work->work.func == NULL) {
		INIT_DELAYED_WORK(cache_trim_work, cache_trim_worker);
		schedule_delayed_work_on(cpu, cache_trim_work,
					__round_jiffies_relative(HZ, cpu));
	}
}

static int __init cpucache_init(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);

	return 0;
}
device_initcall(cpucache_init);
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
static void slab_mem_going_offline_callback(void *arg)
{
	kmem_cache_reap();
}

static void slab_mem_offline_callback(void *arg)
{
	/* XXX: should release structures, see CPU offline comment */
}

static int slab_mem_going_online_callback(void *arg)
{
	struct kmem_cache *s;
	struct kmem_cache_node *n;
	struct memory_notify *marg = arg;
	int nid = marg->status_change_nid;
	int ret = 0;

	/*
	 * If the node's memory is already available, then kmem_cache_node is
	 * already created. Nothing to do.
	 */
	if (nid < 0)
		return 0;

	/*
	 * We are bringing a node online. No memory is available yet. We must
	 * allocate a kmem_cache_node structure in order to bring the node
	 * online.
	 */
	down_write(&slqb_lock);
	list_for_each_entry(s, &slab_caches, list) {
		/*
		 * XXX: kmem_cache_alloc_node will fall back to other nodes
		 *      since memory is not yet available from the node that
		 *      is being brought up.
		 */
		if (s->node_slab[nid]) /* could be leftover from last online */
			continue;
		n = kmem_cache_alloc(&kmem_node_cache, GFP_KERNEL);
		if (!n) {
			ret = -ENOMEM;
			goto out;
		}
		init_kmem_cache_node(s, n);
		s->node_slab[nid] = n;
	}
out:
	up_write(&slqb_lock);
	return ret;
}
static int slab_memory_callback(struct notifier_block *self,
		unsigned long action, void *arg)
{
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		ret = slab_mem_going_online_callback(arg);
		break;
	case MEM_GOING_OFFLINE:
		slab_mem_going_offline_callback(arg);
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		slab_mem_offline_callback(arg);
		break;
	case MEM_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}

	ret = notifier_from_errno(ret);
	return ret;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
/********************************************************************
 *			Basic setup of slabs
 *******************************************************************/

void __init kmem_cache_init(void)
{
	int i;
	unsigned int flags = SLAB_HWCACHE_ALIGN|SLAB_PANIC;

	/*
	 * All the ifdefs are rather ugly here, but it's just the setup code,
	 * so it doesn't have to be too readable :)
	 */

	/*
	 * No need to take slqb_lock here: there should be no concurrency
	 * anyway, and spin_unlock_irq in rwsem code could enable interrupts
	 * too early.
	 */
	kmem_cache_open(&kmem_cache_cache, "kmem_cache",
			sizeof(struct kmem_cache), 0, flags, NULL, 0);
#ifdef CONFIG_SMP
	kmem_cache_open(&kmem_cpu_cache, "kmem_cache_cpu",
			sizeof(struct kmem_cache_cpu), 0, flags, NULL, 0);
#endif
#ifdef CONFIG_NUMA
	kmem_cache_open(&kmem_node_cache, "kmem_cache_node",
			sizeof(struct kmem_cache_node), 0, flags, NULL, 0);
#endif

#ifdef CONFIG_SMP
	for_each_possible_cpu(i) {
		struct kmem_cache_cpu *c;

		c = &per_cpu(kmem_cache_cpus, i);
		init_kmem_cache_cpu(&kmem_cache_cache, c);
		kmem_cache_cache.cpu_slab[i] = c;

		c = &per_cpu(kmem_cpu_cpus, i);
		init_kmem_cache_cpu(&kmem_cpu_cache, c);
		kmem_cpu_cache.cpu_slab[i] = c;

#ifdef CONFIG_NUMA
		c = &per_cpu(kmem_node_cpus, i);
		init_kmem_cache_cpu(&kmem_node_cache, c);
		kmem_node_cache.cpu_slab[i] = c;
#endif
	}
#else
	init_kmem_cache_cpu(&kmem_cache_cache, &kmem_cache_cache.cpu_slab);
#endif

#ifdef CONFIG_NUMA
	for_each_node_state(i, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n;

		n = &kmem_cache_nodes[i];
		init_kmem_cache_node(&kmem_cache_cache, n);
		kmem_cache_cache.node_slab[i] = n;
#ifdef CONFIG_SMP
		n = &kmem_cpu_nodes[i];
		init_kmem_cache_node(&kmem_cpu_cache, n);
		kmem_cpu_cache.node_slab[i] = n;
#endif
		n = &kmem_node_nodes[i];
		init_kmem_cache_node(&kmem_node_cache, n);
		kmem_node_cache.node_slab[i] = n;
	}
#endif

	/* Caches that are not of power-of-two size */
	if (L1_CACHE_BYTES < 64 && KMALLOC_MIN_SIZE <= 64) {
		open_kmalloc_cache(&kmalloc_caches[1],
				"kmalloc-96", 96, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[1],
				"kmalloc_dma-96", 96, GFP_KERNEL|SLQB_DMA);
#endif
	}
	if (L1_CACHE_BYTES < 128 && KMALLOC_MIN_SIZE <= 128) {
		open_kmalloc_cache(&kmalloc_caches[2],
				"kmalloc-192", 192, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[2],
				"kmalloc_dma-192", 192, GFP_KERNEL|SLQB_DMA);
#endif
	}

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_SLQB_HIGH; i++) {
		open_kmalloc_cache(&kmalloc_caches[i],
				"kmalloc", 1 << i, GFP_KERNEL);
#ifdef CONFIG_ZONE_DMA
		open_kmalloc_cache(&kmalloc_caches_dma[i],
				"kmalloc_dma", 1 << i, GFP_KERNEL|SLQB_DMA);
#endif
	}

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * MIPS it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
			(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;

	/* Provide the correct kmalloc names now that the caches are up */
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_SLQB_HIGH; i++) {
		kmalloc_caches[i].name =
			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
#ifdef CONFIG_ZONE_DMA
		kmalloc_caches_dma[i].name =
			kasprintf(GFP_KERNEL, "kmalloc_dma-%d", 1 << i);
#endif
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&slab_notifier);
#endif
#ifdef CONFIG_NUMA
	hotplug_memory_notifier(slab_memory_callback, 1);
#endif
	/*
	 * smp_init() has not yet been called, so no worries about memory
	 * ordering with __slab_is_available.
	 */
	__slab_is_available = 1;
}
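
/*
 * Illustrative worked example (not part of the original SLQB code), assuming
 * KMALLOC_SHIFT_LOW == ilog2(KMALLOC_MIN_SIZE): on an arch where
 * ARCH_KMALLOC_MINALIGN forces KMALLOC_MIN_SIZE up to 64, the patch-up loop
 * in kmem_cache_init() rewrites size_index so every request of 8..64 bytes
 * resolves to the 64-byte cache. For a 24-byte request:
 *	size_index[(24 - 1) / 8] == size_index[2] == KMALLOC_SHIFT_LOW == 6,
 * and kmalloc_caches[6] is the 2^6 == 64 byte cache. Larger sizes keep
 * their original table entries.
 */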
void __init kmem_cache_init_late(void)
{
}
/*
 * Some basic slab creation sanity checks
 */
static int kmem_cache_create_ok(const char *name, size_t size,
		size_t align, unsigned long flags)
{
	struct kmem_cache *tmp;

	/*
	 * Sanity checks... these are all serious usage bugs.
	 */
	if (!name || in_interrupt() || (size < sizeof(void *))) {
		printk(KERN_ERR "kmem_cache_create(): early error in slab %s\n",
				name);
		dump_stack();

		return 0;
	}

	list_for_each_entry(tmp, &slab_caches, list) {
		char x;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(tmp->name, x);
		if (res) {
			printk(KERN_ERR
				"SLAB: cache with size %d has lost its name\n",
				tmp->size);
			continue;
		}

		if (!strcmp(tmp->name, name)) {
			printk(KERN_ERR
				"SLAB: duplicate cache %s\n", name);
			dump_stack();

			return 0;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	if (flags & SLAB_DESTROY_BY_RCU)
		WARN_ON(flags & SLAB_POISON);

	return 1;
}
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;

	down_write(&slqb_lock);
	if (!kmem_cache_create_ok(name, size, align, flags))
		goto err;

	s = kmem_cache_alloc(&kmem_cache_cache, GFP_KERNEL);
	if (!s)
		goto err;

	if (kmem_cache_open(s, name, size, align, flags, ctor, 1)) {
		up_write(&slqb_lock);

		return s;
	}

	kmem_cache_free(&kmem_cache_cache, s);

err:
	up_write(&slqb_lock);
	if (flags & SLAB_PANIC)
		panic("%s: failed to create slab `%s'\n", __func__, name);

	return NULL;
}
EXPORT_SYMBOL(kmem_cache_create);
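
/*
 * Illustrative sketch (not part of the original SLQB code): a typical caller
 * creates a cache once at init time and then allocates objects from it. The
 * struct, constructor and cache below are hypothetical.
 */
#if 0
struct example_obj {
	struct list_head link;
	int value;
};

static void example_obj_ctor(void *ptr)
{
	struct example_obj *obj = ptr;

	INIT_LIST_HEAD(&obj->link);	/* constructor run by the allocator */
}

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
	example_cache = kmem_cache_create("example_obj",
			sizeof(struct example_obj), 0,
			SLAB_HWCACHE_ALIGN, example_obj_ctor);
	if (!example_cache)
		return -ENOMEM;

	return 0;
}
#endif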
#ifdef CONFIG_SMP
/*
 * Use the cpu notifier to ensure that the cpu slabs are flushed when
 * necessary.
 */
static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct kmem_cache *s;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		down_write(&slqb_lock);
		list_for_each_entry(s, &slab_caches, list) {
			if (s->cpu_slab[cpu]) /* could be leftover from last online */
				continue;
			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu);
			if (!s->cpu_slab[cpu]) {
				up_write(&slqb_lock);
				return NOTIFY_BAD;
			}
		}
		up_write(&slqb_lock);
		break;

	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_rearming_delayed_work(&per_cpu(slqb_cache_trim_work,
							cpu));
		per_cpu(slqb_cache_trim_work, cpu).work.func = NULL;
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/*
		 * XXX: Freeing here doesn't work because objects can still be
		 * on this CPU's list. The periodic timer needs to check if a
		 * CPU is offline and then try to clean up from there. Same
		 * for node offline.
		 */
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata slab_notifier = {
	.notifier_call = slab_cpuup_callback
};

#endif /* CONFIG_SMP */
#ifdef CONFIG_SLQB_DEBUG
void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
	struct kmem_cache *s;
	int node = -1;

	s = get_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;

#ifdef CONFIG_NUMA
	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
		node = alternate_nid(s, flags, node);
#endif
	return slab_alloc(s, flags, node, caller);
}

void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
		unsigned long caller)
{
	struct kmem_cache *s;

	s = get_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;

	return slab_alloc(s, flags, node, caller);
}
#endif
#if defined(CONFIG_SLQB_SYSFS) || defined(CONFIG_SLABINFO)
struct stats_gather {
	struct kmem_cache *s;
	spinlock_t lock;
	unsigned long nr_slabs;
	unsigned long nr_partial;
	unsigned long nr_inuse;
	unsigned long nr_objects;

#ifdef CONFIG_SLQB_STATS
	unsigned long stats[NR_SLQB_STAT_ITEMS];
#endif
};
static void __gather_stats(void *arg)
{
	unsigned long nr_slabs;
	unsigned long nr_partial;
	unsigned long nr_inuse;
	struct stats_gather *gather = arg;
	int cpu = smp_processor_id();
	struct kmem_cache *s = gather->s;
	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
	struct kmem_cache_list *l = &c->list;
	struct slqb_page *page;
#ifdef CONFIG_SLQB_STATS
	int i;
#endif

	spin_lock(&l->page_lock);
	nr_slabs = l->nr_slabs;
	nr_partial = l->nr_partial;
	nr_inuse = (nr_slabs - nr_partial) * s->objects;

	list_for_each_entry(page, &l->partial, lru) {
		nr_inuse += page->inuse;
	}
	spin_unlock(&l->page_lock);

	spin_lock(&gather->lock);
	gather->nr_slabs += nr_slabs;
	gather->nr_partial += nr_partial;
	gather->nr_inuse += nr_inuse;
#ifdef CONFIG_SLQB_STATS
	for (i = 0; i < NR_SLQB_STAT_ITEMS; i++)
		gather->stats[i] += l->stats[i];
#endif
	spin_unlock(&gather->lock);
}
/* must be called with slqb_lock held */
static void gather_stats_locked(struct kmem_cache *s,
		struct stats_gather *stats)
{
#ifdef CONFIG_NUMA
	int node;
#endif

	memset(stats, 0, sizeof(struct stats_gather));
	stats->s = s;
	spin_lock_init(&stats->lock);

	on_each_cpu(__gather_stats, stats, 1);

#ifdef CONFIG_NUMA
	for_each_online_node(node) {
		struct kmem_cache_node *n = s->node_slab[node];
		struct kmem_cache_list *l = &n->list;
		struct slqb_page *page;
		unsigned long flags;
#ifdef CONFIG_SLQB_STATS
		int i;
#endif

		spin_lock_irqsave(&n->list_lock, flags);
#ifdef CONFIG_SLQB_STATS
		for (i = 0; i < NR_SLQB_STAT_ITEMS; i++)
			stats->stats[i] += l->stats[i];
#endif
		stats->nr_slabs += l->nr_slabs;
		stats->nr_partial += l->nr_partial;
		stats->nr_inuse += (l->nr_slabs - l->nr_partial) * s->objects;

		list_for_each_entry(page, &l->partial, lru) {
			stats->nr_inuse += page->inuse;
		}
		spin_unlock_irqrestore(&n->list_lock, flags);
	}
#endif

	stats->nr_objects = stats->nr_slabs * s->objects;
}
#ifdef CONFIG_SLQB_SYSFS
static void gather_stats(struct kmem_cache *s, struct stats_gather *stats)
{
	down_read(&slqb_lock); /* hold off hotplug */
	gather_stats_locked(s, stats);
	up_read(&slqb_lock);
}
#endif
/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
#include <linux/proc_fs.h>

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		size_t count, loff_t *ppos)
{
	return -EINVAL;
}

static void print_slabinfo_header(struct seq_file *m)
{
	seq_puts(m, "slabinfo - version: 2.1\n");
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		"<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
	seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	down_read(&slqb_lock);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

static void s_stop(struct seq_file *m, void *p)
{
	up_read(&slqb_lock);
}

static int s_show(struct seq_file *m, void *p)
{
	struct stats_gather stats;
	struct kmem_cache *s;

	s = list_entry(p, struct kmem_cache, list);

	gather_stats_locked(s, &stats);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, stats.nr_inuse,
			stats.nr_objects, s->size, s->objects, (1 << s->order));
	seq_printf(m, " : tunables %4u %4u %4u", slab_hiwater(s),
			slab_freebatch(s), 0);
	seq_printf(m, " : slabdata %6lu %6lu %6lu", stats.nr_slabs,
			stats.nr_slabs, 0UL);
	seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IWUSR|S_IRUGO, NULL,
			&proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
#ifdef CONFIG_SLQB_SYSFS

#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)

struct slab_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};

#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)

#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0644, _name##_show, _name##_store)
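
/*
 * Illustrative sketch (not part of the original SLQB code): SLAB_ATTR_RO(order)
 * expands to roughly the line below, binding order_show() to a read-only
 * sysfs file named "order" under each cache's sysfs directory.
 */
#if 0
static struct slab_attribute order_attr = __ATTR_RO(order);
#endif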
static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->size);
}
SLAB_ATTR_RO(slab_size);

static ssize_t align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->align);
}
SLAB_ATTR_RO(align);

static ssize_t object_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objsize);
}
SLAB_ATTR_RO(object_size);

static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objects);
}
SLAB_ATTR_RO(objs_per_slab);

static ssize_t order_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->order);
}
SLAB_ATTR_RO(order);

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
	if (s->ctor) {
		int n = sprint_symbol(buf, (unsigned long)s->ctor);

		return n + sprintf(buf + n, "\n");
	}
	return 0;
}
SLAB_ATTR_RO(ctor);

static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_slabs);
}
SLAB_ATTR_RO(slabs);

static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_inuse);
}
SLAB_ATTR_RO(objects);

static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
{
	struct stats_gather stats;

	gather_stats(s, &stats);

	return sprintf(buf, "%lu\n", stats.nr_objects);
}
SLAB_ATTR_RO(total_objects);
#ifdef CONFIG_FAILSLAB
static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}

static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
		size_t length)
{
	s->flags &= ~SLAB_FAILSLAB;
	if (buf[0] == '1')
		s->flags |= SLAB_FAILSLAB;

	return length;
}
SLAB_ATTR(failslab);
#endif
static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
}
SLAB_ATTR_RO(reclaim_account);

static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
}
SLAB_ATTR_RO(hwcache_align);

#ifdef CONFIG_ZONE_DMA
static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
}
SLAB_ATTR_RO(cache_dma);
#endif

static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);

static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}
SLAB_ATTR_RO(red_zone);

static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
}
SLAB_ATTR_RO(poison);

static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}
SLAB_ATTR_RO(store_user);
static ssize_t hiwater_store(struct kmem_cache *s,
		const char *buf, size_t length)
{
	long hiwater;
	int err;

	err = strict_strtol(buf, 10, &hiwater);
	if (err)
		return err;

	if (hiwater < 0)
		return -EINVAL;

	s->hiwater = hiwater;

	return length;
}

static ssize_t hiwater_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", slab_hiwater(s));
}
SLAB_ATTR(hiwater);

static ssize_t freebatch_store(struct kmem_cache *s,
		const char *buf, size_t length)
{
	long freebatch;
	int err;

	err = strict_strtol(buf, 10, &freebatch);
	if (err)
		return err;

	if (freebatch <= 0 || freebatch - 1 > s->hiwater)
		return -EINVAL;

	s->freebatch = freebatch;

	return length;
}

static ssize_t freebatch_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", slab_freebatch(s));
}
SLAB_ATTR(freebatch);
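
/*
 * Illustrative sketch (not part of the original SLQB code): the two tunables
 * are related; freebatch must stay within 1..hiwater+1, so a tuning sequence
 * raises hiwater before freebatch. The values and the direct field writes
 * below are hypothetical; the sysfs store functions above add validation.
 */
#if 0
static void example_tune(struct kmem_cache *s)
{
	s->hiwater = 4096;	/* let more objects sit on the per-CPU free list */
	s->freebatch = 256;	/* flush them back in larger batches */
}
#endif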
#ifdef CONFIG_SLQB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	struct stats_gather stats;
	int len;
#ifdef CONFIG_SMP
	int cpu;
#endif

	gather_stats(s, &stats);

	len = sprintf(buf, "%lu", stats.stats[si]);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
		struct kmem_cache_list *l = &c->list;

		if (len < PAGE_SIZE - 20)
			len += sprintf(buf+len, " C%d=%lu", cpu, l->stats[si]);
	}
#endif
	return len + sprintf(buf + len, "\n");
}

#define STAT_ATTR(si, text)						\
static ssize_t text##_show(struct kmem_cache *s, char *buf)		\
{									\
	return show_stat(s, buf, si);					\
}									\
SLAB_ATTR_RO(text)

STAT_ATTR(ALLOC, alloc);
STAT_ATTR(ALLOC_SLAB_FILL, alloc_slab_fill);
STAT_ATTR(ALLOC_SLAB_NEW, alloc_slab_new);
STAT_ATTR(FREE, free);
STAT_ATTR(FREE_REMOTE, free_remote);
STAT_ATTR(FLUSH_FREE_LIST, flush_free_list);
STAT_ATTR(FLUSH_FREE_LIST_OBJECTS, flush_free_list_objects);
STAT_ATTR(FLUSH_FREE_LIST_REMOTE, flush_free_list_remote);
STAT_ATTR(FLUSH_SLAB_PARTIAL, flush_slab_partial);
STAT_ATTR(FLUSH_SLAB_FREE, flush_slab_free);
STAT_ATTR(FLUSH_RFREE_LIST, flush_rfree_list);
STAT_ATTR(FLUSH_RFREE_LIST_OBJECTS, flush_rfree_list_objects);
STAT_ATTR(CLAIM_REMOTE_LIST, claim_remote_list);
STAT_ATTR(CLAIM_REMOTE_LIST_OBJECTS, claim_remote_list_objects);
#endif
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&objects_attr.attr,
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&ctor_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&hiwater_attr.attr,
	&freebatch_attr.attr,
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_SLQB_STATS
	&alloc_attr.attr,
	&alloc_slab_fill_attr.attr,
	&alloc_slab_new_attr.attr,
	&free_attr.attr,
	&free_remote_attr.attr,
	&flush_free_list_attr.attr,
	&flush_free_list_objects_attr.attr,
	&flush_free_list_remote_attr.attr,
	&flush_slab_partial_attr.attr,
	&flush_slab_free_attr.attr,
	&flush_rfree_list_attr.attr,
	&flush_rfree_list_objects_attr.attr,
	&claim_remote_list_attr.attr,
	&claim_remote_list_objects_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
	NULL
};

static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
static ssize_t slab_attr_show(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
		struct attribute *attr, const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);

	return err;
}
static void kmem_cache_release(struct kobject *kobj)
{
	struct kmem_cache *s = to_slab(kobj);

	kmem_cache_free(&kmem_cache_cache, s);
}

static struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release
};

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;
static int sysfs_available __read_mostly;

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;

	if (!sysfs_available)
		return 0;

	s->kobj.kset = slab_kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, s->name);
	if (err) {
		kobject_put(&s->kobj);
		return err;
	}

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		return err;

	kobject_uevent(&s->kobj, KOBJ_ADD);

	return 0;
}

static void sysfs_slab_remove(struct kmem_cache *s)
{
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
	kobject_put(&s->kobj);
}
static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		printk(KERN_ERR "Cannot register slab subsystem.\n");
		return -ENOMEM;
	}

	down_write(&slqb_lock);

	sysfs_available = 1;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			printk(KERN_ERR "SLQB: Unable to add boot slab %s"
				" to sysfs\n", s->name);
	}

	up_write(&slqb_lock);

	return 0;
}
device_initcall(slab_sysfs_init);
#endif /* CONFIG_SLQB_SYSFS */