mm/slab.c

   1 /*
   2  * linux/mm/slab.c
   3  * Written by Mark Hemment, 1996/97.
   4  * (markhe@nextd.demon.co.uk)
   5  *
   6  * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   7  *
   8  * Major cleanup, different bufctl logic, per-cpu arrays
   9  *      (c) 2000 Manfred Spraul
  10  *
  11  * An implementation of the Slab Allocator as described in outline in;
  12  *      UNIX Internals: The New Frontiers by Uresh Vahalia
  13  *      Pub: Prentice Hall      ISBN 0-13-101908-2
  14  * or with a little more detail in;
  15  *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  16  *      Jeff Bonwick (Sun Microsystems).
  17  *      Presented at: USENIX Summer 1994 Technical Conference
  18  *
  19  *
  20  * The memory is organized in caches, one cache for each object type.
  21  * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
  22  * Each cache consists out of many slabs (they are small (usually one
  23  * page long) and always contiguous), and each slab contains multiple
  24  * initialized objects.
  25  *
  26  * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
  27  * normal). If you need a special memory type, then must create a new
  28  * cache for that memory type.
  29  *
  30  * In order to reduce fragmentation, the slabs are sorted in 3 groups:
  31  *   full slabs with 0 free objects
  32  *   partial slabs
  33  *   empty slabs with no allocated objects
  34  *
  35  * If partial slabs exist, then new allocations come from these slabs,
  36  * otherwise from empty slabs or new slabs are allocated.
  37  *
  38  * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
  39  * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
  40  *
  41  * On SMP systems, each cache has a short per-cpu head array, most allocs
  42  * and frees go into that array, and if that array overflows, then 1/2
  43  * of the entries in the array are given back into the global cache.
  44  * This reduces the number of spinlock operations.
  45  *
  46  * The c_cpuarray may not be read with enabled local interrupts.
  47  *
  48  * SMP synchronization:
  49  *  constructors and destructors are called without any locking.
  50  *  Several members in kmem_cache_t and slab_t never change, they
  51  *      are accessed without any locking.
  52  *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
  53  *  The non-constant members are protected with a per-cache irq spinlock.
  54  *
  55  * Further notes from the original documentation:
  56  *
  57  * 11 April '97.  Started multi-threading - markhe
  58  *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
  59  *      The sem is only needed when accessing/extending the cache-chain, which
  60  *      can never happen inside an interrupt (kmem_cache_create(),
  61  *      kmem_cache_shrink() and kmem_cache_reap()).
  62  *
  63  *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  64  *      maybe be sleeping and therefore not holding the semaphore/lock), the
  65  *      growing field is used.  This also prevents reaping from a cache.
  66  *
  67  *      At present, each engine can be growing a cache.  This should be blocked.
  68  *
  69  */
  70
  71 #include        <linux/config.h>
  72 #include        <linux/slab.h>
  73 #include        <linux/interrupt.h>
  74 #include        <linux/init.h>
  75 #include        <asm/uaccess.h>
  76
  77 /*
  78  * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
  79  *                SLAB_RED_ZONE & SLAB_POISON.
  80  *                0 for faster, smaller code (especially in the critical paths).
  81  *
  82  * STATS        - 1 to collect stats for /proc/slabinfo.
  83  *                0 for faster, smaller code (especially in the critical paths).
  84  *
  85  * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
  86  */
  87
  88 #define DEBUG           0
  89 #define STATS           0
  90 #define FORCED_DEBUG    0
  91
  92 /*
  93  * Parameters for kmem_cache_reap
  94  */
  95 #define REAP_SCANLEN    10
  96 #define REAP_PERFECT    10
  97
  98 /* Shouldn't this be in a header file somewhere? */
  99 #define BYTES_PER_WORD          sizeof(void *)
 100
 101 /* Legal flag mask for kmem_cache_create(). */
 102 #if DEBUG
 103 # define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 104                          SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 105                          SLAB_NO_REAP | SLAB_CACHE_DMA)
 106 #else
 107 # define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
 108 #endif
 109
 110 /*
 111  * kmem_bufctl_t:
 112  *
 113  * Bufctl's are used for linking objs within a slab
 114  * linked offsets.
 115  *
 116  * This implementaion relies on "struct page" for locating the cache &
 117  * slab an object belongs to.
 118  * This allows the bufctl structure to be small (one int), but limits
 119  * the number of objects a slab (not a cache) can contain when off-slab
 120  * bufctls are used. The limit is the size of the largest general cache
 121  * that does not use off-slab slabs.
 122  * For 32bit archs with 4 kB pages, is this 56.
 123  * This is not serious, as it is only for large objects, when it is unwise
 124  * to have too many per slab.
 125  * Note: This limit can be raised by introducing a general cache whose size
 126  * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 127  */
 128
 129 #define BUFCTL_END 0xffffFFFF
 130 #define SLAB_LIMIT 0xffffFFFE
 131 typedef unsigned int kmem_bufctl_t;
 132
 133 /* Max number of objs-per-slab for caches which use off-slab slabs.
 134  * Needed to avoid a possible looping condition in kmem_cache_grow().
 135  */
 136 static unsigned long offslab_limit;
 137
 138 /*
 139  * slab_t
 140  *
 141  * Manages the objs in a slab. Placed either at the beginning of mem allocated
 142  * for a slab, or allocated from an general cache.
 143  * Slabs are chained into one ordered list: fully used, partial, then fully
 144  * free slabs.
 145  */
 146 typedef struct slab_s {
 147         struct list_head        list;
 148         unsigned long           colouroff;
 149         void                    *s_mem;         /* including colour offset */
 150         unsigned int            inuse;          /* num of objs active in slab */
 151         kmem_bufctl_t           free;
 152 } slab_t;
 153
 154 #define slab_bufctl(slabp) \
 155         ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
 156
 157 /*
 158  * cpucache_t
 159  *
 160  * Per cpu structures
 161  * The limit is stored in the per-cpu structure to reduce the data cache
 162  * footprint.
 163  */
 164 typedef struct cpucache_s {
 165         unsigned int avail;
 166         unsigned int limit;
 167 } cpucache_t;
 168
 169 #define cc_entry(cpucache) \
 170         ((void **)(((cpucache_t*)cpucache)+1))
 171 #define cc_data(cachep) \
 172         ((cachep)->cpudata[smp_processor_id()])
 173 /*
 174  * kmem_cache_t
 175  *
 176  * manages a cache.
 177  */
 178
 179 #define CACHE_NAMELEN   20      /* max name length for a slab cache */
 180
 181 struct kmem_cache_s {
 182 /* 1) each alloc & free */
 183         /* full, partial first, then free */
 184         struct list_head        slabs;
 185         struct list_head        *firstnotfull;
 186         unsigned int            objsize;
 187         unsigned int            flags;  /* constant flags */
 188         unsigned int            num;    /* # of objs per slab */
 189         spinlock_t              spinlock;
 190 #ifdef CONFIG_SMP
 191         unsigned int            batchcount;
 192 #endif
 193
 194 /* 2) slab additions /removals */
 195         /* order of pgs per slab (2^n) */
 196         unsigned int            gfporder;
 197
 198         /* force GFP flags, e.g. GFP_DMA */
 199         unsigned int            gfpflags;
 200
 201         size_t                  colour;         /* cache colouring range */
 202         unsigned int            colour_off;     /* colour offset */
 203         unsigned int            colour_next;    /* cache colouring */
 204         kmem_cache_t            *slabp_cache;
 205         unsigned int            growing;
 206         unsigned int            dflags;         /* dynamic flags */
 207
 208         /* constructor func */
 209         void (*ctor)(void *, kmem_cache_t *, unsigned long);
 210
 211         /* de-constructor func */
 212         void (*dtor)(void *, kmem_cache_t *, unsigned long);
 213
 214         unsigned long           failures;
 215
 216 /* 3) cache creation/removal */
 217         char                    name[CACHE_NAMELEN];
 218         struct list_head        next;
 219 #ifdef CONFIG_SMP
 220 /* 4) per-cpu data */
 221         cpucache_t              *cpudata[NR_CPUS];
 222 #endif
 223 #if STATS
 224         unsigned long           num_active;
 225         unsigned long           num_allocations;
 226         unsigned long           high_mark;
 227         unsigned long           grown;
 228         unsigned long           reaped;
 229         unsigned long           errors;
 230 #ifdef CONFIG_SMP
 231         atomic_t                allochit;
 232         atomic_t                allocmiss;
 233         atomic_t                freehit;
 234         atomic_t                freemiss;
 235 #endif
 236 #endif
 237 };
 238
 239 /* internal c_flags */
 240 #define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
 241 #define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
 242
 243 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
 244 #define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */
 245
 246 #define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
 247 #define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
 248 #define GROWN(x)        ((x)->dlags & DFLGS_GROWN)
 249
 250 #if STATS
 251 #define STATS_INC_ACTIVE(x)     ((x)->num_active++)
 252 #define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 253 #define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 254 #define STATS_INC_GROWN(x)      ((x)->grown++)
 255 #define STATS_INC_REAPED(x)     ((x)->reaped++)
 256 #define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
 257                                         (x)->high_mark = (x)->num_active; \
 258                                 } while (0)
 259 #define STATS_INC_ERR(x)        ((x)->errors++)
 260 #else
 261 #define STATS_INC_ACTIVE(x)     do { } while (0)
 262 #define STATS_DEC_ACTIVE(x)     do { } while (0)
 263 #define STATS_INC_ALLOCED(x)    do { } while (0)
 264 #define STATS_INC_GROWN(x)      do { } while (0)
 265 #define STATS_INC_REAPED(x)     do { } while (0)
 266 #define STATS_SET_HIGH(x)       do { } while (0)
 267 #define STATS_INC_ERR(x)        do { } while (0)
 268 #endif
 269
 270 #if STATS && defined(CONFIG_SMP)
 271 #define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
 272 #define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
 273 #define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
 274 #define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
 275 #else
 276 #define STATS_INC_ALLOCHIT(x)   do { } while (0)
 277 #define STATS_INC_ALLOCMISS(x)  do { } while (0)
 278 #define STATS_INC_FREEHIT(x)    do { } while (0)
 279 #define STATS_INC_FREEMISS(x)   do { } while (0)
 280 #endif
 281
 282 #if DEBUG
 283 /* Magic nums for obj red zoning.
 284  * Placed in the first word before and the first word after an obj.
 285  */
 286 #define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
 287 #define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
 288
 289 /* ...and for poisoning */
 290 #define POISON_BYTE     0x5a            /* byte value for poisoning */
 291 #define POISON_END      0xa5            /* end-byte of poisoning */
 292
 293 #endif
 294
 295 /* maximum size of an obj (in 2^order pages) */
 296 #define MAX_OBJ_ORDER   5       /* 32 pages */
 297
 298 /*
 299  * Do not go above this order unless 0 objects fit into the slab.
 300  */
 301 #define BREAK_GFP_ORDER_HI      2
 302 #define BREAK_GFP_ORDER_LO      1
 303 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 304
 305 /*
 306  * Absolute limit for the gfp order
 307  */
 308 #define MAX_GFP_ORDER   5       /* 32 pages */
 309
 310
 311 /* Macros for storing/retrieving the cachep and or slab from the
 312  * global 'mem_map'. These are used to find the slab an obj belongs to.
 313  * With kfree(), these are used to find the cache which an obj belongs to.
 314  */
 315 #define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
 316 #define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
 317 #define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 318 #define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
 319
 320 /* Size description struct for general caches. */
 321 typedef struct cache_sizes {
 322         size_t           cs_size;
 323         kmem_cache_t    *cs_cachep;
 324         kmem_cache_t    *cs_dmacachep;
 325 } cache_sizes_t;
 326
 327 static cache_sizes_t cache_sizes[] = {
 328 #if PAGE_SIZE == 4096
 329         {    32,        NULL, NULL},
 330 #endif
 331         {    64,        NULL, NULL},
 332         {   128,        NULL, NULL},
 333         {   256,        NULL, NULL},
 334         {   512,        NULL, NULL},
 335         {  1024,        NULL, NULL},
 336         {  2048,        NULL, NULL},
 337         {  4096,        NULL, NULL},
 338         {  8192,        NULL, NULL},
 339         { 16384,        NULL, NULL},
 340         { 32768,        NULL, NULL},
 341         { 65536,        NULL, NULL},
 342         {131072,        NULL, NULL},
 343         {     0,        NULL, NULL}
 344 };
 345
 346 /* internal cache of cache description objs */
 347 static kmem_cache_t cache_cache = {
 348         slabs:          LIST_HEAD_INIT(cache_cache.slabs),
 349         firstnotfull:   &cache_cache.slabs,
 350         objsize:        sizeof(kmem_cache_t),
 351         flags:          SLAB_NO_REAP,
 352         spinlock:       SPIN_LOCK_UNLOCKED,
 353         colour_off:     L1_CACHE_BYTES,
 354         name:           "kmem_cache",
 355 };
 356
 357 /* Guard access to the cache-chain. */
 358 static struct semaphore cache_chain_sem;
 359
 360 /* Place maintainer for reaping. */
 361 static kmem_cache_t *clock_searchp = &cache_cache;
 362
 363 #define cache_chain (cache_cache.next)
 364
 365 #ifdef CONFIG_SMP
 366 /*
 367  * chicken and egg problem: delay the per-cpu array allocation
 368  * until the general caches are up.
 369  */
 370 static int g_cpucache_up;
 371
 372 static void enable_cpucache (kmem_cache_t *cachep);
 373 static void enable_all_cpucaches (void);
 374 #endif
 375
 376 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 377 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
 378                  int flags, size_t *left_over, unsigned int *num)
 379 {
 380         int i;
 381         size_t wastage = PAGE_SIZE<<gfporder;
 382         size_t extra = 0;
 383         size_t base = 0;
 384
 385         if (!(flags & CFLGS_OFF_SLAB)) {
 386                 base = sizeof(slab_t);
 387                 extra = sizeof(kmem_bufctl_t);
 388         }
 389         i = 0;
 390         while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
 391                 i++;
 392         if (i > 0)
 393                 i--;
 394
 395         if (i > SLAB_LIMIT)
 396                 i = SLAB_LIMIT;
 397
 398         *num = i;
 399         wastage -= i*size;
 400         wastage -= L1_CACHE_ALIGN(base+i*extra);
 401         *left_over = wastage;
 402 }
 403
 404 /* Initialisation - setup the `cache' cache. */
 405 void __init kmem_cache_init(void)
 406 {
 407         size_t left_over;
 408
 409         init_MUTEX(&cache_chain_sem);
 410         INIT_LIST_HEAD(&cache_chain);
 411
 412         kmem_cache_estimate(0, cache_cache.objsize, 0,
 413                         &left_over, &cache_cache.num);
 414         if (!cache_cache.num)
 415                 BUG();
 416
 417         cache_cache.colour = left_over/cache_cache.colour_off;
 418         cache_cache.colour_next = 0;
 419 }
 420
 421
 422 /* Initialisation - setup remaining internal and general caches.
 423  * Called after the gfp() functions have been enabled, and before smp_init().
 424  */
 425 void __init kmem_cache_sizes_init(void)
 426 {
 427         cache_sizes_t *sizes = cache_sizes;
 428         char name[20];
 429         /*
 430          * Fragmentation resistance on low memory - only use bigger
 431          * page orders on machines with more than 32MB of memory.
 432          */
 433         if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 434                 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 435         do {
 436                 /* For performance, all the general caches are L1 aligned.
 437                  * This should be particularly beneficial on SMP boxes, as it
 438                  * eliminates "false sharing".
 439                  * Note for systems short on memory removing the alignment will
 440                  * allow tighter packing of the smaller caches. */
 441                 sprintf(name,"size-%Zd",sizes->cs_size);
 442                 if (!(sizes->cs_cachep =
 443                         kmem_cache_create(name, sizes->cs_size,
 444                                         0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
 445                         BUG();
 446                 }
 447
 448                 /* Inc off-slab bufctl limit until the ceiling is hit. */
 449                 if (!(OFF_SLAB(sizes->cs_cachep))) {
 450                         offslab_limit = sizes->cs_size-sizeof(slab_t);
 451                         offslab_limit /= 2;
 452                 }
 453                 sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
 454                 sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
 455                               SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 456                 if (!sizes->cs_dmacachep)
 457                         BUG();
 458                 sizes++;
 459         } while (sizes->cs_size);
 460 }
 461
 462 int __init kmem_cpucache_init(void)
 463 {
 464 #ifdef CONFIG_SMP
 465         g_cpucache_up = 1;
 466         enable_all_cpucaches();
 467 #endif
 468         return 0;
 469 }
 470
 471 __initcall(kmem_cpucache_init);
 472
 473 /* Interface to system's page allocator. No need to hold the cache-lock.
 474  */
 475 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
 476 {
 477         void    *addr;
 478
 479         /*
 480          * If we requested dmaable memory, we will get it. Even if we
 481          * did not request dmaable memory, we might get it, but that
 482          * would be relatively rare and ignorable.
 483          */
 484         flags |= cachep->gfpflags;
 485         addr = (void*) __get_free_pages(flags, cachep->gfporder);
 486         /* Assume that now we have the pages no one else can legally
 487          * messes with the 'struct page's.
 488          * However vm_scan() might try to test the structure to see if
 489          * it is a named-page or buffer-page.  The members it tests are
 490          * of no interest here.....
 491          */
 492         return addr;
 493 }
 494
 495 /* Interface to system's page release. */
 496 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 497 {
 498         unsigned long i = (1<<cachep->gfporder);
 499         struct page *page = virt_to_page(addr);
 500
 501         /* free_pages() does not clear the type bit - we do that.
 502          * The pages have been unlinked from their cache-slab,
 503          * but their 'struct page's might be accessed in
 504          * vm_scan(). Shouldn't be a worry.
 505          */
 506         while (i--) {
 507                 PageClearSlab(page);
 508                 page++;
 509         }
 510         free_pages((unsigned long)addr, cachep->gfporder);
 511 }
 512
 513 #if DEBUG
 514 static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
 515 {
 516         int size = cachep->objsize;
 517         if (cachep->flags & SLAB_RED_ZONE) {
 518                 addr += BYTES_PER_WORD;
 519                 size -= 2*BYTES_PER_WORD;
 520         }
 521         memset(addr, POISON_BYTE, size);
 522         *(unsigned char *)(addr+size-1) = POISON_END;
 523 }
 524
 525 static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
 526 {
 527         int size = cachep->objsize;
 528         void *end;
 529         if (cachep->flags & SLAB_RED_ZONE) {
 530                 addr += BYTES_PER_WORD;
 531                 size -= 2*BYTES_PER_WORD;
 532         }
 533         end = memchr(addr, POISON_END, size);
 534         if (end != (addr+size-1))
 535                 return 1;
 536         return 0;
 537 }
 538 #endif
 539
 540 /* Destroy all the objs in a slab, and release the mem back to the system.
 541  * Before calling the slab must have been unlinked from the cache.
 542  * The cache-lock is not held/needed.
 543  */
 544 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
 545 {
 546         if (cachep->dtor
 547 #if DEBUG
 548                 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
 549 #endif
 550         ) {
 551                 int i;
 552                 for (i = 0; i < cachep->num; i++) {
 553                         void* objp = slabp->s_mem+cachep->objsize*i;
 554 #if DEBUG
 555                         if (cachep->flags & SLAB_RED_ZONE) {
 556                                 if (*((unsigned long*)(objp)) != RED_MAGIC1)
 557                                         BUG();
 558                                 if (*((unsigned long*)(objp + cachep->objsize
 559                                                 -BYTES_PER_WORD)) != RED_MAGIC1)
 560                                         BUG();
 561                                 objp += BYTES_PER_WORD;
 562                         }
 563 #endif
 564                         if (cachep->dtor)
 565                                 (cachep->dtor)(objp, cachep, 0);
 566 #if DEBUG
 567                         if (cachep->flags & SLAB_RED_ZONE) {
 568                                 objp -= BYTES_PER_WORD;
 569                         }
 570                         if ((cachep->flags & SLAB_POISON)  &&
 571                                 kmem_check_poison_obj(cachep, objp))
 572                                 BUG();
 573 #endif
 574                 }
 575         }
 576
 577         kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
 578         if (OFF_SLAB(cachep))
 579                 kmem_cache_free(cachep->slabp_cache, slabp);
 580 }
 581
 582 /**
 583  * kmem_cache_create - Create a cache.
 584  * @name: A string which is used in /proc/slabinfo to identify this cache.
 585  * @size: The size of objects to be created in this cache.
 586  * @offset: The offset to use within the page.
 587  * @flags: SLAB flags
 588  * @ctor: A constructor for the objects.
 589  * @dtor: A destructor for the objects.
 590  *
 591  * Returns a ptr to the cache on success, NULL on failure.
 592  * Cannot be called within a int, but can be interrupted.
 593  * The @ctor is run when new pages are allocated by the cache
 594  * and the @dtor is run before the pages are handed back.
 595  * The flags are
 596  *
 597  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 598  * to catch references to uninitialised memory.
 599  *
 600  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 601  * for buffer overruns.
 602  *
 603  * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
 604  * memory pressure.
 605  *
 606  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 607  * cacheline.  This can be beneficial if you're counting cycles as closely
 608  * as davem.
 609  */
 610 kmem_cache_t *
 611 kmem_cache_create (const char *name, size_t size, size_t offset,
 612         unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 613         void (*dtor)(void*, kmem_cache_t *, unsigned long))
 614 {
 615         const char *func_nm = KERN_ERR "kmem_create: ";
 616         size_t left_over, align, slab_size;
 617         kmem_cache_t *cachep = NULL;
 618
 619         /*
 620          * Sanity checks... these are all serious usage bugs.
 621          */
 622         if ((!name) ||
 623                 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
 624                 in_interrupt() ||
 625                 (size < BYTES_PER_WORD) ||
 626                 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 627                 (dtor && !ctor) ||
 628                 (offset < 0 || offset > size))
 629                         BUG();
 630
 631 #if DEBUG
 632         if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 633                 /* No constructor, but inital state check requested */
 634                 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 635                 flags &= ~SLAB_DEBUG_INITIAL;
 636         }
 637
 638         if ((flags & SLAB_POISON) && ctor) {
 639                 /* request for poisoning, but we can't do that with a constructor */
 640                 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 641                 flags &= ~SLAB_POISON;
 642         }
 643 #if FORCED_DEBUG
 644         if (size < (PAGE_SIZE>>3))
 645                 /*
 646                  * do not red zone large object, causes severe
 647                  * fragmentation.
 648                  */
 649                 flags |= SLAB_RED_ZONE;
 650         if (!ctor)
 651                 flags |= SLAB_POISON;
 652 #endif
 653 #endif
 654
 655         /*
 656          * Always checks flags, a caller might be expecting debug
 657          * support which isn't available.
 658          */
 659         if (flags & ~CREATE_MASK)
 660                 BUG();
 661
 662         /* Get cache's description obj. */
 663         cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 664         if (!cachep)
 665                 goto opps;
 666         memset(cachep, 0, sizeof(kmem_cache_t));
 667
 668         /* Check that size is in terms of words.  This is needed to avoid
 669          * unaligned accesses for some archs when redzoning is used, and makes
 670          * sure any on-slab bufctl's are also correctly aligned.
 671          */
 672         if (size & (BYTES_PER_WORD-1)) {
 673                 size += (BYTES_PER_WORD-1);
 674                 size &= ~(BYTES_PER_WORD-1);
 675                 printk("%sForcing size word alignment - %s\n", func_nm, name);
 676         }
 677
 678 #if DEBUG
 679         if (flags & SLAB_RED_ZONE) {
 680                 /*
 681                  * There is no point trying to honour cache alignment
 682                  * when redzoning.
 683                  */
 684                 flags &= ~SLAB_HWCACHE_ALIGN;
 685                 size += 2*BYTES_PER_WORD;       /* words for redzone */
 686         }
 687 #endif
 688         align = BYTES_PER_WORD;
 689         if (flags & SLAB_HWCACHE_ALIGN)
 690                 align = L1_CACHE_BYTES;
 691
 692         /* Determine if the slab management is 'on' or 'off' slab. */
 693         if (size >= (PAGE_SIZE>>3))
 694                 /*
 695                  * Size is large, assume best to place the slab management obj
 696                  * off-slab (should allow better packing of objs).
 697                  */
 698                 flags |= CFLGS_OFF_SLAB;
 699
 700         if (flags & SLAB_HWCACHE_ALIGN) {
 701                 /* Need to adjust size so that objs are cache aligned. */
 702                 /* Small obj size, can get at least two per cache line. */
 703                 /* FIXME: only power of 2 supported, was better */
 704                 while (size < align/2)
 705                         align /= 2;
 706                 size = (size+align-1)&(~(align-1));
 707         }
 708
 709         /* Cal size (in pages) of slabs, and the num of objs per slab.
 710          * This could be made much more intelligent.  For now, try to avoid
 711          * using high page-orders for slabs.  When the gfp() funcs are more
 712          * friendly towards high-order requests, this should be changed.
 713          */
 714         do {
 715                 unsigned int break_flag = 0;
 716 cal_wastage:
 717                 kmem_cache_estimate(cachep->gfporder, size, flags,
 718                                                 &left_over, &cachep->num);
 719                 if (break_flag)
 720                         break;
 721                 if (cachep->gfporder >= MAX_GFP_ORDER)
 722                         break;
 723                 if (!cachep->num)
 724                         goto next;
 725                 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
 726                         /* Oops, this num of objs will cause problems. */
 727                         cachep->gfporder--;
 728                         break_flag++;
 729                         goto cal_wastage;
 730                 }
 731
 732                 /*
 733                  * Large num of objs is good, but v. large slabs are currently
 734                  * bad for the gfp()s.
 735                  */
 736                 if (cachep->gfporder >= slab_break_gfp_order)
 737                         break;
 738
 739                 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 740                         break;  /* Acceptable internal fragmentation. */
 741 next:
 742                 cachep->gfporder++;
 743         } while (1);
 744
 745         if (!cachep->num) {
 746                 printk("kmem_cache_create: couldn't create cache %s.\n", name);
 747                 kmem_cache_free(&cache_cache, cachep);
 748                 cachep = NULL;
 749                 goto opps;
 750         }
 751         slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
 752
 753         /*
 754          * If the slab has been placed off-slab, and we have enough space then
 755          * move it on-slab. This is at the expense of any extra colouring.
 756          */
 757         if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 758                 flags &= ~CFLGS_OFF_SLAB;
 759                 left_over -= slab_size;
 760         }
 761
 762         /* Offset must be a multiple of the alignment. */
 763         offset += (align-1);
 764         offset &= ~(align-1);
 765         if (!offset)
 766                 offset = L1_CACHE_BYTES;
 767         cachep->colour_off = offset;
 768         cachep->colour = left_over/offset;
 769
 770         /* init remaining fields */
 771         if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
 772                 flags |= CFLGS_OPTIMIZE;
 773
 774         cachep->flags = flags;
 775         cachep->gfpflags = 0;
 776         if (flags & SLAB_CACHE_DMA)
 777                 cachep->gfpflags |= GFP_DMA;
 778         spin_lock_init(&cachep->spinlock);
 779         cachep->objsize = size;
 780         INIT_LIST_HEAD(&cachep->slabs);
 781         cachep->firstnotfull = &cachep->slabs;
 782
 783         if (flags & CFLGS_OFF_SLAB)
 784                 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
 785         cachep->ctor = ctor;
 786         cachep->dtor = dtor;
 787         /* Copy name over so we don't have problems with unloaded modules */
 788         strcpy(cachep->name, name);
 789
 790 #ifdef CONFIG_SMP
 791         if (g_cpucache_up)
 792                 enable_cpucache(cachep);
 793 #endif
 794         /* Need the semaphore to access the chain. */
 795         down(&cache_chain_sem);
 796         {
 797                 struct list_head *p;
 798
 799                 list_for_each(p, &cache_chain) {
 800                         kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 801
 802                         /* The name field is constant - no lock needed. */
 803                         if (!strcmp(pc->name, name))
 804                                 BUG();
 805                 }
 806         }
 807
 808         /* There is no reason to lock our new cache before we
 809          * link it in - no one knows about it yet...
 810          */
 811         list_add(&cachep->next, &cache_chain);
 812         up(&cache_chain_sem);
 813 opps:
 814         return cachep;
 815 }
 816
 817 /*
 818  * This check if the kmem_cache_t pointer is chained in the cache_cache
 819  * list. -arca
 820  */
 821 static int is_chained_kmem_cache(kmem_cache_t * cachep)
 822 {
 823         struct list_head *p;
 824         int ret = 0;
 825
 826         /* Find the cache in the chain of caches. */
 827         down(&cache_chain_sem);
 828         list_for_each(p, &cache_chain) {
 829                 if (p == &cachep->next) {
 830                         ret = 1;
 831                         break;
 832                 }
 833         }
 834         up(&cache_chain_sem);
 835
 836         return ret;
 837 }
 838
 839 #ifdef CONFIG_SMP
 840 /*
 841  * Waits for all CPUs to execute func().
 842  */
 843 static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
 844 {
 845         local_irq_disable();
 846         func(arg);
 847         local_irq_enable();
 848
 849         if (smp_call_function(func, arg, 1, 1))
 850                 BUG();
 851 }
 852 typedef struct ccupdate_struct_s
 853 {
 854         kmem_cache_t *cachep;
 855         cpucache_t *new[NR_CPUS];
 856 } ccupdate_struct_t;
 857
 858 static void do_ccupdate_local(void *info)
 859 {
 860         ccupdate_struct_t *new = (ccupdate_struct_t *)info;
 861         cpucache_t *old = cc_data(new->cachep);
 862
 863         cc_data(new->cachep) = new->new[smp_processor_id()];
 864         new->new[smp_processor_id()] = old;
 865 }
 866
 867 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
 868
 869 static void drain_cpu_caches(kmem_cache_t *cachep)
 870 {
 871         ccupdate_struct_t new;
 872         int i;
 873
 874         memset(&new.new,0,sizeof(new.new));
 875
 876         new.cachep = cachep;
 877
 878         down(&cache_chain_sem);
 879         smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 880
 881         for (i = 0; i < smp_num_cpus; i++) {
 882                 cpucache_t* ccold = new.new[cpu_logical_map(i)];
 883                 if (!ccold || (ccold->avail == 0))
 884                         continue;
 885                 local_irq_disable();
 886                 free_block(cachep, cc_entry(ccold), ccold->avail);
 887                 local_irq_enable();
 888                 ccold->avail = 0;
 889         }
 890         smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 891         up(&cache_chain_sem);
 892 }
 893
 894 #else
 895 #define drain_cpu_caches(cachep)        do { } while (0)
 896 #endif
 897
 898 static int __kmem_cache_shrink(kmem_cache_t *cachep)
 899 {
 900         slab_t *slabp;
 901         int ret;
 902
 903         drain_cpu_caches(cachep);
 904
 905         spin_lock_irq(&cachep->spinlock);
 906
 907         /* If the cache is growing, stop shrinking. */
 908         while (!cachep->growing) {
 909                 struct list_head *p;
 910
 911                 p = cachep->slabs.prev;
 912                 if (p == &cachep->slabs)
 913                         break;
 914
 915                 slabp = list_entry(cachep->slabs.prev, slab_t, list);
 916                 if (slabp->inuse)
 917                         break;
 918
 919                 list_del(&slabp->list);
 920                 if (cachep->firstnotfull == &slabp->list)
 921                         cachep->firstnotfull = &cachep->slabs;
 922
 923                 spin_unlock_irq(&cachep->spinlock);
 924                 kmem_slab_destroy(cachep, slabp);
 925                 spin_lock_irq(&cachep->spinlock);
 926         }
 927         ret = !list_empty(&cachep->slabs);
 928         spin_unlock_irq(&cachep->spinlock);
 929         return ret;
 930 }
 931
 932 /**
 933  * kmem_cache_shrink - Shrink a cache.
 934  * @cachep: The cache to shrink.
 935  *
 936  * Releases as many slabs as possible for a cache.
 937  * To help debugging, a zero exit status indicates all slabs were released.
 938  */
 939 int kmem_cache_shrink(kmem_cache_t *cachep)
 940 {
 941         if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
 942                 BUG();
 943
 944         return __kmem_cache_shrink(cachep);
 945 }
 946
 947 /**
 948  * kmem_cache_destroy - delete a cache
 949  * @cachep: the cache to destroy
 950  *
 951  * Remove a kmem_cache_t object from the slab cache.
 952  * Returns 0 on success.
 953  *
 954  * It is expected this function will be called by a module when it is
 955  * unloaded.  This will remove the cache completely, and avoid a duplicate
 956  * cache being allocated each time a module is loaded and unloaded, if the
 957  * module doesn't have persistent in-kernel storage across loads and unloads.
 958  *
 959  * The caller must guarantee that noone will allocate memory from the cache
 960  * during the kmem_cache_destroy().
 961  */
 962 int kmem_cache_destroy (kmem_cache_t * cachep)
 963 {
 964         if (!cachep || in_interrupt() || cachep->growing)
 965                 BUG();
 966
 967         /* Find the cache in the chain of caches. */
 968         down(&cache_chain_sem);
 969         /* the chain is never empty, cache_cache is never destroyed */
 970         if (clock_searchp == cachep)
 971                 clock_searchp = list_entry(cachep->next.next,
 972                                                 kmem_cache_t, next);
 973         list_del(&cachep->next);
 974         up(&cache_chain_sem);
 975
 976         if (__kmem_cache_shrink(cachep)) {
 977                 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
 978                        cachep);
 979                 down(&cache_chain_sem);
 980                 list_add(&cachep->next,&cache_chain);
 981                 up(&cache_chain_sem);
 982                 return 1;
 983         }
 984 #ifdef CONFIG_SMP
 985         {
 986                 int i;
 987                 for (i = 0; i < NR_CPUS; i++)
 988                         kfree(cachep->cpudata[i]);
 989         }
 990 #endif
 991         kmem_cache_free(&cache_cache, cachep);
 992
 993         return 0;
 994 }
 995
 996 /* Get the memory for a slab management obj. */
 997 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
 998                         void *objp, int colour_off, int local_flags)
 999 {
1000         slab_t *slabp;
1001
1002         if (OFF_SLAB(cachep)) {
1003                 /* Slab management obj is off-slab. */
1004                 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1005                 if (!slabp)
1006                         return NULL;
1007         } else {
1008                 /* FIXME: change to
1009                         slabp = objp
1010                  * if you enable OPTIMIZE
1011                  */
1012                 slabp = objp+colour_off;
1013                 colour_off += L1_CACHE_ALIGN(cachep->num *
1014                                 sizeof(kmem_bufctl_t) + sizeof(slab_t));
1015         }
1016         slabp->inuse = 0;
1017         slabp->colouroff = colour_off;
1018         slabp->s_mem = objp+colour_off;
1019
1020         return slabp;
1021 }
1022
1023 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1024                         slab_t * slabp, unsigned long ctor_flags)
1025 {
1026         int i;
1027
1028         for (i = 0; i < cachep->num; i++) {
1029                 void* objp = slabp->s_mem+cachep->objsize*i;
1030 #if DEBUG
1031                 if (cachep->flags & SLAB_RED_ZONE) {
1032                         *((unsigned long*)(objp)) = RED_MAGIC1;
1033                         *((unsigned long*)(objp + cachep->objsize -
1034                                         BYTES_PER_WORD)) = RED_MAGIC1;
1035                         objp += BYTES_PER_WORD;
1036                 }
1037 #endif
1038
1039                 /*
1040                  * Constructors are not allowed to allocate memory from
1041                  * the same cache which they are a constructor for.
1042                  * Otherwise, deadlock. They must also be threaded.
1043                  */
1044                 if (cachep->ctor)
1045                         cachep->ctor(objp, cachep, ctor_flags);
1046 #if DEBUG
1047                 if (cachep->flags & SLAB_RED_ZONE)
1048                         objp -= BYTES_PER_WORD;
1049                 if (cachep->flags & SLAB_POISON)
1050                         /* need to poison the objs */
1051                         kmem_poison_obj(cachep, objp);
1052                 if (cachep->flags & SLAB_RED_ZONE) {
1053                         if (*((unsigned long*)(objp)) != RED_MAGIC1)
1054                                 BUG();
1055                         if (*((unsigned long*)(objp + cachep->objsize -
1056                                         BYTES_PER_WORD)) != RED_MAGIC1)
1057                                 BUG();
1058                 }
1059 #endif
1060                 slab_bufctl(slabp)[i] = i+1;
1061         }
1062         slab_bufctl(slabp)[i-1] = BUFCTL_END;
1063         slabp->free = 0;
1064 }
1065
1066 /*
1067  * Grow (by 1) the number of slabs within a cache.  This is called by
1068  * kmem_cache_alloc() when there are no active objs left in a cache.
1069  */
1070 static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1071 {
1072         slab_t  *slabp;
1073         struct page     *page;
1074         void            *objp;
1075         size_t           offset;
1076         unsigned int     i, local_flags;
1077         unsigned long    ctor_flags;
1078         unsigned long    save_flags;
1079
1080         /* Be lazy and only check for valid flags here,
1081          * keeping it out of the critical path in kmem_cache_alloc().
1082          */
1083         if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1084                 BUG();
1085         if (flags & SLAB_NO_GROW)
1086                 return 0;
1087
1088         /*
1089          * The test for missing atomic flag is performed here, rather than
1090          * the more obvious place, simply to reduce the critical path length
1091          * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1092          * will eventually be caught here (where it matters).
1093          */
1094         if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1095                 BUG();
1096
1097         ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1098         local_flags = (flags & SLAB_LEVEL_MASK);
1099         if (local_flags == SLAB_ATOMIC)
1100                 /*
1101                  * Not allowed to sleep.  Need to tell a constructor about
1102                  * this - it might need to know...
1103                  */
1104                 ctor_flags |= SLAB_CTOR_ATOMIC;
1105
1106         /* About to mess with non-constant members - lock. */
1107         spin_lock_irqsave(&cachep->spinlock, save_flags);
1108
1109         /* Get colour for the slab, and cal the next value. */
1110         offset = cachep->colour_next;
1111         cachep->colour_next++;
1112         if (cachep->colour_next >= cachep->colour)
1113                 cachep->colour_next = 0;
1114         offset *= cachep->colour_off;
1115         cachep->dflags |= DFLGS_GROWN;
1116
1117         cachep->growing++;
1118         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1119
1120         /* A series of memory allocations for a new slab.
1121          * Neither the cache-chain semaphore, or cache-lock, are
1122          * held, but the incrementing c_growing prevents this
1123          * cache from being reaped or shrunk.
1124          * Note: The cache could be selected in for reaping in
1125          * kmem_cache_reap(), but when the final test is made the
1126          * growing value will be seen.
1127          */
1128
1129         /* Get mem for the objs. */
1130         if (!(objp = kmem_getpages(cachep, flags)))
1131                 goto failed;
1132
1133         /* Get slab management. */
1134         if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1135                 goto opps1;
1136
1137         /* Nasty!!!!!! I hope this is OK. */
1138         i = 1 << cachep->gfporder;
1139         page = virt_to_page(objp);
1140         do {
1141                 SET_PAGE_CACHE(page, cachep);
1142                 SET_PAGE_SLAB(page, slabp);
1143                 PageSetSlab(page);
1144                 page++;
1145         } while (--i);
1146
1147         kmem_cache_init_objs(cachep, slabp, ctor_flags);
1148
1149         spin_lock_irqsave(&cachep->spinlock, save_flags);
1150         cachep->growing--;
1151
1152         /* Make slab active. */
1153         list_add_tail(&slabp->list,&cachep->slabs);
1154         if (cachep->firstnotfull == &cachep->slabs)
1155                 cachep->firstnotfull = &slabp->list;
1156         STATS_INC_GROWN(cachep);
1157         cachep->failures = 0;
1158
1159         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1160         return 1;
1161 opps1:
1162         kmem_freepages(cachep, objp);
1163 failed:
1164         spin_lock_irqsave(&cachep->spinlock, save_flags);
1165         cachep->growing--;
1166         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1167         return 0;
1168 }
1169
1170 /*
1171  * Perform extra freeing checks:
1172  * - detect double free
1173  * - detect bad pointers.
1174  * Called with the cache-lock held.
1175  */
1176
1177 #if DEBUG
1178 static int kmem_extra_free_checks (kmem_cache_t * cachep,
1179                         slab_t *slabp, void * objp)
1180 {
1181         int i;
1182         unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1183
1184         if (objnr >= cachep->num)
1185                 BUG();
1186         if (objp != slabp->s_mem + objnr*cachep->objsize)
1187                 BUG();
1188
1189         /* Check slab's freelist to see if this obj is there. */
1190         for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
1191                 if (i == objnr)
1192                         BUG();
1193         }
1194         return 0;
1195 }
1196 #endif
1197
1198 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1199 {
1200 #if DEBUG
1201         if (flags & SLAB_DMA) {
1202                 if (!(cachep->gfpflags & GFP_DMA))
1203                         BUG();
1204         } else {
1205                 if (cachep->gfpflags & GFP_DMA)
1206                         BUG();
1207         }
1208 #endif
1209 }
1210
1211 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1212                                                          slab_t *slabp)
1213 {
1214         void *objp;
1215
1216         STATS_INC_ALLOCED(cachep);
1217         STATS_INC_ACTIVE(cachep);
1218         STATS_SET_HIGH(cachep);
1219
1220         /* get obj pointer */
1221         slabp->inuse++;
1222         objp = slabp->s_mem + slabp->free*cachep->objsize;
1223         slabp->free=slab_bufctl(slabp)[slabp->free];
1224
1225         if (slabp->free == BUFCTL_END)
1226                 /* slab now full: move to next slab for next alloc */
1227                 cachep->firstnotfull = slabp->list.next;
1228 #if DEBUG
1229         if (cachep->flags & SLAB_POISON)
1230                 if (kmem_check_poison_obj(cachep, objp))
1231                         BUG();
1232         if (cachep->flags & SLAB_RED_ZONE) {
1233                 /* Set alloc red-zone, and check old one. */
1234                 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1235                                                          RED_MAGIC1)
1236                         BUG();
1237                 if (xchg((unsigned long *)(objp+cachep->objsize -
1238                           BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1239                         BUG();
1240                 objp += BYTES_PER_WORD;
1241         }
1242 #endif
1243         return objp;
1244 }
1245
1246 /*
1247  * Returns a ptr to an obj in the given cache.
1248  * caller must guarantee synchronization
1249  * #define for the goto optimization 8-)
1250  */
1251 #define kmem_cache_alloc_one(cachep)                            \
1252 ({                                                              \
1253         slab_t  *slabp;                                 \
1254                                                                 \
1255         /* Get slab alloc is to come from. */                   \
1256         {                                                       \
1257                 struct list_head* p = cachep->firstnotfull;     \
1258                 if (p == &cachep->slabs)                        \
1259                         goto alloc_new_slab;                    \
1260                 slabp = list_entry(p,slab_t, list);     \
1261         }                                                       \
1262         kmem_cache_alloc_one_tail(cachep, slabp);               \
1263 })
1264
1265 #ifdef CONFIG_SMP
1266 void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
1267 {
1268         int batchcount = cachep->batchcount;
1269         cpucache_t* cc = cc_data(cachep);
1270
1271         spin_lock(&cachep->spinlock);
1272         while (batchcount--) {
1273                 /* Get slab alloc is to come from. */
1274                 struct list_head *p = cachep->firstnotfull;
1275                 slab_t *slabp;
1276
1277                 if (p == &cachep->slabs)
1278                         break;
1279                 slabp = list_entry(p,slab_t, list);
1280                 cc_entry(cc)[cc->avail++] =
1281                                 kmem_cache_alloc_one_tail(cachep, slabp);
1282         }
1283         spin_unlock(&cachep->spinlock);
1284
1285         if (cc->avail)
1286                 return cc_entry(cc)[--cc->avail];
1287         return NULL;
1288 }
1289 #endif
1290
1291 static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1292 {
1293         unsigned long save_flags;
1294         void* objp;
1295
1296         kmem_cache_alloc_head(cachep, flags);
1297 try_again:
1298         local_irq_save(save_flags);
1299 #ifdef CONFIG_SMP
1300         {
1301                 cpucache_t *cc = cc_data(cachep);
1302
1303                 if (cc) {
1304                         if (cc->avail) {
1305                                 STATS_INC_ALLOCHIT(cachep);
1306                                 objp = cc_entry(cc)[--cc->avail];
1307                         } else {
1308                                 STATS_INC_ALLOCMISS(cachep);
1309                                 objp = kmem_cache_alloc_batch(cachep,flags);
1310                                 if (!objp)
1311                                         goto alloc_new_slab_nolock;
1312                         }
1313                 } else {
1314                         spin_lock(&cachep->spinlock);
1315                         objp = kmem_cache_alloc_one(cachep);
1316                         spin_unlock(&cachep->spinlock);
1317                 }
1318         }
1319 #else
1320         objp = kmem_cache_alloc_one(cachep);
1321 #endif
1322         local_irq_restore(save_flags);
1323         return objp;
1324 alloc_new_slab:
1325 #ifdef CONFIG_SMP
1326         spin_unlock(&cachep->spinlock);
1327 alloc_new_slab_nolock:
1328 #endif
1329         local_irq_restore(save_flags);
1330         if (kmem_cache_grow(cachep, flags))
1331                 /* Someone may have stolen our objs.  Doesn't matter, we'll
1332                  * just come back here again.
1333                  */
1334                 goto try_again;
1335         return NULL;
1336 }
1337
1338 /*
1339  * Release an obj back to its cache. If the obj has a constructed
1340  * state, it should be in this state _before_ it is released.
1341  * - caller is responsible for the synchronization
1342  */
1343
1344 #if DEBUG
1345 # define CHECK_NR(pg)                                           \
1346         do {                                                    \
1347                 if (!VALID_PAGE(pg)) {                          \
1348                         printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
1349                                 (unsigned long)objp);           \
1350                         BUG();                                  \
1351                 } \
1352         } while (0)
1353 # define CHECK_PAGE(page)                                       \
1354         do {                                                    \
1355                 CHECK_NR(page);                                 \
1356                 if (!PageSlab(page)) {                          \
1357                         printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
1358                                 (unsigned long)objp);           \
1359                         BUG();                                  \
1360                 }                                               \
1361         } while (0)
1362
1363 #else
1364 # define CHECK_PAGE(pg) do { } while (0)
1365 #endif
1366
1367 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1368 {
1369         slab_t* slabp;
1370
1371         CHECK_PAGE(virt_to_page(objp));
1372         /* reduces memory footprint
1373          *
1374         if (OPTIMIZE(cachep))
1375                 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1376          else
1377          */
1378         slabp = GET_PAGE_SLAB(virt_to_page(objp));
1379
1380 #if DEBUG
1381         if (cachep->flags & SLAB_DEBUG_INITIAL)
1382                 /* Need to call the slab's constructor so the
1383                  * caller can perform a verify of its state (debugging).
1384                  * Called without the cache-lock held.
1385                  */
1386                 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1387
1388         if (cachep->flags & SLAB_RED_ZONE) {
1389                 objp -= BYTES_PER_WORD;
1390                 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1391                         /* Either write before start, or a double free. */
1392                         BUG();
1393                 if (xchg((unsigned long *)(objp+cachep->objsize -
1394                                 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1395                         /* Either write past end, or a double free. */
1396                         BUG();
1397         }
1398         if (cachep->flags & SLAB_POISON)
1399                 kmem_poison_obj(cachep, objp);
1400         if (kmem_extra_free_checks(cachep, slabp, objp))
1401                 return;
1402 #endif
1403         {
1404                 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1405
1406                 slab_bufctl(slabp)[objnr] = slabp->free;
1407                 slabp->free = objnr;
1408         }
1409         STATS_DEC_ACTIVE(cachep);
1410
1411         /* fixup slab chain */
1412         if (slabp->inuse-- == cachep->num)
1413                 goto moveslab_partial;
1414         if (!slabp->inuse)
1415                 goto moveslab_free;
1416         return;
1417
1418 moveslab_partial:
1419         /* was full.
1420          * Even if the page is now empty, we can set c_firstnotfull to
1421          * slabp: there are no partial slabs in this case
1422          */
1423         {
1424                 struct list_head *t = cachep->firstnotfull;
1425
1426                 cachep->firstnotfull = &slabp->list;
1427                 if (slabp->list.next == t)
1428                         return;
1429                 list_del(&slabp->list);
1430                 list_add_tail(&slabp->list, t);
1431                 return;
1432         }
1433 moveslab_free:
1434         /*
1435          * was partial, now empty.
1436          * c_firstnotfull might point to slabp
1437          * FIXME: optimize
1438          */
1439         {
1440                 struct list_head *t = cachep->firstnotfull->prev;
1441
1442                 list_del(&slabp->list);
1443                 list_add_tail(&slabp->list, &cachep->slabs);
1444                 if (cachep->firstnotfull == &slabp->list)
1445                         cachep->firstnotfull = t->next;
1446                 return;
1447         }
1448 }
1449
1450 #ifdef CONFIG_SMP
1451 static inline void __free_block (kmem_cache_t* cachep,
1452                                                         void** objpp, int len)
1453 {
1454         for ( ; len > 0; len--, objpp++)
1455                 kmem_cache_free_one(cachep, *objpp);
1456 }
1457
1458 static void free_block (kmem_cache_t* cachep, void** objpp, int len)
1459 {
1460         spin_lock(&cachep->spinlock);
1461         __free_block(cachep, objpp, len);
1462         spin_unlock(&cachep->spinlock);
1463 }
1464 #endif
1465
1466 /*
1467  * __kmem_cache_free
1468  * called with disabled ints
1469  */
1470 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1471 {
1472 #ifdef CONFIG_SMP
1473         cpucache_t *cc = cc_data(cachep);
1474
1475         CHECK_PAGE(virt_to_page(objp));
1476         if (cc) {
1477                 int batchcount;
1478                 if (cc->avail < cc->limit) {
1479                         STATS_INC_FREEHIT(cachep);
1480                         cc_entry(cc)[cc->avail++] = objp;
1481                         return;
1482                 }
1483                 STATS_INC_FREEMISS(cachep);
1484                 batchcount = cachep->batchcount;
1485                 cc->avail -= batchcount;
1486                 free_block(cachep,
1487                                         &cc_entry(cc)[cc->avail],batchcount);
1488                 cc_entry(cc)[cc->avail++] = objp;
1489                 return;
1490         } else {
1491                 free_block(cachep, &objp, 1);
1492         }
1493 #else
1494         kmem_cache_free_one(cachep, objp);
1495 #endif
1496 }
1497
1498 /**
1499  * kmem_cache_alloc - Allocate an object
1500  * @cachep: The cache to allocate from.
1501  * @flags: See kmalloc().
1502  *
1503  * Allocate an object from this cache.  The flags are only relevant
1504  * if the cache has no available objects.
1505  */
1506 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1507 {
1508         return __kmem_cache_alloc(cachep, flags);
1509 }
1510
1511 /**
1512  * kmalloc - allocate memory
1513  * @size: how many bytes of memory are required.
1514  * @flags: the type of memory to allocate.
1515  *
1516  * kmalloc is the normal method of allocating memory
1517  * in the kernel.  The @flags argument may be one of:
1518  *
1519  * %GFP_BUFFER - XXX
1520  *
1521  * %GFP_ATOMIC - allocation will not sleep.  Use inside interrupt handlers.
1522  *
1523  * %GFP_USER - allocate memory on behalf of user.  May sleep.
1524  *
1525  * %GFP_KERNEL - allocate normal kernel ram.  May sleep.
1526  *
1527  * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1528  * Don't use unless you're in the NFS code.
1529  *
1530  * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
1531  */
1532 void * kmalloc (size_t size, int flags)
1533 {
1534         cache_sizes_t *csizep = cache_sizes;
1535
1536         for (; csizep->cs_size; csizep++) {
1537                 if (size > csizep->cs_size)
1538                         continue;
1539                 return __kmem_cache_alloc(flags & GFP_DMA ?
1540                          csizep->cs_dmacachep : csizep->cs_cachep, flags);
1541         }
1542         BUG(); // too big size
1543         return NULL;
1544 }
1545
1546 /**
1547  * kmem_cache_free - Deallocate an object
1548  * @cachep: The cache the allocation was from.
1549  * @objp: The previously allocated object.
1550  *
1551  * Free an object which was previously allocated from this
1552  * cache.
1553  */
1554 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1555 {
1556         unsigned long flags;
1557 #if DEBUG
1558         CHECK_PAGE(virt_to_page(objp));
1559         if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1560                 BUG();
1561 #endif
1562
1563         local_irq_save(flags);
1564         __kmem_cache_free(cachep, objp);
1565         local_irq_restore(flags);
1566 }
1567
1568 /**
1569  * kfree - free previously allocated memory
1570  * @objp: pointer returned by kmalloc.
1571  *
1572  * Don't free memory not originally allocated by kmalloc()
1573  * or you will run into trouble.
1574  */
1575 void kfree (const void *objp)
1576 {
1577         kmem_cache_t *c;
1578         unsigned long flags;
1579
1580         if (!objp)
1581                 return;
1582         local_irq_save(flags);
1583         CHECK_PAGE(virt_to_page(objp));
1584         c = GET_PAGE_CACHE(virt_to_page(objp));
1585         __kmem_cache_free(c, (void*)objp);
1586         local_irq_restore(flags);
1587 }
1588
1589 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1590 {
1591         cache_sizes_t *csizep = cache_sizes;
1592
1593         /* This function could be moved to the header file, and
1594          * made inline so consumers can quickly determine what
1595          * cache pointer they require.
1596          */
1597         for ( ; csizep->cs_size; csizep++) {
1598                 if (size > csizep->cs_size)
1599                         continue;
1600                 break;
1601         }
1602         return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1603 }
1604
1605 #ifdef CONFIG_SMP
1606
1607 /* called with cache_chain_sem acquired.  */
1608 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1609 {
1610         ccupdate_struct_t new;
1611         int i;
1612
1613         /*
1614          * These are admin-provided, so we are more graceful.
1615          */
1616         if (limit < 0)
1617                 return -EINVAL;
1618         if (batchcount < 0)
1619                 return -EINVAL;
1620         if (batchcount > limit)
1621                 return -EINVAL;
1622         if (limit != 0 && !batchcount)
1623                 return -EINVAL;
1624
1625         memset(&new.new,0,sizeof(new.new));
1626         if (limit) {
1627                 for (i = 0; i< smp_num_cpus; i++) {
1628                         cpucache_t* ccnew;
1629
1630                         ccnew = kmalloc(sizeof(void*)*limit+
1631                                         sizeof(cpucache_t), GFP_KERNEL);
1632                         if (!ccnew)
1633                                 goto oom;
1634                         ccnew->limit = limit;
1635                         ccnew->avail = 0;
1636                         new.new[cpu_logical_map(i)] = ccnew;
1637                 }
1638         }
1639         new.cachep = cachep;
1640         spin_lock_irq(&cachep->spinlock);
1641         cachep->batchcount = batchcount;
1642         spin_unlock_irq(&cachep->spinlock);
1643
1644         smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1645
1646         for (i = 0; i < smp_num_cpus; i++) {
1647                 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1648                 if (!ccold)
1649                         continue;
1650                 local_irq_disable();
1651                 free_block(cachep, cc_entry(ccold), ccold->avail);
1652                 local_irq_enable();
1653                 kfree(ccold);
1654         }
1655         return 0;
1656 oom:
1657         for (i--; i >= 0; i--)
1658                 kfree(new.new[cpu_logical_map(i)]);
1659         return -ENOMEM;
1660 }
1661
1662 static void enable_cpucache (kmem_cache_t *cachep)
1663 {
1664         int err;
1665         int limit;
1666
1667         /* FIXME: optimize */
1668         if (cachep->objsize > PAGE_SIZE)
1669                 return;
1670         if (cachep->objsize > 1024)
1671                 limit = 60;
1672         else if (cachep->objsize > 256)
1673                 limit = 124;
1674         else
1675                 limit = 252;
1676
1677         err = kmem_tune_cpucache(cachep, limit, limit/2);
1678         if (err)
1679                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1680                                         cachep->name, -err);
1681 }
1682
1683 static void enable_all_cpucaches (void)
1684 {
1685         struct list_head* p;
1686
1687         down(&cache_chain_sem);
1688
1689         p = &cache_cache.next;
1690         do {
1691                 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1692
1693                 enable_cpucache(cachep);
1694                 p = cachep->next.next;
1695         } while (p != &cache_cache.next);
1696
1697         up(&cache_chain_sem);
1698 }
1699 #endif
1700
1701 /**
1702  * kmem_cache_reap - Reclaim memory from caches.
1703  * @gfp_mask: the type of memory required.
1704  *
1705  * Called from try_to_free_page().
1706  */
1707 void kmem_cache_reap (int gfp_mask)
1708 {
1709         slab_t *slabp;
1710         kmem_cache_t *searchp;
1711         kmem_cache_t *best_cachep;
1712         unsigned int best_pages;
1713         unsigned int best_len;
1714         unsigned int scan;
1715
1716         if (gfp_mask & __GFP_WAIT)
1717                 down(&cache_chain_sem);
1718         else
1719                 if (down_trylock(&cache_chain_sem))
1720                         return;
1721
1722         scan = REAP_SCANLEN;
1723         best_len = 0;
1724         best_pages = 0;
1725         best_cachep = NULL;
1726         searchp = clock_searchp;
1727         do {
1728                 unsigned int pages;
1729                 struct list_head* p;
1730                 unsigned int full_free;
1731
1732                 /* It's safe to test this without holding the cache-lock. */
1733                 if (searchp->flags & SLAB_NO_REAP)
1734                         goto next;
1735                 spin_lock_irq(&searchp->spinlock);
1736                 if (searchp->growing)
1737                         goto next_unlock;
1738                 if (searchp->dflags & DFLGS_GROWN) {
1739                         searchp->dflags &= ~DFLGS_GROWN;
1740                         goto next_unlock;
1741                 }
1742 #ifdef CONFIG_SMP
1743                 {
1744                         cpucache_t *cc = cc_data(searchp);
1745                         if (cc && cc->avail) {
1746                                 __free_block(searchp, cc_entry(cc), cc->avail);
1747                                 cc->avail = 0;
1748                         }
1749                 }
1750 #endif
1751
1752                 full_free = 0;
1753                 p = searchp->slabs.prev;
1754                 while (p != &searchp->slabs) {
1755                         slabp = list_entry(p, slab_t, list);
1756                         if (slabp->inuse)
1757                                 break;
1758                         full_free++;
1759                         p = p->prev;
1760                 }
1761
1762                 /*
1763                  * Try to avoid slabs with constructors and/or
1764                  * more than one page per slab (as it can be difficult
1765                  * to get high orders from gfp()).
1766                  */
1767                 pages = full_free * (1<<searchp->gfporder);
1768                 if (searchp->ctor)
1769                         pages = (pages*4+1)/5;
1770                 if (searchp->gfporder)
1771                         pages = (pages*4+1)/5;
1772                 if (pages > best_pages) {
1773                         best_cachep = searchp;
1774                         best_len = full_free;
1775                         best_pages = pages;
1776                         if (full_free >= REAP_PERFECT) {
1777                                 clock_searchp = list_entry(searchp->next.next,
1778                                                         kmem_cache_t,next);
1779                                 goto perfect;
1780                         }
1781                 }
1782 next_unlock:
1783                 spin_unlock_irq(&searchp->spinlock);
1784 next:
1785                 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1786         } while (--scan && searchp != clock_searchp);
1787
1788         clock_searchp = searchp;
1789
1790         if (!best_cachep)
1791                 /* couldn't find anything to reap */
1792                 goto out;
1793
1794         spin_lock_irq(&best_cachep->spinlock);
1795 perfect:
1796         /* free only 80% of the free slabs */
1797         best_len = (best_len*4 + 1)/5;
1798         for (scan = 0; scan < best_len; scan++) {
1799                 struct list_head *p;
1800
1801                 if (best_cachep->growing)
1802                         break;
1803                 p = best_cachep->slabs.prev;
1804                 if (p == &best_cachep->slabs)
1805                         break;
1806                 slabp = list_entry(p,slab_t,list);
1807                 if (slabp->inuse)
1808                         break;
1809                 list_del(&slabp->list);
1810                 if (best_cachep->firstnotfull == &slabp->list)
1811                         best_cachep->firstnotfull = &best_cachep->slabs;
1812                 STATS_INC_REAPED(best_cachep);
1813
1814                 /* Safe to drop the lock. The slab is no longer linked to the
1815                  * cache.
1816                  */
1817                 spin_unlock_irq(&best_cachep->spinlock);
1818                 kmem_slab_destroy(best_cachep, slabp);
1819                 spin_lock_irq(&best_cachep->spinlock);
1820         }
1821         spin_unlock_irq(&best_cachep->spinlock);
1822 out:
1823         up(&cache_chain_sem);
1824         return;
1825 }
1826
1827 #ifdef CONFIG_PROC_FS
1828 /* /proc/slabinfo
1829  *      cache-name num-active-objs total-objs
1830  *      obj-size num-active-slabs total-slabs
1831  *      num-pages-per-slab
1832  */
1833 #define FIXUP(t)                                \
1834         do {                                    \
1835                 if (len <= off) {               \
1836                         off -= len;             \
1837                         len = 0;                \
1838                 } else {                        \
1839                         if (len-off > count)    \
1840                                 goto t;         \
1841                 }                               \
1842         } while (0)
1843
1844 static int proc_getdata (char*page, char**start, off_t off, int count)
1845 {
1846         struct list_head *p;
1847         int len = 0;
1848
1849         /* Output format version, so at least we can change it without _too_
1850          * many complaints.
1851          */
1852         len += sprintf(page+len, "slabinfo - version: 1.1"
1853 #if STATS
1854                                 " (statistics)"
1855 #endif
1856 #ifdef CONFIG_SMP
1857                                 " (SMP)"
1858 #endif
1859                                 "\n");
1860         FIXUP(got_data);
1861
1862         down(&cache_chain_sem);
1863         p = &cache_cache.next;
1864         do {
1865                 kmem_cache_t    *cachep;
1866                 struct list_head *q;
1867                 slab_t          *slabp;
1868                 unsigned long   active_objs;
1869                 unsigned long   num_objs;
1870                 unsigned long   active_slabs = 0;
1871                 unsigned long   num_slabs;
1872                 cachep = list_entry(p, kmem_cache_t, next);
1873
1874                 spin_lock_irq(&cachep->spinlock);
1875                 active_objs = 0;
1876                 num_slabs = 0;
1877                 list_for_each(q,&cachep->slabs) {
1878                         slabp = list_entry(q, slab_t, list);
1879                         active_objs += slabp->inuse;
1880                         num_objs += cachep->num;
1881                         if (slabp->inuse)
1882                                 active_slabs++;
1883                         else
1884                                 num_slabs++;
1885                 }
1886                 num_slabs+=active_slabs;
1887                 num_objs = num_slabs*cachep->num;
1888
1889                 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1890                         cachep->name, active_objs, num_objs, cachep->objsize,
1891                         active_slabs, num_slabs, (1<<cachep->gfporder));
1892
1893 #if STATS
1894                 {
1895                         unsigned long errors = cachep->errors;
1896                         unsigned long high = cachep->high_mark;
1897                         unsigned long grown = cachep->grown;
1898                         unsigned long reaped = cachep->reaped;
1899                         unsigned long allocs = cachep->num_allocations;
1900
1901                         len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1902                                         high, allocs, grown, reaped, errors);
1903                 }
1904 #endif
1905 #ifdef CONFIG_SMP
1906                 {
1907                         unsigned int batchcount = cachep->batchcount;
1908                         unsigned int limit;
1909
1910                         if (cc_data(cachep))
1911                                 limit = cc_data(cachep)->limit;
1912                          else
1913                                 limit = 0;
1914                         len += sprintf(page+len, " : %4u %4u",
1915                                         limit, batchcount);
1916                 }
1917 #endif
1918 #if STATS && defined(CONFIG_SMP)
1919                 {
1920                         unsigned long allochit = atomic_read(&cachep->allochit);
1921                         unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1922                         unsigned long freehit = atomic_read(&cachep->freehit);
1923                         unsigned long freemiss = atomic_read(&cachep->freemiss);
1924                         len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1925                                         allochit, allocmiss, freehit, freemiss);
1926                 }
1927 #endif
1928                 len += sprintf(page+len,"\n");
1929                 spin_unlock_irq(&cachep->spinlock);
1930                 FIXUP(got_data_up);
1931                 p = cachep->next.next;
1932         } while (p != &cache_cache.next);
1933 got_data_up:
1934         up(&cache_chain_sem);
1935
1936 got_data:
1937         *start = page+off;
1938         return len;
1939 }
1940
1941 /**
1942  * slabinfo_read_proc - generates /proc/slabinfo
1943  * @page: scratch area, one page long
1944  * @start: pointer to the pointer to the output buffer
1945  * @off: offset within /proc/slabinfo the caller is interested in
1946  * @count: requested len in bytes
1947  * @eof: eof marker
1948  * @data: unused
1949  *
1950  * The contents of the buffer are
1951  * cache-name
1952  * num-active-objs
1953  * total-objs
1954  * object size
1955  * num-active-slabs
1956  * total-slabs
1957  * num-pages-per-slab
1958  * + further values on SMP and with statistics enabled
1959  */
1960 int slabinfo_read_proc (char *page, char **start, off_t off,
1961                                  int count, int *eof, void *data)
1962 {
1963         int len = proc_getdata(page, start, off, count);
1964         len -= (*start-page);
1965         if (len <= count)
1966                 *eof = 1;
1967         if (len>count) len = count;
1968         if (len<0) len = 0;
1969         return len;
1970 }
1971
1972 #define MAX_SLABINFO_WRITE 128
1973 /**
1974  * slabinfo_write_proc - SMP tuning for the slab allocator
1975  * @file: unused
1976  * @buffer: user buffer
1977  * @count: data len
1978  * @data: unused
1979  */
1980 int slabinfo_write_proc (struct file *file, const char *buffer,
1981                                 unsigned long count, void *data)
1982 {
1983 #ifdef CONFIG_SMP
1984         char kbuf[MAX_SLABINFO_WRITE], *tmp;
1985         int limit, batchcount, res;
1986         struct list_head *p;
1987
1988         if (count > MAX_SLABINFO_WRITE)
1989                 return -EINVAL;
1990         if (copy_from_user(&kbuf, buffer, count))
1991                 return -EFAULT;
1992
1993         tmp = strchr(kbuf, ' ');
1994         if (!tmp)
1995                 return -EINVAL;
1996         *tmp = '\0';
1997         tmp++;
1998         limit = simple_strtol(tmp, &tmp, 10);
1999         while (*tmp == ' ')
2000                 tmp++;
2001         batchcount = simple_strtol(tmp, &tmp, 10);
2002
2003         /* Find the cache in the chain of caches. */
2004         down(&cache_chain_sem);
2005         res = -EINVAL;
2006         list_for_each(p,&cache_chain) {
2007                 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2008
2009                 if (!strcmp(cachep->name, kbuf)) {
2010                         res = kmem_tune_cpucache(cachep, limit, batchcount);
2011                         break;
2012                 }
2013         }
2014         up(&cache_chain_sem);
2015         if (res >= 0)
2016                 res = count;
2017         return res;
2018 #else
2019         return -EINVAL;
2020 #endif
2021 }
2022 #endif