mm/slob.c

   1 /*
   2  * SLOB Allocator: Simple List Of Blocks
   3  *
   4  * Matt Mackall <mpm@selenic.com> 12/30/03
   5  *
   6  * How SLOB works:
   7  *
   8  * The core of SLOB is a traditional K&R style heap allocator, with
   9  * support for returning aligned objects. The granularity of this
  10  * allocator is 4 bytes on 32-bit and 8 bytes on 64-bit, though it
  11  * could be as low as 2 if the compiler alignment requirements allow.
  12  *
  13  * The slob heap is a linked list of pages from __get_free_page, and
  14  * within each page, there is a singly-linked list of free blocks (slob_t).
  15  * The heap is grown on demand and allocation from the heap is currently
  16  * first-fit.
  17  *
  18  * Above this is an implementation of kmalloc/kfree. Blocks returned
  19  * from kmalloc are 4-byte aligned and prepended with a 4-byte header.
  20  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
  21  * __get_free_pages directly so that it can return page-aligned blocks
  22  * and keeps a linked list of such pages and their orders. These
  23  * objects are detected in kfree() by their page alignment.
  24  *
  25  * SLAB is emulated on top of SLOB by simply calling constructors and
  26  * destructors for every SLAB allocation. Objects are returned with the
  27  * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
  28  * case the low-level allocator will fragment blocks to create the proper
  29  * alignment. Again, objects of page-size or greater are allocated by
  30  * calling __get_free_pages. As SLAB objects know their size, no separate
  31  * size bookkeeping is necessary and there is essentially no allocation
  32  * space overhead.
  33  */
  34
  35 #include <linux/kernel.h>
  36 #include <linux/slab.h>
  37 #include <linux/mm.h>
  38 #include <linux/cache.h>
  39 #include <linux/init.h>
  40 #include <linux/module.h>
  41 #include <linux/rcupdate.h>
  42 #include <linux/list.h>
  43 #include <asm/atomic.h>
  44
  45 /* SLOB_MIN_ALIGN == sizeof(long) */
  46 #if BITS_PER_BYTE == 32
  47 #define SLOB_MIN_ALIGN  4
  48 #else
  49 #define SLOB_MIN_ALIGN  8
  50 #endif
  51
  52 /*
  53  * slob_block has a field 'units', which indicates size of block if +ve,
  54  * or offset of next block if -ve (in SLOB_UNITs).
  55  *
  56  * Free blocks of size 1 unit simply contain the offset of the next block.
  57  * Those with larger size contain their size in the first SLOB_UNIT of
  58  * memory, and the offset of the next free block in the second SLOB_UNIT.
  59  */
  60 #if PAGE_SIZE <= (32767 * SLOB_MIN_ALIGN)
  61 typedef s16 slobidx_t;
  62 #else
  63 typedef s32 slobidx_t;
  64 #endif
  65
  66 /*
  67  * Align struct slob_block to long for now, but can some embedded
  68  * architectures get away with less?
  69  */
  70 struct slob_block {
  71         slobidx_t units;
  72 } __attribute__((aligned(SLOB_MIN_ALIGN)));
  73 typedef struct slob_block slob_t;
  74
  75 /*
  76  * We use struct page fields to manage some slob allocation aspects,
  77  * however to avoid the horrible mess in include/linux/mm_types.h, we'll
  78  * just define our own struct page type variant here.
  79  */
  80 struct slob_page {
  81         union {
  82                 struct {
  83                         unsigned long flags;    /* mandatory */
  84                         atomic_t _count;        /* mandatory */
  85                         slobidx_t units;        /* free units left in page */
  86                         unsigned long pad[2];
  87                         slob_t *free;           /* first free slob_t in page */
  88                         struct list_head list;  /* linked list of free pages */
  89                 };
  90                 struct page page;
  91         };
  92 };
  93 static inline void struct_slob_page_wrong_size(void)
  94 { BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
  95
  96 /*
  97  * free_slob_page: call before a slob_page is returned to the page allocator.
  98  */
  99 static inline void free_slob_page(struct slob_page *sp)
 100 {
 101         reset_page_mapcount(&sp->page);
 102         sp->page.mapping = NULL;
 103 }
 104
 105 /*
 106  * All (partially) free slob pages go on this list.
 107  */
 108 static LIST_HEAD(free_slob_pages);
 109
 110 /*
 111  * slob_page: True for all slob pages (false for bigblock pages)
 112  */
 113 static inline int slob_page(struct slob_page *sp)
 114 {
 115         return test_bit(PG_active, &sp->flags);
 116 }
 117
 118 static inline void set_slob_page(struct slob_page *sp)
 119 {
 120         __set_bit(PG_active, &sp->flags);
 121 }
 122
 123 static inline void clear_slob_page(struct slob_page *sp)
 124 {
 125         __clear_bit(PG_active, &sp->flags);
 126 }
 127
 128 /*
 129  * slob_page_free: true for pages on free_slob_pages list.
 130  */
 131 static inline int slob_page_free(struct slob_page *sp)
 132 {
 133         return test_bit(PG_private, &sp->flags);
 134 }
 135
 136 static inline void set_slob_page_free(struct slob_page *sp)
 137 {
 138         list_add(&sp->list, &free_slob_pages);
 139         __set_bit(PG_private, &sp->flags);
 140 }
 141
 142 static inline void clear_slob_page_free(struct slob_page *sp)
 143 {
 144         list_del(&sp->list);
 145         __clear_bit(PG_private, &sp->flags);
 146 }
 147
 148 #define SLOB_UNIT sizeof(slob_t)
 149 #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
 150 #define SLOB_ALIGN L1_CACHE_BYTES
 151
 152 /*
 153  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 154  * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 155  * the block using call_rcu.
 156  */
struct slob_rcu {
        struct rcu_head head;   /* passed to call_rcu(); kmem_rcu_free() casts it back */
        int size;               /* cache object size incl. this footer, set in kmem_cache_free() */
};
 161
/*
 * slob_lock protects all slob allocator structures. slob_alloc() and
 * slob_free() take it with spin_lock_irqsave() around their accesses to
 * the free page list and the per-page free lists.
 */
static DEFINE_SPINLOCK(slob_lock);
 166
 167 /*
 168  * Encode the given size and next info into a free slob block s.
 169  */
 170 static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
 171 {
 172         slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
 173         slobidx_t offset = next - base;
 174
 175         if (size > 1) {
 176                 s[0].units = size;
 177                 s[1].units = offset;
 178         } else
 179                 s[0].units = -offset;
 180 }
 181
 182 /*
 183  * Return the size of a slob block.
 184  */
 185 static slobidx_t slob_units(slob_t *s)
 186 {
 187         if (s->units > 0)
 188                 return s->units;
 189         return 1;
 190 }
 191
 192 /*
 193  * Return the next free slob block pointer after this one.
 194  */
 195 static slob_t *slob_next(slob_t *s)
 196 {
 197         slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
 198         slobidx_t next;
 199
 200         if (s[0].units < 0)
 201                 next = -s[0].units;
 202         else
 203                 next = s[1].units;
 204         return base+next;
 205 }
 206
 207 /*
 208  * Returns true if s is the last free block in its page.
 209  */
 210 static int slob_last(slob_t *s)
 211 {
 212         return !((unsigned long)slob_next(s) & ~PAGE_MASK);
 213 }
 214
 215 /*
 216  * Allocate a slob block within a given slob_page sp.
 217  */
 218 static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
 219 {
 220         slob_t *prev, *cur, *aligned = 0;
 221         int delta = 0, units = SLOB_UNITS(size);
 222
 223         for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
 224                 slobidx_t avail = slob_units(cur);
 225
 226                 if (align) {
 227                         aligned = (slob_t *)ALIGN((unsigned long)cur, align);
 228                         delta = aligned - cur;
 229                 }
 230                 if (avail >= units + delta) { /* room enough? */
 231                         slob_t *next;
 232
 233                         if (delta) { /* need to fragment head to align? */
 234                                 next = slob_next(cur);
 235                                 set_slob(aligned, avail - delta, next);
 236                                 set_slob(cur, delta, aligned);
 237                                 prev = cur;
 238                                 cur = aligned;
 239                                 avail = slob_units(cur);
 240                         }
 241
 242                         next = slob_next(cur);
 243                         if (avail == units) { /* exact fit? unlink. */
 244                                 if (prev)
 245                                         set_slob(prev, slob_units(prev), next);
 246                                 else
 247                                         sp->free = next;
 248                         } else { /* fragment */
 249                                 if (prev)
 250                                         set_slob(prev, slob_units(prev), cur + units);
 251                                 else
 252                                         sp->free = cur + units;
 253                                 set_slob(cur + units, avail - units, next);
 254                         }
 255
 256                         sp->units -= units;
 257                         if (!sp->units)
 258                                 clear_slob_page_free(sp);
 259                         return cur;
 260                 }
 261                 if (slob_last(cur))
 262                         return NULL;
 263         }
 264 }
 265
 266 /*
 267  * slob_alloc: entry point into the slob allocator.
 268  */
 269 static void *slob_alloc(size_t size, gfp_t gfp, int align)
 270 {
 271         struct slob_page *sp;
 272         slob_t *b = NULL;
 273         unsigned long flags;
 274
 275         spin_lock_irqsave(&slob_lock, flags);
 276         /* Iterate through each partially free page, try to find room */
 277         list_for_each_entry(sp, &free_slob_pages, list) {
 278                 if (sp->units >= SLOB_UNITS(size)) {
 279                         b = slob_page_alloc(sp, size, align);
 280                         if (b)
 281                                 break;
 282                 }
 283         }
 284         spin_unlock_irqrestore(&slob_lock, flags);
 285
 286         /* Not enough space: must allocate a new page */
 287         if (!b) {
 288                 b = (slob_t *)__get_free_page(gfp);
 289                 if (!b)
 290                         return 0;
 291                 sp = (struct slob_page *)virt_to_page(b);
 292                 set_slob_page(sp);
 293
 294                 spin_lock_irqsave(&slob_lock, flags);
 295                 sp->units = SLOB_UNITS(PAGE_SIZE);
 296                 sp->free = b;
 297                 INIT_LIST_HEAD(&sp->list);
 298                 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
 299                 set_slob_page_free(sp);
 300                 b = slob_page_alloc(sp, size, align);
 301                 BUG_ON(!b);
 302                 spin_unlock_irqrestore(&slob_lock, flags);
 303         }
 304         return b;
 305 }
 306
 307 /*
 308  * slob_free: entry point into the slob allocator.
 309  */
 310 static void slob_free(void *block, int size)
 311 {
 312         struct slob_page *sp;
 313         slob_t *prev, *next, *b = (slob_t *)block;
 314         slobidx_t units;
 315         unsigned long flags;
 316
 317         if (!block)
 318                 return;
 319         BUG_ON(!size);
 320
 321         sp = (struct slob_page *)virt_to_page(block);
 322         units = SLOB_UNITS(size);
 323
 324         spin_lock_irqsave(&slob_lock, flags);
 325
 326         if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
 327                 /* Go directly to page allocator. Do not pass slob allocator */
 328                 if (slob_page_free(sp))
 329                         clear_slob_page_free(sp);
 330                 clear_slob_page(sp);
 331                 free_slob_page(sp);
 332                 free_page((unsigned long)b);
 333                 goto out;
 334         }
 335
 336         if (!slob_page_free(sp)) {
 337                 /* This slob page is about to become partially free. Easy! */
 338                 sp->units = units;
 339                 sp->free = b;
 340                 set_slob(b, units,
 341                         (void *)((unsigned long)(b +
 342                                         SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
 343                 set_slob_page_free(sp);
 344                 goto out;
 345         }
 346
 347         /*
 348          * Otherwise the page is already partially free, so find reinsertion
 349          * point.
 350          */
 351         sp->units += units;
 352
 353         if (b < sp->free) {
 354                 set_slob(b, units, sp->free);
 355                 sp->free = b;
 356         } else {
 357                 prev = sp->free;
 358                 next = slob_next(prev);
 359                 while (b > next) {
 360                         prev = next;
 361                         next = slob_next(prev);
 362                 }
 363
 364                 if (!slob_last(prev) && b + units == next) {
 365                         units += slob_units(next);
 366                         set_slob(b, units, slob_next(next));
 367                 } else
 368                         set_slob(b, units, next);
 369
 370                 if (prev + slob_units(prev) == b) {
 371                         units = slob_units(b) + slob_units(prev);
 372                         set_slob(prev, units, slob_next(b));
 373                 } else
 374                         set_slob(prev, slob_units(prev), b);
 375         }
 376 out:
 377         spin_unlock_irqrestore(&slob_lock, flags);
 378 }
 379
 380 /*
 381  * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 382  */
 383
 384 struct bigblock {
 385         int order;
 386         void *pages;
 387         struct bigblock *next;
 388 };
 389 typedef struct bigblock bigblock_t;
 390
/* Head of the singly linked list of descriptors for live big allocations. */
static bigblock_t *bigblocks;

/* Held (irqsave) by __kmalloc(), kfree() and ksize() around bigblocks list access. */
static DEFINE_SPINLOCK(block_lock);
 394
 395
 396 void *__kmalloc(size_t size, gfp_t gfp)
 397 {
 398         slob_t *m;
 399         bigblock_t *bb;
 400         unsigned long flags;
 401
 402         if (size < PAGE_SIZE - SLOB_UNIT) {
 403                 m = slob_alloc(size + SLOB_UNIT, gfp, 0);
 404                 if (m)
 405                         m->units = size;
 406                 return m+1;
 407         }
 408
 409         bb = slob_alloc(sizeof(bigblock_t), gfp, 0);
 410         if (!bb)
 411                 return 0;
 412
 413         bb->order = get_order(size);
 414         bb->pages = (void *)__get_free_pages(gfp, bb->order);
 415
 416         if (bb->pages) {
 417                 spin_lock_irqsave(&block_lock, flags);
 418                 bb->next = bigblocks;
 419                 bigblocks = bb;
 420                 spin_unlock_irqrestore(&block_lock, flags);
 421                 return bb->pages;
 422         }
 423
 424         slob_free(bb, sizeof(bigblock_t));
 425         return 0;
 426 }
 427 EXPORT_SYMBOL(__kmalloc);
 428
 429 /**
 430  * krealloc - reallocate memory. The contents will remain unchanged.
 431  *
 432  * @p: object to reallocate memory for.
 433  * @new_size: how many bytes of memory are required.
 434  * @flags: the type of memory to allocate.
 435  *
 436  * The contents of the object pointed to are preserved up to the
 437  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
 438  * behaves exactly like kmalloc().  If @size is 0 and @p is not a
 439  * %NULL pointer, the object pointed to is freed.
 440  */
 441 void *krealloc(const void *p, size_t new_size, gfp_t flags)
 442 {
 443         void *ret;
 444
 445         if (unlikely(!p))
 446                 return kmalloc_track_caller(new_size, flags);
 447
 448         if (unlikely(!new_size)) {
 449                 kfree(p);
 450                 return NULL;
 451         }
 452
 453         ret = kmalloc_track_caller(new_size, flags);
 454         if (ret) {
 455                 memcpy(ret, p, min(new_size, ksize(p)));
 456                 kfree(p);
 457         }
 458         return ret;
 459 }
 460 EXPORT_SYMBOL(krealloc);
 461
 462 void kfree(const void *block)
 463 {
 464         struct slob_page *sp;
 465         slob_t *m;
 466         bigblock_t *bb, **last = &bigblocks;
 467         unsigned long flags;
 468
 469         if (!block)
 470                 return;
 471
 472         sp = (struct slob_page *)virt_to_page(block);
 473         if (!slob_page(sp)) {
 474                 /* on the big block list */
 475                 spin_lock_irqsave(&block_lock, flags);
 476                 for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) {
 477                         if (bb->pages == block) {
 478                                 *last = bb->next;
 479                                 spin_unlock_irqrestore(&block_lock, flags);
 480                                 free_pages((unsigned long)block, bb->order);
 481                                 slob_free(bb, sizeof(bigblock_t));
 482                                 return;
 483                         }
 484                 }
 485                 spin_unlock_irqrestore(&block_lock, flags);
 486                 WARN_ON(1);
 487                 return;
 488         }
 489
 490         m = (slob_t *)block - 1;
 491         slob_free(m, m->units + SLOB_UNIT);
 492         return;
 493 }
 494
 495 EXPORT_SYMBOL(kfree);
 496
 497 size_t ksize(const void *block)
 498 {
 499         struct slob_page *sp;
 500         bigblock_t *bb;
 501         unsigned long flags;
 502
 503         if (!block)
 504                 return 0;
 505
 506         sp = (struct slob_page *)virt_to_page(block);
 507         if (!slob_page(sp)) {
 508                 spin_lock_irqsave(&block_lock, flags);
 509                 for (bb = bigblocks; bb; bb = bb->next)
 510                         if (bb->pages == block) {
 511                                 spin_unlock_irqrestore(&slob_lock, flags);
 512                                 return PAGE_SIZE << bb->order;
 513                         }
 514                 spin_unlock_irqrestore(&block_lock, flags);
 515         }
 516
 517         return ((slob_t *)block - 1)->units + SLOB_UNIT;
 518 }
 519
 520 struct kmem_cache {
 521         unsigned int size, align;
 522         unsigned long flags;
 523         const char *name;
 524         void (*ctor)(void *, struct kmem_cache *, unsigned long);
 525 };
 526
 527 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 528         size_t align, unsigned long flags,
 529         void (*ctor)(void*, struct kmem_cache *, unsigned long),
 530         void (*dtor)(void*, struct kmem_cache *, unsigned long))
 531 {
 532         struct kmem_cache *c;
 533
 534         c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
 535
 536         if (c) {
 537                 c->name = name;
 538                 c->size = size;
 539                 if (flags & SLAB_DESTROY_BY_RCU) {
 540                         /* leave room for rcu footer at the end of object */
 541                         c->size += sizeof(struct slob_rcu);
 542                 }
 543                 c->flags = flags;
 544                 c->ctor = ctor;
 545                 /* ignore alignment unless it's forced */
 546                 c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
 547                 if (c->align < align)
 548                         c->align = align;
 549         } else if (flags & SLAB_PANIC)
 550                 panic("Cannot create slab cache %s\n", name);
 551
 552         return c;
 553 }
 554 EXPORT_SYMBOL(kmem_cache_create);
 555
 556 void kmem_cache_destroy(struct kmem_cache *c)
 557 {
 558         slob_free(c, sizeof(struct kmem_cache));
 559 }
 560 EXPORT_SYMBOL(kmem_cache_destroy);
 561
 562 void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
 563 {
 564         void *b;
 565
 566         if (c->size < PAGE_SIZE)
 567                 b = slob_alloc(c->size, flags, c->align);
 568         else
 569                 b = (void *)__get_free_pages(flags, get_order(c->size));
 570
 571         if (c->ctor)
 572                 c->ctor(b, c, 0);
 573
 574         return b;
 575 }
 576 EXPORT_SYMBOL(kmem_cache_alloc);
 577
 578 void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
 579 {
 580         void *ret = kmem_cache_alloc(c, flags);
 581         if (ret)
 582                 memset(ret, 0, c->size);
 583
 584         return ret;
 585 }
 586 EXPORT_SYMBOL(kmem_cache_zalloc);
 587
 588 static void __kmem_cache_free(void *b, int size)
 589 {
 590         if (size < PAGE_SIZE)
 591                 slob_free(b, size);
 592         else
 593                 free_pages((unsigned long)b, get_order(size));
 594 }
 595
 596 static void kmem_rcu_free(struct rcu_head *head)
 597 {
 598         struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
 599         void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
 600
 601         __kmem_cache_free(b, slob_rcu->size);
 602 }
 603
 604 void kmem_cache_free(struct kmem_cache *c, void *b)
 605 {
 606         if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
 607                 struct slob_rcu *slob_rcu;
 608                 slob_rcu = b + (c->size - sizeof(struct slob_rcu));
 609                 INIT_RCU_HEAD(&slob_rcu->head);
 610                 slob_rcu->size = c->size;
 611                 call_rcu(&slob_rcu->head, kmem_rcu_free);
 612         } else {
 613                 __kmem_cache_free(b, c->size);
 614         }
 615 }
 616 EXPORT_SYMBOL(kmem_cache_free);
 617
 618 unsigned int kmem_cache_size(struct kmem_cache *c)
 619 {
 620         return c->size;
 621 }
 622 EXPORT_SYMBOL(kmem_cache_size);
 623
 624 const char *kmem_cache_name(struct kmem_cache *c)
 625 {
 626         return c->name;
 627 }
 628 EXPORT_SYMBOL(kmem_cache_name);
 629
/* Stub: does nothing with the cache and always returns 0. */
int kmem_cache_shrink(struct kmem_cache *d)
{
        return 0;
}
 634 EXPORT_SYMBOL(kmem_cache_shrink);
 635
/* Stub: performs no check on the pointer and always returns 0. */
int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
        return 0;
}
 640
/* Empty: this allocator has nothing to set up here. */
void __init kmem_cache_init(void)
{
}