drivers/staging/ramzswap/xvmalloc.c
/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"
static void stat_inc(u64 *value)
{
        *value = *value + 1;
}

static void stat_dec(u64 *value)
{
        *value = *value - 1;
}

/*
 * Block offsets within a page are XV_ALIGN aligned, so the low bits
 * of block->prev are free to hold the BLOCK_FREE/PREV_FREE flags.
 */
static int test_flag(struct block_header *block, enum blockflags flag)
{
        return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
        block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
        block->prev &= ~BIT(flag);
}
/*
 * Given <page, offset> pair, provide a dereferenceable pointer.
 * This is called from xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
{
        unsigned char *base;

        base = kmap_atomic(page, type);
        return base + offset;
}

static void put_ptr_atomic(void *ptr, enum km_type type)
{
        kunmap_atomic(ptr, type);
}
static u32 get_blockprev(struct block_header *block)
{
        return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
        block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
        return (struct block_header *)
                ((char *)block + block->size + XV_ALIGN);
}
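/*
 * A sketch of the in-page layout that BLOCK_NEXT() walks, assuming the
 * XV_ALIGN-byte block header defined in xvmalloc_int.h: every block is
 * a header immediately followed by block->size bytes of payload, so the
 * next header lives at (char *)block + XV_ALIGN + block->size.
 *
 *      +--------+------------------+--------+---------------+
 *      | header | payload          | header | payload       |
 *      +--------+------------------+--------+---------------+
 *      ^ block                     ^ BLOCK_NEXT(block)
 *
 * This is also why xv_malloc() adds XV_ALIGN to the offset it returns:
 * the caller's object starts just past the header.
 */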
/*
 * Get index of free list containing blocks of maximum size
 * which is less than or equal to given size.
 */
static u32 get_index_for_insert(u32 size)
{
        if (unlikely(size > XV_MAX_ALLOC_SIZE))
                size = XV_MAX_ALLOC_SIZE;
        size &= ~FL_DELTA_MASK;
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get index of free list having blocks of size greater than
 * or equal to requested size.
 */
static u32 get_index(u32 size)
{
        if (unlikely(size < XV_MIN_ALLOC_SIZE))
                size = XV_MIN_ALLOC_SIZE;
        size = ALIGN(size, FL_DELTA);
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}
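/*
 * Worked example, assuming the xvmalloc_int.h values XV_MIN_ALLOC_SIZE == 32,
 * FL_DELTA == 8 and FL_DELTA_SHIFT == 3: free list i holds blocks whose size
 * falls in [32 + 8*i, 32 + 8*i + 7]. For size == 100, get_index() rounds up
 * to 104 and returns (104 - 32) >> 3 == 9, the first list whose every member
 * is guaranteed to satisfy the request; get_index_for_insert() rounds down
 * to 96 and returns (96 - 32) >> 3 == 8, the list whose range actually
 * contains a 100-byte block.
 */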
/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches two level bitmap to locate block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns index
 * in freelist where we found this block.
 * Otherwise, returns 0 and <page, offset> params are not touched.
 */
static u32 find_block(struct xv_pool *pool, u32 size,
                        struct page **page, u32 *offset)
{
        ulong flbitmap, slbitmap;
        u32 flindex, slindex, slbitstart;

        /* There are no free blocks in this pool */
        if (!pool->flbitmap)
                return 0;

        /* Get freelist index corresponding to this size */
        slindex = get_index(size);
        slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
        slbitstart = slindex % BITS_PER_LONG;

        /*
         * If freelist is not empty at this index, we found the
         * block - head of this list. This is approximate best-fit match.
         */
        if (test_bit(slbitstart, &slbitmap)) {
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /*
         * No best-fit found. Search a bit further in bitmap for a free block.
         * Second level bitmap consists of a series of BITS_PER_LONG-bit
         * words. Search further in the word where we expected a best-fit,
         * starting from the index location found above.
         */
        slbitstart++;
        slbitmap >>= slbitstart;

        /* Skip this search if we were already at end of this bitmap word */
        if ((slbitstart != BITS_PER_LONG) && slbitmap) {
                slindex += __ffs(slbitmap) + 1;
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /* Now do a full two-level bitmap search to find next nearest fit */
        flindex = slindex / BITS_PER_LONG;

        flbitmap = (pool->flbitmap) >> (flindex + 1);
        if (!flbitmap)
                return 0;

        flindex += __ffs(flbitmap) + 1;
        slbitmap = pool->slbitmap[flindex];
        slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
        *page = pool->freelist[slindex].page;
        *offset = pool->freelist[slindex].offset;

        return slindex;
}
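/*
 * Search trace, assuming BITS_PER_LONG == 64: for a request mapping to
 * slindex == 70, find_block() first tests bit 6 of slbitmap[1]. If that
 * list is empty, it scans the remaining high bits of slbitmap[1] with
 * __ffs() for the next-larger non-empty list. Failing that, it consults
 * the first-level bitmap, where bit i summarizes whether any bit of
 * slbitmap[i] is set, to jump straight to the next second-level word
 * that has any free block at all.
 */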
/*
 * Insert block at <page, offset> in freelist of given pool.
 * freelist used depends on block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block)
{
        u32 flindex, slindex;
        struct block_header *nextblock;

        slindex = get_index_for_insert(block->size);
        flindex = slindex / BITS_PER_LONG;

        block->link.prev_page = 0;
        block->link.prev_offset = 0;
        block->link.next_page = pool->freelist[slindex].page;
        block->link.next_offset = pool->freelist[slindex].offset;
        pool->freelist[slindex].page = page;
        pool->freelist[slindex].offset = offset;

        if (block->link.next_page) {
                nextblock = get_ptr_atomic(block->link.next_page,
                                        block->link.next_offset, KM_USER1);
                nextblock->link.prev_page = page;
                nextblock->link.prev_offset = offset;
                put_ptr_atomic(nextblock, KM_USER1);
        }

        __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
        __set_bit(flindex, &pool->flbitmap);
}
/*
 * Remove block from head of freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block_head(struct xv_pool *pool,
                        struct block_header *block, u32 slindex)
{
        struct block_header *tmpblock;
        u32 flindex = slindex / BITS_PER_LONG;

        pool->freelist[slindex].page = block->link.next_page;
        pool->freelist[slindex].offset = block->link.next_offset;
        block->link.prev_page = 0;
        block->link.prev_offset = 0;

        if (!pool->freelist[slindex].page) {
                __clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
                if (!pool->slbitmap[flindex])
                        __clear_bit(flindex, &pool->flbitmap);
        } else {
                /*
                 * DEBUG ONLY: We need not reinitialize freelist head previous
                 * pointer to 0 - we never depend on its value. But just for
                 * sanity, let's do it.
                 */
                tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
                                pool->freelist[slindex].offset, KM_USER1);
                tmpblock->link.prev_page = 0;
                tmpblock->link.prev_offset = 0;
                put_ptr_atomic(tmpblock, KM_USER1);
        }
}
/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block, u32 slindex)
{
        struct block_header *tmpblock;

        if (pool->freelist[slindex].page == page
           && pool->freelist[slindex].offset == offset) {
                remove_block_head(pool, block, slindex);
                return;
        }

        /* Unlink from the middle of the list: fix up both neighbours */
        if (block->link.prev_page) {
                tmpblock = get_ptr_atomic(block->link.prev_page,
                                block->link.prev_offset, KM_USER1);
                tmpblock->link.next_page = block->link.next_page;
                tmpblock->link.next_offset = block->link.next_offset;
                put_ptr_atomic(tmpblock, KM_USER1);
        }

        if (block->link.next_page) {
                tmpblock = get_ptr_atomic(block->link.next_page,
                                block->link.next_offset, KM_USER1);
                tmpblock->link.prev_page = block->link.prev_page;
                tmpblock->link.prev_offset = block->link.prev_offset;
                put_ptr_atomic(tmpblock, KM_USER1);
        }
}
/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
        struct page *page;
        struct block_header *block;

        page = alloc_page(flags);
        if (unlikely(!page))
                return -ENOMEM;

        stat_inc(&pool->total_pages);

        spin_lock(&pool->lock);
        block = get_ptr_atomic(page, 0, KM_USER0);

        /* The whole page, minus one block header, starts as a free block */
        block->size = PAGE_SIZE - XV_ALIGN;
        set_flag(block, BLOCK_FREE);
        clear_flag(block, PREV_FREE);
        set_blockprev(block, 0);

        insert_block(pool, page, 0, block);

        put_ptr_atomic(block, KM_USER0);
        spin_unlock(&pool->lock);

        return 0;
}
/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
        u32 ovhd_size;
        struct xv_pool *pool;

        ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
        pool = kzalloc(ovhd_size, GFP_KERNEL);
        if (!pool)
                return NULL;

        spin_lock_init(&pool->lock);

        return pool;
}

void xv_destroy_pool(struct xv_pool *pool)
{
        kfree(pool);
}
/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies the block allocated
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
                u32 *offset, gfp_t flags)
{
        int error;
        u32 index, tmpsize, origsize, tmpoffset;
        struct block_header *block, *tmpblock;

        *page = NULL;
        *offset = 0;
        origsize = size;

        if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
                return -ENOMEM;

        size = ALIGN(size, XV_ALIGN);

        spin_lock(&pool->lock);

        index = find_block(pool, size, page, offset);

        if (!*page) {
                spin_unlock(&pool->lock);
                if (flags & GFP_NOWAIT)
                        return -ENOMEM;
                error = grow_pool(pool, flags);
                if (unlikely(error))
                        return error;

                spin_lock(&pool->lock);
                index = find_block(pool, size, page, offset);
        }

        if (!*page) {
                spin_unlock(&pool->lock);
                return -ENOMEM;
        }

        block = get_ptr_atomic(*page, *offset, KM_USER0);

        remove_block_head(pool, block, index);

        /* Split the block if required */
        tmpoffset = *offset + size + XV_ALIGN;
        tmpsize = block->size - size;
        tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
        if (tmpsize) {
                tmpblock->size = tmpsize - XV_ALIGN;
                set_flag(tmpblock, BLOCK_FREE);
                clear_flag(tmpblock, PREV_FREE);

                set_blockprev(tmpblock, *offset);
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        insert_block(pool, *page, tmpoffset, tmpblock);

                if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
                        tmpblock = BLOCK_NEXT(tmpblock);
                        set_blockprev(tmpblock, tmpoffset);
                }
        } else {
                /* This block is an exact fit */
                if (tmpoffset != PAGE_SIZE)
                        clear_flag(tmpblock, PREV_FREE);
        }

        block->size = origsize;
        clear_flag(block, BLOCK_FREE);

        put_ptr_atomic(block, KM_USER0);
        spin_unlock(&pool->lock);

        /* Skip the block header; the caller's object starts after it */
        *offset += XV_ALIGN;

        return 0;
}
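/*
 * Split arithmetic, worked through under the assumption PAGE_SIZE == 4096
 * and XV_ALIGN == 4: a fresh page is one free block of size 4092 at offset
 * 0. For a 100-byte request (already XV_ALIGN aligned), the remainder
 * header is placed at tmpoffset == 0 + 100 + 4 == 104, with payload
 * tmpsize - XV_ALIGN == (4092 - 100) - 4 == 3988 bytes. Since
 * 104 + 4 + 3988 == PAGE_SIZE, no block follows the remainder and no
 * further prev-offset update is needed. The caller receives offset
 * 0 + XV_ALIGN == 4.
 */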
/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
        void *page_start;
        struct block_header *block, *tmpblock;

        offset -= XV_ALIGN;

        spin_lock(&pool->lock);

        page_start = get_ptr_atomic(page, 0, KM_USER0);
        block = (struct block_header *)((char *)page_start + offset);

        /* Catch double free bugs */
        BUG_ON(test_flag(block, BLOCK_FREE));

        block->size = ALIGN(block->size, XV_ALIGN);

        tmpblock = BLOCK_NEXT(block);
        if (offset + block->size + XV_ALIGN == PAGE_SIZE)
                tmpblock = NULL;

        /* Merge next block if it's free */
        if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
                /*
                 * Blocks smaller than XV_MIN_ALLOC_SIZE
                 * are not inserted in any free list.
                 */
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
                        remove_block(pool, page,
                                    offset + block->size + XV_ALIGN, tmpblock,
                                    get_index_for_insert(tmpblock->size));
                }
                block->size += tmpblock->size + XV_ALIGN;
        }

        /* Merge previous block if it's free */
        if (test_flag(block, PREV_FREE)) {
                tmpblock = (struct block_header *)((char *)(page_start) +
                                                get_blockprev(block));
                offset = offset - tmpblock->size - XV_ALIGN;

                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        remove_block(pool, page, offset, tmpblock,
                                    get_index_for_insert(tmpblock->size));

                tmpblock->size += block->size + XV_ALIGN;
                block = tmpblock;
        }

        /* No used objects in this page. Free it. */
        if (block->size == PAGE_SIZE - XV_ALIGN) {
                put_ptr_atomic(page_start, KM_USER0);
                spin_unlock(&pool->lock);

                __free_page(page);
                stat_dec(&pool->total_pages);
                return;
        }

        set_flag(block, BLOCK_FREE);
        if (block->size >= XV_MIN_ALLOC_SIZE)
                insert_block(pool, page, offset, block);

        if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
                tmpblock = BLOCK_NEXT(block);
                set_flag(tmpblock, PREV_FREE);
                set_blockprev(tmpblock, offset);
        }

        put_ptr_atomic(page_start, KM_USER0);
        spin_unlock(&pool->lock);
}
u32 xv_get_object_size(void *obj)
{
        struct block_header *blk;

        blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
        return blk->size;
}

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
        return pool->total_pages << PAGE_SHIFT;
}
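/*
 * Usage sketch: a minimal, hypothetical caller of the API above (the
 * function name and the 100-byte size are illustrative only; kept under
 * "#if 0" since it is not part of the allocator). The returned object is
 * named by <page, offset>, not by a pointer, so the caller must map the
 * page - with kmap_atomic() here, mirroring get_ptr_atomic() - before
 * touching it.
 */
#if 0
static int xvmalloc_usage_example(void)
{
        struct xv_pool *pool;
        struct page *page;
        u32 offset;
        unsigned char *obj;

        pool = xv_create_pool();
        if (!pool)
                return -ENOMEM;

        /* Request a 100-byte object; sizes > XV_MAX_ALLOC_SIZE fail */
        if (xv_malloc(pool, 100, &page, &offset, GFP_KERNEL)) {
                xv_destroy_pool(pool);
                return -ENOMEM;
        }

        /* Map the page to get a dereferenceable pointer to the object */
        obj = (unsigned char *)kmap_atomic(page, KM_USER0) + offset;
        memset(obj, 0, 100);
        kunmap_atomic(obj - offset, KM_USER0);

        xv_free(pool, page, offset);
        xv_destroy_pool(pool);
        return 0;
}
#endif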