gro: Allow tunnel stacking in the case of FOU/GUE
[linux/fpc-iii.git] / fs / mbcache.c
blob187477ded6b334c8be0196949cf0e7f40c16b40d
1 /*
2 * linux/fs/mbcache.c
3 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
4 */
6 /*
7 * Filesystem Meta Information Block Cache (mbcache)
9 * The mbcache caches blocks of block devices that need to be located
10 * by their device/block number, as well as by other criteria (such
11 * as the block's contents).
13 * There can only be one cache entry in a cache per device and block number.
14 * Additional indexes need not be unique in this sense. The number of
15 * additional indexes (=other criteria) can be hardwired at compile time
16 * or specified at cache create time.
18 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
19 * in the cache. A valid entry is in the main hash tables of the cache,
20 * and may also be in the lru list. An invalid entry is not in any hashes
21 * or lists.
23 * A valid cache entry is only in the lru list if no handles refer to it.
24 * Invalid cache entries will be freed when the last handle to the cache
25 * entry is released. Entries that cannot be freed immediately are put
26 * back on the lru list.
30 * Lock descriptions and usage:
32 * Each hash chain of both the block and index hash tables now contains
33 * a built-in lock used to serialize accesses to the hash chain.
35 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
36 * are serialized via the global spinlock mb_cache_spinlock.
38 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
39 * accesses to its local data, such as e_used and e_queued.
41 * Lock ordering:
43 * Each block hash chain's lock has the highest lock order, followed by an
44 * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
45 * lock), and mb_cach_spinlock, with the lowest order. While holding
46 * either a block or index hash chain lock, a thread can acquire an
47 * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
49 * Synchronization:
51 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
52 * index hash chian, it needs to lock the corresponding hash chain. For each
53 * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to
54 * prevent either any simultaneous release or free on the entry and also
55 * to serialize accesses to either the e_used or e_queued member of the entry.
57 * To avoid having a dangling reference to an already freed
58 * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
59 * block hash chain and also no longer being referenced, both e_used,
60 * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is
61 * first removed from a block hash chain.
64 #include <linux/kernel.h>
65 #include <linux/module.h>
67 #include <linux/hash.h>
68 #include <linux/fs.h>
69 #include <linux/mm.h>
70 #include <linux/slab.h>
71 #include <linux/sched.h>
72 #include <linux/list_bl.h>
73 #include <linux/mbcache.h>
74 #include <linux/init.h>
75 #include <linux/blockgroup_lock.h>
76 #include <linux/log2.h>
78 #ifdef MB_CACHE_DEBUG
79 # define mb_debug(f...) do { \
80 printk(KERN_DEBUG f); \
81 printk("\n"); \
82 } while (0)
83 #define mb_assert(c) do { if (!(c)) \
84 printk(KERN_ERR "assertion " #c " failed\n"); \
85 } while(0)
86 #else
87 # define mb_debug(f...) do { } while(0)
88 # define mb_assert(c) do { } while(0)
89 #endif
90 #define mb_error(f...) do { \
91 printk(KERN_ERR f); \
92 printk("\n"); \
93 } while(0)
95 #define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
97 #define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
98 #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
99 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
101 static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
102 static struct blockgroup_lock *mb_cache_bg_lock;
103 static struct kmem_cache *mb_cache_kmem_cache;
105 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
106 MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
107 MODULE_LICENSE("GPL");
109 EXPORT_SYMBOL(mb_cache_create);
110 EXPORT_SYMBOL(mb_cache_shrink);
111 EXPORT_SYMBOL(mb_cache_destroy);
112 EXPORT_SYMBOL(mb_cache_entry_alloc);
113 EXPORT_SYMBOL(mb_cache_entry_insert);
114 EXPORT_SYMBOL(mb_cache_entry_release);
115 EXPORT_SYMBOL(mb_cache_entry_free);
116 EXPORT_SYMBOL(mb_cache_entry_get);
117 #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
118 EXPORT_SYMBOL(mb_cache_entry_find_first);
119 EXPORT_SYMBOL(mb_cache_entry_find_next);
120 #endif
123 * Global data: list of all mbcache's, lru list, and a spinlock for
124 * accessing cache data structures on SMP machines. The lru list is
125 * global across all mbcaches.
128 static LIST_HEAD(mb_cache_list);
129 static LIST_HEAD(mb_cache_lru_list);
130 static DEFINE_SPINLOCK(mb_cache_spinlock);
132 static inline void
133 __spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
135 spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
136 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
139 static inline void
140 __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
142 spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
143 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
146 static inline int
147 __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
149 return !hlist_bl_unhashed(&ce->e_block_list);
153 static inline void
154 __mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
156 if (__mb_cache_entry_is_block_hashed(ce))
157 hlist_bl_del_init(&ce->e_block_list);
160 static inline int
161 __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
163 return !hlist_bl_unhashed(&ce->e_index.o_list);
166 static inline void
167 __mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
169 if (__mb_cache_entry_is_index_hashed(ce))
170 hlist_bl_del_init(&ce->e_index.o_list);
174 * __mb_cache_entry_unhash_unlock()
176 * This function is called to unhash both the block and index hash
177 * chain.
178 * It assumes both the block and index hash chain is locked upon entry.
179 * It also unlock both hash chains both exit
181 static inline void
182 __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
184 __mb_cache_entry_unhash_index(ce);
185 hlist_bl_unlock(ce->e_index_hash_p);
186 __mb_cache_entry_unhash_block(ce);
187 hlist_bl_unlock(ce->e_block_hash_p);
190 static void
191 __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
193 struct mb_cache *cache = ce->e_cache;
195 mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
196 kmem_cache_free(cache->c_entry_cache, ce);
197 atomic_dec(&cache->c_entry_count);
200 static void
201 __mb_cache_entry_release(struct mb_cache_entry *ce)
203 /* First lock the entry to serialize access to its local data. */
204 __spin_lock_mb_cache_entry(ce);
205 /* Wake up all processes queuing for this cache entry. */
206 if (ce->e_queued)
207 wake_up_all(&mb_cache_queue);
208 if (ce->e_used >= MB_CACHE_WRITER)
209 ce->e_used -= MB_CACHE_WRITER;
211 * Make sure that all cache entries on lru_list have
212 * both e_used and e_qued of 0s.
214 ce->e_used--;
215 if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
216 if (!__mb_cache_entry_is_block_hashed(ce)) {
217 __spin_unlock_mb_cache_entry(ce);
218 goto forget;
221 * Need access to lru list, first drop entry lock,
222 * then reacquire the lock in the proper order.
224 spin_lock(&mb_cache_spinlock);
225 if (list_empty(&ce->e_lru_list))
226 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
227 spin_unlock(&mb_cache_spinlock);
229 __spin_unlock_mb_cache_entry(ce);
230 return;
231 forget:
232 mb_assert(list_empty(&ce->e_lru_list));
233 __mb_cache_entry_forget(ce, GFP_KERNEL);
237 * mb_cache_shrink_scan() memory pressure callback
239 * This function is called by the kernel memory management when memory
240 * gets low.
242 * @shrink: (ignored)
243 * @sc: shrink_control passed from reclaim
245 * Returns the number of objects freed.
247 static unsigned long
248 mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
250 LIST_HEAD(free_list);
251 struct mb_cache_entry *entry, *tmp;
252 int nr_to_scan = sc->nr_to_scan;
253 gfp_t gfp_mask = sc->gfp_mask;
254 unsigned long freed = 0;
256 mb_debug("trying to free %d entries", nr_to_scan);
257 spin_lock(&mb_cache_spinlock);
258 while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
259 struct mb_cache_entry *ce =
260 list_entry(mb_cache_lru_list.next,
261 struct mb_cache_entry, e_lru_list);
262 list_del_init(&ce->e_lru_list);
263 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
264 continue;
265 spin_unlock(&mb_cache_spinlock);
266 /* Prevent any find or get operation on the entry */
267 hlist_bl_lock(ce->e_block_hash_p);
268 hlist_bl_lock(ce->e_index_hash_p);
269 /* Ignore if it is touched by a find/get */
270 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
271 !list_empty(&ce->e_lru_list)) {
272 hlist_bl_unlock(ce->e_index_hash_p);
273 hlist_bl_unlock(ce->e_block_hash_p);
274 spin_lock(&mb_cache_spinlock);
275 continue;
277 __mb_cache_entry_unhash_unlock(ce);
278 list_add_tail(&ce->e_lru_list, &free_list);
279 spin_lock(&mb_cache_spinlock);
281 spin_unlock(&mb_cache_spinlock);
283 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
284 __mb_cache_entry_forget(entry, gfp_mask);
285 freed++;
287 return freed;
290 static unsigned long
291 mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
293 struct mb_cache *cache;
294 unsigned long count = 0;
296 spin_lock(&mb_cache_spinlock);
297 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
298 mb_debug("cache %s (%d)", cache->c_name,
299 atomic_read(&cache->c_entry_count));
300 count += atomic_read(&cache->c_entry_count);
302 spin_unlock(&mb_cache_spinlock);
304 return vfs_pressure_ratio(count);
307 static struct shrinker mb_cache_shrinker = {
308 .count_objects = mb_cache_shrink_count,
309 .scan_objects = mb_cache_shrink_scan,
310 .seeks = DEFAULT_SEEKS,
314 * mb_cache_create() create a new cache
316 * All entries in one cache are equal size. Cache entries may be from
317 * multiple devices. If this is the first mbcache created, registers
318 * the cache with kernel memory management. Returns NULL if no more
319 * memory was available.
321 * @name: name of the cache (informal)
322 * @bucket_bits: log2(number of hash buckets)
324 struct mb_cache *
325 mb_cache_create(const char *name, int bucket_bits)
327 int n, bucket_count = 1 << bucket_bits;
328 struct mb_cache *cache = NULL;
330 if (!mb_cache_bg_lock) {
331 mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
332 GFP_KERNEL);
333 if (!mb_cache_bg_lock)
334 return NULL;
335 bgl_lock_init(mb_cache_bg_lock);
338 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
339 if (!cache)
340 return NULL;
341 cache->c_name = name;
342 atomic_set(&cache->c_entry_count, 0);
343 cache->c_bucket_bits = bucket_bits;
344 cache->c_block_hash = kmalloc(bucket_count *
345 sizeof(struct hlist_bl_head), GFP_KERNEL);
346 if (!cache->c_block_hash)
347 goto fail;
348 for (n=0; n<bucket_count; n++)
349 INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
350 cache->c_index_hash = kmalloc(bucket_count *
351 sizeof(struct hlist_bl_head), GFP_KERNEL);
352 if (!cache->c_index_hash)
353 goto fail;
354 for (n=0; n<bucket_count; n++)
355 INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
356 if (!mb_cache_kmem_cache) {
357 mb_cache_kmem_cache = kmem_cache_create(name,
358 sizeof(struct mb_cache_entry), 0,
359 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
360 if (!mb_cache_kmem_cache)
361 goto fail2;
363 cache->c_entry_cache = mb_cache_kmem_cache;
366 * Set an upper limit on the number of cache entries so that the hash
367 * chains won't grow too long.
369 cache->c_max_entries = bucket_count << 4;
371 spin_lock(&mb_cache_spinlock);
372 list_add(&cache->c_cache_list, &mb_cache_list);
373 spin_unlock(&mb_cache_spinlock);
374 return cache;
376 fail2:
377 kfree(cache->c_index_hash);
379 fail:
380 kfree(cache->c_block_hash);
381 kfree(cache);
382 return NULL;
387 * mb_cache_shrink()
389 * Removes all cache entries of a device from the cache. All cache entries
390 * currently in use cannot be freed, and thus remain in the cache. All others
391 * are freed.
393 * @bdev: which device's cache entries to shrink
395 void
396 mb_cache_shrink(struct block_device *bdev)
398 LIST_HEAD(free_list);
399 struct list_head *l;
400 struct mb_cache_entry *ce, *tmp;
402 l = &mb_cache_lru_list;
403 spin_lock(&mb_cache_spinlock);
404 while (!list_is_last(l, &mb_cache_lru_list)) {
405 l = l->next;
406 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
407 if (ce->e_bdev == bdev) {
408 list_del_init(&ce->e_lru_list);
409 if (ce->e_used || ce->e_queued ||
410 atomic_read(&ce->e_refcnt))
411 continue;
412 spin_unlock(&mb_cache_spinlock);
414 * Prevent any find or get operation on the entry.
416 hlist_bl_lock(ce->e_block_hash_p);
417 hlist_bl_lock(ce->e_index_hash_p);
418 /* Ignore if it is touched by a find/get */
419 if (ce->e_used || ce->e_queued ||
420 atomic_read(&ce->e_refcnt) ||
421 !list_empty(&ce->e_lru_list)) {
422 hlist_bl_unlock(ce->e_index_hash_p);
423 hlist_bl_unlock(ce->e_block_hash_p);
424 l = &mb_cache_lru_list;
425 spin_lock(&mb_cache_spinlock);
426 continue;
428 __mb_cache_entry_unhash_unlock(ce);
429 mb_assert(!(ce->e_used || ce->e_queued ||
430 atomic_read(&ce->e_refcnt)));
431 list_add_tail(&ce->e_lru_list, &free_list);
432 l = &mb_cache_lru_list;
433 spin_lock(&mb_cache_spinlock);
436 spin_unlock(&mb_cache_spinlock);
438 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
439 __mb_cache_entry_forget(ce, GFP_KERNEL);
445 * mb_cache_destroy()
447 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
448 * and then destroys it. If this was the last mbcache, un-registers the
449 * mbcache from kernel memory management.
451 void
452 mb_cache_destroy(struct mb_cache *cache)
454 LIST_HEAD(free_list);
455 struct mb_cache_entry *ce, *tmp;
457 spin_lock(&mb_cache_spinlock);
458 list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
459 if (ce->e_cache == cache)
460 list_move_tail(&ce->e_lru_list, &free_list);
462 list_del(&cache->c_cache_list);
463 spin_unlock(&mb_cache_spinlock);
465 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
466 list_del_init(&ce->e_lru_list);
468 * Prevent any find or get operation on the entry.
470 hlist_bl_lock(ce->e_block_hash_p);
471 hlist_bl_lock(ce->e_index_hash_p);
472 mb_assert(!(ce->e_used || ce->e_queued ||
473 atomic_read(&ce->e_refcnt)));
474 __mb_cache_entry_unhash_unlock(ce);
475 __mb_cache_entry_forget(ce, GFP_KERNEL);
478 if (atomic_read(&cache->c_entry_count) > 0) {
479 mb_error("cache %s: %d orphaned entries",
480 cache->c_name,
481 atomic_read(&cache->c_entry_count));
484 if (list_empty(&mb_cache_list)) {
485 kmem_cache_destroy(mb_cache_kmem_cache);
486 mb_cache_kmem_cache = NULL;
488 kfree(cache->c_index_hash);
489 kfree(cache->c_block_hash);
490 kfree(cache);
494 * mb_cache_entry_alloc()
496 * Allocates a new cache entry. The new entry will not be valid initially,
497 * and thus cannot be looked up yet. It should be filled with data, and
498 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
499 * if no more memory was available.
501 struct mb_cache_entry *
502 mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
504 struct mb_cache_entry *ce;
506 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
507 struct list_head *l;
509 l = &mb_cache_lru_list;
510 spin_lock(&mb_cache_spinlock);
511 while (!list_is_last(l, &mb_cache_lru_list)) {
512 l = l->next;
513 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
514 if (ce->e_cache == cache) {
515 list_del_init(&ce->e_lru_list);
516 if (ce->e_used || ce->e_queued ||
517 atomic_read(&ce->e_refcnt))
518 continue;
519 spin_unlock(&mb_cache_spinlock);
521 * Prevent any find or get operation on the
522 * entry.
524 hlist_bl_lock(ce->e_block_hash_p);
525 hlist_bl_lock(ce->e_index_hash_p);
526 /* Ignore if it is touched by a find/get */
527 if (ce->e_used || ce->e_queued ||
528 atomic_read(&ce->e_refcnt) ||
529 !list_empty(&ce->e_lru_list)) {
530 hlist_bl_unlock(ce->e_index_hash_p);
531 hlist_bl_unlock(ce->e_block_hash_p);
532 l = &mb_cache_lru_list;
533 spin_lock(&mb_cache_spinlock);
534 continue;
536 mb_assert(list_empty(&ce->e_lru_list));
537 mb_assert(!(ce->e_used || ce->e_queued ||
538 atomic_read(&ce->e_refcnt)));
539 __mb_cache_entry_unhash_unlock(ce);
540 goto found;
543 spin_unlock(&mb_cache_spinlock);
546 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
547 if (!ce)
548 return NULL;
549 atomic_inc(&cache->c_entry_count);
550 INIT_LIST_HEAD(&ce->e_lru_list);
551 INIT_HLIST_BL_NODE(&ce->e_block_list);
552 INIT_HLIST_BL_NODE(&ce->e_index.o_list);
553 ce->e_cache = cache;
554 ce->e_queued = 0;
555 atomic_set(&ce->e_refcnt, 0);
556 found:
557 ce->e_block_hash_p = &cache->c_block_hash[0];
558 ce->e_index_hash_p = &cache->c_index_hash[0];
559 ce->e_used = 1 + MB_CACHE_WRITER;
560 return ce;
565 * mb_cache_entry_insert()
567 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
568 * the cache. After this, the cache entry can be looked up, but is not yet
569 * in the lru list as the caller still holds a handle to it. Returns 0 on
570 * success, or -EBUSY if a cache entry for that device + inode exists
571 * already (this may happen after a failed lookup, but when another process
572 * has inserted the same cache entry in the meantime).
574 * @bdev: device the cache entry belongs to
575 * @block: block number
576 * @key: lookup key
579 mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
580 sector_t block, unsigned int key)
582 struct mb_cache *cache = ce->e_cache;
583 unsigned int bucket;
584 struct hlist_bl_node *l;
585 struct hlist_bl_head *block_hash_p;
586 struct hlist_bl_head *index_hash_p;
587 struct mb_cache_entry *lce;
589 mb_assert(ce);
590 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
591 cache->c_bucket_bits);
592 block_hash_p = &cache->c_block_hash[bucket];
593 hlist_bl_lock(block_hash_p);
594 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
595 if (lce->e_bdev == bdev && lce->e_block == block) {
596 hlist_bl_unlock(block_hash_p);
597 return -EBUSY;
600 mb_assert(!__mb_cache_entry_is_block_hashed(ce));
601 __mb_cache_entry_unhash_block(ce);
602 __mb_cache_entry_unhash_index(ce);
603 ce->e_bdev = bdev;
604 ce->e_block = block;
605 ce->e_block_hash_p = block_hash_p;
606 ce->e_index.o_key = key;
607 hlist_bl_add_head(&ce->e_block_list, block_hash_p);
608 hlist_bl_unlock(block_hash_p);
609 bucket = hash_long(key, cache->c_bucket_bits);
610 index_hash_p = &cache->c_index_hash[bucket];
611 hlist_bl_lock(index_hash_p);
612 ce->e_index_hash_p = index_hash_p;
613 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
614 hlist_bl_unlock(index_hash_p);
615 return 0;
620 * mb_cache_entry_release()
622 * Release a handle to a cache entry. When the last handle to a cache entry
623 * is released it is either freed (if it is invalid) or otherwise inserted
624 * in to the lru list.
626 void
627 mb_cache_entry_release(struct mb_cache_entry *ce)
629 __mb_cache_entry_release(ce);
634 * mb_cache_entry_free()
637 void
638 mb_cache_entry_free(struct mb_cache_entry *ce)
640 mb_assert(ce);
641 mb_assert(list_empty(&ce->e_lru_list));
642 hlist_bl_lock(ce->e_index_hash_p);
643 __mb_cache_entry_unhash_index(ce);
644 hlist_bl_unlock(ce->e_index_hash_p);
645 hlist_bl_lock(ce->e_block_hash_p);
646 __mb_cache_entry_unhash_block(ce);
647 hlist_bl_unlock(ce->e_block_hash_p);
648 __mb_cache_entry_release(ce);
653 * mb_cache_entry_get()
655 * Get a cache entry by device / block number. (There can only be one entry
656 * in the cache per device and block.) Returns NULL if no such cache entry
657 * exists. The returned cache entry is locked for exclusive access ("single
658 * writer").
660 struct mb_cache_entry *
661 mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
662 sector_t block)
664 unsigned int bucket;
665 struct hlist_bl_node *l;
666 struct mb_cache_entry *ce;
667 struct hlist_bl_head *block_hash_p;
669 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
670 cache->c_bucket_bits);
671 block_hash_p = &cache->c_block_hash[bucket];
672 /* First serialize access to the block corresponding hash chain. */
673 hlist_bl_lock(block_hash_p);
674 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
675 mb_assert(ce->e_block_hash_p == block_hash_p);
676 if (ce->e_bdev == bdev && ce->e_block == block) {
678 * Prevent a free from removing the entry.
680 atomic_inc(&ce->e_refcnt);
681 hlist_bl_unlock(block_hash_p);
682 __spin_lock_mb_cache_entry(ce);
683 atomic_dec(&ce->e_refcnt);
684 if (ce->e_used > 0) {
685 DEFINE_WAIT(wait);
686 while (ce->e_used > 0) {
687 ce->e_queued++;
688 prepare_to_wait(&mb_cache_queue, &wait,
689 TASK_UNINTERRUPTIBLE);
690 __spin_unlock_mb_cache_entry(ce);
691 schedule();
692 __spin_lock_mb_cache_entry(ce);
693 ce->e_queued--;
695 finish_wait(&mb_cache_queue, &wait);
697 ce->e_used += 1 + MB_CACHE_WRITER;
698 __spin_unlock_mb_cache_entry(ce);
700 if (!list_empty(&ce->e_lru_list)) {
701 spin_lock(&mb_cache_spinlock);
702 list_del_init(&ce->e_lru_list);
703 spin_unlock(&mb_cache_spinlock);
705 if (!__mb_cache_entry_is_block_hashed(ce)) {
706 __mb_cache_entry_release(ce);
707 return NULL;
709 return ce;
712 hlist_bl_unlock(block_hash_p);
713 return NULL;
716 #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
718 static struct mb_cache_entry *
719 __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
720 struct block_device *bdev, unsigned int key)
723 /* The index hash chain is alredy acquire by caller. */
724 while (l != NULL) {
725 struct mb_cache_entry *ce =
726 hlist_bl_entry(l, struct mb_cache_entry,
727 e_index.o_list);
728 mb_assert(ce->e_index_hash_p == head);
729 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
731 * Prevent a free from removing the entry.
733 atomic_inc(&ce->e_refcnt);
734 hlist_bl_unlock(head);
735 __spin_lock_mb_cache_entry(ce);
736 atomic_dec(&ce->e_refcnt);
737 ce->e_used++;
738 /* Incrementing before holding the lock gives readers
739 priority over writers. */
740 if (ce->e_used >= MB_CACHE_WRITER) {
741 DEFINE_WAIT(wait);
743 while (ce->e_used >= MB_CACHE_WRITER) {
744 ce->e_queued++;
745 prepare_to_wait(&mb_cache_queue, &wait,
746 TASK_UNINTERRUPTIBLE);
747 __spin_unlock_mb_cache_entry(ce);
748 schedule();
749 __spin_lock_mb_cache_entry(ce);
750 ce->e_queued--;
752 finish_wait(&mb_cache_queue, &wait);
754 __spin_unlock_mb_cache_entry(ce);
755 if (!list_empty(&ce->e_lru_list)) {
756 spin_lock(&mb_cache_spinlock);
757 list_del_init(&ce->e_lru_list);
758 spin_unlock(&mb_cache_spinlock);
760 if (!__mb_cache_entry_is_block_hashed(ce)) {
761 __mb_cache_entry_release(ce);
762 return ERR_PTR(-EAGAIN);
764 return ce;
766 l = l->next;
768 hlist_bl_unlock(head);
769 return NULL;
774 * mb_cache_entry_find_first()
776 * Find the first cache entry on a given device with a certain key in
777 * an additional index. Additional matches can be found with
778 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
779 * returned cache entry is locked for shared access ("multiple readers").
781 * @cache: the cache to search
782 * @bdev: the device the cache entry should belong to
783 * @key: the key in the index
785 struct mb_cache_entry *
786 mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
787 unsigned int key)
789 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
790 struct hlist_bl_node *l;
791 struct mb_cache_entry *ce = NULL;
792 struct hlist_bl_head *index_hash_p;
794 index_hash_p = &cache->c_index_hash[bucket];
795 hlist_bl_lock(index_hash_p);
796 if (!hlist_bl_empty(index_hash_p)) {
797 l = hlist_bl_first(index_hash_p);
798 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
799 } else
800 hlist_bl_unlock(index_hash_p);
801 return ce;
806 * mb_cache_entry_find_next()
808 * Find the next cache entry on a given device with a certain key in an
809 * additional index. Returns NULL if no match could be found. The previous
810 * entry is atomatically released, so that mb_cache_entry_find_next() can
811 * be called like this:
813 * entry = mb_cache_entry_find_first();
814 * while (entry) {
815 * ...
816 * entry = mb_cache_entry_find_next(entry, ...);
819 * @prev: The previous match
820 * @bdev: the device the cache entry should belong to
821 * @key: the key in the index
823 struct mb_cache_entry *
824 mb_cache_entry_find_next(struct mb_cache_entry *prev,
825 struct block_device *bdev, unsigned int key)
827 struct mb_cache *cache = prev->e_cache;
828 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
829 struct hlist_bl_node *l;
830 struct mb_cache_entry *ce;
831 struct hlist_bl_head *index_hash_p;
833 index_hash_p = &cache->c_index_hash[bucket];
834 mb_assert(prev->e_index_hash_p == index_hash_p);
835 hlist_bl_lock(index_hash_p);
836 mb_assert(!hlist_bl_empty(index_hash_p));
837 l = prev->e_index.o_list.next;
838 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
839 __mb_cache_entry_release(prev);
840 return ce;
843 #endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
845 static int __init init_mbcache(void)
847 register_shrinker(&mb_cache_shrinker);
848 return 0;
851 static void __exit exit_mbcache(void)
853 unregister_shrinker(&mb_cache_shrinker);
856 module_init(init_mbcache)
857 module_exit(exit_mbcache)