drivers/md/persistent-data/dm-block-manager.c
/*
 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-io.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "block manager"
/*----------------------------------------------------------------*/

#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define MAX_CACHE_SIZE 16U
enum dm_block_state {
	BS_EMPTY,
	BS_CLEAN,
	BS_READING,
	BS_WRITING,
	BS_READ_LOCKED,
	BS_READ_LOCKED_DIRTY,	/* Block was dirty before it was read locked. */
	BS_WRITE_LOCKED,
	BS_DIRTY,
	BS_ERROR
};
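/*
 * A single cached block: the raw data buffer, its location on disk, the
 * lock/io state above, and the list and hash linkage used by the block
 * manager below.
 */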
struct dm_block {
	struct list_head list;
	struct hlist_node hlist;

	dm_block_t where;
	struct dm_block_validator *validator;
	void *data;
	wait_queue_head_t io_q;
	unsigned read_lock_count;
	unsigned write_lock_pending;
	enum dm_block_state state;

	/*
	 * Extra flags like REQ_FLUSH and REQ_FUA can be set here.  This is
	 * mainly used to avoid a race condition in flush_and_unlock() where
	 * the newly-unlocked superblock may have been submitted for a
	 * write before the write_all_dirty() call is made.
	 */
	int io_flags;

	/*
	 * Sadly we need an up pointer so we can get to the bm on io
	 * completion.
	 */
	struct dm_block_manager *bm;
};
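/*
 * The block manager proper: a fixed-size cache of blocks for one block
 * device, indexed by a hash table and partitioned across the four lists
 * below according to block state.
 */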
struct dm_block_manager {
	struct block_device *bdev;
	unsigned cache_size;
	unsigned max_held_per_thread;
	unsigned block_size;		/* In bytes */
	dm_block_t nr_blocks;

	/*
	 * This will trigger every time an io completes.
	 */
	wait_queue_head_t io_q;

	struct dm_io_client *io;

	/*
	 * Protects all the lists and the hash table.
	 */
	spinlock_t lock;

	unsigned error_count;
	unsigned available_count;
	unsigned reading_count;
	unsigned writing_count;

	struct list_head empty_list;	/* No block assigned */
	struct list_head clean_list;	/* Unlocked and clean */
	struct list_head dirty_list;	/* Unlocked and dirty */
	struct list_head error_list;

	char buffer_cache_name[32];
	struct kmem_cache *buffer_cache; /* The buffers that store the raw data */

	/*
	 * Hash table of cached blocks, holds everything that isn't in the
	 * BS_EMPTY state.
	 */
	unsigned hash_size;
	unsigned hash_mask;

	struct hlist_head buckets[0]; /* Must be last member of struct. */
};
dm_block_t dm_block_location(struct dm_block *b)
{
	return b->where;
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return b->data;
}
EXPORT_SYMBOL_GPL(dm_block_data);
/*----------------------------------------------------------------
 * Hash table
 *--------------------------------------------------------------*/
static struct dm_block *__find_block(struct dm_block_manager *bm, dm_block_t b)
{
	unsigned bucket = dm_hash_block(b, bm->hash_mask);
	struct dm_block *blk;
	struct hlist_node *n;

	hlist_for_each_entry(blk, n, bm->buckets + bucket, hlist)
		if (blk->where == b)
			return blk;

	return NULL;
}

static void __insert_block(struct dm_block_manager *bm, struct dm_block *b)
{
	unsigned bucket = dm_hash_block(b->where, bm->hash_mask);

	hlist_add_head(&b->hlist, bm->buckets + bucket);
}
/*----------------------------------------------------------------
 * Block state:
 * __transition() handles transition of a block between different states.
 * Study this to understand the state machine.
 *
 * Alternatively install graphviz and run:
 *     grep DOT dm-block-manager.c | grep -v ' ' |
 *       sed -e 's/.*DOT: //' -e 's/\*\///' |
 *       dot -Tps -o states.ps
 *
 * Assumes bm->lock is held.
 *--------------------------------------------------------------*/
static void __transition(struct dm_block *b, enum dm_block_state new_state)
{
	/* DOT: digraph BlockStates { */
	struct dm_block_manager *bm = b->bm;

	switch (new_state) {
	case BS_EMPTY:
		/* DOT: error -> empty */
		/* DOT: clean -> empty */
		BUG_ON(!((b->state == BS_ERROR) ||
			 (b->state == BS_CLEAN)));
		hlist_del(&b->hlist);
		list_move(&b->list, &bm->empty_list);
		b->write_lock_pending = 0;
		b->read_lock_count = 0;
		b->io_flags = 0;
		b->validator = NULL;

		if (b->state == BS_ERROR) {
			bm->error_count--;
			bm->available_count++;
		}
		break;

	case BS_CLEAN:
		/* DOT: reading -> clean */
		/* DOT: writing -> clean */
		/* DOT: read_locked -> clean */
		BUG_ON(!((b->state == BS_READING) ||
			 (b->state == BS_WRITING) ||
			 (b->state == BS_READ_LOCKED)));
		switch (b->state) {
		case BS_READING:
			BUG_ON(!bm->reading_count);
			bm->reading_count--;
			break;

		case BS_WRITING:
			BUG_ON(!bm->writing_count);
			bm->writing_count--;
			b->io_flags = 0;
			break;

		default:
			break;
		}
		list_add_tail(&b->list, &bm->clean_list);
		bm->available_count++;
		break;

	case BS_READING:
		/* DOT: empty -> reading */
		BUG_ON(!(b->state == BS_EMPTY));
		__insert_block(bm, b);
		list_del(&b->list);
		bm->available_count--;
		bm->reading_count++;
		break;

	case BS_WRITING:
		/* DOT: dirty -> writing */
		BUG_ON(!(b->state == BS_DIRTY));
		list_del(&b->list);
		bm->writing_count++;
		break;

	case BS_READ_LOCKED:
		/* DOT: clean -> read_locked */
		BUG_ON(!(b->state == BS_CLEAN));
		list_del(&b->list);
		bm->available_count--;
		break;

	case BS_READ_LOCKED_DIRTY:
		/* DOT: dirty -> read_locked_dirty */
		BUG_ON(!((b->state == BS_DIRTY)));
		list_del(&b->list);
		break;

	case BS_WRITE_LOCKED:
		/* DOT: dirty -> write_locked */
		/* DOT: clean -> write_locked */
		BUG_ON(!((b->state == BS_DIRTY) ||
			 (b->state == BS_CLEAN)));
		list_del(&b->list);

		if (b->state == BS_CLEAN)
			bm->available_count--;
		break;

	case BS_DIRTY:
		/* DOT: write_locked -> dirty */
		/* DOT: read_locked_dirty -> dirty */
		BUG_ON(!((b->state == BS_WRITE_LOCKED) ||
			 (b->state == BS_READ_LOCKED_DIRTY)));
		list_add_tail(&b->list, &bm->dirty_list);
		break;

	case BS_ERROR:
		/* DOT: writing -> error */
		/* DOT: reading -> error */
		BUG_ON(!((b->state == BS_WRITING) ||
			 (b->state == BS_READING)));
		bm->error_count++;
		list_add_tail(&b->list, &bm->error_list);
		break;
	}

	b->state = new_state;
	/* DOT: } */
}
/*----------------------------------------------------------------
 * Low-level io.
 *--------------------------------------------------------------*/
typedef void (completion_fn)(unsigned long error, struct dm_block *b);
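/*
 * Issue a single dm-io request covering the whole block.  If submission
 * itself fails, the completion callback is invoked directly with an error.
 */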
static void submit_io(struct dm_block *b, int rw,
		      completion_fn fn)
{
	struct dm_block_manager *bm = b->bm;
	struct dm_io_request req;
	struct dm_io_region region;
	unsigned sectors_per_block = bm->block_size >> SECTOR_SHIFT;

	region.bdev = bm->bdev;
	region.sector = b->where * sectors_per_block;
	region.count = sectors_per_block;

	req.bi_rw = rw;
	req.mem.type = DM_IO_KMEM;
	req.mem.offset = 0;
	req.mem.ptr.addr = b->data;
	req.notify.fn = (void (*)(unsigned long, void *)) fn;
	req.notify.context = b;
	req.client = bm->io;

	if (dm_io(&req, 1, &region, NULL) < 0)
		fn(1, b);
}
/*----------------------------------------------------------------
 * High-level io.
 *--------------------------------------------------------------*/
static void __complete_io(unsigned long error, struct dm_block *b)
{
	struct dm_block_manager *bm = b->bm;

	if (error) {
		DMERR("io error = %lu, block = %llu",
		      error, (unsigned long long)b->where);
		__transition(b, BS_ERROR);
	} else
		__transition(b, BS_CLEAN);

	wake_up(&b->io_q);
	wake_up(&bm->io_q);
}
static void complete_io(unsigned long error, struct dm_block *b)
{
	struct dm_block_manager *bm = b->bm;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
	__complete_io(error, b);
	spin_unlock_irqrestore(&bm->lock, flags);
}
static void read_block(struct dm_block *b)
{
	submit_io(b, READ, complete_io);
}

static void write_block(struct dm_block *b)
{
	if (b->validator)
		b->validator->prepare_for_write(b->validator, b,
						b->bm->block_size);

	submit_io(b, WRITE | b->io_flags, complete_io);
}
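/*
 * Move up to @count blocks from the dirty list into BS_WRITING and submit
 * them for io.  Takes and drops bm->lock internally, so it must be called
 * without the lock held.
 */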
static void write_dirty(struct dm_block_manager *bm, unsigned count)
{
	struct dm_block *b, *tmp;
	struct list_head dirty;
	unsigned long flags;

	/*
	 * Grab the first @count entries from the dirty list
	 */
	INIT_LIST_HEAD(&dirty);
	spin_lock_irqsave(&bm->lock, flags);
	list_for_each_entry_safe(b, tmp, &bm->dirty_list, list) {
		if (!count--)
			break;
		__transition(b, BS_WRITING);
		list_add_tail(&b->list, &dirty);
	}
	spin_unlock_irqrestore(&bm->lock, flags);

	list_for_each_entry_safe(b, tmp, &dirty, list) {
		list_del(&b->list);
		write_block(b);
	}
}
static void write_all_dirty(struct dm_block_manager *bm)
{
	write_dirty(bm, bm->cache_size);
}

static void __clear_errors(struct dm_block_manager *bm)
{
	struct dm_block *b, *tmp;
	list_for_each_entry_safe(b, tmp, &bm->error_list, list)
		__transition(b, BS_EMPTY);
}
/*----------------------------------------------------------------
 * Waiting
 *--------------------------------------------------------------*/
#ifdef __CHECKER__
#  define __retains(x) __attribute__((context(x, 1, 1)))
#else
#  define __retains(x)
#endif
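/*
 * Sleep on @wq until @condition becomes true, dropping @lock (with irqs
 * restored to @flags) around each call to @sched_fn and retaking it before
 * the condition is rechecked.
 */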
#define __wait_block(wq, lock, flags, sched_fn, condition)		\
do {									\
	DEFINE_WAIT(wait);						\
	add_wait_queue(wq, &wait);					\
									\
	for (;;) {							\
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);	\
		if (condition)						\
			break;						\
									\
		spin_unlock_irqrestore(lock, flags);			\
		sched_fn();						\
		spin_lock_irqsave(lock, flags);				\
	}								\
									\
	finish_wait(wq, &wait);						\
} while (0)
static void __wait_io(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, io_schedule,
		     ((b->state != BS_READING) && (b->state != BS_WRITING)));
}

static void __wait_unlocked(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
		     ((b->state == BS_CLEAN) || (b->state == BS_DIRTY)));
}

static void __wait_read_lockable(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
		     (!b->write_lock_pending && (b->state == BS_CLEAN ||
						 b->state == BS_DIRTY ||
						 b->state == BS_READ_LOCKED)));
}

static void __wait_all_writes(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     !bm->writing_count);
}

static void __wait_all_io(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     !bm->writing_count && !bm->reading_count);
}

static void __wait_clean(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     (!list_empty(&bm->clean_list) ||
		      (!bm->writing_count)));
}
/*----------------------------------------------------------------
 * Finding a free block to recycle
 *--------------------------------------------------------------*/
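/*
 * Take a block from the empty or clean lists and rebind it to @where,
 * optionally reading its contents from disk.  Called with bm->lock held;
 * the lock may be dropped and retaken while waiting for io.
 */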
static int __recycle_block(struct dm_block_manager *bm, dm_block_t where,
			   int need_read, struct dm_block_validator *v,
			   unsigned long flags,
			   struct dm_block **result)
	__retains(&bm->lock)
{
	int r = 0;
	struct dm_block *b;
	unsigned long available;

	/*
	 * Wait for a block to appear on the empty or clean lists.
	 */
retry:
	while (1) {
		/*
		 * The calling thread may hold some locks on blocks, and
		 * the rest be errored.  In which case we're never going to
		 * succeed here.
		 */
		if (bm->error_count == bm->cache_size - bm->max_held_per_thread)
			return -ENOMEM;

		/*
		 * Once we can lock and do io concurrently then we should
		 * probably flush at bm->cache_size / 2 and write _all_
		 * dirty blocks.
		 */
		available = bm->available_count + bm->writing_count;
		if (available < bm->cache_size / 4) {
			spin_unlock_irqrestore(&bm->lock, flags);
			write_dirty(bm, bm->cache_size / 4);
			spin_lock_irqsave(&bm->lock, flags);
		}

		if (!list_empty(&bm->empty_list)) {
			b = list_first_entry(&bm->empty_list, struct dm_block, list);
			break;

		} else if (!list_empty(&bm->clean_list)) {
			b = list_first_entry(&bm->clean_list, struct dm_block, list);
			__transition(b, BS_EMPTY);
			break;
		}

		__wait_clean(bm, &flags);
	}

	b->where = where;
	__transition(b, BS_READING);

	if (!need_read) {
		memset(b->data, 0, bm->block_size);
		b->validator = v;
		__transition(b, BS_CLEAN);
	} else {
		spin_unlock_irqrestore(&bm->lock, flags);
		read_block(b);
		spin_lock_irqsave(&bm->lock, flags);
		__wait_io(b, &flags);

		/*
		 * Has b been recycled whilst we were unlocked?
		 */
		if (b->where != where)
			goto retry;

		/*
		 * Did the io succeed?
		 */
		if (b->state == BS_ERROR) {
			/*
			 * Since this is a read that has failed we can clear the error
			 * immediately.  Failed writes are revealed during a commit.
			 */
			__transition(b, BS_EMPTY);
			r = -EIO;
		} else {
			/*
			 * We set the validator late, since there's a
			 * window while we're waiting for the read where
			 * someone could have set a different one.
			 */
			b->validator = v;
			if (b->validator) {
				r = b->validator->check(b->validator, b, bm->block_size);
				if (r) {
					DMERR("%s validator check failed for block %llu",
					      b->validator->name, (unsigned long long)b->where);
					__transition(b, BS_EMPTY);
				}
			}
		}
	}

	if (!r)
		*result = b;

	return r;
}
/*----------------------------------------------------------------
 * Low level block management
 *--------------------------------------------------------------*/
static struct kmem_cache *dm_block_cache;	/* struct dm_block */

static struct dm_block *alloc_block(struct dm_block_manager *bm)
{
	struct dm_block *b = kmem_cache_alloc(dm_block_cache, GFP_KERNEL);

	if (!b)
		return NULL;

	INIT_LIST_HEAD(&b->list);
	INIT_HLIST_NODE(&b->hlist);

	b->data = kmem_cache_alloc(bm->buffer_cache, GFP_KERNEL);
	if (!b->data) {
		kmem_cache_free(dm_block_cache, b);
		return NULL;
	}

	b->validator = NULL;
	b->state = BS_EMPTY;
	init_waitqueue_head(&b->io_q);
	b->read_lock_count = 0;
	b->write_lock_pending = 0;
	b->io_flags = 0;
	b->bm = bm;

	return b;
}
static void free_block(struct dm_block *b)
{
	kmem_cache_free(b->bm->buffer_cache, b->data);
	kmem_cache_free(dm_block_cache, b);
}

static int populate_bm(struct dm_block_manager *bm, unsigned count)
{
	int i;
	LIST_HEAD(bs);

	for (i = 0; i < count; i++) {
		struct dm_block *b = alloc_block(bm);
		if (!b) {
			struct dm_block *tmp;
			list_for_each_entry_safe(b, tmp, &bs, list)
				free_block(b);
			return -ENOMEM;
		}

		list_add(&b->list, &bs);
	}

	list_replace(&bs, &bm->empty_list);
	bm->available_count = count;

	return 0;
}
/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
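/*
 * A sketch of the expected calling pattern (illustrative only, not lifted
 * from an in-tree user; the validator may be NULL if no checking is
 * required):
 *
 *	struct dm_block *b;
 *	int r = dm_bm_read_lock(bm, where, NULL, &b);
 *	if (r)
 *		return r;
 *	... inspect dm_block_data(b) ...
 *	dm_bm_unlock(b);
 *
 * Writers use dm_bm_write_lock() / dm_bm_write_lock_zero() instead, and
 * the resulting dirty blocks reach the disk via dm_bm_flush_and_unlock().
 */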
static unsigned calc_hash_size(unsigned cache_size)
{
	unsigned r = 32;	/* Minimum size is 16 */

	while (r < cache_size)
		r <<= 1;

	return r >> 1;
}
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	unsigned i;
	unsigned hash_size = calc_hash_size(cache_size);
	size_t len = sizeof(struct dm_block_manager) +
		     sizeof(struct hlist_head) * hash_size;
	struct dm_block_manager *bm;

	bm = kmalloc(len, GFP_KERNEL);
	if (!bm)
		return NULL;

	bm->bdev = bdev;
	bm->cache_size = max(MAX_CACHE_SIZE, cache_size);
	bm->max_held_per_thread = max_held_per_thread;
	bm->block_size = block_size;
	bm->nr_blocks = i_size_read(bdev->bd_inode);
	do_div(bm->nr_blocks, block_size);
	init_waitqueue_head(&bm->io_q);
	spin_lock_init(&bm->lock);

	INIT_LIST_HEAD(&bm->empty_list);
	INIT_LIST_HEAD(&bm->clean_list);
	INIT_LIST_HEAD(&bm->dirty_list);
	INIT_LIST_HEAD(&bm->error_list);
	bm->error_count = 0;
	bm->available_count = 0;
	bm->reading_count = 0;
	bm->writing_count = 0;

	sprintf(bm->buffer_cache_name, "dm_block_buffer-%d-%d",
		MAJOR(disk_devt(bdev->bd_disk)),
		MINOR(disk_devt(bdev->bd_disk)));

	bm->buffer_cache = kmem_cache_create(bm->buffer_cache_name,
					     block_size, SECTOR_SIZE,
					     0, NULL);
	if (!bm->buffer_cache)
		goto bad_free_bm;

	bm->hash_size = hash_size;
	bm->hash_mask = hash_size - 1;
	for (i = 0; i < hash_size; i++)
		INIT_HLIST_HEAD(bm->buckets + i);

	bm->io = dm_io_client_create();
	if (IS_ERR(bm->io))
		goto bad_free_buffer_cache;

	if (populate_bm(bm, cache_size) < 0)
		goto bad_free_io_client;

	return bm;

bad_free_io_client:
	dm_io_client_destroy(bm->io);
bad_free_buffer_cache:
	kmem_cache_destroy(bm->buffer_cache);
bad_free_bm:
	kfree(bm);

	return NULL;
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	int i;
	struct dm_block *b, *btmp;
	struct hlist_node *n, *tmp;

	dm_io_client_destroy(bm->io);

	for (i = 0; i < bm->hash_size; i++)
		hlist_for_each_entry_safe(b, n, tmp, bm->buckets + i, hlist)
			free_block(b);

	list_for_each_entry_safe(b, btmp, &bm->empty_list, list)
		free_block(b);

	kmem_cache_destroy(bm->buffer_cache);

	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return bm->block_size;
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return bm->nr_blocks;
}
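/*
 * Common implementation behind the public locking calls.  @how is READ or
 * WRITE, @need_read says whether existing data must be read and validated,
 * and @can_block selects between sleeping and returning -EWOULDBLOCK.
 */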
static int lock_internal(struct dm_block_manager *bm, dm_block_t block,
			 int how, int need_read, int can_block,
			 struct dm_block_validator *v,
			 struct dm_block **result)
{
	int r = 0;
	struct dm_block *b;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
retry:
	b = __find_block(bm, block);
	if (b) {
		/*
		 * The block may be in state BS_READING at this point.
		 * Which means we're racing for this block against another
		 * locking op.  This is fine, __wait_read_lockable() below
		 * will do the right thing.  We do need to be careful
		 * however that the validator isn't set until the lock is
		 * fully granted, otherwise the other thread could get the
		 * lock, but this one's validator would be used.  This
		 * situation only arises if there's a programming error in
		 * the code driving bm.
		 */

		switch (how) {
		case READ:
			if (b->write_lock_pending || (b->state != BS_CLEAN &&
						      b->state != BS_DIRTY &&
						      b->state != BS_READ_LOCKED)) {
				if (!can_block) {
					spin_unlock_irqrestore(&bm->lock, flags);
					return -EWOULDBLOCK;
				}

				__wait_read_lockable(b, &flags);

				if (b->where != block)
					goto retry;
			}
			break;

		case WRITE:
			while (b->state != BS_CLEAN && b->state != BS_DIRTY) {
				if (!can_block) {
					spin_unlock_irqrestore(&bm->lock, flags);
					return -EWOULDBLOCK;
				}

				b->write_lock_pending++;
				__wait_unlocked(b, &flags);
				if (b->where != block)
					/*
					 * Recycled blocks have their
					 * write_lock_pending count reset
					 * to zero, so no need to undo the
					 * above increment.
					 */
					goto retry;
				b->write_lock_pending--;
			}
			break;
		}

		if (!need_read)
			b->validator = v;
		else {
			if (b->validator && (v != b->validator)) {
				DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
				      b->validator->name, v ? v->name : "NULL",
				      (unsigned long long)b->where);
				spin_unlock_irqrestore(&bm->lock, flags);
				return -EINVAL;
			}

			if (!b->validator && v) {
				b->validator = v;
				r = b->validator->check(b->validator, b, bm->block_size);
				if (r) {
					DMERR("%s validator check failed for block %llu",
					      b->validator->name,
					      (unsigned long long)b->where);
					spin_unlock_irqrestore(&bm->lock, flags);
					return r;
				}
			}
		}

	} else if (!can_block) {
		r = -EWOULDBLOCK;
		goto out;

	} else
		r = __recycle_block(bm, block, need_read, v, flags, &b);

	if (!r) {
		switch (how) {
		case READ:
			b->read_lock_count++;

			if (b->state == BS_DIRTY)
				__transition(b, BS_READ_LOCKED_DIRTY);
			else if (b->state == BS_CLEAN)
				__transition(b, BS_READ_LOCKED);
			break;

		case WRITE:
			__transition(b, BS_WRITE_LOCKED);
			break;
		}

		*result = b;
	}

out:
	spin_unlock_irqrestore(&bm->lock, flags);

	return r;
}
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	return lock_internal(bm, b, READ, 1, 1, v, result);
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	return lock_internal(bm, b, WRITE, 1, 1, v, result);
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	return lock_internal(bm, b, READ, 1, 0, v, result);
}
int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r = lock_internal(bm, b, WRITE, 0, 1, v, result);

	if (!r)
		memset((*result)->data, 0, bm->block_size);

	return r;
}
int dm_bm_unlock(struct dm_block *b)
{
	int r = 0;
	unsigned long flags;

	spin_lock_irqsave(&b->bm->lock, flags);
	switch (b->state) {
	case BS_WRITE_LOCKED:
		__transition(b, BS_DIRTY);
		wake_up(&b->io_q);
		break;

	case BS_READ_LOCKED:
		if (!--b->read_lock_count) {
			__transition(b, BS_CLEAN);
			wake_up(&b->io_q);
		}
		break;

	case BS_READ_LOCKED_DIRTY:
		if (!--b->read_lock_count) {
			__transition(b, BS_DIRTY);
			wake_up(&b->io_q);
		}
		break;

	default:
		DMERR("block = %llu not locked",
		      (unsigned long long)b->where);
		r = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&b->bm->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
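/*
 * Wait for all outstanding writes to complete, then report (and clear) any
 * blocks that ended up on the error list.
 */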
static int __wait_flush(struct dm_block_manager *bm)
{
	int r = 0;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
	__wait_all_writes(bm, &flags);

	if (!list_empty(&bm->error_list)) {
		r = -EIO;
		__clear_errors(bm);
	}
	spin_unlock_irqrestore(&bm->lock, flags);

	return r;
}
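/*
 * Two stage flush: write out everything that is dirty and wait for it,
 * then unlock the superblock with REQ_FLUSH | REQ_FUA set so its write is
 * ordered after the rest.
 */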
int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
			   struct dm_block *superblock)
{
	int r;
	unsigned long flags;

	write_all_dirty(bm);
	r = __wait_flush(bm);
	if (r)
		return r;

	spin_lock_irqsave(&bm->lock, flags);
	superblock->io_flags = REQ_FUA | REQ_FLUSH;
	spin_unlock_irqrestore(&bm->lock, flags);

	dm_bm_unlock(superblock);
	write_all_dirty(bm);

	return __wait_flush(bm);
}
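/*
 * Switch to a new (equal or larger) block device, waiting for any io still
 * in flight against the old one.
 */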
int dm_bm_rebind_block_device(struct dm_block_manager *bm,
			      struct block_device *bdev)
{
	unsigned long flags;
	dm_block_t nr_blocks = i_size_read(bdev->bd_inode);

	do_div(nr_blocks, bm->block_size);

	spin_lock_irqsave(&bm->lock, flags);
	if (nr_blocks < bm->nr_blocks) {
		spin_unlock_irqrestore(&bm->lock, flags);
		return -EINVAL;
	}

	bm->bdev = bdev;
	bm->nr_blocks = nr_blocks;

	/*
	 * Wait for any in-flight io that may be using the old bdev.
	 */
	__wait_all_io(bm, &flags);
	spin_unlock_irqrestore(&bm->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_rebind_block_device);
/*----------------------------------------------------------------*/

static int __init init_persistent_data(void)
{
	dm_block_cache = KMEM_CACHE(dm_block, SLAB_HWCACHE_ALIGN);
	if (!dm_block_cache)
		return -ENOMEM;

	return 0;
}

static void __exit exit_persistent_data(void)
{
	kmem_cache_destroy(dm_block_cache);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");
module_init(init_persistent_data);
module_exit(exit_persistent_data);