linux-next 20110801 (linux-2.6/next.git): drivers/md/persistent-data/dm-block-manager.c

/*
 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-io.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
#include <linux/export.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define MAX_CACHE_SIZE 16U

enum dm_block_state {
	BS_EMPTY,
	BS_CLEAN,
	BS_READING,
	BS_WRITING,
	BS_READ_LOCKED,
	BS_READ_LOCKED_DIRTY,	/* Block was dirty before it was read locked. */
	BS_WRITE_LOCKED,
	BS_DIRTY,
	BS_ERROR
};

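/*
 * Every cached block is tracked by one of these.  read_lock_count and
 * write_lock_pending implement the shared/exclusive locking scheme, and
 * io_q is the wait queue that lock waiters sleep on and io completion
 * wakes.
 */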
struct dm_block {
	struct list_head list;
	struct hlist_node hlist;

	dm_block_t where;
	struct dm_block_validator *validator;
	void *data;
	wait_queue_head_t io_q;
	unsigned read_lock_count;
	unsigned write_lock_pending;
	enum dm_block_state state;

	/*
	 * Extra flags like REQ_FLUSH and REQ_FUA can be set here.  This is
	 * mainly to avoid a race condition in flush_and_unlock() where
	 * the newly-unlocked superblock may have been submitted for a
	 * write before the write_all_dirty() call is made.
	 */
	int io_flags;

	/*
	 * Sadly we need an up pointer so we can get to the bm on io
	 * completion.
	 */
	struct dm_block_manager *bm;
};

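/*
 * The block manager owns the cache of dm_blocks.  Unlocked blocks sit on
 * the empty, clean, dirty or error lists according to their state, and
 * every block that is not BS_EMPTY is also reachable through the hash
 * table in buckets[], which is allocated inline at the end of the struct
 * (hash_size entries, see dm_block_manager_create()).
 */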
struct dm_block_manager {
	struct block_device *bdev;
	unsigned cache_size;	/* In blocks */
	unsigned block_size;	/* In bytes */
	dm_block_t nr_blocks;

	/*
	 * This gets woken every time an io completes.
	 */
	wait_queue_head_t io_q;

	struct dm_io_client *io;

	/*
	 * Protects all the lists and the hash table.
	 */
	spinlock_t lock;

	struct list_head empty_list;	/* No block assigned */
	struct list_head clean_list;	/* Unlocked and clean */
	struct list_head dirty_list;	/* Unlocked and dirty */
	struct list_head error_list;

	unsigned available_count;
	unsigned reading_count;
	unsigned writing_count;

	struct kmem_cache *block_cache;		/* struct dm_block */
	struct kmem_cache *buffer_cache;	/* The buffers that store the raw data */

	/*
	 * Hash table of cached blocks, holds everything that isn't in the
	 * BS_EMPTY state.
	 */
	unsigned hash_size;
	unsigned hash_mask;

	struct hlist_head buckets[0];	/* Must be last member of struct. */
};

dm_block_t dm_block_location(struct dm_block *b)
{
	return b->where;
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return b->data;
}
EXPORT_SYMBOL_GPL(dm_block_data);

/*----------------------------------------------------------------
 * Hash table
 *--------------------------------------------------------------*/
static struct dm_block *__find_block(struct dm_block_manager *bm, dm_block_t b)
{
	unsigned bucket = dm_hash_block(b, bm->hash_mask);
	struct dm_block *blk;
	struct hlist_node *n;

	hlist_for_each_entry(blk, n, bm->buckets + bucket, hlist)
		if (blk->where == b)
			return blk;

	return NULL;
}

static void __insert_block(struct dm_block_manager *bm, struct dm_block *b)
{
	unsigned bucket = dm_hash_block(b->where, bm->hash_mask);

	hlist_add_head(&b->hlist, bm->buckets + bucket);
}

/*----------------------------------------------------------------
 * Block state:
 * __transition() handles transition of a block between different states.
 * Study this to understand the state machine.
 *
 * Alternatively install graphviz and run:
 *     grep DOT dm-block-manager.c | grep -v ' ' |
 *     sed -e 's/.*DOT: //' -e 's/\*\///' |
 *     dot -Tps -o states.ps
 *
 * Assumes bm->lock is held.
 *--------------------------------------------------------------*/
static void __transition(struct dm_block *b, enum dm_block_state new_state)
{
	/* DOT: digraph BlockStates { */
	struct dm_block_manager *bm = b->bm;

	switch (new_state) {
	case BS_EMPTY:
		/* DOT: error -> empty */
		/* DOT: clean -> empty */
		BUG_ON(!((b->state == BS_ERROR) ||
			 (b->state == BS_CLEAN)));
		hlist_del(&b->hlist);
		list_move(&b->list, &bm->empty_list);
		b->write_lock_pending = 0;
		b->read_lock_count = 0;
		b->io_flags = 0;
		b->validator = NULL;

		if (b->state == BS_ERROR)
			bm->available_count++;
		break;

	case BS_CLEAN:
		/* DOT: reading -> clean */
		/* DOT: writing -> clean */
		/* DOT: read_locked -> clean */
		BUG_ON(!((b->state == BS_READING) ||
			 (b->state == BS_WRITING) ||
			 (b->state == BS_READ_LOCKED)));
		switch (b->state) {
		case BS_READING:
			BUG_ON(!bm->reading_count);
			bm->reading_count--;
			break;

		case BS_WRITING:
			BUG_ON(!bm->writing_count);
			bm->writing_count--;
			b->io_flags = 0;
			break;

		default:
			break;
		}
		list_add_tail(&b->list, &bm->clean_list);
		bm->available_count++;
		break;

	case BS_READING:
		/* DOT: empty -> reading */
		BUG_ON(!(b->state == BS_EMPTY));
		__insert_block(bm, b);
		list_del(&b->list);
		bm->available_count--;
		bm->reading_count++;
		break;

	case BS_WRITING:
		/* DOT: dirty -> writing */
		BUG_ON(!(b->state == BS_DIRTY));
		list_del(&b->list);
		bm->writing_count++;
		break;

	case BS_READ_LOCKED:
		/* DOT: clean -> read_locked */
		BUG_ON(!(b->state == BS_CLEAN));
		list_del(&b->list);
		bm->available_count--;
		break;

	case BS_READ_LOCKED_DIRTY:
		/* DOT: dirty -> read_locked_dirty */
		BUG_ON(!((b->state == BS_DIRTY)));
		list_del(&b->list);
		break;

	case BS_WRITE_LOCKED:
		/* DOT: dirty -> write_locked */
		/* DOT: clean -> write_locked */
		BUG_ON(!((b->state == BS_DIRTY) ||
			 (b->state == BS_CLEAN)));
		list_del(&b->list);

		if (b->state == BS_CLEAN)
			bm->available_count--;
		break;

	case BS_DIRTY:
		/* DOT: write_locked -> dirty */
		/* DOT: read_locked_dirty -> dirty */
		BUG_ON(!((b->state == BS_WRITE_LOCKED) ||
			 (b->state == BS_READ_LOCKED_DIRTY)));
		list_add_tail(&b->list, &bm->dirty_list);
		break;

	case BS_ERROR:
		/* DOT: writing -> error */
		/* DOT: reading -> error */
		BUG_ON(!((b->state == BS_WRITING) ||
			 (b->state == BS_READING)));
		list_add_tail(&b->list, &bm->error_list);
		break;
	}

	b->state = new_state;
	/* DOT: } */
}

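/*
 * Two common journeys through the state machine above:
 *
 *   read:  empty -> reading -> clean -> read_locked -> clean
 *   write: clean -> write_locked -> dirty -> writing -> clean
 *
 * A failed io takes a block from reading or writing to error instead.
 */
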
/*----------------------------------------------------------------
 * Low-level io.
 *--------------------------------------------------------------*/
typedef void (completion_fn)(unsigned long error, struct dm_block *b);

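/*
 * Issues asynchronous io for a single block through dm-io.  The block
 * index is converted to a sector offset using the block size, and the
 * completion function is called either by dm-io when the io finishes or
 * directly here if submission fails.
 */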
static void submit_io(struct dm_block *b, int rw,
		      completion_fn fn)
{
	struct dm_block_manager *bm = b->bm;
	struct dm_io_request req;
	struct dm_io_region region;
	unsigned sectors_per_block = bm->block_size >> SECTOR_SHIFT;

	region.bdev = bm->bdev;
	region.sector = b->where * sectors_per_block;
	region.count = sectors_per_block;

	req.bi_rw = rw;
	req.mem.type = DM_IO_KMEM;
	req.mem.offset = 0;
	req.mem.ptr.addr = b->data;
	req.notify.fn = (void (*)(unsigned long, void *)) fn;
	req.notify.context = b;
	req.client = bm->io;

	if (dm_io(&req, 1, &region, NULL) < 0)
		fn(1, b);
}

/*----------------------------------------------------------------
 * High-level io.
 *--------------------------------------------------------------*/
static void __complete_io(unsigned long error, struct dm_block *b)
{
	struct dm_block_manager *bm = b->bm;

	if (error) {
		DMERR("io error = %lu, block = %llu",
		      error, (unsigned long long)b->where);
		__transition(b, BS_ERROR);
	} else
		__transition(b, BS_CLEAN);

	wake_up(&b->io_q);
	wake_up(&bm->io_q);
}

static void complete_io(unsigned long error, struct dm_block *b)
{
	struct dm_block_manager *bm = b->bm;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
	__complete_io(error, b);
	spin_unlock_irqrestore(&bm->lock, flags);
}

static void read_block(struct dm_block *b)
{
	submit_io(b, READ, complete_io);
}

static void write_block(struct dm_block *b)
{
	if (b->validator)
		b->validator->prepare_for_write(b->validator, b,
						b->bm->block_size);

	submit_io(b, WRITE | b->io_flags, complete_io);
}

static void write_dirty(struct dm_block_manager *bm, unsigned count)
{
	struct dm_block *b, *tmp;
	struct list_head dirty;
	unsigned long flags;

	/*
	 * Grab the first @count entries from the dirty list
	 */
	INIT_LIST_HEAD(&dirty);
	spin_lock_irqsave(&bm->lock, flags);
	list_for_each_entry_safe(b, tmp, &bm->dirty_list, list) {
		if (!count--)
			break;
		__transition(b, BS_WRITING);
		list_add_tail(&b->list, &dirty);
	}
	spin_unlock_irqrestore(&bm->lock, flags);

	list_for_each_entry_safe(b, tmp, &dirty, list) {
		list_del(&b->list);
		write_block(b);
	}
}

static void write_all_dirty(struct dm_block_manager *bm)
{
	write_dirty(bm, bm->cache_size);
}

static void __clear_errors(struct dm_block_manager *bm)
{
	struct dm_block *b, *tmp;
	list_for_each_entry_safe(b, tmp, &bm->error_list, list)
		__transition(b, BS_EMPTY);
}

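/*
 * Write errors are not reported to the caller that dirtied a block;
 * failed writes sit on the error list until __wait_flush() notices them
 * at commit time and returns -EIO (see dm_bm_flush_and_unlock()).
 */
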
/*----------------------------------------------------------------
 * Waiting
 *--------------------------------------------------------------*/
#ifdef __CHECKER__
# define __retains(x)	__attribute__((context(x, 1, 1)))
#else
# define __retains(x)
#endif

#define __wait_block(wq, lock, flags, sched_fn, condition)	\
do {								\
	int r = 0;						\
								\
	DEFINE_WAIT(wait);					\
	add_wait_queue(wq, &wait);				\
								\
	for (;;) {						\
		prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);	\
		if (condition)					\
			break;					\
								\
		spin_unlock_irqrestore(lock, flags);		\
		if (signal_pending(current)) {			\
			r = -ERESTARTSYS;			\
			spin_lock_irqsave(lock, flags);		\
			break;					\
		}						\
								\
		sched_fn();					\
		spin_lock_irqsave(lock, flags);			\
	}							\
								\
	finish_wait(wq, &wait);					\
	return r;						\
} while (0)

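/*
 * __wait_block() expands to the entire body of each helper below: it
 * sleeps on the given wait queue until the condition becomes true,
 * dropping the spinlock around the schedule call and giving up with
 * -ERESTARTSYS if a signal is pending.  Note that the macro contains
 * the return statement for the enclosing function.
 */
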
static int __wait_io(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, io_schedule,
		     ((b->state != BS_READING) && (b->state != BS_WRITING)));
}

static int __wait_unlocked(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
		     ((b->state == BS_CLEAN) || (b->state == BS_DIRTY)));
}

static int __wait_read_lockable(struct dm_block *b, unsigned long *flags)
	__retains(&b->bm->lock)
{
	__wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
		     (!b->write_lock_pending && (b->state == BS_CLEAN ||
						 b->state == BS_DIRTY ||
						 b->state == BS_READ_LOCKED)));
}

static int __wait_all_writes(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     !bm->writing_count);
}

static int __wait_all_io(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     !bm->writing_count && !bm->reading_count);
}

static int __wait_clean(struct dm_block_manager *bm, unsigned long *flags)
	__retains(&bm->lock)
{
	__wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
		     (!list_empty(&bm->clean_list) ||
		      (!bm->writing_count)));
}

/*----------------------------------------------------------------
 * Finding a free block to recycle
 *--------------------------------------------------------------*/
static int recycle_block(struct dm_block_manager *bm, dm_block_t where,
			 int need_read, struct dm_block_validator *v,
			 struct dm_block **result)
{
	int r = 0;
	struct dm_block *b;
	unsigned long flags, available;

	/*
	 * Wait for a block to appear on the empty or clean lists.
	 */
	spin_lock_irqsave(&bm->lock, flags);
	while (1) {
		/*
		 * Once we can lock and do io concurrently then we should
		 * probably flush at bm->cache_size / 2 and write _all_
		 * dirty blocks.
		 */
		available = bm->available_count + bm->writing_count;
		if (available < bm->cache_size / 4) {
			spin_unlock_irqrestore(&bm->lock, flags);
			write_dirty(bm, bm->cache_size / 4);
			spin_lock_irqsave(&bm->lock, flags);
		}

		if (!list_empty(&bm->empty_list)) {
			b = list_first_entry(&bm->empty_list, struct dm_block, list);
			break;

		} else if (!list_empty(&bm->clean_list)) {
			b = list_first_entry(&bm->clean_list, struct dm_block, list);
			__transition(b, BS_EMPTY);
			break;
		}

		__wait_clean(bm, &flags);
	}

	b->where = where;
	b->validator = v;
	__transition(b, BS_READING);

	if (!need_read) {
		memset(b->data, 0, bm->block_size);
		__transition(b, BS_CLEAN);
	} else {
		spin_unlock_irqrestore(&bm->lock, flags);
		read_block(b);
		spin_lock_irqsave(&bm->lock, flags);
		__wait_io(b, &flags);

		/* FIXME: Can b have been recycled between io completion and here? */

		/*
		 * Did the io succeed?
		 */
		if (b->state == BS_ERROR) {
			/*
			 * Since this is a read that has failed we can clear the error
			 * immediately.  Failed writes are revealed during a commit.
			 */
			__transition(b, BS_EMPTY);
			r = -EIO;
		}

		if (b->validator) {
			r = b->validator->check(b->validator, b, bm->block_size);
			if (r) {
				DMERR("%s validator check failed for block %llu",
				      b->validator->name, (unsigned long long)b->where);
				__transition(b, BS_EMPTY);
			}
		}
	}
	spin_unlock_irqrestore(&bm->lock, flags);

	if (!r)
		*result = b;

	return r;
}

/*----------------------------------------------------------------
 * Low level block management
 *--------------------------------------------------------------*/

static struct dm_block *alloc_block(struct dm_block_manager *bm)
{
	struct dm_block *b = kmem_cache_alloc(bm->block_cache, GFP_KERNEL);

	if (!b)
		return NULL;

	INIT_LIST_HEAD(&b->list);
	INIT_HLIST_NODE(&b->hlist);

	b->data = kmem_cache_alloc(bm->buffer_cache, GFP_KERNEL);
	if (!b->data) {
		kmem_cache_free(bm->block_cache, b);
		return NULL;
	}

	b->validator = NULL;
	b->state = BS_EMPTY;
	init_waitqueue_head(&b->io_q);
	b->read_lock_count = 0;
	b->write_lock_pending = 0;
	b->io_flags = 0;
	b->bm = bm;

	return b;
}

static void free_block(struct dm_block *b)
{
	kmem_cache_free(b->bm->buffer_cache, b->data);
	kmem_cache_free(b->bm->block_cache, b);
}

static int populate_bm(struct dm_block_manager *bm, unsigned count)
{
	int i;
	LIST_HEAD(bs);

	for (i = 0; i < count; i++) {
		struct dm_block *b = alloc_block(bm);
		if (!b) {
			struct dm_block *tmp;
			list_for_each_entry_safe(b, tmp, &bs, list)
				free_block(b);
			return -ENOMEM;
		}

		list_add(&b->list, &bs);
	}

	list_replace(&bs, &bm->empty_list);
	bm->available_count = count;

	return 0;
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
static unsigned calc_hash_size(unsigned cache_size)
{
	unsigned r = 32;	/* Minimum size is 16 */

	while (r < cache_size)
		r <<= 1;

	return r >> 1;
}

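/*
 * Worked example: with cache_size == 100, r grows 32 -> 64 -> 128 and
 * the function returns 128 >> 1 == 64 buckets; any cache_size of 32 or
 * less yields the minimum of 16 buckets.
 */
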
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size)
{
	unsigned i;
	unsigned hash_size = calc_hash_size(cache_size);
	size_t len = sizeof(struct dm_block_manager) +
		     sizeof(struct hlist_head) * hash_size;
	struct dm_block_manager *bm;

	bm = kmalloc(len, GFP_KERNEL);
	if (!bm)
		return NULL;

	bm->bdev = bdev;
	bm->cache_size = max(MAX_CACHE_SIZE, cache_size);
	bm->block_size = block_size;
	bm->nr_blocks = i_size_read(bdev->bd_inode);
	do_div(bm->nr_blocks, block_size);
	init_waitqueue_head(&bm->io_q);
	spin_lock_init(&bm->lock);

	INIT_LIST_HEAD(&bm->empty_list);
	INIT_LIST_HEAD(&bm->clean_list);
	INIT_LIST_HEAD(&bm->dirty_list);
	INIT_LIST_HEAD(&bm->error_list);
	bm->available_count = 0;
	bm->reading_count = 0;
	bm->writing_count = 0;

	bm->block_cache = kmem_cache_create("dm-block-manager-blocks",
					    sizeof(struct dm_block),
					    __alignof__(struct dm_block),
					    SLAB_HWCACHE_ALIGN, NULL);
	if (!bm->block_cache)
		goto bad_bm;

	bm->buffer_cache = kmem_cache_create("dm-block-manager-buffers",
					     block_size, SECTOR_SIZE,
					     0, NULL);
	if (!bm->buffer_cache)
		goto bad_block_cache;

	bm->hash_size = hash_size;
	bm->hash_mask = hash_size - 1;
	for (i = 0; i < hash_size; i++)
		INIT_HLIST_HEAD(bm->buckets + i);

	bm->io = dm_io_client_create();
	if (!bm->io)
		goto bad_buffer_cache;

	if (populate_bm(bm, cache_size) < 0)
		goto bad_io_client;

	return bm;

bad_io_client:
	dm_io_client_destroy(bm->io);
bad_buffer_cache:
	kmem_cache_destroy(bm->buffer_cache);
bad_block_cache:
	kmem_cache_destroy(bm->block_cache);
bad_bm:
	kfree(bm);

	return NULL;
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

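/*
 * Example usage (an illustrative sketch only, not part of the original
 * file): given an already-open struct block_device *bdev, a client
 * creates a manager, takes a read lock on a block, inspects the data
 * and drops the lock again.  The 4096 byte block size, 128 block cache
 * and block number 0 are arbitrary, the validator is left NULL, and
 * error handling is reduced to a minimum.
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *blk;
 *	int r;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 128);
 *	if (!bm)
 *		return -ENOMEM;
 *
 *	r = dm_bm_read_lock(bm, 0, NULL, &blk);
 *	if (!r) {
 *		void *data = dm_block_data(blk);
 *		// ... examine the dm_bm_block_size(bm) bytes at data ...
 *		dm_bm_unlock(blk);
 *	}
 *
 *	dm_block_manager_destroy(bm);
 */
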
void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	int i;
	struct dm_block *b, *btmp;
	struct hlist_node *n, *tmp;

	dm_io_client_destroy(bm->io);

	for (i = 0; i < bm->hash_size; i++)
		hlist_for_each_entry_safe(b, n, tmp, bm->buckets + i, hlist)
			free_block(b);

	list_for_each_entry_safe(b, btmp, &bm->empty_list, list)
		free_block(b);

	kmem_cache_destroy(bm->buffer_cache);
	kmem_cache_destroy(bm->block_cache);

	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return bm->block_size;
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return bm->nr_blocks;
}

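/*
 * lock_internal() implements all of the locking variants below: @how is
 * READ or WRITE, @need_read controls whether a block that is not yet
 * cached is read from disk or simply zeroed, and @can_block selects
 * between sleeping and returning -EWOULDBLOCK when the block is busy or
 * not cached.
 */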
static int lock_internal(struct dm_block_manager *bm, dm_block_t block,
			 int how, int need_read, int can_block,
			 struct dm_block_validator *v,
			 struct dm_block **result)
{
	int r = 0;
	struct dm_block *b;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
retry:
	b = __find_block(bm, block);
	if (b) {
		if (!need_read)
			b->validator = v;
		else {
			if (b->validator && (v != b->validator)) {
				DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
				      b->validator->name, v ? v->name : "NULL",
				      (unsigned long long)b->where);
				spin_unlock_irqrestore(&bm->lock, flags);
				return -EINVAL;
			}

			if (!b->validator && v) {
				b->validator = v;
				r = b->validator->check(b->validator, b, bm->block_size);
				if (r) {
					DMERR("%s validator check failed for block %llu",
					      b->validator->name,
					      (unsigned long long)b->where);
					spin_unlock_irqrestore(&bm->lock, flags);
					return r;
				}
			}
		}

		switch (how) {
		case READ:
			if (b->write_lock_pending || (b->state != BS_CLEAN &&
						      b->state != BS_DIRTY &&
						      b->state != BS_READ_LOCKED)) {
				if (!can_block) {
					spin_unlock_irqrestore(&bm->lock, flags);
					return -EWOULDBLOCK;
				}

				__wait_read_lockable(b, &flags);

				if (b->where != block)
					goto retry;
			}
			break;

		case WRITE:
			while (b->state != BS_CLEAN && b->state != BS_DIRTY) {
				if (!can_block) {
					spin_unlock_irqrestore(&bm->lock, flags);
					return -EWOULDBLOCK;
				}

				b->write_lock_pending++;
				__wait_unlocked(b, &flags);
				b->write_lock_pending--;
				if (b->where != block)
					goto retry;
			}
			break;
		}

	} else if (!can_block) {
		r = -EWOULDBLOCK;
		goto out;

	} else {
		spin_unlock_irqrestore(&bm->lock, flags);
		r = recycle_block(bm, block, need_read, v, &b);
		spin_lock_irqsave(&bm->lock, flags);
	}

	if (!r) {
		switch (how) {
		case READ:
			b->read_lock_count++;

			if (b->state == BS_DIRTY)
				__transition(b, BS_READ_LOCKED_DIRTY);
			else if (b->state == BS_CLEAN)
				__transition(b, BS_READ_LOCKED);
			break;

		case WRITE:
			__transition(b, BS_WRITE_LOCKED);
			break;
		}

		*result = b;
	}

out:
	spin_unlock_irqrestore(&bm->lock, flags);

	return r;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	return lock_internal(bm, b, READ, 1, 1, v, result);
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	return lock_internal(bm, b, WRITE, 1, 1, v, result);
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	return lock_internal(bm, b, READ, 1, 0, v, result);
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r = lock_internal(bm, b, WRITE, 0, 1, v, result);

	if (!r)
		memset((*result)->data, 0, bm->block_size);

	return r;
}

int dm_bm_unlock(struct dm_block *b)
{
	int r = 0;
	unsigned long flags;

	spin_lock_irqsave(&b->bm->lock, flags);
	switch (b->state) {
	case BS_WRITE_LOCKED:
		__transition(b, BS_DIRTY);
		wake_up(&b->io_q);
		break;

	case BS_READ_LOCKED:
		if (!--b->read_lock_count) {
			__transition(b, BS_CLEAN);
			wake_up(&b->io_q);
		}
		break;

	case BS_READ_LOCKED_DIRTY:
		if (!--b->read_lock_count) {
			__transition(b, BS_DIRTY);
			wake_up(&b->io_q);
		}
		break;

	default:
		DMERR("block = %llu not locked",
		      (unsigned long long)b->where);
		r = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&b->bm->lock, flags);

	return r;
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

static int __wait_flush(struct dm_block_manager *bm)
{
	int r = 0;
	unsigned long flags;

	spin_lock_irqsave(&bm->lock, flags);
	__wait_all_writes(bm, &flags);

	if (!list_empty(&bm->error_list)) {
		r = -EIO;
		__clear_errors(bm);
	}
	spin_unlock_irqrestore(&bm->lock, flags);

	return r;
}

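/*
 * dm_bm_flush_and_unlock() below provides the write ordering needed for
 * a commit: all dirty blocks are written out and waited for first, then
 * the superblock is unlocked and written with REQ_FLUSH | REQ_FUA set in
 * its io_flags, and the result of the second flush is returned.  A write
 * error in either pass surfaces here as -EIO via __wait_flush().
 */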
int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
			   struct dm_block *superblock)
{
	int r;
	unsigned long flags;

	write_all_dirty(bm);
	r = __wait_flush(bm);
	if (r)
		return r;

	spin_lock_irqsave(&bm->lock, flags);
	superblock->io_flags = REQ_FUA | REQ_FLUSH;
	spin_unlock_irqrestore(&bm->lock, flags);

	dm_bm_unlock(superblock);
	write_all_dirty(bm);

	return __wait_flush(bm);
}

int dm_bm_rebind_block_device(struct dm_block_manager *bm,
			      struct block_device *bdev)
{
	unsigned long flags;
	dm_block_t nr_blocks = i_size_read(bdev->bd_inode);

	do_div(nr_blocks, bm->block_size);

	spin_lock_irqsave(&bm->lock, flags);
	if (nr_blocks < bm->nr_blocks) {
		spin_unlock_irqrestore(&bm->lock, flags);
		return -EINVAL;
	}

	bm->bdev = bdev;
	bm->nr_blocks = nr_blocks;

	/*
	 * Wait for any in-flight io that may be using the old bdev
	 */
	__wait_all_io(bm, &flags);
	spin_unlock_irqrestore(&bm->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_rebind_block_device);