2 * Copyright (C) 2011 Red Hat, Inc.
4 * This file is released under the GPL.
7 #include "dm-space-map.h"
8 #include "dm-space-map-common.h"
9 #include "dm-space-map-metadata.h"
11 #include <linux/list.h>
12 #include <linux/slab.h>
13 #include <linux/device-mapper.h>
15 #define DM_MSG_PREFIX "space map metadata"
17 /*----------------------------------------------------------------*/
20 * An edge triggered threshold.
26 dm_block_t current_value
;
27 dm_sm_threshold_fn fn
;
31 static void threshold_init(struct threshold
*t
)
33 t
->threshold_set
= false;
37 static void set_threshold(struct threshold
*t
, dm_block_t value
,
38 dm_sm_threshold_fn fn
, void *context
)
40 t
->threshold_set
= true;
46 static bool below_threshold(struct threshold
*t
, dm_block_t value
)
48 return t
->threshold_set
&& value
<= t
->threshold
;
51 static bool threshold_already_triggered(struct threshold
*t
)
53 return t
->value_set
&& below_threshold(t
, t
->current_value
);
56 static void check_threshold(struct threshold
*t
, dm_block_t value
)
58 if (below_threshold(t
, value
) &&
59 !threshold_already_triggered(t
))
63 t
->current_value
= value
;
66 /*----------------------------------------------------------------*/
69 * Space map interface.
71 * The low level disk format is written using the standard btree and
72 * transaction manager. This means that performing disk operations may
73 * cause us to recurse into the space map in order to allocate new blocks.
74 * For this reason we have a pool of pre-allocated blocks large enough to
75 * service any metadata_ll_disk operation.
79 * FIXME: we should calculate this based on the size of the device.
80 * Only the metadata space map needs this functionality.
82 #define MAX_RECURSIVE_ALLOCATIONS 1024
90 enum block_op_type type
;
94 struct bop_ring_buffer
{
97 struct block_op bops
[MAX_RECURSIVE_ALLOCATIONS
+ 1];
100 static void brb_init(struct bop_ring_buffer
*brb
)
106 static bool brb_empty(struct bop_ring_buffer
*brb
)
108 return brb
->begin
== brb
->end
;
111 static unsigned brb_next(struct bop_ring_buffer
*brb
, unsigned old
)
113 unsigned r
= old
+ 1;
114 return (r
>= (sizeof(brb
->bops
) / sizeof(*brb
->bops
))) ? 0 : r
;
117 static int brb_push(struct bop_ring_buffer
*brb
,
118 enum block_op_type type
, dm_block_t b
)
120 struct block_op
*bop
;
121 unsigned next
= brb_next(brb
, brb
->end
);
124 * We don't allow the last bop to be filled, this way we can
125 * differentiate between full and empty.
127 if (next
== brb
->begin
)
130 bop
= brb
->bops
+ brb
->end
;
139 static int brb_pop(struct bop_ring_buffer
*brb
, struct block_op
*result
)
141 struct block_op
*bop
;
146 bop
= brb
->bops
+ brb
->begin
;
147 result
->type
= bop
->type
;
148 result
->block
= bop
->block
;
150 brb
->begin
= brb_next(brb
, brb
->begin
);
155 /*----------------------------------------------------------------*/
158 struct dm_space_map sm
;
161 struct ll_disk old_ll
;
165 unsigned recursion_count
;
166 unsigned allocated_this_transaction
;
167 struct bop_ring_buffer uncommitted
;
169 struct threshold threshold
;
172 static int add_bop(struct sm_metadata
*smm
, enum block_op_type type
, dm_block_t b
)
174 int r
= brb_push(&smm
->uncommitted
, type
, b
);
177 DMERR("too many recursive allocations");
184 static int commit_bop(struct sm_metadata
*smm
, struct block_op
*op
)
187 enum allocation_event ev
;
191 r
= sm_ll_inc(&smm
->ll
, op
->block
, &ev
);
195 r
= sm_ll_dec(&smm
->ll
, op
->block
, &ev
);
202 static void in(struct sm_metadata
*smm
)
204 smm
->recursion_count
++;
207 static int apply_bops(struct sm_metadata
*smm
)
211 while (!brb_empty(&smm
->uncommitted
)) {
214 r
= brb_pop(&smm
->uncommitted
, &bop
);
216 DMERR("bug in bop ring buffer");
220 r
= commit_bop(smm
, &bop
);
228 static int out(struct sm_metadata
*smm
)
233 * If we're not recursing then very bad things are happening.
235 if (!smm
->recursion_count
) {
236 DMERR("lost track of recursion depth");
240 if (smm
->recursion_count
== 1)
243 smm
->recursion_count
--;
249 * When using the out() function above, we often want to combine an error
250 * code for the operation run in the recursive context with that from
253 static int combine_errors(int r1
, int r2
)
258 static int recursing(struct sm_metadata
*smm
)
260 return smm
->recursion_count
;
263 static void sm_metadata_destroy(struct dm_space_map
*sm
)
265 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
270 static int sm_metadata_get_nr_blocks(struct dm_space_map
*sm
, dm_block_t
*count
)
272 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
274 *count
= smm
->ll
.nr_blocks
;
279 static int sm_metadata_get_nr_free(struct dm_space_map
*sm
, dm_block_t
*count
)
281 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
283 *count
= smm
->old_ll
.nr_blocks
- smm
->old_ll
.nr_allocated
-
284 smm
->allocated_this_transaction
;
289 static int sm_metadata_get_count(struct dm_space_map
*sm
, dm_block_t b
,
294 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
295 unsigned adjustment
= 0;
298 * We may have some uncommitted adjustments to add. This list
299 * should always be really short.
301 for (i
= smm
->uncommitted
.begin
;
302 i
!= smm
->uncommitted
.end
;
303 i
= brb_next(&smm
->uncommitted
, i
)) {
304 struct block_op
*op
= smm
->uncommitted
.bops
+ i
;
320 r
= sm_ll_lookup(&smm
->ll
, b
, result
);
324 *result
+= adjustment
;
329 static int sm_metadata_count_is_more_than_one(struct dm_space_map
*sm
,
330 dm_block_t b
, int *result
)
332 int r
, adjustment
= 0;
334 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
338 * We may have some uncommitted adjustments to add. This list
339 * should always be really short.
341 for (i
= smm
->uncommitted
.begin
;
342 i
!= smm
->uncommitted
.end
;
343 i
= brb_next(&smm
->uncommitted
, i
)) {
345 struct block_op
*op
= smm
->uncommitted
.bops
+ i
;
361 if (adjustment
> 1) {
366 r
= sm_ll_lookup_bitmap(&smm
->ll
, b
, &rc
);
372 * We err on the side of caution, and always return true.
376 *result
= rc
+ adjustment
> 1;
381 static int sm_metadata_set_count(struct dm_space_map
*sm
, dm_block_t b
,
385 enum allocation_event ev
;
386 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
388 if (smm
->recursion_count
) {
389 DMERR("cannot recurse set_count()");
394 r
= sm_ll_insert(&smm
->ll
, b
, count
, &ev
);
397 return combine_errors(r
, r2
);
400 static int sm_metadata_inc_block(struct dm_space_map
*sm
, dm_block_t b
)
403 enum allocation_event ev
;
404 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
407 r
= add_bop(smm
, BOP_INC
, b
);
410 r
= sm_ll_inc(&smm
->ll
, b
, &ev
);
414 return combine_errors(r
, r2
);
417 static int sm_metadata_dec_block(struct dm_space_map
*sm
, dm_block_t b
)
420 enum allocation_event ev
;
421 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
424 r
= add_bop(smm
, BOP_DEC
, b
);
427 r
= sm_ll_dec(&smm
->ll
, b
, &ev
);
431 return combine_errors(r
, r2
);
434 static int sm_metadata_new_block_(struct dm_space_map
*sm
, dm_block_t
*b
)
437 enum allocation_event ev
;
438 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
440 r
= sm_ll_find_free_block(&smm
->old_ll
, smm
->begin
, smm
->old_ll
.nr_blocks
, b
);
447 r
= add_bop(smm
, BOP_INC
, *b
);
450 r
= sm_ll_inc(&smm
->ll
, *b
, &ev
);
455 smm
->allocated_this_transaction
++;
457 return combine_errors(r
, r2
);
460 static int sm_metadata_new_block(struct dm_space_map
*sm
, dm_block_t
*b
)
463 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
465 int r
= sm_metadata_new_block_(sm
, b
);
467 DMERR_LIMIT("unable to allocate new metadata block");
471 r
= sm_metadata_get_nr_free(sm
, &count
);
473 DMERR_LIMIT("couldn't get free block count");
477 check_threshold(&smm
->threshold
, count
);
482 static int sm_metadata_commit(struct dm_space_map
*sm
)
485 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
487 r
= sm_ll_commit(&smm
->ll
);
491 memcpy(&smm
->old_ll
, &smm
->ll
, sizeof(smm
->old_ll
));
493 smm
->allocated_this_transaction
= 0;
498 static int sm_metadata_register_threshold_callback(struct dm_space_map
*sm
,
499 dm_block_t threshold
,
500 dm_sm_threshold_fn fn
,
503 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
505 set_threshold(&smm
->threshold
, threshold
, fn
, context
);
510 static int sm_metadata_root_size(struct dm_space_map
*sm
, size_t *result
)
512 *result
= sizeof(struct disk_sm_root
);
517 static int sm_metadata_copy_root(struct dm_space_map
*sm
, void *where_le
, size_t max
)
519 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
520 struct disk_sm_root root_le
;
522 root_le
.nr_blocks
= cpu_to_le64(smm
->ll
.nr_blocks
);
523 root_le
.nr_allocated
= cpu_to_le64(smm
->ll
.nr_allocated
);
524 root_le
.bitmap_root
= cpu_to_le64(smm
->ll
.bitmap_root
);
525 root_le
.ref_count_root
= cpu_to_le64(smm
->ll
.ref_count_root
);
527 if (max
< sizeof(root_le
))
530 memcpy(where_le
, &root_le
, sizeof(root_le
));
535 static int sm_metadata_extend(struct dm_space_map
*sm
, dm_block_t extra_blocks
);
537 static struct dm_space_map ops
= {
538 .destroy
= sm_metadata_destroy
,
539 .extend
= sm_metadata_extend
,
540 .get_nr_blocks
= sm_metadata_get_nr_blocks
,
541 .get_nr_free
= sm_metadata_get_nr_free
,
542 .get_count
= sm_metadata_get_count
,
543 .count_is_more_than_one
= sm_metadata_count_is_more_than_one
,
544 .set_count
= sm_metadata_set_count
,
545 .inc_block
= sm_metadata_inc_block
,
546 .dec_block
= sm_metadata_dec_block
,
547 .new_block
= sm_metadata_new_block
,
548 .commit
= sm_metadata_commit
,
549 .root_size
= sm_metadata_root_size
,
550 .copy_root
= sm_metadata_copy_root
,
551 .register_threshold_callback
= sm_metadata_register_threshold_callback
554 /*----------------------------------------------------------------*/
557 * When a new space map is created that manages its own space. We use
558 * this tiny bootstrap allocator.
560 static void sm_bootstrap_destroy(struct dm_space_map
*sm
)
564 static int sm_bootstrap_extend(struct dm_space_map
*sm
, dm_block_t extra_blocks
)
566 DMERR("bootstrap doesn't support extend");
571 static int sm_bootstrap_get_nr_blocks(struct dm_space_map
*sm
, dm_block_t
*count
)
573 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
575 *count
= smm
->ll
.nr_blocks
;
580 static int sm_bootstrap_get_nr_free(struct dm_space_map
*sm
, dm_block_t
*count
)
582 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
584 *count
= smm
->ll
.nr_blocks
- smm
->begin
;
589 static int sm_bootstrap_get_count(struct dm_space_map
*sm
, dm_block_t b
,
592 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
594 *result
= (b
< smm
->begin
) ? 1 : 0;
599 static int sm_bootstrap_count_is_more_than_one(struct dm_space_map
*sm
,
600 dm_block_t b
, int *result
)
607 static int sm_bootstrap_set_count(struct dm_space_map
*sm
, dm_block_t b
,
610 DMERR("bootstrap doesn't support set_count");
615 static int sm_bootstrap_new_block(struct dm_space_map
*sm
, dm_block_t
*b
)
617 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
620 * We know the entire device is unused.
622 if (smm
->begin
== smm
->ll
.nr_blocks
)
630 static int sm_bootstrap_inc_block(struct dm_space_map
*sm
, dm_block_t b
)
632 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
634 return add_bop(smm
, BOP_INC
, b
);
637 static int sm_bootstrap_dec_block(struct dm_space_map
*sm
, dm_block_t b
)
639 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
641 return add_bop(smm
, BOP_DEC
, b
);
644 static int sm_bootstrap_commit(struct dm_space_map
*sm
)
649 static int sm_bootstrap_root_size(struct dm_space_map
*sm
, size_t *result
)
651 DMERR("bootstrap doesn't support root_size");
656 static int sm_bootstrap_copy_root(struct dm_space_map
*sm
, void *where
,
659 DMERR("bootstrap doesn't support copy_root");
664 static struct dm_space_map bootstrap_ops
= {
665 .destroy
= sm_bootstrap_destroy
,
666 .extend
= sm_bootstrap_extend
,
667 .get_nr_blocks
= sm_bootstrap_get_nr_blocks
,
668 .get_nr_free
= sm_bootstrap_get_nr_free
,
669 .get_count
= sm_bootstrap_get_count
,
670 .count_is_more_than_one
= sm_bootstrap_count_is_more_than_one
,
671 .set_count
= sm_bootstrap_set_count
,
672 .inc_block
= sm_bootstrap_inc_block
,
673 .dec_block
= sm_bootstrap_dec_block
,
674 .new_block
= sm_bootstrap_new_block
,
675 .commit
= sm_bootstrap_commit
,
676 .root_size
= sm_bootstrap_root_size
,
677 .copy_root
= sm_bootstrap_copy_root
,
678 .register_threshold_callback
= NULL
681 /*----------------------------------------------------------------*/
683 static int sm_metadata_extend(struct dm_space_map
*sm
, dm_block_t extra_blocks
)
686 enum allocation_event ev
;
687 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
688 dm_block_t old_len
= smm
->ll
.nr_blocks
;
691 * Flick into a mode where all blocks get allocated in the new area.
693 smm
->begin
= old_len
;
694 memcpy(sm
, &bootstrap_ops
, sizeof(*sm
));
699 r
= sm_ll_extend(&smm
->ll
, extra_blocks
);
704 * We repeatedly increment then commit until the commit doesn't
705 * allocate any new blocks.
708 for (i
= old_len
; !r
&& i
< smm
->begin
; i
++) {
709 r
= sm_ll_inc(&smm
->ll
, i
, &ev
);
713 old_len
= smm
->begin
;
717 DMERR("%s: apply_bops failed", __func__
);
721 r
= sm_ll_commit(&smm
->ll
);
725 } while (old_len
!= smm
->begin
);
729 * Switch back to normal behaviour.
731 memcpy(sm
, &ops
, sizeof(*sm
));
735 /*----------------------------------------------------------------*/
737 struct dm_space_map
*dm_sm_metadata_init(void)
739 struct sm_metadata
*smm
;
741 smm
= kmalloc(sizeof(*smm
), GFP_KERNEL
);
743 return ERR_PTR(-ENOMEM
);
745 memcpy(&smm
->sm
, &ops
, sizeof(smm
->sm
));
750 int dm_sm_metadata_create(struct dm_space_map
*sm
,
751 struct dm_transaction_manager
*tm
,
752 dm_block_t nr_blocks
,
753 dm_block_t superblock
)
757 enum allocation_event ev
;
758 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
760 smm
->begin
= superblock
+ 1;
761 smm
->recursion_count
= 0;
762 smm
->allocated_this_transaction
= 0;
763 brb_init(&smm
->uncommitted
);
764 threshold_init(&smm
->threshold
);
766 memcpy(&smm
->sm
, &bootstrap_ops
, sizeof(smm
->sm
));
768 r
= sm_ll_new_metadata(&smm
->ll
, tm
);
770 if (nr_blocks
> DM_SM_METADATA_MAX_BLOCKS
)
771 nr_blocks
= DM_SM_METADATA_MAX_BLOCKS
;
772 r
= sm_ll_extend(&smm
->ll
, nr_blocks
);
774 memcpy(&smm
->sm
, &ops
, sizeof(smm
->sm
));
779 * Now we need to update the newly created data structures with the
780 * allocated blocks that they were built from.
782 for (i
= superblock
; !r
&& i
< smm
->begin
; i
++)
783 r
= sm_ll_inc(&smm
->ll
, i
, &ev
);
790 DMERR("%s: apply_bops failed", __func__
);
794 return sm_metadata_commit(sm
);
797 int dm_sm_metadata_open(struct dm_space_map
*sm
,
798 struct dm_transaction_manager
*tm
,
799 void *root_le
, size_t len
)
802 struct sm_metadata
*smm
= container_of(sm
, struct sm_metadata
, sm
);
804 r
= sm_ll_open_metadata(&smm
->ll
, tm
, root_le
, len
);
809 smm
->recursion_count
= 0;
810 smm
->allocated_this_transaction
= 0;
811 brb_init(&smm
->uncommitted
);
812 threshold_init(&smm
->threshold
);
814 memcpy(&smm
->old_ll
, &smm
->ll
, sizeof(smm
->old_ll
));