/*
 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/
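/*
 * Index validator.  The index block records where every bitmap block
 * lives on disk.  On write we stamp the block with its own location and
 * a checksum; on read we verify both, so a misdirected or corrupted
 * read is caught before the index is trusted.
 */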
static void index_prepare_for_write(struct dm_block_validator *v,
				    struct dm_block *b,
				    size_t block_size)
{
	struct disk_metadata_index *mi_le = dm_block_data(b);

	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
	mi_le->csum = cpu_to_le32(dm_block_csum_data(&mi_le->padding,
						     block_size - sizeof(__le32)));
}
static int index_check(struct dm_block_validator *v,
		       struct dm_block *b,
		       size_t block_size)
{
	struct disk_metadata_index *mi_le = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
		DMERR("index_check failed blocknr %llu wanted %llu",
		      le64_to_cpu(mi_le->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_block_csum_data(&mi_le->padding,
						   block_size - sizeof(__le32)));
	if (csum_disk != mi_le->csum) {
		DMERR("index_check failed csum %u wanted %u",
		      le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
		return -EILSEQ;
	}

	return 0;
}
static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};
/*----------------------------------------------------------------*/
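/*
 * Low-level disk format operations.  Reference counts of 0-2 are held
 * directly in the bitmaps at two bits per block; a bitmap value of 3
 * means the real count has overflowed into the ref_count btree.
 */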
static int metadata_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	ll->tm = tm;

	ll->ref_count_info.tm = tm;
	ll->ref_count_info.levels = 1;
	ll->ref_count_info.value_type.size = sizeof(uint32_t);
	ll->ref_count_info.value_type.inc = NULL;
	ll->ref_count_info.value_type.dec = NULL;
	ll->ref_count_info.value_type.equal = NULL;

	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));

	if (ll->block_size > (1 << 30)) {
		DMERR("block size too big to hold bitmaps");
		return -EINVAL;
	}

	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
				ENTRIES_PER_BYTE;
	ll->nr_blocks = 0;
	ll->bitmap_root = 0;
	ll->ref_count_root = 0;

	return 0;
}
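/*
 * Rough example: with a 4096 byte metadata block and two bits per
 * refcount entry (so ENTRIES_PER_BYTE is 4), a single bitmap block
 * tracks about (4096 - sizeof(struct disk_bitmap_header)) * 4 blocks.
 */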
static int metadata_ll_new(struct ll_disk *ll, struct dm_transaction_manager *tm,
			   dm_block_t nr_blocks)
{
	int r;
	dm_block_t i;
	dm_block_t blocks;
	struct dm_block *index_block;

	r = metadata_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->nr_blocks = nr_blocks;
	ll->nr_allocated = 0;

	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);
	if (blocks > MAX_METADATA_BITMAPS) {
		DMERR("metadata device too large");
		return -EINVAL;
	}

	for (i = 0; i < blocks; i++) {
		struct dm_block *b;
		struct disk_index_entry *idx_le = ll->mi_le.index + i;

		r = dm_tm_new_block(tm, &dm_sm_bitmap_validator, &b);
		if (r < 0)
			return r;
		idx_le->blocknr = cpu_to_le64(dm_block_location(b));

		r = dm_tm_unlock(tm, b);
		if (r < 0)
			return r;

		idx_le->nr_free = cpu_to_le32(ll->entries_per_block);
		idx_le->none_free_before = 0;
	}

	r = dm_tm_new_block(tm, &index_validator, &index_block);
	if (r < 0)
		return r;

	ll->bitmap_root = dm_block_location(index_block);
	memcpy(dm_block_data(index_block), &ll->mi_le, sizeof(ll->mi_le));
	r = dm_tm_unlock(tm, index_block);
	if (r < 0)
		return r;

	r = dm_btree_create(&ll->ref_count_info, &ll->ref_count_root);
	if (r < 0)
		return r;

	return 0;
}
static int metadata_ll_open(struct ll_disk *ll, struct dm_transaction_manager *tm,
			    void *root_le, size_t len)
{
	int r;
	struct disk_sm_root *smr = root_le;
	struct dm_block *block;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_metadata root too small");
		return -ENOMEM;
	}

	r = metadata_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);

	r = dm_tm_read_lock(tm, le64_to_cpu(smr->bitmap_root),
			    &index_validator, &block);
	if (r < 0)
		return r;

	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
	r = dm_tm_unlock(tm, block);
	if (r < 0)
		return r;

	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);

	return 0;
}
static int metadata_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r;
	dm_block_t index = b;
	struct disk_index_entry *ie_disk;
	struct dm_block *blk;

	b = do_div(index, ll->entries_per_block);
	ie_disk = ll->mi_le.index + index;

	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk->blocknr),
			    &dm_sm_bitmap_validator, &blk);
	if (r < 0)
		return r;

	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);

	return dm_tm_unlock(ll->tm, blk);
}
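/*
 * metadata_ll_lookup() below composes the two lookups: the bitmap gives
 * the count directly for 0-2, and the sentinel value 3 redirects us to
 * the ref_count btree for the real count.
 */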
static int metadata_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	__le32 le_rc;
	int r = metadata_ll_lookup_bitmap(ll, b, result);

	if (r)
		return r;

	if (*result != 3)
		return r;

	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
	if (r < 0)
		return r;

	*result = le32_to_cpu(le_rc);

	return r;
}
static int metadata_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
				       dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry *ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);

	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned position;
		uint32_t bit_end;

		ie_disk = ll->mi_le.index + i;

		if (le32_to_cpu(ie_disk->nr_free) <= 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk->blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		bit_end = (i == index_end - 1) ? end : ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(blk), begin, bit_end, &position);
		if (r < 0) {
			dm_tm_unlock(ll->tm, blk);
			/*
			 * Avoiding retry (FIXME: explain why)
			 */
			return r;
		}

		r = dm_tm_unlock(ll->tm, blk);
		if (r < 0)
			return r;

		*result = i * ll->entries_per_block + (dm_block_t) position;

		return 0;
	}

	return -ENOSPC;
}
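/*
 * metadata_ll_insert() sets the reference count for a block.  Counts of
 * two or less are written straight into the shadowed bitmap; anything
 * larger stores the sentinel 3 in the bitmap and keeps the real count
 * in the ref_count btree.
 */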
static int metadata_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count)
{
	int r;
	uint32_t bit, old;
	struct dm_block *nb;
	dm_block_t index = b;
	struct disk_index_entry *ie_disk;
	void *bm_le;
	int inc;

	bit = do_div(index, ll->entries_per_block);
	ie_disk = ll->mi_le.index + index;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk->blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ie_disk->blocknr = cpu_to_le64(dm_block_location(nb));

	bm_le = dm_bitmap_data(nb);
	old = sm_lookup_bitmap(bm_le, bit);

	if (ref_count <= 2) {
		sm_set_bitmap(bm_le, bit, ref_count);

		r = dm_tm_unlock(ll->tm, nb);
		if (r < 0)
			return r;

		if (old > 2) {
			r = dm_btree_remove(&ll->ref_count_info,
					    ll->ref_count_root,
					    &b, &ll->ref_count_root);
			if (r) {
				sm_set_bitmap(bm_le, bit, old);
				return r;
			}
		}
	} else {
		__le32 le_rc = cpu_to_le32(ref_count);

		__dm_bless_for_disk(&le_rc);

		sm_set_bitmap(bm_le, bit, 3);
		r = dm_tm_unlock(ll->tm, nb);
		if (r < 0) {
			__dm_unbless_for_disk(&le_rc);
			return r;
		}

		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			/* FIXME: release shadow? or assume the whole transaction will be ditched */
			DMERR("ref count insert failed");
			return r;
		}
	}

	if (ref_count && !old) {
		ll->nr_allocated++;
		ie_disk->nr_free = cpu_to_le32(le32_to_cpu(ie_disk->nr_free) - 1);
		if (le32_to_cpu(ie_disk->none_free_before) == b)
			ie_disk->none_free_before = cpu_to_le32(b + 1);

	} else if (old && !ref_count) {
		ll->nr_allocated--;
		ie_disk->nr_free = cpu_to_le32(le32_to_cpu(ie_disk->nr_free) + 1);
		ie_disk->none_free_before = cpu_to_le32(min((dm_block_t) le32_to_cpu(ie_disk->none_free_before), b));
	}

	return 0;
}
static int metadata_ll_inc(struct ll_disk *ll, dm_block_t b)
{
	int r;
	uint32_t rc;

	r = metadata_ll_lookup(ll, b, &rc);
	if (r)
		return r;

	return metadata_ll_insert(ll, b, rc + 1);
}
static int metadata_ll_dec(struct ll_disk *ll, dm_block_t b)
{
	int r;
	uint32_t rc;

	r = metadata_ll_lookup(ll, b, &rc);
	if (r)
		return r;

	if (!rc)
		return -EINVAL;

	return metadata_ll_insert(ll, b, rc - 1);
}
static int metadata_ll_commit(struct ll_disk *ll)
{
	int r, inc;
	struct dm_block *b;

	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
	if (r)
		return r;

	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
	ll->bitmap_root = dm_block_location(b);

	return dm_tm_unlock(ll->tm, b);
}
/*----------------------------------------------------------------*/
/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager.  This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024
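/*
 * Each nested allocation queues at most one block_op, so this constant
 * also bounds how deep the recursion can go before add_bop() below
 * refuses with an error.
 */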
enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	unsigned nr_uncommitted;
	struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
};
static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	struct block_op *op;

	if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	op = smm->uncommitted + smm->nr_uncommitted++;
	op->type = type;
	op->block = b;

	return 0;
}
static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;

	switch (op->type) {
	case BOP_INC:
		r = metadata_ll_inc(&smm->ll, op->block);
		break;

	case BOP_DEC:
		r = metadata_ll_dec(&smm->ll, op->block);
		break;
	}

	return r;
}
static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}
static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1 && smm->nr_uncommitted) {
		while (smm->nr_uncommitted && !r) {
			smm->nr_uncommitted--;
			r = commit_bop(smm, smm->uncommitted +
				       smm->nr_uncommitted);
		}
	}

	smm->recursion_count--;

	return r;
}
/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}
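/*
 * A typical mutation below (see sm_metadata_inc_block()) follows this
 * pattern:
 *
 *	in(smm);
 *	r = metadata_ll_inc(&smm->ll, b);
 *	r2 = out(smm);
 *	return combine_errors(r, r2);
 *
 * Block ops queued while recursing are replayed by out() once the
 * recursion depth drops back to one.
 */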
static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}
static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("doesn't support extend");

	return -EINVAL;
}
static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}
static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}
static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r, i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = 0; i < smm->nr_uncommitted; i++) {
		struct block_op *op = smm->uncommitted + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = metadata_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, i, adjustment = 0;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = 0; i < smm->nr_uncommitted; i++) {
		struct block_op *op = smm->uncommitted + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = metadata_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}
static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = metadata_ll_insert(&smm->ll, b, count);
	r2 = out(smm);

	return combine_errors(r, r2);
}
static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = metadata_ll_inc(&smm->ll, b);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}
static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = metadata_ll_dec(&smm->ll, b);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}
static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = metadata_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = metadata_ll_inc(&smm->ll, *b);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}
static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));

	r = metadata_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}
static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}
static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}
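/*
 * The root copied out here is presumably stored by the caller (e.g. in
 * a superblock); dm_sm_metadata_open() accepts the same disk_sm_root
 * layout when the space map is reloaded.
 */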
static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root
};
/*----------------------------------------------------------------*/
/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}
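/*
 * The bootstrap ops hand out blocks linearly from 'begin' and queue all
 * reference count changes as block_ops.  dm_sm_metadata_create() swaps
 * them in while metadata_ll_new() builds the real data structures, then
 * swaps the normal ops back and replays the queued counts.
 */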
static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}
static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}
static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}
static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}
static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}
static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}
static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}
static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}
static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}
static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}
static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}
static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}
static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root
};
/*----------------------------------------------------------------*/
struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}
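/*
 * Lifecycle sketch: dm_sm_metadata_init() allocates the object, then
 * either dm_sm_metadata_create() formats a brand new space map or
 * dm_sm_metadata_open() restores one from a root previously emitted by
 * sm_metadata_copy_root().
 */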
int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	smm->nr_uncommitted = 0;

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
	r = metadata_ll_new(&smm->ll, tm, nr_blocks);
	if (r)
		return r;
	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = metadata_ll_inc(&smm->ll, i);

	if (r)
		return r;

	return sm_metadata_commit(sm);
}
int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = metadata_ll_open(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	smm->nr_uncommitted = 0;

	return sm_metadata_commit(sm);
}