2 * Copyright (C) 2015 Google, Inc.
4 * Author: Sami Tolvanen <samitolvanen@google.com>
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
12 #include "dm-verity-fec.h"
13 #include <linux/math64.h>
15 #define DM_MSG_PREFIX "verity-fec"
18 * If error correction has been configured, returns true.
20 bool verity_fec_is_enabled(struct dm_verity
*v
)
22 return v
->fec
&& v
->fec
->dev
;
26 * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
29 static inline struct dm_verity_fec_io
*fec_io(struct dm_verity_io
*io
)
31 return (struct dm_verity_fec_io
*) verity_io_digest_end(io
->v
, io
);
35 * Return an interleaved offset for a byte in RS block.
37 static inline u64
fec_interleave(struct dm_verity
*v
, u64 offset
)
41 mod
= do_div(offset
, v
->fec
->rsn
);
42 return offset
+ mod
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
46 * Decode an RS block using Reed-Solomon.
48 static int fec_decode_rs8(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
49 u8
*data
, u8
*fec
, int neras
)
52 uint16_t par
[DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
];
54 for (i
= 0; i
< v
->fec
->roots
; i
++)
57 return decode_rs8(fio
->rs
, data
, par
, v
->fec
->rsn
, NULL
, neras
,
58 fio
->erasures
, 0, NULL
);
62 * Read error-correcting codes for the requested RS block. Returns a pointer
63 * to the data block. Caller is responsible for releasing buf.
65 static u8
*fec_read_parity(struct dm_verity
*v
, u64 rsb
, int index
,
66 unsigned *offset
, struct dm_buffer
**buf
)
71 position
= (index
+ rsb
) * v
->fec
->roots
;
72 block
= position
>> v
->data_dev_block_bits
;
73 *offset
= (unsigned)(position
- (block
<< v
->data_dev_block_bits
));
75 res
= dm_bufio_read(v
->fec
->bufio
, v
->fec
->start
+ block
, buf
);
76 if (unlikely(IS_ERR(res
))) {
77 DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
78 v
->data_dev
->name
, (unsigned long long)rsb
,
79 (unsigned long long)(v
->fec
->start
+ block
),
/* Loop over each preallocated buffer slot. */
#define fec_for_each_prealloc_buffer(__i) \
	for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)

/* Loop over each extra buffer slot (@io is accepted but not used). */
#define fec_for_each_extra_buffer(io, __i) \
	for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)

/* Loop over each allocated buffer, i.e. the first (io)->nbufs slots. */
#define fec_for_each_buffer(io, __i) \
	for (__i = 0; __i < (io)->nbufs; __i++)

/*
 * Loop over each RS block in each allocated buffer. Every buffer holds
 * 1 << DM_VERITY_FEC_BUF_RS_BITS RS blocks.
 */
#define fec_for_each_buffer_rs_block(io, __i, __j) \
	fec_for_each_buffer(io, __i) \
		for (__j = 0; __j < (1 << DM_VERITY_FEC_BUF_RS_BITS); __j++)
105 * Return a pointer to the current RS block when called inside
106 * fec_for_each_buffer_rs_block.
108 static inline u8
*fec_buffer_rs_block(struct dm_verity
*v
,
109 struct dm_verity_fec_io
*fio
,
110 unsigned i
, unsigned j
)
112 return &fio
->bufs
[i
][j
* v
->fec
->rsn
];
116 * Return an index to the current RS block when called inside
117 * fec_for_each_buffer_rs_block.
119 static inline unsigned fec_buffer_rs_index(unsigned i
, unsigned j
)
121 return (i
<< DM_VERITY_FEC_BUF_RS_BITS
) + j
;
125 * Decode all RS blocks from buffers and copy corrected bytes into fio->output
126 * starting from block_offset.
128 static int fec_decode_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
129 u64 rsb
, int byte_index
, unsigned block_offset
,
132 int r
, corrected
= 0, res
;
133 struct dm_buffer
*buf
;
134 unsigned n
, i
, offset
;
137 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
142 * Decode the RS blocks we have in bufs. Each RS block results in
143 * one corrected target byte and consumes fec->roots parity bytes.
145 fec_for_each_buffer_rs_block(fio
, n
, i
) {
146 block
= fec_buffer_rs_block(v
, fio
, n
, i
);
147 res
= fec_decode_rs8(v
, fio
, block
, &par
[offset
], neras
);
149 dm_bufio_release(buf
);
156 fio
->output
[block_offset
] = block
[byte_index
];
159 if (block_offset
>= 1 << v
->data_dev_block_bits
)
162 /* read the next block when we run out of parity bytes */
163 offset
+= v
->fec
->roots
;
164 if (offset
>= 1 << v
->data_dev_block_bits
) {
165 dm_bufio_release(buf
);
167 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
168 if (unlikely(IS_ERR(par
)))
176 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
177 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
179 DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
180 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
186 * Locate data block erasures using verity hashes.
188 static int fec_is_erasure(struct dm_verity
*v
, struct dm_verity_io
*io
,
189 u8
*want_digest
, u8
*data
)
191 if (unlikely(verity_hash(v
, verity_io_hash_desc(v
, io
),
192 data
, 1 << v
->data_dev_block_bits
,
193 verity_io_real_digest(v
, io
))))
196 return memcmp(verity_io_real_digest(v
, io
), want_digest
,
197 v
->digest_size
) != 0;
201 * Read data blocks that are part of the RS block and deinterleave as much as
202 * fits into buffers. Check for erasure locations if @neras is non-NULL.
204 static int fec_read_bufs(struct dm_verity
*v
, struct dm_verity_io
*io
,
205 u64 rsb
, u64 target
, unsigned block_offset
,
209 int i
, j
, target_index
= -1;
210 struct dm_buffer
*buf
;
211 struct dm_bufio_client
*bufio
;
212 struct dm_verity_fec_io
*fio
= fec_io(io
);
215 u8 want_digest
[v
->digest_size
];
222 * read each of the rsn data blocks that are part of the RS block, and
223 * interleave contents to available bufs
225 for (i
= 0; i
< v
->fec
->rsn
; i
++) {
226 ileaved
= fec_interleave(v
, rsb
* v
->fec
->rsn
+ i
);
229 * target is the data block we want to correct, target_index is
230 * the index of this block within the rsn RS blocks
232 if (ileaved
== target
)
235 block
= ileaved
>> v
->data_dev_block_bits
;
236 bufio
= v
->fec
->data_bufio
;
238 if (block
>= v
->data_blocks
) {
239 block
-= v
->data_blocks
;
242 * blocks outside the area were assumed to contain
243 * zeros when encoding data was generated
245 if (unlikely(block
>= v
->fec
->hash_blocks
))
248 block
+= v
->hash_start
;
252 bbuf
= dm_bufio_read(bufio
, block
, &buf
);
253 if (unlikely(IS_ERR(bbuf
))) {
254 DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
256 (unsigned long long)rsb
,
257 (unsigned long long)block
, PTR_ERR(bbuf
));
259 /* assume the block is corrupted */
260 if (neras
&& *neras
<= v
->fec
->roots
)
261 fio
->erasures
[(*neras
)++] = i
;
266 /* locate erasures if the block is on the data device */
267 if (bufio
== v
->fec
->data_bufio
&&
268 verity_hash_for_block(v
, io
, block
, want_digest
,
270 /* skip known zero blocks entirely */
275 * skip if we have already found the theoretical
276 * maximum number (i.e. fec->roots) of erasures
278 if (neras
&& *neras
<= v
->fec
->roots
&&
279 fec_is_erasure(v
, io
, want_digest
, bbuf
))
280 fio
->erasures
[(*neras
)++] = i
;
284 * deinterleave and copy the bytes that fit into bufs,
285 * starting from block_offset
287 fec_for_each_buffer_rs_block(fio
, n
, j
) {
288 k
= fec_buffer_rs_index(n
, j
) + block_offset
;
290 if (k
>= 1 << v
->data_dev_block_bits
)
293 rs_block
= fec_buffer_rs_block(v
, fio
, n
, j
);
294 rs_block
[i
] = bbuf
[k
];
297 dm_bufio_release(buf
);
304 * Allocate RS control structure and FEC buffers from preallocated mempools,
305 * and attempt to allocate as many extra buffers as available.
307 static int fec_alloc_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
312 fio
->rs
= mempool_alloc(v
->fec
->rs_pool
, 0);
313 if (unlikely(!fio
->rs
)) {
314 DMERR("failed to allocate RS");
319 fec_for_each_prealloc_buffer(n
) {
323 fio
->bufs
[n
] = mempool_alloc(v
->fec
->prealloc_pool
, GFP_NOIO
);
324 if (unlikely(!fio
->bufs
[n
])) {
325 DMERR("failed to allocate FEC buffer");
330 /* try to allocate the maximum number of buffers */
331 fec_for_each_extra_buffer(fio
, n
) {
335 fio
->bufs
[n
] = mempool_alloc(v
->fec
->extra_pool
, GFP_NOIO
);
336 /* we can manage with even one buffer if necessary */
337 if (unlikely(!fio
->bufs
[n
]))
343 fio
->output
= mempool_alloc(v
->fec
->output_pool
, GFP_NOIO
);
346 DMERR("failed to allocate FEC page");
355 * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
356 * zeroed before deinterleaving.
358 static void fec_init_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
362 fec_for_each_buffer(fio
, n
)
363 memset(fio
->bufs
[n
], 0, v
->fec
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
);
365 memset(fio
->erasures
, 0, sizeof(fio
->erasures
));
369 * Decode all RS blocks in a single data block and return the target block
370 * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
371 * hashes to locate erasures.
373 static int fec_decode_rsb(struct dm_verity
*v
, struct dm_verity_io
*io
,
374 struct dm_verity_fec_io
*fio
, u64 rsb
, u64 offset
,
380 r
= fec_alloc_bufs(v
, fio
);
384 for (pos
= 0; pos
< 1 << v
->data_dev_block_bits
; ) {
385 fec_init_bufs(v
, fio
);
387 r
= fec_read_bufs(v
, io
, rsb
, offset
, pos
,
388 use_erasures
? &neras
: NULL
);
392 r
= fec_decode_bufs(v
, fio
, rsb
, r
, pos
, neras
);
396 pos
+= fio
->nbufs
<< DM_VERITY_FEC_BUF_RS_BITS
;
399 /* Always re-validate the corrected block against the expected hash */
400 r
= verity_hash(v
, verity_io_hash_desc(v
, io
), fio
->output
,
401 1 << v
->data_dev_block_bits
,
402 verity_io_real_digest(v
, io
));
406 if (memcmp(verity_io_real_digest(v
, io
), verity_io_want_digest(v
, io
),
408 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
409 v
->data_dev
->name
, (unsigned long long)rsb
, neras
);
416 static int fec_bv_copy(struct dm_verity
*v
, struct dm_verity_io
*io
, u8
*data
,
419 struct dm_verity_fec_io
*fio
= fec_io(io
);
421 memcpy(data
, &fio
->output
[fio
->output_pos
], len
);
422 fio
->output_pos
+= len
;
428 * Correct errors in a block. Copies corrected block to dest if non-NULL,
429 * otherwise to a bio_vec starting from iter.
431 int verity_fec_decode(struct dm_verity
*v
, struct dm_verity_io
*io
,
432 enum verity_block_type type
, sector_t block
, u8
*dest
,
433 struct bvec_iter
*iter
)
436 struct dm_verity_fec_io
*fio
= fec_io(io
);
437 u64 offset
, res
, rsb
;
439 if (!verity_fec_is_enabled(v
))
442 if (type
== DM_VERITY_BLOCK_TYPE_METADATA
)
443 block
+= v
->data_blocks
;
446 * For RS(M, N), the continuous FEC data is divided into blocks of N
447 * bytes. Since block size may not be divisible by N, the last block
448 * is zero padded when decoding.
450 * Each byte of the block is covered by a different RS(M, N) code,
451 * and each code is interleaved over N blocks to make it less likely
452 * that bursty corruption will leave us in unrecoverable state.
455 offset
= block
<< v
->data_dev_block_bits
;
458 div64_u64(res
, v
->fec
->rounds
<< v
->data_dev_block_bits
);
461 * The base RS block we can feed to the interleaver to find out all
462 * blocks required for decoding.
464 rsb
= offset
- res
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
467 * Locating erasures is slow, so attempt to recover the block without
468 * them first. Do a second attempt with erasures if the corruption is
471 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, false);
473 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, true);
479 memcpy(dest
, fio
->output
, 1 << v
->data_dev_block_bits
);
482 r
= verity_for_bv_block(v
, io
, iter
, fec_bv_copy
);
489 * Clean up per-bio data.
491 void verity_fec_finish_io(struct dm_verity_io
*io
)
494 struct dm_verity_fec
*f
= io
->v
->fec
;
495 struct dm_verity_fec_io
*fio
= fec_io(io
);
497 if (!verity_fec_is_enabled(io
->v
))
500 mempool_free(fio
->rs
, f
->rs_pool
);
502 fec_for_each_prealloc_buffer(n
)
503 mempool_free(fio
->bufs
[n
], f
->prealloc_pool
);
505 fec_for_each_extra_buffer(fio
, n
)
506 mempool_free(fio
->bufs
[n
], f
->extra_pool
);
508 mempool_free(fio
->output
, f
->output_pool
);
512 * Initialize per-bio data.
514 void verity_fec_init_io(struct dm_verity_io
*io
)
516 struct dm_verity_fec_io
*fio
= fec_io(io
);
518 if (!verity_fec_is_enabled(io
->v
))
522 memset(fio
->bufs
, 0, sizeof(fio
->bufs
));
528 * Append feature arguments and values to the status table.
530 unsigned verity_fec_status_table(struct dm_verity
*v
, unsigned sz
,
531 char *result
, unsigned maxlen
)
533 if (!verity_fec_is_enabled(v
))
536 DMEMIT(" " DM_VERITY_OPT_FEC_DEV
" %s "
537 DM_VERITY_OPT_FEC_BLOCKS
" %llu "
538 DM_VERITY_OPT_FEC_START
" %llu "
539 DM_VERITY_OPT_FEC_ROOTS
" %d",
541 (unsigned long long)v
->fec
->blocks
,
542 (unsigned long long)v
->fec
->start
,
548 void verity_fec_dtr(struct dm_verity
*v
)
550 struct dm_verity_fec
*f
= v
->fec
;
552 if (!verity_fec_is_enabled(v
))
555 mempool_destroy(f
->rs_pool
);
556 mempool_destroy(f
->prealloc_pool
);
557 mempool_destroy(f
->extra_pool
);
558 kmem_cache_destroy(f
->cache
);
561 dm_bufio_client_destroy(f
->data_bufio
);
563 dm_bufio_client_destroy(f
->bufio
);
566 dm_put_device(v
->ti
, f
->dev
);
572 static void *fec_rs_alloc(gfp_t gfp_mask
, void *pool_data
)
574 struct dm_verity
*v
= (struct dm_verity
*)pool_data
;
576 return init_rs(8, 0x11d, 0, 1, v
->fec
->roots
);
/*
 * mempool free callback: release an RS control structure obtained from
 * fec_rs_alloc(). @pool_data is not used.
 */
static void fec_rs_free(void *element, void *pool_data)
{
	struct rs_control *rs = (struct rs_control *)element;

	if (rs)
		free_rs(rs);
}
587 bool verity_is_fec_opt_arg(const char *arg_name
)
589 return (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
) ||
590 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
) ||
591 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
) ||
592 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
));
595 int verity_fec_parse_opt_args(struct dm_arg_set
*as
, struct dm_verity
*v
,
596 unsigned *argc
, const char *arg_name
)
599 struct dm_target
*ti
= v
->ti
;
600 const char *arg_value
;
601 unsigned long long num_ll
;
606 ti
->error
= "FEC feature arguments require a value";
610 arg_value
= dm_shift_arg(as
);
613 if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
)) {
614 r
= dm_get_device(ti
, arg_value
, FMODE_READ
, &v
->fec
->dev
);
616 ti
->error
= "FEC device lookup failed";
620 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
)) {
621 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
622 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
))
623 >> (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
624 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
627 v
->fec
->blocks
= num_ll
;
629 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
)) {
630 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
631 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
)) >>
632 (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
633 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_START
;
636 v
->fec
->start
= num_ll
;
638 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
)) {
639 if (sscanf(arg_value
, "%hhu%c", &num_c
, &dummy
) != 1 || !num_c
||
640 num_c
< (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MAX_RSN
) ||
641 num_c
> (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
)) {
642 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_ROOTS
;
645 v
->fec
->roots
= num_c
;
648 ti
->error
= "Unrecognized verity FEC feature request";
656 * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
658 int verity_fec_ctr_alloc(struct dm_verity
*v
)
660 struct dm_verity_fec
*f
;
662 f
= kzalloc(sizeof(struct dm_verity_fec
), GFP_KERNEL
);
664 v
->ti
->error
= "Cannot allocate FEC structure";
673 * Validate arguments and preallocate memory. Must be called after arguments
674 * have been parsed using verity_fec_parse_opt_args.
676 int verity_fec_ctr(struct dm_verity
*v
)
678 struct dm_verity_fec
*f
= v
->fec
;
679 struct dm_target
*ti
= v
->ti
;
682 if (!verity_fec_is_enabled(v
)) {
688 * FEC is computed over data blocks, possible metadata, and
689 * hash blocks. In other words, FEC covers total of fec_blocks
690 * blocks consisting of the following:
692 * data blocks | hash blocks | metadata (optional)
694 * We allow metadata after hash blocks to support a use case
695 * where all data is stored on the same device and FEC covers
698 * If metadata is included, we require it to be available on the
699 * hash device after the hash blocks.
702 hash_blocks
= v
->hash_blocks
- v
->hash_start
;
705 * Require matching block sizes for data and hash devices for
708 if (v
->data_dev_block_bits
!= v
->hash_dev_block_bits
) {
709 ti
->error
= "Block sizes must match to use FEC";
714 ti
->error
= "Missing " DM_VERITY_OPT_FEC_ROOTS
;
717 f
->rsn
= DM_VERITY_FEC_RSM
- f
->roots
;
720 ti
->error
= "Missing " DM_VERITY_OPT_FEC_BLOCKS
;
724 f
->rounds
= f
->blocks
;
725 if (sector_div(f
->rounds
, f
->rsn
))
729 * Due to optional metadata, f->blocks can be larger than
730 * data_blocks and hash_blocks combined.
732 if (f
->blocks
< v
->data_blocks
+ hash_blocks
|| !f
->rounds
) {
733 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
738 * Metadata is accessed through the hash device, so we require
739 * it to be large enough.
741 f
->hash_blocks
= f
->blocks
- v
->data_blocks
;
742 if (dm_bufio_get_device_size(v
->bufio
) < f
->hash_blocks
) {
743 ti
->error
= "Hash device is too small for "
744 DM_VERITY_OPT_FEC_BLOCKS
;
748 f
->bufio
= dm_bufio_client_create(f
->dev
->bdev
,
749 1 << v
->data_dev_block_bits
,
751 if (IS_ERR(f
->bufio
)) {
752 ti
->error
= "Cannot initialize FEC bufio client";
753 return PTR_ERR(f
->bufio
);
756 if (dm_bufio_get_device_size(f
->bufio
) <
757 ((f
->start
+ f
->rounds
* f
->roots
) >> v
->data_dev_block_bits
)) {
758 ti
->error
= "FEC device is too small";
762 f
->data_bufio
= dm_bufio_client_create(v
->data_dev
->bdev
,
763 1 << v
->data_dev_block_bits
,
765 if (IS_ERR(f
->data_bufio
)) {
766 ti
->error
= "Cannot initialize FEC data bufio client";
767 return PTR_ERR(f
->data_bufio
);
770 if (dm_bufio_get_device_size(f
->data_bufio
) < v
->data_blocks
) {
771 ti
->error
= "Data device is too small";
775 /* Preallocate an rs_control structure for each worker thread */
776 f
->rs_pool
= mempool_create(num_online_cpus(), fec_rs_alloc
,
777 fec_rs_free
, (void *) v
);
779 ti
->error
= "Cannot allocate RS pool";
783 f
->cache
= kmem_cache_create("dm_verity_fec_buffers",
784 f
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
,
787 ti
->error
= "Cannot create FEC buffer cache";
791 /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
792 f
->prealloc_pool
= mempool_create_slab_pool(num_online_cpus() *
793 DM_VERITY_FEC_BUF_PREALLOC
,
795 if (!f
->prealloc_pool
) {
796 ti
->error
= "Cannot allocate FEC buffer prealloc pool";
800 f
->extra_pool
= mempool_create_slab_pool(0, f
->cache
);
801 if (!f
->extra_pool
) {
802 ti
->error
= "Cannot allocate FEC buffer extra pool";
806 /* Preallocate an output buffer for each thread */
807 f
->output_pool
= mempool_create_kmalloc_pool(num_online_cpus(),
808 1 << v
->data_dev_block_bits
);
809 if (!f
->output_pool
) {
810 ti
->error
= "Cannot allocate FEC output pool";
814 /* Reserve space for our per-bio data */
815 ti
->per_bio_data_size
+= sizeof(struct dm_verity_fec_io
);