2 * Copyright (C) 2015 Google, Inc.
4 * Author: Sami Tolvanen <samitolvanen@google.com>
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
12 #include "dm-verity-fec.h"
13 #include <linux/math64.h>
15 #define DM_MSG_PREFIX "verity-fec"
18 * If error correction has been configured, returns true.
20 bool verity_fec_is_enabled(struct dm_verity
*v
)
22 return v
->fec
&& v
->fec
->dev
;
26 * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
29 static inline struct dm_verity_fec_io
*fec_io(struct dm_verity_io
*io
)
31 return (struct dm_verity_fec_io
*) verity_io_digest_end(io
->v
, io
);
35 * Return an interleaved offset for a byte in RS block.
37 static inline u64
fec_interleave(struct dm_verity
*v
, u64 offset
)
41 mod
= do_div(offset
, v
->fec
->rsn
);
42 return offset
+ mod
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
46 * Decode an RS block using Reed-Solomon.
48 static int fec_decode_rs8(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
49 u8
*data
, u8
*fec
, int neras
)
52 uint16_t par
[DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
];
54 for (i
= 0; i
< v
->fec
->roots
; i
++)
57 return decode_rs8(fio
->rs
, data
, par
, v
->fec
->rsn
, NULL
, neras
,
58 fio
->erasures
, 0, NULL
);
62 * Read error-correcting codes for the requested RS block. Returns a pointer
63 * to the data block. Caller is responsible for releasing buf.
65 static u8
*fec_read_parity(struct dm_verity
*v
, u64 rsb
, int index
,
66 unsigned *offset
, struct dm_buffer
**buf
)
71 position
= (index
+ rsb
) * v
->fec
->roots
;
72 block
= position
>> v
->data_dev_block_bits
;
73 *offset
= (unsigned)(position
- (block
<< v
->data_dev_block_bits
));
75 res
= dm_bufio_read(v
->fec
->bufio
, v
->fec
->start
+ block
, buf
);
76 if (unlikely(IS_ERR(res
))) {
77 DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
78 v
->data_dev
->name
, (unsigned long long)rsb
,
79 (unsigned long long)(v
->fec
->start
+ block
),
87 /* Loop over each preallocated buffer slot. */
88 #define fec_for_each_prealloc_buffer(__i) \
89 for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
91 /* Loop over each extra buffer slot. */
92 #define fec_for_each_extra_buffer(io, __i) \
93 for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
95 /* Loop over each allocated buffer. */
96 #define fec_for_each_buffer(io, __i) \
97 for (__i = 0; __i < (io)->nbufs; __i++)
99 /* Loop over each RS block in each allocated buffer. */
100 #define fec_for_each_buffer_rs_block(io, __i, __j) \
101 fec_for_each_buffer(io, __i) \
102 for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
105 * Return a pointer to the current RS block when called inside
106 * fec_for_each_buffer_rs_block.
108 static inline u8
*fec_buffer_rs_block(struct dm_verity
*v
,
109 struct dm_verity_fec_io
*fio
,
110 unsigned i
, unsigned j
)
112 return &fio
->bufs
[i
][j
* v
->fec
->rsn
];
116 * Return an index to the current RS block when called inside
117 * fec_for_each_buffer_rs_block.
119 static inline unsigned fec_buffer_rs_index(unsigned i
, unsigned j
)
121 return (i
<< DM_VERITY_FEC_BUF_RS_BITS
) + j
;
125 * Decode all RS blocks from buffers and copy corrected bytes into fio->output
126 * starting from block_offset.
128 static int fec_decode_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
129 u64 rsb
, int byte_index
, unsigned block_offset
,
132 int r
, corrected
= 0, res
;
133 struct dm_buffer
*buf
;
134 unsigned n
, i
, offset
;
137 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
142 * Decode the RS blocks we have in bufs. Each RS block results in
143 * one corrected target byte and consumes fec->roots parity bytes.
145 fec_for_each_buffer_rs_block(fio
, n
, i
) {
146 block
= fec_buffer_rs_block(v
, fio
, n
, i
);
147 res
= fec_decode_rs8(v
, fio
, block
, &par
[offset
], neras
);
154 fio
->output
[block_offset
] = block
[byte_index
];
157 if (block_offset
>= 1 << v
->data_dev_block_bits
)
160 /* read the next block when we run out of parity bytes */
161 offset
+= v
->fec
->roots
;
162 if (offset
>= 1 << v
->data_dev_block_bits
) {
163 dm_bufio_release(buf
);
165 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
166 if (unlikely(IS_ERR(par
)))
173 dm_bufio_release(buf
);
176 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
177 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
179 DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
180 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
186 * Locate data block erasures using verity hashes.
188 static int fec_is_erasure(struct dm_verity
*v
, struct dm_verity_io
*io
,
189 u8
*want_digest
, u8
*data
)
191 if (unlikely(verity_hash(v
, verity_io_hash_req(v
, io
),
192 data
, 1 << v
->data_dev_block_bits
,
193 verity_io_real_digest(v
, io
))))
196 return memcmp(verity_io_real_digest(v
, io
), want_digest
,
197 v
->digest_size
) != 0;
201 * Read data blocks that are part of the RS block and deinterleave as much as
202 * fits into buffers. Check for erasure locations if @neras is non-NULL.
204 static int fec_read_bufs(struct dm_verity
*v
, struct dm_verity_io
*io
,
205 u64 rsb
, u64 target
, unsigned block_offset
,
209 int i
, j
, target_index
= -1;
210 struct dm_buffer
*buf
;
211 struct dm_bufio_client
*bufio
;
212 struct dm_verity_fec_io
*fio
= fec_io(io
);
215 u8 want_digest
[v
->digest_size
];
222 * read each of the rsn data blocks that are part of the RS block, and
223 * interleave contents to available bufs
225 for (i
= 0; i
< v
->fec
->rsn
; i
++) {
226 ileaved
= fec_interleave(v
, rsb
* v
->fec
->rsn
+ i
);
229 * target is the data block we want to correct, target_index is
230 * the index of this block within the rsn RS blocks
232 if (ileaved
== target
)
235 block
= ileaved
>> v
->data_dev_block_bits
;
236 bufio
= v
->fec
->data_bufio
;
238 if (block
>= v
->data_blocks
) {
239 block
-= v
->data_blocks
;
242 * blocks outside the area were assumed to contain
243 * zeros when encoding data was generated
245 if (unlikely(block
>= v
->fec
->hash_blocks
))
248 block
+= v
->hash_start
;
252 bbuf
= dm_bufio_read(bufio
, block
, &buf
);
253 if (unlikely(IS_ERR(bbuf
))) {
254 DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
256 (unsigned long long)rsb
,
257 (unsigned long long)block
, PTR_ERR(bbuf
));
259 /* assume the block is corrupted */
260 if (neras
&& *neras
<= v
->fec
->roots
)
261 fio
->erasures
[(*neras
)++] = i
;
266 /* locate erasures if the block is on the data device */
267 if (bufio
== v
->fec
->data_bufio
&&
268 verity_hash_for_block(v
, io
, block
, want_digest
,
270 /* skip known zero blocks entirely */
275 * skip if we have already found the theoretical
276 * maximum number (i.e. fec->roots) of erasures
278 if (neras
&& *neras
<= v
->fec
->roots
&&
279 fec_is_erasure(v
, io
, want_digest
, bbuf
))
280 fio
->erasures
[(*neras
)++] = i
;
284 * deinterleave and copy the bytes that fit into bufs,
285 * starting from block_offset
287 fec_for_each_buffer_rs_block(fio
, n
, j
) {
288 k
= fec_buffer_rs_index(n
, j
) + block_offset
;
290 if (k
>= 1 << v
->data_dev_block_bits
)
293 rs_block
= fec_buffer_rs_block(v
, fio
, n
, j
);
294 rs_block
[i
] = bbuf
[k
];
297 dm_bufio_release(buf
);
304 * Allocate RS control structure and FEC buffers from preallocated mempools,
305 * and attempt to allocate as many extra buffers as available.
307 static int fec_alloc_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
312 fio
->rs
= mempool_alloc(v
->fec
->rs_pool
, GFP_NOIO
);
314 fec_for_each_prealloc_buffer(n
) {
318 fio
->bufs
[n
] = mempool_alloc(v
->fec
->prealloc_pool
, GFP_NOWAIT
);
319 if (unlikely(!fio
->bufs
[n
])) {
320 DMERR("failed to allocate FEC buffer");
325 /* try to allocate the maximum number of buffers */
326 fec_for_each_extra_buffer(fio
, n
) {
330 fio
->bufs
[n
] = mempool_alloc(v
->fec
->extra_pool
, GFP_NOWAIT
);
331 /* we can manage with even one buffer if necessary */
332 if (unlikely(!fio
->bufs
[n
]))
338 fio
->output
= mempool_alloc(v
->fec
->output_pool
, GFP_NOIO
);
344 * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
345 * zeroed before deinterleaving.
347 static void fec_init_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
351 fec_for_each_buffer(fio
, n
)
352 memset(fio
->bufs
[n
], 0, v
->fec
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
);
354 memset(fio
->erasures
, 0, sizeof(fio
->erasures
));
358 * Decode all RS blocks in a single data block and return the target block
359 * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
360 * hashes to locate erasures.
362 static int fec_decode_rsb(struct dm_verity
*v
, struct dm_verity_io
*io
,
363 struct dm_verity_fec_io
*fio
, u64 rsb
, u64 offset
,
369 r
= fec_alloc_bufs(v
, fio
);
373 for (pos
= 0; pos
< 1 << v
->data_dev_block_bits
; ) {
374 fec_init_bufs(v
, fio
);
376 r
= fec_read_bufs(v
, io
, rsb
, offset
, pos
,
377 use_erasures
? &neras
: NULL
);
381 r
= fec_decode_bufs(v
, fio
, rsb
, r
, pos
, neras
);
385 pos
+= fio
->nbufs
<< DM_VERITY_FEC_BUF_RS_BITS
;
388 /* Always re-validate the corrected block against the expected hash */
389 r
= verity_hash(v
, verity_io_hash_req(v
, io
), fio
->output
,
390 1 << v
->data_dev_block_bits
,
391 verity_io_real_digest(v
, io
));
395 if (memcmp(verity_io_real_digest(v
, io
), verity_io_want_digest(v
, io
),
397 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
398 v
->data_dev
->name
, (unsigned long long)rsb
, neras
);
405 static int fec_bv_copy(struct dm_verity
*v
, struct dm_verity_io
*io
, u8
*data
,
408 struct dm_verity_fec_io
*fio
= fec_io(io
);
410 memcpy(data
, &fio
->output
[fio
->output_pos
], len
);
411 fio
->output_pos
+= len
;
417 * Correct errors in a block. Copies corrected block to dest if non-NULL,
418 * otherwise to a bio_vec starting from iter.
420 int verity_fec_decode(struct dm_verity
*v
, struct dm_verity_io
*io
,
421 enum verity_block_type type
, sector_t block
, u8
*dest
,
422 struct bvec_iter
*iter
)
425 struct dm_verity_fec_io
*fio
= fec_io(io
);
426 u64 offset
, res
, rsb
;
428 if (!verity_fec_is_enabled(v
))
431 if (fio
->level
>= DM_VERITY_FEC_MAX_RECURSION
) {
432 DMWARN_LIMIT("%s: FEC: recursion too deep", v
->data_dev
->name
);
438 if (type
== DM_VERITY_BLOCK_TYPE_METADATA
)
439 block
+= v
->data_blocks
;
442 * For RS(M, N), the continuous FEC data is divided into blocks of N
443 * bytes. Since block size may not be divisible by N, the last block
444 * is zero padded when decoding.
446 * Each byte of the block is covered by a different RS(M, N) code,
447 * and each code is interleaved over N blocks to make it less likely
448 * that bursty corruption will leave us in unrecoverable state.
451 offset
= block
<< v
->data_dev_block_bits
;
452 res
= div64_u64(offset
, v
->fec
->rounds
<< v
->data_dev_block_bits
);
455 * The base RS block we can feed to the interleaver to find out all
456 * blocks required for decoding.
458 rsb
= offset
- res
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
461 * Locating erasures is slow, so attempt to recover the block without
462 * them first. Do a second attempt with erasures if the corruption is
465 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, false);
467 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, true);
473 memcpy(dest
, fio
->output
, 1 << v
->data_dev_block_bits
);
476 r
= verity_for_bv_block(v
, io
, iter
, fec_bv_copy
);
485 * Clean up per-bio data.
487 void verity_fec_finish_io(struct dm_verity_io
*io
)
490 struct dm_verity_fec
*f
= io
->v
->fec
;
491 struct dm_verity_fec_io
*fio
= fec_io(io
);
493 if (!verity_fec_is_enabled(io
->v
))
496 mempool_free(fio
->rs
, f
->rs_pool
);
498 fec_for_each_prealloc_buffer(n
)
499 mempool_free(fio
->bufs
[n
], f
->prealloc_pool
);
501 fec_for_each_extra_buffer(fio
, n
)
502 mempool_free(fio
->bufs
[n
], f
->extra_pool
);
504 mempool_free(fio
->output
, f
->output_pool
);
508 * Initialize per-bio data.
510 void verity_fec_init_io(struct dm_verity_io
*io
)
512 struct dm_verity_fec_io
*fio
= fec_io(io
);
514 if (!verity_fec_is_enabled(io
->v
))
518 memset(fio
->bufs
, 0, sizeof(fio
->bufs
));
525 * Append feature arguments and values to the status table.
527 unsigned verity_fec_status_table(struct dm_verity
*v
, unsigned sz
,
528 char *result
, unsigned maxlen
)
530 if (!verity_fec_is_enabled(v
))
533 DMEMIT(" " DM_VERITY_OPT_FEC_DEV
" %s "
534 DM_VERITY_OPT_FEC_BLOCKS
" %llu "
535 DM_VERITY_OPT_FEC_START
" %llu "
536 DM_VERITY_OPT_FEC_ROOTS
" %d",
538 (unsigned long long)v
->fec
->blocks
,
539 (unsigned long long)v
->fec
->start
,
545 void verity_fec_dtr(struct dm_verity
*v
)
547 struct dm_verity_fec
*f
= v
->fec
;
549 if (!verity_fec_is_enabled(v
))
552 mempool_destroy(f
->rs_pool
);
553 mempool_destroy(f
->prealloc_pool
);
554 mempool_destroy(f
->extra_pool
);
555 kmem_cache_destroy(f
->cache
);
558 dm_bufio_client_destroy(f
->data_bufio
);
560 dm_bufio_client_destroy(f
->bufio
);
563 dm_put_device(v
->ti
, f
->dev
);
569 static void *fec_rs_alloc(gfp_t gfp_mask
, void *pool_data
)
571 struct dm_verity
*v
= (struct dm_verity
*)pool_data
;
573 return init_rs(8, 0x11d, 0, 1, v
->fec
->roots
);
576 static void fec_rs_free(void *element
, void *pool_data
)
578 struct rs_control
*rs
= (struct rs_control
*)element
;
584 bool verity_is_fec_opt_arg(const char *arg_name
)
586 return (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
) ||
587 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
) ||
588 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
) ||
589 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
));
592 int verity_fec_parse_opt_args(struct dm_arg_set
*as
, struct dm_verity
*v
,
593 unsigned *argc
, const char *arg_name
)
596 struct dm_target
*ti
= v
->ti
;
597 const char *arg_value
;
598 unsigned long long num_ll
;
603 ti
->error
= "FEC feature arguments require a value";
607 arg_value
= dm_shift_arg(as
);
610 if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
)) {
611 r
= dm_get_device(ti
, arg_value
, FMODE_READ
, &v
->fec
->dev
);
613 ti
->error
= "FEC device lookup failed";
617 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
)) {
618 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
619 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
))
620 >> (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
621 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
624 v
->fec
->blocks
= num_ll
;
626 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
)) {
627 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
628 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
)) >>
629 (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
630 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_START
;
633 v
->fec
->start
= num_ll
;
635 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
)) {
636 if (sscanf(arg_value
, "%hhu%c", &num_c
, &dummy
) != 1 || !num_c
||
637 num_c
< (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MAX_RSN
) ||
638 num_c
> (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
)) {
639 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_ROOTS
;
642 v
->fec
->roots
= num_c
;
645 ti
->error
= "Unrecognized verity FEC feature request";
653 * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
655 int verity_fec_ctr_alloc(struct dm_verity
*v
)
657 struct dm_verity_fec
*f
;
659 f
= kzalloc(sizeof(struct dm_verity_fec
), GFP_KERNEL
);
661 v
->ti
->error
= "Cannot allocate FEC structure";
670 * Validate arguments and preallocate memory. Must be called after arguments
671 * have been parsed using verity_fec_parse_opt_args.
673 int verity_fec_ctr(struct dm_verity
*v
)
675 struct dm_verity_fec
*f
= v
->fec
;
676 struct dm_target
*ti
= v
->ti
;
679 if (!verity_fec_is_enabled(v
)) {
685 * FEC is computed over data blocks, possible metadata, and
686 * hash blocks. In other words, FEC covers total of fec_blocks
687 * blocks consisting of the following:
689 * data blocks | hash blocks | metadata (optional)
691 * We allow metadata after hash blocks to support a use case
692 * where all data is stored on the same device and FEC covers
695 * If metadata is included, we require it to be available on the
696 * hash device after the hash blocks.
699 hash_blocks
= v
->hash_blocks
- v
->hash_start
;
702 * Require matching block sizes for data and hash devices for
705 if (v
->data_dev_block_bits
!= v
->hash_dev_block_bits
) {
706 ti
->error
= "Block sizes must match to use FEC";
711 ti
->error
= "Missing " DM_VERITY_OPT_FEC_ROOTS
;
714 f
->rsn
= DM_VERITY_FEC_RSM
- f
->roots
;
717 ti
->error
= "Missing " DM_VERITY_OPT_FEC_BLOCKS
;
721 f
->rounds
= f
->blocks
;
722 if (sector_div(f
->rounds
, f
->rsn
))
726 * Due to optional metadata, f->blocks can be larger than
727 * data_blocks and hash_blocks combined.
729 if (f
->blocks
< v
->data_blocks
+ hash_blocks
|| !f
->rounds
) {
730 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
735 * Metadata is accessed through the hash device, so we require
736 * it to be large enough.
738 f
->hash_blocks
= f
->blocks
- v
->data_blocks
;
739 if (dm_bufio_get_device_size(v
->bufio
) < f
->hash_blocks
) {
740 ti
->error
= "Hash device is too small for "
741 DM_VERITY_OPT_FEC_BLOCKS
;
745 f
->bufio
= dm_bufio_client_create(f
->dev
->bdev
,
746 1 << v
->data_dev_block_bits
,
748 if (IS_ERR(f
->bufio
)) {
749 ti
->error
= "Cannot initialize FEC bufio client";
750 return PTR_ERR(f
->bufio
);
753 if (dm_bufio_get_device_size(f
->bufio
) <
754 ((f
->start
+ f
->rounds
* f
->roots
) >> v
->data_dev_block_bits
)) {
755 ti
->error
= "FEC device is too small";
759 f
->data_bufio
= dm_bufio_client_create(v
->data_dev
->bdev
,
760 1 << v
->data_dev_block_bits
,
762 if (IS_ERR(f
->data_bufio
)) {
763 ti
->error
= "Cannot initialize FEC data bufio client";
764 return PTR_ERR(f
->data_bufio
);
767 if (dm_bufio_get_device_size(f
->data_bufio
) < v
->data_blocks
) {
768 ti
->error
= "Data device is too small";
772 /* Preallocate an rs_control structure for each worker thread */
773 f
->rs_pool
= mempool_create(num_online_cpus(), fec_rs_alloc
,
774 fec_rs_free
, (void *) v
);
776 ti
->error
= "Cannot allocate RS pool";
780 f
->cache
= kmem_cache_create("dm_verity_fec_buffers",
781 f
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
,
784 ti
->error
= "Cannot create FEC buffer cache";
788 /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
789 f
->prealloc_pool
= mempool_create_slab_pool(num_online_cpus() *
790 DM_VERITY_FEC_BUF_PREALLOC
,
792 if (!f
->prealloc_pool
) {
793 ti
->error
= "Cannot allocate FEC buffer prealloc pool";
797 f
->extra_pool
= mempool_create_slab_pool(0, f
->cache
);
798 if (!f
->extra_pool
) {
799 ti
->error
= "Cannot allocate FEC buffer extra pool";
803 /* Preallocate an output buffer for each thread */
804 f
->output_pool
= mempool_create_kmalloc_pool(num_online_cpus(),
805 1 << v
->data_dev_block_bits
);
806 if (!f
->output_pool
) {
807 ti
->error
= "Cannot allocate FEC output pool";
811 /* Reserve space for our per-bio data */
812 ti
->per_io_data_size
+= sizeof(struct dm_verity_fec_io
);