2 * Copyright (C) 2015 Google, Inc.
4 * Author: Sami Tolvanen <samitolvanen@google.com>
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
12 #include "dm-verity-fec.h"
13 #include <linux/math64.h>
15 #define DM_MSG_PREFIX "verity-fec"
18 * If error correction has been configured, returns true.
20 bool verity_fec_is_enabled(struct dm_verity
*v
)
22 return v
->fec
&& v
->fec
->dev
;
26 * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
29 static inline struct dm_verity_fec_io
*fec_io(struct dm_verity_io
*io
)
31 return (struct dm_verity_fec_io
*) verity_io_digest_end(io
->v
, io
);
35 * Return an interleaved offset for a byte in RS block.
37 static inline u64
fec_interleave(struct dm_verity
*v
, u64 offset
)
41 mod
= do_div(offset
, v
->fec
->rsn
);
42 return offset
+ mod
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
46 * Decode an RS block using Reed-Solomon.
48 static int fec_decode_rs8(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
49 u8
*data
, u8
*fec
, int neras
)
52 uint16_t par
[DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
];
54 for (i
= 0; i
< v
->fec
->roots
; i
++)
57 return decode_rs8(fio
->rs
, data
, par
, v
->fec
->rsn
, NULL
, neras
,
58 fio
->erasures
, 0, NULL
);
62 * Read error-correcting codes for the requested RS block. Returns a pointer
63 * to the data block. Caller is responsible for releasing buf.
65 static u8
*fec_read_parity(struct dm_verity
*v
, u64 rsb
, int index
,
66 unsigned *offset
, struct dm_buffer
**buf
)
71 position
= (index
+ rsb
) * v
->fec
->roots
;
72 block
= position
>> v
->data_dev_block_bits
;
73 *offset
= (unsigned)(position
- (block
<< v
->data_dev_block_bits
));
75 res
= dm_bufio_read(v
->fec
->bufio
, v
->fec
->start
+ block
, buf
);
76 if (unlikely(IS_ERR(res
))) {
77 DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
78 v
->data_dev
->name
, (unsigned long long)rsb
,
79 (unsigned long long)(v
->fec
->start
+ block
),
87 /* Loop over each preallocated buffer slot. */
88 #define fec_for_each_prealloc_buffer(__i) \
89 for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
91 /* Loop over each extra buffer slot. */
92 #define fec_for_each_extra_buffer(io, __i) \
93 for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
95 /* Loop over each allocated buffer. */
96 #define fec_for_each_buffer(io, __i) \
97 for (__i = 0; __i < (io)->nbufs; __i++)
99 /* Loop over each RS block in each allocated buffer. */
100 #define fec_for_each_buffer_rs_block(io, __i, __j) \
101 fec_for_each_buffer(io, __i) \
102 for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
105 * Return a pointer to the current RS block when called inside
106 * fec_for_each_buffer_rs_block.
108 static inline u8
*fec_buffer_rs_block(struct dm_verity
*v
,
109 struct dm_verity_fec_io
*fio
,
110 unsigned i
, unsigned j
)
112 return &fio
->bufs
[i
][j
* v
->fec
->rsn
];
116 * Return an index to the current RS block when called inside
117 * fec_for_each_buffer_rs_block.
119 static inline unsigned fec_buffer_rs_index(unsigned i
, unsigned j
)
121 return (i
<< DM_VERITY_FEC_BUF_RS_BITS
) + j
;
125 * Decode all RS blocks from buffers and copy corrected bytes into fio->output
126 * starting from block_offset.
128 static int fec_decode_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
129 u64 rsb
, int byte_index
, unsigned block_offset
,
132 int r
, corrected
= 0, res
;
133 struct dm_buffer
*buf
;
134 unsigned n
, i
, offset
;
137 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
142 * Decode the RS blocks we have in bufs. Each RS block results in
143 * one corrected target byte and consumes fec->roots parity bytes.
145 fec_for_each_buffer_rs_block(fio
, n
, i
) {
146 block
= fec_buffer_rs_block(v
, fio
, n
, i
);
147 res
= fec_decode_rs8(v
, fio
, block
, &par
[offset
], neras
);
149 dm_bufio_release(buf
);
156 fio
->output
[block_offset
] = block
[byte_index
];
159 if (block_offset
>= 1 << v
->data_dev_block_bits
)
162 /* read the next block when we run out of parity bytes */
163 offset
+= v
->fec
->roots
;
164 if (offset
>= 1 << v
->data_dev_block_bits
) {
165 dm_bufio_release(buf
);
167 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
168 if (unlikely(IS_ERR(par
)))
176 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
177 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
179 DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
180 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
186 * Locate data block erasures using verity hashes.
188 static int fec_is_erasure(struct dm_verity
*v
, struct dm_verity_io
*io
,
189 u8
*want_digest
, u8
*data
)
191 if (unlikely(verity_hash(v
, verity_io_hash_desc(v
, io
),
192 data
, 1 << v
->data_dev_block_bits
,
193 verity_io_real_digest(v
, io
))))
196 return memcmp(verity_io_real_digest(v
, io
), want_digest
,
197 v
->digest_size
) != 0;
201 * Read data blocks that are part of the RS block and deinterleave as much as
202 * fits into buffers. Check for erasure locations if @neras is non-NULL.
204 static int fec_read_bufs(struct dm_verity
*v
, struct dm_verity_io
*io
,
205 u64 rsb
, u64 target
, unsigned block_offset
,
209 int i
, j
, target_index
= -1;
210 struct dm_buffer
*buf
;
211 struct dm_bufio_client
*bufio
;
212 struct dm_verity_fec_io
*fio
= fec_io(io
);
215 u8 want_digest
[v
->digest_size
];
222 * read each of the rsn data blocks that are part of the RS block, and
223 * interleave contents to available bufs
225 for (i
= 0; i
< v
->fec
->rsn
; i
++) {
226 ileaved
= fec_interleave(v
, rsb
* v
->fec
->rsn
+ i
);
229 * target is the data block we want to correct, target_index is
230 * the index of this block within the rsn RS blocks
232 if (ileaved
== target
)
235 block
= ileaved
>> v
->data_dev_block_bits
;
236 bufio
= v
->fec
->data_bufio
;
238 if (block
>= v
->data_blocks
) {
239 block
-= v
->data_blocks
;
242 * blocks outside the area were assumed to contain
243 * zeros when encoding data was generated
245 if (unlikely(block
>= v
->fec
->hash_blocks
))
248 block
+= v
->hash_start
;
252 bbuf
= dm_bufio_read(bufio
, block
, &buf
);
253 if (unlikely(IS_ERR(bbuf
))) {
254 DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
256 (unsigned long long)rsb
,
257 (unsigned long long)block
, PTR_ERR(bbuf
));
259 /* assume the block is corrupted */
260 if (neras
&& *neras
<= v
->fec
->roots
)
261 fio
->erasures
[(*neras
)++] = i
;
266 /* locate erasures if the block is on the data device */
267 if (bufio
== v
->fec
->data_bufio
&&
268 verity_hash_for_block(v
, io
, block
, want_digest
,
270 /* skip known zero blocks entirely */
275 * skip if we have already found the theoretical
276 * maximum number (i.e. fec->roots) of erasures
278 if (neras
&& *neras
<= v
->fec
->roots
&&
279 fec_is_erasure(v
, io
, want_digest
, bbuf
))
280 fio
->erasures
[(*neras
)++] = i
;
284 * deinterleave and copy the bytes that fit into bufs,
285 * starting from block_offset
287 fec_for_each_buffer_rs_block(fio
, n
, j
) {
288 k
= fec_buffer_rs_index(n
, j
) + block_offset
;
290 if (k
>= 1 << v
->data_dev_block_bits
)
293 rs_block
= fec_buffer_rs_block(v
, fio
, n
, j
);
294 rs_block
[i
] = bbuf
[k
];
297 dm_bufio_release(buf
);
304 * Allocate RS control structure and FEC buffers from preallocated mempools,
305 * and attempt to allocate as many extra buffers as available.
307 static int fec_alloc_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
312 fio
->rs
= mempool_alloc(v
->fec
->rs_pool
, 0);
313 if (unlikely(!fio
->rs
)) {
314 DMERR("failed to allocate RS");
319 fec_for_each_prealloc_buffer(n
) {
323 fio
->bufs
[n
] = mempool_alloc(v
->fec
->prealloc_pool
, GFP_NOIO
);
324 if (unlikely(!fio
->bufs
[n
])) {
325 DMERR("failed to allocate FEC buffer");
330 /* try to allocate the maximum number of buffers */
331 fec_for_each_extra_buffer(fio
, n
) {
335 fio
->bufs
[n
] = mempool_alloc(v
->fec
->extra_pool
, GFP_NOIO
);
336 /* we can manage with even one buffer if necessary */
337 if (unlikely(!fio
->bufs
[n
]))
343 fio
->output
= mempool_alloc(v
->fec
->output_pool
, GFP_NOIO
);
346 DMERR("failed to allocate FEC page");
355 * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
356 * zeroed before deinterleaving.
358 static void fec_init_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
362 fec_for_each_buffer(fio
, n
)
363 memset(fio
->bufs
[n
], 0, v
->fec
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
);
365 memset(fio
->erasures
, 0, sizeof(fio
->erasures
));
369 * Decode all RS blocks in a single data block and return the target block
370 * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
371 * hashes to locate erasures.
373 static int fec_decode_rsb(struct dm_verity
*v
, struct dm_verity_io
*io
,
374 struct dm_verity_fec_io
*fio
, u64 rsb
, u64 offset
,
380 r
= fec_alloc_bufs(v
, fio
);
384 for (pos
= 0; pos
< 1 << v
->data_dev_block_bits
; ) {
385 fec_init_bufs(v
, fio
);
387 r
= fec_read_bufs(v
, io
, rsb
, offset
, pos
,
388 use_erasures
? &neras
: NULL
);
392 r
= fec_decode_bufs(v
, fio
, rsb
, r
, pos
, neras
);
396 pos
+= fio
->nbufs
<< DM_VERITY_FEC_BUF_RS_BITS
;
399 /* Always re-validate the corrected block against the expected hash */
400 r
= verity_hash(v
, verity_io_hash_desc(v
, io
), fio
->output
,
401 1 << v
->data_dev_block_bits
,
402 verity_io_real_digest(v
, io
));
406 if (memcmp(verity_io_real_digest(v
, io
), verity_io_want_digest(v
, io
),
408 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
409 v
->data_dev
->name
, (unsigned long long)rsb
, neras
);
416 static int fec_bv_copy(struct dm_verity
*v
, struct dm_verity_io
*io
, u8
*data
,
419 struct dm_verity_fec_io
*fio
= fec_io(io
);
421 memcpy(data
, &fio
->output
[fio
->output_pos
], len
);
422 fio
->output_pos
+= len
;
428 * Correct errors in a block. Copies corrected block to dest if non-NULL,
429 * otherwise to a bio_vec starting from iter.
431 int verity_fec_decode(struct dm_verity
*v
, struct dm_verity_io
*io
,
432 enum verity_block_type type
, sector_t block
, u8
*dest
,
433 struct bvec_iter
*iter
)
436 struct dm_verity_fec_io
*fio
= fec_io(io
);
437 u64 offset
, res
, rsb
;
439 if (!verity_fec_is_enabled(v
))
442 if (type
== DM_VERITY_BLOCK_TYPE_METADATA
)
443 block
+= v
->data_blocks
;
446 * For RS(M, N), the continuous FEC data is divided into blocks of N
447 * bytes. Since block size may not be divisible by N, the last block
448 * is zero padded when decoding.
450 * Each byte of the block is covered by a different RS(M, N) code,
451 * and each code is interleaved over N blocks to make it less likely
452 * that bursty corruption will leave us in unrecoverable state.
455 offset
= block
<< v
->data_dev_block_bits
;
456 res
= div64_u64(offset
, v
->fec
->rounds
<< v
->data_dev_block_bits
);
459 * The base RS block we can feed to the interleaver to find out all
460 * blocks required for decoding.
462 rsb
= offset
- res
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
465 * Locating erasures is slow, so attempt to recover the block without
466 * them first. Do a second attempt with erasures if the corruption is
469 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, false);
471 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, true);
477 memcpy(dest
, fio
->output
, 1 << v
->data_dev_block_bits
);
480 r
= verity_for_bv_block(v
, io
, iter
, fec_bv_copy
);
487 * Clean up per-bio data.
489 void verity_fec_finish_io(struct dm_verity_io
*io
)
492 struct dm_verity_fec
*f
= io
->v
->fec
;
493 struct dm_verity_fec_io
*fio
= fec_io(io
);
495 if (!verity_fec_is_enabled(io
->v
))
498 mempool_free(fio
->rs
, f
->rs_pool
);
500 fec_for_each_prealloc_buffer(n
)
501 mempool_free(fio
->bufs
[n
], f
->prealloc_pool
);
503 fec_for_each_extra_buffer(fio
, n
)
504 mempool_free(fio
->bufs
[n
], f
->extra_pool
);
506 mempool_free(fio
->output
, f
->output_pool
);
510 * Initialize per-bio data.
512 void verity_fec_init_io(struct dm_verity_io
*io
)
514 struct dm_verity_fec_io
*fio
= fec_io(io
);
516 if (!verity_fec_is_enabled(io
->v
))
520 memset(fio
->bufs
, 0, sizeof(fio
->bufs
));
526 * Append feature arguments and values to the status table.
528 unsigned verity_fec_status_table(struct dm_verity
*v
, unsigned sz
,
529 char *result
, unsigned maxlen
)
531 if (!verity_fec_is_enabled(v
))
534 DMEMIT(" " DM_VERITY_OPT_FEC_DEV
" %s "
535 DM_VERITY_OPT_FEC_BLOCKS
" %llu "
536 DM_VERITY_OPT_FEC_START
" %llu "
537 DM_VERITY_OPT_FEC_ROOTS
" %d",
539 (unsigned long long)v
->fec
->blocks
,
540 (unsigned long long)v
->fec
->start
,
546 void verity_fec_dtr(struct dm_verity
*v
)
548 struct dm_verity_fec
*f
= v
->fec
;
550 if (!verity_fec_is_enabled(v
))
553 mempool_destroy(f
->rs_pool
);
554 mempool_destroy(f
->prealloc_pool
);
555 mempool_destroy(f
->extra_pool
);
556 kmem_cache_destroy(f
->cache
);
559 dm_bufio_client_destroy(f
->data_bufio
);
561 dm_bufio_client_destroy(f
->bufio
);
564 dm_put_device(v
->ti
, f
->dev
);
570 static void *fec_rs_alloc(gfp_t gfp_mask
, void *pool_data
)
572 struct dm_verity
*v
= (struct dm_verity
*)pool_data
;
574 return init_rs(8, 0x11d, 0, 1, v
->fec
->roots
);
577 static void fec_rs_free(void *element
, void *pool_data
)
579 struct rs_control
*rs
= (struct rs_control
*)element
;
585 bool verity_is_fec_opt_arg(const char *arg_name
)
587 return (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
) ||
588 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
) ||
589 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
) ||
590 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
));
593 int verity_fec_parse_opt_args(struct dm_arg_set
*as
, struct dm_verity
*v
,
594 unsigned *argc
, const char *arg_name
)
597 struct dm_target
*ti
= v
->ti
;
598 const char *arg_value
;
599 unsigned long long num_ll
;
604 ti
->error
= "FEC feature arguments require a value";
608 arg_value
= dm_shift_arg(as
);
611 if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
)) {
612 r
= dm_get_device(ti
, arg_value
, FMODE_READ
, &v
->fec
->dev
);
614 ti
->error
= "FEC device lookup failed";
618 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
)) {
619 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
620 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
))
621 >> (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
622 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
625 v
->fec
->blocks
= num_ll
;
627 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
)) {
628 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
629 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
)) >>
630 (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
631 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_START
;
634 v
->fec
->start
= num_ll
;
636 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
)) {
637 if (sscanf(arg_value
, "%hhu%c", &num_c
, &dummy
) != 1 || !num_c
||
638 num_c
< (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MAX_RSN
) ||
639 num_c
> (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
)) {
640 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_ROOTS
;
643 v
->fec
->roots
= num_c
;
646 ti
->error
= "Unrecognized verity FEC feature request";
654 * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
656 int verity_fec_ctr_alloc(struct dm_verity
*v
)
658 struct dm_verity_fec
*f
;
660 f
= kzalloc(sizeof(struct dm_verity_fec
), GFP_KERNEL
);
662 v
->ti
->error
= "Cannot allocate FEC structure";
671 * Validate arguments and preallocate memory. Must be called after arguments
672 * have been parsed using verity_fec_parse_opt_args.
674 int verity_fec_ctr(struct dm_verity
*v
)
676 struct dm_verity_fec
*f
= v
->fec
;
677 struct dm_target
*ti
= v
->ti
;
680 if (!verity_fec_is_enabled(v
)) {
686 * FEC is computed over data blocks, possible metadata, and
687 * hash blocks. In other words, FEC covers total of fec_blocks
688 * blocks consisting of the following:
690 * data blocks | hash blocks | metadata (optional)
692 * We allow metadata after hash blocks to support a use case
693 * where all data is stored on the same device and FEC covers
696 * If metadata is included, we require it to be available on the
697 * hash device after the hash blocks.
700 hash_blocks
= v
->hash_blocks
- v
->hash_start
;
703 * Require matching block sizes for data and hash devices for
706 if (v
->data_dev_block_bits
!= v
->hash_dev_block_bits
) {
707 ti
->error
= "Block sizes must match to use FEC";
712 ti
->error
= "Missing " DM_VERITY_OPT_FEC_ROOTS
;
715 f
->rsn
= DM_VERITY_FEC_RSM
- f
->roots
;
718 ti
->error
= "Missing " DM_VERITY_OPT_FEC_BLOCKS
;
722 f
->rounds
= f
->blocks
;
723 if (sector_div(f
->rounds
, f
->rsn
))
727 * Due to optional metadata, f->blocks can be larger than
728 * data_blocks and hash_blocks combined.
730 if (f
->blocks
< v
->data_blocks
+ hash_blocks
|| !f
->rounds
) {
731 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
736 * Metadata is accessed through the hash device, so we require
737 * it to be large enough.
739 f
->hash_blocks
= f
->blocks
- v
->data_blocks
;
740 if (dm_bufio_get_device_size(v
->bufio
) < f
->hash_blocks
) {
741 ti
->error
= "Hash device is too small for "
742 DM_VERITY_OPT_FEC_BLOCKS
;
746 f
->bufio
= dm_bufio_client_create(f
->dev
->bdev
,
747 1 << v
->data_dev_block_bits
,
749 if (IS_ERR(f
->bufio
)) {
750 ti
->error
= "Cannot initialize FEC bufio client";
751 return PTR_ERR(f
->bufio
);
754 if (dm_bufio_get_device_size(f
->bufio
) <
755 ((f
->start
+ f
->rounds
* f
->roots
) >> v
->data_dev_block_bits
)) {
756 ti
->error
= "FEC device is too small";
760 f
->data_bufio
= dm_bufio_client_create(v
->data_dev
->bdev
,
761 1 << v
->data_dev_block_bits
,
763 if (IS_ERR(f
->data_bufio
)) {
764 ti
->error
= "Cannot initialize FEC data bufio client";
765 return PTR_ERR(f
->data_bufio
);
768 if (dm_bufio_get_device_size(f
->data_bufio
) < v
->data_blocks
) {
769 ti
->error
= "Data device is too small";
773 /* Preallocate an rs_control structure for each worker thread */
774 f
->rs_pool
= mempool_create(num_online_cpus(), fec_rs_alloc
,
775 fec_rs_free
, (void *) v
);
777 ti
->error
= "Cannot allocate RS pool";
781 f
->cache
= kmem_cache_create("dm_verity_fec_buffers",
782 f
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
,
785 ti
->error
= "Cannot create FEC buffer cache";
789 /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
790 f
->prealloc_pool
= mempool_create_slab_pool(num_online_cpus() *
791 DM_VERITY_FEC_BUF_PREALLOC
,
793 if (!f
->prealloc_pool
) {
794 ti
->error
= "Cannot allocate FEC buffer prealloc pool";
798 f
->extra_pool
= mempool_create_slab_pool(0, f
->cache
);
799 if (!f
->extra_pool
) {
800 ti
->error
= "Cannot allocate FEC buffer extra pool";
804 /* Preallocate an output buffer for each thread */
805 f
->output_pool
= mempool_create_kmalloc_pool(num_online_cpus(),
806 1 << v
->data_dev_block_bits
);
807 if (!f
->output_pool
) {
808 ti
->error
= "Cannot allocate FEC output pool";
812 /* Reserve space for our per-bio data */
813 ti
->per_io_data_size
+= sizeof(struct dm_verity_fec_io
);