1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2015 Google, Inc.
5 * Author: Sami Tolvanen <samitolvanen@google.com>
8 #include "dm-verity-fec.h"
9 #include <linux/math64.h>
11 #define DM_MSG_PREFIX "verity-fec"
14 * If error correction has been configured, returns true.
16 bool verity_fec_is_enabled(struct dm_verity
*v
)
18 return v
->fec
&& v
->fec
->dev
;
22 * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
25 static inline struct dm_verity_fec_io
*fec_io(struct dm_verity_io
*io
)
27 return (struct dm_verity_fec_io
*) verity_io_digest_end(io
->v
, io
);
31 * Return an interleaved offset for a byte in RS block.
33 static inline u64
fec_interleave(struct dm_verity
*v
, u64 offset
)
37 mod
= do_div(offset
, v
->fec
->rsn
);
38 return offset
+ mod
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
42 * Decode an RS block using Reed-Solomon.
44 static int fec_decode_rs8(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
45 u8
*data
, u8
*fec
, int neras
)
48 uint16_t par
[DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
];
50 for (i
= 0; i
< v
->fec
->roots
; i
++)
53 return decode_rs8(fio
->rs
, data
, par
, v
->fec
->rsn
, NULL
, neras
,
54 fio
->erasures
, 0, NULL
);
58 * Read error-correcting codes for the requested RS block. Returns a pointer
59 * to the data block. Caller is responsible for releasing buf.
61 static u8
*fec_read_parity(struct dm_verity
*v
, u64 rsb
, int index
,
62 unsigned *offset
, struct dm_buffer
**buf
)
67 position
= (index
+ rsb
) * v
->fec
->roots
;
68 block
= position
>> v
->data_dev_block_bits
;
69 *offset
= (unsigned)(position
- (block
<< v
->data_dev_block_bits
));
71 res
= dm_bufio_read(v
->fec
->bufio
, v
->fec
->start
+ block
, buf
);
73 DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
74 v
->data_dev
->name
, (unsigned long long)rsb
,
75 (unsigned long long)(v
->fec
->start
+ block
),
/*
 * Loop over each preallocated buffer slot: __i runs from 0 up to, but not
 * including, DM_VERITY_FEC_BUF_PREALLOC.
 *
 * __i must be a modifiable lvalue. It is parenthesized in the expansion so
 * that an expression such as *p is incremented as a whole rather than being
 * re-parsed as *p++.
 */
#define fec_for_each_prealloc_buffer(__i) \
	for ((__i) = 0; (__i) < DM_VERITY_FEC_BUF_PREALLOC; (__i)++)
/*
 * Loop over each extra buffer slot: __i runs from DM_VERITY_FEC_BUF_PREALLOC
 * up to, but not including, DM_VERITY_FEC_BUF_MAX.
 *
 * The io argument is not used by the expansion; it is still accepted so that
 * existing callers keep compiling unchanged. __i must be a modifiable lvalue
 * and is parenthesized so that expressions such as *p iterate correctly.
 */
#define fec_for_each_extra_buffer(io, __i) \
	for ((__i) = DM_VERITY_FEC_BUF_PREALLOC; \
	     (__i) < DM_VERITY_FEC_BUF_MAX; (__i)++)
/*
 * Loop over each allocated buffer: __i runs from 0 up to, but not including,
 * (io)->nbufs. The bound is re-read on every iteration.
 *
 * __i must be a modifiable lvalue and is parenthesized so that expressions
 * such as *p are incremented as a whole rather than re-parsed as *p++.
 */
#define fec_for_each_buffer(io, __i) \
	for ((__i) = 0; (__i) < (io)->nbufs; (__i)++)
/*
 * Loop over each RS block in each allocated buffer.
 *
 * The outer loop is fec_for_each_buffer(io, __i); for every buffer, __j runs
 * from 0 up to, but not including, 1 << DM_VERITY_FEC_BUF_RS_BITS. Because
 * this expands to two nested for statements, a break in the body leaves only
 * the inner loop.
 *
 * __j must be a modifiable lvalue and is parenthesized so that expressions
 * such as *p are incremented as a whole rather than re-parsed as *p++.
 */
#define fec_for_each_buffer_rs_block(io, __i, __j) \
	fec_for_each_buffer(io, __i) \
		for ((__j) = 0; (__j) < (1 << DM_VERITY_FEC_BUF_RS_BITS); (__j)++)
101 * Return a pointer to the current RS block when called inside
102 * fec_for_each_buffer_rs_block.
104 static inline u8
*fec_buffer_rs_block(struct dm_verity
*v
,
105 struct dm_verity_fec_io
*fio
,
106 unsigned i
, unsigned j
)
108 return &fio
->bufs
[i
][j
* v
->fec
->rsn
];
112 * Return an index to the current RS block when called inside
113 * fec_for_each_buffer_rs_block.
115 static inline unsigned fec_buffer_rs_index(unsigned i
, unsigned j
)
117 return (i
<< DM_VERITY_FEC_BUF_RS_BITS
) + j
;
121 * Decode all RS blocks from buffers and copy corrected bytes into fio->output
122 * starting from block_offset.
124 static int fec_decode_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
,
125 u64 rsb
, int byte_index
, unsigned block_offset
,
128 int r
, corrected
= 0, res
;
129 struct dm_buffer
*buf
;
130 unsigned n
, i
, offset
;
133 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
138 * Decode the RS blocks we have in bufs. Each RS block results in
139 * one corrected target byte and consumes fec->roots parity bytes.
141 fec_for_each_buffer_rs_block(fio
, n
, i
) {
142 block
= fec_buffer_rs_block(v
, fio
, n
, i
);
143 res
= fec_decode_rs8(v
, fio
, block
, &par
[offset
], neras
);
150 fio
->output
[block_offset
] = block
[byte_index
];
153 if (block_offset
>= 1 << v
->data_dev_block_bits
)
156 /* read the next block when we run out of parity bytes */
157 offset
+= v
->fec
->roots
;
158 if (offset
>= 1 << v
->data_dev_block_bits
) {
159 dm_bufio_release(buf
);
161 par
= fec_read_parity(v
, rsb
, block_offset
, &offset
, &buf
);
169 dm_bufio_release(buf
);
172 DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
173 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
175 DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
176 v
->data_dev
->name
, (unsigned long long)rsb
, r
);
182 * Locate data block erasures using verity hashes.
184 static int fec_is_erasure(struct dm_verity
*v
, struct dm_verity_io
*io
,
185 u8
*want_digest
, u8
*data
)
187 if (unlikely(verity_hash(v
, verity_io_hash_req(v
, io
),
188 data
, 1 << v
->data_dev_block_bits
,
189 verity_io_real_digest(v
, io
))))
192 return memcmp(verity_io_real_digest(v
, io
), want_digest
,
193 v
->digest_size
) != 0;
197 * Read data blocks that are part of the RS block and deinterleave as much as
198 * fits into buffers. Check for erasure locations if @neras is non-NULL.
200 static int fec_read_bufs(struct dm_verity
*v
, struct dm_verity_io
*io
,
201 u64 rsb
, u64 target
, unsigned block_offset
,
205 int i
, j
, target_index
= -1;
206 struct dm_buffer
*buf
;
207 struct dm_bufio_client
*bufio
;
208 struct dm_verity_fec_io
*fio
= fec_io(io
);
211 u8 want_digest
[HASH_MAX_DIGESTSIZE
];
217 if (WARN_ON(v
->digest_size
> sizeof(want_digest
)))
221 * read each of the rsn data blocks that are part of the RS block, and
222 * interleave contents to available bufs
224 for (i
= 0; i
< v
->fec
->rsn
; i
++) {
225 ileaved
= fec_interleave(v
, rsb
* v
->fec
->rsn
+ i
);
228 * target is the data block we want to correct, target_index is
229 * the index of this block within the rsn RS blocks
231 if (ileaved
== target
)
234 block
= ileaved
>> v
->data_dev_block_bits
;
235 bufio
= v
->fec
->data_bufio
;
237 if (block
>= v
->data_blocks
) {
238 block
-= v
->data_blocks
;
241 * blocks outside the area were assumed to contain
242 * zeros when encoding data was generated
244 if (unlikely(block
>= v
->fec
->hash_blocks
))
247 block
+= v
->hash_start
;
251 bbuf
= dm_bufio_read(bufio
, block
, &buf
);
253 DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
255 (unsigned long long)rsb
,
256 (unsigned long long)block
, PTR_ERR(bbuf
));
258 /* assume the block is corrupted */
259 if (neras
&& *neras
<= v
->fec
->roots
)
260 fio
->erasures
[(*neras
)++] = i
;
265 /* locate erasures if the block is on the data device */
266 if (bufio
== v
->fec
->data_bufio
&&
267 verity_hash_for_block(v
, io
, block
, want_digest
,
269 /* skip known zero blocks entirely */
274 * skip if we have already found the theoretical
275 * maximum number (i.e. fec->roots) of erasures
277 if (neras
&& *neras
<= v
->fec
->roots
&&
278 fec_is_erasure(v
, io
, want_digest
, bbuf
))
279 fio
->erasures
[(*neras
)++] = i
;
283 * deinterleave and copy the bytes that fit into bufs,
284 * starting from block_offset
286 fec_for_each_buffer_rs_block(fio
, n
, j
) {
287 k
= fec_buffer_rs_index(n
, j
) + block_offset
;
289 if (k
>= 1 << v
->data_dev_block_bits
)
292 rs_block
= fec_buffer_rs_block(v
, fio
, n
, j
);
293 rs_block
[i
] = bbuf
[k
];
296 dm_bufio_release(buf
);
303 * Allocate RS control structure and FEC buffers from preallocated mempools,
304 * and attempt to allocate as many extra buffers as available.
306 static int fec_alloc_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
311 fio
->rs
= mempool_alloc(&v
->fec
->rs_pool
, GFP_NOIO
);
313 fec_for_each_prealloc_buffer(n
) {
317 fio
->bufs
[n
] = mempool_alloc(&v
->fec
->prealloc_pool
, GFP_NOWAIT
);
318 if (unlikely(!fio
->bufs
[n
])) {
319 DMERR("failed to allocate FEC buffer");
324 /* try to allocate the maximum number of buffers */
325 fec_for_each_extra_buffer(fio
, n
) {
329 fio
->bufs
[n
] = mempool_alloc(&v
->fec
->extra_pool
, GFP_NOWAIT
);
330 /* we can manage with even one buffer if necessary */
331 if (unlikely(!fio
->bufs
[n
]))
337 fio
->output
= mempool_alloc(&v
->fec
->output_pool
, GFP_NOIO
);
343 * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
344 * zeroed before deinterleaving.
346 static void fec_init_bufs(struct dm_verity
*v
, struct dm_verity_fec_io
*fio
)
350 fec_for_each_buffer(fio
, n
)
351 memset(fio
->bufs
[n
], 0, v
->fec
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
);
353 memset(fio
->erasures
, 0, sizeof(fio
->erasures
));
357 * Decode all RS blocks in a single data block and return the target block
358 * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
359 * hashes to locate erasures.
361 static int fec_decode_rsb(struct dm_verity
*v
, struct dm_verity_io
*io
,
362 struct dm_verity_fec_io
*fio
, u64 rsb
, u64 offset
,
368 r
= fec_alloc_bufs(v
, fio
);
372 for (pos
= 0; pos
< 1 << v
->data_dev_block_bits
; ) {
373 fec_init_bufs(v
, fio
);
375 r
= fec_read_bufs(v
, io
, rsb
, offset
, pos
,
376 use_erasures
? &neras
: NULL
);
380 r
= fec_decode_bufs(v
, fio
, rsb
, r
, pos
, neras
);
384 pos
+= fio
->nbufs
<< DM_VERITY_FEC_BUF_RS_BITS
;
387 /* Always re-validate the corrected block against the expected hash */
388 r
= verity_hash(v
, verity_io_hash_req(v
, io
), fio
->output
,
389 1 << v
->data_dev_block_bits
,
390 verity_io_real_digest(v
, io
));
394 if (memcmp(verity_io_real_digest(v
, io
), verity_io_want_digest(v
, io
),
396 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
397 v
->data_dev
->name
, (unsigned long long)rsb
, neras
);
404 static int fec_bv_copy(struct dm_verity
*v
, struct dm_verity_io
*io
, u8
*data
,
407 struct dm_verity_fec_io
*fio
= fec_io(io
);
409 memcpy(data
, &fio
->output
[fio
->output_pos
], len
);
410 fio
->output_pos
+= len
;
416 * Correct errors in a block. Copies corrected block to dest if non-NULL,
417 * otherwise to a bio_vec starting from iter.
419 int verity_fec_decode(struct dm_verity
*v
, struct dm_verity_io
*io
,
420 enum verity_block_type type
, sector_t block
, u8
*dest
,
421 struct bvec_iter
*iter
)
424 struct dm_verity_fec_io
*fio
= fec_io(io
);
425 u64 offset
, res
, rsb
;
427 if (!verity_fec_is_enabled(v
))
430 if (fio
->level
>= DM_VERITY_FEC_MAX_RECURSION
) {
431 DMWARN_LIMIT("%s: FEC: recursion too deep", v
->data_dev
->name
);
437 if (type
== DM_VERITY_BLOCK_TYPE_METADATA
)
438 block
+= v
->data_blocks
;
441 * For RS(M, N), the continuous FEC data is divided into blocks of N
442 * bytes. Since block size may not be divisible by N, the last block
443 * is zero padded when decoding.
445 * Each byte of the block is covered by a different RS(M, N) code,
446 * and each code is interleaved over N blocks to make it less likely
447 * that bursty corruption will leave us in unrecoverable state.
450 offset
= block
<< v
->data_dev_block_bits
;
451 res
= div64_u64(offset
, v
->fec
->rounds
<< v
->data_dev_block_bits
);
454 * The base RS block we can feed to the interleaver to find out all
455 * blocks required for decoding.
457 rsb
= offset
- res
* (v
->fec
->rounds
<< v
->data_dev_block_bits
);
460 * Locating erasures is slow, so attempt to recover the block without
461 * them first. Do a second attempt with erasures if the corruption is
464 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, false);
466 r
= fec_decode_rsb(v
, io
, fio
, rsb
, offset
, true);
472 memcpy(dest
, fio
->output
, 1 << v
->data_dev_block_bits
);
475 r
= verity_for_bv_block(v
, io
, iter
, fec_bv_copy
);
484 * Clean up per-bio data.
486 void verity_fec_finish_io(struct dm_verity_io
*io
)
489 struct dm_verity_fec
*f
= io
->v
->fec
;
490 struct dm_verity_fec_io
*fio
= fec_io(io
);
492 if (!verity_fec_is_enabled(io
->v
))
495 mempool_free(fio
->rs
, &f
->rs_pool
);
497 fec_for_each_prealloc_buffer(n
)
498 mempool_free(fio
->bufs
[n
], &f
->prealloc_pool
);
500 fec_for_each_extra_buffer(fio
, n
)
501 mempool_free(fio
->bufs
[n
], &f
->extra_pool
);
503 mempool_free(fio
->output
, &f
->output_pool
);
507 * Initialize per-bio data.
509 void verity_fec_init_io(struct dm_verity_io
*io
)
511 struct dm_verity_fec_io
*fio
= fec_io(io
);
513 if (!verity_fec_is_enabled(io
->v
))
517 memset(fio
->bufs
, 0, sizeof(fio
->bufs
));
524 * Append feature arguments and values to the status table.
526 unsigned verity_fec_status_table(struct dm_verity
*v
, unsigned sz
,
527 char *result
, unsigned maxlen
)
529 if (!verity_fec_is_enabled(v
))
532 DMEMIT(" " DM_VERITY_OPT_FEC_DEV
" %s "
533 DM_VERITY_OPT_FEC_BLOCKS
" %llu "
534 DM_VERITY_OPT_FEC_START
" %llu "
535 DM_VERITY_OPT_FEC_ROOTS
" %d",
537 (unsigned long long)v
->fec
->blocks
,
538 (unsigned long long)v
->fec
->start
,
544 void verity_fec_dtr(struct dm_verity
*v
)
546 struct dm_verity_fec
*f
= v
->fec
;
548 if (!verity_fec_is_enabled(v
))
551 mempool_exit(&f
->rs_pool
);
552 mempool_exit(&f
->prealloc_pool
);
553 mempool_exit(&f
->extra_pool
);
554 mempool_exit(&f
->output_pool
);
555 kmem_cache_destroy(f
->cache
);
558 dm_bufio_client_destroy(f
->data_bufio
);
560 dm_bufio_client_destroy(f
->bufio
);
563 dm_put_device(v
->ti
, f
->dev
);
569 static void *fec_rs_alloc(gfp_t gfp_mask
, void *pool_data
)
571 struct dm_verity
*v
= (struct dm_verity
*)pool_data
;
573 return init_rs_gfp(8, 0x11d, 0, 1, v
->fec
->roots
, gfp_mask
);
576 static void fec_rs_free(void *element
, void *pool_data
)
578 struct rs_control
*rs
= (struct rs_control
*)element
;
584 bool verity_is_fec_opt_arg(const char *arg_name
)
586 return (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
) ||
587 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
) ||
588 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
) ||
589 !strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
));
592 int verity_fec_parse_opt_args(struct dm_arg_set
*as
, struct dm_verity
*v
,
593 unsigned *argc
, const char *arg_name
)
596 struct dm_target
*ti
= v
->ti
;
597 const char *arg_value
;
598 unsigned long long num_ll
;
603 ti
->error
= "FEC feature arguments require a value";
607 arg_value
= dm_shift_arg(as
);
610 if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_DEV
)) {
611 r
= dm_get_device(ti
, arg_value
, FMODE_READ
, &v
->fec
->dev
);
613 ti
->error
= "FEC device lookup failed";
617 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_BLOCKS
)) {
618 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
619 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
))
620 >> (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
621 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
624 v
->fec
->blocks
= num_ll
;
626 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_START
)) {
627 if (sscanf(arg_value
, "%llu%c", &num_ll
, &dummy
) != 1 ||
628 ((sector_t
)(num_ll
<< (v
->data_dev_block_bits
- SECTOR_SHIFT
)) >>
629 (v
->data_dev_block_bits
- SECTOR_SHIFT
) != num_ll
)) {
630 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_START
;
633 v
->fec
->start
= num_ll
;
635 } else if (!strcasecmp(arg_name
, DM_VERITY_OPT_FEC_ROOTS
)) {
636 if (sscanf(arg_value
, "%hhu%c", &num_c
, &dummy
) != 1 || !num_c
||
637 num_c
< (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MAX_RSN
) ||
638 num_c
> (DM_VERITY_FEC_RSM
- DM_VERITY_FEC_MIN_RSN
)) {
639 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_ROOTS
;
642 v
->fec
->roots
= num_c
;
645 ti
->error
= "Unrecognized verity FEC feature request";
653 * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
655 int verity_fec_ctr_alloc(struct dm_verity
*v
)
657 struct dm_verity_fec
*f
;
659 f
= kzalloc(sizeof(struct dm_verity_fec
), GFP_KERNEL
);
661 v
->ti
->error
= "Cannot allocate FEC structure";
670 * Validate arguments and preallocate memory. Must be called after arguments
671 * have been parsed using verity_fec_parse_opt_args.
673 int verity_fec_ctr(struct dm_verity
*v
)
675 struct dm_verity_fec
*f
= v
->fec
;
676 struct dm_target
*ti
= v
->ti
;
680 if (!verity_fec_is_enabled(v
)) {
686 * FEC is computed over data blocks, possible metadata, and
687 * hash blocks. In other words, FEC covers total of fec_blocks
688 * blocks consisting of the following:
690 * data blocks | hash blocks | metadata (optional)
692 * We allow metadata after hash blocks to support a use case
693 * where all data is stored on the same device and FEC covers
696 * If metadata is included, we require it to be available on the
697 * hash device after the hash blocks.
700 hash_blocks
= v
->hash_blocks
- v
->hash_start
;
703 * Require matching block sizes for data and hash devices for
706 if (v
->data_dev_block_bits
!= v
->hash_dev_block_bits
) {
707 ti
->error
= "Block sizes must match to use FEC";
712 ti
->error
= "Missing " DM_VERITY_OPT_FEC_ROOTS
;
715 f
->rsn
= DM_VERITY_FEC_RSM
- f
->roots
;
718 ti
->error
= "Missing " DM_VERITY_OPT_FEC_BLOCKS
;
722 f
->rounds
= f
->blocks
;
723 if (sector_div(f
->rounds
, f
->rsn
))
727 * Due to optional metadata, f->blocks can be larger than
728 * data_blocks and hash_blocks combined.
730 if (f
->blocks
< v
->data_blocks
+ hash_blocks
|| !f
->rounds
) {
731 ti
->error
= "Invalid " DM_VERITY_OPT_FEC_BLOCKS
;
736 * Metadata is accessed through the hash device, so we require
737 * it to be large enough.
739 f
->hash_blocks
= f
->blocks
- v
->data_blocks
;
740 if (dm_bufio_get_device_size(v
->bufio
) < f
->hash_blocks
) {
741 ti
->error
= "Hash device is too small for "
742 DM_VERITY_OPT_FEC_BLOCKS
;
746 f
->bufio
= dm_bufio_client_create(f
->dev
->bdev
,
747 1 << v
->data_dev_block_bits
,
749 if (IS_ERR(f
->bufio
)) {
750 ti
->error
= "Cannot initialize FEC bufio client";
751 return PTR_ERR(f
->bufio
);
754 if (dm_bufio_get_device_size(f
->bufio
) <
755 ((f
->start
+ f
->rounds
* f
->roots
) >> v
->data_dev_block_bits
)) {
756 ti
->error
= "FEC device is too small";
760 f
->data_bufio
= dm_bufio_client_create(v
->data_dev
->bdev
,
761 1 << v
->data_dev_block_bits
,
763 if (IS_ERR(f
->data_bufio
)) {
764 ti
->error
= "Cannot initialize FEC data bufio client";
765 return PTR_ERR(f
->data_bufio
);
768 if (dm_bufio_get_device_size(f
->data_bufio
) < v
->data_blocks
) {
769 ti
->error
= "Data device is too small";
773 /* Preallocate an rs_control structure for each worker thread */
774 ret
= mempool_init(&f
->rs_pool
, num_online_cpus(), fec_rs_alloc
,
775 fec_rs_free
, (void *) v
);
777 ti
->error
= "Cannot allocate RS pool";
781 f
->cache
= kmem_cache_create("dm_verity_fec_buffers",
782 f
->rsn
<< DM_VERITY_FEC_BUF_RS_BITS
,
785 ti
->error
= "Cannot create FEC buffer cache";
789 /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
790 ret
= mempool_init_slab_pool(&f
->prealloc_pool
, num_online_cpus() *
791 DM_VERITY_FEC_BUF_PREALLOC
,
794 ti
->error
= "Cannot allocate FEC buffer prealloc pool";
798 ret
= mempool_init_slab_pool(&f
->extra_pool
, 0, f
->cache
);
800 ti
->error
= "Cannot allocate FEC buffer extra pool";
804 /* Preallocate an output buffer for each thread */
805 ret
= mempool_init_kmalloc_pool(&f
->output_pool
, num_online_cpus(),
806 1 << v
->data_dev_block_bits
);
808 ti
->error
= "Cannot allocate FEC output pool";
812 /* Reserve space for our per-bio data */
813 ti
->per_io_data_size
+= sizeof(struct dm_verity_fec_io
);