1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * This file contains an ECC algorithm that detects and corrects 1 bit
4 * errors in a 256 byte block of data.
6 * Copyright © 2008 Koninklijke Philips Electronics NV.
7 * Author: Frans Meulenbroeks
9 * Completely replaces the previous ECC implementation which was written by:
10 * Steven J. Hill (sjhill@realitydiluted.com)
11 * Thomas Gleixner (tglx@linutronix.de)
13 * Information on how this algorithm works and how it was developed
14 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/mtd/rawnand.h>
22 #include <linux/mtd/nand_ecc.h>
23 #include <asm/byteorder.h>
26 * invparity is a 256 byte table that contains the odd parity
27 * for each byte. So if the number of bits in a byte is even,
28 * the array element is 1, and when the number of bits is odd
29 * the array eleemnt is 0.
31 static const char invparity
[256] = {
32 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
33 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
34 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
35 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
36 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
37 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
38 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
39 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
40 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
41 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
42 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
43 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
44 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
45 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
46 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
47 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
51 * bitsperbyte contains the number of bits per byte
52 * this is only used for testing and repairing parity
53 * (a precalculated value slightly improves performance)
55 static const char bitsperbyte
[256] = {
56 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
57 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
59 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
61 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
62 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
63 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
64 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
65 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
66 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
67 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
68 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
69 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
70 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
71 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
75 * addressbits is a lookup table to filter out the bits from the xor-ed
76 * ECC data that identify the faulty location.
77 * this is only used for repairing parity
78 * see the comments in nand_correct_data for more details
80 static const char addressbits
[256] = {
81 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
82 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
83 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
84 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
85 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
86 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
87 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
88 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
89 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
90 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
91 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
92 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
93 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
94 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
95 0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
96 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
97 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
98 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
99 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
100 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
101 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
102 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
103 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
104 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
105 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
106 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
107 0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
108 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
109 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
110 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
111 0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
112 0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
116 * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
118 * @buf: input buffer with raw data
119 * @eccsize: data bytes per ECC step (256 or 512)
120 * @code: output buffer with ECC
121 * @sm_order: Smart Media byte ordering
123 void __nand_calculate_ecc(const unsigned char *buf
, unsigned int eccsize
,
124 unsigned char *code
, bool sm_order
)
127 const uint32_t *bp
= (uint32_t *)buf
;
128 /* 256 or 512 bytes/ecc */
129 const uint32_t eccsize_mult
= eccsize
>> 8;
130 uint32_t cur
; /* current value in buffer */
131 /* rp0..rp15..rp17 are the various accumulated parities (per byte) */
132 uint32_t rp0
, rp1
, rp2
, rp3
, rp4
, rp5
, rp6
, rp7
;
133 uint32_t rp8
, rp9
, rp10
, rp11
, rp12
, rp13
, rp14
, rp15
, rp16
;
134 uint32_t uninitialized_var(rp17
); /* to make compiler happy */
135 uint32_t par
; /* the cumulative parity for all data */
136 uint32_t tmppar
; /* the cumulative parity for this iteration;
137 for rp12, rp14 and rp16 at the end of the
150 * The loop is unrolled a number of times;
151 * This avoids if statements to decide on which rp value to update
152 * Also we process the data by longwords.
153 * Note: passing unaligned data might give a performance penalty.
154 * It is assumed that the buffers are aligned.
155 * tmppar is the cumulative sum of this iteration.
156 * needed for calculating rp12, rp14, rp16 and par
157 * also used as a performance improvement for rp6, rp8 and rp10
159 for (i
= 0; i
< eccsize_mult
<< 2; i
++) {
222 if (eccsize_mult
== 2 && (i
& 0x4) == 0)
227 * handle the fact that we use longword operations
228 * we'll bring rp4..rp14..rp16 back to single byte entities by
229 * shifting and xoring first fold the upper and lower 16 bits,
230 * then the upper and lower 8 bits.
241 rp10
^= (rp10
>> 16);
244 rp12
^= (rp12
>> 16);
247 rp14
^= (rp14
>> 16);
250 if (eccsize_mult
== 2) {
251 rp16
^= (rp16
>> 16);
257 * we also need to calculate the row parity for rp0..rp3
258 * This is present in par, because par is now
259 * rp3 rp3 rp2 rp2 in little endian and
260 * rp2 rp2 rp3 rp3 in big endian
262 * rp1 rp0 rp1 rp0 in little endian and
263 * rp0 rp1 rp0 rp1 in big endian
264 * First calculate rp2 and rp3
282 /* reduce par to 16 bits then calculate rp1 and rp0 */
285 rp0
= (par
>> 8) & 0xff;
288 rp1
= (par
>> 8) & 0xff;
292 /* finally reduce par to 8 bits */
297 * and calculate rp5..rp15..rp17
298 * note that par = rp4 ^ rp5 and due to the commutative property
299 * of the ^ operator we can say:
301 * The & 0xff seems superfluous, but benchmarking learned that
302 * leaving it out gives slightly worse results. No idea why, probably
303 * it has to do with the way the pipeline in pentium is organized.
305 rp5
= (par
^ rp4
) & 0xff;
306 rp7
= (par
^ rp6
) & 0xff;
307 rp9
= (par
^ rp8
) & 0xff;
308 rp11
= (par
^ rp10
) & 0xff;
309 rp13
= (par
^ rp12
) & 0xff;
310 rp15
= (par
^ rp14
) & 0xff;
311 if (eccsize_mult
== 2)
312 rp17
= (par
^ rp16
) & 0xff;
315 * Finally calculate the ECC bits.
316 * Again here it might seem that there are performance optimisations
317 * possible, but benchmarks showed that on the system this is developed
318 * the code below is the fastest
321 code
[0] = (invparity
[rp7
] << 7) | (invparity
[rp6
] << 6) |
322 (invparity
[rp5
] << 5) | (invparity
[rp4
] << 4) |
323 (invparity
[rp3
] << 3) | (invparity
[rp2
] << 2) |
324 (invparity
[rp1
] << 1) | (invparity
[rp0
]);
325 code
[1] = (invparity
[rp15
] << 7) | (invparity
[rp14
] << 6) |
326 (invparity
[rp13
] << 5) | (invparity
[rp12
] << 4) |
327 (invparity
[rp11
] << 3) | (invparity
[rp10
] << 2) |
328 (invparity
[rp9
] << 1) | (invparity
[rp8
]);
330 code
[1] = (invparity
[rp7
] << 7) | (invparity
[rp6
] << 6) |
331 (invparity
[rp5
] << 5) | (invparity
[rp4
] << 4) |
332 (invparity
[rp3
] << 3) | (invparity
[rp2
] << 2) |
333 (invparity
[rp1
] << 1) | (invparity
[rp0
]);
334 code
[0] = (invparity
[rp15
] << 7) | (invparity
[rp14
] << 6) |
335 (invparity
[rp13
] << 5) | (invparity
[rp12
] << 4) |
336 (invparity
[rp11
] << 3) | (invparity
[rp10
] << 2) |
337 (invparity
[rp9
] << 1) | (invparity
[rp8
]);
340 if (eccsize_mult
== 1)
342 (invparity
[par
& 0xf0] << 7) |
343 (invparity
[par
& 0x0f] << 6) |
344 (invparity
[par
& 0xcc] << 5) |
345 (invparity
[par
& 0x33] << 4) |
346 (invparity
[par
& 0xaa] << 3) |
347 (invparity
[par
& 0x55] << 2) |
351 (invparity
[par
& 0xf0] << 7) |
352 (invparity
[par
& 0x0f] << 6) |
353 (invparity
[par
& 0xcc] << 5) |
354 (invparity
[par
& 0x33] << 4) |
355 (invparity
[par
& 0xaa] << 3) |
356 (invparity
[par
& 0x55] << 2) |
357 (invparity
[rp17
] << 1) |
358 (invparity
[rp16
] << 0);
360 EXPORT_SYMBOL(__nand_calculate_ecc
);
363 * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
365 * @chip: NAND chip object
366 * @buf: input buffer with raw data
367 * @code: output buffer with ECC
369 int nand_calculate_ecc(struct nand_chip
*chip
, const unsigned char *buf
,
372 bool sm_order
= chip
->ecc
.options
& NAND_ECC_SOFT_HAMMING_SM_ORDER
;
374 __nand_calculate_ecc(buf
, chip
->ecc
.size
, code
, sm_order
);
378 EXPORT_SYMBOL(nand_calculate_ecc
);
381 * __nand_correct_data - [NAND Interface] Detect and correct bit error(s)
382 * @buf: raw data read from the chip
383 * @read_ecc: ECC from the chip
384 * @calc_ecc: the ECC calculated from raw data
385 * @eccsize: data bytes per ECC step (256 or 512)
386 * @sm_order: Smart Media byte order
388 * Detect and correct a 1 bit error for eccsize byte block
390 int __nand_correct_data(unsigned char *buf
,
391 unsigned char *read_ecc
, unsigned char *calc_ecc
,
392 unsigned int eccsize
, bool sm_order
)
394 unsigned char b0
, b1
, b2
, bit_addr
;
395 unsigned int byte_addr
;
396 /* 256 or 512 bytes/ecc */
397 const uint32_t eccsize_mult
= eccsize
>> 8;
400 * b0 to b2 indicate which bit is faulty (if any)
401 * we might need the xor result more than once,
402 * so keep them in a local var
405 b0
= read_ecc
[0] ^ calc_ecc
[0];
406 b1
= read_ecc
[1] ^ calc_ecc
[1];
408 b0
= read_ecc
[1] ^ calc_ecc
[1];
409 b1
= read_ecc
[0] ^ calc_ecc
[0];
412 b2
= read_ecc
[2] ^ calc_ecc
[2];
414 /* check if there are any bitfaults */
416 /* repeated if statements are slightly more efficient than switch ... */
417 /* ordered in order of likelihood */
419 if ((b0
| b1
| b2
) == 0)
420 return 0; /* no error */
422 if ((((b0
^ (b0
>> 1)) & 0x55) == 0x55) &&
423 (((b1
^ (b1
>> 1)) & 0x55) == 0x55) &&
424 ((eccsize_mult
== 1 && ((b2
^ (b2
>> 1)) & 0x54) == 0x54) ||
425 (eccsize_mult
== 2 && ((b2
^ (b2
>> 1)) & 0x55) == 0x55))) {
426 /* single bit error */
428 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
429 * byte, cp 5/3/1 indicate the faulty bit.
430 * A lookup table (called addressbits) is used to filter
431 * the bits from the byte they are in.
432 * A marginal optimisation is possible by having three
433 * different lookup tables.
434 * One as we have now (for b0), one for b2
435 * (that would avoid the >> 1), and one for b1 (with all values
436 * << 4). However it was felt that introducing two more tables
437 * hardly justify the gain.
439 * The b2 shift is there to get rid of the lowest two bits.
440 * We could also do addressbits[b2] >> 1 but for the
441 * performance it does not make any difference
443 if (eccsize_mult
== 1)
444 byte_addr
= (addressbits
[b1
] << 4) + addressbits
[b0
];
446 byte_addr
= (addressbits
[b2
& 0x3] << 8) +
447 (addressbits
[b1
] << 4) + addressbits
[b0
];
448 bit_addr
= addressbits
[b2
>> 2];
450 buf
[byte_addr
] ^= (1 << bit_addr
);
454 /* count nr of bits; use table lookup, faster than calculating it */
455 if ((bitsperbyte
[b0
] + bitsperbyte
[b1
] + bitsperbyte
[b2
]) == 1)
456 return 1; /* error in ECC data; no action needed */
458 pr_err("%s: uncorrectable ECC error\n", __func__
);
461 EXPORT_SYMBOL(__nand_correct_data
);
464 * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
465 * @chip: NAND chip object
466 * @buf: raw data read from the chip
467 * @read_ecc: ECC from the chip
468 * @calc_ecc: the ECC calculated from raw data
470 * Detect and correct a 1 bit error for 256/512 byte block
472 int nand_correct_data(struct nand_chip
*chip
, unsigned char *buf
,
473 unsigned char *read_ecc
, unsigned char *calc_ecc
)
475 bool sm_order
= chip
->ecc
.options
& NAND_ECC_SOFT_HAMMING_SM_ORDER
;
477 return __nand_correct_data(buf
, read_ecc
, calc_ecc
, chip
->ecc
.size
,
480 EXPORT_SYMBOL(nand_correct_data
);
482 MODULE_LICENSE("GPL");
483 MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
484 MODULE_DESCRIPTION("Generic NAND ECC support");