Btrfs progs v4.17.1
[btrfs-progs-unstable/devel.git] / kernel-lib / raid56.c
blobe3a9339e6412c9280b222ee9b06b07584e4a87be
1 /* -*- linux-c -*- ------------------------------------------------------- *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
11 * ----------------------------------------------------------------------- */
/*
 * Added helpers for unaligned native int access
 */

/*
 * raid6int1.c
 *
 * 1-way unrolled portable integer math RAID-6 instruction set
 *
 * This file was postprocessed using unroll.pl and then ported to userspace
 */
24 #include <stdint.h>
25 #include <unistd.h>
26 #include "kerncompat.h"
27 #include "ctree.h"
28 #include "disk-io.h"
29 #include "volumes.h"
30 #include "utils.h"
31 #include "kernel-lib/raid56.h"
/*
 * This is the C data type to use
 */

/* Change this from BITS_PER_LONG if there is something better... */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE 8
# define NSHIFT 3
typedef uint64_t unative_t;
#define put_unaligned_native(val,p)	put_unaligned_64((val),(p))
#define get_unaligned_native(p)		get_unaligned_64((p))
#else
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE 4
# define NSHIFT 2
typedef uint32_t unative_t;
#define put_unaligned_native(val,p)	put_unaligned_32((val),(p))
#define get_unaligned_native(p)		get_unaligned_32((p))
#endif
55 * These sub-operations are separate inlines since they can sometimes be
56 * specially optimized using architecture-specific hacks.
60 * The SHLBYTE() operation shifts each byte left by 1, *not*
61 * rolling over into the next byte
63 static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
65 unative_t vv;
67 vv = (v << 1) & NBYTES(0xfe);
68 return vv;
72 * The MASK() operation returns 0xFF in any byte for which the high
73 * bit is 1, 0x00 for any byte for which the high bit is 0.
75 static inline __attribute_const__ unative_t MASK(unative_t v)
77 unative_t vv;
79 vv = v & NBYTES(0x80);
80 vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
81 return vv;
85 void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
87 uint8_t **dptr = (uint8_t **)ptrs;
88 uint8_t *p, *q;
89 int d, z, z0;
91 unative_t wd0, wq0, wp0, w10, w20;
93 z0 = disks - 3; /* Highest data disk */
94 p = dptr[z0+1]; /* XOR parity */
95 q = dptr[z0+2]; /* RS syndrome */
97 for ( d = 0 ; d < bytes ; d += NSIZE*1 ) {
98 wq0 = wp0 = get_unaligned_native(&dptr[z0][d+0*NSIZE]);
99 for ( z = z0-1 ; z >= 0 ; z-- ) {
100 wd0 = get_unaligned_native(&dptr[z][d+0*NSIZE]);
101 wp0 ^= wd0;
102 w20 = MASK(wq0);
103 w10 = SHLBYTE(wq0);
104 w20 &= NBYTES(0x1d);
105 w10 ^= w20;
106 wq0 = w10 ^ wd0;
108 put_unaligned_native(wp0, &p[d+NSIZE*0]);
109 put_unaligned_native(wq0, &q[d+NSIZE*0]);
/*
 * XOR @size bytes of @src into @dst (dst[i] ^= src[i]).
 *
 * Goes byte-by-byte until @dst is word aligned, word-at-a-time for the
 * bulk, then byte-by-byte for the tail.
 * NOTE(review): @src may still be unaligned for the word loop; that read
 * matches the original behavior — confirm target platforms tolerate it.
 */
static void xor_range(char *dst, const char *src, size_t size)
{
	/* Move to word aligned.  The original tested
	 * '& sizeof(unsigned long)', which checks a single address bit
	 * rather than the low alignment bits; use the proper mask. */
	while (size && ((unsigned long)dst & (sizeof(unsigned long) - 1))) {
		*dst++ ^= *src++;
		size--;
	}

	/* Word aligned part */
	while (size >= sizeof(unsigned long)) {
		*(unsigned long *)dst ^= *(const unsigned long *)src;
		src += sizeof(unsigned long);
		dst += sizeof(unsigned long);
		size -= sizeof(unsigned long);
	}

	/* Remaining */
	while (size) {
		*dst++ ^= *src++;
		size--;
	}
}
136 * Generate desired data/parity stripe for RAID5
138 * @nr_devs: Total number of devices, including parity
139 * @stripe_len: Stripe length
140 * @data: Data, with special layout:
141 * data[0]: Data stripe 0
142 * data[nr_devs-2]: Last data stripe
143 * data[nr_devs-1]: RAID5 parity
144 * @dest: To generate which data. should follow above data layout
146 int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
148 int i;
149 char *buf = data[dest];
151 /* Validation check */
152 if (stripe_len <= 0 || stripe_len != BTRFS_STRIPE_LEN) {
153 error("invalid parameter for %s", __func__);
154 return -EINVAL;
157 if (dest >= nr_devs || nr_devs < 2) {
158 error("invalid parameter for %s", __func__);
159 return -EINVAL;
161 /* Shortcut for 2 devs RAID5, which is just RAID1 */
162 if (nr_devs == 2) {
163 memcpy(data[dest], data[1 - dest], stripe_len);
164 return 0;
166 memset(buf, 0, stripe_len);
167 for (i = 0; i < nr_devs; i++) {
168 if (i == dest)
169 continue;
170 xor_range(buf, data[i], stripe_len);
172 return 0;
176 * Raid 6 recovery code copied from kernel lib/raid6/recov.c.
177 * With modifications:
178 * - rename from raid6_2data_recov_intx1
179 * - kfree/free modification for btrfs-progs
181 int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2,
182 void **data)
184 u8 *p, *q, *dp, *dq;
185 u8 px, qx, db;
186 const u8 *pbmul; /* P multiplier table for B data */
187 const u8 *qmul; /* Q multiplier table (for both) */
188 char *zero_mem1, *zero_mem2;
189 int ret = 0;
191 /* Early check */
192 if (dest1 < 0 || dest1 >= nr_devs - 2 ||
193 dest2 < 0 || dest2 >= nr_devs - 2 || dest1 >= dest2)
194 return -EINVAL;
196 zero_mem1 = calloc(1, stripe_len);
197 zero_mem2 = calloc(1, stripe_len);
198 if (!zero_mem1 || !zero_mem2) {
199 free(zero_mem1);
200 free(zero_mem2);
201 return -ENOMEM;
204 p = (u8 *)data[nr_devs - 2];
205 q = (u8 *)data[nr_devs - 1];
207 /* Compute syndrome with zero for the missing data pages
208 Use the dead data pages as temporary storage for
209 delta p and delta q */
210 dp = (u8 *)data[dest1];
211 data[dest1] = (void *)zero_mem1;
212 data[nr_devs - 2] = dp;
213 dq = (u8 *)data[dest2];
214 data[dest2] = (void *)zero_mem2;
215 data[nr_devs - 1] = dq;
217 raid6_gen_syndrome(nr_devs, stripe_len, data);
219 /* Restore pointer table */
220 data[dest1] = dp;
221 data[dest2] = dq;
222 data[nr_devs - 2] = p;
223 data[nr_devs - 1] = q;
225 /* Now, pick the proper data tables */
226 pbmul = raid6_gfmul[raid6_gfexi[dest2 - dest1]];
227 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]^raid6_gfexp[dest2]]];
229 /* Now do it... */
230 while ( stripe_len-- ) {
231 px = *p ^ *dp;
232 qx = qmul[*q ^ *dq];
233 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
234 *dp++ = db ^ px; /* Reconstructed A */
235 p++; q++;
238 free(zero_mem1);
239 free(zero_mem2);
240 return ret;
244 * Raid 6 recover code copied from kernel lib/raid6/recov.c
245 * - rename from raid6_datap_recov_intx1()
246 * - parameter changed from faila to dest1
248 int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data)
250 u8 *p, *q, *dq;
251 const u8 *qmul; /* Q multiplier table */
252 char *zero_mem;
254 p = (u8 *)data[nr_devs - 2];
255 q = (u8 *)data[nr_devs - 1];
257 zero_mem = calloc(1, stripe_len);
258 if (!zero_mem)
259 return -ENOMEM;
261 /* Compute syndrome with zero for the missing data page
262 Use the dead data page as temporary storage for delta q */
263 dq = (u8 *)data[dest1];
264 data[dest1] = (void *)zero_mem;
265 data[nr_devs - 1] = dq;
267 raid6_gen_syndrome(nr_devs, stripe_len, data);
269 /* Restore pointer table */
270 data[dest1] = dq;
271 data[nr_devs - 1] = q;
273 /* Now, pick the proper data tables */
274 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]]];
276 /* Now do it... */
277 while ( stripe_len-- ) {
278 *p++ ^= *dq = qmul[*q ^ *dq];
279 q++; dq++;
281 return 0;
284 /* Original raid56 recovery wrapper */
285 int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1,
286 int dest2, void **data)
288 int min_devs;
289 int ret;
291 if (profile & BTRFS_BLOCK_GROUP_RAID5)
292 min_devs = 2;
293 else if (profile & BTRFS_BLOCK_GROUP_RAID6)
294 min_devs = 3;
295 else
296 return -EINVAL;
297 if (nr_devs < min_devs)
298 return -EINVAL;
300 /* Nothing to recover */
301 if (dest1 == -1 && dest2 == -1)
302 return 0;
304 /* Reorder dest1/2, so only dest2 can be -1 */
305 if (dest1 == -1) {
306 dest1 = dest2;
307 dest2 = -1;
308 } else if (dest2 != -1 && dest1 != -1) {
309 /* Reorder dest1/2, ensure dest2 > dest1 */
310 if (dest1 > dest2) {
311 int tmp;
313 tmp = dest2;
314 dest2 = dest1;
315 dest1 = tmp;
319 if (profile & BTRFS_BLOCK_GROUP_RAID5) {
320 if (dest2 != -1)
321 return 1;
322 return raid5_gen_result(nr_devs, stripe_len, dest1, data);
325 /* RAID6 one dev corrupted case*/
326 if (dest2 == -1) {
327 /* Regenerate P/Q */
328 if (dest1 == nr_devs - 1 || dest1 == nr_devs - 2) {
329 raid6_gen_syndrome(nr_devs, stripe_len, data);
330 return 0;
333 /* Regerneate data from P */
334 return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
337 /* P/Q bot corrupted */
338 if (dest1 == nr_devs - 2 && dest2 == nr_devs - 1) {
339 raid6_gen_syndrome(nr_devs, stripe_len, data);
340 return 0;
343 /* 2 Data corrupted */
344 if (dest2 < nr_devs - 2)
345 return raid6_recov_data2(nr_devs, stripe_len, dest1, dest2,
346 data);
347 /* Data and P*/
348 if (dest2 == nr_devs - 1)
349 return raid6_recov_datap(nr_devs, stripe_len, dest1, data);
352 * Final case, Data and Q, recover data first then regenerate Q
354 ret = raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
355 if (ret < 0)
356 return ret;
357 raid6_gen_syndrome(nr_devs, stripe_len, data);
358 return 0;