/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * Added helpers for unaligned native int access
 */

/*
 * 1-way unrolled portable integer math RAID-6 instruction set
 *
 * This file was postprocessed using unroll.pl and then ported to userspace
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "kerncompat.h"
#include "kernel-lib/raid56.h"
/*
 * This is the C data type to use
 */

/* Change this from BITS_PER_LONG if there is something better... */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE	8
# define NSHIFT	3
typedef uint64_t unative_t;
#define put_unaligned_native(val, p)	put_unaligned_64((val), (p))
#define get_unaligned_native(p)		get_unaligned_64((p))
#else
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE	4
# define NSHIFT	2
typedef uint32_t unative_t;
#define put_unaligned_native(val, p)	put_unaligned_32((val), (p))
#define get_unaligned_native(p)		get_unaligned_32((p))
#endif
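
/*
 * NBYTES() replicates a byte constant into every byte lane of unative_t,
 * so the helpers below can process NSIZE independent bytes per operation.
 * A worked example on a 64-bit build:
 *
 *	NBYTES(0x80) == 0x8080808080808080UL
 *	NBYTES(0x1d) == 0x1d1d1d1d1d1d1d1dUL
 */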
/*
 * These sub-operations are separate inlines since they can sometimes be
 * specially optimized using architecture-specific hacks.
 */

/*
 * The SHLBYTE() operation shifts each byte left by 1, *not*
 * rolling over into the next byte
 */
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
	unative_t vv;

	vv = (v << 1) & NBYTES(0xfe);
	return vv;
}

/*
 * The MASK() operation returns 0xFF in any byte for which the high
 * bit is 1, 0x00 for any byte for which the high bit is 0.
 */
static inline __attribute_const__ unative_t MASK(unative_t v)
{
	unative_t vv;

	vv = v & NBYTES(0x80);
	vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
	return vv;
}
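
/*
 * Together, SHLBYTE() and MASK() implement a bytewise multiply-by-2 in
 * GF(2^8) with the RAID6 polynomial 0x11d: shift every byte left by one,
 * then XOR 0x1d into each byte whose top bit was set. A minimal sketch of
 * the step the syndrome loop below performs on the running Q value wq:
 *
 *	w2 = MASK(wq) & NBYTES(0x1d);	- 0x1d in bytes that overflowed
 *	w1 = SHLBYTE(wq);		- every byte shifted left by one
 *	wq = w1 ^ w2;			- wq * 2 in GF(2^8), per byte
 */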
void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	int d, z, z0;

	unative_t wd0, wq0, wp0, w10, w20;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE * 1) {
		wq0 = wp0 = get_unaligned_native(&dptr[z0][d + 0 * NSIZE]);
		for (z = z0 - 1; z >= 0; z--) {
			wd0 = get_unaligned_native(&dptr[z][d + 0 * NSIZE]);
			wp0 ^= wd0;
			w20 = MASK(wq0);	/* 0xff where the top bit was set */
			w10 = SHLBYTE(wq0);	/* each byte shifted left by one */
			w20 &= NBYTES(0x1d);	/* GF(2^8) reduction polynomial */
			w10 ^= w20;		/* wq0 * 2 in GF(2^8) */
			wq0 = w10 ^ wd0;
		}
		put_unaligned_native(wp0, &p[d + NSIZE * 0]);
		put_unaligned_native(wq0, &q[d + NSIZE * 0]);
	}
}
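
/*
 * A minimal usage sketch for raid6_gen_syndrome(); the buffer names are
 * hypothetical. With four data stripes plus P and Q, all of equal length,
 * the pointer table holds the data stripes first, then P, then Q:
 *
 *	void *ptrs[6] = { d0, d1, d2, d3, p, q };
 *
 *	raid6_gen_syndrome(6, BTRFS_STRIPE_LEN, ptrs);
 *
 * On return p holds the XOR parity of d0..d3 and q the Reed-Solomon
 * syndrome; the data buffers are only read.
 */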
static void xor_range(char *dst, const char *src, size_t size)
{
	/* Process byte by byte until dst is aligned to unsigned long */
	while (size && ((unsigned long)dst & (sizeof(unsigned long) - 1))) {
		*dst++ ^= *src++;
		size--;
	}

	/* Aligned part, one unsigned long at a time */
	while (size >= sizeof(unsigned long)) {
		*(unsigned long *)dst ^= *(unsigned long *)src;
		src += sizeof(unsigned long);
		dst += sizeof(unsigned long);
		size -= sizeof(unsigned long);
	}

	/* Remaining tail bytes */
	while (size) {
		*dst++ ^= *src++;
		size--;
	}
}
/*
 * Generate desired data/parity stripe for RAID5
 *
 * @nr_devs:	Total number of devices, including parity
 * @stripe_len:	Stripe length
 * @data:	Data, with special layout:
 *		data[0]:	 Data stripe 0
 *		data[nr_devs-2]: Last data stripe
 *		data[nr_devs-1]: RAID5 parity
 * @dest:	Index of the stripe to generate, following the layout above
 */
int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
{
	int i;
	char *buf = data[dest];

	/* Validation check */
	if (stripe_len != BTRFS_STRIPE_LEN) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}
	if (dest >= nr_devs || nr_devs < 2) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}

	/* Shortcut for 2 devs RAID5, which is just RAID1 */
	if (nr_devs == 2) {
		memcpy(data[dest], data[1 - dest], stripe_len);
		return 0;
	}

	/* XOR all the other stripes into the destination buffer */
	memset(buf, 0, stripe_len);
	for (i = 0; i < nr_devs; i++) {
		if (i == dest)
			continue;
		xor_range(buf, data[i], stripe_len);
	}
	return 0;
}
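
/*
 * A minimal usage sketch for raid5_gen_result(); the buffer names are
 * hypothetical. To rebuild lost data stripe 1 on a 4-device RAID5, place
 * the surviving stripes and the parity at their layout positions and pass
 * the dead index as @dest:
 *
 *	void *data[4] = { d0, d1, d2, parity };
 *	int ret;
 *
 *	ret = raid5_gen_result(4, BTRFS_STRIPE_LEN, 1, data);
 *
 * The same call with dest == nr_devs - 1 regenerates the parity stripe
 * instead, since parity is just the XOR of all the data stripes.
 */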
/*
 * Raid 6 recovery code copied from kernel lib/raid6/recov.c.
 * With modifications:
 * - rename from raid6_2data_recov_intx1
 * - kfree/free modification for btrfs-progs
 */
int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2,
		      void **data)
{
	u8 *p, *q, *dp, *dq;
	u8 px, qx, db;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	char *zero_mem1, *zero_mem2;
	int ret = 0;

	/* Early check */
	if (dest1 < 0 || dest1 >= nr_devs - 2 ||
	    dest2 < 0 || dest2 >= nr_devs - 2 || dest1 >= dest2)
		return -EINVAL;

	zero_mem1 = calloc(1, stripe_len);
	zero_mem2 = calloc(1, stripe_len);
	if (!zero_mem1 || !zero_mem2) {
		free(zero_mem1);
		free(zero_mem2);
		return -ENOMEM;
	}

	p = (u8 *)data[nr_devs - 2];
	q = (u8 *)data[nr_devs - 1];

	/*
	 * Compute syndrome with zero for the missing data pages.
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q.
	 */
	dp = (u8 *)data[dest1];
	data[dest1] = (void *)zero_mem1;
	data[nr_devs - 2] = dp;
	dq = (u8 *)data[dest2];
	data[dest2] = (void *)zero_mem2;
	data[nr_devs - 1] = dq;

	raid6_gen_syndrome(nr_devs, stripe_len, data);

	/* Restore pointer table */
	data[dest1] = dp;
	data[dest2] = dq;
	data[nr_devs - 2] = p;
	data[nr_devs - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_gfmul[raid6_gfexi[dest2 - dest1]];
	qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1] ^ raid6_gfexp[dest2]]];

	/* Now do it... */
	while (stripe_len--) {
		px = *p ^ *dp;			/* P delta */
		qx = qmul[*q ^ *dq];		/* Q delta, scaled */
		*dq++ = db = pbmul[px] ^ qx;	/* Reconstructed B */
		*dp++ = db ^ px;		/* Reconstructed A */
		p++;
		q++;
	}

	free(zero_mem1);
	free(zero_mem2);
	return ret;
}
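
/*
 * Why the loop above works, as a sketch: after the syndrome pass over the
 * zeroed pages, dp holds P' and dq holds Q', so px = P ^ P' and
 * *q ^ *dq = Q ^ Q'. The two dead bytes A and B satisfy, in GF(2^8) with
 * generator g:
 *
 *	A ^ B = px
 *	g^dest1 * A ^ g^dest2 * B = Q ^ Q'
 *
 * Solving for B gives
 *
 *	B = px / (g^(dest2-dest1) ^ 1) ^ (Q ^ Q') / (g^dest1 ^ g^dest2)
 *	  = pbmul[px] ^ qx
 *
 * and then A = B ^ px.
 */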
/*
 * Raid 6 recovery code copied from kernel lib/raid6/recov.c
 * - rename from raid6_datap_recov_intx1()
 * - parameter changed from faila to dest1
 */
int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	char *zero_mem;

	p = (u8 *)data[nr_devs - 2];
	q = (u8 *)data[nr_devs - 1];

	zero_mem = calloc(1, stripe_len);
	if (!zero_mem)
		return -ENOMEM;

	/*
	 * Compute syndrome with zero for the missing data page.
	 * Use the dead data page as temporary storage for delta q.
	 */
	dq = (u8 *)data[dest1];
	data[dest1] = (void *)zero_mem;
	data[nr_devs - 1] = dq;

	raid6_gen_syndrome(nr_devs, stripe_len, data);

	/* Restore pointer table */
	data[dest1] = dq;
	data[nr_devs - 1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]]];

	/* Now do it... */
	while (stripe_len--) {
		*p++ ^= *dq = qmul[*q ^ *dq];
		q++;
		dq++;
	}

	free(zero_mem);
	return 0;
}
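
/*
 * Sketch of the recovery above: with the dead page zeroed, the syndrome
 * pass leaves Q' in dq, and the missing byte D satisfies
 * g^dest1 * D = Q ^ Q', so D = (Q ^ Q') / g^dest1, which is exactly
 * qmul[*q ^ *dq]. The same pass recomputed P without D, so XORing the
 * recovered D back into P repairs the parity as well.
 */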
/* Original raid56 recovery wrapper */
int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1,
		 int dest2, void **data)
{
	int min_devs;
	int ret;

	if (profile & BTRFS_BLOCK_GROUP_RAID5)
		min_devs = 2;
	else if (profile & BTRFS_BLOCK_GROUP_RAID6)
		min_devs = 3;
	else
		return -EINVAL;
	if (nr_devs < min_devs)
		return -EINVAL;

	/* Nothing to recover */
	if (dest1 == -1 && dest2 == -1)
		return 0;

	/* Reorder dest1/2, so only dest2 can be -1 */
	if (dest1 == -1) {
		dest1 = dest2;
		dest2 = -1;
	} else if (dest2 != -1 && dest1 != -1) {
		/* Reorder dest1/2, ensure dest2 > dest1 */
		if (dest1 > dest2) {
			int tmp = dest2;

			dest2 = dest1;
			dest1 = tmp;
		}
	}

	if (profile & BTRFS_BLOCK_GROUP_RAID5) {
		/* Two missing stripes on RAID5 cannot be recovered */
		if (dest2 != -1)
			return 1;
		return raid5_gen_result(nr_devs, stripe_len, dest1, data);
	}

	/* RAID6 one dev corrupted case */
	if (dest2 == -1) {
		/* Regenerate P/Q */
		if (dest1 == nr_devs - 1 || dest1 == nr_devs - 2) {
			raid6_gen_syndrome(nr_devs, stripe_len, data);
			return 0;
		}

		/* Regenerate data from P */
		return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	}

	/* P/Q both corrupted */
	if (dest1 == nr_devs - 2 && dest2 == nr_devs - 1) {
		raid6_gen_syndrome(nr_devs, stripe_len, data);
		return 0;
	}

	/* 2 data stripes corrupted */
	if (dest2 < nr_devs - 2)
		return raid6_recov_data2(nr_devs, stripe_len, dest1, dest2,
					 data);

	/* Data and P corrupted, recover data from Q then regenerate P */
	if (dest2 == nr_devs - 2)
		return raid6_recov_datap(nr_devs, stripe_len, dest1, data);

	/*
	 * Final case, Data and Q, recover data first then regenerate Q
	 */
	ret = raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	if (ret < 0)
		return ret;

	/* Regenerate Q */
	raid6_gen_syndrome(nr_devs, stripe_len, data);
	return 0;
}
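
/*
 * A minimal usage sketch for raid56_recov(); the buffer names are
 * hypothetical. On a 6-device RAID6 with data stripes 0 and 3 lost, pass
 * both stripe indexes, using -1 for an unused destination:
 *
 *	void *data[6] = { d0, d1, d2, d3, p, q };
 *	int ret;
 *
 *	ret = raid56_recov(6, BTRFS_STRIPE_LEN, BTRFS_BLOCK_GROUP_RAID6,
 *			   0, 3, data);
 *
 * Returns 0 on success, 1 if recovery is impossible (e.g. two losses on
 * RAID5), or a negative errno value for invalid parameters.
 */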