1 /* Filter driver - middle layer - checksumming */
/* Number of data bytes covered by one checksum group. */
#define GROUP_SIZE		(SECTOR_SIZE * NR_SUM_SEC)

/* On disk, every group of NR_SUM_SEC data sectors is followed by one checksum
 * sector, so a group occupies NR_SUM_SEC+1 physical sectors.
 */
/* Physical sector at which the group holding logical sector 'nr' starts. */
#define GROUP_PHYS_BASE(nr)	((nr)/NR_SUM_SEC*(NR_SUM_SEC+1))
/* Physical sector of the checksum sector that covers logical sector 'nr'. */
#define SEC2SUM_NR(nr)		(GROUP_PHYS_BASE(nr) + NR_SUM_SEC)
/* Physical sector that stores logical sector 'nr'. */
#define LOG2PHYS(nr)		(GROUP_PHYS_BASE(nr) + (nr)%NR_SUM_SEC)

/* Conversion between byte positions and sector numbers. */
#define POS2SEC(nr)		div64u((nr), SECTOR_SIZE)
#define SEC2POS(nr)		mul64u((nr), SECTOR_SIZE)
/* Buffers shared by the routines in this file. The *_array pointers are
 * handed to flt_malloc()/flt_free() as the backing array for a request.
 */
static char *ext_array;		/* backing array for the interspersed buffer */
static char *ext_buffer;	/* interspersed buffer of the current transfer */
static char *rb0_array;		/* write readback buffer for disk 0 */
static char *rb1_array;		/* write readback buffer for disk 1 */
19 /*===========================================================================*
21 *===========================================================================*/
/* Buffer setup: obtain the three SBUF_SIZE arrays (ext_array, rb0_array and
 * rb1_array) with flt_malloc(SBUF_SIZE, NULL, 0) and panic() if any of the
 * three calls returned NULL.
 * NOTE(review): the header and braces of the enclosing function are not part
 * of this listing.
 */
24 /* Initialize buffers. */
26 ext_array
= flt_malloc(SBUF_SIZE
, NULL
, 0);
27 rb0_array
= flt_malloc(SBUF_SIZE
, NULL
, 0);
28 rb1_array
= flt_malloc(SBUF_SIZE
, NULL
, 0);
30 if (ext_array
== NULL
|| rb0_array
== NULL
|| rb1_array
== NULL
)
31 panic("no memory available");
34 /*===========================================================================*
36 *===========================================================================*/
/* calc_sum - compute the checksum of one sector.
 *
 *   sector  sector number; it has to influence the checksum (the MD5 variant
 *           hashes sizeof(sector) bytes of it after the data)
 *   data    sector contents; SECTOR_SIZE bytes are passed to compute_crc()
 *           and MD5Update()
 *   sum     output buffer; the XOR variant clears SUM_SIZE bytes of it
 *           before accumulating, the MD5 variant has MD5Final() store the
 *           digest here
 *
 * The visible fragments belong to four variants (no checksum, XOR, CRC and
 * MD5) followed by a panic() that reports SUM_TYPE, so the variant is
 * presumably selected on SUM_TYPE. The selecting statement and the
 * assignments through 'q' are not part of this listing.
 *
 * NOTE(review): 'data' and 'sum' are accessed through 'unsigned long *'
 * casts, which assumes both buffers are suitably aligned - confirm with the
 * callers.
 * NOTE(review): the "§or" token in the second MD5Update() call looks like a
 * mis-decoded "&sector" (HTML entity "&sect"); the argument that follows it
 * is sizeof(sector). Restore it before compiling.
 */
37 static void calc_sum(unsigned sector
, char *data
, char *sum
)
39 /* Compute the checksum for a sector. The sector number must be part
40 * of the checksum in some way.
42 unsigned long crc
, *p
, *q
;
44 struct MD5Context ctx
;
48 /* No checksum at all */
50 q
= (unsigned long *) sum
;
56 /* Basic XOR checksum */
57 p
= (unsigned long *) data
;
59 memset(sum
, 0, SUM_SIZE
);
60 for(i
= 0; i
< SECTOR_SIZE
/ SUM_SIZE
; i
++) {
61 q
= (unsigned long *) sum
;
62 for(j
= 0; (size_t) j
< SUM_SIZE
/ sizeof(*p
); j
++) {
68 q
= (unsigned long *) sum
;
76 crc
= compute_crc((unsigned char *) data
, SECTOR_SIZE
);
78 q
= (unsigned long *) sum
;
88 MD5Update(&ctx
, (unsigned char *) data
, SECTOR_SIZE
);
89 MD5Update(&ctx
, (unsigned char *) §or
, sizeof(sector
));
90 MD5Final((unsigned char *) sum
, &ctx
);
95 panic("invalid checksum type: %d", SUM_TYPE
);
99 /*===========================================================================*
101 *===========================================================================*/
/* read_sectors - read 'count' sectors starting at a physical sector.
 *
 *   buf          destination; must hold count * SECTOR_SIZE bytes
 *   phys_sector  physical sector number, converted to a byte position with
 *                SEC2POS()
 *   count        number of sectors requested
 *
 * The request goes through read_write() with FLT_READ, passing 'buf' for both
 * buffer arguments and 'size' by reference. 'wsize' keeps the requested byte
 * count; the bytes between the 'size' left behind by read_write() and 'wsize'
 * are cleared with memset().
 * NOTE(review): the tests guarding the EOF message and the return statements
 * are not part of this listing; presumably a read_write() failure is returned
 * as is - confirm against the full source.
 * NOTE(review): 'phys_sector' is printed with %lu, which assumes sector_t is
 * unsigned long.
 */
102 static int read_sectors(char *buf
, sector_t phys_sector
, int count
)
104 /* Read 'count' sectors starting at 'phys_sector' into 'buf'. If an
105 * EOF occurs, zero-fill the remaining part of the buffer.
110 size
= wsize
= count
* SECTOR_SIZE
;
112 r
= read_write(SEC2POS(phys_sector
), buf
, buf
, &size
, FLT_READ
);
119 printf("Filter: EOF reading sector %lu\n", phys_sector
);
122 memset(buf
+ size
, 0, wsize
- size
);
128 /*===========================================================================*
130 *===========================================================================*/
/* make_group_sum - fill in checksums for consecutive sectors of one group.
 *
 *   bufp    first data sector to checksum
 *   sump    start of the group's checksum sector; advanced by
 *           index * SUM_SIZE to reach the slot of the first sector
 *   sector  logical sector number of that first sector, passed to calc_sum()
 *   index   position of that sector within its group
 *
 * NOTE(review): the parameter list is cut off after 'index' and the loop
 * around calc_sum() is missing from this listing. The comment below names a
 * 'count', so presumably calc_sum() runs 'count' times while the pointers and
 * the sector number advance - confirm against the full source.
 */
131 static void make_group_sum(char *bufp
, char *sump
, sector_t sector
, int index
,
134 /* Compute checksums for 'count' sectors within a group, starting at
135 * sector 'index' into the group, which has logical sector number
136 * 'sector'. The 'bufp' pointer points to the same first sector to
137 * start checksumming; 'sump' is a pointer to the checksum sector.
140 sump
+= index
* SUM_SIZE
;
143 calc_sum(sector
, bufp
, sump
);
152 /*===========================================================================*
154 *===========================================================================*/
/* check_group_sum - verify stored checksums of consecutive sectors of a group.
 *
 *   bufp    first data sector to verify
 *   sump    start of the group's checksum sector (read only); advanced by
 *           index * SUM_SIZE to reach the slot of the first sector
 *   sector  logical sector number of the first sector
 *   index   position of that sector within its group
 *   count   number of sectors to verify
 *
 * calc_sum() computes a fresh checksum into the local 'sum_buffer', whose
 * first SUM_SIZE bytes are compared with the stored checksum using memcmp().
 * On a mismatch the sector is reported and the result of
 * bad_driver(DRIVER_MAIN, BD_DATA, EIO) is returned.
 * NOTE(review): the loop over 'count' and the final return are not part of
 * this listing.
 * NOTE(review): 'sector' is printed with %lu, which assumes sector_t is
 * unsigned long.
 */
155 static int check_group_sum(char *bufp
, const char *sump
, sector_t sector
,
156 int index
, int count
)
158 /* Check checksums in a group. Parameters are the same as in
159 * make_group_sum(). Return OK if all checksums check out, or RET_REDO
162 char sum_buffer
[SECTOR_SIZE
];
164 sump
+= index
* SUM_SIZE
;
167 calc_sum(sector
, bufp
, sum_buffer
);
169 if (memcmp(sum_buffer
, sump
, SUM_SIZE
)) {
170 printf("Filter: BAD CHECKSUM at sector %lu\n", sector
);
173 return bad_driver(DRIVER_MAIN
, BD_DATA
, EIO
);
184 /*===========================================================================*
186 *===========================================================================*/
/* make_sum - generate checksums for the write data in the interspersed buffer.
 *
 *   current_sector  logical sector number of the first data sector
 *   sectors_left    number of data sectors in the buffer
 *
 * Visible steps:
 *  - sector_in_group and group_left describe where 'current_sector' sits in
 *    its group and how many sectors remain up to the group's checksum sector.
 *  - While the data reaches the end of the current group
 *    (sectors_left >= group_left): when starting in mid-group
 *    (sector_in_group > 0) read_sectors() fetches from physical sector
 *    LOG2PHYS(current_sector) + group_left, which is the group's checksum
 *    sector, into the slot at extp + size; otherwise that slot is zeroed.
 *    make_group_sum() then fills in the checksums, and extp, sectors_left and
 *    current_sector move on to the next group.
 *  - For a remaining partial group, read_sectors() fetches starting right
 *    behind the tail data (LOG2PHYS(current_sector) + sectors_left) and
 *    make_group_sum() targets the checksum sector 'gap' sectors further on.
 *
 * NOTE(review): the declarations of extp, size, gap and r, the count
 * arguments of read_sectors(), the error returns and the final return are
 * not part of this listing. extp presumably starts at ext_buffer - confirm.
 */
187 static int make_sum(sector_t current_sector
, sector_t sectors_left
)
189 /* Compute checksums over all data in the buffer with expanded data.
190 * As side effect, possibly read in first and last checksum sectors
191 * and data to fill the gap between the last data sector and the last
194 sector_t sector_in_group
, group_left
;
199 /* See the description of the extended buffer in transfer(). A number
200 * of points are relevant for this function in particular:
202 * 1) If the "xx" head of the buffer does not cover an entire group,
203 * we need to copy in the first checksum sector so that we can
205 * 2) We can generate checksums for the full "yyyyy" groups without
206 * copying in the corresponding checksum sectors first, because
207 * those sectors will be overwritten entirely anyway.
208 * 3) We copy in not only the checksum sector for the group containing
209 * the "zzz" tail data, but also all the data between "zzz" and the
210 * last checksum sector. This allows us to write all the data in
211 * the buffer in one operation. In theory, we could verify the
212 * checksum of the data in this gap for extra early failure
213 * detection, but we currently do not do this.
215 * If points 1 and 3 cover the same group (implying a small, unaligned
216 * write operation), the read operation is done only once. Whether
217 * point 1 or 3 is skipped depends on whether there is a gap before
218 * the checksum sector.
221 sector_in_group
= current_sector
% NR_SUM_SEC
;
222 group_left
= NR_SUM_SEC
- sector_in_group
;
226 /* This loop covers points 1 and 2. */
227 while (sectors_left
>= group_left
) {
228 size
= group_left
* SECTOR_SIZE
;
230 if (sector_in_group
> 0) {
231 if ((r
= read_sectors(extp
+ size
,
232 LOG2PHYS(current_sector
) + group_left
,
236 else memset(extp
+ size
, 0, SECTOR_SIZE
);
238 make_group_sum(extp
, extp
+ size
, current_sector
,
239 sector_in_group
, group_left
);
241 extp
+= size
+ SECTOR_SIZE
;
243 sectors_left
-= group_left
;
244 current_sector
+= group_left
;
247 group_left
= NR_SUM_SEC
;
250 /* The remaining code covers point 3. */
251 if (sectors_left
> 0) {
252 size
= sectors_left
* SECTOR_SIZE
;
254 if (group_left
!= NR_SUM_SEC
- sector_in_group
)
255 panic("group_left assertion: %d", 0);
257 gap
= group_left
- sectors_left
;
260 panic("gap assertion: %d", 0);
262 if ((r
= read_sectors(extp
+ size
,
263 LOG2PHYS(current_sector
) + sectors_left
,
267 make_group_sum(extp
, extp
+ size
+ gap
* SECTOR_SIZE
,
268 current_sector
, sector_in_group
, sectors_left
);
274 /*===========================================================================*
276 *===========================================================================*/
/* check_sum - verify the checksums of data read into the interspersed buffer.
 *
 *   current_sector  logical sector number of the first data sector
 *   bytes_left      number of buffer bytes still to be processed; transfer()
 *                   passes a size that includes interspersed checksum sectors
 *
 * Per iteration 'size' is the data part of the current group that is present
 * (at most groupbytes_left), and check_group_sum() is handed the data at extp
 * together with the checksum sector at extp + groupbytes_left. The data plus
 * one checksum sector is then skipped; MIN() keeps bytes_left from wrapping
 * when fewer than size + SECTOR_SIZE bytes remain. After the first group a
 * full GROUP_SIZE is expected per group.
 * NOTE(review): the declarations of extp and count, the statement executed
 * when check_group_sum() fails, any reset of sector_in_group and the final
 * return are not part of this listing. extp presumably starts at ext_buffer.
 */
277 static int check_sum(sector_t current_sector
, size_t bytes_left
)
279 /* Check checksums of all data in the buffer with expanded data.
280 * Return OK if all checksums are okay, or RET_REDO upon failure.
282 sector_t sector_in_group
;
283 size_t size
, groupbytes_left
;
289 sector_in_group
= current_sector
% NR_SUM_SEC
;
290 groupbytes_left
= (NR_SUM_SEC
- sector_in_group
) * SECTOR_SIZE
;
292 while (bytes_left
> 0) {
293 size
= MIN(bytes_left
, groupbytes_left
);
294 count
= size
/ SECTOR_SIZE
;
296 if (check_group_sum(extp
, extp
+ groupbytes_left
,
297 current_sector
, sector_in_group
, count
))
300 extp
+= size
+ SECTOR_SIZE
;
302 bytes_left
-= MIN(size
+ SECTOR_SIZE
, bytes_left
);
303 current_sector
+= count
;
306 groupbytes_left
= GROUP_SIZE
;
312 /*===========================================================================*
314 *===========================================================================*/
/* check_write - read back freshly written data and compare it with ext_buffer.
 *
 *   pos   byte position the data was written to
 *   size  number of bytes written; passed by reference to read_write(), so
 *         afterwards it is the number of bytes that could be read back
 *
 * Readback buffers come from flt_malloc() with rb0_array/rb1_array as backing
 * arrays. rb1_buffer first aliases rb0_buffer and is then given its own
 * buffer (presumably only when USE_MIRROR is set - the guard is not visible).
 * The data is fetched with read_write(..., FLT_READ2) and compared with
 * memcmp() over 'size' bytes: a mismatch on disk 0 returns
 * bad_driver(DRIVER_MAIN, BD_DATA, EFAULT), a mismatch on disk 1 (mirror
 * only) returns bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT). Buffers are
 * released with flt_free() using 'orig_size'.
 *
 * NOTE(review): the declarations of r and orig_size, the test in front of the
 * first pair of flt_free() calls and the final return are not part of this
 * listing.
 * NOTE(review): no flt_free() is visible in front of the two mismatch
 * returns; if flt_malloc() can hand out memory other than the backing array,
 * confirm the buffers are released on those paths.
 * NOTE(review): the readback messages format a size with %d; if the argument
 * is the size_t 'size', the conversion does not match.
 */
315 static int check_write(u64_t pos
, size_t size
)
317 /* Read back the data just written, from both disks if mirroring is
318 * enabled, and check the result against the original. Return OK on
319 * success; report the malfunctioning driver and return RET_REDO
322 char *rb0_buffer
, *rb1_buffer
;
329 rb0_buffer
= rb1_buffer
=
330 flt_malloc(size
, rb0_array
, SBUF_SIZE
);
332 rb1_buffer
= flt_malloc(size
, rb1_array
, SBUF_SIZE
);
336 r
= read_write(pos
, rb0_buffer
, rb1_buffer
, &size
, FLT_READ2
);
339 if (USE_MIRROR
) flt_free(rb1_buffer
, orig_size
, rb1_array
);
340 flt_free(rb0_buffer
, orig_size
, rb0_array
);
345 /* If we get a size smaller than what we requested, then we somehow
346 * succeeded in writing past the disk end, and now fail to read it all
347 * back. This is not an error, and we just compare the part that we
348 * did manage to read back in.
351 if (memcmp(ext_buffer
, rb0_buffer
, size
)) {
353 printf("Filter: readback from disk 0 failed (size %d)\n",
357 return bad_driver(DRIVER_MAIN
, BD_DATA
, EFAULT
);
360 if (USE_MIRROR
&& memcmp(ext_buffer
, rb1_buffer
, size
)) {
362 printf("Filter: readback from disk 1 failed (size %d)\n",
366 return bad_driver(DRIVER_BACKUP
, BD_DATA
, EFAULT
);
369 if (USE_MIRROR
) flt_free(rb1_buffer
, orig_size
, rb1_array
);
370 flt_free(rb0_buffer
, orig_size
, rb0_array
);
375 /*===========================================================================*
377 *===========================================================================*/
/* expand - spread contiguous user data over the interspersed buffer.
 *
 *   first_sector  logical sector number of the first sector in 'buffer'
 *   buffer        contiguous source data
 *   sectors_left  number of sectors to copy
 *
 * Data is copied group by group: 'count' is the number of sectors that fit
 * in what is left of the current group, and after each memcpy() the
 * destination skips one extra SECTOR_SIZE, which leaves the checksum sector
 * untouched. From the second group on a full NR_SUM_SEC sectors fit.
 * NOTE(review): the declarations and initial values of srcp and dstp and the
 * advance of srcp are not part of this listing; presumably srcp starts at
 * 'buffer' and dstp at ext_buffer - confirm against the full source.
 */
378 static void expand(sector_t first_sector
, char *buffer
, sector_t sectors_left
)
380 /* Expand the contiguous data in 'buffer' to interspersed format in
381 * 'ext_buffer'. The checksum areas are not touched.
391 group_left
= NR_SUM_SEC
- first_sector
% NR_SUM_SEC
;
393 while (sectors_left
> 0) {
394 count
= MIN(sectors_left
, group_left
);
395 size
= count
* SECTOR_SIZE
;
397 memcpy(dstp
, srcp
, size
);
400 dstp
+= size
+ SECTOR_SIZE
;
402 sectors_left
-= count
;
403 group_left
= NR_SUM_SEC
;
407 /*===========================================================================*
409 *===========================================================================*/
/* collapse - gather interspersed data into a contiguous buffer.
 *
 *   first_sector  logical sector number of the first data sector
 *   buffer        destination for the contiguous data
 *   sizep         on return, set to dstp - buffer, i.e. the number of bytes
 *                 stored in 'buffer'; presumably holds the interspersed byte
 *                 count on entry (the initialisation of bytes_left is not
 *                 visible)
 *
 * Per iteration at most the rest of the current group is copied, after which
 * the source skips the data plus one checksum sector. MIN() keeps bytes_left
 * from wrapping when fewer than size + SECTOR_SIZE bytes remain.
 * NOTE(review): the declarations and initial values of srcp, dstp and
 * bytes_left, the left-hand side of the groupbytes_left assignment and the
 * advance of dstp are not part of this listing.
 */
410 static void collapse(sector_t first_sector
, char *buffer
, size_t *sizep
)
412 /* Collapse the interspersed data in 'ext_buffer' to contiguous format
413 * in 'buffer'. As side effect, adjust the given size to reflect the
414 * resulting contiguous data size.
417 size_t size
, bytes_left
, groupbytes_left
;
424 (NR_SUM_SEC
- first_sector
% NR_SUM_SEC
) * SECTOR_SIZE
;
426 while (bytes_left
> 0) {
427 size
= MIN(bytes_left
, groupbytes_left
);
429 memcpy(dstp
, srcp
, size
);
431 srcp
+= size
+ SECTOR_SIZE
;
434 bytes_left
-= MIN(size
+ SECTOR_SIZE
, bytes_left
);
435 groupbytes_left
= GROUP_SIZE
;
438 *sizep
= dstp
- buffer
;
441 /*===========================================================================*
443 *===========================================================================*/
444 static size_t expand_sizes(sector_t first_sector
, sector_t nr_sectors
,
447 /* Compute the size of the data area including interspersed checksum
448 * sectors (req_size) and the size of the data area including
449 * interspersed and trailing checksum sectors (the return value).
451 sector_t last_sector
, sum_sector
, phys_sector
;
453 last_sector
= LOG2PHYS(first_sector
+ nr_sectors
- 1);
455 sum_sector
= SEC2SUM_NR(first_sector
+ nr_sectors
- 1);
457 phys_sector
= LOG2PHYS(first_sector
);
459 *req_size
= (last_sector
- phys_sector
+ 1) * SECTOR_SIZE
;
461 return (sum_sector
- phys_sector
+ 1) * SECTOR_SIZE
;
464 /*===========================================================================*
466 *===========================================================================*/
467 static void collapse_size(sector_t first_sector
, size_t *sizep
)
469 /* Compute the size of the contiguous user data written to disk, given
470 * the result size of the write operation with interspersed checksums.
472 sector_t sector_in_group
;
473 size_t sectors_from_group_base
, nr_sum_secs
, nr_data_secs
;
475 sector_in_group
= first_sector
% NR_SUM_SEC
;
477 sectors_from_group_base
= *sizep
/ SECTOR_SIZE
+ sector_in_group
;
479 nr_sum_secs
= sectors_from_group_base
/ (NR_SUM_SEC
+1);
481 nr_data_secs
= sectors_from_group_base
- sector_in_group
- nr_sum_secs
;
483 *sizep
= nr_data_secs
* SECTOR_SIZE
;
486 /*===========================================================================*
488 *===========================================================================*/
/* transfer - read or write user data stored in interspersed-checksum layout.
 *
 *   pos      logical byte position of the request
 *   buffer   user data, in contiguous layout
 *   sizep    in: requested byte count; out: set to MIN(req_size, res_size)
 *            after the I/O and then converted back to a contiguous byte count
 *            by collapse_size() (write) or collapse() (read)
 *   flag_rw  FLT_WRITE, or FLT_READ for the other branch
 *
 * Visible steps: derive first_sector, nr_sectors and the physical position;
 * size the buffer with expand_sizes() and obtain it from flt_malloc() with
 * ext_array as backing array; for writes expand() the user data and, with
 * USE_CHECKSUM, make_sum(); perform one read_write() on the whole buffer;
 * then, with USE_CHECKSUM, check_write() for writes or check_sum() for reads;
 * finally release the buffer with flt_free().
 *
 * NOTE(review): the declarations of r and phys_pos, the test guarding the
 * pass-through read_write() call, the initialisation of res_size and the
 * statements executed when make_sum(), read_write(), check_write() or
 * check_sum() fail are not part of this listing.
 * NOTE(review): flt_free() is visible only after the read_write() trace and
 * at the very end; confirm that the make_sum()/check_write()/check_sum()
 * failure paths release ext_buffer as well.
 * NOTE(review): the second trace message formats ext_size and res_size
 * (size_t) with %u.
 */
489 int transfer(u64_t pos
, char *buffer
, size_t *sizep
, int flag_rw
)
491 /* Transfer data in interspersed-checksum format. When writing, first
492 * compute checksums, and read back the written data afterwards. When
493 * reading, check the stored checksums afterwards.
495 sector_t first_sector
, nr_sectors
;
496 size_t ext_size
, req_size
, res_size
;
500 /* If we don't use checksums or even checksum layout, simply pass on
501 * the request to the drivers as is.
504 return read_write(pos
, buffer
, buffer
, sizep
, flag_rw
);
506 /* The extended buffer (for checksumming) essentially looks like this:
508 * ------------------------------
509 * |xx|C|yyyyy|C|yyyyy|C|zzz |C|
510 * ------------------------------
512 * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is
513 * split up into groups, so that each group is followed by a checksum
514 * sector C containing the checksums for all data sectors in that
515 * group. The head and tail of the actual data may cover parts of
516 * groups; the remaining data (nor their checksums) are not to be
519 * The entire buffer is written or read in one operation: the
520 * read_write() call below. In order to write, we may first have to
521 * read some data; see the description in make_sum().
523 * Some points of interest here:
524 * - We need a buffer large enough to hold the all user and non-user
525 * data, from the first "xx" to the last checksum sector. This size
527 * - For writing, we need to expand the user-provided data from
528 * contiguous layout to interspersed format. The size of the user
529 * data after expansion is req_size.
530 * - For reading, we need to collapse the user-requested data from
531 * interspersed to contiguous format. For writing, we still need to
532 * compute the contiguous result size to return to the user.
533 * - In both cases, the result size may be different from the
534 * requested write size, because an EOF (as in, disk end) may occur
535 * and the resulting size is less than the requested size.
536 * - If we only follow the checksum layout, and do not do any
537 * checksumming, ext_size is reduced to req_size.
540 first_sector
= POS2SEC(pos
);
541 nr_sectors
= *sizep
/ SECTOR_SIZE
;
542 phys_pos
= SEC2POS(LOG2PHYS(first_sector
));
545 printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n",
546 ex64hi(pos
), ex64lo(pos
), ex64hi(phys_pos
), ex64lo(phys_pos
));
549 /* Compute the size for the buffer and for the user data after
552 ext_size
= expand_sizes(first_sector
, nr_sectors
, &req_size
);
557 ext_buffer
= flt_malloc(ext_size
, ext_array
, SBUF_SIZE
);
559 if (flag_rw
== FLT_WRITE
) {
560 expand(first_sector
, buffer
, nr_sectors
);
562 if (USE_CHECKSUM
&& make_sum(first_sector
, nr_sectors
))
566 /* Perform the actual I/O. */
568 r
= read_write(phys_pos
, ext_buffer
, ext_buffer
, &res_size
, flag_rw
);
571 printf("Filter: transfer: read_write(%"PRIx64
", %u, %d) = %d, %u\n",
572 phys_pos
, ext_size
, flag_rw
, r
, res_size
);
576 flt_free(ext_buffer
, ext_size
, ext_array
);
581 /* Limit the resulting size to the user data part of the buffer.
582 * The resulting size may already be less, due to an EOF.
584 *sizep
= MIN(req_size
, res_size
);
586 if (flag_rw
== FLT_WRITE
) {
587 if (USE_CHECKSUM
&& check_write(phys_pos
, res_size
))
590 collapse_size(first_sector
, sizep
);
592 else { /* FLT_READ */
593 if (USE_CHECKSUM
&& check_sum(first_sector
, *sizep
))
596 collapse(first_sector
, buffer
, sizep
);
599 flt_free(ext_buffer
, ext_size
, ext_array
);
604 /*===========================================================================*
606 *===========================================================================*/
/* convert - translate a raw disk size into the size visible to the user.
 *
 *   size  raw disk size, as a byte count
 *
 * The size is turned into a sector count with POS2SEC(). Every full run of
 * NR_SUM_SEC + 1 sectors contributes NR_SUM_SEC data sectors (integer
 * division, so a trailing partial run contributes nothing), and the result
 * is converted back to a byte count with SEC2POS().
 * NOTE(review): the declaration of 'sectors' and any statements in front of
 * its assignment are not part of this listing.
 */
607 u64_t
convert(u64_t size
)
609 /* Given a raw disk size, subtract the amount of disk space used for
610 * checksums, resulting in the user-visible disk size.
617 sectors
= POS2SEC(size
);
619 return SEC2POS(sectors
/ (NR_SUM_SEC
+ 1) * NR_SUM_SEC
);