kernel: scheduling fix for ARM
[minix.git] / drivers / filter / sum.c
blob5fe581c2efde5c4e1519e77186b7ed7469b9eca9
1 /* Filter driver - middle layer - checksumming */
3 #include "inc.h"
4 #include "crc.h"
5 #include "md5.h"
/* Size in bytes of the data sectors that make up one checksum group. */
#define GROUP_SIZE	(SECTOR_SIZE * NR_SUM_SEC)

/* Layout conversions, in sector units. Every NR_SUM_SEC data sectors are
 * followed by one checksum sector, so a group occupies NR_SUM_SEC+1 sectors.
 * SEC2SUM_NR yields the position of the checksum sector of the group that
 * contains logical sector 'nr'; LOG2PHYS yields the position of logical
 * sector 'nr' itself. Note that LOG2PHYS evaluates its argument twice.
 */
#define SEC2SUM_NR(nr)	((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + NR_SUM_SEC)
#define LOG2PHYS(nr)	((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + (nr)%NR_SUM_SEC)

/* Conversions between byte positions (u64_t, as used by transfer()) and
 * sector numbers.
 */
#define POS2SEC(nr)	div64u((nr), SECTOR_SIZE)
#define SEC2POS(nr)	mul64u((nr), SECTOR_SIZE)

/* Data buffers. */
static char *ext_array, *ext_buffer;	/* interspersed buffer */
static char *rb0_array;			/* write readback buffer for disk 0 */
static char *rb1_array;			/* write readback buffer for disk 1 */
19 /*===========================================================================*
20 * sum_init *
21 *===========================================================================*/
22 void sum_init(void)
24 /* Initialize buffers. */
26 ext_array = flt_malloc(SBUF_SIZE, NULL, 0);
27 rb0_array = flt_malloc(SBUF_SIZE, NULL, 0);
28 rb1_array = flt_malloc(SBUF_SIZE, NULL, 0);
30 if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL)
31 panic("no memory available");
34 /*===========================================================================*
35 * calc_sum *
36 *===========================================================================*/
37 static void calc_sum(unsigned sector, char *data, char *sum)
39 /* Compute the checksum for a sector. The sector number must be part
40 * of the checksum in some way.
42 unsigned long crc, *p, *q;
43 int i, j;
44 struct MD5Context ctx;
46 switch(SUM_TYPE) {
47 case ST_NIL:
48 /* No checksum at all */
50 q = (unsigned long *) sum;
51 *q = sector;
53 break;
55 case ST_XOR:
56 /* Basic XOR checksum */
57 p = (unsigned long *) data;
59 memset(sum, 0, SUM_SIZE);
60 for(i = 0; i < SECTOR_SIZE / SUM_SIZE; i++) {
61 q = (unsigned long *) sum;
62 for(j = 0; (size_t) j < SUM_SIZE / sizeof(*p); j++) {
63 *q ^= *p;
64 q++;
65 p++;
68 q = (unsigned long *) sum;
69 *q ^= sector;
71 break;
73 case ST_CRC:
74 /* CRC32 checksum */
76 crc = compute_crc((unsigned char *) data, SECTOR_SIZE);
78 q = (unsigned long *) sum;
80 *q = crc ^ sector;
82 break;
84 case ST_MD5:
85 /* MD5 checksum */
87 MD5Init(&ctx);
88 MD5Update(&ctx, (unsigned char *) data, SECTOR_SIZE);
89 MD5Update(&ctx, (unsigned char *) &sector, sizeof(sector));
90 MD5Final((unsigned char *) sum, &ctx);
92 break;
94 default:
95 panic("invalid checksum type: %d", SUM_TYPE);
99 /*===========================================================================*
100 * read_sectors *
101 *===========================================================================*/
102 static int read_sectors(char *buf, sector_t phys_sector, int count)
104 /* Read 'count' sectors starting at 'phys_sector' into 'buf'. If an
105 * EOF occurs, zero-fill the remaining part of the buffer.
107 size_t size, wsize;
108 int r;
110 size = wsize = count * SECTOR_SIZE;
112 r = read_write(SEC2POS(phys_sector), buf, buf, &size, FLT_READ);
114 if (r != OK)
115 return r;
117 if (size != wsize) {
118 #if DEBUG
119 printf("Filter: EOF reading sector %lu\n", phys_sector);
120 #endif
122 memset(buf + size, 0, wsize - size);
125 return OK;
128 /*===========================================================================*
129 * make_group_sum *
130 *===========================================================================*/
131 static void make_group_sum(char *bufp, char *sump, sector_t sector, int index,
132 int count)
134 /* Compute checksums for 'count' sectors within a group, starting at
135 * sector 'index' into the group, which has logical sector number
136 * 'sector'. The 'bufp' pointer points to the same first sector to
137 * start checksumming; 'sump' is a pointer to the checksum sector.
140 sump += index * SUM_SIZE;
142 while (count--) {
143 calc_sum(sector, bufp, sump);
145 bufp += SECTOR_SIZE;
147 sump += SUM_SIZE;
148 sector++;
152 /*===========================================================================*
153 * check_group_sum *
154 *===========================================================================*/
155 static int check_group_sum(char *bufp, const char *sump, sector_t sector,
156 int index, int count)
158 /* Check checksums in a group. Parameters are the same as in
159 * make_group_sum(). Return OK if all checksums check out, or RET_REDO
160 * upon failure.
162 char sum_buffer[SECTOR_SIZE];
164 sump += index * SUM_SIZE;
166 while (count--) {
167 calc_sum(sector, bufp, sum_buffer);
169 if (memcmp(sum_buffer, sump, SUM_SIZE)) {
170 printf("Filter: BAD CHECKSUM at sector %lu\n", sector);
172 if (BAD_SUM_ERROR)
173 return bad_driver(DRIVER_MAIN, BD_DATA, EIO);
176 bufp += SECTOR_SIZE;
177 sump += SUM_SIZE;
178 sector++;
181 return OK;
184 /*===========================================================================*
185 * make_sum *
186 *===========================================================================*/
187 static int make_sum(sector_t current_sector, sector_t sectors_left)
189 /* Compute checksums over all data in the buffer with expanded data.
190 * As side effect, possibly read in first and last checksum sectors
191 * and data to fill the gap between the last data sector and the last
192 * checksum sector.
194 sector_t sector_in_group, group_left;
195 size_t size, gap;
196 char *extp;
197 int r;
199 /* See the description of the extended buffer in transfer(). A number
200 * of points are relevant for this function in particular:
202 * 1) If the "xx" head of the buffer does not cover an entire group,
203 * we need to copy in the first checksum sector so that we can
204 * modify it.
205 * 2) We can generate checksums for the full "yyyyy" groups without
206 * copying in the corresponding checksum sectors first, because
207 * those sectors will be overwritten entirely anyway.
208 * 3) We copy in not only the checksum sector for the group containing
209 * the "zzz" tail data, but also all the data between "zzz" and the
210 * last checksum sector. This allows us to write all the data in
211 * the buffer in one operation. In theory, we could verify the
212 * checksum of the data in this gap for extra early failure
213 * detection, but we currently do not do this.
215 * If points 1 and 3 cover the same group (implying a small, unaligned
216 * write operation), the read operation is done only once. Whether
217 * point 1 or 3 is skipped depends on whether there is a gap before
218 * the checksum sector.
221 sector_in_group = current_sector % NR_SUM_SEC;
222 group_left = NR_SUM_SEC - sector_in_group;
224 extp = ext_buffer;
226 /* This loop covers points 1 and 2. */
227 while (sectors_left >= group_left) {
228 size = group_left * SECTOR_SIZE;
230 if (sector_in_group > 0) {
231 if ((r = read_sectors(extp + size,
232 LOG2PHYS(current_sector) + group_left,
233 1)) != OK)
234 return r;
236 else memset(extp + size, 0, SECTOR_SIZE);
238 make_group_sum(extp, extp + size, current_sector,
239 sector_in_group, group_left);
241 extp += size + SECTOR_SIZE;
243 sectors_left -= group_left;
244 current_sector += group_left;
246 sector_in_group = 0;
247 group_left = NR_SUM_SEC;
250 /* The remaining code covers point 3. */
251 if (sectors_left > 0) {
252 size = sectors_left * SECTOR_SIZE;
254 if (group_left != NR_SUM_SEC - sector_in_group)
255 panic("group_left assertion: %d", 0);
257 gap = group_left - sectors_left;
259 if (gap <= 0)
260 panic("gap assertion: %d", 0);
262 if ((r = read_sectors(extp + size,
263 LOG2PHYS(current_sector) + sectors_left,
264 gap + 1)) != OK)
265 return r;
267 make_group_sum(extp, extp + size + gap * SECTOR_SIZE,
268 current_sector, sector_in_group, sectors_left);
271 return OK;
274 /*===========================================================================*
275 * check_sum *
276 *===========================================================================*/
277 static int check_sum(sector_t current_sector, size_t bytes_left)
279 /* Check checksums of all data in the buffer with expanded data.
280 * Return OK if all checksums are okay, or RET_REDO upon failure.
282 sector_t sector_in_group;
283 size_t size, groupbytes_left;
284 int count;
285 char *extp;
287 extp = ext_buffer;
289 sector_in_group = current_sector % NR_SUM_SEC;
290 groupbytes_left = (NR_SUM_SEC - sector_in_group) * SECTOR_SIZE;
292 while (bytes_left > 0) {
293 size = MIN(bytes_left, groupbytes_left);
294 count = size / SECTOR_SIZE;
296 if (check_group_sum(extp, extp + groupbytes_left,
297 current_sector, sector_in_group, count))
298 return RET_REDO;
300 extp += size + SECTOR_SIZE;
302 bytes_left -= MIN(size + SECTOR_SIZE, bytes_left);
303 current_sector += count;
305 sector_in_group = 0;
306 groupbytes_left = GROUP_SIZE;
309 return OK;
312 /*===========================================================================*
313 * check_write *
314 *===========================================================================*/
315 static int check_write(u64_t pos, size_t size)
317 /* Read back the data just written, from both disks if mirroring is
318 * enabled, and check the result against the original. Return OK on
319 * success; report the malfunctioning driver and return RET_REDO
320 * otherwise.
322 char *rb0_buffer, *rb1_buffer;
323 size_t orig_size;
324 int r;
326 if (size == 0)
327 return OK;
329 rb0_buffer = rb1_buffer =
330 flt_malloc(size, rb0_array, SBUF_SIZE);
331 if (USE_MIRROR)
332 rb1_buffer = flt_malloc(size, rb1_array, SBUF_SIZE);
334 orig_size = size;
336 r = read_write(pos, rb0_buffer, rb1_buffer, &size, FLT_READ2);
338 if (r != OK) {
339 if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array);
340 flt_free(rb0_buffer, orig_size, rb0_array);
342 return r;
345 /* If we get a size smaller than what we requested, then we somehow
346 * succeeded in writing past the disk end, and now fail to read it all
347 * back. This is not an error, and we just compare the part that we
348 * did manage to read back in.
351 if (memcmp(ext_buffer, rb0_buffer, size)) {
352 #if DEBUG
353 printf("Filter: readback from disk 0 failed (size %d)\n",
354 size);
355 #endif
357 return bad_driver(DRIVER_MAIN, BD_DATA, EFAULT);
360 if (USE_MIRROR && memcmp(ext_buffer, rb1_buffer, size)) {
361 #if DEBUG
362 printf("Filter: readback from disk 1 failed (size %d)\n",
363 size);
364 #endif
366 return bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT);
369 if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array);
370 flt_free(rb0_buffer, orig_size, rb0_array);
372 return OK;
375 /*===========================================================================*
376 * expand *
377 *===========================================================================*/
378 static void expand(sector_t first_sector, char *buffer, sector_t sectors_left)
380 /* Expand the contiguous data in 'buffer' to interspersed format in
381 * 'ext_buffer'. The checksum areas are not touched.
383 char *srcp, *dstp;
384 sector_t group_left;
385 size_t size;
386 int count;
388 srcp = buffer;
389 dstp = ext_buffer;
391 group_left = NR_SUM_SEC - first_sector % NR_SUM_SEC;
393 while (sectors_left > 0) {
394 count = MIN(sectors_left, group_left);
395 size = count * SECTOR_SIZE;
397 memcpy(dstp, srcp, size);
399 srcp += size;
400 dstp += size + SECTOR_SIZE;
402 sectors_left -= count;
403 group_left = NR_SUM_SEC;
407 /*===========================================================================*
408 * collapse *
409 *===========================================================================*/
410 static void collapse(sector_t first_sector, char *buffer, size_t *sizep)
412 /* Collapse the interspersed data in 'ext_buffer' to contiguous format
413 * in 'buffer'. As side effect, adjust the given size to reflect the
414 * resulting contiguous data size.
416 char *srcp, *dstp;
417 size_t size, bytes_left, groupbytes_left;
419 srcp = ext_buffer;
420 dstp = buffer;
422 bytes_left = *sizep;
423 groupbytes_left =
424 (NR_SUM_SEC - first_sector % NR_SUM_SEC) * SECTOR_SIZE;
426 while (bytes_left > 0) {
427 size = MIN(bytes_left, groupbytes_left);
429 memcpy(dstp, srcp, size);
431 srcp += size + SECTOR_SIZE;
432 dstp += size;
434 bytes_left -= MIN(size + SECTOR_SIZE, bytes_left);
435 groupbytes_left = GROUP_SIZE;
438 *sizep = dstp - buffer;
441 /*===========================================================================*
442 * expand_sizes *
443 *===========================================================================*/
444 static size_t expand_sizes(sector_t first_sector, sector_t nr_sectors,
445 size_t *req_size)
447 /* Compute the size of the data area including interspersed checksum
448 * sectors (req_size) and the size of the data area including
449 * interspersed and trailing checksum sectors (the return value).
451 sector_t last_sector, sum_sector, phys_sector;
453 last_sector = LOG2PHYS(first_sector + nr_sectors - 1);
455 sum_sector = SEC2SUM_NR(first_sector + nr_sectors - 1);
457 phys_sector = LOG2PHYS(first_sector);
459 *req_size = (last_sector - phys_sector + 1) * SECTOR_SIZE;
461 return (sum_sector - phys_sector + 1) * SECTOR_SIZE;
464 /*===========================================================================*
465 * collapse_size *
466 *===========================================================================*/
467 static void collapse_size(sector_t first_sector, size_t *sizep)
469 /* Compute the size of the contiguous user data written to disk, given
470 * the result size of the write operation with interspersed checksums.
472 sector_t sector_in_group;
473 size_t sectors_from_group_base, nr_sum_secs, nr_data_secs;
475 sector_in_group = first_sector % NR_SUM_SEC;
477 sectors_from_group_base = *sizep / SECTOR_SIZE + sector_in_group;
479 nr_sum_secs = sectors_from_group_base / (NR_SUM_SEC+1);
481 nr_data_secs = sectors_from_group_base - sector_in_group - nr_sum_secs;
483 *sizep = nr_data_secs * SECTOR_SIZE;
486 /*===========================================================================*
487 * transfer *
488 *===========================================================================*/
489 int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw)
491 /* Transfer data in interspersed-checksum format. When writing, first
492 * compute checksums, and read back the written data afterwards. When
493 * reading, check the stored checksums afterwards.
495 sector_t first_sector, nr_sectors;
496 size_t ext_size, req_size, res_size;
497 u64_t phys_pos;
498 int r;
500 /* If we don't use checksums or even checksum layout, simply pass on
501 * the request to the drivers as is.
503 if (!USE_SUM_LAYOUT)
504 return read_write(pos, buffer, buffer, sizep, flag_rw);
506 /* The extended buffer (for checksumming) essentially looks like this:
508 * ------------------------------
509 * |xx|C|yyyyy|C|yyyyy|C|zzz |C|
510 * ------------------------------
512 * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is
513 * split up into groups, so that each group is followed by a checksum
514 * sector C containing the checksums for all data sectors in that
515 * group. The head and tail of the actual data may cover parts of
516 * groups; the remaining data (nor their checksums) are not to be
517 * modified.
519 * The entire buffer is written or read in one operation: the
520 * read_write() call below. In order to write, we may first have to
521 * read some data; see the description in make_sum().
523 * Some points of interest here:
524 * - We need a buffer large enough to hold the all user and non-user
525 * data, from the first "xx" to the last checksum sector. This size
526 * is ext_size.
527 * - For writing, we need to expand the user-provided data from
528 * contiguous layout to interspersed format. The size of the user
529 * data after expansion is req_size.
530 * - For reading, we need to collapse the user-requested data from
531 * interspersed to contiguous format. For writing, we still need to
532 * compute the contiguous result size to return to the user.
533 * - In both cases, the result size may be different from the
534 * requested write size, because an EOF (as in, disk end) may occur
535 * and the resulting size is less than the requested size.
536 * - If we only follow the checksum layout, and do not do any
537 * checksumming, ext_size is reduced to req_size.
540 first_sector = POS2SEC(pos);
541 nr_sectors = *sizep / SECTOR_SIZE;
542 phys_pos = SEC2POS(LOG2PHYS(first_sector));
544 #if DEBUG2
545 printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n",
546 ex64hi(pos), ex64lo(pos), ex64hi(phys_pos), ex64lo(phys_pos));
547 #endif
549 /* Compute the size for the buffer and for the user data after
550 * expansion.
552 ext_size = expand_sizes(first_sector, nr_sectors, &req_size);
554 if (!USE_CHECKSUM)
555 ext_size = req_size;
557 ext_buffer = flt_malloc(ext_size, ext_array, SBUF_SIZE);
559 if (flag_rw == FLT_WRITE) {
560 expand(first_sector, buffer, nr_sectors);
562 if (USE_CHECKSUM && make_sum(first_sector, nr_sectors))
563 return RET_REDO;
566 /* Perform the actual I/O. */
567 res_size = ext_size;
568 r = read_write(phys_pos, ext_buffer, ext_buffer, &res_size, flag_rw);
570 #if DEBUG2
571 printf("Filter: transfer: read_write(%"PRIx64", %u, %d) = %d, %u\n",
572 phys_pos, ext_size, flag_rw, r, res_size);
573 #endif
575 if (r != OK) {
576 flt_free(ext_buffer, ext_size, ext_array);
578 return r;
581 /* Limit the resulting size to the user data part of the buffer.
582 * The resulting size may already be less, due to an EOF.
584 *sizep = MIN(req_size, res_size);
586 if (flag_rw == FLT_WRITE) {
587 if (USE_CHECKSUM && check_write(phys_pos, res_size))
588 return RET_REDO;
590 collapse_size(first_sector, sizep);
592 else { /* FLT_READ */
593 if (USE_CHECKSUM && check_sum(first_sector, *sizep))
594 return RET_REDO;
596 collapse(first_sector, buffer, sizep);
599 flt_free(ext_buffer, ext_size, ext_array);
601 return OK;
604 /*===========================================================================*
605 * convert *
606 *===========================================================================*/
607 u64_t convert(u64_t size)
609 /* Given a raw disk size, subtract the amount of disk space used for
610 * checksums, resulting in the user-visible disk size.
612 sector_t sectors;
614 if (!USE_SUM_LAYOUT)
615 return size;
617 sectors = POS2SEC(size);
619 return SEC2POS(sectors / (NR_SUM_SEC + 1) * NR_SUM_SEC);