1 /* Filter driver - top layer - block interface */
/* This is a filter driver, which sits above the disk driver and forwards
 * messages between the disk driver and its callers. The filter can detect
 * corrupted data (toggled by USE_CHECKSUM) and recover it (toggled
 * by USE_MIRROR). These two functions are independent of each other.
 * The mirroring function requires two disks, on separate disk drivers.
 */
12 #define _POSIX_SOURCE 1
15 /* Global settings. */
16 int USE_CHECKSUM
= 0; /* enable checksumming */
17 int USE_MIRROR
= 0; /* enable mirroring */
19 int BAD_SUM_ERROR
= 1; /* bad checksums are considered a driver error */
21 int USE_SUM_LAYOUT
= 0; /* use checksumming layout on disk */
22 int NR_SUM_SEC
= 8; /* number of checksums per checksum sector */
24 int SUM_TYPE
= ST_CRC
; /* use NIL, XOR, CRC, or MD5 */
25 int SUM_SIZE
= 0; /* size of the stored checksum */
27 int NR_RETRIES
= 3; /* number of times the request will be retried (N) */
28 int NR_RESTARTS
= 3; /* number of times a driver will be restarted (M) */
29 int DRIVER_TIMEOUT
= 5; /* timeout in seconds to declare a driver dead (T) */
31 int CHUNK_SIZE
= 0; /* driver requests will be vectorized at this size */
33 char MAIN_LABEL
[LABEL_SIZE
] = ""; /* main disk driver label */
34 char BACKUP_LABEL
[LABEL_SIZE
] = ""; /* backup disk driver label */
35 int MAIN_MINOR
= -1; /* main partition minor nr */
36 int BACKUP_MINOR
= -1; /* backup partition minor nr */
/* Option table handed to optset_parse() by parse_arguments(). Each entry
 * gives the option name, its kind (OPT_STRING, OPT_INT or OPT_BOOL), the
 * global it targets, and one extra argument: LABEL_SIZE for the two string
 * options, 10 for every integer option (presumably the numeric base - confirm
 * against the optset implementation), and for OPT_BOOL entries apparently the
 * value to store when the option is given.
 *
 * "N", "M" and "T" target the same globals as "retries", "restarts" and
 * "timeout". The "nil"/"xor"/"crc"/"md5" entries use OPT_BOOL with an ST_*
 * constant to select SUM_TYPE.
 *
 * NOTE(review): the end of the initializer is not visible in this listing.
 */
38 static struct optset optset_table
[] = {
39 { "label0", OPT_STRING
, MAIN_LABEL
, LABEL_SIZE
},
40 { "label1", OPT_STRING
, BACKUP_LABEL
, LABEL_SIZE
},
41 { "minor0", OPT_INT
, &MAIN_MINOR
, 10 },
42 { "minor1", OPT_INT
, &BACKUP_MINOR
, 10 },
43 { "sum_sec", OPT_INT
, &NR_SUM_SEC
, 10 },
44 { "layout", OPT_BOOL
, &USE_SUM_LAYOUT
, 1 },
45 { "nolayout", OPT_BOOL
, &USE_SUM_LAYOUT
, 0 },
46 { "sum", OPT_BOOL
, &USE_CHECKSUM
, 1 },
47 { "nosum", OPT_BOOL
, &USE_CHECKSUM
, 0 },
48 { "mirror", OPT_BOOL
, &USE_MIRROR
, 1 },
49 { "nomirror", OPT_BOOL
, &USE_MIRROR
, 0 },
50 { "nil", OPT_BOOL
, &SUM_TYPE
, ST_NIL
},
51 { "xor", OPT_BOOL
, &SUM_TYPE
, ST_XOR
},
52 { "crc", OPT_BOOL
, &SUM_TYPE
, ST_CRC
},
53 { "md5", OPT_BOOL
, &SUM_TYPE
, ST_MD5
},
54 { "sumerr", OPT_BOOL
, &BAD_SUM_ERROR
, 1 },
55 { "nosumerr", OPT_BOOL
, &BAD_SUM_ERROR
, 0 },
56 { "retries", OPT_INT
, &NR_RETRIES
, 10 },
57 { "N", OPT_INT
, &NR_RETRIES
, 10 },
58 { "restarts", OPT_INT
, &NR_RESTARTS
, 10 },
59 { "M", OPT_INT
, &NR_RESTARTS
, 10 },
60 { "timeout", OPT_INT
, &DRIVER_TIMEOUT
, 10 },
61 { "T", OPT_INT
, &DRIVER_TIMEOUT
, 10 },
62 { "chunk", OPT_INT
, &CHUNK_SIZE
, 10 },
66 /* Request message. */
68 static endpoint_t who_e
; /* m_source */
69 static long req_id
; /* BDEV_ID */
70 static cp_grant_id_t grant_id
; /* BDEV_GRANT */
73 static char *buf_array
, *buffer
; /* contiguous buffer */
75 /* SEF functions and variables. */
76 static void sef_local_startup(void);
77 static int sef_cb_init_fresh(int type
, sef_init_info_t
*info
);
78 static void sef_cb_signal_handler(int signo
);
80 /*===========================================================================*
82 *===========================================================================*/
83 static int carry(size_t size
, int flag_rw
)
85 /* Carry data between caller proc and filter.
88 if (flag_rw
== FLT_WRITE
)
89 return sys_safecopyfrom(who_e
, grant_id
, 0,
90 (vir_bytes
) buffer
, size
);
92 return sys_safecopyto(who_e
, grant_id
, 0,
93 (vir_bytes
) buffer
, size
);
96 /*===========================================================================*
98 *===========================================================================*/
/* vcarry - move data between the caller's grant vector and the filter buffer.
 *
 * grants:  number of elements of iov to walk
 * iov:     vector whose iov_addr is passed as the grant argument of the
 *          safecopy call and whose iov_size bounds each copy
 * flag_rw: FLT_WRITE selects sys_safecopyfrom(); the sys_safecopyto() call is
 *          presumably the else branch (the keyword is not visible here)
 * size:    total bytes to carry; the loop ends once size reaches zero or all
 *          elements have been visited
 *
 * Each element moves MIN(size, iov[i].iov_size) bytes to or from bufp.
 *
 * NOTE(review): the local declarations, the check of r, the advance of bufp
 * and size, and the return value are not visible in this listing - confirm
 * against the full source before relying on this description.
 */
99 static int vcarry(int grants
, iovec_t
*iov
, int flag_rw
, size_t size
)
101 /* Carry data between caller proc and filter, through grant-vector.
108 for(i
= 0; i
< grants
&& size
> 0; i
++) {
109 bytes
= MIN(size
, iov
[i
].iov_size
);
111 if (flag_rw
== FLT_WRITE
)
112 r
= sys_safecopyfrom(who_e
,
113 (vir_bytes
) iov
[i
].iov_addr
, 0,
114 (vir_bytes
) bufp
, bytes
);
116 r
= sys_safecopyto(who_e
,
117 (vir_bytes
) iov
[i
].iov_addr
, 0,
118 (vir_bytes
) bufp
, bytes
);
130 /*===========================================================================*
132 *===========================================================================*/
/* do_rdwt - serve a single-buffer read or write request.
 *
 * flag_rw is FLT_READ or FLT_WRITE (main() passes these for BDEV_READ and
 * BDEV_WRITE). The position is assembled from BDEV_POS_LO/BDEV_POS_HI and
 * the byte count comes from BDEV_COUNT; a position or size that is not a
 * multiple of SECTOR_SIZE is reported as an unaligned request. A buffer is
 * obtained with flt_malloc(); for writes the caller's data is carried in
 * before transfer(), for reads it is carried back out once r is OK. When the
 * RET_REDO message is printed, both drivers are checked with check_driver().
 * The buffer is released with flt_free().
 *
 * NOTE(review): the result of carry() is discarded at both call sites, so a
 * failing safecopy would not be reported to the caller - consider
 * propagating it. The retry loop and the return statements are not visible
 * in this listing.
 */
133 static int do_rdwt(int flag_rw
)
135 size_t size
, size_ret
;
139 pos
= make64(m_in
.BDEV_POS_LO
, m_in
.BDEV_POS_HI
);
140 size
= m_in
.BDEV_COUNT
;
142 if (rem64u(pos
, SECTOR_SIZE
) != 0 || size
% SECTOR_SIZE
!= 0) {
143 printf("Filter: unaligned request from caller!\n");
148 buffer
= flt_malloc(size
, buf_array
, BUF_SIZE
);
150 if(flag_rw
== FLT_WRITE
)
151 carry(size
, flag_rw
);
157 r
= transfer(pos
, buffer
, &size_ret
, flag_rw
);
162 printf("Filter: transfer yielded RET_REDO, checking drivers\n");
164 if((r
= check_driver(DRIVER_MAIN
)) != OK
) break;
165 if((r
= check_driver(DRIVER_BACKUP
)) != OK
) break;
168 if(r
== OK
&& flag_rw
== FLT_READ
)
169 carry(size_ret
, flag_rw
);
171 flt_free(buffer
, size
, buf_array
);
179 /*===========================================================================*
181 *===========================================================================*/
/* do_vrdwt - serve a vectored read or write request.
 *
 * flag_rw is FLT_READ for BDEV_GATHER and FLT_WRITE for BDEV_SCATTER (see the
 * dispatch in main()). BDEV_COUNT holds the number of vector elements; the
 * vector is copied from the caller into iov_proc, and the request size is the
 * sum of the elements' iov_size fields. Position and size must be multiples
 * of SECTOR_SIZE. Writes are gathered into the buffer with vcarry() before
 * transfer(); reads are scattered back with vcarry() afterwards. flt_free()
 * appears twice, presumably once on an error path and once on the normal
 * path.
 *
 * NOTE(review): grants is taken straight from the request message and, on
 * the next statement, sizes the copy into iov_proc[NR_IOREQS]. No range
 * check is visible, so a count above NR_IOREQS (or a negative one) would
 * overrun the array - validate it before the copy. A failing copy of the
 * caller's vector also panics the filter instead of failing the request.
 * The results of both vcarry() calls are discarded.
 */
182 static int do_vrdwt(int flag_rw
)
184 size_t size
, size_ret
;
188 iovec_t iov_proc
[NR_IOREQS
];
190 /* Extract information. */
191 grants
= m_in
.BDEV_COUNT
;
192 if((r
= sys_safecopyfrom(who_e
, grant_id
, 0, (vir_bytes
) iov_proc
,
193 grants
* sizeof(iovec_t
))) != OK
) {
194 panic("copying in grant vector failed: %d", r
);
197 pos
= make64(m_in
.BDEV_POS_LO
, m_in
.BDEV_POS_HI
);
198 for(size
= 0, i
= 0; i
< grants
; i
++)
199 size
+= iov_proc
[i
].iov_size
;
201 if (rem64u(pos
, SECTOR_SIZE
) != 0 || size
% SECTOR_SIZE
!= 0) {
202 printf("Filter: unaligned request from caller!\n");
206 buffer
= flt_malloc(size
, buf_array
, BUF_SIZE
);
208 if(flag_rw
== FLT_WRITE
)
209 vcarry(grants
, iov_proc
, flag_rw
, size
);
215 r
= transfer(pos
, buffer
, &size_ret
, flag_rw
);
220 printf("Filter: transfer yielded RET_REDO, checking drivers\n");
222 if((r
= check_driver(DRIVER_MAIN
)) != OK
) break;
223 if((r
= check_driver(DRIVER_BACKUP
)) != OK
) break;
227 flt_free(buffer
, size
, buf_array
);
231 if(flag_rw
== FLT_READ
)
232 vcarry(grants
, iov_proc
, flag_rw
, size_ret
);
234 flt_free(buffer
, size
, buf_array
);
239 /*===========================================================================*
241 *===========================================================================*/
/* do_ioctl - serve a BDEV_IOCTL request, dispatching on m->BDEV_REQUEST.
 *
 * In the visible case a struct partition is zeroed, its size field is set to
 * convert(get_raw_size()), and the structure is copied to the caller with
 * sys_safecopyto(); the failure message names the request as DIOCGETP.
 * Requests that match no case are logged as unknown.
 *
 * NOTE(review): grant_id is cast to vir_bytes in the sys_safecopyto() call
 * here, whereas carry() passes it uncast to the same function - confirm which
 * form is intended. The case labels and return values are not visible in
 * this listing.
 */
242 static int do_ioctl(message
*m
)
244 struct partition sizepart
;
246 switch(m
->BDEV_REQUEST
) {
250 /* These do not make sense for us. */
254 memset(&sizepart
, 0, sizeof(sizepart
));
256 /* The presented disk size is the raw partition size,
257 * corrected for space needed for checksums.
259 sizepart
.size
= convert(get_raw_size());
261 if(sys_safecopyto(who_e
, (vir_bytes
) grant_id
, 0,
262 (vir_bytes
) &sizepart
,
263 sizeof(struct partition
)) != OK
) {
264 printf("Filter: DIOCGETP safecopyto failed\n");
270 printf("Filter: unknown ioctl request: %d!\n",
278 /*===========================================================================*
280 *===========================================================================*/
/* parse_arguments - read the filter configuration from argv[1] and check it.
 *
 * argv[1] is handed to optset_parse() together with optset_table. The
 * visible conditions then test for: an empty MAIN_LABEL or a MAIN_MINOR
 * outside 0..255; the same for BACKUP_LABEL/BACKUP_MINOR when USE_MIRROR is
 * set; and NR_SUM_SEC <= 0 or SUM_SIZE * NR_SUM_SEC exceeding SECTOR_SIZE.
 * SUM_SIZE is assigned here (4 and 16 are the visible values). The resulting
 * configuration is printed, and DRIVER_TIMEOUT is finally converted from
 * seconds to clock ticks by multiplying it with sys_hz().
 *
 * NOTE(review): what each failed check returns, and which checksum type maps
 * to which SUM_SIZE, is not visible in this listing. SUM_SIZE * NR_SUM_SEC is
 * an int product of a user-supplied option, so a very large "sum_sec" value
 * could overflow before the comparison - confirm.
 */
281 static int parse_arguments(int argc
, char *argv
[])
287 optset_parse(optset_table
, argv
[1]);
289 if (MAIN_LABEL
[0] == 0 || MAIN_MINOR
< 0 || MAIN_MINOR
> 255)
291 if (USE_MIRROR
&& (BACKUP_LABEL
[0] == 0 ||
292 BACKUP_MINOR
< 0 || BACKUP_MINOR
> 255))
295 /* Checksumming implies a checksum layout. */
299 /* Determine the checksum size for the chosen checksum type. */
302 SUM_SIZE
= 4; /* for the sector number */
305 SUM_SIZE
= 16; /* compatibility */
317 if (NR_SUM_SEC
<= 0 || SUM_SIZE
* NR_SUM_SEC
> SECTOR_SIZE
)
321 printf("Filter starting. Configuration:\n");
322 printf(" USE_CHECKSUM : %3s ", USE_CHECKSUM
? "yes" : "no");
323 printf(" USE_MIRROR : %3s\n", USE_MIRROR
? "yes" : "no");
326 printf(" BAD_SUM_ERROR : %3s ",
327 BAD_SUM_ERROR
? "yes" : "no");
328 printf(" NR_SUM_SEC : %3d\n", NR_SUM_SEC
);
330 printf(" SUM_TYPE : ");
333 case ST_NIL
: printf("nil"); break;
334 case ST_XOR
: printf("xor"); break;
335 case ST_CRC
: printf("crc"); break;
336 case ST_MD5
: printf("md5"); break;
339 printf(" SUM_SIZE : %3d\n", SUM_SIZE
);
341 else printf(" USE_SUM_LAYOUT : %3s\n", USE_SUM_LAYOUT
? "yes" : "no");
343 printf(" N : %3dx M : %3dx T : %3ds\n",
344 NR_RETRIES
, NR_RESTARTS
, DRIVER_TIMEOUT
);
346 printf(" MAIN_LABEL / MAIN_MINOR : %19s / %d\n",
347 MAIN_LABEL
, MAIN_MINOR
);
349 printf(" BACKUP_LABEL / BACKUP_MINOR : %15s / %d\n",
350 BACKUP_LABEL
, BACKUP_MINOR
);
355 /* Convert timeout seconds to ticks. */
356 DRIVER_TIMEOUT
*= sys_hz();
361 /*===========================================================================*
363 *===========================================================================*/
/* main - entry point of the filter driver and its request handling.
 *
 * The arguments are stored with env_setargs(). Requests are received with
 * driver_receive(), which panics the filter on failure. Messages from
 * DS_PROC_NR that are IPC notifications are singled out before normal
 * request handling. For a request, the sender, BDEV_ID and BDEV_GRANT are
 * saved in who_e, req_id and grant_id, and the message type selects the
 * handler: open/close yield OK, read/write go to do_rdwt(), gather/scatter
 * to do_vrdwt(), and ioctl to do_ioctl(). The reply is a BDEV_REPLY message
 * carrying the saved request id and the handler's result as BDEV_STATUS.
 *
 * NOTE(review): the loop construct, the handling of DS notifications and
 * the call that sends the reply are not visible in this listing.
 */
364 int main(int argc
, char *argv
[])
370 /* SEF local startup. */
371 env_setargs(argc
, argv
);
375 /* Wait for request. */
376 if(driver_receive(ANY
, &m_in
, &ipc_status
) != OK
) {
377 panic("driver_receive failed");
381 printf("Filter: got request %d from %d\n",
382 m_in
.m_type
, m_in
.m_source
);
385 if(m_in
.m_source
== DS_PROC_NR
&& is_ipc_notify(ipc_status
)) {
390 who_e
= m_in
.m_source
;
391 req_id
= m_in
.BDEV_ID
;
392 grant_id
= m_in
.BDEV_GRANT
;
395 /* Forward the request message to the drivers. */
396 switch(m_in
.m_type
) {
397 case BDEV_OPEN
: /* open/close is a noop for filter. */
398 case BDEV_CLOSE
: r
= OK
; break;
399 case BDEV_READ
: r
= do_rdwt(FLT_READ
); break;
400 case BDEV_WRITE
: r
= do_rdwt(FLT_WRITE
); break;
401 case BDEV_GATHER
: r
= do_vrdwt(FLT_READ
); break;
402 case BDEV_SCATTER
: r
= do_vrdwt(FLT_WRITE
); break;
403 case BDEV_IOCTL
: r
= do_ioctl(&m_in
); break;
406 printf("Filter: ignoring unknown request %d from %d\n",
407 m_in
.m_type
, m_in
.m_source
);
412 printf("Filter: replying with code %d\n", r
);
415 /* Send back reply message. */
416 m_out
.m_type
= BDEV_REPLY
;
417 m_out
.BDEV_ID
= req_id
;
418 m_out
.BDEV_STATUS
= r
;
425 /*===========================================================================*
426 * sef_local_startup *
427 *===========================================================================*/
/* Register the SEF callbacks for this driver: sef_cb_init_fresh() serves
 * both as the fresh-start and as the restart initialization callback, and
 * sef_cb_signal_handler() handles signals. No live update callback is
 * registered.
 * NOTE(review): the statement that follows the "Let SEF perform startup"
 * comment is not visible in this listing.
 */
428 static void sef_local_startup(void)
430 /* Register init callbacks. */
431 sef_setcb_init_fresh(sef_cb_init_fresh
);
432 sef_setcb_init_restart(sef_cb_init_fresh
);
434 /* No live update support for now. */
436 /* Register signal callbacks. */
437 sef_setcb_signal_handler(sef_cb_signal_handler
);
439 /* Let SEF perform startup. */
443 /*===========================================================================*
444 * sef_cb_init_fresh *
445 *===========================================================================*/
/* SEF initialization callback, registered by sef_local_startup() for both
 * fresh start and restart.
 *
 * type: the SEF init type, forwarded to blockdriver_announce()
 * info: unused
 *
 * The configuration is parsed from env_argc/env_argv with parse_arguments().
 * buf_array is then allocated with flt_malloc(BUF_SIZE, NULL, 0), and the
 * filter panics when that yields NULL. After that the filter subscribes to
 * DS events matching "drv\.blk\..*" (panicking when, presumably, the
 * subscription fails) and announces itself.
 *
 * NOTE(review): the conditions guarding the "wrong argument" message and the
 * subscription panic, and the return value, are not visible in this listing.
 */
446 static int sef_cb_init_fresh(int type
, sef_init_info_t
*UNUSED(info
))
448 /* Initialize the filter driver. */
451 r
= parse_arguments(env_argc
, env_argv
);
453 printf("Filter: wrong argument!\n");
457 if ((buf_array
= flt_malloc(BUF_SIZE
, NULL
, 0)) == NULL
)
458 panic("no memory available");
464 /* Subscribe to block driver events. */
465 r
= ds_subscribe("drv\\.blk\\..*", DSF_INITIAL
| DSF_OVERWRITE
);
467 panic("Filter: can't subscribe to driver events");
470 /* Announce we are up! */
471 blockdriver_announce(type
);
476 /*===========================================================================*
477 * sef_cb_signal_handler *
478 *===========================================================================*/
479 static void sef_cb_signal_handler(int signo
)
481 /* Only check for termination signal, ignore anything else. */
482 if (signo
!= SIGTERM
) return;
484 /* If so, shut down this driver. */
486 printf("Filter: shutdown...\n");