2 * Copyright 2006-2007, François Revol. All rights reserved.
3 * Distributed under the terms of the MIT License.
9 * Maps a Network Block Device as virtual partitions.
13 #include <ByteOrder.h>
14 #include <KernelExport.h>
16 #include <driver_settings.h>
24 #include <netinet/in.h>
28 /* on the first open(), open ourselves for some seconds,
29 * to avoid trying to reconnect and failing on a 2nd open,
30 * as it happens with the python server.
32 //#define MOUNT_KLUDGE
35 /* names, ohh names... */
37 #define SHUT_RDWR SHUTDOWN_BOTH
42 #include <kernel/lock.h>
45 #ifndef _IMPEXP_KERNEL
46 #define _IMPEXP_KERNEL
50 #define mutex_init new_lock
51 #define mutex_destroy free_lock
52 #define mutex_lock LOCK
53 #define mutex_unlock UNLOCK
61 #define DEVICE_PREFIX "disk/virtual/nbd/"
62 #define DEVICE_FMT DEVICE_PREFIX "%d/raw"
63 #define DEVICE_NAME_MAX 32
64 #define MAX_REQ_SIZE (32*1024*1024)
69 #define PRINT(a) dprintf a
70 #define WHICH(dev) ((int)(dev - nbd_devices))
/* One queued NBD request: the on-wire request/reply images plus local
 * bookkeeping.  Entries are singly linked through 'next' (see
 * nbd_queue_request()).
 * NOTE(review): this extract is missing lines — the closing brace,
 * several fields, and the opening of struct nbd_device are not visible. */
struct nbd_request_entry {
	struct nbd_request_entry *next;	/* link in dev->reqs in-flight list */
	struct nbd_request req;	/* net byte order */
	struct nbd_reply reply;	/* net byte order */
	void *buffer;	/* write: ptr to passed buffer; read: ptr to malloc()ed extra */

	/* --- fields below belong to struct nbd_device (opener not visible) --- */
	struct sockaddr_in server;	/* server address, filled from driver settings */
	uint64 req;	/* next ID for requests */
	struct nbd_request_entry *reqs;	/* in-flight request list head */
/* Per-open() cookie: remembers which nbd_device this descriptor uses. */
typedef struct cookie {
	struct nbd_device *dev;	/* device this cookie was opened on */
/* --- request manager: allocate / queue / dequeue / free entries --- */
/* data=NULL on read */
status_t nbd_alloc_request(struct nbd_device *dev,
	struct nbd_request_entry **req, uint32 type, off_t from, size_t len,
	const char *data);
status_t nbd_queue_request(struct nbd_device *dev,
	struct nbd_request_entry *req);
status_t nbd_dequeue_request(struct nbd_device *dev, uint64 handle,
	struct nbd_request_entry **req);
status_t nbd_free_request(struct nbd_device *dev,
	struct nbd_request_entry *req);

/* --- device lookup --- */
struct nbd_device *nbd_find_device(const char* name);

/* --- connection management and the reply-reader thread --- */
int32 nbd_postoffice(void *arg);
status_t nbd_connect(struct nbd_device *dev);
status_t nbd_teardown(struct nbd_device *dev);
status_t nbd_post_request(struct nbd_device *dev,
	struct nbd_request_entry *req);

/* --- device hooks (wired into nbd_hooks below) --- */
status_t nbd_open(const char *name, uint32 flags, cookie_t **cookie);
status_t nbd_close(cookie_t *cookie);
status_t nbd_free(cookie_t *cookie);
status_t nbd_control(cookie_t *cookie, uint32 op, void *data, size_t len);
status_t nbd_read(cookie_t *cookie, off_t position, void *data,
	size_t *numbytes);
status_t nbd_write(cookie_t *cookie, off_t position, const void *data,
	size_t *numbytes);
/* NOTE(review): the comment opener was on a line not visible in this
 * extract.
 * In BONE at least, if connect() fails (EINTR or ETIMEDOUT)
 * keeps locked pages around (likely a bone_data,
 * until TCP gets the last ACK). If that happens, we snooze()
 * in unload_driver() to let TCP timeout before the kernel
 * tries to delete the image. */
/* presumably set on a failed connect and checked before the snooze()
 * in unload — TODO confirm against the missing lines */
bool gDelayUnload = false;
#define BONE_TEARDOWN_DELAY 60000000	/* 60 seconds (snooze() takes µs) */
#pragma mark ==== support ====

// move that to ksocket inlined
/* Minimal inet_aton() replacement for kernel use: parses a dotted-quad
 * string into *addr.
 * NOTE(review): lines missing from this extract (declarations of i, a,
 * inaddr; loop close; return value). */
static int kinet_aton(const char *in, struct in_addr *addr)
	char *p = (char *)in;
	/* parse the four dot-separated octets */
	for (i = 0; i < 4; i++) {
		a = strtoul(p, &p, 10);
		/* shift accumulated octets down, insert the new one on top */
		inaddr = (inaddr >> 8) | ((a & 0x0ff) << 24);
	/* NOTE(review): raw store through a cast — assumes in_addr is a
	 * plain 32-bit value here */
	*(uint32 *)addr = inaddr;
#pragma mark ==== request manager ====

/* Allocate and initialize a request entry for one NBD command.
 * type: NBD_CMD_READ / NBD_CMD_WRITE / NBD_CMD_DISC; from/len: byte
 * range; data: caller's buffer for writes (NULL on reads — space for
 * the read payload is malloc()ed inline after the entry instead).
 * NOTE(review): several lines are missing from this extract (handle
 * assignment, error branches, braces). */
status_t
nbd_alloc_request(struct nbd_device *dev, struct nbd_request_entry **req,
	uint32 type, off_t from, size_t len, const char *data)
	bool w = (type == NBD_CMD_WRITE);
	struct nbd_request_entry *r;
	status_t err = EINVAL;

	PRINT((DP ">%s(%" B_PRIu32 ", %" B_PRIdOFF ", %ld)\n", __FUNCTION__, type,

	/* only the three commands we know how to build */
	if (type != NBD_CMD_READ && type != NBD_CMD_WRITE && type != NBD_CMD_DISC)

	if (!dev || !req || from < 0)

	/* take the device lock (handle generation is serialized under it) */
	err = mutex_lock(&dev->ben);

	mutex_unlock(&dev->ben);

	/* reads get their receive buffer allocated inline after the entry */
	r = malloc(sizeof(struct nbd_request_entry) + (w ? 0 : len));

	err = r->sem = create_sem(0, "nbd request sem");

	/* build the on-wire request header — big-endian per the struct's
	 * "net byte order" contract */
	r->req.magic = B_HOST_TO_BENDIAN_INT32(NBD_REQUEST_MAGIC);
	r->req.type = B_HOST_TO_BENDIAN_INT32(type);
	r->req.handle = B_HOST_TO_BENDIAN_INT64(r->handle);
	r->req.from = B_HOST_TO_BENDIAN_INT64(r->from);
	r->req.len = B_HOST_TO_BENDIAN_INT32(len);

	/* writes point at the caller's data; reads use the inline tail */
	r->buffer = (void *)(w ? data : (((char *)r) + sizeof(struct nbd_request_entry)));

	dprintf(DP " %s: error 0x%08" B_PRIx32 "\n", __FUNCTION__, err);
/* Link req at the head of the device's in-flight list so the
 * postoffice thread can match its reply later.  Called with dev->ben
 * held (see nbd_post_request()'s callers).
 * NOTE(review): the tail of this function is not visible in this
 * extract. */
status_t
nbd_queue_request(struct nbd_device *dev, struct nbd_request_entry *req)
	PRINT((DP ">%s(handle:%" B_PRIu64 ")\n", __FUNCTION__, req->handle));
	req->next = dev->reqs;
/* Find the in-flight entry whose handle matches, unlink it from
 * dev->reqs, and return it through *req.
 * NOTE(review): list-head initialization, head-removal case and return
 * paths are missing from this extract. */
status_t
nbd_dequeue_request(struct nbd_device *dev, uint64 handle,
	struct nbd_request_entry **req)
	struct nbd_request_entry *r, *prev;
	PRINT((DP ">%s(handle:%" B_PRIu64 ")\n", __FUNCTION__, handle));

	/* walk the singly-linked list until the handle matches */
	while (r && r->handle != handle) {

	/* unlink from mid-list */
	prev->next = r->next;
/* Release a request entry: delete its wakeup semaphore (the entry's
 * memory is presumably free()d on a line not visible in this extract —
 * TODO confirm). */
status_t
nbd_free_request(struct nbd_device *dev, struct nbd_request_entry *req)
	PRINT((DP ">%s(handle:%" B_PRIu64 ")\n", __FUNCTION__, req->handle));
	delete_sem(req->sem);
#pragma mark ==== nbd handler ====

/* Reply-reader thread ("postoffice"): receives reply headers from the
 * server socket, matches them to queued requests by handle, pulls in
 * read payloads and wakes the waiting caller.
 * NOTE(review): the surrounding loop and error-exit lines are missing
 * from this extract. */
int32
nbd_postoffice(void *arg)
	struct nbd_device *dev = (struct nbd_device *)arg;
	struct nbd_request_entry *req = NULL;
	struct nbd_reply reply;

	PRINT((DP ">%s()\n", __FUNCTION__));

	/* blocking receive of the fixed-size reply header */
	err = krecv(dev->sock, &reply, sizeof(reply), 0);
	if (err == -1 && errno < 0)

	reason = "recv:size";
	/* short read: anything less than a full header is an error */
	if (err < sizeof(reply))

	/* sanity-check the reply magic (big-endian on the wire) */
	if (B_BENDIAN_TO_HOST_INT32(reply.magic) != NBD_REPLY_MAGIC)

	/* match the reply to its request under the device lock */
	err = mutex_lock(&dev->ben);

	reason = "dequeue_request";
	err = nbd_dequeue_request(dev, B_BENDIAN_TO_HOST_INT64(reply.handle), &req);

	mutex_unlock(&dev->ben);
314 dprintf(DP
"nbd_dequeue_rquest found NULL!\n");
/* stash the raw reply for the waiter, then pull the read payload */
memcpy(&req->reply, &reply, sizeof(reply));
if (req->type == NBD_CMD_READ) {
	reason = "recv(data)";
	/* only receive payload when the server reported success */
	if (reply.error == 0)
		err = krecv(dev->sock, req->buffer, req->len, 0);
	/* tell back how much we've got (?) */

err = mutex_lock(&dev->ben);

// this also must be atomic!
/* wake the caller blocked in nbd_read()/nbd_write() */
release_sem(req->sem);

nbd_free_request(dev, req);

mutex_unlock(&dev->ben);

PRINT((DP "<%s\n", __FUNCTION__));

/* error exit: 'reason' names the step that failed */
dprintf(DP "%s: %s: error 0x%08" B_PRIx32 "\n", __FUNCTION__, reason, err);
/* Connect to the configured server, check the NBD handshake packet
 * (password, magic, exported size) and spawn the postoffice thread.
 * NOTE(review): error-branch bodies and labels are missing from this
 * extract. */
status_t
nbd_connect(struct nbd_device *dev)
	struct nbd_init_packet initpkt;

	PRINT((DP ">%s()\n", __FUNCTION__));

	PRINT((DP " %s: socket()\n", __FUNCTION__));
	err = dev->sock = ksocket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (err == -1 && errno < 0)

	PRINT((DP " %s: connect()\n", __FUNCTION__));
	err = kconnect(dev->sock, (struct sockaddr *)&dev->server, sizeof(dev->server));
	if (err == -1 && errno < 0)
	/* HACK: avoid the kernel unloading us with locked pages from TCP */

	/* read the server's fixed-size greeting */
	PRINT((DP " %s: recv(initpkt)\n", __FUNCTION__));
	err = krecv(dev->sock, &initpkt, sizeof(initpkt), 0);
	if (err == -1 && errno < 0)
	if (err < sizeof(initpkt))

	err = EINVAL;//EPROTO;
	/* verify the greeting: password then magic (big-endian) */
	if (memcmp(initpkt.passwd, NBD_INIT_PASSWD, sizeof(initpkt.passwd)))
	if (B_BENDIAN_TO_HOST_INT64(initpkt.magic) != NBD_INIT_MAGIC)

	/* exported device size arrives big-endian in the init packet */
	dev->size = B_BENDIAN_TO_HOST_INT64(initpkt.device_size);

	dprintf(DP " %s: connected, device size %" B_PRIu64 " bytes.\n",
		__FUNCTION__, dev->size);

	/* start the reply-reader thread for this connection */
	err = dev->postoffice = spawn_kernel_thread(nbd_postoffice,
		"nbd postoffice", B_REAL_TIME_PRIORITY, dev);

	resume_thread(dev->postoffice);

	PRINT((DP "<%s\n", __FUNCTION__));

	/* --- error unwind --- */
	dev->postoffice = -1;

	kclosesocket(dev->sock);

	dprintf(DP "<%s: error 0x%08" B_PRIx32 "\n", __FUNCTION__, err);
/* Shut the connection down and reap the postoffice thread. */
status_t
nbd_teardown(struct nbd_device *dev)
	PRINT((DP ">%s()\n", __FUNCTION__));
	/* shutdown() first so a krecv() blocked in the postoffice returns */
	kshutdown(dev->sock, SHUT_RDWR);
	kclosesocket(dev->sock);

	wait_for_thread(dev->postoffice, &ret);
/* Send the request header (and payload for writes) to the server,
 * then queue the entry for the postoffice to match its reply.
 * Called with dev->ben held so header+data go out back-to-back
 * (see "sending request+data must be atomic" in nbd_write()). */
status_t
nbd_post_request(struct nbd_device *dev, struct nbd_request_entry *req)
	PRINT((DP ">%s(handle:%" B_PRIu64 ")\n", __FUNCTION__, req->handle));

	err = ksend(dev->sock, &req->req, sizeof(req->req), 0);

	/* writes carry their payload immediately after the header */
	if (req->type == NBD_CMD_WRITE)
		err = ksend(dev->sock, req->buffer, req->len, 0);

	err = nbd_queue_request(dev, req);
#pragma mark ==== device hooks ====

/* one slot per publishable device; configured from driver settings */
static struct nbd_device nbd_devices[MAX_NBDS];
/* open() hook: look the device up by its published name, allocate a
 * cookie, connect on demand and bump the refcount.
 * NOTE(review): error checks and the branch structure between the
 * visible lines are missing from this extract. */
status_t
nbd_open(const char *name, uint32 flags, cookie_t **cookie) {
	struct nbd_device *dev = NULL;
	PRINT((DP ">%s(%s, %" B_PRIx32 ", )\n", __FUNCTION__, name, flags));
	(void)name; (void)flags;
	dev = nbd_find_device(name);
	if (!dev || !dev->valid)

	*cookie = (void*)malloc(sizeof(cookie_t));

	memset(*cookie, 0, sizeof(cookie_t));
	(*cookie)->dev = dev;

	err = mutex_lock(&dev->ben);

	err = nbd_connect(dev);

	refcnt = dev->refcnt++;

	mutex_unlock(&dev->ben);

	/* MOUNT_KLUDGE: open ourselves read-only for a while — see the
	 * comment near the top of the file */
	sprintf(buf, "/dev/%s", name);
	dev->kludge = open(buf, O_RDONLY);

	mutex_unlock(&dev->ben);

	dprintf(DP " %s: error 0x%08" B_PRIx32 "\n", __FUNCTION__, err);
/* close() hook: visibly only takes and releases the device lock (see
 * the XXX below); real teardown happens in nbd_free() on the last
 * reference. */
status_t
nbd_close(cookie_t *cookie) {
	struct nbd_device *dev = cookie->dev;

	PRINT((DP ">%s(%d)\n", __FUNCTION__, WHICH(cookie->dev)));

	err = mutex_lock(&dev->ben);

	// XXX: do something ?

	mutex_unlock(&dev->ben);
/* free() hook: drop one reference; tear the connection down when the
 * last reference goes away. */
status_t
nbd_free(cookie_t *cookie) {
	struct nbd_device *dev = cookie->dev;

	PRINT((DP ">%s(%d)\n", __FUNCTION__, WHICH(cookie->dev)));

	err = mutex_lock(&dev->ben);

	if (--dev->refcnt == 0) {
		err = nbd_teardown(dev);

	mutex_unlock(&dev->ben);
/* ioctl() hook.
 * NOTE(review): the switch statement itself and the break/return lines
 * between cases are missing from this extract. */
status_t
nbd_control(cookie_t *cookie, uint32 op, void *data, size_t len) {
	PRINT((DP ">%s(%d, %" B_PRIu32 ", , %ld)\n", __FUNCTION__,
		WHICH(cookie->dev), op, len));

	case B_GET_DEVICE_SIZE: /* this one is broken anyway... */
		/* truncates the 64-bit size to size_t, hence "broken" */
		*(size_t *)data = (size_t)cookie->dev->size;

	case B_SET_DEVICE_SIZE: /* broken */

	case B_SET_NONBLOCKING_IO:

	case B_SET_BLOCKING_IO:

	case B_GET_READ_STATUS:
	case B_GET_WRITE_STATUS:
		/* never report pending I/O */
		*(bool *)data = false;

	case B_GET_BIOS_GEOMETRY:
		/* fabricate a simple linear geometry from the device size */
		device_geometry *geom = (device_geometry *)data;
		geom->bytes_per_sector = BLKSIZE;
		geom->sectors_per_track = 1;
		geom->cylinder_count = cookie->dev->size / BLKSIZE;
		geom->head_count = 1;
		geom->device_type = B_DISK;
		geom->removable = false;
		geom->read_only = cookie->dev->readonly;
		geom->write_once = false;

	case B_GET_MEDIA_STATUS:
		*(status_t *)data = B_OK;

	case B_FLUSH_DRIVE_CACHE: /* wait for request list to be empty ? */

	return B_NOT_ALLOWED;
/* read() hook: build a READ request, post it, block until the
 * postoffice wakes us, then copy the received data out.
 * NOTE(review): error branches between the visible lines are missing
 * from this extract. */
status_t
nbd_read(cookie_t *cookie, off_t position, void *data, size_t *numbytes) {
	struct nbd_device *dev = cookie->dev;
	struct nbd_request_entry *req;
	status_t err, semerr;
	PRINT((DP ">%s(%d, %" B_PRIdOFF ", , )\n", __FUNCTION__,
		WHICH(cookie->dev), position));

	err = nbd_alloc_request(dev, &req, NBD_CMD_READ, position, *numbytes, NULL);

	err = mutex_lock(&dev->ben);

	/* post under the lock so request bytes stay contiguous on the wire */
	err = nbd_post_request(dev, req);

	mutex_unlock(&dev->ben);

	/* wait for the postoffice thread to release us */
	semerr = acquire_sem(req->sem);

	err = mutex_lock(&dev->ben);

	nbd_free_request(dev, req);

	mutex_unlock(&dev->ben);

	if (semerr == B_OK) {
		/* reply arrived: hand the data and byte count to the caller */
		*numbytes = req->len;
		memcpy(data, req->buffer, req->len);

		if (*numbytes == 0 && req->reply.error)

		nbd_free_request(dev, req);

	nbd_free_request(dev, req);
/* write() hook: build a WRITE request (payload is sent inline by
 * nbd_post_request()), block until the reply arrives, then report the
 * byte count.  NOTE(review): error branches between the visible lines
 * are missing from this extract. */
status_t
nbd_write(cookie_t *cookie, off_t position, const void *data, size_t *numbytes) {
	struct nbd_device *dev = cookie->dev;
	struct nbd_request_entry *req;
	status_t err, semerr;
	PRINT((DP ">%s(%d, %" B_PRIdOFF ", %ld, )\n", __FUNCTION__,
		WHICH(cookie->dev), position, *numbytes));

	err = nbd_alloc_request(dev, &req, NBD_CMD_WRITE, position, *numbytes, data);

	err = mutex_lock(&dev->ben);

	/* sending request+data must be atomic */
	err = nbd_post_request(dev, req);

	mutex_unlock(&dev->ben);

	/* wait for the postoffice thread to release us */
	semerr = acquire_sem(req->sem);

	err = mutex_lock(&dev->ben);

	nbd_free_request(dev, req);

	mutex_unlock(&dev->ben);

	if (semerr == B_OK) {
		*numbytes = req->len;

		if (*numbytes == 0 && req->reply.error)

		nbd_free_request(dev, req);

	nbd_free_request(dev, req);
/* hook table handed to devfs for every published nbd device */
device_hooks nbd_hooks ={
	(device_open_hook)nbd_open,
	(device_close_hook)nbd_close,
	(device_free_hook)nbd_free,
	(device_control_hook)nbd_control,
	(device_read_hook)nbd_read,
	(device_write_hook)nbd_write,
#pragma mark ==== driver hooks ====

int32 api_version = B_CUR_DRIVER_API_VERSION;

/* published device names; one per slot plus a NULL terminator */
static char *nbd_name[MAX_NBDS+1] = {
/* NOTE(review): this span holds fragments of two driver hooks —
 * presumably init_hardware() and init_driver() — whose headers are not
 * visible in this extract. */
	PRINT((DP ">%s()\n", __FUNCTION__));

	// XXX: load settings

	char **names = nbd_name;
	PRINT((DP ">%s()\n", __FUNCTION__));

	handle = load_driver_settings(DRV);

	// XXX: test for boot args ?

	err = ksocket_init();

	/* reset every device slot to a known-idle state */
	for (i = 0; i < MAX_NBDS; i++) {
		nbd_devices[i].valid = false;
		nbd_devices[i].readonly = false;
		mutex_init(&nbd_devices[i].ben, "nbd lock");
		nbd_devices[i].refcnt = 0;
		nbd_devices[i].req = 0LL; /* next ID for requests */
		nbd_devices[i].sock = -1;
		nbd_devices[i].postoffice = -1;
		nbd_devices[i].size = 0LL;
		nbd_devices[i].reqs = NULL;
		nbd_devices[i].kludge = -1;

	/* parse per-device settings: a section named after the slot index,
	 * with "server <addr> <port>" and an optional "readonly" flag */
	for (i = 0; i < MAX_NBDS; i++) {
		const driver_settings *settings = get_driver_settings(handle);
		driver_parameter *p = NULL;

		sprintf(keyname, "%d", i);
		for (j = 0; j < settings->parameter_count; j++)
			if (!strcmp(settings->parameters[j].name, keyname))
				p = &settings->parameters[j];

		for (j = 0; j < p->parameter_count; j++) {
			if (!strcmp(p->parameters[j].name, "readonly"))
				nbd_devices[i].readonly = true;
			if (!strcmp(p->parameters[j].name, "server")) {
				/* need both an address and a port */
				if (p->parameters[j].value_count < 2)

				nbd_devices[i].server.sin_len = sizeof(struct sockaddr_in);
				nbd_devices[i].server.sin_family = AF_INET;
				kinet_aton(p->parameters[j].values[0], &nbd_devices[i].server.sin_addr);
				nbd_devices[i].server.sin_port = htons(atoi(p->parameters[j].values[1]));
				dprintf(DP " configured [%d]\n", i);
				/* publish "disk/virtual/nbd/N/raw" for this slot */
				*(names) = malloc(DEVICE_NAME_MAX);
				if (*(names) == NULL)

				sprintf(*(names++), DEVICE_FMT, i);
				nbd_devices[i].valid = true;

	unload_driver_settings(handle);
/* NOTE(review): fragments of uninit_driver() and publish_devices();
 * the function headers are not visible in this extract. */
	PRINT((DP ">%s()\n", __FUNCTION__));
	for (i = 0; i < MAX_NBDS; i++) {

		mutex_destroy(&nbd_devices[i].ben);

	/* let TCP time out before the kernel deletes the image — see the
	 * BONE locked-pages comment near the top of the file */
	snooze(BONE_TEARDOWN_DELAY);

	PRINT((DP ">%s()\n", __FUNCTION__));
	return (const char **)nbd_name;
/* devfs find_device hook — every published name shares the same hook
 * table.  NOTE(review): the return-type line and the return statement
 * are not visible in this extract. */
find_device(const char* name)
	PRINT((DP ">%s(%s)\n", __FUNCTION__, name));

/* Map a published device name back to its nbd_devices[] slot by
 * regenerating each candidate name and comparing.
 * NOTE(review): the return-type line and not-found return are not
 * visible in this extract. */
nbd_find_device(const char* name)
	PRINT((DP ">%s(%s)\n", __FUNCTION__, name));
	for (i = 0; i < MAX_NBDS; i++) {
		char buf[DEVICE_NAME_MAX];
		sprintf(buf, DEVICE_FMT, i);
		if (!strcmp(buf, name))
			return &nbd_devices[i];