/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */
18 #include "qemu/osdep.h"
19 #include "qemu/bswap.h"
20 #include "standard-headers/linux/virtio_blk.h"
21 #include "libvhost-user-glib.h"
23 #if defined(__linux__)
25 #include <sys/ioctl.h>
/* Queue count handed to vug_init() when the device is created in main(). */
enum {
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
/*
 * Per-request trailer the backend writes back to the guest.
 *
 * NOTE(review): the member line is absent from this listing.  `status` is
 * restored from its uses (req->in->status = VIRTIO_BLK_S_*) and is assumed
 * to be one byte, in line with the "1 extra status byte" accounting in
 * vub_req_complete() -- confirm the exact type.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
36 /* vhost user block device */
37 typedef struct VubDev
{
40 struct virtio_blk_config blkcfg
;
46 typedef struct VubReq
{
50 struct virtio_blk_inhdr
*in
;
51 struct virtio_blk_outhdr
*out
;
/*
 * Total number of bytes described by the first @iov_cnt entries of @iov
 * (refer util/iov.c).
 *
 * Returns 0 when @iov_cnt is 0; @iov is not dereferenced in that case.
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}
/*
 * Flatten the first @iov_cnt segments of @iov into @buf, in order.
 *
 * @buf must have room for vub_iov_size(iov, iov_cnt) bytes.
 * Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on void * is a GNU extension. */
    unsigned char *dst = buf;
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        /* Skip empty segments; their base may be NULL, which memcpy forbids. */
        if (iov[i].iov_len == 0) {
            continue;
        }
        memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}
84 static void vub_panic_cb(VuDev
*vu_dev
, const char *buf
)
91 gdev
= container_of(vu_dev
, VugDev
, parent
);
92 vdev_blk
= container_of(gdev
, VubDev
, parent
);
94 g_warning("vu_panic: %s", buf
);
97 g_main_loop_quit(vdev_blk
->loop
);
100 static void vub_req_complete(VubReq
*req
)
102 VugDev
*gdev
= &req
->vdev_blk
->parent
;
103 VuDev
*vu_dev
= &gdev
->parent
;
105 /* IO size with 1 extra status byte */
106 vu_queue_push(vu_dev
, req
->vq
, req
->elem
,
108 vu_queue_notify(vu_dev
, req
->vq
);
114 static int vub_open(const char *file_name
, bool wce
)
123 fd
= open(file_name
, flags
);
125 fprintf(stderr
, "Cannot open file %s, %s\n", file_name
,
134 vub_readv(VubReq
*req
, struct iovec
*iov
, uint32_t iovcnt
)
136 VubDev
*vdev_blk
= req
->vdev_blk
;
140 fprintf(stderr
, "Invalid Read IOV count\n");
144 req
->size
= vub_iov_size(iov
, iovcnt
);
145 rc
= preadv(vdev_blk
->blk_fd
, iov
, iovcnt
, req
->sector_num
* 512);
147 fprintf(stderr
, "%s, Sector %"PRIu64
", Size %zu failed with %s\n",
148 vdev_blk
->blk_name
, req
->sector_num
, req
->size
,
157 vub_writev(VubReq
*req
, struct iovec
*iov
, uint32_t iovcnt
)
159 VubDev
*vdev_blk
= req
->vdev_blk
;
163 fprintf(stderr
, "Invalid Write IOV count\n");
167 req
->size
= vub_iov_size(iov
, iovcnt
);
168 rc
= pwritev(vdev_blk
->blk_fd
, iov
, iovcnt
, req
->sector_num
* 512);
170 fprintf(stderr
, "%s, Sector %"PRIu64
", Size %zu failed with %s\n",
171 vdev_blk
->blk_name
, req
->sector_num
, req
->size
,
180 vub_discard_write_zeroes(VubReq
*req
, struct iovec
*iov
, uint32_t iovcnt
,
183 struct virtio_blk_discard_write_zeroes
*desc
;
187 size
= vub_iov_size(iov
, iovcnt
);
188 if (size
!= sizeof(*desc
)) {
189 fprintf(stderr
, "Invalid size %zd, expect %zd\n", size
, sizeof(*desc
));
192 buf
= g_new0(char, size
);
193 vub_iov_to_buf(iov
, iovcnt
, buf
);
195 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
196 VubDev
*vdev_blk
= req
->vdev_blk
;
198 uint64_t range
[2] = { le64_to_cpu(desc
->sector
) << 9,
199 (uint64_t)le32_to_cpu(desc
->num_sectors
) << 9 };
200 if (type
== VIRTIO_BLK_T_DISCARD
) {
201 if (ioctl(vdev_blk
->blk_fd
, BLKDISCARD
, range
) == 0) {
205 } else if (type
== VIRTIO_BLK_T_WRITE_ZEROES
) {
206 if (ioctl(vdev_blk
->blk_fd
, BLKZEROOUT
, range
) == 0) {
218 vub_flush(VubReq
*req
)
220 VubDev
*vdev_blk
= req
->vdev_blk
;
222 fdatasync(vdev_blk
->blk_fd
);
225 static int vub_virtio_process_req(VubDev
*vdev_blk
,
228 VugDev
*gdev
= &vdev_blk
->parent
;
229 VuDev
*vu_dev
= &gdev
->parent
;
230 VuVirtqElement
*elem
;
236 elem
= vu_queue_pop(vu_dev
, vq
, sizeof(VuVirtqElement
) + sizeof(VubReq
));
241 /* refer to hw/block/virtio_blk.c */
242 if (elem
->out_num
< 1 || elem
->in_num
< 1) {
243 fprintf(stderr
, "virtio-blk request missing headers\n");
248 req
= g_new0(VubReq
, 1);
249 req
->vdev_blk
= vdev_blk
;
253 in_num
= elem
->in_num
;
254 out_num
= elem
->out_num
;
256 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
257 if (elem
->out_sg
[0].iov_len
< sizeof(struct virtio_blk_outhdr
)) {
258 fprintf(stderr
, "Invalid outhdr size\n");
261 req
->out
= (struct virtio_blk_outhdr
*)elem
->out_sg
[0].iov_base
;
264 if (elem
->in_sg
[in_num
- 1].iov_len
< sizeof(struct virtio_blk_inhdr
)) {
265 fprintf(stderr
, "Invalid inhdr size\n");
268 req
->in
= (struct virtio_blk_inhdr
*)elem
->in_sg
[in_num
- 1].iov_base
;
271 type
= le32_to_cpu(req
->out
->type
);
272 switch (type
& ~VIRTIO_BLK_T_BARRIER
) {
273 case VIRTIO_BLK_T_IN
:
274 case VIRTIO_BLK_T_OUT
: {
276 bool is_write
= type
& VIRTIO_BLK_T_OUT
;
277 req
->sector_num
= le64_to_cpu(req
->out
->sector
);
279 ret
= vub_writev(req
, &elem
->out_sg
[1], out_num
);
281 ret
= vub_readv(req
, &elem
->in_sg
[0], in_num
);
284 req
->in
->status
= VIRTIO_BLK_S_OK
;
286 req
->in
->status
= VIRTIO_BLK_S_IOERR
;
288 vub_req_complete(req
);
291 case VIRTIO_BLK_T_FLUSH
:
293 req
->in
->status
= VIRTIO_BLK_S_OK
;
294 vub_req_complete(req
);
296 case VIRTIO_BLK_T_GET_ID
: {
297 size_t size
= MIN(vub_iov_size(&elem
->in_sg
[0], in_num
),
298 VIRTIO_BLK_ID_BYTES
);
299 snprintf(elem
->in_sg
[0].iov_base
, size
, "%s", "vhost_user_blk");
300 req
->in
->status
= VIRTIO_BLK_S_OK
;
301 req
->size
= elem
->in_sg
[0].iov_len
;
302 vub_req_complete(req
);
305 case VIRTIO_BLK_T_DISCARD
:
306 case VIRTIO_BLK_T_WRITE_ZEROES
: {
308 rc
= vub_discard_write_zeroes(req
, &elem
->out_sg
[1], out_num
, type
);
310 req
->in
->status
= VIRTIO_BLK_S_OK
;
312 req
->in
->status
= VIRTIO_BLK_S_IOERR
;
314 vub_req_complete(req
);
318 req
->in
->status
= VIRTIO_BLK_S_UNSUPP
;
319 vub_req_complete(req
);
331 static void vub_process_vq(VuDev
*vu_dev
, int idx
)
338 gdev
= container_of(vu_dev
, VugDev
, parent
);
339 vdev_blk
= container_of(gdev
, VubDev
, parent
);
342 vq
= vu_get_queue(vu_dev
, idx
);
346 ret
= vub_virtio_process_req(vdev_blk
, vq
);
353 static void vub_queue_set_started(VuDev
*vu_dev
, int idx
, bool started
)
359 vq
= vu_get_queue(vu_dev
, idx
);
360 vu_set_queue_handler(vu_dev
, vq
, started
? vub_process_vq
: NULL
);
364 vub_get_features(VuDev
*dev
)
370 gdev
= container_of(dev
, VugDev
, parent
);
371 vdev_blk
= container_of(gdev
, VubDev
, parent
);
373 features
= 1ull << VIRTIO_BLK_F_SIZE_MAX
|
374 1ull << VIRTIO_BLK_F_SEG_MAX
|
375 1ull << VIRTIO_BLK_F_TOPOLOGY
|
376 1ull << VIRTIO_BLK_F_BLK_SIZE
|
377 1ull << VIRTIO_BLK_F_FLUSH
|
378 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
379 1ull << VIRTIO_BLK_F_DISCARD
|
380 1ull << VIRTIO_BLK_F_WRITE_ZEROES
|
382 1ull << VIRTIO_BLK_F_CONFIG_WCE
;
384 if (vdev_blk
->enable_ro
) {
385 features
|= 1ull << VIRTIO_BLK_F_RO
;
392 vub_get_protocol_features(VuDev
*dev
)
394 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG
|
395 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
;
399 vub_get_config(VuDev
*vu_dev
, uint8_t *config
, uint32_t len
)
404 if (len
> sizeof(struct virtio_blk_config
)) {
408 gdev
= container_of(vu_dev
, VugDev
, parent
);
409 vdev_blk
= container_of(gdev
, VubDev
, parent
);
410 memcpy(config
, &vdev_blk
->blkcfg
, len
);
416 vub_set_config(VuDev
*vu_dev
, const uint8_t *data
,
417 uint32_t offset
, uint32_t size
, uint32_t flags
)
424 /* don't support live migration */
425 if (flags
!= VHOST_SET_CONFIG_TYPE_FRONTEND
) {
429 gdev
= container_of(vu_dev
, VugDev
, parent
);
430 vdev_blk
= container_of(gdev
, VubDev
, parent
);
432 if (offset
!= offsetof(struct virtio_blk_config
, wce
) ||
438 if (wce
== vdev_blk
->blkcfg
.wce
) {
439 /* Do nothing as same with old configuration */
443 vdev_blk
->blkcfg
.wce
= wce
;
444 fprintf(stdout
, "Write Cache Policy Changed\n");
445 if (vdev_blk
->blk_fd
>= 0) {
446 close(vdev_blk
->blk_fd
);
447 vdev_blk
->blk_fd
= -1;
450 fd
= vub_open(vdev_blk
->blk_name
, wce
);
452 fprintf(stderr
, "Error to open block device %s\n", vdev_blk
->blk_name
);
453 vdev_blk
->blk_fd
= -1;
456 vdev_blk
->blk_fd
= fd
;
461 static const VuDevIface vub_iface
= {
462 .get_features
= vub_get_features
,
463 .queue_set_started
= vub_queue_set_started
,
464 .get_protocol_features
= vub_get_protocol_features
,
465 .get_config
= vub_get_config
,
466 .set_config
= vub_set_config
,
/*
 * Create a listening AF_UNIX stream socket bound to @unix_fn.  Any file
 * already at that path is unlinked first.
 *
 * Returns the listening socket, or -1 on failure.
 *
 * NOTE(review): the error reporting (perror) and the close-on-failure path
 * are restored from gaps in this listing -- confirm.
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    /* Zero-initialised so bind() never sees indeterminate bytes. */
    struct sockaddr_un un = { 0 };
    int n;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    n = snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    if (n < 0 || (size_t)n >= sizeof(un.sun_path)) {
        /* Refuse to bind to a silently truncated path. */
        fprintf(stderr, "Socket path too long: %s\n", unix_fn);
        goto fail;
    }

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
504 static void vub_free(struct VubDev
*vdev_blk
)
510 g_main_loop_unref(vdev_blk
->loop
);
511 if (vdev_blk
->blk_fd
>= 0) {
512 close(vdev_blk
->blk_fd
);
/*
 * Logical block size of @fd in bytes.
 *
 * Uses the BLKSSZGET ioctl where it is available.  Returns 512 if the
 * ioctl is unavailable, fails (for example on a regular file) or reports
 * a non-positive size.
 *
 * NOTE(review): the return type line is missing from this listing;
 * uint32_t is assumed from the `blocksize` local -- confirm.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* BLKSSZGET stores an int, so do not pass a uint32_t directly. */
    int sector_size = 0;

    if (ioctl(fd, BLKSSZGET, &sector_size) == 0 && sector_size > 0) {
        blocksize = (uint32_t)sector_size;
    }
#else
    (void)fd;
#endif

    return blocksize;
}
532 vub_initialize_config(int fd
, struct virtio_blk_config
*config
)
536 capacity
= lseek(fd
, 0, SEEK_END
);
537 config
->capacity
= capacity
>> 9;
538 config
->blk_size
= vub_get_blocksize(fd
);
539 config
->size_max
= 65536;
540 config
->seg_max
= 128 - 2;
541 config
->min_io_size
= 1;
542 config
->opt_io_size
= 1;
543 config
->num_queues
= 1;
544 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
545 config
->max_discard_sectors
= 32768;
546 config
->max_discard_seg
= 1;
547 config
->discard_sector_alignment
= config
->blk_size
>> 9;
548 config
->max_write_zeroes_sectors
= 32768;
549 config
->max_write_zeroes_seg
= 1;
554 vub_new(char *blk_file
)
558 vdev_blk
= g_new0(VubDev
, 1);
559 vdev_blk
->loop
= g_main_loop_new(NULL
, FALSE
);
560 vdev_blk
->blk_fd
= vub_open(blk_file
, 0);
561 if (vdev_blk
->blk_fd
< 0) {
562 fprintf(stderr
, "Error to open block device %s\n", blk_file
);
566 vdev_blk
->enable_ro
= false;
567 vdev_blk
->blkcfg
.wce
= 0;
568 vdev_blk
->blk_name
= blk_file
;
570 /* fill virtio_blk_config with block parameters */
571 vub_initialize_config(vdev_blk
->blk_fd
, &vdev_blk
->blkcfg
);
576 static int opt_fdnum
= -1;
577 static char *opt_socket_path
;
578 static char *opt_blk_file
;
579 static gboolean opt_print_caps
;
580 static gboolean opt_read_only
;
582 static GOptionEntry entries
[] = {
583 { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE
, &opt_print_caps
,
584 "Print capabilities", NULL
},
585 { "fd", 'f', 0, G_OPTION_ARG_INT
, &opt_fdnum
,
586 "Use inherited fd socket", "FDNUM" },
587 { "socket-path", 's', 0, G_OPTION_ARG_FILENAME
, &opt_socket_path
,
588 "Use UNIX socket path", "PATH" },
589 {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME
, &opt_blk_file
,
590 "block device or file path", "PATH"},
591 { "read-only", 'r', 0, G_OPTION_ARG_NONE
, &opt_read_only
,
592 "Enable read-only", NULL
},
596 int main(int argc
, char **argv
)
598 int lsock
= -1, csock
= -1;
599 VubDev
*vdev_blk
= NULL
;
600 GError
*error
= NULL
;
601 GOptionContext
*context
;
603 context
= g_option_context_new(NULL
);
604 g_option_context_add_main_entries(context
, entries
, NULL
);
605 if (!g_option_context_parse(context
, &argc
, &argv
, &error
)) {
606 g_printerr("Option parsing failed: %s\n", error
->message
);
609 if (opt_print_caps
) {
611 g_print(" \"type\": \"block\",\n");
612 g_print(" \"features\": [\n");
613 g_print(" \"read-only\",\n");
614 g_print(" \"blk-file\"\n");
621 g_print("%s\n", g_option_context_get_help(context
, true, NULL
));
625 if (opt_socket_path
) {
626 lsock
= unix_sock_new(opt_socket_path
);
630 } else if (opt_fdnum
< 0) {
631 g_print("%s\n", g_option_context_get_help(context
, true, NULL
));
637 csock
= accept(lsock
, NULL
, NULL
);
639 g_printerr("Accept error %s\n", strerror(errno
));
643 vdev_blk
= vub_new(opt_blk_file
);
648 vdev_blk
->enable_ro
= true;
651 if (!vug_init(&vdev_blk
->parent
, VHOST_USER_BLK_MAX_QUEUES
, csock
,
652 vub_panic_cb
, &vub_iface
)) {
653 g_printerr("Failed to initialize libvhost-user-glib\n");
657 g_main_loop_run(vdev_blk
->loop
);
658 g_main_loop_unref(vdev_blk
->loop
);
659 g_option_context_free(context
);
660 vug_deinit(&vdev_blk
->parent
);
668 g_free(opt_socket_path
);
669 g_free(opt_blk_file
);