Merge tag 'pull-loongarch-20241016' of https://gitlab.com/gaosong/qemu into staging
[qemu/armbru.git] / contrib / vhost-user-blk / vhost-user-blk.c
blob6cc18a1c04f4efa94d4341b2b043217cd77f5bd5
/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */
18 #include "qemu/osdep.h"
19 #include "qemu/bswap.h"
20 #include "standard-headers/linux/virtio_blk.h"
21 #include "libvhost-user-glib.h"
23 #if defined(__linux__)
24 #include <linux/fs.h>
25 #include <sys/ioctl.h>
26 #endif
/* Maximum number of virtqueues requested from vug_init(). */
enum {
    VHOST_USER_BLK_MAX_QUEUES = 8
};
/* Request footer: one status byte, set to a VIRTIO_BLK_S_* code. */
struct virtio_blk_inhdr {
    unsigned char status;
};
36 /* vhost user block device */
37 typedef struct VubDev {
38 VugDev parent;
39 int blk_fd;
40 struct virtio_blk_config blkcfg;
41 bool enable_ro;
42 char *blk_name;
43 GMainLoop *loop;
44 } VubDev;
46 typedef struct VubReq {
47 VuVirtqElement *elem;
48 int64_t sector_num;
49 size_t size;
50 struct virtio_blk_inhdr *in;
51 struct virtio_blk_outhdr *out;
52 VubDev *vdev_blk;
53 struct VuVirtq *vq;
54 } VubReq;
/*
 * Sum of the lengths of the first @iov_cnt entries of @iov
 * (refer util/iov.c).
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining;

    for (remaining = iov_cnt; remaining > 0; remaining--, iov++) {
        total += iov->iov_len;
    }

    return total;
}
/*
 * Flatten the first @iov_cnt entries of @iov into @buf.
 *
 * @buf must be large enough for the combined length of all entries
 * (see vub_iov_size()).  Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
    unsigned char *dst = buf;
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        /* An empty segment may carry a NULL base; never hand it to memcpy. */
        if (iov[i].iov_len == 0) {
            continue;
        }
        memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }

    return len;
}
84 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
86 VugDev *gdev;
87 VubDev *vdev_blk;
89 assert(vu_dev);
91 gdev = container_of(vu_dev, VugDev, parent);
92 vdev_blk = container_of(gdev, VubDev, parent);
93 if (buf) {
94 g_warning("vu_panic: %s", buf);
97 g_main_loop_quit(vdev_blk->loop);
100 static void vub_req_complete(VubReq *req)
102 VugDev *gdev = &req->vdev_blk->parent;
103 VuDev *vu_dev = &gdev->parent;
105 /* IO size with 1 extra status byte */
106 vu_queue_push(vu_dev, req->vq, req->elem,
107 req->size + 1);
108 vu_queue_notify(vu_dev, req->vq);
110 g_free(req->elem);
111 g_free(req);
114 static int vub_open(const char *file_name, bool wce)
116 int fd;
117 int flags = O_RDWR;
119 if (!wce) {
120 flags |= O_DIRECT;
123 fd = open(file_name, flags);
124 if (fd < 0) {
125 fprintf(stderr, "Cannot open file %s, %s\n", file_name,
126 strerror(errno));
127 return -1;
130 return fd;
133 static ssize_t
134 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
136 VubDev *vdev_blk = req->vdev_blk;
137 ssize_t rc;
139 if (!iovcnt) {
140 fprintf(stderr, "Invalid Read IOV count\n");
141 return -1;
144 req->size = vub_iov_size(iov, iovcnt);
145 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
146 if (rc < 0) {
147 fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
148 vdev_blk->blk_name, req->sector_num, req->size,
149 strerror(errno));
150 return -1;
153 return rc;
156 static ssize_t
157 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
159 VubDev *vdev_blk = req->vdev_blk;
160 ssize_t rc;
162 if (!iovcnt) {
163 fprintf(stderr, "Invalid Write IOV count\n");
164 return -1;
167 req->size = vub_iov_size(iov, iovcnt);
168 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
169 if (rc < 0) {
170 fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
171 vdev_blk->blk_name, req->sector_num, req->size,
172 strerror(errno));
173 return -1;
176 return rc;
179 static int
180 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
181 uint32_t type)
183 struct virtio_blk_discard_write_zeroes *desc;
184 ssize_t size;
185 void *buf;
187 size = vub_iov_size(iov, iovcnt);
188 if (size != sizeof(*desc)) {
189 fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
190 return -1;
192 buf = g_new0(char, size);
193 vub_iov_to_buf(iov, iovcnt, buf);
195 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
196 VubDev *vdev_blk = req->vdev_blk;
197 desc = buf;
198 uint64_t range[2] = { le64_to_cpu(desc->sector) << 9,
199 (uint64_t)le32_to_cpu(desc->num_sectors) << 9 };
200 if (type == VIRTIO_BLK_T_DISCARD) {
201 if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
202 g_free(buf);
203 return 0;
205 } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
206 if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
207 g_free(buf);
208 return 0;
211 #endif
213 g_free(buf);
214 return -1;
217 static void
218 vub_flush(VubReq *req)
220 VubDev *vdev_blk = req->vdev_blk;
222 fdatasync(vdev_blk->blk_fd);
225 static int vub_virtio_process_req(VubDev *vdev_blk,
226 VuVirtq *vq)
228 VugDev *gdev = &vdev_blk->parent;
229 VuDev *vu_dev = &gdev->parent;
230 VuVirtqElement *elem;
231 uint32_t type;
232 unsigned in_num;
233 unsigned out_num;
234 VubReq *req;
236 elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
237 if (!elem) {
238 return -1;
241 /* refer to hw/block/virtio_blk.c */
242 if (elem->out_num < 1 || elem->in_num < 1) {
243 fprintf(stderr, "virtio-blk request missing headers\n");
244 g_free(elem);
245 return -1;
248 req = g_new0(VubReq, 1);
249 req->vdev_blk = vdev_blk;
250 req->vq = vq;
251 req->elem = elem;
253 in_num = elem->in_num;
254 out_num = elem->out_num;
256 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
257 if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
258 fprintf(stderr, "Invalid outhdr size\n");
259 goto err;
261 req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
262 out_num--;
264 if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
265 fprintf(stderr, "Invalid inhdr size\n");
266 goto err;
268 req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
269 in_num--;
271 type = le32_to_cpu(req->out->type);
272 switch (type & ~VIRTIO_BLK_T_BARRIER) {
273 case VIRTIO_BLK_T_IN:
274 case VIRTIO_BLK_T_OUT: {
275 ssize_t ret = 0;
276 bool is_write = type & VIRTIO_BLK_T_OUT;
277 req->sector_num = le64_to_cpu(req->out->sector);
278 if (is_write) {
279 ret = vub_writev(req, &elem->out_sg[1], out_num);
280 } else {
281 ret = vub_readv(req, &elem->in_sg[0], in_num);
283 if (ret >= 0) {
284 req->in->status = VIRTIO_BLK_S_OK;
285 } else {
286 req->in->status = VIRTIO_BLK_S_IOERR;
288 vub_req_complete(req);
289 break;
291 case VIRTIO_BLK_T_FLUSH:
292 vub_flush(req);
293 req->in->status = VIRTIO_BLK_S_OK;
294 vub_req_complete(req);
295 break;
296 case VIRTIO_BLK_T_GET_ID: {
297 size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
298 VIRTIO_BLK_ID_BYTES);
299 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
300 req->in->status = VIRTIO_BLK_S_OK;
301 req->size = elem->in_sg[0].iov_len;
302 vub_req_complete(req);
303 break;
305 case VIRTIO_BLK_T_DISCARD:
306 case VIRTIO_BLK_T_WRITE_ZEROES: {
307 int rc;
308 rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
309 if (rc == 0) {
310 req->in->status = VIRTIO_BLK_S_OK;
311 } else {
312 req->in->status = VIRTIO_BLK_S_IOERR;
314 vub_req_complete(req);
315 break;
317 default:
318 req->in->status = VIRTIO_BLK_S_UNSUPP;
319 vub_req_complete(req);
320 break;
323 return 0;
325 err:
326 g_free(elem);
327 g_free(req);
328 return -1;
331 static void vub_process_vq(VuDev *vu_dev, int idx)
333 VugDev *gdev;
334 VubDev *vdev_blk;
335 VuVirtq *vq;
336 int ret;
338 gdev = container_of(vu_dev, VugDev, parent);
339 vdev_blk = container_of(gdev, VubDev, parent);
340 assert(vdev_blk);
342 vq = vu_get_queue(vu_dev, idx);
343 assert(vq);
345 while (1) {
346 ret = vub_virtio_process_req(vdev_blk, vq);
347 if (ret) {
348 break;
353 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
355 VuVirtq *vq;
357 assert(vu_dev);
359 vq = vu_get_queue(vu_dev, idx);
360 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
363 static uint64_t
364 vub_get_features(VuDev *dev)
366 uint64_t features;
367 VugDev *gdev;
368 VubDev *vdev_blk;
370 gdev = container_of(dev, VugDev, parent);
371 vdev_blk = container_of(gdev, VubDev, parent);
373 features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
374 1ull << VIRTIO_BLK_F_SEG_MAX |
375 1ull << VIRTIO_BLK_F_TOPOLOGY |
376 1ull << VIRTIO_BLK_F_BLK_SIZE |
377 1ull << VIRTIO_BLK_F_FLUSH |
378 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
379 1ull << VIRTIO_BLK_F_DISCARD |
380 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
381 #endif
382 1ull << VIRTIO_BLK_F_CONFIG_WCE;
384 if (vdev_blk->enable_ro) {
385 features |= 1ull << VIRTIO_BLK_F_RO;
388 return features;
391 static uint64_t
392 vub_get_protocol_features(VuDev *dev)
394 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
395 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
398 static int
399 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
401 VugDev *gdev;
402 VubDev *vdev_blk;
404 if (len > sizeof(struct virtio_blk_config)) {
405 return -1;
408 gdev = container_of(vu_dev, VugDev, parent);
409 vdev_blk = container_of(gdev, VubDev, parent);
410 memcpy(config, &vdev_blk->blkcfg, len);
412 return 0;
415 static int
416 vub_set_config(VuDev *vu_dev, const uint8_t *data,
417 uint32_t offset, uint32_t size, uint32_t flags)
419 VugDev *gdev;
420 VubDev *vdev_blk;
421 uint8_t wce;
422 int fd;
424 /* don't support live migration */
425 if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
426 return -1;
429 gdev = container_of(vu_dev, VugDev, parent);
430 vdev_blk = container_of(gdev, VubDev, parent);
432 if (offset != offsetof(struct virtio_blk_config, wce) ||
433 size != 1) {
434 return -1;
437 wce = *data;
438 if (wce == vdev_blk->blkcfg.wce) {
439 /* Do nothing as same with old configuration */
440 return 0;
443 vdev_blk->blkcfg.wce = wce;
444 fprintf(stdout, "Write Cache Policy Changed\n");
445 if (vdev_blk->blk_fd >= 0) {
446 close(vdev_blk->blk_fd);
447 vdev_blk->blk_fd = -1;
450 fd = vub_open(vdev_blk->blk_name, wce);
451 if (fd < 0) {
452 fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
453 vdev_blk->blk_fd = -1;
454 return -1;
456 vdev_blk->blk_fd = fd;
458 return 0;
461 static const VuDevIface vub_iface = {
462 .get_features = vub_get_features,
463 .queue_set_started = vub_queue_set_started,
464 .get_protocol_features = vub_get_protocol_features,
465 .get_config = vub_get_config,
466 .set_config = vub_set_config,
/*
 * Create a listening UNIX stream socket bound to @unix_fn.
 *
 * Any existing file at that path is unlinked first.  Returns the socket
 * descriptor, or -1 on failure.  A path that does not fit in sun_path is
 * rejected instead of being truncated, because truncation would bind a
 * different path than the one requested (and unlinked).
 */
static int unix_sock_new(char *unix_fn)
{
    /* Zero the whole address so no indeterminate bytes reach bind(). */
    struct sockaddr_un un = { .sun_family = AF_UNIX };
    int sock;
    int n;

    assert(unix_fn);

    n = snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    if (n < 0 || (size_t)n >= sizeof(un.sun_path)) {
        fprintf(stderr, "Socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
504 static void vub_free(struct VubDev *vdev_blk)
506 if (!vdev_blk) {
507 return;
510 g_main_loop_unref(vdev_blk->loop);
511 if (vdev_blk->blk_fd >= 0) {
512 close(vdev_blk->blk_fd);
514 g_free(vdev_blk);
/*
 * Return the value the BLKSSZGET ioctl reports for @fd.  The result is
 * 512 when the ioctl fails or is not available in this build.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t sector_size = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* On failure the default above is returned, so the result is unused. */
    (void)ioctl(fd, BLKSSZGET, &sector_size);
#endif

    return sector_size;
}
531 static void
532 vub_initialize_config(int fd, struct virtio_blk_config *config)
534 off_t capacity;
536 capacity = lseek(fd, 0, SEEK_END);
537 config->capacity = capacity >> 9;
538 config->blk_size = vub_get_blocksize(fd);
539 config->size_max = 65536;
540 config->seg_max = 128 - 2;
541 config->min_io_size = 1;
542 config->opt_io_size = 1;
543 config->num_queues = 1;
544 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
545 config->max_discard_sectors = 32768;
546 config->max_discard_seg = 1;
547 config->discard_sector_alignment = config->blk_size >> 9;
548 config->max_write_zeroes_sectors = 32768;
549 config->max_write_zeroes_seg = 1;
550 #endif
553 static VubDev *
554 vub_new(char *blk_file)
556 VubDev *vdev_blk;
558 vdev_blk = g_new0(VubDev, 1);
559 vdev_blk->loop = g_main_loop_new(NULL, FALSE);
560 vdev_blk->blk_fd = vub_open(blk_file, 0);
561 if (vdev_blk->blk_fd < 0) {
562 fprintf(stderr, "Error to open block device %s\n", blk_file);
563 vub_free(vdev_blk);
564 return NULL;
566 vdev_blk->enable_ro = false;
567 vdev_blk->blkcfg.wce = 0;
568 vdev_blk->blk_name = blk_file;
570 /* fill virtio_blk_config with block parameters */
571 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
573 return vdev_blk;
576 static int opt_fdnum = -1;
577 static char *opt_socket_path;
578 static char *opt_blk_file;
579 static gboolean opt_print_caps;
580 static gboolean opt_read_only;
582 static GOptionEntry entries[] = {
583 { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
584 "Print capabilities", NULL },
585 { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
586 "Use inherited fd socket", "FDNUM" },
587 { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
588 "Use UNIX socket path", "PATH" },
589 {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
590 "block device or file path", "PATH"},
591 { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
592 "Enable read-only", NULL },
593 { NULL, },
596 int main(int argc, char **argv)
598 int lsock = -1, csock = -1;
599 VubDev *vdev_blk = NULL;
600 GError *error = NULL;
601 GOptionContext *context;
603 context = g_option_context_new(NULL);
604 g_option_context_add_main_entries(context, entries, NULL);
605 if (!g_option_context_parse(context, &argc, &argv, &error)) {
606 g_printerr("Option parsing failed: %s\n", error->message);
607 exit(EXIT_FAILURE);
609 if (opt_print_caps) {
610 g_print("{\n");
611 g_print(" \"type\": \"block\",\n");
612 g_print(" \"features\": [\n");
613 g_print(" \"read-only\",\n");
614 g_print(" \"blk-file\"\n");
615 g_print(" ]\n");
616 g_print("}\n");
617 exit(EXIT_SUCCESS);
620 if (!opt_blk_file) {
621 g_print("%s\n", g_option_context_get_help(context, true, NULL));
622 exit(EXIT_FAILURE);
625 if (opt_socket_path) {
626 lsock = unix_sock_new(opt_socket_path);
627 if (lsock < 0) {
628 exit(EXIT_FAILURE);
630 } else if (opt_fdnum < 0) {
631 g_print("%s\n", g_option_context_get_help(context, true, NULL));
632 exit(EXIT_FAILURE);
633 } else {
634 lsock = opt_fdnum;
637 csock = accept(lsock, NULL, NULL);
638 if (csock < 0) {
639 g_printerr("Accept error %s\n", strerror(errno));
640 exit(EXIT_FAILURE);
643 vdev_blk = vub_new(opt_blk_file);
644 if (!vdev_blk) {
645 exit(EXIT_FAILURE);
647 if (opt_read_only) {
648 vdev_blk->enable_ro = true;
651 if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
652 vub_panic_cb, &vub_iface)) {
653 g_printerr("Failed to initialize libvhost-user-glib\n");
654 exit(EXIT_FAILURE);
657 g_main_loop_run(vdev_blk->loop);
658 g_main_loop_unref(vdev_blk->loop);
659 g_option_context_free(context);
660 vug_deinit(&vdev_blk->parent);
661 vub_free(vdev_blk);
662 if (csock >= 0) {
663 close(csock);
665 if (lsock >= 0) {
666 close(lsock);
668 g_free(opt_socket_path);
669 g_free(opt_blk_file);
671 return 0;