qapi: fix example of dump-guest-memory
[qemu/armbru.git] / subprojects / libvhost-user / libvhost-user.h
blobcde9f07bb3c4a12495b9a3caaafc69a54d742e23
/*
 * Vhost User library
 *
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
14 #ifndef LIBVHOST_USER_H
15 #define LIBVHOST_USER_H
17 #include <stdint.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <poll.h>
21 #include <linux/vhost.h>
22 #include <pthread.h>
23 #include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096

/* Upper bound used for virtqueue sizes in this library. */
#define VIRTQUEUE_MAX_SIZE 1024

/*
 * Number of memory regions (and file descriptors) carried inline by
 * VhostUserMemory and VhostUserMsg.
 */
#define VHOST_MEMORY_BASELINE_NREGIONS 8

/*
 * Set a reasonable maximum number of ram slots, which will be supported by
 * any architecture.
 */
#define VHOST_USER_MAX_RAM_SLOTS 32

/* Byte offset of the payload inside VhostUserMsg, i.e. the header size. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* Values for the flags argument that accompanies a config-space write. */
typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;
/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
/*
 * Bit positions of the vhost-user protocol features.
 *
 * Bit 13 has no enumerator in this header; VHOST_USER_PROTOCOL_F_MAX is
 * one past the highest declared bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,

    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every position below VHOST_USER_PROTOCOL_F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
/*
 * Request codes stored in VhostUserMsg.request.
 *
 * Code 34 has no enumerator in this header; VHOST_USER_MAX is one past
 * the highest declared request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_VRING_KICK = 35,
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_MAX
} VhostUserRequest;
/*
 * Request codes for messages travelling in the slave-to-master direction.
 * VHOST_USER_SLAVE_MAX is one past the highest declared request.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_VRING_CALL = 4,
    VHOST_USER_SLAVE_VRING_ERR = 5,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
/* Description of one guest memory region as carried in a message payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

/* Size in bytes of a single VhostUserMemoryRegion. */
#define VHOST_USER_MEM_REG_SIZE (sizeof(VhostUserMemoryRegion))
134 typedef struct VhostUserMemory {
135 uint32_t nregions;
136 uint32_t padding;
137 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
138 } VhostUserMemory;
140 typedef struct VhostUserMemRegMsg {
141 uint64_t padding;
142 VhostUserMemoryRegion region;
143 } VhostUserMemRegMsg;
/* Payload giving the size and offset of a log mapping. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
150 typedef struct VhostUserConfig {
151 uint32_t offset;
152 uint32_t size;
153 uint32_t flags;
154 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
155 } VhostUserConfig;
/*
 * Combined size of the offset, size and flags fields of VhostUserConfig.
 *
 * sizeof does not evaluate its operand, so the null-pointer member
 * expressions are never dereferenced; this avoids defining a dummy
 * VhostUserConfig object in every file that includes this header.
 */
#define VHOST_USER_CONFIG_HDR_SIZE \
    (sizeof(((VhostUserConfig *)0)->offset) \
     + sizeof(((VhostUserConfig *)0)->size) \
     + sizeof(((VhostUserConfig *)0)->flags))
/* Payload made of a 64-bit value plus the size and offset of an area. */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
/* Payload describing the inflight mapping and the queue geometry. */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
/*
 * Attribute that packs a structure. 32- and 64-bit x86 Windows builds
 * additionally request the gcc_struct layout.
 */
#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
181 typedef struct VhostUserMsg {
182 int request;
184 #define VHOST_USER_VERSION_MASK (0x3)
185 #define VHOST_USER_REPLY_MASK (0x1 << 2)
186 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
187 uint32_t flags;
188 uint32_t size; /* the following payload size */
190 union {
191 #define VHOST_USER_VRING_IDX_MASK (0xff)
192 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
193 uint64_t u64;
194 struct vhost_vring_state state;
195 struct vhost_vring_addr addr;
196 VhostUserMemory memory;
197 VhostUserMemRegMsg memreg;
198 VhostUserLog log;
199 VhostUserConfig config;
200 VhostUserVringArea area;
201 VhostUserInflight inflight;
202 } payload;
204 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
205 int fd_num;
206 uint8_t *data;
207 } VU_PACKED VhostUserMsg;
/* One guest memory region as tracked by the device. */
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;
222 typedef struct VuDev VuDev;
224 typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
225 typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
226 typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
227 int *do_reply);
228 typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg);
229 typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
230 typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
231 typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
232 typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
233 uint32_t offset, uint32_t size,
234 uint32_t flags);
236 typedef struct VuDevIface {
237 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
238 vu_get_features_cb get_features;
239 /* enable vhost implementation features */
240 vu_set_features_cb set_features;
241 /* get the protocol feature bitmask from the underlying vhost
242 * implementation */
243 vu_get_features_cb get_protocol_features;
244 /* enable protocol features in the underlying vhost implementation. */
245 vu_set_features_cb set_protocol_features;
246 /* process_msg is called for each vhost-user message received */
247 /* skip libvhost-user processing if return value != 0 */
248 vu_process_msg_cb process_msg;
249 /* tells when queues can be processed */
250 vu_queue_set_started_cb queue_set_started;
252 * If the queue is processed in order, in which case it will be
253 * resumed to vring.used->idx. This can help to support resuming
254 * on unmanaged exit/crash.
256 vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
257 /* get the config space of the device */
258 vu_get_config_cb get_config;
259 /* set the config space of the device */
260 vu_set_config_cb set_config;
261 } VuDevIface;
263 typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
/* Pointers to the three parts of a vring plus its size, log address and flags. */
typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;
/* Inflight-tracking state kept for one descriptor. */
typedef struct VuDescStateSplit {
    /* Indicate whether this descriptor is inflight or not.
     * Only available for head-descriptor. */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /* Maintain a list for the last batch of used descriptors.
     * Only available when batching is used for submitting */
    uint16_t next;

    /* Used to preserve the order of fetching available descriptors.
     * Only available for head-descriptor. */
    uint64_t counter;
} VuDescStateSplit;
291 typedef struct VuVirtqInflight {
292 /* The feature flags of this region. Now it's initialized to 0. */
293 uint64_t features;
295 /* The version of this region. It's 1 currently.
296 * Zero value indicates a vm reset happened. */
297 uint16_t version;
299 /* The size of VuDescStateSplit array. It's equal to the virtqueue
300 * size. Slave could get it from queue size field of VhostUserInflight. */
301 uint16_t desc_num;
303 /* The head of list that track the last batch of used descriptors. */
304 uint16_t last_batch_head;
306 /* Storing the idx value of used ring */
307 uint16_t used_idx;
309 /* Used to track the state of each descriptor in descriptor table */
310 VuDescStateSplit desc[];
311 } VuVirtqInflight;
/* A descriptor index paired with a counter; used for VuVirtq.resubmit_list. */
typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
318 typedef struct VuVirtq {
319 VuRing vring;
321 VuVirtqInflight *inflight;
323 VuVirtqInflightDesc *resubmit_list;
325 uint16_t resubmit_num;
327 uint64_t counter;
329 /* Next head to pop */
330 uint16_t last_avail_idx;
332 /* Last avail_idx read from VQ. */
333 uint16_t shadow_avail_idx;
335 uint16_t used_idx;
337 /* Last used index value we have signalled on */
338 uint16_t signalled_used;
340 /* Last used index value we have signalled on */
341 bool signalled_used_valid;
343 /* Notification enabled? */
344 bool notification;
346 int inuse;
348 vu_queue_handler_cb handler;
350 int call_fd;
351 int kick_fd;
352 int err_fd;
353 unsigned int enable;
354 bool started;
356 /* Guest addresses of our ring */
357 struct vhost_vring_addr vra;
358 } VuVirtq;
/* Watch conditions; each value equals the poll(2) event flag of the same name. */
enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};
368 typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
369 typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
370 typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
371 vu_watch_cb cb, void *data);
372 typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
/* File descriptor, address and size recorded for the inflight area. */
typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;
380 struct VuDev {
381 int sock;
382 uint32_t nregions;
383 VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS];
384 VuVirtq *vq;
385 VuDevInflightInfo inflight_info;
386 int log_call_fd;
387 /* Must be held while using slave_fd */
388 pthread_mutex_t slave_mutex;
389 int slave_fd;
390 uint64_t log_size;
391 uint8_t *log_table;
392 uint64_t features;
393 uint64_t protocol_features;
394 bool broken;
395 uint16_t max_queues;
398 * @read_msg: custom method to read vhost-user message
400 * Read data from vhost_user socket fd and fill up
401 * the passed VhostUserMsg *vmsg struct.
403 * If reading fails, it should close the received set of file
404 * descriptors as socket message's auxiliary data.
406 * For the details, please refer to vu_message_read in libvhost-user.c
407 * which will be used by default if not custom method is provided when
408 * calling vu_init
410 * Returns: true if vhost-user message successfully received,
411 * otherwise return false.
414 vu_read_msg_cb read_msg;
417 * @set_watch: add or update the given fd to the watch set,
418 * call cb when condition is met.
420 vu_set_watch_cb set_watch;
422 /* @remove_watch: remove the given fd from the watch set */
423 vu_remove_watch_cb remove_watch;
426 * @panic: encountered an unrecoverable error, you may try to re-initialize
428 vu_panic_cb panic;
429 const VuDevIface *iface;
431 /* Postcopy data */
432 int postcopy_ufd;
433 bool postcopy_listening;
/* A queue element: its index plus the in and out scatter-gather lists and their lengths. */
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
445 * vu_init:
446 * @dev: a VuDev context
447 * @max_queues: maximum number of virtqueues
448 * @socket: the socket connected to vhost-user master
449 * @panic: a panic callback
450 * @set_watch: a set_watch callback
451 * @remove_watch: a remove_watch callback
452 * @iface: a VuDevIface structure with vhost-user device callbacks
454 * Initializes a VuDev vhost-user context.
456 * Returns: true on success, false on failure.
458 bool vu_init(VuDev *dev,
459 uint16_t max_queues,
460 int socket,
461 vu_panic_cb panic,
462 vu_read_msg_cb read_msg,
463 vu_set_watch_cb set_watch,
464 vu_remove_watch_cb remove_watch,
465 const VuDevIface *iface);
469 * vu_deinit:
470 * @dev: a VuDev context
472 * Cleans up the VuDev context
474 void vu_deinit(VuDev *dev);
477 * vu_dispatch:
478 * @dev: a VuDev context
480 * Process one vhost-user message.
482 * Returns: TRUE on success, FALSE on failure.
484 bool vu_dispatch(VuDev *dev);
487 * vu_gpa_to_va:
488 * @dev: a VuDev context
489 * @plen: guest memory size
490 * @guest_addr: guest address
492 * Translate a guest address to a pointer. Returns NULL on failure.
494 void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);
497 * vu_get_queue:
498 * @dev: a VuDev context
499 * @qidx: queue index
501 * Returns the queue number @qidx.
503 VuVirtq *vu_get_queue(VuDev *dev, int qidx);
506 * vu_set_queue_handler:
507 * @dev: a VuDev context
508 * @vq: a VuVirtq queue
509 * @handler: the queue handler callback
511 * Set the queue handler. This function may be called several times
512 * for the same queue. If called with NULL @handler, the handler is
513 * removed.
515 void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
516 vu_queue_handler_cb handler);
519 * vu_set_queue_host_notifier:
520 * @dev: a VuDev context
521 * @vq: a VuVirtq queue
522 * @fd: a file descriptor
523 * @size: host page size
524 * @offset: notifier offset in @fd file
526 * Set queue's host notifier. This function may be called several
527 * times for the same queue. If called with -1 @fd, the notifier
528 * is removed.
530 bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
531 int size, int offset);
534 * vu_queue_set_notification:
535 * @dev: a VuDev context
536 * @vq: a VuVirtq queue
537 * @enable: state
539 * Set whether the queue notifies (via event index or interrupt)
541 void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
544 * vu_queue_enabled:
545 * @dev: a VuDev context
546 * @vq: a VuVirtq queue
548 * Returns: whether the queue is enabled.
550 bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
553 * vu_queue_started:
554 * @dev: a VuDev context
555 * @vq: a VuVirtq queue
557 * Returns: whether the queue is started.
559 bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);
562 * vu_queue_empty:
563 * @dev: a VuDev context
564 * @vq: a VuVirtq queue
566 * Returns: true if the queue is empty or not ready.
568 bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
571 * vu_queue_notify:
572 * @dev: a VuDev context
573 * @vq: a VuVirtq queue
575 * Request to notify the queue via callfd (skipped if unnecessary)
577 void vu_queue_notify(VuDev *dev, VuVirtq *vq);
580 * vu_queue_notify_sync:
581 * @dev: a VuDev context
582 * @vq: a VuVirtq queue
584 * Request to notify the queue via callfd (skipped if unnecessary)
585 * or sync message if possible.
587 void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
590 * vu_queue_pop:
591 * @dev: a VuDev context
592 * @vq: a VuVirtq queue
593 * @sz: the size of struct to return (must be >= VuVirtqElement)
595 * Returns: a VuVirtqElement filled from the queue or NULL. The
596 * returned element must be free()-d by the caller.
598 void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
602 * vu_queue_unpop:
603 * @dev: a VuDev context
604 * @vq: a VuVirtq queue
605 * @elem: The #VuVirtqElement
606 * @len: number of bytes written
608 * Pretend the most recent element wasn't popped from the virtqueue. The next
609 * call to vu_queue_pop() will refetch the element.
611 void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
612 size_t len);
615 * vu_queue_rewind:
616 * @dev: a VuDev context
617 * @vq: a VuVirtq queue
618 * @num: number of elements to push back
620 * Pretend that elements weren't popped from the virtqueue. The next
621 * virtqueue_pop() will refetch the oldest element.
623 * Returns: true on success, false if @num is greater than the number of in use
624 * elements.
626 bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
629 * vu_queue_fill:
630 * @dev: a VuDev context
631 * @vq: a VuVirtq queue
632 * @elem: a VuVirtqElement
633 * @len: length in bytes to write
634 * @idx: optional offset for the used ring index (0 in general)
636 * Fill the used ring with @elem element.
638 void vu_queue_fill(VuDev *dev, VuVirtq *vq,
639 const VuVirtqElement *elem,
640 unsigned int len, unsigned int idx);
643 * vu_queue_push:
644 * @dev: a VuDev context
645 * @vq: a VuVirtq queue
646 * @elem: a VuVirtqElement
647 * @len: length in bytes to write
649 * Helper that combines vu_queue_fill() with a vu_queue_flush().
651 void vu_queue_push(VuDev *dev, VuVirtq *vq,
652 const VuVirtqElement *elem, unsigned int len);
655 * vu_queue_flush:
656 * @dev: a VuDev context
657 * @vq: a VuVirtq queue
658 * @num: number of elements to flush
660 * Mark the last number of elements as done (used.idx is updated by
661 * num elements).
663 void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
666 * vu_queue_get_avail_bytes:
667 * @dev: a VuDev context
668 * @vq: a VuVirtq queue
669 * @in_bytes: in bytes
670 * @out_bytes: out bytes
671 * @max_in_bytes: stop counting after max_in_bytes
672 * @max_out_bytes: stop counting after max_out_bytes
674 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
676 void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
677 unsigned int *out_bytes,
678 unsigned max_in_bytes, unsigned max_out_bytes);
681 * vu_queue_avail_bytes:
682 * @dev: a VuDev context
683 * @vq: a VuVirtq queue
684 * @in_bytes: expected in bytes
685 * @out_bytes: expected out bytes
687 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
689 bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
690 unsigned int out_bytes);
692 #endif /* LIBVHOST_USER_H */