/*
 * VDUSE (vDPA Device in Userspace) library
 *
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 *   Portions of code and concepts borrowed from libvhost-user.c, so:
 *     Copyright IBM, Corp. 2007
 *     Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Xie Yongji <xieyongji@bytedance.com>
 *   Anthony Liguori <aliguori@us.ibm.com>
 *   Marc-André Lureau <mlureau@redhat.com>
 *   Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <endian.h>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <sys/mman.h>

#include "include/atomic.h"
#include "linux-headers/linux/virtio_ring.h"
#include "linux-headers/linux/virtio_config.h"
#include "linux-headers/linux/vduse.h"
#include "libvduse.h"
#define VDUSE_VQ_ALIGN 4096
#define MAX_IOVA_REGIONS 256

#define LOG_ALIGNMENT 64

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

#define unlikely(x) __builtin_expect(!!(x), 0)
typedef struct VduseDescStateSplit {
    uint8_t inflight;
    uint8_t padding[5];
    uint16_t next;
    uint64_t counter;
} VduseDescStateSplit;

typedef struct VduseVirtqLogInflight {
    uint64_t features;
    uint16_t version;
    uint16_t desc_num;
    uint16_t last_batch_head;
    uint16_t used_idx;
    VduseDescStateSplit desc[];
} VduseVirtqLogInflight;

typedef struct VduseVirtqLog {
    VduseVirtqLogInflight inflight;
} VduseVirtqLog;

typedef struct VduseVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VduseVirtqInflightDesc;

typedef struct VduseRing {
    unsigned int num;
    uint64_t desc_addr;
    uint64_t avail_addr;
    uint64_t used_addr;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
} VduseRing;

struct VduseVirtq {
    VduseRing vring;
    uint16_t last_avail_idx;
    uint16_t shadow_avail_idx;
    uint16_t used_idx;
    uint16_t signalled_used;
    bool signalled_used_valid;
    int index;
    int inuse;
    bool ready;
    int fd;
    VduseDev *dev;
    VduseVirtqInflightDesc *resubmit_list;
    uint16_t resubmit_num;
    uint64_t counter;
    VduseVirtqLog *log;
};

typedef struct VduseIovaRegion {
    uint64_t iova;
    uint64_t size;
    uint64_t mmap_offset;
    uint64_t mmap_addr;
} VduseIovaRegion;

struct VduseDev {
    VduseVirtq *vqs;
    VduseIovaRegion regions[MAX_IOVA_REGIONS];
    char *name;
    uint16_t num_queues;
    uint64_t features;
    const VduseOps *ops;
    int fd;
    int ctrl_fd;
    void *priv;
    void *log;
};
static inline size_t vduse_vq_log_size(uint16_t queue_size)
{
    return ALIGN_UP(sizeof(VduseDescStateSplit) * queue_size +
                    sizeof(VduseVirtqLogInflight), LOG_ALIGNMENT);
}
static void *vduse_log_get(const char *filename, size_t size)
{
    void *ptr = MAP_FAILED;
    int fd;

    fd = open(filename, O_RDWR | O_CREAT, 0600);
    if (fd == -1) {
        return MAP_FAILED;
    }

    if (ftruncate(fd, size) == -1) {
        goto out;
    }

    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

out:
    close(fd);
    return ptr;
}
static inline bool has_feature(uint64_t features, unsigned int fbit)
{
    return !!(features & (1ULL << fbit));
}

static inline bool vduse_dev_has_feature(VduseDev *dev, unsigned int fbit)
{
    return has_feature(dev->features, fbit);
}
uint64_t vduse_get_virtio_features(void)
{
    return (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
           (1ULL << VIRTIO_F_VERSION_1) |
           (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
           (1ULL << VIRTIO_RING_F_EVENT_IDX) |
           (1ULL << VIRTIO_RING_F_INDIRECT_DESC);
}
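
/*
 * Example (not part of the library): a device implementation is expected to
 * OR these transport-level defaults into its own device-class feature bits
 * before creating the device. The virtio-blk bits below are illustrative
 * only; any device type works the same way.
 *
 *     uint64_t features = vduse_get_virtio_features() |
 *                         (1ULL << VIRTIO_BLK_F_SEG_MAX) |
 *                         (1ULL << VIRTIO_BLK_F_BLK_SIZE);
 *
 * The resulting mask is what gets passed as the "features" argument to
 * vduse_dev_create() further down in this file.
 */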
VduseDev *vduse_queue_get_dev(VduseVirtq *vq)
{
    return vq->dev;
}

int vduse_queue_get_fd(VduseVirtq *vq)
{
    return vq->fd;
}

void *vduse_dev_get_priv(VduseDev *dev)
{
    return dev->priv;
}

VduseVirtq *vduse_dev_get_queue(VduseDev *dev, int index)
{
    return &dev->vqs[index];
}

int vduse_dev_get_fd(VduseDev *dev)
{
    return dev->fd;
}

static int vduse_inject_irq(VduseDev *dev, int index)
{
    return ioctl(dev->fd, VDUSE_VQ_INJECT_IRQ, &index);
}
static int inflight_desc_compare(const void *a, const void *b)
{
    VduseVirtqInflightDesc *desc0 = (VduseVirtqInflightDesc *)a,
                           *desc1 = (VduseVirtqInflightDesc *)b;

    if (desc1->counter > desc0->counter &&
        (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
        return 1;
    }

    return -1;
}
static int vduse_queue_check_inflights(VduseVirtq *vq)
{
    int i = 0;
    VduseDev *dev = vq->dev;

    vq->used_idx = le16toh(vq->vring.used->idx);
    vq->resubmit_num = 0;
    vq->resubmit_list = NULL;
    vq->counter = 0;

    if (unlikely(vq->log->inflight.used_idx != vq->used_idx)) {
        if (vq->log->inflight.last_batch_head > VIRTQUEUE_MAX_SIZE) {
            return -1;
        }

        vq->log->inflight.desc[vq->log->inflight.last_batch_head].inflight = 0;

        barrier();

        vq->log->inflight.used_idx = vq->used_idx;
    }

    vq->inuse = 0;
    for (i = 0; i < vq->log->inflight.desc_num; i++) {
        if (vq->log->inflight.desc[i].inflight == 1) {
            vq->inuse++;
        }
    }

    vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;

    if (vq->inuse) {
        vq->resubmit_list = calloc(vq->inuse, sizeof(VduseVirtqInflightDesc));
        if (!vq->resubmit_list) {
            return -1;
        }

        for (i = 0; i < vq->log->inflight.desc_num; i++) {
            if (vq->log->inflight.desc[i].inflight) {
                vq->resubmit_list[vq->resubmit_num].index = i;
                vq->resubmit_list[vq->resubmit_num].counter =
                    vq->log->inflight.desc[i].counter;
                vq->resubmit_num++;
            }
        }

        if (vq->resubmit_num > 1) {
            qsort(vq->resubmit_list, vq->resubmit_num,
                  sizeof(VduseVirtqInflightDesc), inflight_desc_compare);
        }
        vq->counter = vq->resubmit_list[0].counter + 1;
    }

    vduse_inject_irq(dev, vq->index);

    return 0;
}
static int vduse_queue_inflight_get(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.desc[desc_idx].counter = vq->counter++;

    barrier();

    vq->log->inflight.desc[desc_idx].inflight = 1;

    return 0;
}

static int vduse_queue_inflight_pre_put(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.last_batch_head = desc_idx;

    return 0;
}

static int vduse_queue_inflight_post_put(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.desc[desc_idx].inflight = 0;

    barrier();

    vq->log->inflight.used_idx = vq->used_idx;

    return 0;
}
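
/*
 * The three helpers above implement the crash-safe ordering of the inflight
 * log. A sketch of the intended protocol, assuming the log file is mapped
 * MAP_SHARED so it survives a process crash:
 *
 *     elem = vduse_queue_pop(vq, sz);      // inflight_get: desc marked busy
 *     ...process the request...
 *     vduse_queue_push(vq, elem, len);     // pre_put: last_batch_head saved
 *                                          // used ring updated
 *                                          // post_put: desc marked done
 *
 * If the process dies between pre_put and post_put, the recovery path in
 * vduse_queue_check_inflights() uses last_batch_head to clear the entry that
 * was already completed, then rebuilds resubmit_list from the descriptors
 * still marked inflight.
 */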
static void vduse_iova_remove_region(VduseDev *dev, uint64_t start,
                                     uint64_t last)
{
    int i;

    if (last == start) {
        return;
    }

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        if (!dev->regions[i].mmap_addr) {
            continue;
        }

        if (start <= dev->regions[i].iova &&
            last >= (dev->regions[i].iova + dev->regions[i].size - 1)) {
            munmap((void *)(uintptr_t)dev->regions[i].mmap_addr,
                   dev->regions[i].mmap_offset + dev->regions[i].size);
            dev->regions[i].mmap_addr = 0;
        }
    }
}
static int vduse_iova_add_region(VduseDev *dev, int fd,
                                 uint64_t offset, uint64_t start,
                                 uint64_t last, int prot)
{
    int i;
    uint64_t size = last - start + 1;
    void *mmap_addr = mmap(0, size + offset, prot, MAP_SHARED, fd, 0);

    if (mmap_addr == MAP_FAILED) {
        close(fd);
        return -EINVAL;
    }

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        if (!dev->regions[i].mmap_addr) {
            dev->regions[i].mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
            dev->regions[i].mmap_offset = offset;
            dev->regions[i].iova = start;
            dev->regions[i].size = size;
            break;
        }
    }
    assert(i < MAX_IOVA_REGIONS);

    close(fd);

    return 0;
}
static int perm_to_prot(uint8_t perm)
{
    int prot = 0;

    switch (perm) {
    case VDUSE_ACCESS_WO:
        prot |= PROT_WRITE;
        break;
    case VDUSE_ACCESS_RO:
        prot |= PROT_READ;
        break;
    case VDUSE_ACCESS_RW:
        prot |= PROT_READ | PROT_WRITE;
        break;
    default:
        break;
    }

    return prot;
}
static inline void *iova_to_va(VduseDev *dev, uint64_t *plen, uint64_t iova)
{
    int i, ret;
    struct vduse_iotlb_entry entry;

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        VduseIovaRegion *r = &dev->regions[i];

        if (!r->mmap_addr) {
            continue;
        }

        if ((iova >= r->iova) && (iova < (r->iova + r->size))) {
            if ((iova + *plen) > (r->iova + r->size)) {
                *plen = r->iova + r->size - iova;
            }
            return (void *)(uintptr_t)(iova - r->iova +
                                       r->mmap_addr + r->mmap_offset);
        }
    }

    entry.start = iova;
    entry.last = iova + 1;
    ret = ioctl(dev->fd, VDUSE_IOTLB_GET_FD, &entry);
    if (ret < 0) {
        return NULL;
    }

    if (!vduse_iova_add_region(dev, ret, entry.offset, entry.start,
                               entry.last, perm_to_prot(entry.perm))) {
        return iova_to_va(dev, plen, iova);
    }

    return NULL;
}
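
/*
 * Sketch of the caller contract for iova_to_va(): *plen is in/out. On entry
 * it holds the number of bytes the caller wants mapped contiguously; on
 * return it may be shrunk to the bytes actually available in one region, so
 * callers must loop until the full range is consumed. For example:
 *
 *     uint64_t remaining = total_len, len;
 *     while (remaining) {
 *         len = remaining;
 *         void *va = iova_to_va(dev, &len, iova);
 *         if (!va) {
 *             break;              // translation failure
 *         }
 *         ...use len bytes at va...
 *         iova += len;
 *         remaining -= len;
 *     }
 *
 * vduse_queue_map_single_desc() below follows exactly this pattern.
 */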
static inline uint16_t vring_avail_flags(VduseVirtq *vq)
{
    return le16toh(vq->vring.avail->flags);
}

static inline uint16_t vring_avail_idx(VduseVirtq *vq)
{
    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);

    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VduseVirtq *vq, int i)
{
    return le16toh(vq->vring.avail->ring[i]);
}

static inline uint16_t vring_get_used_event(VduseVirtq *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
static bool vduse_queue_get_head(VduseVirtq *vq, unsigned int idx,
                                 unsigned int *head)
{
    /*
     * Grab the next descriptor number they're advertising, and increment
     * the index we've seen.
     */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        fprintf(stderr, "Guest says index %u is available\n", *head);
        return false;
    }

    return true;
}
static int
vduse_queue_read_indirect_desc(VduseDev *dev, struct vring_desc *desc,
                               uint64_t addr, size_t len)
{
    struct vring_desc *ori_desc;
    uint64_t read_len;

    if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
        return -1;
    }

    if (len == 0) {
        return -1;
    }

    while (len) {
        read_len = len;
        ori_desc = iova_to_va(dev, &read_len, addr);
        if (!ori_desc) {
            return -1;
        }

        memcpy(desc, ori_desc, read_len);
        len -= read_len;
        addr += read_len;
        desc = (struct vring_desc *)((char *)desc + read_len);
    }

    return 0;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
static int vduse_queue_read_next_desc(struct vring_desc *desc, int i,
                                      unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = le16toh(desc[i].next);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        fprintf(stderr, "Desc next is %u\n", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    return VIRTQUEUE_READ_DESC_MORE;
}
/*
 * Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 */
static bool vduse_queue_empty(VduseVirtq *vq)
{
    if (unlikely(!vq->vring.avail)) {
        return true;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}
static bool vduse_queue_should_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vduse_dev_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vduse_queue_empty(vq)) {
        return true;
    }

    if (!vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
void vduse_queue_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;

    if (unlikely(!vq->vring.avail)) {
        return;
    }

    if (!vduse_queue_should_notify(vq)) {
        return;
    }

    if (vduse_inject_irq(dev, vq->index) < 0) {
        fprintf(stderr, "Error injecting irq for vq %d: %s\n",
                vq->index, strerror(errno));
    }
}
static inline void vring_set_avail_event(VduseVirtq *vq, uint16_t val)
{
    uint16_t val_le = htole16(val);

    memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t));
}
static bool vduse_queue_map_single_desc(VduseVirtq *vq, unsigned int *p_num_sg,
                                        struct iovec *iov,
                                        unsigned int max_num_sg,
                                        bool is_write, uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    VduseDev *dev = vq->dev;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        fprintf(stderr, "virtio: zero sized buffers are not allowed\n");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            fprintf(stderr,
                    "virtio: too many descriptors in indirect table\n");
            return false;
        }

        iov[num_sg].iov_base = iova_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            fprintf(stderr, "virtio: invalid address for buffers\n");
            return false;
        }
        iov[num_sg++].iov_len = len;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}
static void *vduse_queue_alloc_element(size_t sz, unsigned out_num,
                                       unsigned in_num)
{
    VduseVirtqElement *elem;
    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VduseVirtqElement));
    elem = malloc(out_sg_end);
    if (!elem) {
        return NULL;
    }
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
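
/*
 * Layout of the single allocation returned above (a sketch; "sz" is the
 * caller's element size, which must embed a VduseVirtqElement at offset 0):
 *
 *     +--------------------------+  <- elem (malloc'd base)
 *     | caller struct, sz bytes  |
 *     +--------------------------+  <- in_sg_ofs (aligned for struct iovec)
 *     | in_sg[in_num]            |
 *     +--------------------------+  <- out_sg_ofs
 *     | out_sg[out_num]          |
 *     +--------------------------+  <- out_sg_end (total allocation)
 *
 * One malloc/free pair covers the element and both scatter-gather arrays,
 * which is why elements returned by vduse_queue_pop() can be released with
 * a plain free().
 */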
static void *vduse_queue_map_desc(VduseVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    VduseDev *dev = vq->dev;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VduseVirtqElement *elem;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned int out_num = 0, in_num = 0;
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            fprintf(stderr, "Invalid size for indirect buffer table\n");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = iova_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            if (!vduse_queue_read_indirect_desc(dev, desc_buf,
                                                desc_addr, desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            fprintf(stderr, "Invalid indirect buffer table\n");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            if (!vduse_queue_map_single_desc(vq, &in_num, iov + out_num,
                                             VIRTQUEUE_MAX_SIZE - out_num,
                                             true, le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            if (in_num) {
                fprintf(stderr, "Incorrect order for descriptors\n");
                return NULL;
            }
            if (!vduse_queue_map_single_desc(vq, &out_num, iov,
                                             VIRTQUEUE_MAX_SIZE, false,
                                             le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            fprintf(stderr, "Looped descriptor\n");
            return NULL;
        }
        rc = vduse_queue_read_next_desc(desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        fprintf(stderr, "read descriptor error\n");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = vduse_queue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        fprintf(stderr, "read descriptor error\n");
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}
void *vduse_queue_pop(VduseVirtq *vq, size_t sz)
{
    unsigned int head;
    VduseVirtqElement *elem;
    VduseDev *dev = vq->dev;
    int i;

    if (unlikely(!vq->vring.avail)) {
        return NULL;
    }

    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        i = (--vq->resubmit_num);
        elem = vduse_queue_map_desc(vq, vq->resubmit_list[i].index, sz);

        if (!vq->resubmit_num) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vduse_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty() */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        fprintf(stderr, "Virtqueue size exceeded: %d\n", vq->inuse);
        return NULL;
    }

    if (!vduse_queue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vduse_queue_map_desc(vq, head, sz);
    if (!elem) {
        return NULL;
    }

    vq->inuse++;

    vduse_queue_inflight_get(vq, head);

    return elem;
}
static inline void vring_used_write(VduseVirtq *vq,
                                    struct vring_used_elem *uelem, int i)
{
    struct vring_used *used = vq->vring.used;

    used->ring[i] = *uelem;
}

static void vduse_queue_fill(VduseVirtq *vq, const VduseVirtqElement *elem,
                             unsigned int len, unsigned int idx)
{
    struct vring_used_elem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = htole32(elem->index);
    uelem.len = htole32(len);
    vring_used_write(vq, &uelem, idx);
}

static inline void vring_used_idx_set(VduseVirtq *vq, uint16_t val)
{
    vq->vring.used->idx = htole16(val);

    vq->used_idx = val;
}
static void vduse_queue_flush(VduseVirtq *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}
void vduse_queue_push(VduseVirtq *vq, const VduseVirtqElement *elem,
                      unsigned int len)
{
    vduse_queue_fill(vq, elem, len, 0);
    vduse_queue_inflight_pre_put(vq, elem->index);
    vduse_queue_flush(vq, 1);
    vduse_queue_inflight_post_put(vq, elem->index);
}
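
/*
 * Putting pop/push/notify together: a minimal request loop, assuming a
 * hypothetical process_request() that consumes elem->out_sg, writes the
 * response into elem->in_sg, and returns the number of bytes written. The
 * kick fd comes from vduse_queue_get_fd() and would normally be polled by
 * the caller's event loop.
 *
 *     static void handle_kick(VduseVirtq *vq)
 *     {
 *         VduseVirtqElement *elem;
 *         uint64_t count;
 *
 *         read(vduse_queue_get_fd(vq), &count, sizeof(count));
 *         while ((elem = vduse_queue_pop(vq, sizeof(*elem)))) {
 *             unsigned int len = process_request(elem);   // hypothetical
 *             vduse_queue_push(vq, elem, len);
 *             vduse_queue_notify(vq);
 *             free(elem);
 *         }
 *     }
 */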
static int vduse_queue_update_vring(VduseVirtq *vq, uint64_t desc_addr,
                                    uint64_t avail_addr, uint64_t used_addr)
{
    struct VduseDev *dev = vq->dev;
    uint64_t len;

    len = sizeof(struct vring_desc);
    vq->vring.desc = iova_to_va(dev, &len, desc_addr);
    if (len != sizeof(struct vring_desc)) {
        return -EINVAL;
    }

    len = sizeof(struct vring_avail);
    vq->vring.avail = iova_to_va(dev, &len, avail_addr);
    if (len != sizeof(struct vring_avail)) {
        return -EINVAL;
    }

    len = sizeof(struct vring_used);
    vq->vring.used = iova_to_va(dev, &len, used_addr);
    if (len != sizeof(struct vring_used)) {
        return -EINVAL;
    }

    if (!vq->vring.desc || !vq->vring.avail || !vq->vring.used) {
        fprintf(stderr, "Failed to get vq[%d] iova mapping\n", vq->index);
        return -EINVAL;
    }

    return 0;
}
static void vduse_queue_enable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_info vq_info;
    struct vduse_vq_eventfd vq_eventfd;
    int fd;

    vq_info.index = vq->index;
    if (ioctl(dev->fd, VDUSE_VQ_GET_INFO, &vq_info)) {
        fprintf(stderr, "Failed to get vq[%d] info: %s\n",
                vq->index, strerror(errno));
        return;
    }

    if (!vq_info.ready) {
        return;
    }

    vq->vring.num = vq_info.num;
    vq->vring.desc_addr = vq_info.desc_addr;
    vq->vring.avail_addr = vq_info.driver_addr;
    vq->vring.used_addr = vq_info.device_addr;

    if (vduse_queue_update_vring(vq, vq_info.desc_addr,
                                 vq_info.driver_addr, vq_info.device_addr)) {
        fprintf(stderr, "Failed to update vring for vq[%d]\n", vq->index);
        return;
    }

    fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (fd < 0) {
        fprintf(stderr, "Failed to init eventfd for vq[%d]\n", vq->index);
        return;
    }

    vq_eventfd.index = vq->index;
    vq_eventfd.fd = fd;
    if (ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd)) {
        fprintf(stderr, "Failed to setup kick fd for vq[%d]\n", vq->index);
        close(fd);
        return;
    }

    vq->fd = fd;
    vq->signalled_used_valid = false;
    vq->ready = true;

    if (vduse_queue_check_inflights(vq)) {
        fprintf(stderr, "Failed to check inflights for vq[%d]\n", vq->index);
        close(fd);
        return;
    }

    dev->ops->enable_queue(dev, vq);
}
static void vduse_queue_disable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_eventfd eventfd;

    if (!vq->ready) {
        return;
    }

    dev->ops->disable_queue(dev, vq);

    eventfd.index = vq->index;
    eventfd.fd = VDUSE_EVENTFD_DEASSIGN;
    ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &eventfd);
    close(vq->fd);

    assert(vq->inuse == 0);

    vq->vring.num = 0;
    vq->vring.desc_addr = 0;
    vq->vring.avail_addr = 0;
    vq->vring.used_addr = 0;
    vq->vring.desc = NULL;
    vq->vring.avail = NULL;
    vq->vring.used = NULL;
    vq->ready = false;
    vq->fd = -1;
}
static void vduse_dev_start_dataplane(VduseDev *dev)
{
    int i;

    if (ioctl(dev->fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        return;
    }
    assert(vduse_dev_has_feature(dev, VIRTIO_F_VERSION_1));

    for (i = 0; i < dev->num_queues; i++) {
        vduse_queue_enable(&dev->vqs[i]);
    }
}
static void vduse_dev_stop_dataplane(VduseDev *dev)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int i;

    for (i = 0; i < dev->num_queues; i++) {
        vduse_queue_disable(&dev->vqs[i]);
    }
    if (dev->log) {
        memset(dev->log, 0, log_size);
    }
    dev->features = 0;
    vduse_iova_remove_region(dev, 0, ULONG_MAX);
}
int vduse_dev_handler(VduseDev *dev)
{
    struct vduse_dev_request req;
    struct vduse_dev_response resp = { 0 };
    VduseVirtq *vq;
    int i, ret;

    ret = read(dev->fd, &req, sizeof(req));
    if (ret != sizeof(req)) {
        fprintf(stderr, "Read request error [%d]: %s\n",
                ret, strerror(errno));
        return -errno;
    }
    resp.request_id = req.request_id;

    switch (req.type) {
    case VDUSE_GET_VQ_STATE:
        vq = &dev->vqs[req.vq_state.index];
        resp.vq_state.split.avail_index = vq->last_avail_idx;
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_SET_STATUS:
        if (req.s.status & VIRTIO_CONFIG_S_DRIVER_OK) {
            vduse_dev_start_dataplane(dev);
        } else if (req.s.status == 0) {
            vduse_dev_stop_dataplane(dev);
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_UPDATE_IOTLB:
        /* The iova will be updated by iova_to_va() later, so just remove it */
        vduse_iova_remove_region(dev, req.iova.start, req.iova.last);
        for (i = 0; i < dev->num_queues; i++) {
            vq = &dev->vqs[i];
            if (vq->ready) {
                if (vduse_queue_update_vring(vq, vq->vring.desc_addr,
                                             vq->vring.avail_addr,
                                             vq->vring.used_addr)) {
                    fprintf(stderr, "Failed to update vring for vq[%d]\n",
                            vq->index);
                }
            }
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    default:
        resp.result = VDUSE_REQ_RESULT_FAILED;
        break;
    }

    ret = write(dev->fd, &resp, sizeof(resp));
    if (ret != sizeof(resp)) {
        fprintf(stderr, "Write request %d error [%d]: %s\n",
                req.type, ret, strerror(errno));
        return -errno;
    }

    return 0;
}
int vduse_dev_update_config(VduseDev *dev, uint32_t size,
                            uint32_t offset, char *buffer)
{
    int ret;
    struct vduse_config_data *data;

    data = malloc(offsetof(struct vduse_config_data, buffer) + size);
    if (!data) {
        return -ENOMEM;
    }

    data->offset = offset;
    data->length = size;
    memcpy(data->buffer, buffer, size);

    ret = ioctl(dev->fd, VDUSE_DEV_SET_CONFIG, data);
    free(data);

    if (ret) {
        return -errno;
    }

    if (ioctl(dev->fd, VDUSE_DEV_INJECT_CONFIG_IRQ)) {
        return -errno;
    }

    return 0;
}
int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size)
{
    VduseVirtq *vq = &dev->vqs[index];
    struct vduse_vq_config vq_config = { 0 };

    if (max_size > VIRTQUEUE_MAX_SIZE) {
        return -EINVAL;
    }

    vq_config.index = vq->index;
    vq_config.max_size = max_size;

    if (ioctl(dev->fd, VDUSE_VQ_SETUP, &vq_config)) {
        return -errno;
    }

    vduse_queue_enable(vq);

    return 0;
}
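
/*
 * Typical bring-up, as a sketch (the VduseOps callbacks and the poll loop
 * around vduse_dev_get_fd()/vduse_queue_get_fd() are the caller's own code;
 * names such as my_enable_queue are placeholders):
 *
 *     static const VduseOps ops = {
 *         .enable_queue  = my_enable_queue,    // start polling vq's kick fd
 *         .disable_queue = my_disable_queue,   // stop polling it
 *     };
 *
 *     VduseDev *dev = vduse_dev_create("my-dev", device_id, vendor_id,
 *                                      features, num_queues,
 *                                      config_size, config, &ops, priv);
 *     for (int i = 0; i < num_queues; i++) {
 *         vduse_dev_setup_queue(dev, i, VIRTQUEUE_MAX_SIZE);
 *     }
 *     // then poll vduse_dev_get_fd() and call vduse_dev_handler() on
 *     // every POLLIN event to service kernel requests
 */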
int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    void *log;
    int i;

    dev->log = log = vduse_log_get(filename, log_size);
    if (log == MAP_FAILED) {
        fprintf(stderr, "Failed to get vduse log\n");
        return -EINVAL;
    }

    for (i = 0; i < dev->num_queues; i++) {
        dev->vqs[i].log = log;
        dev->vqs[i].log->inflight.desc_num = VIRTQUEUE_MAX_SIZE;
        log = (void *)((char *)log + vduse_vq_log_size(VIRTQUEUE_MAX_SIZE));
    }

    return 0;
}
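
/*
 * Note on usage: the inflight helpers and vduse_queue_check_inflights()
 * above dereference vq->log unconditionally, so a reconnect log file should
 * be installed with this function after device creation and before any
 * queue is enabled. A restarted process that maps the same file recovers
 * its in-flight requests during vduse_queue_enable().
 */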
static int vduse_dev_init_vqs(VduseDev *dev, uint16_t num_queues)
{
    VduseVirtq *vqs;
    int i;

    vqs = calloc(num_queues, sizeof(VduseVirtq));
    if (!vqs) {
        return -ENOMEM;
    }

    for (i = 0; i < num_queues; i++) {
        vqs[i].index = i;
        vqs[i].dev = dev;
        vqs[i].fd = -1;
    }
    dev->vqs = vqs;

    return 0;
}
static int vduse_dev_init(VduseDev *dev, const char *name,
                          uint16_t num_queues, const VduseOps *ops,
                          void *priv)
{
    int ret, fd;
    char *dev_path, *dev_name;

    dev_path = malloc(strlen(name) + strlen("/dev/vduse/") + 1);
    if (!dev_path) {
        return -ENOMEM;
    }
    sprintf(dev_path, "/dev/vduse/%s", name);

    fd = open(dev_path, O_RDWR);
    free(dev_path);
    if (fd < 0) {
        fprintf(stderr, "Failed to open vduse dev %s: %s\n",
                name, strerror(errno));
        return -errno;
    }

    if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        close(fd);
        return -errno;
    }

    dev_name = strdup(name);
    if (!dev_name) {
        close(fd);
        return -ENOMEM;
    }

    ret = vduse_dev_init_vqs(dev, num_queues);
    if (ret) {
        free(dev_name);
        close(fd);
        return ret;
    }

    dev->name = dev_name;
    dev->num_queues = num_queues;
    dev->fd = fd;
    dev->ops = ops;
    dev->priv = priv;

    return 0;
}
static inline bool vduse_name_is_invalid(const char *name)
{
    return strlen(name) >= VDUSE_NAME_MAX || strstr(name, "..");
}
VduseDev *vduse_dev_create_by_fd(int fd, uint16_t num_queues,
                                 const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret;

    if (!ops || !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }

    if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        free(dev);
        return NULL;
    }

    ret = vduse_dev_init_vqs(dev, num_queues);
    if (ret) {
        fprintf(stderr, "Failed to init vqs\n");
        free(dev);
        return NULL;
    }

    dev->num_queues = num_queues;
    dev->fd = fd;
    dev->ctrl_fd = -1;
    dev->ops = ops;
    dev->priv = priv;

    return dev;
}
VduseDev *vduse_dev_create_by_name(const char *name, uint16_t num_queues,
                                   const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret;

    if (!name || vduse_name_is_invalid(name) || !ops ||
        !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }
    dev->ctrl_fd = -1;

    ret = vduse_dev_init(dev, name, num_queues, ops, priv);
    if (ret < 0) {
        fprintf(stderr, "Failed to init vduse device %s: %s\n",
                name, strerror(-ret));
        free(dev);
        return NULL;
    }

    return dev;
}
VduseDev *vduse_dev_create(const char *name, uint32_t device_id,
                           uint32_t vendor_id, uint64_t features,
                           uint16_t num_queues, uint32_t config_size,
                           char *config, const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret, ctrl_fd;
    uint64_t version;
    struct vduse_dev_config *dev_config;
    size_t size = offsetof(struct vduse_dev_config, config);

    if (!name || vduse_name_is_invalid(name) ||
        !has_feature(features, VIRTIO_F_VERSION_1) || !config ||
        !config_size || !ops || !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }

    ctrl_fd = open("/dev/vduse/control", O_RDWR);
    if (ctrl_fd < 0) {
        fprintf(stderr, "Failed to open /dev/vduse/control: %s\n",
                strerror(errno));
        goto err_ctrl;
    }

    version = VDUSE_API_VERSION;
    if (ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version)) {
        fprintf(stderr, "Failed to set api version %" PRIu64 ": %s\n",
                version, strerror(errno));
        goto err_dev;
    }

    dev_config = calloc(size + config_size, 1);
    if (!dev_config) {
        fprintf(stderr, "Failed to allocate config space\n");
        goto err_dev;
    }

    assert(!vduse_name_is_invalid(name));
    strcpy(dev_config->name, name);
    dev_config->device_id = device_id;
    dev_config->vendor_id = vendor_id;
    dev_config->features = features;
    dev_config->vq_num = num_queues;
    dev_config->vq_align = VDUSE_VQ_ALIGN;
    dev_config->config_size = config_size;
    memcpy(dev_config->config, config, config_size);

    ret = ioctl(ctrl_fd, VDUSE_CREATE_DEV, dev_config);
    free(dev_config);
    if (ret && errno != EEXIST) {
        fprintf(stderr, "Failed to create vduse device %s: %s\n",
                name, strerror(errno));
        goto err_dev;
    }
    dev->ctrl_fd = ctrl_fd;

    ret = vduse_dev_init(dev, name, num_queues, ops, priv);
    if (ret < 0) {
        fprintf(stderr, "Failed to init vduse device %s: %s\n",
                name, strerror(-ret));
        goto err;
    }

    return dev;
err:
    ioctl(ctrl_fd, VDUSE_DESTROY_DEV, name);
err_dev:
    close(ctrl_fd);
err_ctrl:
    free(dev);

    return NULL;
}
int vduse_dev_destroy(VduseDev *dev)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int i, ret = 0;

    if (dev->log) {
        munmap(dev->log, log_size);
    }
    for (i = 0; i < dev->num_queues; i++) {
        free(dev->vqs[i].resubmit_list);
    }
    free(dev->vqs);
    if (dev->fd >= 0) {
        close(dev->fd);
        dev->fd = -1;
    }
    if (dev->ctrl_fd >= 0) {
        if (ioctl(dev->ctrl_fd, VDUSE_DESTROY_DEV, dev->name)) {
            ret = -errno;
        }
        close(dev->ctrl_fd);
        dev->ctrl_fd = -1;
    }
    free(dev->name);
    free(dev);

    return ret;
}