/*
 * Generic virtio library for MINIX 3
 *
 * Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
 *
 * This software is released under the BSD license. See the LICENSE file
 * included in the main directory of this source distribution for the
 * license terms and conditions.
 */
#include <assert.h>

#include <errno.h>			/* for OK... */
#include <string.h>			/* memset() */
#include <stdlib.h>			/* malloc() */

#include <machine/pci.h>		/* PCI_ILR, PCI_BAR... */
#include <machine/vmparam.h>		/* PAGE_SIZE */

#include <minix/syslib.h>		/* umap, vumap, alloc_... */
#include <minix/sysutil.h>		/* panic(), at least */
#include <minix/virtio.h>		/* virtio system include */

#include "virtio_ring.h"		/* virtio types / helpers */
/*
 * About indirect descriptors:
 *
 * For each possible thread, a single indirect descriptor table is allocated.
 * If using direct descriptors could lead to a situation where another thread
 * might not be able to add another descriptor to the ring, indirect
 * descriptors are used.
 *
 * Indirect descriptor tables are pre-allocated, because each alloc_contig()
 * call involves a kernel call, which would be too expensive on the I/O path.
 *
 * The size of an indirect descriptor table is chosen based on MAPVEC_NR. A
 * driver using this library must never add more than
 *
 *	MAPVEC_NR + MAPVEC_NR / 2
 *
 * descriptors to a queue, as this represents the maximum size of an indirect
 * descriptor table.
 */
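/*
 * For illustration: struct vring_desc is 16 bytes (u64 addr, u32 len,
 * u16 flags, u16 next). Assuming MAPVEC_NR were 64 (the real value comes
 * from the system headers), each table would hold 64 + 64 / 2 = 96
 * descriptors, i.e. 96 * 16 = 1536 bytes per thread.
 */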
struct indirect_desc_table {
	int in_use;				/* table currently in use? */
	struct vring_desc *descs;		/* 4K-aligned descriptor table */
	phys_bytes paddr;			/* physical address of descs */
	size_t len;				/* size of descs in bytes */
};
struct virtio_queue {

	void *vaddr;				/* virtual addr of ring */
	phys_bytes paddr;			/* physical addr of ring */
	u32_t page;				/* physical guest page */

	u16_t num;				/* number of descriptors */
	u32_t ring_size;			/* size of ring in bytes */
	struct vring vring;

	u16_t free_num;				/* free descriptors */
	u16_t free_head;			/* next free descriptor */
	u16_t free_tail;			/* last free descriptor */
	u16_t last_used;			/* we checked in used */

	void **data;				/* points to pointers */
};
struct virtio_device {

	const char *name;			/* for debugging */

	u16_t port;				/* io port */

	struct virtio_feature *features;	/* host / guest features */
	u8_t num_features;			/* max 32 */

	struct virtio_queue *queues;		/* our queues */
	u16_t num_queues;			/* how many queues */

	int irq;				/* interrupt line */
	int irq_hook;				/* hook id */
	int msi;				/* is MSI enabled? */

	int threads;				/* max number of threads */

	struct indirect_desc_table *indirect;	/* indirect descriptor tables */
	int num_indirect;			/* how many indirect tables */
};
static int is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid);
static int init_device(int devind, struct virtio_device *dev);
static int init_phys_queues(struct virtio_device *dev);
static int exchange_features(struct virtio_device *dev);
static int alloc_phys_queue(struct virtio_queue *q);
static void free_phys_queue(struct virtio_queue *q);
static void init_phys_queue(struct virtio_queue *q);
static int init_indirect_desc_table(struct indirect_desc_table *desc);
static int init_indirect_desc_tables(struct virtio_device *dev);
static void virtio_irq_register(struct virtio_device *dev);
static void virtio_irq_unregister(struct virtio_device *dev);
static int wants_kick(struct virtio_queue *q);
static void kick_queue(struct virtio_device *dev, int qidx);
struct virtio_device *
virtio_setup_device(u16_t subdevid, const char *name,
		struct virtio_feature *features, int num_features,
		int threads, int skip)
{
	int r, devind;
	u16_t vid, did, sdid;
	struct virtio_device *ret;

	/* bogus values? */
	if (skip < 0 || name == NULL || num_features < 0 || threads <= 0)
		return NULL;

	pci_init();

	r = pci_first_dev(&devind, &vid, &did);

	while (r > 0) {
		sdid = pci_attr_r16(devind, PCI_SUBDID);

		if (is_matching_device(subdevid, vid, sdid)) {

			/* this is the device we are looking for */
			if (skip == 0)
				break;

			skip--;
		}

		r = pci_next_dev(&devind, &vid, &did);
	}

	/* pci_first_dev() and pci_next_dev() return 0 if no device was found */
	if (r == 0 || skip > 0)
		return NULL;

	/* allocate and set known info about the device */
	ret = malloc(sizeof(*ret));

	if (ret == NULL)
		return NULL;

	/* Prepare the virtio_device instance */
	memset(ret, 0, sizeof(*ret));
	ret->name = name;
	ret->features = features;
	ret->num_features = num_features;
	ret->threads = threads;
	/* see comment at the beginning of this file */
	ret->num_indirect = threads;

	if (init_device(devind, ret) != OK) {
		printf("%s: Could not initialize device\n", ret->name);
		goto err;
	}

	/* Ack the device */
	virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_ACK);

	if (exchange_features(ret) != OK) {
		printf("%s: Could not exchange features\n", ret->name);
		goto err;
	}

	if (init_indirect_desc_tables(ret) != OK) {
		printf("%s: Could not initialize indirect tables\n", ret->name);
		goto err;
	}

	/* We know how to drive the device... */
	virtio_write8(ret, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV);

	return ret;

err:
	free(ret);
	return NULL;
}
static int
init_device(int devind, struct virtio_device *dev)
{
	u32_t base, size;
	int iof, r;

	pci_reserve(devind);

	if ((r = pci_get_bar(devind, PCI_BAR, &base, &size, &iof)) != OK) {
		printf("%s: Could not get BAR (%d)", dev->name, r);
		return r;
	}

	if (!iof) {
		printf("%s: PCI not IO space?", dev->name);
		return EINVAL;
	}

	if (base & 0xFFFF0000) {
		printf("%s: IO port weird (%08x)", dev->name, base);
		return EINVAL;
	}

	/* store the I/O port */
	dev->port = base;

	/* Reset the device */
	virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0);

	/* Read the IRQ line */
	dev->irq = pci_attr_r8(devind, PCI_ILR);

	return OK;
}
static int
exchange_features(struct virtio_device *dev)
{
	u32_t guest_features = 0, host_features = 0;
	struct virtio_feature *f;

	host_features = virtio_read32(dev, VIRTIO_HOST_F_OFF);

	for (int i = 0; i < dev->num_features; i++) {
		f = &dev->features[i];

		/* prepare the features the driver supports */
		guest_features |= (f->guest_support << f->bit);

		/* just load the host feature into the struct */
		f->host_support = ((host_features >> f->bit) & 1);
	}

	/* let the device know about our features */
	virtio_write32(dev, VIRTIO_GUEST_F_OFF, guest_features);

	return OK;
}
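/*
 * A minimal sketch of a driver-side feature table, assuming the field
 * order (name, bit, host_support, guest_support) from <minix/virtio.h>;
 * the names and bit values here are hypothetical:
 *
 *	static struct virtio_feature blk_features[] = {
 *		{ "barrier",	0, 0, 0 },
 *		{ "sizemax",	1, 0, 0 },
 *	};
 *
 * exchange_features() advertises the guest_support bits to the host and
 * fills in host_support from the host feature word.
 */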
int
virtio_alloc_queues(struct virtio_device *dev, int num_queues)
{
	int r = OK;

	assert(dev != NULL);

	/* Assume there's no device with more than 256 queues */
	if (num_queues < 0 || num_queues > 256)
		return EINVAL;

	dev->num_queues = num_queues;
	/* allocate queue memory */
	dev->queues = malloc(num_queues * sizeof(dev->queues[0]));

	if (dev->queues == NULL)
		return ENOMEM;

	memset(dev->queues, 0, num_queues * sizeof(dev->queues[0]));

	if ((r = init_phys_queues(dev)) != OK) {
		printf("%s: Could not initialize queues (%d)\n", dev->name, r);
		free(dev->queues);
		dev->queues = NULL;
	}

	return r;
}
static int
init_phys_queues(struct virtio_device *dev)
{
	/* Initialize all queues */
	int i, j, r;
	struct virtio_queue *q;

	for (i = 0; i < dev->num_queues; i++) {
		q = &dev->queues[i];

		/* select the queue */
		virtio_write16(dev, VIRTIO_QSEL_OFF, i);
		q->num = virtio_read16(dev, VIRTIO_QSIZE_OFF);

		if (q->num & (q->num - 1)) {
			printf("%s: Queue %d num=%d not a power of 2",
				dev->name, i, q->num);
			r = EINVAL;
			goto free_phys_queues;
		}

		if ((r = alloc_phys_queue(q)) != OK)
			goto free_phys_queues;

		init_phys_queue(q);

		/* Let the host know about the guest physical page */
		virtio_write32(dev, VIRTIO_QADDR_OFF, q->page);
	}

	return OK;

free_phys_queues:
	for (j = 0; j < i; j++)
		free_phys_queue(&dev->queues[j]);

	return r;
}
static int
alloc_phys_queue(struct virtio_queue *q)
{
	assert(q != NULL);

	/* How much memory do we need? */
	q->ring_size = vring_size(q->num, PAGE_SIZE);

	q->vaddr = alloc_contig(q->ring_size, AC_ALIGN4K, &q->paddr);

	if (q->vaddr == NULL)
		return ENOMEM;

	q->data = alloc_contig(sizeof(q->data[0]) * q->num, AC_ALIGN4K, NULL);

	if (q->data == NULL) {
		free_contig(q->vaddr, q->ring_size);
		q->vaddr = NULL;
		q->paddr = 0;
		return ENOMEM;
	}

	return OK;
}
void
virtio_device_ready(struct virtio_device *dev)
{
	assert(dev != NULL);

	/* Register the IRQ line */
	virtio_irq_register(dev);

	/* Driver is ready to go! */
	virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, VIRTIO_STATUS_DRV_OK);
}
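/*
 * A minimal bring-up sketch, assuming the hypothetical blk_features table
 * above and virtio-blk's subsystem device ID 2; error handling shortened:
 *
 *	struct virtio_device *dev;
 *
 *	dev = virtio_setup_device(2, "vblk", blk_features,
 *		sizeof(blk_features) / sizeof(blk_features[0]), 1, 0);
 *	if (dev == NULL)
 *		panic("no matching virtio device found");
 *	if (virtio_alloc_queues(dev, 1) != OK)
 *		panic("cannot allocate queues");
 *	virtio_device_ready(dev);
 */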
void
virtio_free_queues(struct virtio_device *dev)
{
	int i;

	assert(dev != NULL);
	assert(dev->queues != NULL);
	assert(dev->num_queues > 0);

	for (i = 0; i < dev->num_queues; i++)
		free_phys_queue(&dev->queues[i]);

	free(dev->queues);
	dev->queues = NULL;
	dev->num_queues = 0;
}
static void
free_phys_queue(struct virtio_queue *q)
{
	assert(q != NULL);
	assert(q->vaddr != NULL);

	free_contig(q->vaddr, q->ring_size);
	q->vaddr = NULL;
	q->paddr = 0;

	/* q->data was allocated with q->num entries; free it with the
	 * matching size before clearing q->num.
	 */
	free_contig(q->data, sizeof(q->data[0]) * q->num);
	q->data = NULL;
	q->num = 0;
}
static void
init_phys_queue(struct virtio_queue *q)
{
	memset(q->vaddr, 0, q->ring_size);
	memset(q->data, 0, sizeof(q->data[0]) * q->num);

	/* physical page in guest */
	q->page = q->paddr / PAGE_SIZE;

	/* Set pointers in q->vring according to size */
	vring_init(&q->vring, q->num, q->vaddr, PAGE_SIZE);

	/* Everything's free at this point */
	for (int i = 0; i < q->num; i++) {
		q->vring.desc[i].flags = VRING_DESC_F_NEXT;
		q->vring.desc[i].next = (i + 1) & (q->num - 1);
	}

	q->free_num = q->num;
	q->free_head = 0;
	q->free_tail = q->num - 1;
	q->last_used = 0;
}
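/*
 * For illustration: with q->paddr = 0x1234000 and a 4096-byte PAGE_SIZE,
 * q->page becomes 0x1234, the page frame number the legacy interface
 * expects. With q->num = 8, the loop above links the free list as
 * 0 -> 1 -> ... -> 7 -> 0; the "& (q->num - 1)" wraparound is valid
 * because init_phys_queues() rejects sizes that are not powers of two.
 */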
void
virtio_free_device(struct virtio_device *dev)
{
	int i;
	struct indirect_desc_table *desc;

	assert(dev != NULL);

	assert(dev->num_indirect > 0);

	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];
		free_contig(desc->descs, desc->len);
	}

	dev->num_indirect = 0;

	assert(dev->indirect != NULL);
	free(dev->indirect);
	dev->indirect = NULL;

	free(dev);
}
static int
init_indirect_desc_table(struct indirect_desc_table *desc)
{
	desc->in_use = 0;
	desc->len = (MAPVEC_NR + MAPVEC_NR / 2) * sizeof(struct vring_desc);

	desc->descs = alloc_contig(desc->len, AC_ALIGN4K, &desc->paddr);

	/* Check for allocation failure before touching the memory */
	if (desc->descs == NULL)
		return ENOMEM;

	memset(desc->descs, 0, desc->len);

	return OK;
}
static int
init_indirect_desc_tables(struct virtio_device *dev)
{
	int i, j, r;
	struct indirect_desc_table *desc;

	dev->indirect = malloc(dev->num_indirect * sizeof(dev->indirect[0]));

	if (dev->indirect == NULL) {
		printf("%s: Could not allocate indirect tables\n", dev->name);
		return ENOMEM;
	}

	memset(dev->indirect, 0, dev->num_indirect * sizeof(dev->indirect[0]));

	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];
		if ((r = init_indirect_desc_table(desc)) != OK) {
			/* error path: free already initialized tables */
			for (j = 0; j < i; j++) {
				desc = &dev->indirect[j];
				free_contig(desc->descs, desc->len);
			}

			free(dev->indirect);
			dev->indirect = NULL;

			return r;
		}
	}

	return OK;
}
static void
clear_indirect_table(struct virtio_device *dev, struct vring_desc *vd)
{
	int i;
	struct indirect_desc_table *desc;

	assert(vd->flags & VRING_DESC_F_INDIRECT);
	vd->flags = vd->flags & ~VRING_DESC_F_INDIRECT;

	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];

		if (desc->paddr == vd->addr) {
			assert(desc->in_use);
			desc->in_use = 0;
			break;
		}
	}

	if (i >= dev->num_indirect)
		panic("%s: Could not clear indirect descriptor table",
			dev->name);
}
static void
use_vring_desc(struct vring_desc *vd, struct vumap_phys *vp)
{
	/* The lowest bit of vp_addr flags a device-writable buffer */
	vd->addr = vp->vp_addr & ~1UL;
	vd->len = vp->vp_size;
	vd->flags = VRING_DESC_F_NEXT;

	if (vp->vp_addr & 1)
		vd->flags |= VRING_DESC_F_WRITE;
}
static void
set_indirect_descriptors(struct virtio_device *dev, struct virtio_queue *q,
		struct vumap_phys *bufs, size_t num)
{
	/* Indirect descriptor tables are simply filled from left to right */
	size_t i;
	struct indirect_desc_table *desc;
	struct vring *vring = &q->vring;
	struct vring_desc *vd, *ivd;

	/* Find the first unused indirect descriptor table */
	for (i = 0; i < dev->num_indirect; i++) {
		desc = &dev->indirect[i];

		/* If an unused indirect descriptor table was found,
		 * mark it as being used and exit the loop.
		 */
		if (!desc->in_use) {
			desc->in_use = 1;
			break;
		}
	}

	if (i >= dev->num_indirect)
		panic("%s: No indirect descriptor tables left", dev->name);

	/* For indirect descriptor tables, only a single descriptor from
	 * the main ring is used.
	 */
	vd = &vring->desc[q->free_head];
	vd->flags = VRING_DESC_F_INDIRECT;
	vd->addr = desc->paddr;
	vd->len = num * sizeof(desc->descs[0]);

	/* Initialize the descriptors in the indirect descriptor table */
	for (i = 0; i < num; i++) {
		ivd = &desc->descs[i];

		use_vring_desc(ivd, &bufs[i]);
		ivd->next = i + 1;
	}

	/* Unset the next bit of the last descriptor */
	ivd->flags = ivd->flags & ~VRING_DESC_F_NEXT;

	/* Update the queue; only a single descriptor was used */
	q->free_num -= 1;
	q->free_head = vd->next;
}
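/*
 * For illustration, queueing three buffers indirectly consumes a single
 * slot in the main ring:
 *
 *	vring->desc[head]:  addr  = desc->paddr
 *	                    len   = 3 * sizeof(struct vring_desc)
 *	                    flags = VRING_DESC_F_INDIRECT
 *	desc->descs[0..2]:  the actual buffers, chained with
 *	                    VRING_DESC_F_NEXT, unset on the last one
 */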
static void
set_direct_descriptors(struct virtio_queue *q, struct vumap_phys *bufs,
		size_t num)
{
	u16_t i;
	size_t count;
	struct vring *vring = &q->vring;
	struct vring_desc *vd;

	for (i = q->free_head, count = 0; count < num; count++) {

		/* The next free descriptor */
		vd = &vring->desc[i];

		/* The descriptor is linked in the free list, so
		 * it always has the next bit set.
		 */
		assert(vd->flags & VRING_DESC_F_NEXT);

		use_vring_desc(vd, &bufs[count]);
		i = vd->next;
	}

	/* Unset the next bit of the last descriptor */
	vd->flags = vd->flags & ~VRING_DESC_F_NEXT;

	/* Update the queue */
	q->free_num -= num;
	q->free_head = i;
}
void
virtio_to_queue(struct virtio_device *dev, int qidx, struct vumap_phys *bufs,
		size_t num, void *data)
{
	u16_t free_first;
	int left;
	struct virtio_queue *q = &dev->queues[qidx];
	struct vring *vring = &q->vring;

	assert(0 <= qidx && qidx < dev->num_queues);

	if (data == NULL)
		panic("%s: NULL data received queue %d", dev->name, qidx);

	free_first = q->free_head;

	left = (int)q->free_num - (int)num;

	if (left < dev->threads)
		set_indirect_descriptors(dev, q, bufs, num);
	else
		set_direct_descriptors(q, bufs, num);

	/* Next index for the host is the old free_head */
	vring->avail->ring[vring->avail->idx % q->num] = free_first;

	/* Provided by the caller to identify this slot */
	q->data[free_first] = data;

	/* Make sure the host sees the new descriptors */
	__insn_barrier();

	/* advance the avail idx */
	vring->avail->idx += 1;

	/* Make sure the host sees the new avail->idx */
	__insn_barrier();

	kick_queue(dev, qidx);
}
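/*
 * A minimal usage sketch, assuming a single device-writable buffer (low
 * bit of vp_addr set, see use_vring_desc()) and that virtio_from_queue()
 * returns 0 whenever it could dequeue a used element; my_request and
 * complete_request() are hypothetical driver-side names:
 *
 *	struct vumap_phys phys[1];
 *	void *token;
 *
 *	phys[0].vp_addr = buf_paddr | 1;
 *	phys[0].vp_size = buf_size;
 *	virtio_to_queue(dev, 0, phys, 1, &my_request);
 *
 *	... later, typically from the IRQ handler:
 *	while (virtio_from_queue(dev, 0, &token) == 0)
 *		complete_request(token);
 */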
int
virtio_from_queue(struct virtio_device *dev, int qidx, void **data)
{
	struct virtio_queue *q;
	struct vring *vring;
	struct vring_used_elem *uel;
	struct vring_desc *vd;
	int count = 0;
	u16_t idx;
	u16_t used_idx;

	assert(0 <= qidx && qidx < dev->num_queues);

	q = &dev->queues[qidx];
	vring = &q->vring;

	/* Make sure we see the changes done by the host */
	__insn_barrier();

	/* The index from the host */
	used_idx = vring->used->idx % q->num;

	/* We already saw this one, nothing to do here */
	if (q->last_used == used_idx)
		return -1;

	/* Get the vring_used element */
	uel = &q->vring.used->ring[q->last_used];

	/* Update the last used element */
	q->last_used = (q->last_used + 1) % q->num;

	/* index of the used element */
	idx = uel->id % q->num;

	assert(q->data[idx] != NULL);

	/* Get the descriptor */
	vd = &vring->desc[idx];

	/* Unconditionally set the tail->next to the first used one */
	assert(vring->desc[q->free_tail].flags & VRING_DESC_F_NEXT);
	vring->desc[q->free_tail].next = idx;

	/* Find the last index; eventually there has to be one
	 * without the next flag.
	 *
	 * FIXME: Protect from endless loop
	 */
	while (vd->flags & VRING_DESC_F_NEXT) {

		if (vd->flags & VRING_DESC_F_INDIRECT)
			clear_indirect_table(dev, vd);

		idx = vd->next;
		vd = &vring->desc[idx];
		count++;
	}

	/* Didn't count the last one */
	count++;

	if (vd->flags & VRING_DESC_F_INDIRECT)
		clear_indirect_table(dev, vd);

	/* idx points to the tail now; update the queue */
	q->free_tail = idx;
	assert(!(vd->flags & VRING_DESC_F_NEXT));

	/* We can always connect the tail with the head */
	vring->desc[q->free_tail].next = q->free_head;
	vring->desc[q->free_tail].flags = VRING_DESC_F_NEXT;

	q->free_num += count;

	assert(q->free_num <= q->num);

	*data = q->data[uel->id];
	q->data[uel->id] = NULL;

	return 0;
}
int
virtio_had_irq(struct virtio_device *dev)
{
	return virtio_read8(dev, VIRTIO_ISR_STATUS_OFF) & 1;
}
void
virtio_reset_device(struct virtio_device *dev)
{
	virtio_irq_unregister(dev);
	virtio_write8(dev, VIRTIO_DEV_STATUS_OFF, 0);
}
void
virtio_irq_enable(struct virtio_device *dev)
{
	int r;
	if ((r = sys_irqenable(&dev->irq_hook)) != OK)
		panic("%s: Unable to enable IRQ %d", dev->name, r);
}
void
virtio_irq_disable(struct virtio_device *dev)
{
	int r;
	if ((r = sys_irqdisable(&dev->irq_hook)) != OK)
		panic("%s: Unable to disable IRQ %d", dev->name, r);
}
static int
wants_kick(struct virtio_queue *q)
{
	return !(q->vring.used->flags & VRING_USED_F_NO_NOTIFY);
}
static void
kick_queue(struct virtio_device *dev, int qidx)
{
	assert(0 <= qidx && qidx < dev->num_queues);

	if (wants_kick(&dev->queues[qidx]))
		virtio_write16(dev, VIRTIO_QNOTFIY_OFF, qidx);
}
static int
is_matching_device(u16_t expected_sdid, u16_t vid, u16_t sdid)
{
	return vid == VIRTIO_VENDOR_ID && sdid == expected_sdid;
}
static void
virtio_irq_register(struct virtio_device *dev)
{
	int r;
	if ((r = sys_irqsetpolicy(dev->irq, 0, &dev->irq_hook)) != OK)
		panic("%s: Unable to register IRQ %d", dev->name, r);
}
static void
virtio_irq_unregister(struct virtio_device *dev)
{
	int r;
	if ((r = sys_irqrmpolicy(&dev->irq_hook)) != OK)
		panic("%s: Unable to unregister IRQ %d", dev->name, r);
}
static int
_supports(struct virtio_device *dev, int bit, int host)
{
	for (int i = 0; i < dev->num_features; i++) {
		struct virtio_feature *f = &dev->features[i];

		if (f->bit == bit)
			return host ? f->host_support : f->guest_support;
	}

	panic("%s: Feature not found bit=%d", dev->name, bit);
}
int
virtio_host_supports(struct virtio_device *dev, int bit)
{
	return _supports(dev, bit, 1);
}

int
virtio_guest_supports(struct virtio_device *dev, int bit)
{
	return _supports(dev, bit, 0);
}
/* Just some wrappers around sys_in* */
#define VIRTIO_READ_XX(xx, suff)					\
u##xx##_t								\
virtio_read##xx(struct virtio_device *dev, off_t off)			\
{									\
	int r;								\
	u32_t ret;							\
	if ((r = sys_in##suff(dev->port + off, &ret)) != OK)		\
		panic("%s: Read failed %d %d r=%d", dev->name,		\
			dev->port, off, r);				\
									\
	return ret;							\
}

VIRTIO_READ_XX(32, l)
VIRTIO_READ_XX(16, w)
VIRTIO_READ_XX(8, b)
/* Just some wrappers around sys_out* */
#define VIRTIO_WRITE_XX(xx, suff)					\
void									\
virtio_write##xx(struct virtio_device *dev, off_t off, u##xx##_t val)	\
{									\
	int r;								\
	if ((r = sys_out##suff(dev->port + off, val)) != OK)		\
		panic("%s: Write failed %d %d r=%d", dev->name,		\
			dev->port, off, r);				\
}

VIRTIO_WRITE_XX(32, l)
VIRTIO_WRITE_XX(16, w)
VIRTIO_WRITE_XX(8, b)
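/*
 * For illustration, VIRTIO_WRITE_XX(8, b) expands to roughly:
 *
 *	void
 *	virtio_write8(struct virtio_device *dev, off_t off, u8_t val)
 *	{
 *		int r;
 *		if ((r = sys_outb(dev->port + off, val)) != OK)
 *			panic("%s: Write failed %d %d r=%d", dev->name,
 *				dev->port, off, r);
 *	}
 */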
/* Just some wrappers around sys_in*, offset into the device-specific area */
#define VIRTIO_SREAD_XX(xx, suff)					\
u##xx##_t								\
virtio_sread##xx(struct virtio_device *dev, off_t off)			\
{									\
	int r;								\
	u32_t ret;							\
	off += VIRTIO_DEV_SPECIFIC_OFF;					\
									\
	if (dev->msi)							\
		off += VIRTIO_MSI_ADD_OFF;				\
									\
	if ((r = sys_in##suff(dev->port + off, &ret)) != OK)		\
		panic("%s: Read failed %d %d r=%d", dev->name,		\
			dev->port, off, r);				\
									\
	return ret;							\
}

VIRTIO_SREAD_XX(32, l)
VIRTIO_SREAD_XX(16, w)
VIRTIO_SREAD_XX(8, b)
/* Just some wrappers around sys_out*, offset into the device-specific area */
#define VIRTIO_SWRITE_XX(xx, suff)					\
void									\
virtio_swrite##xx(struct virtio_device *dev, off_t off, u##xx##_t val)	\
{									\
	int r;								\
	off += VIRTIO_DEV_SPECIFIC_OFF;					\
									\
	if (dev->msi)							\
		off += VIRTIO_MSI_ADD_OFF;				\
									\
	if ((r = sys_out##suff(dev->port + off, val)) != OK)		\
		panic("%s: Write failed %d %d r=%d", dev->name,		\
			dev->port, off, r);				\
}

VIRTIO_SWRITE_XX(32, l)
VIRTIO_SWRITE_XX(16, w)
VIRTIO_SWRITE_XX(8, b)