2 * Virtio PCI driver - modern (virtio 1.0) device support
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
8 * Copyright Red Hat, Inc. 2014
11 * Anthony Liguori <aliguori@us.ibm.com>
12 * Rusty Russell <rusty@rustcorp.com.au>
13 * Michael S. Tsirkin <mst@redhat.com>
15 * This work is licensed under the terms of the GNU GPL, version 2 or later.
16 * See the COPYING file in the top-level directory.
20 #define VIRTIO_PCI_NO_LEGACY
21 #include "virtio_pci_common.h"
/*
 * map_capability - map the region described by a virtio PCI capability.
 *
 * Visible behaviour: fields of struct virtio_pci_cap are read from PCI
 * config space at @off (one byte field plus the 'offset' and 'length'
 * dwords).  The capability is then rejected, with a log message, when
 *   - length <= start,
 *   - length - start < minlen,
 *   - start + offset wraps around,
 *   - offset is not a multiple of align (align - 1 is used as a mask),
 *   - minlen + offset wraps or exceeds pci_resource_len(dev, bar).
 * Finally pci_iomap_range(dev, bar, offset, length) is called and a failure
 * to map is logged.
 *
 * NOTE(review): the remainder of the parameter list, the local
 * declarations, the bodies of the error branches and any adjustment of
 * offset/length between the checks are not visible in this view; confirm
 * them against the full file before relying on this summary.
 */
23 static void __iomem
*map_capability(struct pci_dev
*dev
, int off
,
33 pci_read_config_byte(dev
, off
+ offsetof(struct virtio_pci_cap
,
36 pci_read_config_dword(dev
, off
+ offsetof(struct virtio_pci_cap
, offset
),
38 pci_read_config_dword(dev
, off
+ offsetof(struct virtio_pci_cap
, length
),
41 if (length
<= start
) {
43 "virtio_pci: bad capability len %u (>%u expected)\n",
48 if (length
- start
< minlen
) {
50 "virtio_pci: bad capability len %u (>=%zu expected)\n",
57 if (start
+ offset
< offset
) {
59 "virtio_pci: map wrap-around %u+%u\n",
66 if (offset
& (align
- 1)) {
68 "virtio_pci: offset %u not aligned to %u\n",
79 if (minlen
+ offset
< minlen
||
80 minlen
+ offset
> pci_resource_len(dev
, bar
)) {
82 "virtio_pci: map virtio %zu@%u "
83 "out of range on bar %i length %lu\n",
85 bar
, (unsigned long)pci_resource_len(dev
, bar
));
89 p
= pci_iomap_range(dev
, bar
, offset
, length
);
92 "virtio_pci: unable to map virtio %u@%u on bar %i\n",
97 static void iowrite64_twopart(u64 val
, __le32 __iomem
*lo
, __le32 __iomem
*hi
)
99 iowrite32((u32
)val
, lo
);
100 iowrite32(val
>> 32, hi
);
103 /* virtio config->get_features() implementation */
104 static u64
vp_get_features(struct virtio_device
*vdev
)
106 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
109 iowrite32(0, &vp_dev
->common
->device_feature_select
);
110 features
= ioread32(&vp_dev
->common
->device_feature
);
111 iowrite32(1, &vp_dev
->common
->device_feature_select
);
112 features
|= ((u64
)ioread32(&vp_dev
->common
->device_feature
) << 32);
117 /* virtio config->finalize_features() implementation */
/*
 * Visible behaviour: vring_transport_features() is given the chance to
 * adjust the feature bits, an error is logged when VIRTIO_F_VERSION_1 is
 * not set afterwards, and vdev->features is then written to the device as
 * two 32-bit halves (guest_feature_select 0 + low word, guest_feature_select
 * 1 + high word).
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the missing-VERSION_1 branch returns an error before the
 * feature writes -- confirm against the full file.
 */
118 static int vp_finalize_features(struct virtio_device
*vdev
)
120 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
122 /* Give virtio_ring a chance to accept features. */
123 vring_transport_features(vdev
);
125 if (!__virtio_test_bit(vdev
, VIRTIO_F_VERSION_1
)) {
126 dev_err(&vdev
->dev
, "virtio: device uses modern interface "
127 "but does not have VIRTIO_F_VERSION_1\n");
131 iowrite32(0, &vp_dev
->common
->guest_feature_select
);
132 iowrite32((u32
)vdev
->features
, &vp_dev
->common
->guest_feature
);
133 iowrite32(1, &vp_dev
->common
->guest_feature_select
);
134 iowrite32(vdev
->features
>> 32, &vp_dev
->common
->guest_feature
);
139 /* virtio config->get() implementation */
/*
 * Copy @len bytes of device-specific configuration, starting at @offset
 * inside the mapped vp_dev->device region, into @buf.
 *
 * Visible behaviour: BUG_ON() fires when offset + len exceeds
 * vp_dev->device_len.  Four access shapes follow: a single ioread8(), a
 * single ioread16() passed through cpu_to_le16(), a single ioread32()
 * passed through cpu_to_le32(), and a pair of ioread32() calls at offset
 * and offset + sizeof l whose results land in consecutive parts of @buf.
 * Every value reaches @buf through memcpy().
 *
 * NOTE(review): the code that selects one of these shapes from @len
 * (presumably a switch on len) and the declarations of b, w and l are not
 * visible in this view -- confirm against the full file.
 */
140 static void vp_get(struct virtio_device
*vdev
, unsigned offset
,
141 void *buf
, unsigned len
)
143 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
148 BUG_ON(offset
+ len
> vp_dev
->device_len
);
152 b
= ioread8(vp_dev
->device
+ offset
);
153 memcpy(buf
, &b
, sizeof b
);
156 w
= cpu_to_le16(ioread16(vp_dev
->device
+ offset
));
157 memcpy(buf
, &w
, sizeof w
);
160 l
= cpu_to_le32(ioread32(vp_dev
->device
+ offset
));
161 memcpy(buf
, &l
, sizeof l
);
164 l
= cpu_to_le32(ioread32(vp_dev
->device
+ offset
));
165 memcpy(buf
, &l
, sizeof l
);
166 l
= cpu_to_le32(ioread32(vp_dev
->device
+ offset
+ sizeof l
));
167 memcpy(buf
+ sizeof l
, &l
, sizeof l
);
/* The config->set() implementation; it is symmetric to the config->get() implementation. */
/*
 * Write @len bytes from @buf into the device-specific configuration,
 * starting at @offset inside the mapped vp_dev->device region.
 *
 * Visible behaviour: BUG_ON() fires when offset + len exceeds
 * vp_dev->device_len.  Four access shapes follow: one iowrite8(), one
 * iowrite16() of le16_to_cpu(w), one iowrite32() of le32_to_cpu(l), and a
 * pair of iowrite32() calls at offset and offset + sizeof l fed from
 * consecutive parts of @buf.  Every value is first copied out of @buf with
 * memcpy().
 *
 * NOTE(review): the code that selects one of these shapes from @len
 * (presumably a switch on len) and the declarations of b, w and l are not
 * visible in this view -- confirm against the full file.
 */
176 static void vp_set(struct virtio_device
*vdev
, unsigned offset
,
177 const void *buf
, unsigned len
)
179 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
184 BUG_ON(offset
+ len
> vp_dev
->device_len
);
188 memcpy(&b
, buf
, sizeof b
);
189 iowrite8(b
, vp_dev
->device
+ offset
);
192 memcpy(&w
, buf
, sizeof w
);
193 iowrite16(le16_to_cpu(w
), vp_dev
->device
+ offset
);
196 memcpy(&l
, buf
, sizeof l
);
197 iowrite32(le32_to_cpu(l
), vp_dev
->device
+ offset
);
200 memcpy(&l
, buf
, sizeof l
);
201 iowrite32(le32_to_cpu(l
), vp_dev
->device
+ offset
);
202 memcpy(&l
, buf
+ sizeof l
, sizeof l
);
203 iowrite32(le32_to_cpu(l
), vp_dev
->device
+ offset
+ sizeof l
);
210 static u32
vp_generation(struct virtio_device
*vdev
)
212 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
213 return ioread8(&vp_dev
->common
->config_generation
);
216 /* config->{get,set}_status() implementations */
217 static u8
vp_get_status(struct virtio_device
*vdev
)
219 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
220 return ioread8(&vp_dev
->common
->device_status
);
/*
 * Write @status into the device_status register of the common
 * configuration structure.
 *
 * NOTE(review): the comment below says status must never be 0, but no
 * check of @status is visible in this view (the original line between the
 * comment and the write is absent); confirm that the guard exists in the
 * full file.
 */
223 static void vp_set_status(struct virtio_device
*vdev
, u8 status
)
225 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
226 /* We should never be setting status to 0. */
228 iowrite8(status
, &vp_dev
->common
->device_status
);
231 static void vp_reset(struct virtio_device
*vdev
)
233 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
234 /* 0 status means a reset. */
235 iowrite8(0, &vp_dev
->common
->device_status
);
236 /* Flush out the status write, and flush in device writes,
237 * including MSI-X interrupts, if any. */
238 ioread8(&vp_dev
->common
->device_status
);
239 /* Flush pending VQ/configuration callbacks. */
240 vp_synchronize_vectors(vdev
);
243 static u16
vp_config_vector(struct virtio_pci_device
*vp_dev
, u16 vector
)
245 /* Setup the vector used for configuration events */
246 iowrite16(vector
, &vp_dev
->common
->msix_config
);
247 /* Verify we had enough resources to assign the vector */
248 /* Will also flush the write out to device */
249 return ioread16(&vp_dev
->common
->msix_config
);
252 static size_t vring_pci_size(u16 num
)
254 /* We only need a cacheline separation. */
255 return PAGE_ALIGN(vring_size(num
, SMP_CACHE_BYTES
));
/*
 * Allocate zeroed memory for a ring of *num entries.
 *
 * Visible behaviour: while *num is non-zero and vring_pci_size(*num) is
 * larger than PAGE_SIZE, an allocation of that size is attempted with
 * __GFP_NOWARN and *num is halved before the next attempt.  After the loop
 * a last allocation is made without __GFP_NOWARN.  Because *num is
 * modified in place, it may be smaller on return than on entry.
 *
 * NOTE(review): the loop body's handling of a successful allocation
 * (presumably an early return of the pages) and any checks between the
 * loop and the final allocation are not visible in this view -- confirm
 * against the full file.
 */
258 static void *alloc_virtqueue_pages(int *num
)
262 /* TODO: allocate each queue chunk individually */
263 for (; *num
&& vring_pci_size(*num
) > PAGE_SIZE
; *num
/= 2) {
264 pages
= alloc_pages_exact(vring_pci_size(*num
),
265 GFP_KERNEL
|__GFP_ZERO
|__GFP_NOWARN
);
273 /* Try to get a single page. You are my only hope! */
274 return alloc_pages_exact(vring_pci_size(*num
), GFP_KERNEL
|__GFP_ZERO
);
/*
 * setup_vq - create one virtqueue and describe it to the device.
 *
 * Visible behaviour, in order:
 *   - returns ERR_PTR(-ENOENT) when index is not below num_queues;
 *   - selects the queue, reads queue_size into num and returns
 *     ERR_PTR(-ENOENT) when num is 0 or queue_enable already reads
 *     non-zero;
 *   - returns ERR_PTR(-EINVAL) when num is not a power of two;
 *   - reads queue_notify_off, records the MSI-X vector in info, allocates
 *     ring memory with alloc_virtqueue_pages(&info->num) and returns
 *     ERR_PTR(-ENOMEM) when that fails;
 *   - creates the ring with vring_new_virtqueue() and writes queue_size and
 *     the physical addresses of the descriptor, avail and used areas;
 *   - sets vq->priv to the notification address, either inside the
 *     pre-mapped notify_base (after a bounds check against notify_len) or
 *     through a per-queue map_capability() call;
 *   - programs queue_msix_vector and reads it back when a vector was
 *     requested;
 *   - the unwind path unmaps a per-queue notify mapping, deletes the ring
 *     and frees the pages.
 *
 * NOTE(review): the remaining parameters (index, name and msix_vec are
 * used but their declarations are not visible), the local declarations,
 * the error labels and the final return statements are absent from this
 * view -- confirm against the full file.
 *
 * NOTE(review): the "bad queue size %u" and "bad notification offset ..."
 * messages have no trailing "\n", unlike the other messages in this file.
 */
277 static struct virtqueue
*setup_vq(struct virtio_pci_device
*vp_dev
,
278 struct virtio_pci_vq_info
*info
,
280 void (*callback
)(struct virtqueue
*vq
),
284 struct virtio_pci_common_cfg __iomem
*cfg
= vp_dev
->common
;
285 struct virtqueue
*vq
;
289 if (index
>= ioread16(&cfg
->num_queues
))
290 return ERR_PTR(-ENOENT
);
292 /* Select the queue we're interested in */
293 iowrite16(index
, &cfg
->queue_select
);
295 /* Check if queue is either not available or already active. */
296 num
= ioread16(&cfg
->queue_size
);
297 if (!num
|| ioread16(&cfg
->queue_enable
))
298 return ERR_PTR(-ENOENT
);
300 if (num
& (num
- 1)) {
301 dev_warn(&vp_dev
->pci_dev
->dev
, "bad queue size %u", num
);
302 return ERR_PTR(-EINVAL
);
305 /* get offset of notification word for this vq */
306 off
= ioread16(&cfg
->queue_notify_off
);
309 info
->msix_vector
= msix_vec
;
311 info
->queue
= alloc_virtqueue_pages(&info
->num
);
312 if (info
->queue
== NULL
)
313 return ERR_PTR(-ENOMEM
);
315 /* create the vring */
316 vq
= vring_new_virtqueue(index
, info
->num
,
317 SMP_CACHE_BYTES
, &vp_dev
->vdev
,
318 true, info
->queue
, vp_notify
, callback
, name
);
/*
 * NOTE(review): the ring above is created with info->num, which
 * alloc_virtqueue_pages() receives by pointer and halves in its retry
 * loop, yet queue_size below is written from num (the value originally
 * read from the device).  If the allocation shrank info->num the two
 * disagree -- confirm whether info->num should be written here instead.
 */
324 /* activate the queue */
325 iowrite16(num
, &cfg
->queue_size
);
326 iowrite64_twopart(virt_to_phys(info
->queue
),
327 &cfg
->queue_desc_lo
, &cfg
->queue_desc_hi
);
328 iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq
)),
329 &cfg
->queue_avail_lo
, &cfg
->queue_avail_hi
);
330 iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq
)),
331 &cfg
->queue_used_lo
, &cfg
->queue_used_hi
);
333 if (vp_dev
->notify_base
) {
334 /* offset should not wrap */
335 if ((u64
)off
* vp_dev
->notify_offset_multiplier
+ 2
336 > vp_dev
->notify_len
) {
337 dev_warn(&vp_dev
->pci_dev
->dev
,
338 "bad notification offset %u (x %u) "
339 "for queue %u > %zd",
340 off
, vp_dev
->notify_offset_multiplier
,
341 index
, vp_dev
->notify_len
);
345 vq
->priv
= (void __force
*)vp_dev
->notify_base
+
346 off
* vp_dev
->notify_offset_multiplier
;
348 vq
->priv
= (void __force
*)map_capability(vp_dev
->pci_dev
,
349 vp_dev
->notify_map_cap
, 2, 2,
350 off
* vp_dev
->notify_offset_multiplier
, 2,
359 if (msix_vec
!= VIRTIO_MSI_NO_VECTOR
) {
360 iowrite16(msix_vec
, &cfg
->queue_msix_vector
);
361 msix_vec
= ioread16(&cfg
->queue_msix_vector
);
362 if (msix_vec
== VIRTIO_MSI_NO_VECTOR
) {
364 goto err_assign_vector
;
371 if (!vp_dev
->notify_base
)
372 pci_iounmap(vp_dev
->pci_dev
, (void __iomem __force
*)vq
->priv
);
374 vring_del_virtqueue(vq
);
376 free_pages_exact(info
->queue
, vring_pci_size(info
->num
));
/*
 * Visible behaviour: the queues are first set up through vp_find_vqs(),
 * then every virtqueue on vdev->vqs is selected in turn and enabled by
 * writing 1 to queue_enable.  As the comment below explains, enabling is
 * deliberately done last because only a reset can undo it.
 *
 * NOTE(review): the last parameter (names is passed on but its declaration
 * is not visible), the handling of rc and the return statements are absent
 * from this view; presumably a non-zero rc is returned before any queue is
 * enabled -- confirm against the full file.
 */
380 static int vp_modern_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
381 struct virtqueue
*vqs
[],
382 vq_callback_t
*callbacks
[],
385 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
386 struct virtqueue
*vq
;
387 int rc
= vp_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
);
392 /* Select and activate all queues. Has to be done last: once we do
393 * this, there's no way to go back except reset.
395 list_for_each_entry(vq
, &vdev
->vqs
, list
) {
396 iowrite16(vq
->index
, &vp_dev
->common
->queue_select
);
397 iowrite16(1, &vp_dev
->common
->queue_enable
);
403 static void del_vq(struct virtio_pci_vq_info
*info
)
405 struct virtqueue
*vq
= info
->vq
;
406 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
408 iowrite16(vq
->index
, &vp_dev
->common
->queue_select
);
410 if (vp_dev
->msix_enabled
) {
411 iowrite16(VIRTIO_MSI_NO_VECTOR
,
412 &vp_dev
->common
->queue_msix_vector
);
413 /* Flush the write out to device */
414 ioread16(&vp_dev
->common
->queue_msix_vector
);
417 if (!vp_dev
->notify_base
)
418 pci_iounmap(vp_dev
->pci_dev
, (void __force __iomem
*)vq
->priv
);
420 vring_del_virtqueue(vq
);
422 free_pages_exact(info
->queue
, vring_pci_size(info
->num
));
/*
 * One of two virtio_config_ops tables defined in this file;
 * virtio_pci_modern_probe() stores a pointer to one of them in
 * vp_dev->vdev.config.
 *
 * NOTE(review): the initialisers on original lines 426, 427 and 431 are
 * not visible in this view, so what distinguishes this table from
 * virtio_pci_config_ops cannot be read off here; judging by the name it is
 * presumably the variant for devices without a device-specific config
 * region -- confirm against the full file.
 */
425 static const struct virtio_config_ops virtio_pci_config_nodev_ops
= {
428 .generation
= vp_generation
,
429 .get_status
= vp_get_status
,
430 .set_status
= vp_set_status
,
432 .find_vqs
= vp_modern_find_vqs
,
433 .del_vqs
= vp_del_vqs
,
434 .get_features
= vp_get_features
,
435 .finalize_features
= vp_finalize_features
,
436 .bus_name
= vp_bus_name
,
437 .set_vq_affinity
= vp_set_vq_affinity
,
/*
 * One of two virtio_config_ops tables defined in this file;
 * virtio_pci_modern_probe() stores a pointer to one of them in
 * vp_dev->vdev.config.
 *
 * NOTE(review): the initialisers on original lines 441, 442 and 446 are
 * not visible in this view; presumably they install vp_get, vp_set and
 * vp_reset -- confirm against the full file.
 */
440 static const struct virtio_config_ops virtio_pci_config_ops
= {
443 .generation
= vp_generation
,
444 .get_status
= vp_get_status
,
445 .set_status
= vp_set_status
,
447 .find_vqs
= vp_modern_find_vqs
,
448 .del_vqs
= vp_del_vqs
,
449 .get_features
= vp_get_features
,
450 .finalize_features
= vp_finalize_features
,
451 .bus_name
= vp_bus_name
,
452 .set_vq_affinity
= vp_set_vq_affinity
,
456 * virtio_pci_find_capability - walk capabilities to find device info.
457 * @dev: the pci device
458 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
459 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
461 * Returns offset of the capability, or 0.
/*
 * Visible behaviour: the vendor-specific capabilities (PCI_CAP_ID_VNDR)
 * are walked with pci_find_capability() / pci_find_next_capability().  Two
 * byte-sized fields of struct virtio_pci_cap are read from each one, and
 * an entry whose type equals cfg_type is tested for a BAR of non-zero
 * length whose resource flags intersect ioresource_types.
 *
 * NOTE(review): the loop condition, the reserved-BAR check announced by
 * the comment below and the return statements are not visible in this
 * view -- confirm against the full file.
 */
463 static inline int virtio_pci_find_capability(struct pci_dev
*dev
, u8 cfg_type
,
464 u32 ioresource_types
)
468 for (pos
= pci_find_capability(dev
, PCI_CAP_ID_VNDR
);
470 pos
= pci_find_next_capability(dev
, pos
, PCI_CAP_ID_VNDR
)) {
472 pci_read_config_byte(dev
, pos
+ offsetof(struct virtio_pci_cap
,
475 pci_read_config_byte(dev
, pos
+ offsetof(struct virtio_pci_cap
,
479 /* Ignore structures with reserved BAR values */
483 if (type
== cfg_type
) {
484 if (pci_resource_len(dev
, bar
) &&
485 pci_resource_flags(dev
, bar
) & ioresource_types
)
492 /* This is part of the ABI. Don't screw with it. */
493 static inline void check_offsets(void)
495 /* Note: disk space was harmed in compilation of this function. */
496 BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR
!=
497 offsetof(struct virtio_pci_cap
, cap_vndr
));
498 BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT
!=
499 offsetof(struct virtio_pci_cap
, cap_next
));
500 BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN
!=
501 offsetof(struct virtio_pci_cap
, cap_len
));
502 BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE
!=
503 offsetof(struct virtio_pci_cap
, cfg_type
));
504 BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR
!=
505 offsetof(struct virtio_pci_cap
, bar
));
506 BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET
!=
507 offsetof(struct virtio_pci_cap
, offset
));
508 BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH
!=
509 offsetof(struct virtio_pci_cap
, length
));
510 BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT
!=
511 offsetof(struct virtio_pci_notify_cap
,
512 notify_off_multiplier
));
513 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT
!=
514 offsetof(struct virtio_pci_common_cfg
,
515 device_feature_select
));
516 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF
!=
517 offsetof(struct virtio_pci_common_cfg
, device_feature
));
518 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT
!=
519 offsetof(struct virtio_pci_common_cfg
,
520 guest_feature_select
));
521 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF
!=
522 offsetof(struct virtio_pci_common_cfg
, guest_feature
));
523 BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX
!=
524 offsetof(struct virtio_pci_common_cfg
, msix_config
));
525 BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ
!=
526 offsetof(struct virtio_pci_common_cfg
, num_queues
));
527 BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS
!=
528 offsetof(struct virtio_pci_common_cfg
, device_status
));
529 BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION
!=
530 offsetof(struct virtio_pci_common_cfg
, config_generation
));
531 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT
!=
532 offsetof(struct virtio_pci_common_cfg
, queue_select
));
533 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE
!=
534 offsetof(struct virtio_pci_common_cfg
, queue_size
));
535 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX
!=
536 offsetof(struct virtio_pci_common_cfg
, queue_msix_vector
));
537 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE
!=
538 offsetof(struct virtio_pci_common_cfg
, queue_enable
));
539 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF
!=
540 offsetof(struct virtio_pci_common_cfg
, queue_notify_off
));
541 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO
!=
542 offsetof(struct virtio_pci_common_cfg
, queue_desc_lo
));
543 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI
!=
544 offsetof(struct virtio_pci_common_cfg
, queue_desc_hi
));
545 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO
!=
546 offsetof(struct virtio_pci_common_cfg
, queue_avail_lo
));
547 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI
!=
548 offsetof(struct virtio_pci_common_cfg
, queue_avail_hi
));
549 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO
!=
550 offsetof(struct virtio_pci_common_cfg
, queue_used_lo
));
551 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI
!=
552 offsetof(struct virtio_pci_common_cfg
, queue_used_hi
));
555 /* the PCI probing function */
/*
 * virtio_pci_modern_probe - set up @vp_dev for the modern (virtio 1.0)
 * PCI interface.
 *
 * Visible behaviour, in order:
 *   - only PCI device ids 0x1000..0x107f are considered;
 *   - ids below 0x1040 take the virtio device id from
 *     pci_dev->subsystem_device, the others use pci_dev->device - 0x1040;
 *     the virtio vendor id is pci_dev->subsystem_vendor;
 *   - virtio_device_is_legacy_only() is consulted;
 *   - the common, ISR and notify capabilities are looked up (ISR and
 *     notify are required once common exists); the device-specific
 *     capability is looked up as optional;
 *   - the common and ISR regions are mapped with map_capability();
 *   - the notify multiplier and two further dwords of the notify
 *     capability are read from config space;
 *   - when notify_length + (notify_offset % PAGE_SIZE) fits in PAGE_SIZE
 *     the notify region is mapped once into notify_base, otherwise the
 *     capability offset is saved in notify_map_cap for per-queue mapping;
 *   - the device-specific region is mapped and one of the two config ops
 *     tables is installed;
 *   - the config_vector, setup_vq and del_vq hooks are installed;
 *   - the unwind path unmaps notify_base (if set), isr and common.
 *
 * NOTE(review): most branch conditions, all return values, the error
 * labels and several call arguments are not visible in this view --
 * confirm against the full file before relying on this summary.
 */
556 int virtio_pci_modern_probe(struct virtio_pci_device
*vp_dev
)
558 struct pci_dev
*pci_dev
= vp_dev
->pci_dev
;
559 int err
, common
, isr
, notify
, device
;
565 /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
566 if (pci_dev
->device
< 0x1000 || pci_dev
->device
> 0x107f)
569 if (pci_dev
->device
< 0x1040) {
570 /* Transitional devices: use the PCI subsystem device id as
571 * virtio device id, same as legacy driver always did.
573 vp_dev
->vdev
.id
.device
= pci_dev
->subsystem_device
;
575 /* Modern devices: simply use PCI device id, but start from 0x1040. */
576 vp_dev
->vdev
.id
.device
= pci_dev
->device
- 0x1040;
578 vp_dev
->vdev
.id
.vendor
= pci_dev
->subsystem_vendor
;
580 if (virtio_device_is_legacy_only(vp_dev
->vdev
.id
))
583 /* check for a common config: if not, use legacy mode (bar 0). */
584 common
= virtio_pci_find_capability(pci_dev
, VIRTIO_PCI_CAP_COMMON_CFG
,
585 IORESOURCE_IO
| IORESOURCE_MEM
);
587 dev_info(&pci_dev
->dev
,
588 "virtio_pci: leaving for legacy driver\n");
592 /* If common is there, these should be too... */
593 isr
= virtio_pci_find_capability(pci_dev
, VIRTIO_PCI_CAP_ISR_CFG
,
594 IORESOURCE_IO
| IORESOURCE_MEM
);
595 notify
= virtio_pci_find_capability(pci_dev
, VIRTIO_PCI_CAP_NOTIFY_CFG
,
596 IORESOURCE_IO
| IORESOURCE_MEM
);
597 if (!isr
|| !notify
) {
598 dev_err(&pci_dev
->dev
,
599 "virtio_pci: missing capabilities %i/%i/%i\n",
600 common
, isr
, notify
);
604 /* Device capability is only mandatory for devices that have
605 * device-specific configuration.
607 device
= virtio_pci_find_capability(pci_dev
, VIRTIO_PCI_CAP_DEVICE_CFG
,
608 IORESOURCE_IO
| IORESOURCE_MEM
);
611 vp_dev
->common
= map_capability(pci_dev
, common
,
612 sizeof(struct virtio_pci_common_cfg
), 4,
613 0, sizeof(struct virtio_pci_common_cfg
),
617 vp_dev
->isr
= map_capability(pci_dev
, isr
, sizeof(u8
), 1,
623 /* Read notify_off_multiplier from config space. */
624 pci_read_config_dword(pci_dev
,
625 notify
+ offsetof(struct virtio_pci_notify_cap
,
626 notify_off_multiplier
),
627 &vp_dev
->notify_offset_multiplier
);
628 /* Read notify length and offset from config space. */
629 pci_read_config_dword(pci_dev
,
630 notify
+ offsetof(struct virtio_pci_notify_cap
,
634 pci_read_config_dword(pci_dev
,
635 notify
+ offsetof(struct virtio_pci_notify_cap
,
639 /* We don't know how many VQs we'll map, ahead of the time.
640 * If notify length is small, map it all now.
641 * Otherwise, map each VQ individually later.
643 if ((u64
)notify_length
+ (notify_offset
% PAGE_SIZE
) <= PAGE_SIZE
) {
644 vp_dev
->notify_base
= map_capability(pci_dev
, notify
, 2, 2,
646 &vp_dev
->notify_len
);
647 if (!vp_dev
->notify_base
)
650 vp_dev
->notify_map_cap
= notify
;
653 /* Again, we don't know how much we should map, but PAGE_SIZE
654 * is more than enough for all existing devices.
657 vp_dev
->device
= map_capability(pci_dev
, device
, 0, 4,
659 &vp_dev
->device_len
);
663 vp_dev
->vdev
.config
= &virtio_pci_config_ops
;
665 vp_dev
->vdev
.config
= &virtio_pci_config_nodev_ops
;
668 vp_dev
->config_vector
= vp_config_vector
;
669 vp_dev
->setup_vq
= setup_vq
;
670 vp_dev
->del_vq
= del_vq
;
675 if (vp_dev
->notify_base
)
676 pci_iounmap(pci_dev
, vp_dev
->notify_base
);
678 pci_iounmap(pci_dev
, vp_dev
->isr
);
680 pci_iounmap(pci_dev
, vp_dev
->common
);
/*
 * virtio_pci_modern_remove - release the mappings held by @vp_dev.
 *
 * Visible behaviour: pci_iounmap() is called for the device-specific
 * region, for notify_base when it is set, for the ISR region and for the
 * common configuration region, in that order.
 *
 * NOTE(review): the original line just before the first pci_iounmap() is
 * absent from this view, so the device-region unmap may be conditional
 * (presumably on vp_dev->device being set) -- confirm against the full
 * file.
 */
685 void virtio_pci_modern_remove(struct virtio_pci_device
*vp_dev
)
687 struct pci_dev
*pci_dev
= vp_dev
->pci_dev
;
690 pci_iounmap(pci_dev
, vp_dev
->device
);
691 if (vp_dev
->notify_base
)
692 pci_iounmap(pci_dev
, vp_dev
->notify_base
);
693 pci_iounmap(pci_dev
, vp_dev
->isr
);
694 pci_iounmap(pci_dev
, vp_dev
->common
);