4 * Copyright (c) 2020 Red Hat, Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
23 #include "qemu-common.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/virtio/virtio.h"
26 #include "sysemu/kvm.h"
27 #include "sysemu/reset.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
32 #include "standard-headers/linux/virtio_ids.h"
34 #include "hw/virtio/virtio-bus.h"
35 #include "hw/virtio/virtio-access.h"
36 #include "hw/virtio/virtio-iommu.h"
37 #include "hw/pci/pci_bus.h"
38 #include "hw/pci/pci.h"
/* Number of entries in each virtqueue created at realize time. */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
/* Size in bytes of the property area returned by a PROBE request. */
#define VIOMMU_PROBE_SIZE 512
44 typedef struct VirtIOIOMMUDomain
{
48 QLIST_HEAD(, VirtIOIOMMUEndpoint
) endpoint_list
;
51 typedef struct VirtIOIOMMUEndpoint
{
53 VirtIOIOMMUDomain
*domain
;
54 IOMMUMemoryRegion
*iommu_mr
;
55 QLIST_ENTRY(VirtIOIOMMUEndpoint
) next
;
56 } VirtIOIOMMUEndpoint
;
58 typedef struct VirtIOIOMMUInterval
{
61 } VirtIOIOMMUInterval
;
63 typedef struct VirtIOIOMMUMapping
{
68 static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice
*dev
)
70 return PCI_BUILD_BDF(pci_bus_num(dev
->bus
), dev
->devfn
);
74 * The bus number is used for lookup when SID based operations occur.
75 * In that case we lazily populate the IOMMUPciBus array from the bus hash
76 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
77 * numbers may not be always initialized yet.
79 static IOMMUPciBus
*iommu_find_iommu_pcibus(VirtIOIOMMU
*s
, uint8_t bus_num
)
81 IOMMUPciBus
*iommu_pci_bus
= s
->iommu_pcibus_by_bus_num
[bus_num
];
86 g_hash_table_iter_init(&iter
, s
->as_by_busptr
);
87 while (g_hash_table_iter_next(&iter
, NULL
, (void **)&iommu_pci_bus
)) {
88 if (pci_bus_num(iommu_pci_bus
->bus
) == bus_num
) {
89 s
->iommu_pcibus_by_bus_num
[bus_num
] = iommu_pci_bus
;
98 static IOMMUMemoryRegion
*virtio_iommu_mr(VirtIOIOMMU
*s
, uint32_t sid
)
100 uint8_t bus_n
, devfn
;
101 IOMMUPciBus
*iommu_pci_bus
;
104 bus_n
= PCI_BUS_NUM(sid
);
105 iommu_pci_bus
= iommu_find_iommu_pcibus(s
, bus_n
);
107 devfn
= sid
& (PCI_DEVFN_MAX
- 1);
108 dev
= iommu_pci_bus
->pbdev
[devfn
];
110 return &dev
->iommu_mr
;
116 static gint
interval_cmp(gconstpointer a
, gconstpointer b
, gpointer user_data
)
118 VirtIOIOMMUInterval
*inta
= (VirtIOIOMMUInterval
*)a
;
119 VirtIOIOMMUInterval
*intb
= (VirtIOIOMMUInterval
*)b
;
121 if (inta
->high
< intb
->low
) {
123 } else if (intb
->high
< inta
->low
) {
130 static void virtio_iommu_notify_map(IOMMUMemoryRegion
*mr
, hwaddr virt_start
,
131 hwaddr virt_end
, hwaddr paddr
,
135 IOMMUAccessFlags perm
= IOMMU_ACCESS_FLAG(flags
& VIRTIO_IOMMU_MAP_F_READ
,
136 flags
& VIRTIO_IOMMU_MAP_F_WRITE
);
138 if (!(mr
->iommu_notify_flags
& IOMMU_NOTIFIER_MAP
) ||
139 (flags
& VIRTIO_IOMMU_MAP_F_MMIO
) || !perm
) {
143 trace_virtio_iommu_notify_map(mr
->parent_obj
.name
, virt_start
, virt_end
,
146 event
.type
= IOMMU_NOTIFIER_MAP
;
147 event
.entry
.target_as
= &address_space_memory
;
148 event
.entry
.addr_mask
= virt_end
- virt_start
;
149 event
.entry
.iova
= virt_start
;
150 event
.entry
.perm
= perm
;
151 event
.entry
.translated_addr
= paddr
;
153 memory_region_notify_iommu(mr
, 0, event
);
156 static void virtio_iommu_notify_unmap(IOMMUMemoryRegion
*mr
, hwaddr virt_start
,
160 uint64_t delta
= virt_end
- virt_start
;
162 if (!(mr
->iommu_notify_flags
& IOMMU_NOTIFIER_UNMAP
)) {
166 trace_virtio_iommu_notify_unmap(mr
->parent_obj
.name
, virt_start
, virt_end
);
168 event
.type
= IOMMU_NOTIFIER_UNMAP
;
169 event
.entry
.target_as
= &address_space_memory
;
170 event
.entry
.perm
= IOMMU_NONE
;
171 event
.entry
.translated_addr
= 0;
172 event
.entry
.addr_mask
= delta
;
173 event
.entry
.iova
= virt_start
;
175 if (delta
== UINT64_MAX
) {
176 memory_region_notify_iommu(mr
, 0, event
);
180 while (virt_start
!= virt_end
+ 1) {
181 uint64_t mask
= dma_aligned_pow2_mask(virt_start
, virt_end
, 64);
183 event
.entry
.addr_mask
= mask
;
184 event
.entry
.iova
= virt_start
;
185 memory_region_notify_iommu(mr
, 0, event
);
186 virt_start
+= mask
+ 1;
190 static gboolean
virtio_iommu_notify_unmap_cb(gpointer key
, gpointer value
,
193 VirtIOIOMMUInterval
*interval
= (VirtIOIOMMUInterval
*) key
;
194 IOMMUMemoryRegion
*mr
= (IOMMUMemoryRegion
*) data
;
196 virtio_iommu_notify_unmap(mr
, interval
->low
, interval
->high
);
201 static gboolean
virtio_iommu_notify_map_cb(gpointer key
, gpointer value
,
204 VirtIOIOMMUMapping
*mapping
= (VirtIOIOMMUMapping
*) value
;
205 VirtIOIOMMUInterval
*interval
= (VirtIOIOMMUInterval
*) key
;
206 IOMMUMemoryRegion
*mr
= (IOMMUMemoryRegion
*) data
;
208 virtio_iommu_notify_map(mr
, interval
->low
, interval
->high
,
209 mapping
->phys_addr
, mapping
->flags
);
214 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint
*ep
)
216 VirtIOIOMMUDomain
*domain
= ep
->domain
;
221 g_tree_foreach(domain
->mappings
, virtio_iommu_notify_unmap_cb
,
223 QLIST_REMOVE(ep
, next
);
227 static VirtIOIOMMUEndpoint
*virtio_iommu_get_endpoint(VirtIOIOMMU
*s
,
230 VirtIOIOMMUEndpoint
*ep
;
231 IOMMUMemoryRegion
*mr
;
233 ep
= g_tree_lookup(s
->endpoints
, GUINT_TO_POINTER(ep_id
));
237 mr
= virtio_iommu_mr(s
, ep_id
);
241 ep
= g_malloc0(sizeof(*ep
));
244 trace_virtio_iommu_get_endpoint(ep_id
);
245 g_tree_insert(s
->endpoints
, GUINT_TO_POINTER(ep_id
), ep
);
249 static void virtio_iommu_put_endpoint(gpointer data
)
251 VirtIOIOMMUEndpoint
*ep
= (VirtIOIOMMUEndpoint
*)data
;
254 virtio_iommu_detach_endpoint_from_domain(ep
);
257 trace_virtio_iommu_put_endpoint(ep
->id
);
261 static VirtIOIOMMUDomain
*virtio_iommu_get_domain(VirtIOIOMMU
*s
,
265 VirtIOIOMMUDomain
*domain
;
267 domain
= g_tree_lookup(s
->domains
, GUINT_TO_POINTER(domain_id
));
269 if (domain
->bypass
!= bypass
) {
274 domain
= g_malloc0(sizeof(*domain
));
275 domain
->id
= domain_id
;
276 domain
->mappings
= g_tree_new_full((GCompareDataFunc
)interval_cmp
,
277 NULL
, (GDestroyNotify
)g_free
,
278 (GDestroyNotify
)g_free
);
279 domain
->bypass
= bypass
;
280 g_tree_insert(s
->domains
, GUINT_TO_POINTER(domain_id
), domain
);
281 QLIST_INIT(&domain
->endpoint_list
);
282 trace_virtio_iommu_get_domain(domain_id
);
286 static void virtio_iommu_put_domain(gpointer data
)
288 VirtIOIOMMUDomain
*domain
= (VirtIOIOMMUDomain
*)data
;
289 VirtIOIOMMUEndpoint
*iter
, *tmp
;
291 QLIST_FOREACH_SAFE(iter
, &domain
->endpoint_list
, next
, tmp
) {
292 virtio_iommu_detach_endpoint_from_domain(iter
);
294 g_tree_destroy(domain
->mappings
);
295 trace_virtio_iommu_put_domain(domain
->id
);
299 static AddressSpace
*virtio_iommu_find_add_as(PCIBus
*bus
, void *opaque
,
302 VirtIOIOMMU
*s
= opaque
;
303 IOMMUPciBus
*sbus
= g_hash_table_lookup(s
->as_by_busptr
, bus
);
304 static uint32_t mr_index
;
308 sbus
= g_malloc0(sizeof(IOMMUPciBus
) +
309 sizeof(IOMMUDevice
*) * PCI_DEVFN_MAX
);
311 g_hash_table_insert(s
->as_by_busptr
, bus
, sbus
);
314 sdev
= sbus
->pbdev
[devfn
];
316 char *name
= g_strdup_printf("%s-%d-%d",
317 TYPE_VIRTIO_IOMMU_MEMORY_REGION
,
319 sdev
= sbus
->pbdev
[devfn
] = g_new0(IOMMUDevice
, 1);
325 trace_virtio_iommu_init_iommu_mr(name
);
327 memory_region_init_iommu(&sdev
->iommu_mr
, sizeof(sdev
->iommu_mr
),
328 TYPE_VIRTIO_IOMMU_MEMORY_REGION
,
331 address_space_init(&sdev
->as
,
332 MEMORY_REGION(&sdev
->iommu_mr
), TYPE_VIRTIO_IOMMU
);
338 static int virtio_iommu_attach(VirtIOIOMMU
*s
,
339 struct virtio_iommu_req_attach
*req
)
341 uint32_t domain_id
= le32_to_cpu(req
->domain
);
342 uint32_t ep_id
= le32_to_cpu(req
->endpoint
);
343 uint32_t flags
= le32_to_cpu(req
->flags
);
344 VirtIOIOMMUDomain
*domain
;
345 VirtIOIOMMUEndpoint
*ep
;
347 trace_virtio_iommu_attach(domain_id
, ep_id
);
349 if (flags
& ~VIRTIO_IOMMU_ATTACH_F_BYPASS
) {
350 return VIRTIO_IOMMU_S_INVAL
;
353 ep
= virtio_iommu_get_endpoint(s
, ep_id
);
355 return VIRTIO_IOMMU_S_NOENT
;
359 VirtIOIOMMUDomain
*previous_domain
= ep
->domain
;
361 * the device is already attached to a domain,
364 virtio_iommu_detach_endpoint_from_domain(ep
);
365 if (QLIST_EMPTY(&previous_domain
->endpoint_list
)) {
366 g_tree_remove(s
->domains
, GUINT_TO_POINTER(previous_domain
->id
));
370 domain
= virtio_iommu_get_domain(s
, domain_id
,
371 flags
& VIRTIO_IOMMU_ATTACH_F_BYPASS
);
373 /* Incompatible bypass flag */
374 return VIRTIO_IOMMU_S_INVAL
;
376 QLIST_INSERT_HEAD(&domain
->endpoint_list
, ep
, next
);
380 /* Replay domain mappings on the associated memory region */
381 g_tree_foreach(domain
->mappings
, virtio_iommu_notify_map_cb
,
384 return VIRTIO_IOMMU_S_OK
;
387 static int virtio_iommu_detach(VirtIOIOMMU
*s
,
388 struct virtio_iommu_req_detach
*req
)
390 uint32_t domain_id
= le32_to_cpu(req
->domain
);
391 uint32_t ep_id
= le32_to_cpu(req
->endpoint
);
392 VirtIOIOMMUDomain
*domain
;
393 VirtIOIOMMUEndpoint
*ep
;
395 trace_virtio_iommu_detach(domain_id
, ep_id
);
397 ep
= g_tree_lookup(s
->endpoints
, GUINT_TO_POINTER(ep_id
));
399 return VIRTIO_IOMMU_S_NOENT
;
404 if (!domain
|| domain
->id
!= domain_id
) {
405 return VIRTIO_IOMMU_S_INVAL
;
408 virtio_iommu_detach_endpoint_from_domain(ep
);
410 if (QLIST_EMPTY(&domain
->endpoint_list
)) {
411 g_tree_remove(s
->domains
, GUINT_TO_POINTER(domain
->id
));
413 return VIRTIO_IOMMU_S_OK
;
416 static int virtio_iommu_map(VirtIOIOMMU
*s
,
417 struct virtio_iommu_req_map
*req
)
419 uint32_t domain_id
= le32_to_cpu(req
->domain
);
420 uint64_t phys_start
= le64_to_cpu(req
->phys_start
);
421 uint64_t virt_start
= le64_to_cpu(req
->virt_start
);
422 uint64_t virt_end
= le64_to_cpu(req
->virt_end
);
423 uint32_t flags
= le32_to_cpu(req
->flags
);
424 VirtIOIOMMUDomain
*domain
;
425 VirtIOIOMMUInterval
*interval
;
426 VirtIOIOMMUMapping
*mapping
;
427 VirtIOIOMMUEndpoint
*ep
;
429 if (flags
& ~VIRTIO_IOMMU_MAP_F_MASK
) {
430 return VIRTIO_IOMMU_S_INVAL
;
433 domain
= g_tree_lookup(s
->domains
, GUINT_TO_POINTER(domain_id
));
435 return VIRTIO_IOMMU_S_NOENT
;
438 if (domain
->bypass
) {
439 return VIRTIO_IOMMU_S_INVAL
;
442 interval
= g_malloc0(sizeof(*interval
));
444 interval
->low
= virt_start
;
445 interval
->high
= virt_end
;
447 mapping
= g_tree_lookup(domain
->mappings
, (gpointer
)interval
);
450 return VIRTIO_IOMMU_S_INVAL
;
453 trace_virtio_iommu_map(domain_id
, virt_start
, virt_end
, phys_start
, flags
);
455 mapping
= g_malloc0(sizeof(*mapping
));
456 mapping
->phys_addr
= phys_start
;
457 mapping
->flags
= flags
;
459 g_tree_insert(domain
->mappings
, interval
, mapping
);
461 QLIST_FOREACH(ep
, &domain
->endpoint_list
, next
) {
462 virtio_iommu_notify_map(ep
->iommu_mr
, virt_start
, virt_end
, phys_start
,
466 return VIRTIO_IOMMU_S_OK
;
469 static int virtio_iommu_unmap(VirtIOIOMMU
*s
,
470 struct virtio_iommu_req_unmap
*req
)
472 uint32_t domain_id
= le32_to_cpu(req
->domain
);
473 uint64_t virt_start
= le64_to_cpu(req
->virt_start
);
474 uint64_t virt_end
= le64_to_cpu(req
->virt_end
);
475 VirtIOIOMMUMapping
*iter_val
;
476 VirtIOIOMMUInterval interval
, *iter_key
;
477 VirtIOIOMMUDomain
*domain
;
478 VirtIOIOMMUEndpoint
*ep
;
479 int ret
= VIRTIO_IOMMU_S_OK
;
481 trace_virtio_iommu_unmap(domain_id
, virt_start
, virt_end
);
483 domain
= g_tree_lookup(s
->domains
, GUINT_TO_POINTER(domain_id
));
485 return VIRTIO_IOMMU_S_NOENT
;
488 if (domain
->bypass
) {
489 return VIRTIO_IOMMU_S_INVAL
;
492 interval
.low
= virt_start
;
493 interval
.high
= virt_end
;
495 while (g_tree_lookup_extended(domain
->mappings
, &interval
,
496 (void **)&iter_key
, (void**)&iter_val
)) {
497 uint64_t current_low
= iter_key
->low
;
498 uint64_t current_high
= iter_key
->high
;
500 if (interval
.low
<= current_low
&& interval
.high
>= current_high
) {
501 QLIST_FOREACH(ep
, &domain
->endpoint_list
, next
) {
502 virtio_iommu_notify_unmap(ep
->iommu_mr
, current_low
,
505 g_tree_remove(domain
->mappings
, iter_key
);
506 trace_virtio_iommu_unmap_done(domain_id
, current_low
, current_high
);
508 ret
= VIRTIO_IOMMU_S_RANGE
;
515 static ssize_t
virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU
*s
, uint32_t ep
,
516 uint8_t *buf
, size_t free
)
518 struct virtio_iommu_probe_resv_mem prop
= {};
519 size_t size
= sizeof(prop
), length
= size
- sizeof(prop
.head
), total
;
522 total
= size
* s
->nb_reserved_regions
;
528 for (i
= 0; i
< s
->nb_reserved_regions
; i
++) {
529 unsigned subtype
= s
->reserved_regions
[i
].type
;
531 assert(subtype
== VIRTIO_IOMMU_RESV_MEM_T_RESERVED
||
532 subtype
== VIRTIO_IOMMU_RESV_MEM_T_MSI
);
533 prop
.head
.type
= cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM
);
534 prop
.head
.length
= cpu_to_le16(length
);
535 prop
.subtype
= subtype
;
536 prop
.start
= cpu_to_le64(s
->reserved_regions
[i
].low
);
537 prop
.end
= cpu_to_le64(s
->reserved_regions
[i
].high
);
539 memcpy(buf
, &prop
, size
);
541 trace_virtio_iommu_fill_resv_property(ep
, prop
.subtype
,
542 prop
.start
, prop
.end
);
549 * virtio_iommu_probe - Fill the probe request buffer with
550 * the properties the device is able to return
552 static int virtio_iommu_probe(VirtIOIOMMU
*s
,
553 struct virtio_iommu_req_probe
*req
,
556 uint32_t ep_id
= le32_to_cpu(req
->endpoint
);
557 size_t free
= VIOMMU_PROBE_SIZE
;
560 if (!virtio_iommu_mr(s
, ep_id
)) {
561 return VIRTIO_IOMMU_S_NOENT
;
564 count
= virtio_iommu_fill_resv_mem_prop(s
, ep_id
, buf
, free
);
566 return VIRTIO_IOMMU_S_INVAL
;
571 return VIRTIO_IOMMU_S_OK
;
574 static int virtio_iommu_iov_to_req(struct iovec
*iov
,
575 unsigned int iov_cnt
,
576 void *req
, size_t req_sz
)
578 size_t sz
, payload_sz
= req_sz
- sizeof(struct virtio_iommu_req_tail
);
580 sz
= iov_to_buf(iov
, iov_cnt
, 0, req
, payload_sz
);
581 if (unlikely(sz
!= payload_sz
)) {
582 return VIRTIO_IOMMU_S_INVAL
;
587 #define virtio_iommu_handle_req(__req) \
588 static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \
590 unsigned int iov_cnt) \
592 struct virtio_iommu_req_ ## __req req; \
593 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
595 return ret ? ret : virtio_iommu_ ## __req(s, &req); \
598 virtio_iommu_handle_req(attach
)
599 virtio_iommu_handle_req(detach
)
600 virtio_iommu_handle_req(map
)
601 virtio_iommu_handle_req(unmap
)
603 static int virtio_iommu_handle_probe(VirtIOIOMMU
*s
,
605 unsigned int iov_cnt
,
608 struct virtio_iommu_req_probe req
;
609 int ret
= virtio_iommu_iov_to_req(iov
, iov_cnt
, &req
, sizeof(req
));
611 return ret
? ret
: virtio_iommu_probe(s
, &req
, buf
);
614 static void virtio_iommu_handle_command(VirtIODevice
*vdev
, VirtQueue
*vq
)
616 VirtIOIOMMU
*s
= VIRTIO_IOMMU(vdev
);
617 struct virtio_iommu_req_head head
;
618 struct virtio_iommu_req_tail tail
= {};
619 size_t output_size
= sizeof(tail
), sz
;
620 VirtQueueElement
*elem
;
621 unsigned int iov_cnt
;
626 elem
= virtqueue_pop(vq
, sizeof(VirtQueueElement
));
631 if (iov_size(elem
->in_sg
, elem
->in_num
) < sizeof(tail
) ||
632 iov_size(elem
->out_sg
, elem
->out_num
) < sizeof(head
)) {
633 virtio_error(vdev
, "virtio-iommu bad head/tail size");
634 virtqueue_detach_element(vq
, elem
, 0);
639 iov_cnt
= elem
->out_num
;
641 sz
= iov_to_buf(iov
, iov_cnt
, 0, &head
, sizeof(head
));
642 if (unlikely(sz
!= sizeof(head
))) {
643 tail
.status
= VIRTIO_IOMMU_S_DEVERR
;
646 qemu_mutex_lock(&s
->mutex
);
648 case VIRTIO_IOMMU_T_ATTACH
:
649 tail
.status
= virtio_iommu_handle_attach(s
, iov
, iov_cnt
);
651 case VIRTIO_IOMMU_T_DETACH
:
652 tail
.status
= virtio_iommu_handle_detach(s
, iov
, iov_cnt
);
654 case VIRTIO_IOMMU_T_MAP
:
655 tail
.status
= virtio_iommu_handle_map(s
, iov
, iov_cnt
);
657 case VIRTIO_IOMMU_T_UNMAP
:
658 tail
.status
= virtio_iommu_handle_unmap(s
, iov
, iov_cnt
);
660 case VIRTIO_IOMMU_T_PROBE
:
662 struct virtio_iommu_req_tail
*ptail
;
664 output_size
= s
->config
.probe_size
+ sizeof(tail
);
665 buf
= g_malloc0(output_size
);
667 ptail
= (struct virtio_iommu_req_tail
*)
668 (buf
+ s
->config
.probe_size
);
669 ptail
->status
= virtio_iommu_handle_probe(s
, iov
, iov_cnt
, buf
);
673 tail
.status
= VIRTIO_IOMMU_S_UNSUPP
;
675 qemu_mutex_unlock(&s
->mutex
);
678 sz
= iov_from_buf(elem
->in_sg
, elem
->in_num
, 0,
679 buf
? buf
: &tail
, output_size
);
680 assert(sz
== output_size
);
682 virtqueue_push(vq
, elem
, sz
);
683 virtio_notify(vdev
, vq
);
689 static void virtio_iommu_report_fault(VirtIOIOMMU
*viommu
, uint8_t reason
,
690 int flags
, uint32_t endpoint
,
693 VirtIODevice
*vdev
= &viommu
->parent_obj
;
694 VirtQueue
*vq
= viommu
->event_vq
;
695 struct virtio_iommu_fault fault
;
696 VirtQueueElement
*elem
;
699 memset(&fault
, 0, sizeof(fault
));
700 fault
.reason
= reason
;
701 fault
.flags
= cpu_to_le32(flags
);
702 fault
.endpoint
= cpu_to_le32(endpoint
);
703 fault
.address
= cpu_to_le64(address
);
705 elem
= virtqueue_pop(vq
, sizeof(VirtQueueElement
));
709 "no buffer available in event queue to report event");
713 if (iov_size(elem
->in_sg
, elem
->in_num
) < sizeof(fault
)) {
714 virtio_error(vdev
, "error buffer of wrong size");
715 virtqueue_detach_element(vq
, elem
, 0);
720 sz
= iov_from_buf(elem
->in_sg
, elem
->in_num
, 0,
721 &fault
, sizeof(fault
));
722 assert(sz
== sizeof(fault
));
724 trace_virtio_iommu_report_fault(reason
, flags
, endpoint
, address
);
725 virtqueue_push(vq
, elem
, sz
);
726 virtio_notify(vdev
, vq
);
731 static IOMMUTLBEntry
virtio_iommu_translate(IOMMUMemoryRegion
*mr
, hwaddr addr
,
732 IOMMUAccessFlags flag
,
735 IOMMUDevice
*sdev
= container_of(mr
, IOMMUDevice
, iommu_mr
);
736 VirtIOIOMMUInterval interval
, *mapping_key
;
737 VirtIOIOMMUMapping
*mapping_value
;
738 VirtIOIOMMU
*s
= sdev
->viommu
;
739 bool read_fault
, write_fault
;
740 VirtIOIOMMUEndpoint
*ep
;
747 interval
.high
= addr
+ 1;
749 IOMMUTLBEntry entry
= {
750 .target_as
= &address_space_memory
,
752 .translated_addr
= addr
,
753 .addr_mask
= (1 << ctz32(s
->config
.page_size_mask
)) - 1,
757 bypass_allowed
= s
->config
.bypass
;
759 sid
= virtio_iommu_get_bdf(sdev
);
761 trace_virtio_iommu_translate(mr
->parent_obj
.name
, sid
, addr
, flag
);
762 qemu_mutex_lock(&s
->mutex
);
764 ep
= g_tree_lookup(s
->endpoints
, GUINT_TO_POINTER(sid
));
766 if (!bypass_allowed
) {
767 error_report_once("%s sid=%d is not known!!", __func__
, sid
);
768 virtio_iommu_report_fault(s
, VIRTIO_IOMMU_FAULT_R_UNKNOWN
,
769 VIRTIO_IOMMU_FAULT_F_ADDRESS
,
777 for (i
= 0; i
< s
->nb_reserved_regions
; i
++) {
778 ReservedRegion
*reg
= &s
->reserved_regions
[i
];
780 if (addr
>= reg
->low
&& addr
<= reg
->high
) {
782 case VIRTIO_IOMMU_RESV_MEM_T_MSI
:
785 case VIRTIO_IOMMU_RESV_MEM_T_RESERVED
:
787 virtio_iommu_report_fault(s
, VIRTIO_IOMMU_FAULT_R_MAPPING
,
788 VIRTIO_IOMMU_FAULT_F_ADDRESS
,
797 if (!bypass_allowed
) {
798 error_report_once("%s %02x:%02x.%01x not attached to any domain",
799 __func__
, PCI_BUS_NUM(sid
),
800 PCI_SLOT(sid
), PCI_FUNC(sid
));
801 virtio_iommu_report_fault(s
, VIRTIO_IOMMU_FAULT_R_DOMAIN
,
802 VIRTIO_IOMMU_FAULT_F_ADDRESS
,
808 } else if (ep
->domain
->bypass
) {
813 found
= g_tree_lookup_extended(ep
->domain
->mappings
, (gpointer
)(&interval
),
814 (void **)&mapping_key
,
815 (void **)&mapping_value
);
817 error_report_once("%s no mapping for 0x%"PRIx64
" for sid=%d",
818 __func__
, addr
, sid
);
819 virtio_iommu_report_fault(s
, VIRTIO_IOMMU_FAULT_R_MAPPING
,
820 VIRTIO_IOMMU_FAULT_F_ADDRESS
,
825 read_fault
= (flag
& IOMMU_RO
) &&
826 !(mapping_value
->flags
& VIRTIO_IOMMU_MAP_F_READ
);
827 write_fault
= (flag
& IOMMU_WO
) &&
828 !(mapping_value
->flags
& VIRTIO_IOMMU_MAP_F_WRITE
);
830 flags
= read_fault
? VIRTIO_IOMMU_FAULT_F_READ
: 0;
831 flags
|= write_fault
? VIRTIO_IOMMU_FAULT_F_WRITE
: 0;
833 error_report_once("%s permission error on 0x%"PRIx64
"(%d): allowed=%d",
834 __func__
, addr
, flag
, mapping_value
->flags
);
835 flags
|= VIRTIO_IOMMU_FAULT_F_ADDRESS
;
836 virtio_iommu_report_fault(s
, VIRTIO_IOMMU_FAULT_R_MAPPING
,
837 flags
| VIRTIO_IOMMU_FAULT_F_ADDRESS
,
841 entry
.translated_addr
= addr
- mapping_key
->low
+ mapping_value
->phys_addr
;
843 trace_virtio_iommu_translate_out(addr
, entry
.translated_addr
, sid
);
846 qemu_mutex_unlock(&s
->mutex
);
850 static void virtio_iommu_get_config(VirtIODevice
*vdev
, uint8_t *config_data
)
852 VirtIOIOMMU
*dev
= VIRTIO_IOMMU(vdev
);
853 struct virtio_iommu_config
*dev_config
= &dev
->config
;
854 struct virtio_iommu_config
*out_config
= (void *)config_data
;
856 out_config
->page_size_mask
= cpu_to_le64(dev_config
->page_size_mask
);
857 out_config
->input_range
.start
= cpu_to_le64(dev_config
->input_range
.start
);
858 out_config
->input_range
.end
= cpu_to_le64(dev_config
->input_range
.end
);
859 out_config
->domain_range
.start
= cpu_to_le32(dev_config
->domain_range
.start
);
860 out_config
->domain_range
.end
= cpu_to_le32(dev_config
->domain_range
.end
);
861 out_config
->probe_size
= cpu_to_le32(dev_config
->probe_size
);
862 out_config
->bypass
= dev_config
->bypass
;
864 trace_virtio_iommu_get_config(dev_config
->page_size_mask
,
865 dev_config
->input_range
.start
,
866 dev_config
->input_range
.end
,
867 dev_config
->domain_range
.start
,
868 dev_config
->domain_range
.end
,
869 dev_config
->probe_size
,
873 static void virtio_iommu_set_config(VirtIODevice
*vdev
,
874 const uint8_t *config_data
)
876 VirtIOIOMMU
*dev
= VIRTIO_IOMMU(vdev
);
877 struct virtio_iommu_config
*dev_config
= &dev
->config
;
878 const struct virtio_iommu_config
*in_config
= (void *)config_data
;
880 if (in_config
->bypass
!= dev_config
->bypass
) {
881 if (!virtio_vdev_has_feature(vdev
, VIRTIO_IOMMU_F_BYPASS_CONFIG
)) {
882 virtio_error(vdev
, "cannot set config.bypass");
884 } else if (in_config
->bypass
!= 0 && in_config
->bypass
!= 1) {
885 virtio_error(vdev
, "invalid config.bypass value '%u'",
889 dev_config
->bypass
= in_config
->bypass
;
892 trace_virtio_iommu_set_config(in_config
->bypass
);
895 static uint64_t virtio_iommu_get_features(VirtIODevice
*vdev
, uint64_t f
,
898 VirtIOIOMMU
*dev
= VIRTIO_IOMMU(vdev
);
901 trace_virtio_iommu_get_features(f
);
905 static gint
int_cmp(gconstpointer a
, gconstpointer b
, gpointer user_data
)
907 guint ua
= GPOINTER_TO_UINT(a
);
908 guint ub
= GPOINTER_TO_UINT(b
);
909 return (ua
> ub
) - (ua
< ub
);
912 static gboolean
virtio_iommu_remap(gpointer key
, gpointer value
, gpointer data
)
914 VirtIOIOMMUMapping
*mapping
= (VirtIOIOMMUMapping
*) value
;
915 VirtIOIOMMUInterval
*interval
= (VirtIOIOMMUInterval
*) key
;
916 IOMMUMemoryRegion
*mr
= (IOMMUMemoryRegion
*) data
;
918 trace_virtio_iommu_remap(mr
->parent_obj
.name
, interval
->low
, interval
->high
,
920 virtio_iommu_notify_map(mr
, interval
->low
, interval
->high
,
921 mapping
->phys_addr
, mapping
->flags
);
925 static void virtio_iommu_replay(IOMMUMemoryRegion
*mr
, IOMMUNotifier
*n
)
927 IOMMUDevice
*sdev
= container_of(mr
, IOMMUDevice
, iommu_mr
);
928 VirtIOIOMMU
*s
= sdev
->viommu
;
930 VirtIOIOMMUEndpoint
*ep
;
932 sid
= virtio_iommu_get_bdf(sdev
);
934 qemu_mutex_lock(&s
->mutex
);
940 ep
= g_tree_lookup(s
->endpoints
, GUINT_TO_POINTER(sid
));
941 if (!ep
|| !ep
->domain
) {
945 g_tree_foreach(ep
->domain
->mappings
, virtio_iommu_remap
, mr
);
948 qemu_mutex_unlock(&s
->mutex
);
951 static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion
*iommu_mr
,
952 IOMMUNotifierFlag old
,
953 IOMMUNotifierFlag
new,
956 if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP
) {
957 error_setg(errp
, "Virtio-iommu does not support dev-iotlb yet");
961 if (old
== IOMMU_NOTIFIER_NONE
) {
962 trace_virtio_iommu_notify_flag_add(iommu_mr
->parent_obj
.name
);
963 } else if (new == IOMMU_NOTIFIER_NONE
) {
964 trace_virtio_iommu_notify_flag_del(iommu_mr
->parent_obj
.name
);
970 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
971 * for example 0xfffffffffffff000. When an assigned device has page size
972 * restrictions due to the hardware IOMMU configuration, apply this restriction
975 static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion
*mr
,
979 IOMMUDevice
*sdev
= container_of(mr
, IOMMUDevice
, iommu_mr
);
980 VirtIOIOMMU
*s
= sdev
->viommu
;
981 uint64_t cur_mask
= s
->config
.page_size_mask
;
983 trace_virtio_iommu_set_page_size_mask(mr
->parent_obj
.name
, cur_mask
,
986 if ((cur_mask
& new_mask
) == 0) {
987 error_setg(errp
, "virtio-iommu page mask 0x%"PRIx64
988 " is incompatible with mask 0x%"PRIx64
, cur_mask
, new_mask
);
993 * After the machine is finalized, we can't change the mask anymore. If by
994 * chance the hotplugged device supports the same granule, we can still
995 * accept it. Having a different masks is possible but the guest will use
996 * sub-optimal block sizes, so warn about it.
998 if (phase_check(PHASE_MACHINE_READY
)) {
999 int new_granule
= ctz64(new_mask
);
1000 int cur_granule
= ctz64(cur_mask
);
1002 if (new_granule
!= cur_granule
) {
1003 error_setg(errp
, "virtio-iommu page mask 0x%"PRIx64
1004 " is incompatible with mask 0x%"PRIx64
, cur_mask
,
1007 } else if (new_mask
!= cur_mask
) {
1008 warn_report("virtio-iommu page mask 0x%"PRIx64
1009 " does not match 0x%"PRIx64
, cur_mask
, new_mask
);
1014 s
->config
.page_size_mask
&= new_mask
;
1018 static void virtio_iommu_system_reset(void *opaque
)
1020 VirtIOIOMMU
*s
= opaque
;
1022 trace_virtio_iommu_system_reset();
1025 * config.bypass is sticky across device reset, but should be restored on
1028 s
->config
.bypass
= s
->boot_bypass
;
1031 static void virtio_iommu_device_realize(DeviceState
*dev
, Error
**errp
)
1033 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
1034 VirtIOIOMMU
*s
= VIRTIO_IOMMU(dev
);
1036 virtio_init(vdev
, "virtio-iommu", VIRTIO_ID_IOMMU
,
1037 sizeof(struct virtio_iommu_config
));
1039 memset(s
->iommu_pcibus_by_bus_num
, 0, sizeof(s
->iommu_pcibus_by_bus_num
));
1041 s
->req_vq
= virtio_add_queue(vdev
, VIOMMU_DEFAULT_QUEUE_SIZE
,
1042 virtio_iommu_handle_command
);
1043 s
->event_vq
= virtio_add_queue(vdev
, VIOMMU_DEFAULT_QUEUE_SIZE
, NULL
);
1045 s
->config
.page_size_mask
= TARGET_PAGE_MASK
;
1046 s
->config
.input_range
.end
= UINT64_MAX
;
1047 s
->config
.domain_range
.end
= UINT32_MAX
;
1048 s
->config
.probe_size
= VIOMMU_PROBE_SIZE
;
1050 virtio_add_feature(&s
->features
, VIRTIO_RING_F_EVENT_IDX
);
1051 virtio_add_feature(&s
->features
, VIRTIO_RING_F_INDIRECT_DESC
);
1052 virtio_add_feature(&s
->features
, VIRTIO_F_VERSION_1
);
1053 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_INPUT_RANGE
);
1054 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_DOMAIN_RANGE
);
1055 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_MAP_UNMAP
);
1056 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_MMIO
);
1057 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_PROBE
);
1058 virtio_add_feature(&s
->features
, VIRTIO_IOMMU_F_BYPASS_CONFIG
);
1060 qemu_mutex_init(&s
->mutex
);
1062 s
->as_by_busptr
= g_hash_table_new_full(NULL
, NULL
, NULL
, g_free
);
1064 if (s
->primary_bus
) {
1065 pci_setup_iommu(s
->primary_bus
, virtio_iommu_find_add_as
, s
);
1067 error_setg(errp
, "VIRTIO-IOMMU is not attached to any PCI bus!");
1070 qemu_register_reset(virtio_iommu_system_reset
, s
);
1073 static void virtio_iommu_device_unrealize(DeviceState
*dev
)
1075 VirtIODevice
*vdev
= VIRTIO_DEVICE(dev
);
1076 VirtIOIOMMU
*s
= VIRTIO_IOMMU(dev
);
1078 qemu_unregister_reset(virtio_iommu_system_reset
, s
);
1080 g_hash_table_destroy(s
->as_by_busptr
);
1082 g_tree_destroy(s
->domains
);
1085 g_tree_destroy(s
->endpoints
);
1088 virtio_delete_queue(s
->req_vq
);
1089 virtio_delete_queue(s
->event_vq
);
1090 virtio_cleanup(vdev
);
1093 static void virtio_iommu_device_reset(VirtIODevice
*vdev
)
1095 VirtIOIOMMU
*s
= VIRTIO_IOMMU(vdev
);
1097 trace_virtio_iommu_device_reset();
1100 g_tree_destroy(s
->domains
);
1103 g_tree_destroy(s
->endpoints
);
1105 s
->domains
= g_tree_new_full((GCompareDataFunc
)int_cmp
,
1106 NULL
, NULL
, virtio_iommu_put_domain
);
1107 s
->endpoints
= g_tree_new_full((GCompareDataFunc
)int_cmp
,
1108 NULL
, NULL
, virtio_iommu_put_endpoint
);
1111 static void virtio_iommu_set_status(VirtIODevice
*vdev
, uint8_t status
)
1113 trace_virtio_iommu_device_status(status
);
1116 static void virtio_iommu_instance_init(Object
*obj
)
/* Migration description of a VirtIOIOMMUInterval (key of a mapping). */
#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}
/* Migration description of a VirtIOIOMMUMapping (value of a mapping). */
#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    }                                                 \
}
1144 static const VMStateDescription vmstate_interval_mapping
[2] = {
1145 VMSTATE_MAPPING
, /* value */
1146 VMSTATE_INTERVAL
/* key */
1149 static int domain_preload(void *opaque
)
1151 VirtIOIOMMUDomain
*domain
= opaque
;
1153 domain
->mappings
= g_tree_new_full((GCompareDataFunc
)interval_cmp
,
1154 NULL
, g_free
, g_free
);
1158 static const VMStateDescription vmstate_endpoint
= {
1161 .minimum_version_id
= 1,
1162 .fields
= (VMStateField
[]) {
1163 VMSTATE_UINT32(id
, VirtIOIOMMUEndpoint
),
1164 VMSTATE_END_OF_LIST()
1168 static const VMStateDescription vmstate_domain
= {
1171 .minimum_version_id
= 2,
1172 .pre_load
= domain_preload
,
1173 .fields
= (VMStateField
[]) {
1174 VMSTATE_UINT32(id
, VirtIOIOMMUDomain
),
1175 VMSTATE_GTREE_V(mappings
, VirtIOIOMMUDomain
, 1,
1176 vmstate_interval_mapping
,
1177 VirtIOIOMMUInterval
, VirtIOIOMMUMapping
),
1178 VMSTATE_QLIST_V(endpoint_list
, VirtIOIOMMUDomain
, 1,
1179 vmstate_endpoint
, VirtIOIOMMUEndpoint
, next
),
1180 VMSTATE_BOOL_V(bypass
, VirtIOIOMMUDomain
, 2),
1181 VMSTATE_END_OF_LIST()
1185 static gboolean
reconstruct_endpoints(gpointer key
, gpointer value
,
1188 VirtIOIOMMU
*s
= (VirtIOIOMMU
*)data
;
1189 VirtIOIOMMUDomain
*d
= (VirtIOIOMMUDomain
*)value
;
1190 VirtIOIOMMUEndpoint
*iter
;
1191 IOMMUMemoryRegion
*mr
;
1193 QLIST_FOREACH(iter
, &d
->endpoint_list
, next
) {
1194 mr
= virtio_iommu_mr(s
, iter
->id
);
1198 iter
->iommu_mr
= mr
;
1199 g_tree_insert(s
->endpoints
, GUINT_TO_POINTER(iter
->id
), iter
);
1201 return false; /* continue the domain traversal */
1204 static int iommu_post_load(void *opaque
, int version_id
)
1206 VirtIOIOMMU
*s
= opaque
;
1208 g_tree_foreach(s
->domains
, reconstruct_endpoints
, s
);
1212 static const VMStateDescription vmstate_virtio_iommu_device
= {
1213 .name
= "virtio-iommu-device",
1214 .minimum_version_id
= 2,
1216 .post_load
= iommu_post_load
,
1217 .fields
= (VMStateField
[]) {
1218 VMSTATE_GTREE_DIRECT_KEY_V(domains
, VirtIOIOMMU
, 2,
1219 &vmstate_domain
, VirtIOIOMMUDomain
),
1220 VMSTATE_UINT8_V(config
.bypass
, VirtIOIOMMU
, 2),
1221 VMSTATE_END_OF_LIST()
1225 static const VMStateDescription vmstate_virtio_iommu
= {
1226 .name
= "virtio-iommu",
1227 .minimum_version_id
= 2,
1228 .priority
= MIG_PRI_IOMMU
,
1230 .fields
= (VMStateField
[]) {
1231 VMSTATE_VIRTIO_DEVICE
,
1232 VMSTATE_END_OF_LIST()
1236 static Property virtio_iommu_properties
[] = {
1237 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU
, primary_bus
, "PCI", PCIBus
*),
1238 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU
, boot_bypass
, true),
1239 DEFINE_PROP_END_OF_LIST(),
1242 static void virtio_iommu_class_init(ObjectClass
*klass
, void *data
)
1244 DeviceClass
*dc
= DEVICE_CLASS(klass
);
1245 VirtioDeviceClass
*vdc
= VIRTIO_DEVICE_CLASS(klass
);
1247 device_class_set_props(dc
, virtio_iommu_properties
);
1248 dc
->vmsd
= &vmstate_virtio_iommu
;
1250 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
1251 vdc
->realize
= virtio_iommu_device_realize
;
1252 vdc
->unrealize
= virtio_iommu_device_unrealize
;
1253 vdc
->reset
= virtio_iommu_device_reset
;
1254 vdc
->get_config
= virtio_iommu_get_config
;
1255 vdc
->set_config
= virtio_iommu_set_config
;
1256 vdc
->get_features
= virtio_iommu_get_features
;
1257 vdc
->set_status
= virtio_iommu_set_status
;
1258 vdc
->vmsd
= &vmstate_virtio_iommu_device
;
1261 static void virtio_iommu_memory_region_class_init(ObjectClass
*klass
,
1264 IOMMUMemoryRegionClass
*imrc
= IOMMU_MEMORY_REGION_CLASS(klass
);
1266 imrc
->translate
= virtio_iommu_translate
;
1267 imrc
->replay
= virtio_iommu_replay
;
1268 imrc
->notify_flag_changed
= virtio_iommu_notify_flag_changed
;
1269 imrc
->iommu_set_page_size_mask
= virtio_iommu_set_page_size_mask
;
1272 static const TypeInfo virtio_iommu_info
= {
1273 .name
= TYPE_VIRTIO_IOMMU
,
1274 .parent
= TYPE_VIRTIO_DEVICE
,
1275 .instance_size
= sizeof(VirtIOIOMMU
),
1276 .instance_init
= virtio_iommu_instance_init
,
1277 .class_init
= virtio_iommu_class_init
,
1280 static const TypeInfo virtio_iommu_memory_region_info
= {
1281 .parent
= TYPE_IOMMU_MEMORY_REGION
,
1282 .name
= TYPE_VIRTIO_IOMMU_MEMORY_REGION
,
1283 .class_init
= virtio_iommu_memory_region_class_init
,
1286 static void virtio_register_types(void)
1288 type_register_static(&virtio_iommu_info
);
1289 type_register_static(&virtio_iommu_memory_region_info
);
1292 type_init(virtio_register_types
)