qapi: fix example of dump-guest-memory
[qemu/armbru.git] / hw / virtio / virtio-iommu.c
blob664cbd958323b19bb9cb96470fdbb7274a007499
1 /*
2 * virtio-iommu device
4 * Copyright (c) 2020 Red Hat, Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qemu/log.h"
22 #include "qemu/iov.h"
23 #include "qemu-common.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/virtio/virtio.h"
26 #include "sysemu/kvm.h"
27 #include "sysemu/reset.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "trace.h"
32 #include "standard-headers/linux/virtio_ids.h"
34 #include "hw/virtio/virtio-bus.h"
35 #include "hw/virtio/virtio-access.h"
36 #include "hw/virtio/virtio-iommu.h"
37 #include "hw/pci/pci_bus.h"
38 #include "hw/pci/pci.h"
/*
 * Sizing constants.
 * VIOMMU_DEFAULT_QUEUE_SIZE: size passed to virtio_add_queue() for both the
 *                            request and the event virtqueue.
 * VIOMMU_PROBE_SIZE:         byte size of the PROBE property area; stored in
 *                            config.probe_size at realize time.
 */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512
44 typedef struct VirtIOIOMMUDomain {
45 uint32_t id;
46 bool bypass;
47 GTree *mappings;
48 QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
49 } VirtIOIOMMUDomain;
51 typedef struct VirtIOIOMMUEndpoint {
52 uint32_t id;
53 VirtIOIOMMUDomain *domain;
54 IOMMUMemoryRegion *iommu_mr;
55 QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
56 } VirtIOIOMMUEndpoint;
/* Inclusive IOVA range [low, high]; used as key of a domain's mapping tree. */
typedef struct VirtIOIOMMUInterval {
    uint64_t low;   /* first address of the range */
    uint64_t high;  /* last address of the range (inclusive) */
} VirtIOIOMMUInterval;
/* Value stored in a domain's mapping tree for one VirtIOIOMMUInterval. */
typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;  /* physical address the interval's low end maps to */
    uint32_t flags;      /* VIRTIO_IOMMU_MAP_F_* flags supplied by the MAP */
} VirtIOIOMMUMapping;
68 static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
70 return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
73 /**
74 * The bus number is used for lookup when SID based operations occur.
75 * In that case we lazily populate the IOMMUPciBus array from the bus hash
76 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
77 * numbers may not be always initialized yet.
79 static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
81 IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];
83 if (!iommu_pci_bus) {
84 GHashTableIter iter;
86 g_hash_table_iter_init(&iter, s->as_by_busptr);
87 while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
88 if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
89 s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
90 return iommu_pci_bus;
93 return NULL;
95 return iommu_pci_bus;
98 static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
100 uint8_t bus_n, devfn;
101 IOMMUPciBus *iommu_pci_bus;
102 IOMMUDevice *dev;
104 bus_n = PCI_BUS_NUM(sid);
105 iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
106 if (iommu_pci_bus) {
107 devfn = sid & (PCI_DEVFN_MAX - 1);
108 dev = iommu_pci_bus->pbdev[devfn];
109 if (dev) {
110 return &dev->iommu_mr;
113 return NULL;
116 static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
118 VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
119 VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;
121 if (inta->high < intb->low) {
122 return -1;
123 } else if (intb->high < inta->low) {
124 return 1;
125 } else {
126 return 0;
130 static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
131 hwaddr virt_end, hwaddr paddr,
132 uint32_t flags)
134 IOMMUTLBEvent event;
135 IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
136 flags & VIRTIO_IOMMU_MAP_F_WRITE);
138 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
139 (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
140 return;
143 trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
144 paddr, perm);
146 event.type = IOMMU_NOTIFIER_MAP;
147 event.entry.target_as = &address_space_memory;
148 event.entry.addr_mask = virt_end - virt_start;
149 event.entry.iova = virt_start;
150 event.entry.perm = perm;
151 event.entry.translated_addr = paddr;
153 memory_region_notify_iommu(mr, 0, event);
156 static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
157 hwaddr virt_end)
159 IOMMUTLBEvent event;
160 uint64_t delta = virt_end - virt_start;
162 if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
163 return;
166 trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);
168 event.type = IOMMU_NOTIFIER_UNMAP;
169 event.entry.target_as = &address_space_memory;
170 event.entry.perm = IOMMU_NONE;
171 event.entry.translated_addr = 0;
172 event.entry.addr_mask = delta;
173 event.entry.iova = virt_start;
175 if (delta == UINT64_MAX) {
176 memory_region_notify_iommu(mr, 0, event);
180 while (virt_start != virt_end + 1) {
181 uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
183 event.entry.addr_mask = mask;
184 event.entry.iova = virt_start;
185 memory_region_notify_iommu(mr, 0, event);
186 virt_start += mask + 1;
190 static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
191 gpointer data)
193 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
194 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
196 virtio_iommu_notify_unmap(mr, interval->low, interval->high);
198 return false;
201 static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
202 gpointer data)
204 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
205 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
206 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
208 virtio_iommu_notify_map(mr, interval->low, interval->high,
209 mapping->phys_addr, mapping->flags);
211 return false;
214 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
216 VirtIOIOMMUDomain *domain = ep->domain;
218 if (!ep->domain) {
219 return;
221 g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
222 ep->iommu_mr);
223 QLIST_REMOVE(ep, next);
224 ep->domain = NULL;
227 static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
228 uint32_t ep_id)
230 VirtIOIOMMUEndpoint *ep;
231 IOMMUMemoryRegion *mr;
233 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
234 if (ep) {
235 return ep;
237 mr = virtio_iommu_mr(s, ep_id);
238 if (!mr) {
239 return NULL;
241 ep = g_malloc0(sizeof(*ep));
242 ep->id = ep_id;
243 ep->iommu_mr = mr;
244 trace_virtio_iommu_get_endpoint(ep_id);
245 g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
246 return ep;
249 static void virtio_iommu_put_endpoint(gpointer data)
251 VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;
253 if (ep->domain) {
254 virtio_iommu_detach_endpoint_from_domain(ep);
257 trace_virtio_iommu_put_endpoint(ep->id);
258 g_free(ep);
261 static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
262 uint32_t domain_id,
263 bool bypass)
265 VirtIOIOMMUDomain *domain;
267 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
268 if (domain) {
269 if (domain->bypass != bypass) {
270 return NULL;
272 return domain;
274 domain = g_malloc0(sizeof(*domain));
275 domain->id = domain_id;
276 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
277 NULL, (GDestroyNotify)g_free,
278 (GDestroyNotify)g_free);
279 domain->bypass = bypass;
280 g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
281 QLIST_INIT(&domain->endpoint_list);
282 trace_virtio_iommu_get_domain(domain_id);
283 return domain;
286 static void virtio_iommu_put_domain(gpointer data)
288 VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
289 VirtIOIOMMUEndpoint *iter, *tmp;
291 QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
292 virtio_iommu_detach_endpoint_from_domain(iter);
294 g_tree_destroy(domain->mappings);
295 trace_virtio_iommu_put_domain(domain->id);
296 g_free(domain);
299 static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
300 int devfn)
302 VirtIOIOMMU *s = opaque;
303 IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
304 static uint32_t mr_index;
305 IOMMUDevice *sdev;
307 if (!sbus) {
308 sbus = g_malloc0(sizeof(IOMMUPciBus) +
309 sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
310 sbus->bus = bus;
311 g_hash_table_insert(s->as_by_busptr, bus, sbus);
314 sdev = sbus->pbdev[devfn];
315 if (!sdev) {
316 char *name = g_strdup_printf("%s-%d-%d",
317 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
318 mr_index++, devfn);
319 sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);
321 sdev->viommu = s;
322 sdev->bus = bus;
323 sdev->devfn = devfn;
325 trace_virtio_iommu_init_iommu_mr(name);
327 memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
328 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
329 OBJECT(s), name,
330 UINT64_MAX);
331 address_space_init(&sdev->as,
332 MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
333 g_free(name);
335 return &sdev->as;
338 static int virtio_iommu_attach(VirtIOIOMMU *s,
339 struct virtio_iommu_req_attach *req)
341 uint32_t domain_id = le32_to_cpu(req->domain);
342 uint32_t ep_id = le32_to_cpu(req->endpoint);
343 uint32_t flags = le32_to_cpu(req->flags);
344 VirtIOIOMMUDomain *domain;
345 VirtIOIOMMUEndpoint *ep;
347 trace_virtio_iommu_attach(domain_id, ep_id);
349 if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
350 return VIRTIO_IOMMU_S_INVAL;
353 ep = virtio_iommu_get_endpoint(s, ep_id);
354 if (!ep) {
355 return VIRTIO_IOMMU_S_NOENT;
358 if (ep->domain) {
359 VirtIOIOMMUDomain *previous_domain = ep->domain;
361 * the device is already attached to a domain,
362 * detach it first
364 virtio_iommu_detach_endpoint_from_domain(ep);
365 if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
366 g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
370 domain = virtio_iommu_get_domain(s, domain_id,
371 flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
372 if (!domain) {
373 /* Incompatible bypass flag */
374 return VIRTIO_IOMMU_S_INVAL;
376 QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
378 ep->domain = domain;
380 /* Replay domain mappings on the associated memory region */
381 g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
382 ep->iommu_mr);
384 return VIRTIO_IOMMU_S_OK;
387 static int virtio_iommu_detach(VirtIOIOMMU *s,
388 struct virtio_iommu_req_detach *req)
390 uint32_t domain_id = le32_to_cpu(req->domain);
391 uint32_t ep_id = le32_to_cpu(req->endpoint);
392 VirtIOIOMMUDomain *domain;
393 VirtIOIOMMUEndpoint *ep;
395 trace_virtio_iommu_detach(domain_id, ep_id);
397 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
398 if (!ep) {
399 return VIRTIO_IOMMU_S_NOENT;
402 domain = ep->domain;
404 if (!domain || domain->id != domain_id) {
405 return VIRTIO_IOMMU_S_INVAL;
408 virtio_iommu_detach_endpoint_from_domain(ep);
410 if (QLIST_EMPTY(&domain->endpoint_list)) {
411 g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
413 return VIRTIO_IOMMU_S_OK;
416 static int virtio_iommu_map(VirtIOIOMMU *s,
417 struct virtio_iommu_req_map *req)
419 uint32_t domain_id = le32_to_cpu(req->domain);
420 uint64_t phys_start = le64_to_cpu(req->phys_start);
421 uint64_t virt_start = le64_to_cpu(req->virt_start);
422 uint64_t virt_end = le64_to_cpu(req->virt_end);
423 uint32_t flags = le32_to_cpu(req->flags);
424 VirtIOIOMMUDomain *domain;
425 VirtIOIOMMUInterval *interval;
426 VirtIOIOMMUMapping *mapping;
427 VirtIOIOMMUEndpoint *ep;
429 if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
430 return VIRTIO_IOMMU_S_INVAL;
433 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
434 if (!domain) {
435 return VIRTIO_IOMMU_S_NOENT;
438 if (domain->bypass) {
439 return VIRTIO_IOMMU_S_INVAL;
442 interval = g_malloc0(sizeof(*interval));
444 interval->low = virt_start;
445 interval->high = virt_end;
447 mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
448 if (mapping) {
449 g_free(interval);
450 return VIRTIO_IOMMU_S_INVAL;
453 trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);
455 mapping = g_malloc0(sizeof(*mapping));
456 mapping->phys_addr = phys_start;
457 mapping->flags = flags;
459 g_tree_insert(domain->mappings, interval, mapping);
461 QLIST_FOREACH(ep, &domain->endpoint_list, next) {
462 virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
463 flags);
466 return VIRTIO_IOMMU_S_OK;
469 static int virtio_iommu_unmap(VirtIOIOMMU *s,
470 struct virtio_iommu_req_unmap *req)
472 uint32_t domain_id = le32_to_cpu(req->domain);
473 uint64_t virt_start = le64_to_cpu(req->virt_start);
474 uint64_t virt_end = le64_to_cpu(req->virt_end);
475 VirtIOIOMMUMapping *iter_val;
476 VirtIOIOMMUInterval interval, *iter_key;
477 VirtIOIOMMUDomain *domain;
478 VirtIOIOMMUEndpoint *ep;
479 int ret = VIRTIO_IOMMU_S_OK;
481 trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);
483 domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
484 if (!domain) {
485 return VIRTIO_IOMMU_S_NOENT;
488 if (domain->bypass) {
489 return VIRTIO_IOMMU_S_INVAL;
492 interval.low = virt_start;
493 interval.high = virt_end;
495 while (g_tree_lookup_extended(domain->mappings, &interval,
496 (void **)&iter_key, (void**)&iter_val)) {
497 uint64_t current_low = iter_key->low;
498 uint64_t current_high = iter_key->high;
500 if (interval.low <= current_low && interval.high >= current_high) {
501 QLIST_FOREACH(ep, &domain->endpoint_list, next) {
502 virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
503 current_high);
505 g_tree_remove(domain->mappings, iter_key);
506 trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
507 } else {
508 ret = VIRTIO_IOMMU_S_RANGE;
509 break;
512 return ret;
515 static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
516 uint8_t *buf, size_t free)
518 struct virtio_iommu_probe_resv_mem prop = {};
519 size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
520 int i;
522 total = size * s->nb_reserved_regions;
524 if (total > free) {
525 return -ENOSPC;
528 for (i = 0; i < s->nb_reserved_regions; i++) {
529 unsigned subtype = s->reserved_regions[i].type;
531 assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
532 subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
533 prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
534 prop.head.length = cpu_to_le16(length);
535 prop.subtype = subtype;
536 prop.start = cpu_to_le64(s->reserved_regions[i].low);
537 prop.end = cpu_to_le64(s->reserved_regions[i].high);
539 memcpy(buf, &prop, size);
541 trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
542 prop.start, prop.end);
543 buf += size;
545 return total;
549 * virtio_iommu_probe - Fill the probe request buffer with
550 * the properties the device is able to return
552 static int virtio_iommu_probe(VirtIOIOMMU *s,
553 struct virtio_iommu_req_probe *req,
554 uint8_t *buf)
556 uint32_t ep_id = le32_to_cpu(req->endpoint);
557 size_t free = VIOMMU_PROBE_SIZE;
558 ssize_t count;
560 if (!virtio_iommu_mr(s, ep_id)) {
561 return VIRTIO_IOMMU_S_NOENT;
564 count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
565 if (count < 0) {
566 return VIRTIO_IOMMU_S_INVAL;
568 buf += count;
569 free -= count;
571 return VIRTIO_IOMMU_S_OK;
574 static int virtio_iommu_iov_to_req(struct iovec *iov,
575 unsigned int iov_cnt,
576 void *req, size_t req_sz)
578 size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);
580 sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
581 if (unlikely(sz != payload_sz)) {
582 return VIRTIO_IOMMU_S_INVAL;
584 return 0;
587 #define virtio_iommu_handle_req(__req) \
588 static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \
589 struct iovec *iov, \
590 unsigned int iov_cnt) \
592 struct virtio_iommu_req_ ## __req req; \
593 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
595 return ret ? ret : virtio_iommu_ ## __req(s, &req); \
598 virtio_iommu_handle_req(attach)
599 virtio_iommu_handle_req(detach)
600 virtio_iommu_handle_req(map)
601 virtio_iommu_handle_req(unmap)
603 static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
604 struct iovec *iov,
605 unsigned int iov_cnt,
606 uint8_t *buf)
608 struct virtio_iommu_req_probe req;
609 int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));
611 return ret ? ret : virtio_iommu_probe(s, &req, buf);
614 static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
616 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
617 struct virtio_iommu_req_head head;
618 struct virtio_iommu_req_tail tail = {};
619 size_t output_size = sizeof(tail), sz;
620 VirtQueueElement *elem;
621 unsigned int iov_cnt;
622 struct iovec *iov;
623 void *buf = NULL;
625 for (;;) {
626 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
627 if (!elem) {
628 return;
631 if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
632 iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
633 virtio_error(vdev, "virtio-iommu bad head/tail size");
634 virtqueue_detach_element(vq, elem, 0);
635 g_free(elem);
636 break;
639 iov_cnt = elem->out_num;
640 iov = elem->out_sg;
641 sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
642 if (unlikely(sz != sizeof(head))) {
643 tail.status = VIRTIO_IOMMU_S_DEVERR;
644 goto out;
646 qemu_mutex_lock(&s->mutex);
647 switch (head.type) {
648 case VIRTIO_IOMMU_T_ATTACH:
649 tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
650 break;
651 case VIRTIO_IOMMU_T_DETACH:
652 tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
653 break;
654 case VIRTIO_IOMMU_T_MAP:
655 tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
656 break;
657 case VIRTIO_IOMMU_T_UNMAP:
658 tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
659 break;
660 case VIRTIO_IOMMU_T_PROBE:
662 struct virtio_iommu_req_tail *ptail;
664 output_size = s->config.probe_size + sizeof(tail);
665 buf = g_malloc0(output_size);
667 ptail = (struct virtio_iommu_req_tail *)
668 (buf + s->config.probe_size);
669 ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
670 break;
672 default:
673 tail.status = VIRTIO_IOMMU_S_UNSUPP;
675 qemu_mutex_unlock(&s->mutex);
677 out:
678 sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
679 buf ? buf : &tail, output_size);
680 assert(sz == output_size);
682 virtqueue_push(vq, elem, sz);
683 virtio_notify(vdev, vq);
684 g_free(elem);
685 g_free(buf);
689 static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
690 int flags, uint32_t endpoint,
691 uint64_t address)
693 VirtIODevice *vdev = &viommu->parent_obj;
694 VirtQueue *vq = viommu->event_vq;
695 struct virtio_iommu_fault fault;
696 VirtQueueElement *elem;
697 size_t sz;
699 memset(&fault, 0, sizeof(fault));
700 fault.reason = reason;
701 fault.flags = cpu_to_le32(flags);
702 fault.endpoint = cpu_to_le32(endpoint);
703 fault.address = cpu_to_le64(address);
705 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
707 if (!elem) {
708 error_report_once(
709 "no buffer available in event queue to report event");
710 return;
713 if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
714 virtio_error(vdev, "error buffer of wrong size");
715 virtqueue_detach_element(vq, elem, 0);
716 g_free(elem);
717 return;
720 sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
721 &fault, sizeof(fault));
722 assert(sz == sizeof(fault));
724 trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
725 virtqueue_push(vq, elem, sz);
726 virtio_notify(vdev, vq);
727 g_free(elem);
731 static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
732 IOMMUAccessFlags flag,
733 int iommu_idx)
735 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
736 VirtIOIOMMUInterval interval, *mapping_key;
737 VirtIOIOMMUMapping *mapping_value;
738 VirtIOIOMMU *s = sdev->viommu;
739 bool read_fault, write_fault;
740 VirtIOIOMMUEndpoint *ep;
741 uint32_t sid, flags;
742 bool bypass_allowed;
743 bool found;
744 int i;
746 interval.low = addr;
747 interval.high = addr + 1;
749 IOMMUTLBEntry entry = {
750 .target_as = &address_space_memory,
751 .iova = addr,
752 .translated_addr = addr,
753 .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
754 .perm = IOMMU_NONE,
757 bypass_allowed = s->config.bypass;
759 sid = virtio_iommu_get_bdf(sdev);
761 trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
762 qemu_mutex_lock(&s->mutex);
764 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
765 if (!ep) {
766 if (!bypass_allowed) {
767 error_report_once("%s sid=%d is not known!!", __func__, sid);
768 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
769 VIRTIO_IOMMU_FAULT_F_ADDRESS,
770 sid, addr);
771 } else {
772 entry.perm = flag;
774 goto unlock;
777 for (i = 0; i < s->nb_reserved_regions; i++) {
778 ReservedRegion *reg = &s->reserved_regions[i];
780 if (addr >= reg->low && addr <= reg->high) {
781 switch (reg->type) {
782 case VIRTIO_IOMMU_RESV_MEM_T_MSI:
783 entry.perm = flag;
784 break;
785 case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
786 default:
787 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
788 VIRTIO_IOMMU_FAULT_F_ADDRESS,
789 sid, addr);
790 break;
792 goto unlock;
796 if (!ep->domain) {
797 if (!bypass_allowed) {
798 error_report_once("%s %02x:%02x.%01x not attached to any domain",
799 __func__, PCI_BUS_NUM(sid),
800 PCI_SLOT(sid), PCI_FUNC(sid));
801 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
802 VIRTIO_IOMMU_FAULT_F_ADDRESS,
803 sid, addr);
804 } else {
805 entry.perm = flag;
807 goto unlock;
808 } else if (ep->domain->bypass) {
809 entry.perm = flag;
810 goto unlock;
813 found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
814 (void **)&mapping_key,
815 (void **)&mapping_value);
816 if (!found) {
817 error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
818 __func__, addr, sid);
819 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
820 VIRTIO_IOMMU_FAULT_F_ADDRESS,
821 sid, addr);
822 goto unlock;
825 read_fault = (flag & IOMMU_RO) &&
826 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
827 write_fault = (flag & IOMMU_WO) &&
828 !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);
830 flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
831 flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
832 if (flags) {
833 error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
834 __func__, addr, flag, mapping_value->flags);
835 flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
836 virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
837 flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
838 sid, addr);
839 goto unlock;
841 entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
842 entry.perm = flag;
843 trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
845 unlock:
846 qemu_mutex_unlock(&s->mutex);
847 return entry;
850 static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
852 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
853 struct virtio_iommu_config *dev_config = &dev->config;
854 struct virtio_iommu_config *out_config = (void *)config_data;
856 out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
857 out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
858 out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
859 out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
860 out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
861 out_config->probe_size = cpu_to_le32(dev_config->probe_size);
862 out_config->bypass = dev_config->bypass;
864 trace_virtio_iommu_get_config(dev_config->page_size_mask,
865 dev_config->input_range.start,
866 dev_config->input_range.end,
867 dev_config->domain_range.start,
868 dev_config->domain_range.end,
869 dev_config->probe_size,
870 dev_config->bypass);
873 static void virtio_iommu_set_config(VirtIODevice *vdev,
874 const uint8_t *config_data)
876 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
877 struct virtio_iommu_config *dev_config = &dev->config;
878 const struct virtio_iommu_config *in_config = (void *)config_data;
880 if (in_config->bypass != dev_config->bypass) {
881 if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
882 virtio_error(vdev, "cannot set config.bypass");
883 return;
884 } else if (in_config->bypass != 0 && in_config->bypass != 1) {
885 virtio_error(vdev, "invalid config.bypass value '%u'",
886 in_config->bypass);
887 return;
889 dev_config->bypass = in_config->bypass;
892 trace_virtio_iommu_set_config(in_config->bypass);
895 static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
896 Error **errp)
898 VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
900 f |= dev->features;
901 trace_virtio_iommu_get_features(f);
902 return f;
905 static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
907 guint ua = GPOINTER_TO_UINT(a);
908 guint ub = GPOINTER_TO_UINT(b);
909 return (ua > ub) - (ua < ub);
912 static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
914 VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
915 VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
916 IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;
918 trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
919 mapping->phys_addr);
920 virtio_iommu_notify_map(mr, interval->low, interval->high,
921 mapping->phys_addr, mapping->flags);
922 return false;
925 static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
927 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
928 VirtIOIOMMU *s = sdev->viommu;
929 uint32_t sid;
930 VirtIOIOMMUEndpoint *ep;
932 sid = virtio_iommu_get_bdf(sdev);
934 qemu_mutex_lock(&s->mutex);
936 if (!s->endpoints) {
937 goto unlock;
940 ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
941 if (!ep || !ep->domain) {
942 goto unlock;
945 g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);
947 unlock:
948 qemu_mutex_unlock(&s->mutex);
951 static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
952 IOMMUNotifierFlag old,
953 IOMMUNotifierFlag new,
954 Error **errp)
956 if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
957 error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
958 return -EINVAL;
961 if (old == IOMMU_NOTIFIER_NONE) {
962 trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
963 } else if (new == IOMMU_NOTIFIER_NONE) {
964 trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
966 return 0;
970 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
971 * for example 0xfffffffffffff000. When an assigned device has page size
972 * restrictions due to the hardware IOMMU configuration, apply this restriction
973 * to the mask.
975 static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
976 uint64_t new_mask,
977 Error **errp)
979 IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
980 VirtIOIOMMU *s = sdev->viommu;
981 uint64_t cur_mask = s->config.page_size_mask;
983 trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
984 new_mask);
986 if ((cur_mask & new_mask) == 0) {
987 error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
988 " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
989 return -1;
993 * After the machine is finalized, we can't change the mask anymore. If by
994 * chance the hotplugged device supports the same granule, we can still
995 * accept it. Having a different masks is possible but the guest will use
996 * sub-optimal block sizes, so warn about it.
998 if (phase_check(PHASE_MACHINE_READY)) {
999 int new_granule = ctz64(new_mask);
1000 int cur_granule = ctz64(cur_mask);
1002 if (new_granule != cur_granule) {
1003 error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
1004 " is incompatible with mask 0x%"PRIx64, cur_mask,
1005 new_mask);
1006 return -1;
1007 } else if (new_mask != cur_mask) {
1008 warn_report("virtio-iommu page mask 0x%"PRIx64
1009 " does not match 0x%"PRIx64, cur_mask, new_mask);
1011 return 0;
1014 s->config.page_size_mask &= new_mask;
1015 return 0;
1018 static void virtio_iommu_system_reset(void *opaque)
1020 VirtIOIOMMU *s = opaque;
1022 trace_virtio_iommu_system_reset();
1025 * config.bypass is sticky across device reset, but should be restored on
1026 * system reset
1028 s->config.bypass = s->boot_bypass;
1031 static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
1033 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1034 VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
1036 virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
1037 sizeof(struct virtio_iommu_config));
1039 memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
1041 s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
1042 virtio_iommu_handle_command);
1043 s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
1045 s->config.page_size_mask = TARGET_PAGE_MASK;
1046 s->config.input_range.end = UINT64_MAX;
1047 s->config.domain_range.end = UINT32_MAX;
1048 s->config.probe_size = VIOMMU_PROBE_SIZE;
1050 virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
1051 virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
1052 virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
1053 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
1054 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
1055 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
1056 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
1057 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
1058 virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);
1060 qemu_mutex_init(&s->mutex);
1062 s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
1064 if (s->primary_bus) {
1065 pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
1066 } else {
1067 error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
1070 qemu_register_reset(virtio_iommu_system_reset, s);
1073 static void virtio_iommu_device_unrealize(DeviceState *dev)
1075 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1076 VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
1078 qemu_unregister_reset(virtio_iommu_system_reset, s);
1080 g_hash_table_destroy(s->as_by_busptr);
1081 if (s->domains) {
1082 g_tree_destroy(s->domains);
1084 if (s->endpoints) {
1085 g_tree_destroy(s->endpoints);
1088 virtio_delete_queue(s->req_vq);
1089 virtio_delete_queue(s->event_vq);
1090 virtio_cleanup(vdev);
1093 static void virtio_iommu_device_reset(VirtIODevice *vdev)
1095 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
1097 trace_virtio_iommu_device_reset();
1099 if (s->domains) {
1100 g_tree_destroy(s->domains);
1102 if (s->endpoints) {
1103 g_tree_destroy(s->endpoints);
1105 s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
1106 NULL, NULL, virtio_iommu_put_domain);
1107 s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
1108 NULL, NULL, virtio_iommu_put_endpoint);
1111 static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
1113 trace_virtio_iommu_device_status(status);
1116 static void virtio_iommu_instance_init(Object *obj)
/* VMStateDescription initializer for a VirtIOIOMMUInterval (low, high). */
#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .minimum_version_id = 1,                           \
    .version_id = 1,                                   \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}
/* VMStateDescription initializer for a VirtIOIOMMUMapping (phys_addr, flags). */
#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .minimum_version_id = 1,                          \
    .version_id = 1,                                  \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}
1144 static const VMStateDescription vmstate_interval_mapping[2] = {
1145 VMSTATE_MAPPING, /* value */
1146 VMSTATE_INTERVAL /* key */
1149 static int domain_preload(void *opaque)
1151 VirtIOIOMMUDomain *domain = opaque;
1153 domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
1154 NULL, g_free, g_free);
1155 return 0;
1158 static const VMStateDescription vmstate_endpoint = {
1159 .name = "endpoint",
1160 .version_id = 1,
1161 .minimum_version_id = 1,
1162 .fields = (VMStateField[]) {
1163 VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
1164 VMSTATE_END_OF_LIST()
1168 static const VMStateDescription vmstate_domain = {
1169 .name = "domain",
1170 .version_id = 2,
1171 .minimum_version_id = 2,
1172 .pre_load = domain_preload,
1173 .fields = (VMStateField[]) {
1174 VMSTATE_UINT32(id, VirtIOIOMMUDomain),
1175 VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
1176 vmstate_interval_mapping,
1177 VirtIOIOMMUInterval, VirtIOIOMMUMapping),
1178 VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
1179 vmstate_endpoint, VirtIOIOMMUEndpoint, next),
1180 VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
1181 VMSTATE_END_OF_LIST()
1185 static gboolean reconstruct_endpoints(gpointer key, gpointer value,
1186 gpointer data)
1188 VirtIOIOMMU *s = (VirtIOIOMMU *)data;
1189 VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
1190 VirtIOIOMMUEndpoint *iter;
1191 IOMMUMemoryRegion *mr;
1193 QLIST_FOREACH(iter, &d->endpoint_list, next) {
1194 mr = virtio_iommu_mr(s, iter->id);
1195 assert(mr);
1197 iter->domain = d;
1198 iter->iommu_mr = mr;
1199 g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
1201 return false; /* continue the domain traversal */
1204 static int iommu_post_load(void *opaque, int version_id)
1206 VirtIOIOMMU *s = opaque;
1208 g_tree_foreach(s->domains, reconstruct_endpoints, s);
1209 return 0;
1212 static const VMStateDescription vmstate_virtio_iommu_device = {
1213 .name = "virtio-iommu-device",
1214 .minimum_version_id = 2,
1215 .version_id = 2,
1216 .post_load = iommu_post_load,
1217 .fields = (VMStateField[]) {
1218 VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
1219 &vmstate_domain, VirtIOIOMMUDomain),
1220 VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
1221 VMSTATE_END_OF_LIST()
1225 static const VMStateDescription vmstate_virtio_iommu = {
1226 .name = "virtio-iommu",
1227 .minimum_version_id = 2,
1228 .priority = MIG_PRI_IOMMU,
1229 .version_id = 2,
1230 .fields = (VMStateField[]) {
1231 VMSTATE_VIRTIO_DEVICE,
1232 VMSTATE_END_OF_LIST()
1236 static Property virtio_iommu_properties[] = {
1237 DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
1238 DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
1239 DEFINE_PROP_END_OF_LIST(),
1242 static void virtio_iommu_class_init(ObjectClass *klass, void *data)
1244 DeviceClass *dc = DEVICE_CLASS(klass);
1245 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1247 device_class_set_props(dc, virtio_iommu_properties);
1248 dc->vmsd = &vmstate_virtio_iommu;
1250 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1251 vdc->realize = virtio_iommu_device_realize;
1252 vdc->unrealize = virtio_iommu_device_unrealize;
1253 vdc->reset = virtio_iommu_device_reset;
1254 vdc->get_config = virtio_iommu_get_config;
1255 vdc->set_config = virtio_iommu_set_config;
1256 vdc->get_features = virtio_iommu_get_features;
1257 vdc->set_status = virtio_iommu_set_status;
1258 vdc->vmsd = &vmstate_virtio_iommu_device;
1261 static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
1262 void *data)
1264 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1266 imrc->translate = virtio_iommu_translate;
1267 imrc->replay = virtio_iommu_replay;
1268 imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
1269 imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
1272 static const TypeInfo virtio_iommu_info = {
1273 .name = TYPE_VIRTIO_IOMMU,
1274 .parent = TYPE_VIRTIO_DEVICE,
1275 .instance_size = sizeof(VirtIOIOMMU),
1276 .instance_init = virtio_iommu_instance_init,
1277 .class_init = virtio_iommu_class_init,
1280 static const TypeInfo virtio_iommu_memory_region_info = {
1281 .parent = TYPE_IOMMU_MEMORY_REGION,
1282 .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
1283 .class_init = virtio_iommu_memory_region_class_init,
1286 static void virtio_register_types(void)
1288 type_register_static(&virtio_iommu_info);
1289 type_register_static(&virtio_iommu_memory_region_info);
1292 type_init(virtio_register_types)