Merge tag 'pull-loongarch-20241016' of https://gitlab.com/gaosong/qemu into staging
[qemu/armbru.git] / hw / i386 / kvm / xen_gnttab.c
blob245e4b15db7bf40cebfedd7e05b07b47eff22014
1 /*
2 * QEMU Xen emulation: Grant table support
4 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 * Authors: David Woodhouse <dwmw2@infradead.org>
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qapi/error.h"
18 #include "qom/object.h"
19 #include "exec/target_page.h"
20 #include "exec/address-spaces.h"
21 #include "migration/vmstate.h"
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_gnttab.h"
28 #include "xen_primary_console.h"
30 #include "sysemu/kvm.h"
31 #include "sysemu/kvm_xen.h"
33 #include "hw/xen/interface/memory.h"
34 #include "hw/xen/interface/grant_table.h"
36 #define TYPE_XEN_GNTTAB "xen-gnttab"
37 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
39 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
41 static struct gnttab_backend_ops emu_gnttab_backend_ops;
43 struct XenGnttabState {
44 /*< private >*/
45 SysBusDevice busdev;
46 /*< public >*/
48 QemuMutex gnt_lock;
50 uint32_t nr_frames;
51 uint32_t max_frames;
53 union {
54 grant_entry_v1_t *v1;
55 /* Theoretically, v2 support could be added here. */
56 } entries;
58 MemoryRegion gnt_frames;
59 MemoryRegion *gnt_aliases;
60 uint64_t *gnt_frame_gpas;
62 uint8_t *map_track;
65 struct XenGnttabState *xen_gnttab_singleton;
67 static void xen_gnttab_realize(DeviceState *dev, Error **errp)
69 XenGnttabState *s = XEN_GNTTAB(dev);
70 int i;
72 if (xen_mode != XEN_EMULATE) {
73 error_setg(errp, "Xen grant table support is for Xen emulation");
74 return;
76 s->max_frames = kvm_xen_get_gnttab_max_frames();
77 memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
78 XEN_PAGE_SIZE * s->max_frames, &error_abort);
79 memory_region_set_enabled(&s->gnt_frames, true);
80 s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
82 /* Create individual page-sizes aliases for overlays */
83 s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
84 s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
85 for (i = 0; i < s->max_frames; i++) {
86 memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
87 NULL, &s->gnt_frames,
88 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
89 s->gnt_frame_gpas[i] = INVALID_GPA;
92 s->nr_frames = 0;
93 memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
94 s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
95 s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
97 qemu_mutex_init(&s->gnt_lock);
99 xen_gnttab_singleton = s;
101 s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
103 xen_gnttab_ops = &emu_gnttab_backend_ops;
106 static int xen_gnttab_post_load(void *opaque, int version_id)
108 XenGnttabState *s = XEN_GNTTAB(opaque);
109 uint32_t i;
111 for (i = 0; i < s->nr_frames; i++) {
112 if (s->gnt_frame_gpas[i] != INVALID_GPA) {
113 xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
116 return 0;
119 static bool xen_gnttab_is_needed(void *opaque)
121 return xen_mode == XEN_EMULATE;
124 static const VMStateDescription xen_gnttab_vmstate = {
125 .name = "xen_gnttab",
126 .version_id = 1,
127 .minimum_version_id = 1,
128 .needed = xen_gnttab_is_needed,
129 .post_load = xen_gnttab_post_load,
130 .fields = (const VMStateField[]) {
131 VMSTATE_UINT32(nr_frames, XenGnttabState),
132 VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
133 vmstate_info_uint64, uint64_t),
134 VMSTATE_END_OF_LIST()
138 static void xen_gnttab_class_init(ObjectClass *klass, void *data)
140 DeviceClass *dc = DEVICE_CLASS(klass);
142 dc->realize = xen_gnttab_realize;
143 dc->vmsd = &xen_gnttab_vmstate;
146 static const TypeInfo xen_gnttab_info = {
147 .name = TYPE_XEN_GNTTAB,
148 .parent = TYPE_SYS_BUS_DEVICE,
149 .instance_size = sizeof(XenGnttabState),
150 .class_init = xen_gnttab_class_init,
153 void xen_gnttab_create(void)
155 xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
156 -1, NULL));
159 static void xen_gnttab_register_types(void)
161 type_register_static(&xen_gnttab_info);
164 type_init(xen_gnttab_register_types)
166 int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
168 XenGnttabState *s = xen_gnttab_singleton;
169 uint64_t gpa = gfn << XEN_PAGE_SHIFT;
171 if (!s) {
172 return -ENOTSUP;
175 if (idx >= s->max_frames) {
176 return -EINVAL;
179 BQL_LOCK_GUARD();
180 QEMU_LOCK_GUARD(&s->gnt_lock);
182 xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);
184 s->gnt_frame_gpas[idx] = gpa;
186 if (s->nr_frames <= idx) {
187 s->nr_frames = idx + 1;
190 return 0;
193 int xen_gnttab_set_version_op(struct gnttab_set_version *set)
195 int ret;
197 switch (set->version) {
198 case 1:
199 ret = 0;
200 break;
202 case 2:
203 /* Behave as before set_version was introduced. */
204 ret = -ENOSYS;
205 break;
207 default:
208 ret = -EINVAL;
211 set->version = 1;
212 return ret;
215 int xen_gnttab_get_version_op(struct gnttab_get_version *get)
217 if (get->dom != DOMID_SELF && get->dom != xen_domid) {
218 return -ESRCH;
221 get->version = 1;
222 return 0;
225 int xen_gnttab_query_size_op(struct gnttab_query_size *size)
227 XenGnttabState *s = xen_gnttab_singleton;
229 if (!s) {
230 return -ENOTSUP;
233 if (size->dom != DOMID_SELF && size->dom != xen_domid) {
234 size->status = GNTST_bad_domain;
235 return 0;
238 size->status = GNTST_okay;
239 size->nr_frames = s->nr_frames;
240 size->max_nr_frames = s->max_frames;
241 return 0;
244 /* Track per-open refs, to allow close() to clean up. */
245 struct active_ref {
246 MemoryRegionSection mrs;
247 void *virtaddr;
248 uint32_t refcnt;
249 int prot;
252 static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
253 MemoryRegionSection *mrs, int prot)
255 if (mrs && mrs->mr) {
256 if (prot & PROT_WRITE) {
257 memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
258 XEN_PAGE_SIZE);
260 memory_region_unref(mrs->mr);
261 mrs->mr = NULL;
263 assert(s->map_track[ref] != 0);
265 if (--s->map_track[ref] == 0) {
266 grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
267 qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
271 static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
273 uint16_t mask = GTF_type_mask | GTF_sub_page;
274 grant_entry_v1_t gnt, *gnt_p;
275 int retries = 0;
277 if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
278 s->map_track[ref] == UINT8_MAX) {
279 return INVALID_GPA;
282 if (prot & PROT_WRITE) {
283 mask |= GTF_readonly;
286 gnt_p = &s->entries.v1[ref];
289 * The guest can legitimately be changing the GTF_readonly flag. Allow
290 * that, but don't let a malicious guest cause a livelock.
292 for (retries = 0; retries < 5; retries++) {
293 uint16_t new_flags;
295 /* Read the entry before an atomic operation on its flags */
296 gnt = *(volatile grant_entry_v1_t *)gnt_p;
298 if ((gnt.flags & mask) != GTF_permit_access ||
299 gnt.domid != DOMID_QEMU) {
300 return INVALID_GPA;
303 new_flags = gnt.flags | GTF_reading;
304 if (prot & PROT_WRITE) {
305 new_flags |= GTF_writing;
308 if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
309 return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
313 return INVALID_GPA;
316 struct xengntdev_handle {
317 GHashTable *active_maps;
/* Backend op: nothing to configure here, so always report success. */
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    (void)xgt;
    (void)nr_grants;

    return 0;
}
326 static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
327 uint32_t count, uint32_t domid,
328 uint32_t *refs, int prot)
330 XenGnttabState *s = xen_gnttab_singleton;
331 struct active_ref *act;
333 if (!s) {
334 errno = ENOTSUP;
335 return NULL;
338 if (domid != xen_domid) {
339 errno = EINVAL;
340 return NULL;
343 if (!count || count > 4096) {
344 errno = EINVAL;
345 return NULL;
349 * Making a contiguous mapping from potentially discontiguous grant
350 * references would be... distinctly non-trivial. We don't support it.
351 * Even changing the API to return an array of pointers, one per page,
352 * wouldn't be simple to use in PV backends because some structures
353 * actually cross page boundaries (e.g. 32-bit blkif_response ring
354 * entries are 12 bytes).
356 if (count != 1) {
357 errno = EINVAL;
358 return NULL;
361 QEMU_LOCK_GUARD(&s->gnt_lock);
363 act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
364 if (act) {
365 if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
366 if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
367 return NULL;
369 act->prot |= PROT_WRITE;
371 act->refcnt++;
372 } else {
373 uint64_t gpa = gnt_ref(s, refs[0], prot);
374 if (gpa == INVALID_GPA) {
375 errno = EINVAL;
376 return NULL;
379 act = g_new0(struct active_ref, 1);
380 act->prot = prot;
381 act->refcnt = 1;
382 act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
384 if (act->mrs.mr &&
385 !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
386 memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
387 act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
388 act->mrs.offset_within_region);
390 if (!act->virtaddr) {
391 gnt_unref(s, refs[0], &act->mrs, 0);
392 g_free(act);
393 errno = EINVAL;
394 return NULL;
397 s->map_track[refs[0]]++;
398 g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
401 return act->virtaddr;
404 static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
406 XenGnttabState *s = user_data;
407 grant_ref_t gref = GPOINTER_TO_INT(key);
408 struct active_ref *act = value;
410 gnt_unref(s, gref, &act->mrs, act->prot);
411 g_free(act);
412 return true;
415 static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
416 void *start_address, uint32_t *refs,
417 uint32_t count)
419 XenGnttabState *s = xen_gnttab_singleton;
420 struct active_ref *act;
422 if (!s) {
423 return -ENOTSUP;
426 if (count != 1) {
427 return -EINVAL;
430 QEMU_LOCK_GUARD(&s->gnt_lock);
432 act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
433 if (!act) {
434 return -ENOENT;
437 if (act->virtaddr != start_address) {
438 return -EINVAL;
441 if (!--act->refcnt) {
442 do_unmap(GINT_TO_POINTER(refs[0]), act, s);
443 g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
446 return 0;
450 * This looks a bit like the one for true Xen in xen-operations.c but
451 * in emulation we don't support multi-page mappings. And under Xen we
452 * *want* the multi-page mappings so we have fewer bounces through the
453 * kernel and the hypervisor. So the code paths end up being similar,
454 * but different.
456 static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
457 uint32_t domid, XenGrantCopySegment *segs,
458 uint32_t nr_segs, Error **errp)
460 int prot = to_domain ? PROT_WRITE : PROT_READ;
461 unsigned int i;
463 for (i = 0; i < nr_segs; i++) {
464 XenGrantCopySegment *seg = &segs[i];
465 void *page;
466 uint32_t ref = to_domain ? seg->dest.foreign.ref :
467 seg->source.foreign.ref;
469 page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
470 if (!page) {
471 if (errp) {
472 error_setg_errno(errp, errno,
473 "xen_be_gnttab_map_refs failed");
475 return -errno;
478 if (to_domain) {
479 memcpy(page + seg->dest.foreign.offset, seg->source.virt,
480 seg->len);
481 } else {
482 memcpy(seg->dest.virt, page + seg->source.foreign.offset,
483 seg->len);
486 if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
487 if (errp) {
488 error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
490 return -errno;
494 return 0;
497 static struct xengntdev_handle *xen_be_gnttab_open(void)
499 struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
501 xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
502 return xgt;
505 static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
507 XenGnttabState *s = xen_gnttab_singleton;
509 if (!s) {
510 return -ENOTSUP;
513 g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
514 g_hash_table_destroy(xgt->active_maps);
515 g_free(xgt);
516 return 0;
519 static struct gnttab_backend_ops emu_gnttab_backend_ops = {
520 .open = xen_be_gnttab_open,
521 .close = xen_be_gnttab_close,
522 .grant_copy = xen_be_gnttab_copy,
523 .set_max_grants = xen_be_gnttab_set_max_grants,
524 .map_refs = xen_be_gnttab_map_refs,
525 .unmap = xen_be_gnttab_unmap,
528 int xen_gnttab_reset(void)
530 XenGnttabState *s = xen_gnttab_singleton;
532 if (!s) {
533 return -ENOTSUP;
536 QEMU_LOCK_GUARD(&s->gnt_lock);
538 s->nr_frames = 0;
540 memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
541 s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
542 s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
544 if (xen_primary_console_get_pfn()) {
545 s->entries.v1[GNTTAB_RESERVED_CONSOLE].flags = GTF_permit_access;
546 s->entries.v1[GNTTAB_RESERVED_CONSOLE].frame = XEN_SPECIAL_PFN(CONSOLE);
549 return 0;