/*
 * QEMU Xen emulation: Event channel support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc-target.h"
#include "qapi/qmp/qdict.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"
#include "trace.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/irq.h"
#include "hw/xen/xen_backend_ops.h"

#include "xen_evtchn.h"
#include "xen_overlay.h"
#include "xen_xenstore.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"
#include <linux/kvm.h>
#include <sys/eventfd.h>

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/params.h"

/* XX: For kvm_update_msi_routes_all() */
#include "target/i386/kvm/kvm_i386.h"
#define TYPE_XEN_EVTCHN "xen-evtchn"
OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
typedef struct XenEvtchnPort {
    uint32_t vcpu;      /* Xen/ACPI vcpu_id */
    uint16_t type;      /* EVTCHNSTAT_xxxx */
    union {
        uint16_t val;   /* raw value for serialization etc. */
        uint16_t pirq;
        uint16_t virq;
        struct {
            uint16_t port:15;
            uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
        } interdomain;
    } u;
} XenEvtchnPort;
/* 32-bit compatibility definitions, also used natively in 32-bit build */
struct compat_arch_vcpu_info {
    unsigned int cr2;
    unsigned int pad[5];
};

struct compat_vcpu_info {
    uint8_t evtchn_upcall_pending;
    uint8_t evtchn_upcall_mask;
    uint16_t pad;
    uint32_t evtchn_pending_sel;
    struct compat_arch_vcpu_info arch;
    struct vcpu_time_info time;
}; /* 64 bytes (x86) */

struct compat_arch_shared_info {
    unsigned int max_pfn;
    unsigned int pfn_to_mfn_frame_list_list;
    unsigned int nmi_reason;
    unsigned int p2m_cr3;
    unsigned int p2m_vaddr;
    unsigned int p2m_generation;
    uint32_t wc_sec_hi;
};

struct compat_shared_info {
    struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
    uint32_t evtchn_pending[32];
    uint32_t evtchn_mask[32];
    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
    uint32_t wc_sec;
    uint32_t wc_nsec;
    struct compat_arch_shared_info arch;
};
#define COMPAT_EVTCHN_2L_NR_CHANNELS 1024
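/*
 * The 2-level compat limit follows from the layout above: evtchn_pending[]
 * is 32 words of 32 bits each, i.e. 1024 channels, versus 64 words of
 * 64 bits (EVTCHN_2L_NR_CHANNELS == 4096) for the 64-bit ABI.
 */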
/* Local private implementation of struct xenevtchn_handle */
struct xenevtchn_handle {
    evtchn_port_t be_port;
    evtchn_port_t guest_port; /* Or zero for unbound */
    int fd;
};
/*
 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
 * insane enough to think about guest-transparent live migration from actual
 * Xen to QEMU, and ensuring that we can convert/consume the stream.
 */
#define IRQ_UNBOUND -1
#define IRQ_PT -2
#define IRQ_MSI_EMU -3
struct pirq_info {
    int gsi;
    uint16_t port;
    PCIDevice *dev;
    int vector;
    bool is_msix;
    bool is_masked;
    bool is_translated;
};

struct XenEvtchnState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    uint64_t callback_param;
    bool evtchn_in_kernel;
    uint32_t callback_gsi;

    QEMUBH *gsi_bh;

    QemuMutex port_lock;
    uint32_t nr_ports;
    XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];

    /* Connected to the system GSIs for raising callback as GSI / INTx */
    unsigned int nr_callback_gsis;
    qemu_irq *callback_gsis;

    struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];

    uint32_t nr_pirqs;

    /* Bitmap of allocated PIRQs (serialized) */
    uint16_t nr_pirq_inuse_words;
    uint64_t *pirq_inuse_bitmap;

    /* GSI → PIRQ mapping (serialized) */
    uint16_t gsi_pirq[IOAPIC_NUM_PINS];

    /* Per-GSI assertion state (serialized) */
    uint32_t pirq_gsi_set;

    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
    struct pirq_info *pirq;
};
#define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))

#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
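/*
 * The in-use bitmap packs 64 PIRQs into each uint64_t word, so for example
 * PIRQ 77 lives in pirq_inuse_bitmap[1] (77 / 64) under bit 13 (77 & 63).
 */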
struct XenEvtchnState *xen_evtchn_singleton;

/* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
#define CALLBACK_VIA_TYPE_SHIFT 56
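/*
 * For example, a guest asking for GSI delivery sets HVM_PARAM_CALLBACK_IRQ
 * to ((uint64_t)HVM_PARAM_CALLBACK_TYPE_GSI << CALLBACK_VIA_TYPE_SHIFT) | gsi;
 * xen_evtchn_set_callback_param() splits the value back apart the same way.
 */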
static void unbind_backend_ports(XenEvtchnState *s);

static int xen_evtchn_pre_load(void *opaque)
{
    XenEvtchnState *s = opaque;

    /* Unbind all the backend-side ports; they need to rebind */
    unbind_backend_ports(s);

    /* It'll be leaked otherwise. */
    g_free(s->pirq_inuse_bitmap);
    s->pirq_inuse_bitmap = NULL;

    return 0;
}
static int xen_evtchn_post_load(void *opaque, int version_id)
{
    XenEvtchnState *s = opaque;
    int i;

    if (s->callback_param) {
        xen_evtchn_set_callback_param(s->callback_param);
    }

    /* Rebuild s->pirq[].port mapping */
    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];

        if (p->type == EVTCHNSTAT_pirq) {
            assert(p->u.pirq);
            assert(p->u.pirq < s->nr_pirqs);

            /*
             * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
             * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
             * catches up with it.
             */
            s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
            s->pirq[p->u.pirq].port = i;
        }
    }
    /* Rebuild s->pirq[].gsi mapping */
    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        if (s->gsi_pirq[i]) {
            s->pirq[s->gsi_pirq[i]].gsi = i;
        }
    }

    return 0;
}
static bool xen_evtchn_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}
static const VMStateDescription xen_evtchn_port_vmstate = {
    .name = "xen_evtchn_port",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(vcpu, XenEvtchnPort),
        VMSTATE_UINT16(type, XenEvtchnPort),
        VMSTATE_UINT16(u.val, XenEvtchnPort),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription xen_evtchn_vmstate = {
    .name = "xen_evtchn",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_evtchn_is_needed,
    .pre_load = xen_evtchn_pre_load,
    .post_load = xen_evtchn_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(callback_param, XenEvtchnState),
        VMSTATE_UINT32(nr_ports, XenEvtchnState),
        VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
                                     xen_evtchn_port_vmstate, XenEvtchnPort),
        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
        VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
                                    nr_pirq_inuse_words, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
        VMSTATE_END_OF_LIST()
    }
};
static void xen_evtchn_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->vmsd = &xen_evtchn_vmstate;
}

static const TypeInfo xen_evtchn_info = {
    .name = TYPE_XEN_EVTCHN,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenEvtchnState),
    .class_init = xen_evtchn_class_init,
};
static struct evtchn_backend_ops emu_evtchn_backend_ops = {
    .open = xen_be_evtchn_open,
    .bind_interdomain = xen_be_evtchn_bind_interdomain,
    .unbind = xen_be_evtchn_unbind,
    .close = xen_be_evtchn_close,
    .get_fd = xen_be_evtchn_fd,
    .notify = xen_be_evtchn_notify,
    .unmask = xen_be_evtchn_unmask,
    .pending = xen_be_evtchn_pending,
};
static void gsi_assert_bh(void *opaque)
{
    struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
    if (vi) {
        xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
    }
}
void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
{
    XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
                                                        -1, NULL));
    int i;

    xen_evtchn_singleton = s;

    qemu_mutex_init(&s->port_lock);
    s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);

    /*
     * These are the *output* GSI from event channel support, for
     * signalling CPU0's events via GSI or PCI INTx instead of the
     * per-CPU vector. We create a *set* of irqs and connect one to
     * each of the system GSIs which were passed in from the platform
     * code, and then just trigger the right one as appropriate from
     * xen_evtchn_set_callback_level().
     */
    s->nr_callback_gsis = nr_gsis;
    s->callback_gsis = g_new0(qemu_irq, nr_gsis);
    for (i = 0; i < nr_gsis; i++) {
        sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
        sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
    }

    /*
     * The Xen scheme for encoding PIRQ# into an MSI message is not
     * compatible with 32-bit MSI, as it puts the high bits of the
     * PIRQ# into the high bits of the MSI message address, instead of
     * using the Extended Destination ID in address bits 4-11 which
     * perhaps would have been a better choice.
     *
     * To keep life simple, kvm_accel_instance_init() initialises the
     * default to 256, which conveniently doesn't need to set anything
     * outside the low 32 bits of the address. It can be increased by
     * setting the xen-evtchn-max-pirq property.
     */
    s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();

    s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
    s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
    s->pirq = g_new0(struct pirq_info, s->nr_pirqs);

    /* Set event channel functions for backend drivers to use */
    xen_evtchn_ops = &emu_evtchn_backend_ops;
}
static void xen_evtchn_register_types(void)
{
    type_register_static(&xen_evtchn_info);
}

type_init(xen_evtchn_register_types)
static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
{
    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
    uint8_t pin = param & 3;
    uint8_t devfn = (param >> 8) & 0xff;
    uint16_t bus = (param >> 16) & 0xffff;
    uint16_t domain = (param >> 32) & 0xffff;
    PCIDevice *pdev;
    PCIINTxRoute r;

    if (domain || !pcms) {
        return 0;
    }

    pdev = pci_find_device(pcms->pcibus, bus, devfn);
    if (!pdev) {
        return 0;
    }

    r = pci_device_route_intx_to_irq(pdev, pin);
    if (r.mode != PCI_INTX_ENABLED) {
        return 0;
    }

    /*
     * Hm, can we be notified of INTX routing changes? Not without
     * *owning* the device and being allowed to overwrite its own
     * ->intx_routing_notifier, AFAICT. So let's not.
     */
    return r.irq;
}
void xen_evtchn_set_callback_level(int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    if (!s) {
        return;
    }

    /*
     * We get to this function in a number of ways:
     *
     *  • From I/O context, via PV backend drivers sending a notification to
     *    the guest.
     *
     *  • From guest vCPU context, via loopback interdomain event channels
     *    (or theoretically even IPIs but guests don't use those with GSI
     *    delivery because that's pointless. We don't want a malicious guest
     *    to be able to trigger a deadlock though, so we can't rule it out.)
     *
     *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
     *    configured.
     *
     *  • From guest vCPU context in the KVM exit handler, if the upcall
     *    pending flag has been cleared and the GSI needs to be deasserted.
     *
     *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
     *    been acked in the irqchip.
     *
     * Whichever context we come from, if we aren't already holding the BQL
     * then we can't take it now, as we may already hold s->port_lock. So
     * trigger the BH to set the IRQ for us instead of doing it immediately.
     *
     * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
     * will deliberately take the BQL because they want the change to take
     * effect immediately. That just leaves interdomain loopback as the case
     * which uses the BH.
     */
    if (!bql_locked()) {
        qemu_bh_schedule(s->gsi_bh);
        return;
    }

    if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
        qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
        if (level) {
            /* Ensure the vCPU polls for deassertion */
            kvm_xen_set_callback_asserted();
        }
    }
}
int xen_evtchn_set_callback_param(uint64_t param)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    struct kvm_xen_hvm_attr xa = {
        .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
        .u.vector = 0,
    };
    bool in_kernel = false;
    uint32_t gsi = 0;
    int type = param >> CALLBACK_VIA_TYPE_SHIFT;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    /*
     * We need the BQL because set_callback_pci_intx() may call into PCI code,
     * and because we may need to manipulate the old and new GSI levels.
     */
    assert(bql_locked());
    qemu_mutex_lock(&s->port_lock);

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
        xa.u.vector = (uint8_t)param;

        ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
        if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
            in_kernel = true;
        }
        gsi = 0;
        break;
    }

    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        gsi = set_callback_pci_intx(s, param);
        ret = gsi ? 0 : -EINVAL;
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
        gsi = (uint32_t)param;
        ret = 0;
        break;

    default:
        /* Xen doesn't return error even if you set something bogus */
        ret = 0;
        break;
    }

    /* If the guest has set a per-vCPU callback vector, prefer that. */
    if (gsi && kvm_xen_has_vcpu_callback_vector()) {
        in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
        gsi = 0;
    }

    if (!ret) {
        /* If vector delivery was turned *off* then tell the kernel */
        if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
            HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
            kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
        }
        s->callback_param = param;
        s->evtchn_in_kernel = in_kernel;

        if (gsi != s->callback_gsi) {
            struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);

            xen_evtchn_set_callback_level(0);
            s->callback_gsi = gsi;

            if (gsi && vi && vi->evtchn_upcall_pending) {
                kvm_xen_inject_vcpu_callback_vector(0, type);
            }
        }
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
{
    int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;

    kvm_xen_inject_vcpu_callback_vector(vcpu, type);
}
static void deassign_kernel_port(evtchn_port_t port)
{
    struct kvm_xen_hvm_attr ha;
    int ret;

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;

    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
    if (ret) {
        qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
                      port, strerror(ret));
    }
}
static int assign_kernel_port(uint16_t type, evtchn_port_t port,
                              uint32_t vcpu_id)
{
    CPUState *cpu = qemu_get_cpu(vcpu_id);
    struct kvm_xen_hvm_attr ha;

    if (!cpu) {
        return -ENOENT;
    }

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.type = type;
    ha.u.evtchn.flags = 0;
    ha.u.evtchn.deliver.port.port = port;
    ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
    ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}
static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
{
    struct kvm_xen_hvm_attr ha;

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.type = type;
    ha.u.evtchn.flags = 0;
    ha.u.evtchn.deliver.eventfd.port = 0;
    ha.u.evtchn.deliver.eventfd.fd = fd;

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}
static bool valid_port(evtchn_port_t port)
{
    if (!port) {
        return false;
    }

    if (xen_is_long_mode()) {
        return port < EVTCHN_2L_NR_CHANNELS;
    } else {
        return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
    }
}
static bool valid_vcpu(uint32_t vcpu)
{
    return !!qemu_get_cpu(vcpu);
}
static void unbind_backend_ports(XenEvtchnState *s)
{
    XenEvtchnPort *p;
    int i;

    for (i = 1; i < s->nr_ports; i++) {
        p = &s->port_table[i];
        if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
            evtchn_port_t be_port = p->u.interdomain.port;

            if (s->be_handles[be_port]) {
                /* This part will be overwritten on the load anyway. */
                p->type = EVTCHNSTAT_unbound;
                p->u.interdomain.port = 0;

                /* Leave the backend port open and unbound too. */
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(i);
                }
                s->be_handles[be_port]->guest_port = 0;
            }
        }
    }
}
int xen_evtchn_status_op(struct evtchn_status *status)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;

    if (!s) {
        return -ENOTSUP;
    }

    if (status->dom != DOMID_SELF && status->dom != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(status->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[status->port];

    status->status = p->type;
    status->vcpu = p->vcpu;

    switch (p->type) {
    case EVTCHNSTAT_unbound:
        status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
                                                         : xen_domid;
        break;

    case EVTCHNSTAT_interdomain:
        status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
                                                             : xen_domid;
        status->u.interdomain.port = p->u.interdomain.port;
        break;

    case EVTCHNSTAT_pirq:
        status->u.pirq = p->u.pirq;
        break;

    case EVTCHNSTAT_virq:
        status->u.virq = p->u.virq;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);
    return 0;
}
/*
 * Never thought I'd hear myself say this, but C++ templates would be
 * kind of nice here.
 *
 * template<class T> static int do_unmask_port(T *shinfo, ...);
 */
static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
                             bool do_unmask, struct shared_info *shinfo,
                             struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
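/*
 * A worked example of the 2-level lookup above for a 64-bit guest: port 131
 * is bit 3 of shinfo->evtchn_pending[2] (131 / 64 == 2, 131 % 64 == 3), and
 * its second-level selector is bit 2 of vcpu_info->evtchn_pending_sel. Only
 * when that selector bit and evtchn_upcall_pending were both previously
 * clear does the callback actually need to be (re-)injected.
 */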
static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
                                 bool do_unmask,
                                 struct compat_shared_info *shinfo,
                                 struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
    } else {
        return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
    }
}
static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
                          struct shared_info *shinfo,
                          struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
                              struct compat_shared_info *shinfo,
                              struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    if (s->evtchn_in_kernel) {
        XenEvtchnPort *p = &s->port_table[port];
        CPUState *cpu = qemu_get_cpu(p->vcpu);
        struct kvm_irq_routing_xen_evtchn evt;

        if (!cpu) {
            return 0;
        }

        evt.port = port;
        evt.vcpu = kvm_arch_vcpu_id(cpu);
        evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

        return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_set_port_lm(s, port, shinfo, vcpu_info);
    } else {
        return do_set_port_compat(s, port, shinfo, vcpu_info);
    }
}
static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *p = xen_overlay_get_shinfo_ptr();

    if (!p) {
        return -ENOTSUP;
    }

    if (xen_is_long_mode()) {
        struct shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;

        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    } else {
        struct compat_shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;

        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    }
    return 0;
}
static void free_port(XenEvtchnState *s, evtchn_port_t port)
{
    s->port_table[port].type = EVTCHNSTAT_closed;
    s->port_table[port].u.val = 0;
    s->port_table[port].vcpu = 0;

    if (s->nr_ports == port + 1) {
        do {
            s->nr_ports--;
        } while (s->nr_ports &&
                 s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
    }

    /* Clear pending event to avoid unexpected behavior on re-bind. */
    clear_port_pending(s, port);
}
static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
                         uint16_t val, evtchn_port_t *port)
{
    evtchn_port_t p = 1;

    for (p = 1; valid_port(p); p++) {
        if (s->port_table[p].type == EVTCHNSTAT_closed) {
            s->port_table[p].vcpu = vcpu;
            s->port_table[p].type = type;
            s->port_table[p].u.val = val;

            *port = p;

            if (s->nr_ports < p + 1) {
                s->nr_ports = p + 1;
            }

            return 0;
        }
    }
    return -ENOSPC;
}
static bool virq_is_global(uint32_t virq)
{
    switch (virq) {
    case VIRQ_TIMER:
    case VIRQ_DEBUG:
    case VIRQ_XENOPROF:
    case VIRQ_XENPMU:
        return false;

    default:
        return true;
    }
}
static int close_port(XenEvtchnState *s, evtchn_port_t port,
                      bool *flush_kvm_routes)
{
    XenEvtchnPort *p = &s->port_table[port];

    /* Because it *might* be a PIRQ port */
    assert(bql_locked());

    switch (p->type) {
    case EVTCHNSTAT_closed:
        return -ENOENT;

    case EVTCHNSTAT_pirq:
        s->pirq[p->u.pirq].port = 0;
        if (s->pirq[p->u.pirq].is_translated) {
            *flush_kvm_routes = true;
        }
        break;

    case EVTCHNSTAT_virq:
        kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
                              p->u.virq, 0);
        break;

    case EVTCHNSTAT_ipi:
        if (s->evtchn_in_kernel) {
            deassign_kernel_port(port);
        }
        break;

    case EVTCHNSTAT_interdomain:
        if (p->u.interdomain.to_qemu) {
            uint16_t be_port = p->u.interdomain.port;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(port);
                }
                xc->guest_port = 0;
            }
        } else {
            /* Loopback interdomain */
            XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
            if (!valid_port(p->u.interdomain.port) ||
                rp->u.interdomain.port != port ||
                rp->type != EVTCHNSTAT_interdomain) {
                error_report("Inconsistent state for interdomain unbind");
            } else {
                /* Set the other end back to unbound */
                rp->type = EVTCHNSTAT_unbound;
                rp->u.interdomain.port = 0;
            }
        }
        break;

    default:
        break;
    }

    free_port(s, port);
    return 0;
}
int xen_evtchn_soft_reset(void)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    bool flush_kvm_routes = false;
    int i;

    if (!s) {
        return -ENOTSUP;
    }

    assert(bql_locked());

    qemu_mutex_lock(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        close_port(s, i, &flush_kvm_routes);
    }

    qemu_mutex_unlock(&s->port_lock);

    if (flush_kvm_routes) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return 0;
}
int xen_evtchn_reset_op(struct evtchn_reset *reset)
{
    if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
        return -ESRCH;
    }

    return xen_evtchn_soft_reset();
}
int xen_evtchn_close_op(struct evtchn_close *close)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    bool flush_kvm_routes = false;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(close->port)) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();
    qemu_mutex_lock(&s->port_lock);

    ret = close_port(s, close->port, &flush_kvm_routes);

    qemu_mutex_unlock(&s->port_lock);

    if (flush_kvm_routes) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return ret;
}
int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(unmask->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = unmask_port(s, unmask->port, true);

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = -EINVAL;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(vcpu->port)) {
        return -EINVAL;
    }

    if (!valid_vcpu(vcpu->vcpu)) {
        return -ENOENT;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[vcpu->port];

    if (p->type == EVTCHNSTAT_interdomain ||
        p->type == EVTCHNSTAT_unbound ||
        p->type == EVTCHNSTAT_pirq ||
        (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
        /*
         * unmask_port() with do_unmask==false will just raise the event
         * on the new vCPU if the port was already pending.
         */
        p->vcpu = vcpu->vcpu;
        unmask_port(s, vcpu->port, false);
        ret = 0;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (virq->virq >= NR_VIRQS) {
        return -EINVAL;
    }

    /* Global VIRQ must be allocated on vCPU0 first */
    if (virq_is_global(virq->virq) && virq->vcpu != 0) {
        return -EINVAL;
    }

    if (!valid_vcpu(virq->vcpu)) {
        return -ENOENT;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
                        &virq->port);
    if (!ret) {
        ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
        if (ret) {
            free_port(s, virq->port);
        }
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (pirq->pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();

    if (s->pirq[pirq->pirq].port) {
        return -EBUSY;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
                        &pirq->port);
    if (ret) {
        qemu_mutex_unlock(&s->port_lock);
        return ret;
    }

    s->pirq[pirq->pirq].port = pirq->port;
    trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);

    qemu_mutex_unlock(&s->port_lock);

    /*
     * Need to do the unmask outside port_lock because it may call
     * back into the MSI translate function.
     */
    if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
        if (s->pirq[pirq->pirq].is_masked) {
            PCIDevice *dev = s->pirq[pirq->pirq].dev;
            int vector = s->pirq[pirq->pirq].vector;
            char *dev_path = qdev_get_dev_path(DEVICE(dev));

            trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
            g_free(dev_path);

            if (s->pirq[pirq->pirq].is_msix) {
                msix_set_mask(dev, vector, false);
            } else {
                msi_set_mask(dev, vector, false, NULL);
            }
        } else if (s->pirq[pirq->pirq].is_translated) {
            /*
             * If KVM had attempted to translate this one before, make it try
             * again. If we unmasked, then the notifier on the MSI(-X) vector
             * will already have had the same effect.
             */
            kvm_update_msi_routes_all(NULL, true, 0, 0);
        }
    }

    return ret;
}
*ipi
)
1328 XenEvtchnState
*s
= xen_evtchn_singleton
;
1335 if (!valid_vcpu(ipi
->vcpu
)) {
1339 qemu_mutex_lock(&s
->port_lock
);
1341 ret
= allocate_port(s
, ipi
->vcpu
, EVTCHNSTAT_ipi
, 0, &ipi
->port
);
1342 if (!ret
&& s
->evtchn_in_kernel
) {
1343 assign_kernel_port(EVTCHNSTAT_ipi
, ipi
->port
, ipi
->vcpu
);
1346 qemu_mutex_unlock(&s
->port_lock
);
int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (interdomain->remote_dom != DOMID_QEMU &&
        interdomain->remote_dom != DOMID_SELF &&
        interdomain->remote_dom != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(interdomain->remote_port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    /* The newly allocated port starts out as unbound */
    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
    if (ret) {
        goto out;
    }

    if (interdomain->remote_dom == DOMID_QEMU) {
        struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        if (!xc) {
            ret = -ENOENT;
            goto out_free_port;
        }

        if (xc->guest_port) {
            ret = -EBUSY;
            goto out_free_port;
        }

        assert(xc->be_port == interdomain->remote_port);
        xc->guest_port = interdomain->local_port;
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
        }
        lp->type = EVTCHNSTAT_interdomain;
        lp->u.interdomain.to_qemu = 1;
        lp->u.interdomain.port = interdomain->remote_port;
        ret = 0;
    } else {
        /* Loopback */
        XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        /*
         * The 'remote' port for loopback must be an unbound port allocated
         * for communication with the local domain, and must *not* be the
         * port that was just allocated for the local end.
         */
        if (interdomain->local_port != interdomain->remote_port &&
            rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {

            rp->type = EVTCHNSTAT_interdomain;
            rp->u.interdomain.port = interdomain->local_port;

            lp->type = EVTCHNSTAT_interdomain;
            lp->u.interdomain.port = interdomain->remote_port;
        } else {
            ret = -EINVAL;
        }
    }

 out_free_port:
    if (ret) {
        free_port(s, interdomain->local_port);
    }
 out:
    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
        return -ESRCH;
    }

    if (alloc->remote_dom != DOMID_QEMU &&
        alloc->remote_dom != DOMID_SELF &&
        alloc->remote_dom != xen_domid) {
        return -EPERM;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);

    if (!ret && alloc->remote_dom == DOMID_QEMU) {
        XenEvtchnPort *p = &s->port_table[alloc->port];
        p->u.interdomain.to_qemu = 1;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_send_op(struct evtchn_send *send)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = 0;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(send->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[send->port];

    switch (p->type) {
    case EVTCHNSTAT_interdomain:
        if (p->u.interdomain.to_qemu) {
            /*
             * This is an event from the guest to qemu itself, which is
             * serving as the driver domain.
             */
            uint16_t be_port = p->u.interdomain.port;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                eventfd_write(xc->fd, 1);
                ret = 0;
            } else {
                ret = -ENOENT;
            }
        } else {
            /* Loopback interdomain ports; just a complex IPI */
            set_port_pending(s, p->u.interdomain.port);
        }
        break;

    case EVTCHNSTAT_ipi:
        set_port_pending(s, send->port);
        break;

    case EVTCHNSTAT_unbound:
        /* Xen will silently drop these */
        break;

    default:
        ret = -EINVAL;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_set_port(uint16_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = -EINVAL;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[port];

    /* QEMU has no business sending to anything but these */
    if (p->type == EVTCHNSTAT_virq ||
        (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
        set_port_pending(s, port);
        ret = 0;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
{
    uint16_t pirq;

    /*
     * Preserve the allocation strategy that Xen has. It looks like
     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
     * to GSIs (counting up from 16), and then we count backwards from
     * the top for MSIs or when the GSI space is exhausted.
     */
    if (type == MAP_PIRQ_TYPE_GSI) {
        for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
            if (pirq_inuse(s, pirq)) {
                continue;
            }

            /* Found it */
            goto found;
        }
    }
    for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
        /* Skip whole words at a time when they're full */
        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
            pirq &= ~63ULL;
            continue;
        }
        if (pirq_inuse(s, pirq)) {
            continue;
        }

        goto found;
    }
    return -ENOSPC;

 found:
    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
    if (gsi >= 0) {
        assert(gsi < IOAPIC_NUM_PINS);
        s->gsi_pirq[gsi] = pirq;
    }
    s->pirq[pirq].gsi = gsi;
    return pirq;
}
bool xen_evtchn_set_gsi(int gsi, int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq;

    assert(bql_locked());

    if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return false;
    }

    /*
     * Check that it *isn't* the event channel GSI, and thus
     * that we are not recursing and it's safe to take s->port_lock.
     *
     * Locking aside, it's perfectly sane to bail out early for that
     * special case, as it would make no sense for the event channel
     * GSI to be routed back to event channels, when the delivery
     * method is to raise the GSI... that recursion wouldn't *just*
     * be a locking issue.
     */
    if (gsi && gsi == s->callback_gsi) {
        return false;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    pirq = s->gsi_pirq[gsi];
    if (!pirq) {
        return false;
    }

    if (level) {
        int port = s->pirq[pirq].port;

        s->pirq_gsi_set |= (1U << gsi);
        if (port) {
            set_port_pending(s, port);
        }
    } else {
        s->pirq_gsi_set &= ~(1U << gsi);
    }
    return true;
}
static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
{
    /* The vector (in low 8 bits of data) must be zero */
    if (data & 0xff) {
        return 0;
    }

    uint32_t pirq = (addr & 0xff000) >> 12;
    pirq |= (addr >> 32) & 0xffffff00;

    return pirq;
}
static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
                                 int except_pirq)
{
    uint32_t pirq;

    for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
        /*
         * We could be cleverer here, but it isn't really a fast path, and
         * this trivial optimisation is enough to let us skip the big gap
         * in the middle a bit quicker (in terms of both loop iterations,
         * and cache lines).
         */
        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
            pirq += 64;
            continue;
        }
        if (except_pirq && pirq == except_pirq) {
            continue;
        }
        if (s->pirq[pirq].dev != dev) {
            continue;
        }
        if (vector != -1 && s->pirq[pirq].vector != vector) {
            continue;
        }

        /* It could theoretically be bound to a port already, but that is OK. */
        s->pirq[pirq].dev = NULL;
        s->pirq[pirq].gsi = IRQ_UNBOUND;
        s->pirq[pirq].is_msix = false;
        s->pirq[pirq].vector = 0;
        s->pirq[pirq].is_masked = false;
        s->pirq[pirq].is_translated = false;
    }
}
void xen_evtchn_remove_pci_device(PCIDevice *dev)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        return;
    }

    QEMU_LOCK_GUARD(&s->port_lock);
    do_remove_pci_vector(s, dev, -1, 0);
}
void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
                          uint64_t addr, uint32_t data, bool is_masked)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq;

    if (!s) {
        return;
    }

    assert(bql_locked());

    pirq = msi_pirq_target(addr, data);

    /*
     * The PIRQ# must be sane, and there must be an allocated PIRQ in
     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
     */
    if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
        (s->pirq[pirq].gsi != IRQ_UNBOUND &&
         s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
        pirq = 0;
    }

    if (pirq) {
        s->pirq[pirq].dev = dev;
        s->pirq[pirq].gsi = IRQ_MSI_EMU;
        s->pirq[pirq].is_msix = is_msix;
        s->pirq[pirq].vector = vector;
        s->pirq[pirq].is_masked = is_masked;
    }

    /* Remove any (other) entries for this {device, vector} */
    do_remove_pci_vector(s, dev, vector, pirq);
}
int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
                                  uint64_t address, uint32_t data)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq, port;
    CPUState *cpu;

    if (!s) {
        return 1; /* Not a PIRQ */
    }

    assert(bql_locked());

    pirq = msi_pirq_target(address, data);
    if (!pirq || pirq >= s->nr_pirqs) {
        return 1; /* Not a PIRQ */
    }

    if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
        return -ENOTSUP;
    }

    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
        return -EINVAL;
    }

    /* Remember that KVM tried to translate this. It might need to try again. */
    s->pirq[pirq].is_translated = true;

    QEMU_LOCK_GUARD(&s->port_lock);

    port = s->pirq[pirq].port;
    if (!valid_port(port)) {
        return -EINVAL;
    }

    cpu = qemu_get_cpu(s->port_table[port].vcpu);
    if (!cpu) {
        return -EINVAL;
    }

    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
    route->u.xen_evtchn.port = port;
    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    return 0; /* Handled */
}
bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq, port;

    if (!s) {
        return false;
    }

    assert(bql_locked());

    pirq = msi_pirq_target(address, data);
    if (!pirq || pirq >= s->nr_pirqs) {
        return false;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    port = s->pirq[pirq].port;
    if (!valid_port(port)) {
        return false;
    }

    set_port_pending(s, port);
    return true;
}
int xen_physdev_map_pirq(struct physdev_map_pirq *map)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = map->pirq;
    int gsi = map->index;

    if (!s) {
        return -ENOTSUP;
    }

    BQL_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->port_lock);

    if (map->domid != DOMID_SELF && map->domid != xen_domid) {
        return -EPERM;
    }
    if (map->type != MAP_PIRQ_TYPE_GSI) {
        return -EINVAL;
    }
    if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return -EINVAL;
    }

    if (pirq < 0) {
        pirq = allocate_pirq(s, map->type, gsi);
        if (pirq < 0) {
            return pirq;
        }
        map->pirq = pirq;
    } else if (pirq > s->nr_pirqs) {
        return -EINVAL;
    } else {
        /*
         * User specified a valid-looking PIRQ#. Allow it if it is
         * allocated and not yet bound, or if it is unallocated
         */
        if (pirq_inuse(s, pirq)) {
            if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
                return -EBUSY;
            }
        } else {
            /* If it was unused, mark it used now. */
            pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
        }
        /* Set the mapping in both directions. */
        s->pirq[pirq].gsi = gsi;
        s->gsi_pirq[gsi] = pirq;
    }

    trace_kvm_xen_map_pirq(pirq, gsi);
    return 0;
}
int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = unmap->pirq;
    int gsi;

    if (!s) {
        return -ENOTSUP;
    }

    if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
        return -EPERM;
    }
    if (pirq < 0 || pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();
    qemu_mutex_lock(&s->port_lock);

    if (!pirq_inuse(s, pirq)) {
        qemu_mutex_unlock(&s->port_lock);
        return -ENOENT;
    }

    gsi = s->pirq[pirq].gsi;

    /* We can only unmap GSI PIRQs */
    if (gsi < 0) {
        qemu_mutex_unlock(&s->port_lock);
        return -EINVAL;
    }

    s->gsi_pirq[gsi] = 0;
    s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
    pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);

    trace_kvm_xen_unmap_pirq(pirq, gsi);
    qemu_mutex_unlock(&s->port_lock);

    if (gsi == IRQ_MSI_EMU) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return 0;
}
*eoi
)
1919 XenEvtchnState
*s
= xen_evtchn_singleton
;
1920 int pirq
= eoi
->irq
;
1928 QEMU_LOCK_GUARD(&s
->port_lock
);
1930 if (!pirq_inuse(s
, pirq
)) {
1934 gsi
= s
->pirq
[pirq
].gsi
;
1939 /* Reassert a level IRQ if needed */
1940 if (s
->pirq_gsi_set
& (1U << gsi
)) {
1941 int port
= s
->pirq
[pirq
].port
;
1943 set_port_pending(s
, port
);
1950 int xen_physdev_query_pirq(struct physdev_irq_status_query
*query
)
1952 XenEvtchnState
*s
= xen_evtchn_singleton
;
1953 int pirq
= query
->irq
;
1960 QEMU_LOCK_GUARD(&s
->port_lock
);
1962 if (!pirq_inuse(s
, pirq
)) {
1966 if (s
->pirq
[pirq
].gsi
>= 0) {
1967 query
->flags
= XENIRQSTAT_needs_eoi
;
1975 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq
*get
)
1977 XenEvtchnState
*s
= xen_evtchn_singleton
;
1984 QEMU_LOCK_GUARD(&s
->port_lock
);
1986 pirq
= allocate_pirq(s
, get
->type
, IRQ_UNBOUND
);
1992 trace_kvm_xen_get_free_pirq(pirq
, get
->type
);
struct xenevtchn_handle *xen_be_evtchn_open(void)
{
    struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);

    xc->fd = eventfd(0, EFD_CLOEXEC);
    if (xc->fd < 0) {
        g_free(xc);
        return NULL;
    }

    return xc;
}
static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
{
    int i;

    for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
        if (!s->be_handles[i]) {
            s->be_handles[i] = xc;
            xc->be_port = i;
            return i;
        }
    }
    return 0;
}
*xc
, uint32_t domid
,
2024 evtchn_port_t guest_port
)
2026 XenEvtchnState
*s
= xen_evtchn_singleton
;
2028 uint16_t be_port
= 0;
2039 if (domid
!= xen_domid
) {
2043 if (!valid_port(guest_port
)) {
2047 qemu_mutex_lock(&s
->port_lock
);
2049 /* The guest has to have an unbound port waiting for us to bind */
2050 gp
= &s
->port_table
[guest_port
];
2053 case EVTCHNSTAT_interdomain
:
2054 /* Allow rebinding after migration, preserve port # if possible */
2055 be_port
= gp
->u
.interdomain
.port
;
2056 assert(be_port
!= 0);
2057 if (!s
->be_handles
[be_port
]) {
2058 s
->be_handles
[be_port
] = xc
;
2059 xc
->guest_port
= guest_port
;
2060 ret
= xc
->be_port
= be_port
;
2061 if (kvm_xen_has_cap(EVTCHN_SEND
)) {
2062 assign_kernel_eventfd(gp
->type
, guest_port
, xc
->fd
);
2068 case EVTCHNSTAT_unbound
:
2069 be_port
= find_be_port(s
, xc
);
2075 gp
->type
= EVTCHNSTAT_interdomain
;
2076 gp
->u
.interdomain
.to_qemu
= 1;
2077 gp
->u
.interdomain
.port
= be_port
;
2078 xc
->guest_port
= guest_port
;
2079 if (kvm_xen_has_cap(EVTCHN_SEND
)) {
2080 assign_kernel_eventfd(gp
->type
, guest_port
, xc
->fd
);
2091 qemu_mutex_unlock(&s
->port_lock
);
int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (port && port != xc->be_port) {
        ret = -EINVAL;
        goto out;
    }

    if (xc->guest_port) {
        XenEvtchnPort *gp = &s->port_table[xc->guest_port];

        /* This should never *not* be true */
        if (gp->type == EVTCHNSTAT_interdomain) {
            gp->type = EVTCHNSTAT_unbound;
            gp->u.interdomain.port = 0;
        }

        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            deassign_kernel_port(xc->guest_port);
        }
        xc->guest_port = 0;
    }

    s->be_handles[xc->be_port] = NULL;
    xc->be_port = 0;
    ret = 0;
 out:
    qemu_mutex_unlock(&s->port_lock);
    return ret;
}
int xen_be_evtchn_close(struct xenevtchn_handle *xc)
{
    if (!xc) {
        return -EFAULT;
    }

    xen_be_evtchn_unbind(xc, 0);

    close(xc->fd);
    g_free(xc);
    return 0;
}
*xc
)
int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (xc->guest_port) {
        set_port_pending(s, xc->guest_port);
        ret = 0;
    } else {
        ret = -ENOTCONN;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
{
    eventfd_t val;

    if (!xc) {
        return -EFAULT;
    }

    if (!xc->be_port) {
        return 0;
    }

    if (eventfd_read(xc->fd, &val)) {
        return -errno;
    }

    return val ? xc->be_port : 0;
}
int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    if (!xc) {
        return -EFAULT;
    }

    if (xc->be_port != port) {
        return -EINVAL;
    }

    /*
     * We don't actually do anything to unmask it; the event was already
     * consumed in xen_be_evtchn_pending().
     */
    return 0;
}
*xc
)
2225 return xc
->guest_port
;
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    EvtchnInfoList *head = NULL, **tail = &head;
    void *shinfo, *pending, *mask;
    int i;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return NULL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        error_setg(errp, "Xen shared info page not allocated");
        return NULL;
    }

    if (xen_is_long_mode()) {
        pending = shinfo + offsetof(struct shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct shared_info, evtchn_mask);
    } else {
        pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];
        EvtchnInfo *info;

        if (p->type == EVTCHNSTAT_closed) {
            continue;
        }

        info = g_new0(EvtchnInfo, 1);

        info->port = i;
        qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
        qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
        qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
        qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
        qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
        qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);

        info->type = p->type;
        if (p->type == EVTCHNSTAT_interdomain) {
            info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
                                           "qemu" : "loopback");
            info->target = p->u.interdomain.port;
        } else {
            info->target = p->u.val; /* pirq# or virq# */
        }
        info->vcpu = p->vcpu;
        info->pending = test_bit(i, pending);
        info->masked = test_bit(i, mask);

        QAPI_LIST_APPEND(tail, info);
    }

    return head;
}
void qmp_xen_event_inject(uint32_t port, Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return;
    }

    if (!valid_port(port)) {
        error_setg(errp, "Invalid port %u", port);
        return;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    if (set_port_pending(s, port)) {
        error_setg(errp, "Failed to set port %u", port);
        return;
    }
}
void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
{
    EvtchnInfoList *iter, *info_list;
    Error *err = NULL;

    info_list = qmp_xen_event_list(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (iter = info_list; iter; iter = iter->next) {
        EvtchnInfo *info = iter->value;

        monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
                       EvtchnPortType_str(info->type));
        if (info->type != EVTCHN_PORT_TYPE_IPI) {
            monitor_printf(mon,  "(");
            if (info->remote_domain) {
                monitor_printf(mon, "%s:", info->remote_domain);
            }
            monitor_printf(mon, "%d)", info->target);
        }
        if (info->pending) {
            monitor_printf(mon, " PENDING");
        }
        if (info->masked) {
            monitor_printf(mon, " MASKED");
        }
        monitor_printf(mon, "\n");
    }

    qapi_free_EvtchnInfoList(info_list);
}
*mon
, const QDict
*qdict
)
2350 int port
= qdict_get_int(qdict
, "port");
2353 qmp_xen_event_inject(port
, &err
);
2355 hmp_handle_error(mon
, err
);
2357 monitor_printf(mon
, "Delivered port %d\n", port
);