1 // SPDX-License-Identifier: GPL-2.0
3 * PCI Backend Xenbus Setup - handles setup with frontend and xend
5 * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/moduleparam.h>
11 #include <linux/init.h>
12 #include <linux/list.h>
13 #include <linux/vmalloc.h>
14 #include <linux/workqueue.h>
15 #include <xen/xenbus.h>
16 #include <xen/events.h>
20 #define INVALID_EVTCHN_IRQ (-1)
/*
 * Module parameter selecting how the PCI topology is exported to the guest.
 * It is a bool registered with S_IRUGO permissions; the description string
 * below documents the two modes (0, marked as the default, and 1).
 * NOTE(review): the text for mode 0 reads as if a verb (for example think)
 * is missing after the word frontend - confirm before touching the literal.
 */
22 static bool __read_mostly passthrough
;
23 module_param(passthrough
, bool, S_IRUGO
);
24 MODULE_PARM_DESC(passthrough
,
25 "Option to specify how to export PCI topology to guest:\n"\
26 " 0 - (default) Hide the true PCI topology and makes the frontend\n"\
27 " there is a single PCI bus with only the exported devices on it.\n"\
28 " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
29 " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
30 " 1 - Passthrough provides a real view of the PCI topology to the\n"\
31 " frontend (for example, a device at 06:01.b will still appear at\n"\
32 " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
33 " exposed PCI devices to its driver domains. This may be required\n"\
34 " for drivers which depend on finding their hardware in certain\n"\
35 " bus/slot locations.");
/*
 * alloc_pdev - allocate and initialise the backend state for a xenbus device.
 * @xdev: xenbus device the new xen_pcibk_device belongs to.
 *
 * Visible steps: zeroed allocation with kzalloc(GFP_KERNEL), initialisation
 * of dev_lock and of the op_work item (handler xen_pcibk_do_op), marking the
 * event channel IRQ as invalid, clearing be_watching, calling
 * xen_pcibk_init_devices() and storing pdev as the driver data of @xdev.
 *
 * NOTE(review): this excerpt is missing lines (no braces, failure handling
 * or return statement are visible), so the error paths and the exact return
 * value cannot be confirmed from here.
 */
37 static struct xen_pcibk_device
*alloc_pdev(struct xenbus_device
*xdev
)
39 struct xen_pcibk_device
*pdev
;
41 pdev
= kzalloc(sizeof(struct xen_pcibk_device
), GFP_KERNEL
);
44 dev_dbg(&xdev
->dev
, "allocated pdev @ 0x%p\n", pdev
);
48 mutex_init(&pdev
->dev_lock
);
/* No event channel is bound and no backend watch is registered yet. */
51 pdev
->evtchn_irq
= INVALID_EVTCHN_IRQ
;
52 pdev
->be_watching
= 0;
54 INIT_WORK(&pdev
->op_work
, xen_pcibk_do_op
);
56 if (xen_pcibk_init_devices(pdev
)) {
61 dev_set_drvdata(&xdev
->dev
, pdev
);
/*
 * xen_pcibk_disconnect - tear down the connection to the frontend.
 * @pdev: backend device state.
 *
 * With dev_lock held: unbinds the event channel IRQ handler if one is bound
 * and resets evtchn_irq to INVALID_EVTCHN_IRQ, flushes op_work, and unmaps
 * the shared memory referenced by sh_info when it is not NULL.
 */
67 static void xen_pcibk_disconnect(struct xen_pcibk_device
*pdev
)
69 mutex_lock(&pdev
->dev_lock
);
70 /* Ensure the guest can't trigger our handler before removing devices */
71 if (pdev
->evtchn_irq
!= INVALID_EVTCHN_IRQ
) {
72 unbind_from_irqhandler(pdev
->evtchn_irq
, pdev
);
73 pdev
->evtchn_irq
= INVALID_EVTCHN_IRQ
;
76 /* If the driver domain started an op, make sure we complete it
77 * before releasing the shared memory */
79 flush_work(&pdev
->op_work
);
81 if (pdev
->sh_info
!= NULL
) {
82 xenbus_unmap_ring_vfree(pdev
->xdev
, pdev
->sh_info
);
85 mutex_unlock(&pdev
->dev_lock
);
/*
 * free_pdev - release the resources held by a xen_pcibk_device.
 * @pdev: backend device state.
 *
 * Unregisters the backend watch when be_watching is set, disconnects from
 * the frontend, releases the exported PCI devices and clears the driver
 * data pointer of the owning xenbus device.
 *
 * NOTE(review): the final release of @pdev itself is not visible in this
 * excerpt - confirm against the complete file.
 */
88 static void free_pdev(struct xen_pcibk_device
*pdev
)
90 if (pdev
->be_watching
) {
91 unregister_xenbus_watch(&pdev
->be_watch
);
92 pdev
->be_watching
= 0;
95 xen_pcibk_disconnect(pdev
);
97 /* N.B. This calls pcistub_put_pci_dev which does the FLR on all
98 * of the PCIe devices. */
99 xen_pcibk_release_devices(pdev
);
101 dev_set_drvdata(&pdev
->xdev
->dev
, NULL
);
/*
 * xen_pcibk_do_attach - map the frontend page and bind its event channel.
 * @pdev: backend device state.
 * @gnt_ref: grant reference passed to xenbus_map_ring_valloc().
 * @remote_evtchn: event channel port of the frontend.
 *
 * Maps one granted page and stores the resulting address in sh_info, then
 * binds @remote_evtchn to xen_pcibk_handle_event through
 * bind_interdomain_evtchn_to_irqhandler_lateeoi() and stores the value it
 * returned in evtchn_irq.  Failures are reported with xenbus_dev_fatal().
 *
 * NOTE(review): the conditions guarding the error reports, the remaining
 * arguments of the bind call and the return statements are missing from
 * this excerpt; the int result is presumably 0 or a negative errno - confirm.
 */
107 static int xen_pcibk_do_attach(struct xen_pcibk_device
*pdev
, int gnt_ref
,
108 evtchn_port_t remote_evtchn
)
113 dev_dbg(&pdev
->xdev
->dev
,
114 "Attaching to frontend resources - gnt_ref=%d evtchn=%u\n",
115 gnt_ref
, remote_evtchn
);
/* Map a single granted page into this domain. */
117 err
= xenbus_map_ring_valloc(pdev
->xdev
, &gnt_ref
, 1, &vaddr
);
119 xenbus_dev_fatal(pdev
->xdev
, err
,
120 "Error mapping other domain page in ours.");
124 pdev
->sh_info
= vaddr
;
126 err
= bind_interdomain_evtchn_to_irqhandler_lateeoi(
127 pdev
->xdev
, remote_evtchn
, xen_pcibk_handle_event
,
130 xenbus_dev_fatal(pdev
->xdev
, err
,
131 "Error binding event channel to IRQ");
/* The value returned by the bind call is kept as the IRQ number. */
134 pdev
->evtchn_irq
= err
;
137 dev_dbg(&pdev
->xdev
->dev
, "Attached!\n");
/*
 * xen_pcibk_attach - connect to the frontend once its configuration exists.
 * @pdev: backend device state.
 *
 * With dev_lock held: compares the state of this backend node and of the
 * frontend node against XenbusStateInitialised, gathers pci-op-ref,
 * event-channel and magic from the frontend node, compares magic with
 * XEN_PCI_MAGIC, attaches through xen_pcibk_do_attach() and switches this
 * device to XenbusStateConnected.
 *
 * NOTE(review): the branch bodies taken when a check fails, the local
 * declarations and the return statement are missing from this excerpt;
 * the int result is presumably 0 or a negative errno - confirm.
 */
142 static int xen_pcibk_attach(struct xen_pcibk_device
*pdev
)
146 evtchn_port_t remote_evtchn
;
150 mutex_lock(&pdev
->dev_lock
);
151 /* Make sure we only do this setup once */
152 if (xenbus_read_driver_state(pdev
->xdev
->nodename
) !=
153 XenbusStateInitialised
)
156 /* Wait for frontend to state that it has published the configuration */
157 if (xenbus_read_driver_state(pdev
->xdev
->otherend
) !=
158 XenbusStateInitialised
)
161 dev_dbg(&pdev
->xdev
->dev
, "Reading frontend config\n");
163 err
= xenbus_gather(XBT_NIL
, pdev
->xdev
->otherend
,
164 "pci-op-ref", "%u", &gnt_ref
,
165 "event-channel", "%u", &remote_evtchn
,
166 "magic", NULL
, &magic
, NULL
);
168 /* If configuration didn't get read correctly, wait longer */
169 xenbus_dev_fatal(pdev
->xdev
, err
,
170 "Error reading configuration from frontend");
/* A missing or different magic string is reported as a version mismatch. */
174 if (magic
== NULL
|| strcmp(magic
, XEN_PCI_MAGIC
) != 0) {
175 xenbus_dev_fatal(pdev
->xdev
, -EFAULT
,
176 "version mismatch (%s/%s) with pcifront - "
178 magic
, XEN_PCI_MAGIC
);
183 err
= xen_pcibk_do_attach(pdev
, gnt_ref
, remote_evtchn
);
187 dev_dbg(&pdev
->xdev
->dev
, "Connecting...\n");
189 err
= xenbus_switch_state(pdev
->xdev
, XenbusStateConnected
);
191 xenbus_dev_fatal(pdev
->xdev
, err
,
192 "Error switching to connected state!");
194 dev_dbg(&pdev
->xdev
->dev
, "Connected? %d\n", err
);
196 mutex_unlock(&pdev
->dev_lock
);
/*
 * xen_pcibk_publish_pci_dev - write the vdev-<devid> entry for one device.
 * @pdev: backend device state.
 * @domain: PCI domain number to publish.
 * @bus: PCI bus number to publish.
 * @devfn: combined slot/function value, split with PCI_SLOT() and PCI_FUNC().
 * @devid: index used to build the entry name.
 *
 * Formats the entry name into the local buffer str, checks the formatted
 * length against sizeof(str) - 1, and writes domain:bus:slot.func under the
 * backend node with xenbus_printf().
 *
 * NOTE(review): @devid is unsigned int but is formatted with %d below; %u
 * would match the type.  The declarations, the body of the length check and
 * the return statement are missing from this excerpt.
 */
203 static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device
*pdev
,
204 unsigned int domain
, unsigned int bus
,
205 unsigned int devfn
, unsigned int devid
)
211 len
= snprintf(str
, sizeof(str
), "vdev-%d", devid
);
212 if (unlikely(len
>= (sizeof(str
) - 1))) {
217 /* Note: The PV protocol uses %02x, don't change it */
218 err
= xenbus_printf(XBT_NIL
, pdev
->xdev
->nodename
, str
,
219 "%04x:%02x:%02x.%02x", domain
, bus
,
220 PCI_SLOT(devfn
), PCI_FUNC(devfn
));
/*
 * xen_pcibk_export_device - give one host PCI device to the frontend domain.
 * @pdev: backend device state.
 * @domain: PCI domain number of the device.
 * @bus: PCI bus number of the device.
 * @slot: PCI slot number of the device.
 * @func: PCI function number of the device.
 *
 * Looks the device up with pcistub_get_pci_dev_by_slot(), adds it to this
 * backend through xen_pcibk_add_pci_dev() with xen_pcibk_publish_pci_dev as
 * the publish callback, and registers the frontend domain (otherend_id) as
 * the owner of the device.  When that registration fails, the current owner
 * is logged and unregistered and the registration is repeated.
 *
 * NOTE(review): the rest of the parameter list (a devid value is used
 * below), the failure checks and the return statement are missing from this
 * excerpt, and the trailing TODO comment has lost its closing line.
 */
226 static int xen_pcibk_export_device(struct xen_pcibk_device
*pdev
,
227 int domain
, int bus
, int slot
, int func
,
233 dev_dbg(&pdev
->xdev
->dev
, "exporting dom %x bus %x slot %x func %x\n",
234 domain
, bus
, slot
, func
);
236 dev
= pcistub_get_pci_dev_by_slot(pdev
, domain
, bus
, slot
, func
);
239 xenbus_dev_fatal(pdev
->xdev
, err
,
240 "Couldn't locate PCI device "
241 "(%04x:%02x:%02x.%d)! "
242 "perhaps already in-use?",
243 domain
, bus
, slot
, func
);
247 err
= xen_pcibk_add_pci_dev(pdev
, dev
, devid
,
248 xen_pcibk_publish_pci_dev
);
252 dev_info(&dev
->dev
, "registering for %d\n", pdev
->xdev
->otherend_id
);
253 if (xen_register_device_domain_owner(dev
,
254 pdev
->xdev
->otherend_id
) != 0) {
255 dev_err(&dev
->dev
, "Stealing ownership from dom%d.\n",
256 xen_find_device_domain_owner(dev
));
257 xen_unregister_device_domain_owner(dev
);
258 xen_register_device_domain_owner(dev
, pdev
->xdev
->otherend_id
);
261 /* TODO: It'd be nice to export a bridge and have all of its children
262 * get exported with it. This may be best done in xend (which will
263 * have to calculate resource usage anyway) but we probably want to
264 * put something in here to ensure that if a bridge gets given to a
265 * driver domain, that all devices under that bridge are not given
266 * to other driver domains (as he who controls the bridge can disable
267 * it and stop the other devices from working).
/*
 * xen_pcibk_remove_device - take an exported PCI device back from the frontend.
 * @pdev: backend device state.
 * @domain: PCI domain number of the device.
 * @bus: PCI bus number of the device.
 * @slot: PCI slot number of the device.
 * @func: PCI function number of the device.
 *
 * Finds the device with xen_pcibk_get_pci_dev() using PCI_DEVFN(slot, func),
 * drops its domain ownership registration and releases it through
 * xen_pcibk_release_pci_dev(), passing true so that the callee uses the lock.
 *
 * NOTE(review): the not-found check and the return statement are missing
 * from this excerpt, and the N.B. comment near the end is truncated.
 */
273 static int xen_pcibk_remove_device(struct xen_pcibk_device
*pdev
,
274 int domain
, int bus
, int slot
, int func
)
279 dev_dbg(&pdev
->xdev
->dev
, "removing dom %x bus %x slot %x func %x\n",
280 domain
, bus
, slot
, func
);
282 dev
= xen_pcibk_get_pci_dev(pdev
, domain
, bus
, PCI_DEVFN(slot
, func
));
285 dev_dbg(&pdev
->xdev
->dev
, "Couldn't locate PCI device "
286 "(%04x:%02x:%02x.%d)! not owned by this domain\n",
287 domain
, bus
, slot
, func
);
291 dev_dbg(&dev
->dev
, "unregistering for %d\n", pdev
->xdev
->otherend_id
);
292 xen_unregister_device_domain_owner(dev
);
294 /* N.B. This ends up calling pcistub_put_pci_dev which ends up
296 xen_pcibk_release_pci_dev(pdev
, dev
, true /* use the lock. */);
/*
 * xen_pcibk_publish_pci_root - publish one PCI root (domain:bus) entry.
 * @pdev: backend device state.
 * @domain: PCI domain number of the root.
 * @bus: PCI bus number of the root.
 *
 * Reads root_num from the backend node, walks the existing root-<i> entries
 * comparing each stored domain:bus pair with @domain and @bus, then writes
 * a root-<root_num> entry and updates root_num to root_num + 1.
 *
 * NOTE(review): the bodies of the checks (including what happens when a
 * matching root is found) and the return statement are missing from this
 * excerpt; presumably a match ends the function early - confirm.
 */
302 static int xen_pcibk_publish_pci_root(struct xen_pcibk_device
*pdev
,
303 unsigned int domain
, unsigned int bus
)
306 int i
, root_num
, len
, err
;
309 dev_dbg(&pdev
->xdev
->dev
, "Publishing pci roots\n");
311 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
,
312 "root_num", "%d", &root_num
);
313 if (err
== 0 || err
== -ENOENT
)
318 /* Verify that we haven't already published this pci root */
319 for (i
= 0; i
< root_num
; i
++) {
320 len
= snprintf(str
, sizeof(str
), "root-%d", i
);
321 if (unlikely(len
>= (sizeof(str
) - 1))) {
326 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
,
327 str
, "%x:%x", &d
, &b
);
335 if (d
== domain
&& b
== bus
) {
/* Build the name of the next free root entry. */
341 len
= snprintf(str
, sizeof(str
), "root-%d", root_num
);
342 if (unlikely(len
>= (sizeof(str
) - 1))) {
347 dev_dbg(&pdev
->xdev
->dev
, "writing root %d at %04x:%02x\n",
348 root_num
, domain
, bus
);
350 err
= xenbus_printf(XBT_NIL
, pdev
->xdev
->nodename
, str
,
351 "%04x:%02x", domain
, bus
);
355 err
= xenbus_printf(XBT_NIL
, pdev
->xdev
->nodename
,
356 "root_num", "%d", (root_num
+ 1));
/*
 * xen_pcibk_reconfigure - act on the per-device state entries of the backend.
 * @pdev: backend device state.
 * @state: xenbus state that the backend node is compared against on entry.
 *
 * With dev_lock held: reads num_devs and, for every index i, reads the
 * state-<i> entry.  For XenbusStateInitialising the dev-<i> entry is parsed
 * as domain:bus:slot.func and exported with xen_pcibk_export_device(), the
 * PCI roots are published and a state entry is written with
 * XenbusStateInitialised.  For XenbusStateClosing the vdev-<i> entry is
 * parsed and the device is removed with xen_pcibk_remove_device().
 * Afterwards @state is compared with XenbusStateReconfiguring and the
 * device is switched to XenbusStateReconfigured.
 *
 * NOTE(review): many lines (declarations, the switch statement, failure
 * checks, several call arguments, return) are missing from this excerpt, so
 * the exact control flow between the visible calls cannot be confirmed.
 */
362 static int xen_pcibk_reconfigure(struct xen_pcibk_device
*pdev
,
363 enum xenbus_state state
)
367 int domain
, bus
, slot
, func
;
368 unsigned int substate
;
374 dev_dbg(&pdev
->xdev
->dev
, "Reconfiguring device ...\n");
376 mutex_lock(&pdev
->dev_lock
);
377 if (xenbus_read_driver_state(pdev
->xdev
->nodename
) != state
)
380 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
, "num_devs", "%d",
385 xenbus_dev_fatal(pdev
->xdev
, err
,
386 "Error reading number of devices");
390 for (i
= 0; i
< num_devs
; i
++) {
391 len
= snprintf(state_str
, sizeof(state_str
), "state-%d", i
);
392 if (unlikely(len
>= (sizeof(state_str
) - 1))) {
394 xenbus_dev_fatal(pdev
->xdev
, err
,
395 "String overflow while reading "
399 substate
= xenbus_read_unsigned(pdev
->xdev
->nodename
, state_str
,
403 case XenbusStateInitialising
:
404 dev_dbg(&pdev
->xdev
->dev
, "Attaching dev-%d ...\n", i
);
406 len
= snprintf(dev_str
, sizeof(dev_str
), "dev-%d", i
);
407 if (unlikely(len
>= (sizeof(dev_str
) - 1))) {
409 xenbus_dev_fatal(pdev
->xdev
, err
,
410 "String overflow while "
411 "reading configuration");
414 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
,
415 dev_str
, "%x:%x:%x.%x",
416 &domain
, &bus
, &slot
, &func
);
418 xenbus_dev_fatal(pdev
->xdev
, err
,
419 "Error reading device "
425 xenbus_dev_fatal(pdev
->xdev
, err
,
426 "Error parsing pci device "
431 err
= xen_pcibk_export_device(pdev
, domain
, bus
, slot
,
436 /* Publish pci roots. */
437 err
= xen_pcibk_publish_pci_roots(pdev
,
438 xen_pcibk_publish_pci_root
);
/*
 * NOTE(review): the two adjacent literals below are joined without a
 * separating space, so the message reads rootbuses - confirm and fix in
 * the complete file.
 */
440 xenbus_dev_fatal(pdev
->xdev
, err
,
441 "Error while publish PCI root"
442 "buses for frontend");
446 err
= xenbus_printf(XBT_NIL
, pdev
->xdev
->nodename
,
448 XenbusStateInitialised
);
450 xenbus_dev_fatal(pdev
->xdev
, err
,
451 "Error switching substate of "
457 case XenbusStateClosing
:
458 dev_dbg(&pdev
->xdev
->dev
, "Detaching dev-%d ...\n", i
);
460 len
= snprintf(dev_str
, sizeof(dev_str
), "vdev-%d", i
);
461 if (unlikely(len
>= (sizeof(dev_str
) - 1))) {
463 xenbus_dev_fatal(pdev
->xdev
, err
,
464 "String overflow while "
465 "reading configuration");
468 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
,
469 dev_str
, "%x:%x:%x.%x",
470 &domain
, &bus
, &slot
, &func
);
472 xenbus_dev_fatal(pdev
->xdev
, err
,
473 "Error reading device "
479 xenbus_dev_fatal(pdev
->xdev
, err
,
480 "Error parsing pci device "
485 err
= xen_pcibk_remove_device(pdev
, domain
, bus
, slot
,
490 /* TODO: If at some point we implement support for pci
491 * root hot-remove on pcifront side, we'll need to
492 * remove unnecessary xenstore nodes of pci roots here.
502 if (state
!= XenbusStateReconfiguring
)
503 /* Make sure we only reconfigure once. */
506 err
= xenbus_switch_state(pdev
->xdev
, XenbusStateReconfigured
);
508 xenbus_dev_fatal(pdev
->xdev
, err
,
509 "Error switching to reconfigured state!");
514 mutex_unlock(&pdev
->dev_lock
);
/*
 * xen_pcibk_frontend_changed - react to a frontend state change.
 * @xdev: xenbus device whose frontend changed state.
 * @fe_state: new frontend state (also logged at debug level).
 *
 * Installed as the otherend_changed callback of xen_pcibk_driver.  The
 * visible case labels map states to actions as follows:
 *   XenbusStateInitialised   - xen_pcibk_attach()
 *   XenbusStateReconfiguring - xen_pcibk_reconfigure() with that state
 *   XenbusStateConnected     - switch this device to XenbusStateConnected
 *   XenbusStateClosing       - disconnect, switch to XenbusStateClosing
 *   XenbusStateClosed        - disconnect, switch to XenbusStateClosed, then
 *                              test xenbus_dev_is_online() and otherwise fall
 *                              through to the next case
 *   XenbusStateUnknown       - unregister the device
 *
 * NOTE(review): the switch statement, the break statements and the branch
 * taken when the device is online are missing from this excerpt; presumably
 * the labels switch on @fe_state - confirm.
 */
518 static void xen_pcibk_frontend_changed(struct xenbus_device
*xdev
,
519 enum xenbus_state fe_state
)
521 struct xen_pcibk_device
*pdev
= dev_get_drvdata(&xdev
->dev
);
523 dev_dbg(&xdev
->dev
, "fe state changed %d\n", fe_state
);
526 case XenbusStateInitialised
:
527 xen_pcibk_attach(pdev
);
530 case XenbusStateReconfiguring
:
531 xen_pcibk_reconfigure(pdev
, XenbusStateReconfiguring
);
534 case XenbusStateConnected
:
535 /* pcifront switched its state from reconfiguring to connected.
536 * Then switch to connected state.
538 xenbus_switch_state(xdev
, XenbusStateConnected
);
541 case XenbusStateClosing
:
542 xen_pcibk_disconnect(pdev
);
543 xenbus_switch_state(xdev
, XenbusStateClosing
);
546 case XenbusStateClosed
:
547 xen_pcibk_disconnect(pdev
);
548 xenbus_switch_state(xdev
, XenbusStateClosed
);
549 if (xenbus_dev_is_online(xdev
))
551 fallthrough
; /* if not online */
552 case XenbusStateUnknown
:
553 dev_dbg(&xdev
->dev
, "frontend is gone! unregister device\n");
554 device_unregister(&xdev
->dev
);
/*
 * xen_pcibk_setup_backend - export the configured devices and go Initialised.
 * @pdev: backend device state.
 *
 * With dev_lock held: checks the state of the backend node, reads num_devs
 * and, for every index i, parses the dev-<i> entry as domain:bus:slot.func,
 * exports the device with xen_pcibk_export_device() and writes
 * XenbusStateInitialised to the state-<i> entry.  It then publishes the PCI
 * roots and switches the device to XenbusStateInitialised.  After dropping
 * the lock it calls xen_pcibk_attach() in case the frontend is already
 * configured.
 *
 * NOTE(review): declarations, failure checks, the state compared against on
 * entry and the return statement are missing from this excerpt.
 */
562 static int xen_pcibk_setup_backend(struct xen_pcibk_device
*pdev
)
564 /* Get configuration from xend (if available now) */
565 int domain
, bus
, slot
, func
;
571 mutex_lock(&pdev
->dev_lock
);
572 /* It's possible we could get the call to setup twice, so make sure
573 * we're not already connected.
575 if (xenbus_read_driver_state(pdev
->xdev
->nodename
) !=
579 dev_dbg(&pdev
->xdev
->dev
, "getting be setup\n");
581 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
, "num_devs", "%d",
586 xenbus_dev_fatal(pdev
->xdev
, err
,
587 "Error reading number of devices");
591 for (i
= 0; i
< num_devs
; i
++) {
592 int l
= snprintf(dev_str
, sizeof(dev_str
), "dev-%d", i
);
593 if (unlikely(l
>= (sizeof(dev_str
) - 1))) {
595 xenbus_dev_fatal(pdev
->xdev
, err
,
596 "String overflow while reading "
601 err
= xenbus_scanf(XBT_NIL
, pdev
->xdev
->nodename
, dev_str
,
602 "%x:%x:%x.%x", &domain
, &bus
, &slot
, &func
);
604 xenbus_dev_fatal(pdev
->xdev
, err
,
605 "Error reading device configuration");
610 xenbus_dev_fatal(pdev
->xdev
, err
,
611 "Error parsing pci device "
616 err
= xen_pcibk_export_device(pdev
, domain
, bus
, slot
, func
, i
);
620 /* Switch substate of this device. */
621 l
= snprintf(state_str
, sizeof(state_str
), "state-%d", i
);
622 if (unlikely(l
>= (sizeof(state_str
) - 1))) {
624 xenbus_dev_fatal(pdev
->xdev
, err
,
625 "String overflow while reading "
629 err
= xenbus_printf(XBT_NIL
, pdev
->xdev
->nodename
, state_str
,
630 "%d", XenbusStateInitialised
);
632 xenbus_dev_fatal(pdev
->xdev
, err
, "Error switching "
633 "substate of dev-%d\n", i
);
638 err
= xen_pcibk_publish_pci_roots(pdev
, xen_pcibk_publish_pci_root
);
640 xenbus_dev_fatal(pdev
->xdev
, err
,
641 "Error while publish PCI root buses "
646 err
= xenbus_switch_state(pdev
->xdev
, XenbusStateInitialised
);
648 xenbus_dev_fatal(pdev
->xdev
, err
,
649 "Error switching to initialised state!");
652 mutex_unlock(&pdev
->dev_lock
);
654 /* see if pcifront is already configured (if not, we'll wait) */
655 xen_pcibk_attach(pdev
);
/*
 * xen_pcibk_be_watch - watch callback for the backend node.
 * @watch: the be_watch member embedded in a xen_pcibk_device.
 * @path: not used in the visible code.
 * @token: not used in the visible code.
 *
 * Recovers the owning xen_pcibk_device with container_of() and dispatches
 * on the current state of the backend node: XenbusStateInitWait runs
 * xen_pcibk_setup_backend(), XenbusStateInitialised runs
 * xen_pcibk_reconfigure() with that state.
 *
 * NOTE(review): break statements, any default case and part of the comment
 * inside the second case are missing from this excerpt.
 */
659 static void xen_pcibk_be_watch(struct xenbus_watch
*watch
,
660 const char *path
, const char *token
)
662 struct xen_pcibk_device
*pdev
=
663 container_of(watch
, struct xen_pcibk_device
, be_watch
);
665 switch (xenbus_read_driver_state(pdev
->xdev
->nodename
)) {
666 case XenbusStateInitWait
:
667 xen_pcibk_setup_backend(pdev
);
670 case XenbusStateInitialised
:
672 * We typically move to Initialised when the first device was
673 * added. Hence subsequent devices getting added may need
676 xen_pcibk_reconfigure(pdev
, XenbusStateInitialised
);
/*
 * xen_pcibk_xenbus_probe - probe callback of the xenbus backend driver.
 * @dev: xenbus device being probed.
 * @id: matching entry of the device id table (not used in the visible code).
 *
 * Allocates the backend state with alloc_pdev(), switches the device to
 * XenbusStateInitWait, registers xen_pcibk_be_watch as a watch on the
 * backend node, marks be_watching and then invokes the watch callback once
 * by hand with NULL path and token.
 *
 * NOTE(review): the failure checks, cleanup and return statement are
 * missing from this excerpt; the int result is presumably 0 or a negative
 * errno - confirm.
 */
684 static int xen_pcibk_xenbus_probe(struct xenbus_device
*dev
,
685 const struct xenbus_device_id
*id
)
688 struct xen_pcibk_device
*pdev
= alloc_pdev(dev
);
692 xenbus_dev_fatal(dev
, err
,
693 "Error allocating xen_pcibk_device struct");
697 /* wait for xend to configure us */
698 err
= xenbus_switch_state(dev
, XenbusStateInitWait
);
702 /* watch the backend node for backend configuration information */
703 err
= xenbus_watch_path(dev
, dev
->nodename
, &pdev
->be_watch
,
704 NULL
, xen_pcibk_be_watch
);
708 pdev
->be_watching
= 1;
710 /* We need to force a call to our callback here in case
711 * xend already configured us!
713 xen_pcibk_be_watch(&pdev
->be_watch
, NULL
, NULL
);
/*
 * xen_pcibk_xenbus_remove - remove callback of the xenbus backend driver.
 * @dev: xenbus device being removed.
 *
 * Fetches the xen_pcibk_device stored as driver data of @dev.
 * NOTE(review): the rest of the body is missing from this excerpt;
 * presumably the state is released with free_pdev() - confirm.
 */
719 static void xen_pcibk_xenbus_remove(struct xenbus_device
*dev
)
721 struct xen_pcibk_device
*pdev
= dev_get_drvdata(&dev
->dev
);
/*
 * Device id table referenced by xen_pcibk_driver.ids.
 * NOTE(review): the table entries are missing from this excerpt.
 */
727 static const struct xenbus_device_id xen_pcibk_ids
[] = {
/*
 * xenbus driver description: ties the id table to the probe, remove and
 * frontend state change callbacks defined in this file.  It is passed to
 * xenbus_register_backend() and xenbus_unregister_driver() by the
 * register/unregister functions of this file.
 */
732 static struct xenbus_driver xen_pcibk_driver
= {
734 .ids
= xen_pcibk_ids
,
735 .probe
= xen_pcibk_xenbus_probe
,
736 .remove
= xen_pcibk_xenbus_remove
,
737 .otherend_changed
= xen_pcibk_frontend_changed
,
/*
 * Backend implementation currently in use.  It is assigned in
 * xen_pcibk_xenbus_register() to either the vpci or the passthrough backend.
 */
740 const struct xen_pcibk_backend
*__read_mostly xen_pcibk_backend
;
/*
 * xen_pcibk_xenbus_register - choose the backend flavour and register.
 *
 * Tests xen_pcibk_pv_support() first, then points xen_pcibk_backend at
 * either xen_pcibk_vpci_backend or xen_pcibk_passthrough_backend, logs the
 * name of the chosen backend and returns the result of
 * xenbus_register_backend() for xen_pcibk_driver.
 *
 * NOTE(review): the statement run when PV support is absent and the
 * condition selecting between the two backends are missing from this
 * excerpt; presumably the selection follows the passthrough module
 * parameter - confirm.
 */
742 int __init
xen_pcibk_xenbus_register(void)
744 if (!xen_pcibk_pv_support())
747 xen_pcibk_backend
= &xen_pcibk_vpci_backend
;
749 xen_pcibk_backend
= &xen_pcibk_passthrough_backend
;
750 pr_info("backend is %s\n", xen_pcibk_backend
->name
);
751 return xenbus_register_backend(&xen_pcibk_driver
);
/*
 * xen_pcibk_xenbus_unregister - undo xen_pcibk_xenbus_register().
 *
 * Unregisters xen_pcibk_driver, but only when xen_pcibk_pv_support()
 * reports true, mirroring the check made at registration time.
 */
754 void __exit
xen_pcibk_xenbus_unregister(void)
756 if (xen_pcibk_pv_support())
757 xenbus_unregister_driver(&xen_pcibk_driver
);