dccp: do not assume DCCP code is non preemptible
[linux/fpc-iii.git] / arch / x86 / kvm / assigned-dev.c
blob308b8597c6913c0c2010e7119ea1350d1bf46a3c
1 /*
2 * Kernel-based Virtual Machine - device assignment support
4 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
9 */
11 #include <linux/kvm_host.h>
12 #include <linux/kvm.h>
13 #include <linux/uaccess.h>
14 #include <linux/vmalloc.h>
15 #include <linux/errno.h>
16 #include <linux/spinlock.h>
17 #include <linux/pci.h>
18 #include <linux/interrupt.h>
19 #include <linux/slab.h>
20 #include <linux/namei.h>
21 #include <linux/fs.h>
22 #include "irq.h"
23 #include "assigned-dev.h"
24 #include "trace/events/kvm.h"
26 struct kvm_assigned_dev_kernel {
27 struct kvm_irq_ack_notifier ack_notifier;
28 struct list_head list;
29 int assigned_dev_id;
30 int host_segnr;
31 int host_busnr;
32 int host_devfn;
33 unsigned int entries_nr;
34 int host_irq;
35 bool host_irq_disabled;
36 bool pci_2_3;
37 struct msix_entry *host_msix_entries;
38 int guest_irq;
39 struct msix_entry *guest_msix_entries;
40 unsigned long irq_requested_type;
41 int irq_source_id;
42 int flags;
43 struct pci_dev *dev;
44 struct kvm *kvm;
45 spinlock_t intx_lock;
46 spinlock_t intx_mask_lock;
47 char irq_name[32];
48 struct pci_saved_state *pci_saved_state;
51 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
52 int assigned_dev_id)
54 struct kvm_assigned_dev_kernel *match;
56 list_for_each_entry(match, head, list) {
57 if (match->assigned_dev_id == assigned_dev_id)
58 return match;
60 return NULL;
63 static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
64 *assigned_dev, int irq)
66 int i, index;
67 struct msix_entry *host_msix_entries;
69 host_msix_entries = assigned_dev->host_msix_entries;
71 index = -1;
72 for (i = 0; i < assigned_dev->entries_nr; i++)
73 if (irq == host_msix_entries[i].vector) {
74 index = i;
75 break;
77 if (index < 0)
78 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
80 return index;
83 static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
85 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
86 int ret;
88 spin_lock(&assigned_dev->intx_lock);
89 if (pci_check_and_mask_intx(assigned_dev->dev)) {
90 assigned_dev->host_irq_disabled = true;
91 ret = IRQ_WAKE_THREAD;
92 } else
93 ret = IRQ_NONE;
94 spin_unlock(&assigned_dev->intx_lock);
96 return ret;
99 static void
100 kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
101 int vector)
103 if (unlikely(assigned_dev->irq_requested_type &
104 KVM_DEV_IRQ_GUEST_INTX)) {
105 spin_lock(&assigned_dev->intx_mask_lock);
106 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
107 kvm_set_irq(assigned_dev->kvm,
108 assigned_dev->irq_source_id, vector, 1,
109 false);
110 spin_unlock(&assigned_dev->intx_mask_lock);
111 } else
112 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
113 vector, 1, false);
116 static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
118 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
120 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
121 spin_lock_irq(&assigned_dev->intx_lock);
122 disable_irq_nosync(irq);
123 assigned_dev->host_irq_disabled = true;
124 spin_unlock_irq(&assigned_dev->intx_lock);
127 kvm_assigned_dev_raise_guest_irq(assigned_dev,
128 assigned_dev->guest_irq);
130 return IRQ_HANDLED;
134 * Deliver an IRQ in an atomic context if we can, or return a failure,
135 * user can retry in a process context.
136 * Return value:
137 * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
138 * Other values - No need to retry.
140 static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
141 int level)
143 struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
144 struct kvm_kernel_irq_routing_entry *e;
145 int ret = -EINVAL;
146 int idx;
148 trace_kvm_set_irq(irq, level, irq_source_id);
151 * Injection into either PIC or IOAPIC might need to scan all CPUs,
152 * which would need to be retried from thread context; when same GSI
153 * is connected to both PIC and IOAPIC, we'd have to report a
154 * partial failure here.
155 * Since there's no easy way to do this, we only support injecting MSI
156 * which is limited to 1:1 GSI mapping.
158 idx = srcu_read_lock(&kvm->irq_srcu);
159 if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
160 e = &entries[0];
161 ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
162 irq, level);
164 srcu_read_unlock(&kvm->irq_srcu, idx);
165 return ret;
169 static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
171 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
172 int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
173 assigned_dev->irq_source_id,
174 assigned_dev->guest_irq, 1);
175 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
178 static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
180 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
182 kvm_assigned_dev_raise_guest_irq(assigned_dev,
183 assigned_dev->guest_irq);
185 return IRQ_HANDLED;
188 static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
190 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
191 int index = find_index_from_host_irq(assigned_dev, irq);
192 u32 vector;
193 int ret = 0;
195 if (index >= 0) {
196 vector = assigned_dev->guest_msix_entries[index].vector;
197 ret = kvm_set_irq_inatomic(assigned_dev->kvm,
198 assigned_dev->irq_source_id,
199 vector, 1);
202 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
205 static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
207 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
208 int index = find_index_from_host_irq(assigned_dev, irq);
209 u32 vector;
211 if (index >= 0) {
212 vector = assigned_dev->guest_msix_entries[index].vector;
213 kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
216 return IRQ_HANDLED;
219 /* Ack the irq line for an assigned device */
220 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
222 struct kvm_assigned_dev_kernel *dev =
223 container_of(kian, struct kvm_assigned_dev_kernel,
224 ack_notifier);
226 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
228 spin_lock(&dev->intx_mask_lock);
230 if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
231 bool reassert = false;
233 spin_lock_irq(&dev->intx_lock);
235 * The guest IRQ may be shared so this ack can come from an
236 * IRQ for another guest device.
238 if (dev->host_irq_disabled) {
239 if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
240 enable_irq(dev->host_irq);
241 else if (!pci_check_and_unmask_intx(dev->dev))
242 reassert = true;
243 dev->host_irq_disabled = reassert;
245 spin_unlock_irq(&dev->intx_lock);
247 if (reassert)
248 kvm_set_irq(dev->kvm, dev->irq_source_id,
249 dev->guest_irq, 1, false);
252 spin_unlock(&dev->intx_mask_lock);
255 static void deassign_guest_irq(struct kvm *kvm,
256 struct kvm_assigned_dev_kernel *assigned_dev)
258 if (assigned_dev->ack_notifier.gsi != -1)
259 kvm_unregister_irq_ack_notifier(kvm,
260 &assigned_dev->ack_notifier);
262 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
263 assigned_dev->guest_irq, 0, false);
265 if (assigned_dev->irq_source_id != -1)
266 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
267 assigned_dev->irq_source_id = -1;
268 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
271 /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
272 static void deassign_host_irq(struct kvm *kvm,
273 struct kvm_assigned_dev_kernel *assigned_dev)
276 * We disable irq here to prevent further events.
278 * Notice this maybe result in nested disable if the interrupt type is
279 * INTx, but it's OK for we are going to free it.
281 * If this function is a part of VM destroy, please ensure that till
282 * now, the kvm state is still legal for probably we also have to wait
283 * on a currently running IRQ handler.
285 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
286 int i;
287 for (i = 0; i < assigned_dev->entries_nr; i++)
288 disable_irq(assigned_dev->host_msix_entries[i].vector);
290 for (i = 0; i < assigned_dev->entries_nr; i++)
291 free_irq(assigned_dev->host_msix_entries[i].vector,
292 assigned_dev);
294 assigned_dev->entries_nr = 0;
295 kfree(assigned_dev->host_msix_entries);
296 kfree(assigned_dev->guest_msix_entries);
297 pci_disable_msix(assigned_dev->dev);
298 } else {
299 /* Deal with MSI and INTx */
300 if ((assigned_dev->irq_requested_type &
301 KVM_DEV_IRQ_HOST_INTX) &&
302 (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
303 spin_lock_irq(&assigned_dev->intx_lock);
304 pci_intx(assigned_dev->dev, false);
305 spin_unlock_irq(&assigned_dev->intx_lock);
306 synchronize_irq(assigned_dev->host_irq);
307 } else
308 disable_irq(assigned_dev->host_irq);
310 free_irq(assigned_dev->host_irq, assigned_dev);
312 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
313 pci_disable_msi(assigned_dev->dev);
316 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
319 static int kvm_deassign_irq(struct kvm *kvm,
320 struct kvm_assigned_dev_kernel *assigned_dev,
321 unsigned long irq_requested_type)
323 unsigned long guest_irq_type, host_irq_type;
325 if (!irqchip_in_kernel(kvm))
326 return -EINVAL;
327 /* no irq assignment to deassign */
328 if (!assigned_dev->irq_requested_type)
329 return -ENXIO;
331 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
332 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
334 if (host_irq_type)
335 deassign_host_irq(kvm, assigned_dev);
336 if (guest_irq_type)
337 deassign_guest_irq(kvm, assigned_dev);
339 return 0;
342 static void kvm_free_assigned_irq(struct kvm *kvm,
343 struct kvm_assigned_dev_kernel *assigned_dev)
345 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
348 static void kvm_free_assigned_device(struct kvm *kvm,
349 struct kvm_assigned_dev_kernel
350 *assigned_dev)
352 kvm_free_assigned_irq(kvm, assigned_dev);
354 pci_reset_function(assigned_dev->dev);
355 if (pci_load_and_free_saved_state(assigned_dev->dev,
356 &assigned_dev->pci_saved_state))
357 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
358 __func__, dev_name(&assigned_dev->dev->dev));
359 else
360 pci_restore_state(assigned_dev->dev);
362 pci_clear_dev_assigned(assigned_dev->dev);
364 pci_release_regions(assigned_dev->dev);
365 pci_disable_device(assigned_dev->dev);
366 pci_dev_put(assigned_dev->dev);
368 list_del(&assigned_dev->list);
369 kfree(assigned_dev);
372 void kvm_free_all_assigned_devices(struct kvm *kvm)
374 struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
376 list_for_each_entry_safe(assigned_dev, tmp,
377 &kvm->arch.assigned_dev_head, list) {
378 kvm_free_assigned_device(kvm, assigned_dev);
382 static int assigned_device_enable_host_intx(struct kvm *kvm,
383 struct kvm_assigned_dev_kernel *dev)
385 irq_handler_t irq_handler;
386 unsigned long flags;
388 dev->host_irq = dev->dev->irq;
391 * We can only share the IRQ line with other host devices if we are
392 * able to disable the IRQ source at device-level - independently of
393 * the guest driver. Otherwise host devices may suffer from unbounded
394 * IRQ latencies when the guest keeps the line asserted.
396 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
397 irq_handler = kvm_assigned_dev_intx;
398 flags = IRQF_SHARED;
399 } else {
400 irq_handler = NULL;
401 flags = IRQF_ONESHOT;
403 if (request_threaded_irq(dev->host_irq, irq_handler,
404 kvm_assigned_dev_thread_intx, flags,
405 dev->irq_name, dev))
406 return -EIO;
408 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
409 spin_lock_irq(&dev->intx_lock);
410 pci_intx(dev->dev, true);
411 spin_unlock_irq(&dev->intx_lock);
413 return 0;
416 static int assigned_device_enable_host_msi(struct kvm *kvm,
417 struct kvm_assigned_dev_kernel *dev)
419 int r;
421 if (!dev->dev->msi_enabled) {
422 r = pci_enable_msi(dev->dev);
423 if (r)
424 return r;
427 dev->host_irq = dev->dev->irq;
428 if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
429 kvm_assigned_dev_thread_msi, 0,
430 dev->irq_name, dev)) {
431 pci_disable_msi(dev->dev);
432 return -EIO;
435 return 0;
438 static int assigned_device_enable_host_msix(struct kvm *kvm,
439 struct kvm_assigned_dev_kernel *dev)
441 int i, r = -EINVAL;
443 /* host_msix_entries and guest_msix_entries should have been
444 * initialized */
445 if (dev->entries_nr == 0)
446 return r;
448 r = pci_enable_msix_exact(dev->dev,
449 dev->host_msix_entries, dev->entries_nr);
450 if (r)
451 return r;
453 for (i = 0; i < dev->entries_nr; i++) {
454 r = request_threaded_irq(dev->host_msix_entries[i].vector,
455 kvm_assigned_dev_msix,
456 kvm_assigned_dev_thread_msix,
457 0, dev->irq_name, dev);
458 if (r)
459 goto err;
462 return 0;
463 err:
464 for (i -= 1; i >= 0; i--)
465 free_irq(dev->host_msix_entries[i].vector, dev);
466 pci_disable_msix(dev->dev);
467 return r;
470 static int assigned_device_enable_guest_intx(struct kvm *kvm,
471 struct kvm_assigned_dev_kernel *dev,
472 struct kvm_assigned_irq *irq)
474 dev->guest_irq = irq->guest_irq;
475 dev->ack_notifier.gsi = irq->guest_irq;
476 return 0;
479 static int assigned_device_enable_guest_msi(struct kvm *kvm,
480 struct kvm_assigned_dev_kernel *dev,
481 struct kvm_assigned_irq *irq)
483 dev->guest_irq = irq->guest_irq;
484 dev->ack_notifier.gsi = -1;
485 return 0;
488 static int assigned_device_enable_guest_msix(struct kvm *kvm,
489 struct kvm_assigned_dev_kernel *dev,
490 struct kvm_assigned_irq *irq)
492 dev->guest_irq = irq->guest_irq;
493 dev->ack_notifier.gsi = -1;
494 return 0;
497 static int assign_host_irq(struct kvm *kvm,
498 struct kvm_assigned_dev_kernel *dev,
499 __u32 host_irq_type)
501 int r = -EEXIST;
503 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
504 return r;
506 snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
507 pci_name(dev->dev));
509 switch (host_irq_type) {
510 case KVM_DEV_IRQ_HOST_INTX:
511 r = assigned_device_enable_host_intx(kvm, dev);
512 break;
513 case KVM_DEV_IRQ_HOST_MSI:
514 r = assigned_device_enable_host_msi(kvm, dev);
515 break;
516 case KVM_DEV_IRQ_HOST_MSIX:
517 r = assigned_device_enable_host_msix(kvm, dev);
518 break;
519 default:
520 r = -EINVAL;
522 dev->host_irq_disabled = false;
524 if (!r)
525 dev->irq_requested_type |= host_irq_type;
527 return r;
530 static int assign_guest_irq(struct kvm *kvm,
531 struct kvm_assigned_dev_kernel *dev,
532 struct kvm_assigned_irq *irq,
533 unsigned long guest_irq_type)
535 int id;
536 int r = -EEXIST;
538 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
539 return r;
541 id = kvm_request_irq_source_id(kvm);
542 if (id < 0)
543 return id;
545 dev->irq_source_id = id;
547 switch (guest_irq_type) {
548 case KVM_DEV_IRQ_GUEST_INTX:
549 r = assigned_device_enable_guest_intx(kvm, dev, irq);
550 break;
551 case KVM_DEV_IRQ_GUEST_MSI:
552 r = assigned_device_enable_guest_msi(kvm, dev, irq);
553 break;
554 case KVM_DEV_IRQ_GUEST_MSIX:
555 r = assigned_device_enable_guest_msix(kvm, dev, irq);
556 break;
557 default:
558 r = -EINVAL;
561 if (!r) {
562 dev->irq_requested_type |= guest_irq_type;
563 if (dev->ack_notifier.gsi != -1)
564 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
565 } else {
566 kvm_free_irq_source_id(kvm, dev->irq_source_id);
567 dev->irq_source_id = -1;
570 return r;
573 /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
574 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
575 struct kvm_assigned_irq *assigned_irq)
577 int r = -EINVAL;
578 struct kvm_assigned_dev_kernel *match;
579 unsigned long host_irq_type, guest_irq_type;
581 if (!irqchip_in_kernel(kvm))
582 return r;
584 mutex_lock(&kvm->lock);
585 r = -ENODEV;
586 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
587 assigned_irq->assigned_dev_id);
588 if (!match)
589 goto out;
591 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
592 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
594 r = -EINVAL;
595 /* can only assign one type at a time */
596 if (hweight_long(host_irq_type) > 1)
597 goto out;
598 if (hweight_long(guest_irq_type) > 1)
599 goto out;
600 if (host_irq_type == 0 && guest_irq_type == 0)
601 goto out;
603 r = 0;
604 if (host_irq_type)
605 r = assign_host_irq(kvm, match, host_irq_type);
606 if (r)
607 goto out;
609 if (guest_irq_type)
610 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
611 out:
612 mutex_unlock(&kvm->lock);
613 return r;
616 static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
617 struct kvm_assigned_irq
618 *assigned_irq)
620 int r = -ENODEV;
621 struct kvm_assigned_dev_kernel *match;
622 unsigned long irq_type;
624 mutex_lock(&kvm->lock);
626 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
627 assigned_irq->assigned_dev_id);
628 if (!match)
629 goto out;
631 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
632 KVM_DEV_IRQ_GUEST_MASK);
633 r = kvm_deassign_irq(kvm, match, irq_type);
634 out:
635 mutex_unlock(&kvm->lock);
636 return r;
640 * We want to test whether the caller has been granted permissions to
641 * use this device. To be able to configure and control the device,
642 * the user needs access to PCI configuration space and BAR resources.
643 * These are accessed through PCI sysfs. PCI config space is often
644 * passed to the process calling this ioctl via file descriptor, so we
645 * can't rely on access to that file. We can check for permissions
646 * on each of the BAR resource files, which is a pretty clear
647 * indicator that the user has been granted access to the device.
649 static int probe_sysfs_permissions(struct pci_dev *dev)
651 #ifdef CONFIG_SYSFS
652 int i;
653 bool bar_found = false;
655 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
656 char *kpath, *syspath;
657 struct path path;
658 struct inode *inode;
659 int r;
661 if (!pci_resource_len(dev, i))
662 continue;
664 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
665 if (!kpath)
666 return -ENOMEM;
668 /* Per sysfs-rules, sysfs is always at /sys */
669 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
670 kfree(kpath);
671 if (!syspath)
672 return -ENOMEM;
674 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
675 kfree(syspath);
676 if (r)
677 return r;
679 inode = d_backing_inode(path.dentry);
681 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
682 path_put(&path);
683 if (r)
684 return r;
686 bar_found = true;
689 /* If no resources, probably something special */
690 if (!bar_found)
691 return -EPERM;
693 return 0;
694 #else
695 return -EINVAL; /* No way to control the device without sysfs */
696 #endif
699 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
700 struct kvm_assigned_pci_dev *assigned_dev)
702 int r = 0, idx;
703 struct kvm_assigned_dev_kernel *match;
704 struct pci_dev *dev;
706 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
707 return -EINVAL;
709 mutex_lock(&kvm->lock);
710 idx = srcu_read_lock(&kvm->srcu);
712 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
713 assigned_dev->assigned_dev_id);
714 if (match) {
715 /* device already assigned */
716 r = -EEXIST;
717 goto out;
720 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
721 if (match == NULL) {
722 printk(KERN_INFO "%s: Couldn't allocate memory\n",
723 __func__);
724 r = -ENOMEM;
725 goto out;
727 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
728 assigned_dev->busnr,
729 assigned_dev->devfn);
730 if (!dev) {
731 printk(KERN_INFO "%s: host device not found\n", __func__);
732 r = -EINVAL;
733 goto out_free;
736 /* Don't allow bridges to be assigned */
737 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
738 r = -EPERM;
739 goto out_put;
742 r = probe_sysfs_permissions(dev);
743 if (r)
744 goto out_put;
746 if (pci_enable_device(dev)) {
747 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
748 r = -EBUSY;
749 goto out_put;
751 r = pci_request_regions(dev, "kvm_assigned_device");
752 if (r) {
753 printk(KERN_INFO "%s: Could not get access to device regions\n",
754 __func__);
755 goto out_disable;
758 pci_reset_function(dev);
759 pci_save_state(dev);
760 match->pci_saved_state = pci_store_saved_state(dev);
761 if (!match->pci_saved_state)
762 printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
763 __func__, dev_name(&dev->dev));
765 if (!pci_intx_mask_supported(dev))
766 assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
768 match->assigned_dev_id = assigned_dev->assigned_dev_id;
769 match->host_segnr = assigned_dev->segnr;
770 match->host_busnr = assigned_dev->busnr;
771 match->host_devfn = assigned_dev->devfn;
772 match->flags = assigned_dev->flags;
773 match->dev = dev;
774 spin_lock_init(&match->intx_lock);
775 spin_lock_init(&match->intx_mask_lock);
776 match->irq_source_id = -1;
777 match->kvm = kvm;
778 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
780 list_add(&match->list, &kvm->arch.assigned_dev_head);
782 if (!kvm->arch.iommu_domain) {
783 r = kvm_iommu_map_guest(kvm);
784 if (r)
785 goto out_list_del;
787 r = kvm_assign_device(kvm, match->dev);
788 if (r)
789 goto out_list_del;
791 out:
792 srcu_read_unlock(&kvm->srcu, idx);
793 mutex_unlock(&kvm->lock);
794 return r;
795 out_list_del:
796 if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
797 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
798 __func__, dev_name(&dev->dev));
799 list_del(&match->list);
800 pci_release_regions(dev);
801 out_disable:
802 pci_disable_device(dev);
803 out_put:
804 pci_dev_put(dev);
805 out_free:
806 kfree(match);
807 srcu_read_unlock(&kvm->srcu, idx);
808 mutex_unlock(&kvm->lock);
809 return r;
812 static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
813 struct kvm_assigned_pci_dev *assigned_dev)
815 int r = 0;
816 struct kvm_assigned_dev_kernel *match;
818 mutex_lock(&kvm->lock);
820 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
821 assigned_dev->assigned_dev_id);
822 if (!match) {
823 printk(KERN_INFO "%s: device hasn't been assigned before, "
824 "so cannot be deassigned\n", __func__);
825 r = -EINVAL;
826 goto out;
829 kvm_deassign_device(kvm, match->dev);
831 kvm_free_assigned_device(kvm, match);
833 out:
834 mutex_unlock(&kvm->lock);
835 return r;
839 static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
840 struct kvm_assigned_msix_nr *entry_nr)
842 int r = 0;
843 struct kvm_assigned_dev_kernel *adev;
845 mutex_lock(&kvm->lock);
847 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
848 entry_nr->assigned_dev_id);
849 if (!adev) {
850 r = -EINVAL;
851 goto msix_nr_out;
854 if (adev->entries_nr == 0) {
855 adev->entries_nr = entry_nr->entry_nr;
856 if (adev->entries_nr == 0 ||
857 adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
858 r = -EINVAL;
859 goto msix_nr_out;
862 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
863 entry_nr->entry_nr,
864 GFP_KERNEL);
865 if (!adev->host_msix_entries) {
866 r = -ENOMEM;
867 goto msix_nr_out;
869 adev->guest_msix_entries =
870 kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
871 GFP_KERNEL);
872 if (!adev->guest_msix_entries) {
873 kfree(adev->host_msix_entries);
874 r = -ENOMEM;
875 goto msix_nr_out;
877 } else /* Not allowed set MSI-X number twice */
878 r = -EINVAL;
879 msix_nr_out:
880 mutex_unlock(&kvm->lock);
881 return r;
884 static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
885 struct kvm_assigned_msix_entry *entry)
887 int r = 0, i;
888 struct kvm_assigned_dev_kernel *adev;
890 mutex_lock(&kvm->lock);
892 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
893 entry->assigned_dev_id);
895 if (!adev) {
896 r = -EINVAL;
897 goto msix_entry_out;
900 for (i = 0; i < adev->entries_nr; i++)
901 if (adev->guest_msix_entries[i].vector == 0 ||
902 adev->guest_msix_entries[i].entry == entry->entry) {
903 adev->guest_msix_entries[i].entry = entry->entry;
904 adev->guest_msix_entries[i].vector = entry->gsi;
905 adev->host_msix_entries[i].entry = entry->entry;
906 break;
908 if (i == adev->entries_nr) {
909 r = -ENOSPC;
910 goto msix_entry_out;
913 msix_entry_out:
914 mutex_unlock(&kvm->lock);
916 return r;
919 static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
920 struct kvm_assigned_pci_dev *assigned_dev)
922 int r = 0;
923 struct kvm_assigned_dev_kernel *match;
925 mutex_lock(&kvm->lock);
927 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
928 assigned_dev->assigned_dev_id);
929 if (!match) {
930 r = -ENODEV;
931 goto out;
934 spin_lock(&match->intx_mask_lock);
936 match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
937 match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
939 if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
940 if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
941 kvm_set_irq(match->kvm, match->irq_source_id,
942 match->guest_irq, 0, false);
944 * Masking at hardware-level is performed on demand,
945 * i.e. when an IRQ actually arrives at the host.
947 } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
949 * Unmask the IRQ line if required. Unmasking at
950 * device level will be performed by user space.
952 spin_lock_irq(&match->intx_lock);
953 if (match->host_irq_disabled) {
954 enable_irq(match->host_irq);
955 match->host_irq_disabled = false;
957 spin_unlock_irq(&match->intx_lock);
961 spin_unlock(&match->intx_mask_lock);
963 out:
964 mutex_unlock(&kvm->lock);
965 return r;
968 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
969 unsigned long arg)
971 void __user *argp = (void __user *)arg;
972 int r;
974 switch (ioctl) {
975 case KVM_ASSIGN_PCI_DEVICE: {
976 struct kvm_assigned_pci_dev assigned_dev;
978 r = -EFAULT;
979 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
980 goto out;
981 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
982 if (r)
983 goto out;
984 break;
986 case KVM_ASSIGN_IRQ: {
987 r = -EOPNOTSUPP;
988 break;
990 case KVM_ASSIGN_DEV_IRQ: {
991 struct kvm_assigned_irq assigned_irq;
993 r = -EFAULT;
994 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
995 goto out;
996 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
997 if (r)
998 goto out;
999 break;
1001 case KVM_DEASSIGN_DEV_IRQ: {
1002 struct kvm_assigned_irq assigned_irq;
1004 r = -EFAULT;
1005 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
1006 goto out;
1007 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
1008 if (r)
1009 goto out;
1010 break;
1012 case KVM_DEASSIGN_PCI_DEVICE: {
1013 struct kvm_assigned_pci_dev assigned_dev;
1015 r = -EFAULT;
1016 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1017 goto out;
1018 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
1019 if (r)
1020 goto out;
1021 break;
1023 case KVM_ASSIGN_SET_MSIX_NR: {
1024 struct kvm_assigned_msix_nr entry_nr;
1025 r = -EFAULT;
1026 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
1027 goto out;
1028 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
1029 if (r)
1030 goto out;
1031 break;
1033 case KVM_ASSIGN_SET_MSIX_ENTRY: {
1034 struct kvm_assigned_msix_entry entry;
1035 r = -EFAULT;
1036 if (copy_from_user(&entry, argp, sizeof entry))
1037 goto out;
1038 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
1039 if (r)
1040 goto out;
1041 break;
1043 case KVM_ASSIGN_SET_INTX_MASK: {
1044 struct kvm_assigned_pci_dev assigned_dev;
1046 r = -EFAULT;
1047 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1048 goto out;
1049 r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
1050 break;
1052 default:
1053 r = -ENOTTY;
1054 break;
1056 out:
1057 return r;