Linux 4.1.18
arch/x86/kvm/assigned-dev.c
/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"
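
/*
 * Per-device state for a legacy assigned PCI device: host PCI address,
 * host- and guest-side interrupt configuration (INTx, MSI, or MSI-X),
 * the MSI-X entry tables, and the locks protecting INTx masking.
 */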
struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
	int host_segnr;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	bool pci_2_3;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t intx_lock;
	spinlock_t intx_mask_lock;
	char irq_name[32];
	struct pci_saved_state *pci_saved_state;
};
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}
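
/*
 * Map a host MSI-X interrupt number back to its index in the device's
 * host_msix_entries[] table, so the matching guest vector can be found.
 */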
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}
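
/*
 * Hard-IRQ half of the INTx handler, used when the device supports PCI 2.3
 * INTx masking: mask the interrupt at the device and wake the threaded
 * handler, or report IRQ_NONE if the device wasn't the source (the line
 * may be shared).
 */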
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1,
				    false);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1, false);
}
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#endif
#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}
#endif
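
/*
 * Note on the MSI/MSI-X handlers above: kvm_set_irq_inatomic() injects the
 * interrupt directly from hard-IRQ context when it can; if it would have
 * to block it returns -EWOULDBLOCK and the handler defers to its threaded
 * counterpart via IRQ_WAKE_THREAD.
 */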
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1, false);
	}

	spin_unlock(&dev->intx_mask_lock);
}
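
/*
 * Tear down the guest side of an assignment: drop the ack notifier if one
 * was registered, deassert any pending guest IRQ, and release the IRQ
 * source ID.
 */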
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0, false);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}
/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync(). */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the IRQ here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that is fine since we are about to free it.
	 *
	 * If this function is called as part of VM destruction, make sure
	 * the kvm state is still valid at this point, since we may also
	 * have to wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}
static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}
static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	pci_clear_dev_assigned(assigned_dev->dev);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}
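
/*
 * Host-side INTx setup. With PCI 2.3 masking the line can be shared
 * (IRQF_SHARED) and is masked at device level by the hard handler; without
 * it there is no hard handler, IRQF_ONESHOT keeps the line disabled until
 * the threaded handler has run, and the thread then leaves it disabled
 * until the guest acks the interrupt.
 */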
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}
#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix_exact(dev->dev,
				  dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}
#endif
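
/*
 * The guest-side enable helpers below only record the guest IRQ; actual
 * injection happens through kvm_set_irq() from the host IRQ handlers.
 * Only INTx registers an ack notifier GSI, since level-triggered lines
 * must be deasserted and possibly re-enabled when the guest EOIs them;
 * MSI and MSI-X use gsi = -1.
 */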
static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
#endif
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
#endif
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else {
		kvm_free_irq_source_id(kvm, dev->irq_source_id);
		dev->irq_source_id = -1;
	}

	return r;
}
/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = d_backing_inode(path.dentry);

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}
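
/*
 * For example (illustrative device address), a device at 0000:00:19.0 with
 * two populated BARs would be probed at
 * /sys/devices/pci0000:00/0000:00:19.0/resource0 and .../resource1.
 */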
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
				   assigned_dev->busnr,
				   assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match->dev);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
					struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		  "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match->dev);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}
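
/*
 * MSI-X setup is a two-step userspace protocol: KVM_ASSIGN_SET_MSIX_NR
 * fixes the number of entries (at most KVM_MAX_MSIX_PER_DEV, and only once
 * per device), then KVM_ASSIGN_SET_MSIX_ENTRY fills in each entry/GSI pair
 * before the vectors are armed with KVM_ASSIGN_DEV_IRQ.
 */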
#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						entry_nr->entry_nr,
						GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Not allowed to set the MSI-X entry count twice */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}
#endif
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
					 struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0, false);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}
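
/*
 * Userspace drives all of this through VM ioctls, dispatched below. A
 * minimal sketch of an INTx assignment (illustrative only: error handling
 * omitted, the device ID and IRQ numbers are made up, and the IOMMU flag
 * is mandatory as enforced in kvm_vm_ioctl_assign_device()):
 *
 *	struct kvm_assigned_pci_dev dev = {
 *		.assigned_dev_id = 1,
 *		.segnr = 0, .busnr = 0, .devfn = PCI_DEVFN(0x19, 0),
 *		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,
 *	};
 *	ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
 *
 *	struct kvm_assigned_irq irq = {
 *		.assigned_dev_id = 1,
 *		.guest_irq = 10,
 *		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *	};
 *	ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 */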
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
#ifdef __KVM_HAVE_MSIX
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
#endif
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}