of: MSI: Simplify irqdomain lookup
[linux/fpc-iii.git] / arch / x86 / kvm / assigned-dev.c
blob9dc091acd5fbab0a6da92510447174a6d995e22c
1 /*
2 * Kernel-based Virtual Machine - device assignment support
4 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
9 */
11 #include <linux/kvm_host.h>
12 #include <linux/kvm.h>
13 #include <linux/uaccess.h>
14 #include <linux/vmalloc.h>
15 #include <linux/errno.h>
16 #include <linux/spinlock.h>
17 #include <linux/pci.h>
18 #include <linux/interrupt.h>
19 #include <linux/slab.h>
20 #include <linux/namei.h>
21 #include <linux/fs.h>
22 #include "irq.h"
23 #include "assigned-dev.h"
24 #include "trace/events/kvm.h"
26 struct kvm_assigned_dev_kernel {
27 struct kvm_irq_ack_notifier ack_notifier;
28 struct list_head list;
29 int assigned_dev_id;
30 int host_segnr;
31 int host_busnr;
32 int host_devfn;
33 unsigned int entries_nr;
34 int host_irq;
35 bool host_irq_disabled;
36 bool pci_2_3;
37 struct msix_entry *host_msix_entries;
38 int guest_irq;
39 struct msix_entry *guest_msix_entries;
40 unsigned long irq_requested_type;
41 int irq_source_id;
42 int flags;
43 struct pci_dev *dev;
44 struct kvm *kvm;
45 spinlock_t intx_lock;
46 spinlock_t intx_mask_lock;
47 char irq_name[32];
48 struct pci_saved_state *pci_saved_state;
51 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
52 int assigned_dev_id)
54 struct list_head *ptr;
55 struct kvm_assigned_dev_kernel *match;
57 list_for_each(ptr, head) {
58 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
59 if (match->assigned_dev_id == assigned_dev_id)
60 return match;
62 return NULL;
65 static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
66 *assigned_dev, int irq)
68 int i, index;
69 struct msix_entry *host_msix_entries;
71 host_msix_entries = assigned_dev->host_msix_entries;
73 index = -1;
74 for (i = 0; i < assigned_dev->entries_nr; i++)
75 if (irq == host_msix_entries[i].vector) {
76 index = i;
77 break;
79 if (index < 0)
80 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
82 return index;
85 static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
87 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
88 int ret;
90 spin_lock(&assigned_dev->intx_lock);
91 if (pci_check_and_mask_intx(assigned_dev->dev)) {
92 assigned_dev->host_irq_disabled = true;
93 ret = IRQ_WAKE_THREAD;
94 } else
95 ret = IRQ_NONE;
96 spin_unlock(&assigned_dev->intx_lock);
98 return ret;
101 static void
102 kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
103 int vector)
105 if (unlikely(assigned_dev->irq_requested_type &
106 KVM_DEV_IRQ_GUEST_INTX)) {
107 spin_lock(&assigned_dev->intx_mask_lock);
108 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
109 kvm_set_irq(assigned_dev->kvm,
110 assigned_dev->irq_source_id, vector, 1,
111 false);
112 spin_unlock(&assigned_dev->intx_mask_lock);
113 } else
114 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
115 vector, 1, false);
118 static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
120 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
122 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
123 spin_lock_irq(&assigned_dev->intx_lock);
124 disable_irq_nosync(irq);
125 assigned_dev->host_irq_disabled = true;
126 spin_unlock_irq(&assigned_dev->intx_lock);
129 kvm_assigned_dev_raise_guest_irq(assigned_dev,
130 assigned_dev->guest_irq);
132 return IRQ_HANDLED;
136 * Deliver an IRQ in an atomic context if we can, or return a failure,
137 * user can retry in a process context.
138 * Return value:
139 * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
140 * Other values - No need to retry.
142 static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
143 int level)
145 struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
146 struct kvm_kernel_irq_routing_entry *e;
147 int ret = -EINVAL;
148 int idx;
150 trace_kvm_set_irq(irq, level, irq_source_id);
153 * Injection into either PIC or IOAPIC might need to scan all CPUs,
154 * which would need to be retried from thread context; when same GSI
155 * is connected to both PIC and IOAPIC, we'd have to report a
156 * partial failure here.
157 * Since there's no easy way to do this, we only support injecting MSI
158 * which is limited to 1:1 GSI mapping.
160 idx = srcu_read_lock(&kvm->irq_srcu);
161 if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
162 e = &entries[0];
163 ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
164 irq, level);
166 srcu_read_unlock(&kvm->irq_srcu, idx);
167 return ret;
171 static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
173 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
174 int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
175 assigned_dev->irq_source_id,
176 assigned_dev->guest_irq, 1);
177 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
180 static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
182 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
184 kvm_assigned_dev_raise_guest_irq(assigned_dev,
185 assigned_dev->guest_irq);
187 return IRQ_HANDLED;
190 static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
192 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
193 int index = find_index_from_host_irq(assigned_dev, irq);
194 u32 vector;
195 int ret = 0;
197 if (index >= 0) {
198 vector = assigned_dev->guest_msix_entries[index].vector;
199 ret = kvm_set_irq_inatomic(assigned_dev->kvm,
200 assigned_dev->irq_source_id,
201 vector, 1);
204 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
207 static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
209 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
210 int index = find_index_from_host_irq(assigned_dev, irq);
211 u32 vector;
213 if (index >= 0) {
214 vector = assigned_dev->guest_msix_entries[index].vector;
215 kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
218 return IRQ_HANDLED;
221 /* Ack the irq line for an assigned device */
222 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
224 struct kvm_assigned_dev_kernel *dev =
225 container_of(kian, struct kvm_assigned_dev_kernel,
226 ack_notifier);
228 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
230 spin_lock(&dev->intx_mask_lock);
232 if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
233 bool reassert = false;
235 spin_lock_irq(&dev->intx_lock);
237 * The guest IRQ may be shared so this ack can come from an
238 * IRQ for another guest device.
240 if (dev->host_irq_disabled) {
241 if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
242 enable_irq(dev->host_irq);
243 else if (!pci_check_and_unmask_intx(dev->dev))
244 reassert = true;
245 dev->host_irq_disabled = reassert;
247 spin_unlock_irq(&dev->intx_lock);
249 if (reassert)
250 kvm_set_irq(dev->kvm, dev->irq_source_id,
251 dev->guest_irq, 1, false);
254 spin_unlock(&dev->intx_mask_lock);
257 static void deassign_guest_irq(struct kvm *kvm,
258 struct kvm_assigned_dev_kernel *assigned_dev)
260 if (assigned_dev->ack_notifier.gsi != -1)
261 kvm_unregister_irq_ack_notifier(kvm,
262 &assigned_dev->ack_notifier);
264 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
265 assigned_dev->guest_irq, 0, false);
267 if (assigned_dev->irq_source_id != -1)
268 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
269 assigned_dev->irq_source_id = -1;
270 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
273 /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
274 static void deassign_host_irq(struct kvm *kvm,
275 struct kvm_assigned_dev_kernel *assigned_dev)
278 * We disable irq here to prevent further events.
280 * Notice this maybe result in nested disable if the interrupt type is
281 * INTx, but it's OK for we are going to free it.
283 * If this function is a part of VM destroy, please ensure that till
284 * now, the kvm state is still legal for probably we also have to wait
285 * on a currently running IRQ handler.
287 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
288 int i;
289 for (i = 0; i < assigned_dev->entries_nr; i++)
290 disable_irq(assigned_dev->host_msix_entries[i].vector);
292 for (i = 0; i < assigned_dev->entries_nr; i++)
293 free_irq(assigned_dev->host_msix_entries[i].vector,
294 assigned_dev);
296 assigned_dev->entries_nr = 0;
297 kfree(assigned_dev->host_msix_entries);
298 kfree(assigned_dev->guest_msix_entries);
299 pci_disable_msix(assigned_dev->dev);
300 } else {
301 /* Deal with MSI and INTx */
302 if ((assigned_dev->irq_requested_type &
303 KVM_DEV_IRQ_HOST_INTX) &&
304 (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
305 spin_lock_irq(&assigned_dev->intx_lock);
306 pci_intx(assigned_dev->dev, false);
307 spin_unlock_irq(&assigned_dev->intx_lock);
308 synchronize_irq(assigned_dev->host_irq);
309 } else
310 disable_irq(assigned_dev->host_irq);
312 free_irq(assigned_dev->host_irq, assigned_dev);
314 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
315 pci_disable_msi(assigned_dev->dev);
318 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
321 static int kvm_deassign_irq(struct kvm *kvm,
322 struct kvm_assigned_dev_kernel *assigned_dev,
323 unsigned long irq_requested_type)
325 unsigned long guest_irq_type, host_irq_type;
327 if (!irqchip_in_kernel(kvm))
328 return -EINVAL;
329 /* no irq assignment to deassign */
330 if (!assigned_dev->irq_requested_type)
331 return -ENXIO;
333 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
334 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
336 if (host_irq_type)
337 deassign_host_irq(kvm, assigned_dev);
338 if (guest_irq_type)
339 deassign_guest_irq(kvm, assigned_dev);
341 return 0;
344 static void kvm_free_assigned_irq(struct kvm *kvm,
345 struct kvm_assigned_dev_kernel *assigned_dev)
347 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
350 static void kvm_free_assigned_device(struct kvm *kvm,
351 struct kvm_assigned_dev_kernel
352 *assigned_dev)
354 kvm_free_assigned_irq(kvm, assigned_dev);
356 pci_reset_function(assigned_dev->dev);
357 if (pci_load_and_free_saved_state(assigned_dev->dev,
358 &assigned_dev->pci_saved_state))
359 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
360 __func__, dev_name(&assigned_dev->dev->dev));
361 else
362 pci_restore_state(assigned_dev->dev);
364 pci_clear_dev_assigned(assigned_dev->dev);
366 pci_release_regions(assigned_dev->dev);
367 pci_disable_device(assigned_dev->dev);
368 pci_dev_put(assigned_dev->dev);
370 list_del(&assigned_dev->list);
371 kfree(assigned_dev);
374 void kvm_free_all_assigned_devices(struct kvm *kvm)
376 struct list_head *ptr, *ptr2;
377 struct kvm_assigned_dev_kernel *assigned_dev;
379 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
380 assigned_dev = list_entry(ptr,
381 struct kvm_assigned_dev_kernel,
382 list);
384 kvm_free_assigned_device(kvm, assigned_dev);
388 static int assigned_device_enable_host_intx(struct kvm *kvm,
389 struct kvm_assigned_dev_kernel *dev)
391 irq_handler_t irq_handler;
392 unsigned long flags;
394 dev->host_irq = dev->dev->irq;
397 * We can only share the IRQ line with other host devices if we are
398 * able to disable the IRQ source at device-level - independently of
399 * the guest driver. Otherwise host devices may suffer from unbounded
400 * IRQ latencies when the guest keeps the line asserted.
402 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
403 irq_handler = kvm_assigned_dev_intx;
404 flags = IRQF_SHARED;
405 } else {
406 irq_handler = NULL;
407 flags = IRQF_ONESHOT;
409 if (request_threaded_irq(dev->host_irq, irq_handler,
410 kvm_assigned_dev_thread_intx, flags,
411 dev->irq_name, dev))
412 return -EIO;
414 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
415 spin_lock_irq(&dev->intx_lock);
416 pci_intx(dev->dev, true);
417 spin_unlock_irq(&dev->intx_lock);
419 return 0;
422 static int assigned_device_enable_host_msi(struct kvm *kvm,
423 struct kvm_assigned_dev_kernel *dev)
425 int r;
427 if (!dev->dev->msi_enabled) {
428 r = pci_enable_msi(dev->dev);
429 if (r)
430 return r;
433 dev->host_irq = dev->dev->irq;
434 if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
435 kvm_assigned_dev_thread_msi, 0,
436 dev->irq_name, dev)) {
437 pci_disable_msi(dev->dev);
438 return -EIO;
441 return 0;
444 static int assigned_device_enable_host_msix(struct kvm *kvm,
445 struct kvm_assigned_dev_kernel *dev)
447 int i, r = -EINVAL;
449 /* host_msix_entries and guest_msix_entries should have been
450 * initialized */
451 if (dev->entries_nr == 0)
452 return r;
454 r = pci_enable_msix_exact(dev->dev,
455 dev->host_msix_entries, dev->entries_nr);
456 if (r)
457 return r;
459 for (i = 0; i < dev->entries_nr; i++) {
460 r = request_threaded_irq(dev->host_msix_entries[i].vector,
461 kvm_assigned_dev_msix,
462 kvm_assigned_dev_thread_msix,
463 0, dev->irq_name, dev);
464 if (r)
465 goto err;
468 return 0;
469 err:
470 for (i -= 1; i >= 0; i--)
471 free_irq(dev->host_msix_entries[i].vector, dev);
472 pci_disable_msix(dev->dev);
473 return r;
476 static int assigned_device_enable_guest_intx(struct kvm *kvm,
477 struct kvm_assigned_dev_kernel *dev,
478 struct kvm_assigned_irq *irq)
480 dev->guest_irq = irq->guest_irq;
481 dev->ack_notifier.gsi = irq->guest_irq;
482 return 0;
485 static int assigned_device_enable_guest_msi(struct kvm *kvm,
486 struct kvm_assigned_dev_kernel *dev,
487 struct kvm_assigned_irq *irq)
489 dev->guest_irq = irq->guest_irq;
490 dev->ack_notifier.gsi = -1;
491 return 0;
494 static int assigned_device_enable_guest_msix(struct kvm *kvm,
495 struct kvm_assigned_dev_kernel *dev,
496 struct kvm_assigned_irq *irq)
498 dev->guest_irq = irq->guest_irq;
499 dev->ack_notifier.gsi = -1;
500 return 0;
503 static int assign_host_irq(struct kvm *kvm,
504 struct kvm_assigned_dev_kernel *dev,
505 __u32 host_irq_type)
507 int r = -EEXIST;
509 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
510 return r;
512 snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
513 pci_name(dev->dev));
515 switch (host_irq_type) {
516 case KVM_DEV_IRQ_HOST_INTX:
517 r = assigned_device_enable_host_intx(kvm, dev);
518 break;
519 case KVM_DEV_IRQ_HOST_MSI:
520 r = assigned_device_enable_host_msi(kvm, dev);
521 break;
522 case KVM_DEV_IRQ_HOST_MSIX:
523 r = assigned_device_enable_host_msix(kvm, dev);
524 break;
525 default:
526 r = -EINVAL;
528 dev->host_irq_disabled = false;
530 if (!r)
531 dev->irq_requested_type |= host_irq_type;
533 return r;
536 static int assign_guest_irq(struct kvm *kvm,
537 struct kvm_assigned_dev_kernel *dev,
538 struct kvm_assigned_irq *irq,
539 unsigned long guest_irq_type)
541 int id;
542 int r = -EEXIST;
544 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
545 return r;
547 id = kvm_request_irq_source_id(kvm);
548 if (id < 0)
549 return id;
551 dev->irq_source_id = id;
553 switch (guest_irq_type) {
554 case KVM_DEV_IRQ_GUEST_INTX:
555 r = assigned_device_enable_guest_intx(kvm, dev, irq);
556 break;
557 case KVM_DEV_IRQ_GUEST_MSI:
558 r = assigned_device_enable_guest_msi(kvm, dev, irq);
559 break;
560 case KVM_DEV_IRQ_GUEST_MSIX:
561 r = assigned_device_enable_guest_msix(kvm, dev, irq);
562 break;
563 default:
564 r = -EINVAL;
567 if (!r) {
568 dev->irq_requested_type |= guest_irq_type;
569 if (dev->ack_notifier.gsi != -1)
570 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
571 } else {
572 kvm_free_irq_source_id(kvm, dev->irq_source_id);
573 dev->irq_source_id = -1;
576 return r;
579 /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
580 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
581 struct kvm_assigned_irq *assigned_irq)
583 int r = -EINVAL;
584 struct kvm_assigned_dev_kernel *match;
585 unsigned long host_irq_type, guest_irq_type;
587 if (!irqchip_in_kernel(kvm))
588 return r;
590 mutex_lock(&kvm->lock);
591 r = -ENODEV;
592 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
593 assigned_irq->assigned_dev_id);
594 if (!match)
595 goto out;
597 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
598 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
600 r = -EINVAL;
601 /* can only assign one type at a time */
602 if (hweight_long(host_irq_type) > 1)
603 goto out;
604 if (hweight_long(guest_irq_type) > 1)
605 goto out;
606 if (host_irq_type == 0 && guest_irq_type == 0)
607 goto out;
609 r = 0;
610 if (host_irq_type)
611 r = assign_host_irq(kvm, match, host_irq_type);
612 if (r)
613 goto out;
615 if (guest_irq_type)
616 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
617 out:
618 mutex_unlock(&kvm->lock);
619 return r;
622 static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
623 struct kvm_assigned_irq
624 *assigned_irq)
626 int r = -ENODEV;
627 struct kvm_assigned_dev_kernel *match;
628 unsigned long irq_type;
630 mutex_lock(&kvm->lock);
632 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
633 assigned_irq->assigned_dev_id);
634 if (!match)
635 goto out;
637 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
638 KVM_DEV_IRQ_GUEST_MASK);
639 r = kvm_deassign_irq(kvm, match, irq_type);
640 out:
641 mutex_unlock(&kvm->lock);
642 return r;
646 * We want to test whether the caller has been granted permissions to
647 * use this device. To be able to configure and control the device,
648 * the user needs access to PCI configuration space and BAR resources.
649 * These are accessed through PCI sysfs. PCI config space is often
650 * passed to the process calling this ioctl via file descriptor, so we
651 * can't rely on access to that file. We can check for permissions
652 * on each of the BAR resource files, which is a pretty clear
653 * indicator that the user has been granted access to the device.
655 static int probe_sysfs_permissions(struct pci_dev *dev)
657 #ifdef CONFIG_SYSFS
658 int i;
659 bool bar_found = false;
661 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
662 char *kpath, *syspath;
663 struct path path;
664 struct inode *inode;
665 int r;
667 if (!pci_resource_len(dev, i))
668 continue;
670 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
671 if (!kpath)
672 return -ENOMEM;
674 /* Per sysfs-rules, sysfs is always at /sys */
675 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
676 kfree(kpath);
677 if (!syspath)
678 return -ENOMEM;
680 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
681 kfree(syspath);
682 if (r)
683 return r;
685 inode = d_backing_inode(path.dentry);
687 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
688 path_put(&path);
689 if (r)
690 return r;
692 bar_found = true;
695 /* If no resources, probably something special */
696 if (!bar_found)
697 return -EPERM;
699 return 0;
700 #else
701 return -EINVAL; /* No way to control the device without sysfs */
702 #endif
705 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
706 struct kvm_assigned_pci_dev *assigned_dev)
708 int r = 0, idx;
709 struct kvm_assigned_dev_kernel *match;
710 struct pci_dev *dev;
712 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
713 return -EINVAL;
715 mutex_lock(&kvm->lock);
716 idx = srcu_read_lock(&kvm->srcu);
718 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
719 assigned_dev->assigned_dev_id);
720 if (match) {
721 /* device already assigned */
722 r = -EEXIST;
723 goto out;
726 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
727 if (match == NULL) {
728 printk(KERN_INFO "%s: Couldn't allocate memory\n",
729 __func__);
730 r = -ENOMEM;
731 goto out;
733 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
734 assigned_dev->busnr,
735 assigned_dev->devfn);
736 if (!dev) {
737 printk(KERN_INFO "%s: host device not found\n", __func__);
738 r = -EINVAL;
739 goto out_free;
742 /* Don't allow bridges to be assigned */
743 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
744 r = -EPERM;
745 goto out_put;
748 r = probe_sysfs_permissions(dev);
749 if (r)
750 goto out_put;
752 if (pci_enable_device(dev)) {
753 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
754 r = -EBUSY;
755 goto out_put;
757 r = pci_request_regions(dev, "kvm_assigned_device");
758 if (r) {
759 printk(KERN_INFO "%s: Could not get access to device regions\n",
760 __func__);
761 goto out_disable;
764 pci_reset_function(dev);
765 pci_save_state(dev);
766 match->pci_saved_state = pci_store_saved_state(dev);
767 if (!match->pci_saved_state)
768 printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
769 __func__, dev_name(&dev->dev));
771 if (!pci_intx_mask_supported(dev))
772 assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
774 match->assigned_dev_id = assigned_dev->assigned_dev_id;
775 match->host_segnr = assigned_dev->segnr;
776 match->host_busnr = assigned_dev->busnr;
777 match->host_devfn = assigned_dev->devfn;
778 match->flags = assigned_dev->flags;
779 match->dev = dev;
780 spin_lock_init(&match->intx_lock);
781 spin_lock_init(&match->intx_mask_lock);
782 match->irq_source_id = -1;
783 match->kvm = kvm;
784 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
786 list_add(&match->list, &kvm->arch.assigned_dev_head);
788 if (!kvm->arch.iommu_domain) {
789 r = kvm_iommu_map_guest(kvm);
790 if (r)
791 goto out_list_del;
793 r = kvm_assign_device(kvm, match->dev);
794 if (r)
795 goto out_list_del;
797 out:
798 srcu_read_unlock(&kvm->srcu, idx);
799 mutex_unlock(&kvm->lock);
800 return r;
801 out_list_del:
802 if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
803 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
804 __func__, dev_name(&dev->dev));
805 list_del(&match->list);
806 pci_release_regions(dev);
807 out_disable:
808 pci_disable_device(dev);
809 out_put:
810 pci_dev_put(dev);
811 out_free:
812 kfree(match);
813 srcu_read_unlock(&kvm->srcu, idx);
814 mutex_unlock(&kvm->lock);
815 return r;
818 static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
819 struct kvm_assigned_pci_dev *assigned_dev)
821 int r = 0;
822 struct kvm_assigned_dev_kernel *match;
824 mutex_lock(&kvm->lock);
826 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
827 assigned_dev->assigned_dev_id);
828 if (!match) {
829 printk(KERN_INFO "%s: device hasn't been assigned before, "
830 "so cannot be deassigned\n", __func__);
831 r = -EINVAL;
832 goto out;
835 kvm_deassign_device(kvm, match->dev);
837 kvm_free_assigned_device(kvm, match);
839 out:
840 mutex_unlock(&kvm->lock);
841 return r;
845 static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
846 struct kvm_assigned_msix_nr *entry_nr)
848 int r = 0;
849 struct kvm_assigned_dev_kernel *adev;
851 mutex_lock(&kvm->lock);
853 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
854 entry_nr->assigned_dev_id);
855 if (!adev) {
856 r = -EINVAL;
857 goto msix_nr_out;
860 if (adev->entries_nr == 0) {
861 adev->entries_nr = entry_nr->entry_nr;
862 if (adev->entries_nr == 0 ||
863 adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
864 r = -EINVAL;
865 goto msix_nr_out;
868 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
869 entry_nr->entry_nr,
870 GFP_KERNEL);
871 if (!adev->host_msix_entries) {
872 r = -ENOMEM;
873 goto msix_nr_out;
875 adev->guest_msix_entries =
876 kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
877 GFP_KERNEL);
878 if (!adev->guest_msix_entries) {
879 kfree(adev->host_msix_entries);
880 r = -ENOMEM;
881 goto msix_nr_out;
883 } else /* Not allowed set MSI-X number twice */
884 r = -EINVAL;
885 msix_nr_out:
886 mutex_unlock(&kvm->lock);
887 return r;
890 static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
891 struct kvm_assigned_msix_entry *entry)
893 int r = 0, i;
894 struct kvm_assigned_dev_kernel *adev;
896 mutex_lock(&kvm->lock);
898 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
899 entry->assigned_dev_id);
901 if (!adev) {
902 r = -EINVAL;
903 goto msix_entry_out;
906 for (i = 0; i < adev->entries_nr; i++)
907 if (adev->guest_msix_entries[i].vector == 0 ||
908 adev->guest_msix_entries[i].entry == entry->entry) {
909 adev->guest_msix_entries[i].entry = entry->entry;
910 adev->guest_msix_entries[i].vector = entry->gsi;
911 adev->host_msix_entries[i].entry = entry->entry;
912 break;
914 if (i == adev->entries_nr) {
915 r = -ENOSPC;
916 goto msix_entry_out;
919 msix_entry_out:
920 mutex_unlock(&kvm->lock);
922 return r;
925 static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
926 struct kvm_assigned_pci_dev *assigned_dev)
928 int r = 0;
929 struct kvm_assigned_dev_kernel *match;
931 mutex_lock(&kvm->lock);
933 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
934 assigned_dev->assigned_dev_id);
935 if (!match) {
936 r = -ENODEV;
937 goto out;
940 spin_lock(&match->intx_mask_lock);
942 match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
943 match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
945 if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
946 if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
947 kvm_set_irq(match->kvm, match->irq_source_id,
948 match->guest_irq, 0, false);
950 * Masking at hardware-level is performed on demand,
951 * i.e. when an IRQ actually arrives at the host.
953 } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
955 * Unmask the IRQ line if required. Unmasking at
956 * device level will be performed by user space.
958 spin_lock_irq(&match->intx_lock);
959 if (match->host_irq_disabled) {
960 enable_irq(match->host_irq);
961 match->host_irq_disabled = false;
963 spin_unlock_irq(&match->intx_lock);
967 spin_unlock(&match->intx_mask_lock);
969 out:
970 mutex_unlock(&kvm->lock);
971 return r;
974 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
975 unsigned long arg)
977 void __user *argp = (void __user *)arg;
978 int r;
980 switch (ioctl) {
981 case KVM_ASSIGN_PCI_DEVICE: {
982 struct kvm_assigned_pci_dev assigned_dev;
984 r = -EFAULT;
985 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
986 goto out;
987 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
988 if (r)
989 goto out;
990 break;
992 case KVM_ASSIGN_IRQ: {
993 r = -EOPNOTSUPP;
994 break;
996 case KVM_ASSIGN_DEV_IRQ: {
997 struct kvm_assigned_irq assigned_irq;
999 r = -EFAULT;
1000 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
1001 goto out;
1002 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
1003 if (r)
1004 goto out;
1005 break;
1007 case KVM_DEASSIGN_DEV_IRQ: {
1008 struct kvm_assigned_irq assigned_irq;
1010 r = -EFAULT;
1011 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
1012 goto out;
1013 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
1014 if (r)
1015 goto out;
1016 break;
1018 case KVM_DEASSIGN_PCI_DEVICE: {
1019 struct kvm_assigned_pci_dev assigned_dev;
1021 r = -EFAULT;
1022 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1023 goto out;
1024 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
1025 if (r)
1026 goto out;
1027 break;
1029 case KVM_ASSIGN_SET_MSIX_NR: {
1030 struct kvm_assigned_msix_nr entry_nr;
1031 r = -EFAULT;
1032 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
1033 goto out;
1034 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
1035 if (r)
1036 goto out;
1037 break;
1039 case KVM_ASSIGN_SET_MSIX_ENTRY: {
1040 struct kvm_assigned_msix_entry entry;
1041 r = -EFAULT;
1042 if (copy_from_user(&entry, argp, sizeof entry))
1043 goto out;
1044 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
1045 if (r)
1046 goto out;
1047 break;
1049 case KVM_ASSIGN_SET_INTX_MASK: {
1050 struct kvm_assigned_pci_dev assigned_dev;
1052 r = -EFAULT;
1053 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1054 goto out;
1055 r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
1056 break;
1058 default:
1059 r = -ENOTTY;
1060 break;
1062 out:
1063 return r;