// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/iommu.h>
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>

#include "vfio.h"
struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

static struct vfio {
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
} vfio;
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}
static void vfio_noiommu_release(void *iommu_data)
{
}
static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}
static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}
static void vfio_noiommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
}
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name		= "vfio-noiommu",
	.owner		= THIS_MODULE,
	.open		= vfio_noiommu_open,
	.release	= vfio_noiommu_release,
	.ioctl		= vfio_noiommu_ioctl,
	.attach_group	= vfio_noiommu_attach_group,
	.detach_group	= vfio_noiommu_detach_group,
};
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can
 * only use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
				      const struct vfio_iommu_driver *driver)
{
	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return true;
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
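/*
 * A minimal registration sketch (hypothetical backend, for illustration
 * only): an IOMMU backend fills in a vfio_iommu_driver_ops, mirroring
 * vfio_noiommu_ops above, and registers it at module init:
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_open,
 *		.release	= my_release,
 *		.ioctl		= my_ioctl,
 *		.attach_group	= my_attach_group,
 *		.detach_group	= my_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 */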
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;

	container = container_of(kref, struct vfio_container, kref);
	kfree(container);
}

static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
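/*
 * Lifecycle sketch, traced from the code in this file: the open file holds
 * the initial kref_init() reference and each attached group takes one more
 * via vfio_container_get(), so the container survives until both the fd is
 * closed and the last group detaches, in either order:
 *
 *	vfio_fops_open()              -> kref == 1
 *	vfio_container_attach_group() -> kref == 2
 *	vfio_fops_release()           -> kref == 1
 *	vfio_group_detach_container() -> kref == 0, kfree()
 */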
void vfio_device_container_register(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->register_device)
		iommu_driver->ops->register_device(
			device->group->container->iommu_data, device);
}
void vfio_device_container_unregister(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
}
long vfio_container_ioctl_check_extension(struct vfio_container *container,
					  unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {
				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else {
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
		}
	}

	up_read(&container->group_lock);

	return ret;
}
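/*
 * Userspace sketch (not part of this file): probing an extension on a
 * freshly opened container; a return > 0 means the backend named by arg
 * can later be selected with VFIO_SET_IOMMU:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) > 0)
 *		... the type1 backend is available ...
 */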
/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
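/*
 * Userspace sketch of the required ordering (a group must be attached
 * before VFIO_SET_IOMMU can succeed, per the group_list check above;
 * the group number is illustrative):
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 */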
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_container_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}
static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}
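/*
 * Simplified sketch of the intended caller (the group code resolving a
 * user-supplied container fd; the real call site may differ in detail):
 *
 *	struct fd f = fdget(container_fd);
 *	struct vfio_container *container = vfio_container_from_file(f.file);
 *	if (!container)
 *		return -EINVAL;	// not a /dev/vfio/vfio fd
 */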
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}
void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
int vfio_group_use_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/*
	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
	 * VFIO_SET_IOMMU hasn't been done yet.
	 */
	if (!group->container->iommu_driver)
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}
void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}
int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!driver || !driver->ops->pin_pages))
		return -ENOTTY;
	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
				      npage, prot, pages);
}
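/*
 * In-kernel callers reach this through the vfio_pin_pages() wrapper; a
 * hypothetical vfio_device driver pinning one page for DMA might do:
 *
 *	struct page *page;
 *	int ret = vfio_pin_pages(vdev, iova, 1,
 *				 IOMMU_READ | IOMMU_WRITE, &page);
 */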
void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}
int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->dma_rw))
		return -ENOTTY;
	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
				   write);
}
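/*
 * Sketch of use through the exported vfio_dma_rw() wrapper: an emulated
 * device reading 64 bytes of guest memory at a hypothetical iova:
 *
 *	char buf[64];
 *	int ret = vfio_dma_rw(vdev, iova, buf, sizeof(buf), false);
 */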
int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}
void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");