// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
		(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
		(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

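/*
 * Backend feature negotiation: userspace reads the mask above via
 * VHOST_GET_BACKEND_FEATURES and acks a subset with
 * VHOST_SET_BACKEND_FEATURES before issuing IOTLB messages.  A rough,
 * hypothetical userspace sketch (the fd setup and error handling are
 * omitted):
 *
 *	uint64_t features;
 *
 *	ioctl(vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features);
 *	features &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *		    (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
 *	ioctl(vdpa_fd, VHOST_SET_BACKEND_FEATURES, &features);
 */
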
struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	bool in_batch;
	struct vdpa_iova_range range;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

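/*
 * vhost_vdpa_setup_vq_irq() above and vhost_vdpa_unsetup_vq_irq() below wire
 * a virtqueue's call eventfd to the device interrupt through the irq bypass
 * mechanism: when the vDPA parent exposes a per-vq interrupt
 * (ops->get_vq_irq), the eventfd context is used as the bypass producer
 * token so that a consumer (e.g. KVM's irqfd) can be matched against it and
 * the notification can skip the host signalling path.  Registration failure
 * is not fatal; the regular callback path still works.
 */
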
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx)
		eventfd_ctx_put(v->config_ctx);
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx))
		return PTR_ERR(v->config_ctx);

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

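/*
 * The [first, last] IOVA window reported here bounds every
 * VHOST_IOTLB_UPDATE request (see vhost_vdpa_process_iotlb_update()).
 * A minimal, hypothetical userspace sketch for querying it:
 *
 *	struct vhost_vdpa_iova_range range;
 *
 *	if (ioctl(vdpa_fd, VHOST_VDPA_GET_IOVA_RANGE, &range) == 0)
 *		printf("usable IOVA: [%llx, %llx]\n",
 *		       (unsigned long long)range.first,
 *		       (unsigned long long)range.last);
 */
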
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

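/*
 * Taken together, the ioctls above give userspace (e.g. a VMM) the usual
 * virtio bring-up sequence over a /dev/vhost-vdpa-N fd.  A rough,
 * hypothetical sketch, with error handling and vring setup omitted
 * (vdpa_fd and wanted_features are placeholders):
 *
 *	uint64_t features;
 *	uint8_t status = 0;
 *
 *	ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &status);	// reset
 *	ioctl(vdpa_fd, VHOST_GET_FEATURES, &features);
 *	features &= wanted_features;			// negotiate
 *	ioctl(vdpa_fd, VHOST_SET_FEATURES, &features);
 *	// ... configure vrings with VHOST_SET_VRING_* ...
 *	status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER |
 *		 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_DRIVER_OK;
 *	ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &status);
 */
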
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	if (r)
		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
	else
		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 iova = msg->iova;
	long pinned;
	int ret = 0;

	if (msg->iova < v->range.first ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list, NULL);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous chunk pinned so far */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				ret = vhost_vdpa_map(v, iova, csize,
						     map_pfn << PAGE_SHIFT,
						     msg->perm);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += pinned << PAGE_SHIFT;
		npages -= pinned;
	}

	/* Map the remaining contiguous chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, msg->iova, msg->size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

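/*
 * IOTLB messages arrive through write(2) on the vhost-vdpa fd and are routed
 * here via vhost_chr_write_iter().  A rough, hypothetical userspace sketch
 * of mapping one buffer (vdpa_fd, iova, len and buf are placeholders, and it
 * assumes VHOST_BACKEND_F_IOTLB_MSG_V2 was acked):
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = len,
 *			.uaddr = (uint64_t)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *
 *	write(vdpa_fd, &msg, sizeof(msg));
 */
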
static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct iommu_domain_geometry geo;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain &&
		   !iommu_domain_get_attr(v->domain,
					  DOMAIN_ATTR_GEOMETRY, &geo) &&
		   geo.force_aperture) {
		range->first = geo.aperture_start;
		range->last = geo.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only support
	 * doorbells that sit on a page boundary and do not share the
	 * page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

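/*
 * The mmap interface above exposes a virtqueue doorbell page directly to
 * userspace; vma->vm_pgoff selects the queue.  A rough, hypothetical sketch
 * of kicking queue qid without a VHOST_SET_VRING_KICK eventfd (the value
 * written to the doorbell is device specific; virtio-pci style parents
 * expect the queue index):
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			vdpa_fd, qid * page_size);
 *
 *	if (db != MAP_FAILED)
 *		*(volatile uint16_t *)db = qid;
 */
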
static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");