// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}

static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}

static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int used_size, i;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
				sizeof(struct vring_used_elem) *
				le16_to_cpu(vqconfig->num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->remap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}

static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}

static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
			virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}

static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}

static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
		i < MIC_DP_SIZE - mic_total_desc_size(argp);
		i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}

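/*
 * For reference, the device page scanned above has roughly this layout
 * (a sketch; entry sizes vary with each descriptor's num_vq, feature
 * length and config length):
 *
 *	offset 0			struct mic_bootparam
 *	first entry			struct mic_device_desc, its vring
 *					configs, feature bits, config space
 *					and struct mic_device_ctrl
 *	+ mic_total_desc_size(entry)	next entry, and so on
 *
 * An entry with type 0 has never been used and type -1 marks a removed
 * device; both are treated as free slots by the scan.
 */
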
static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}

static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
					  DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}

/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @dev - The address of the pointer to the device instance used
 * for DMA registration.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return DMA_SUCCESS on success
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA || !vi->dma_ch) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_alignment = 1 << vi->dma_ch->device->copy_align;
	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;
	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

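/*
 * A worked example of the alignment handling above (numbers are only
 * illustrative, assuming a DMA engine with copy_align = 6, i.e. 64-byte
 * alignment): for daddr = 0x10234 and len = 100,
 *
 *	dma_offset = 0x10234 - round_down(0x10234, 64) = 0x34 (52 bytes)
 *	daddr      = 0x10200, len = 100 + 52 = 152
 *	DMA length = ALIGN(152, 64) = 192 bytes into vvr->buf
 *	copy_to_user() then copies 152 - 52 = 100 bytes from vvr->buf + 0x34
 *
 * The extra bytes read by the aligned DMA never reach user space.
 */
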
/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t partlen;
	bool dma = VOP_USE_DMA && vi->dma_ch;
	int err = 0;

	if (dma) {
		dma_alignment = 1 << vi->dma_ch->device->copy_align;
		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

		if (daddr & (dma_alignment - 1)) {
			vdev->tx_dst_unaligned += len;
			dma = false;
		} else if (ALIGN(len, dma_alignment) > dlen) {
			vdev->tx_len_unaligned += len;
			dma = false;
		}
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	vdev->out_bytes += len;
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];
		unsigned long daddr = (unsigned long)kiov->iov_base;

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      daddr,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							daddr,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;
			/* undo our previous bump of iov->consumed */
			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}

static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}

static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = memdup_user(argp, mic_desc_size(&dd));
		if (IS_ERR(dd_config))
			return PTR_ERR(dd_config);

		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = memdup_user(argp, vdev->dd->config_len);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			goto __unlock_ret;
		}
		ret = vop_virtio_config_change(vdev, buf);
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	};
	return 0;
}

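/*
 * For orientation, a minimal sketch of how a host user-space daemon might
 * drive the ioctls above against the "vop_virtio%d" misc device. The
 * descriptor layout in 'desc' (struct mic_device_desc plus vring configs,
 * features and config space) and all error handling are elided; 'buf',
 * 'buf_len' and 'config' are illustrative placeholders.
 *
 *	int fd = open("/dev/vop_virtio0", O_RDWR);
 *	struct iovec iov = { .iov_base = buf, .iov_len = buf_len };
 *	struct mic_copy_desc copy = {
 *		.iov = &iov, .iovcnt = 1, .vr_idx = 0, .update_used = 1,
 *	};
 *
 *	ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *	ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);	(copy.out_len returns bytes moved)
 *	ioctl(fd, MIC_VIRTIO_CONFIG_CHANGE, config);
 */
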
/*
 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static __poll_t vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	__poll_t mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = EPOLLIN | EPOLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}

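/*
 * A sketch (user space, hypothetical) of the drain pattern the comment above
 * asks for: once poll() wakes us, keep issuing MIC_VIRTIO_COPY_DESC until no
 * further data moves, and only then sleep again. 'fd' and 'copy' are set up
 * as in the vop_ioctl() example; error handling is elided.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	for (;;) {
 *		poll(&pfd, 1, -1);
 *		do {
 *			copy.out_len = 0;
 *			ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *		} while (copy.out_len > 0);
 *	}
 */
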
static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset				region
	 * 0x0					virtio device_page
	 * 0x1000				first vring
	 * 0x1000 + size of 1st vring		second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}

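/*
 * A sketch (user space, hypothetical) of consuming the read-only layout that
 * vop_query_offset() describes: one mapping starting at offset 0 covers the
 * 0x1000-byte device page followed by each vring. The vring lengths below are
 * placeholders; the real values come from the device page descriptors.
 *
 *	size_t total = 0x1000 + vr0_len + vr1_len;
 *	void *va = mmap(NULL, total, PROT_READ, MAP_SHARED, fd, 0);
 *	void *device_page = va;
 *	void *vring0 = va + 0x1000;
 *	void *vring1 = va + 0x1000 + vr0_len;
 */
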
static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.mmap = vop_mmap,
	.poll = vop_poll,
	.owner = THIS_MODULE,
};

int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}

void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}