2 * Intel MIC Platform Software Stack (MPSS)
4 * Copyright(c) 2016 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
18 * Intel Virtio Over PCIe (VOP) driver.
21 #include <linux/sched.h>
22 #include <linux/poll.h>
23 #include <linux/dma-mapping.h>
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
28 #include <linux/mic_ioctl.h>
31 /* Helper API to obtain the VOP PCIe device */
32 static inline struct device
*vop_dev(struct vop_vdev
*vdev
)
34 return vdev
->vpdev
->dev
.parent
;
37 /* Helper API to check if a virtio device is initialized */
38 static inline int vop_vdev_inited(struct vop_vdev
*vdev
)
42 /* Device has not been created yet */
43 if (!vdev
->dd
|| !vdev
->dd
->type
) {
44 dev_err(vop_dev(vdev
), "%s %d err %d\n",
45 __func__
, __LINE__
, -EINVAL
);
48 /* Device has been removed/deleted */
49 if (vdev
->dd
->type
== -1) {
50 dev_dbg(vop_dev(vdev
), "%s %d err %d\n",
51 __func__
, __LINE__
, -ENODEV
);
57 static void _vop_notify(struct vringh
*vrh
)
59 struct vop_vringh
*vvrh
= container_of(vrh
, struct vop_vringh
, vrh
);
60 struct vop_vdev
*vdev
= vvrh
->vdev
;
61 struct vop_device
*vpdev
= vdev
->vpdev
;
62 s8 db
= vdev
->dc
->h2c_vdev_db
;
65 vpdev
->hw_ops
->send_intr(vpdev
, db
);
68 static void vop_virtio_init_post(struct vop_vdev
*vdev
)
70 struct mic_vqconfig
*vqconfig
= mic_vq_config(vdev
->dd
);
71 struct vop_device
*vpdev
= vdev
->vpdev
;
74 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
75 used_size
= PAGE_ALIGN(sizeof(u16
) * 3 +
76 sizeof(struct vring_used_elem
) *
77 le16_to_cpu(vqconfig
->num
));
78 if (!le64_to_cpu(vqconfig
[i
].used_address
)) {
79 dev_warn(vop_dev(vdev
), "used_address zero??\n");
82 vdev
->vvr
[i
].vrh
.vring
.used
=
83 (void __force
*)vpdev
->hw_ops
->remap(
85 le64_to_cpu(vqconfig
[i
].used_address
),
89 vdev
->dc
->used_address_updated
= 0;
91 dev_info(vop_dev(vdev
), "%s: device type %d LINKUP\n",
92 __func__
, vdev
->virtio_id
);
95 static inline void vop_virtio_device_reset(struct vop_vdev
*vdev
)
99 dev_dbg(vop_dev(vdev
), "%s: status %d device type %d RESET\n",
100 __func__
, vdev
->dd
->status
, vdev
->virtio_id
);
102 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
104 * Avoid lockdep false positive. The + 1 is for the vop
105 * mutex which is held in the reset devices code path.
107 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
109 /* 0 status means "reset" */
110 vdev
->dd
->status
= 0;
111 vdev
->dc
->vdev_reset
= 0;
112 vdev
->dc
->host_ack
= 1;
114 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
115 struct vringh
*vrh
= &vdev
->vvr
[i
].vrh
;
117 vdev
->vvr
[i
].vring
.info
->avail_idx
= 0;
119 vrh
->last_avail_idx
= 0;
120 vrh
->last_used_idx
= 0;
123 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
124 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
127 static void vop_virtio_reset_devices(struct vop_info
*vi
)
129 struct list_head
*pos
, *tmp
;
130 struct vop_vdev
*vdev
;
132 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
133 vdev
= list_entry(pos
, struct vop_vdev
, list
);
134 vop_virtio_device_reset(vdev
);
136 wake_up(&vdev
->waitq
);
140 static void vop_bh_handler(struct work_struct
*work
)
142 struct vop_vdev
*vdev
= container_of(work
, struct vop_vdev
,
145 if (vdev
->dc
->used_address_updated
)
146 vop_virtio_init_post(vdev
);
148 if (vdev
->dc
->vdev_reset
)
149 vop_virtio_device_reset(vdev
);
152 wake_up(&vdev
->waitq
);
155 static irqreturn_t
_vop_virtio_intr_handler(int irq
, void *data
)
157 struct vop_vdev
*vdev
= data
;
158 struct vop_device
*vpdev
= vdev
->vpdev
;
160 vpdev
->hw_ops
->ack_interrupt(vpdev
, vdev
->virtio_db
);
161 schedule_work(&vdev
->virtio_bh_work
);
165 static int vop_virtio_config_change(struct vop_vdev
*vdev
, void *argp
)
167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
168 int ret
= 0, retry
, i
;
169 struct vop_device
*vpdev
= vdev
->vpdev
;
170 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
171 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
172 s8 db
= bootparam
->h2c_config_db
;
174 mutex_lock(&vi
->vop_mutex
);
175 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
176 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
178 if (db
== -1 || vdev
->dd
->type
== -1) {
183 memcpy(mic_vq_configspace(vdev
->dd
), argp
, vdev
->dd
->config_len
);
184 vdev
->dc
->config_change
= MIC_VIRTIO_PARAM_CONFIG_CHANGED
;
185 vpdev
->hw_ops
->send_intr(vpdev
, db
);
187 for (retry
= 100; retry
--;) {
188 ret
= wait_event_timeout(wake
, vdev
->dc
->guest_ack
,
189 msecs_to_jiffies(100));
194 dev_dbg(vop_dev(vdev
),
195 "%s %d retry: %d\n", __func__
, __LINE__
, retry
);
196 vdev
->dc
->config_change
= 0;
197 vdev
->dc
->guest_ack
= 0;
199 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
200 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
201 mutex_unlock(&vi
->vop_mutex
);
205 static int vop_copy_dp_entry(struct vop_vdev
*vdev
,
206 struct mic_device_desc
*argp
, __u8
*type
,
207 struct mic_device_desc
**devpage
)
209 struct vop_device
*vpdev
= vdev
->vpdev
;
210 struct mic_device_desc
*devp
;
211 struct mic_vqconfig
*vqconfig
;
213 bool slot_found
= false;
215 vqconfig
= mic_vq_config(argp
);
216 for (i
= 0; i
< argp
->num_vq
; i
++) {
217 if (le16_to_cpu(vqconfig
[i
].num
) > MIC_MAX_VRING_ENTRIES
) {
219 dev_err(vop_dev(vdev
), "%s %d err %d\n",
220 __func__
, __LINE__
, ret
);
225 /* Find the first free device page entry */
226 for (i
= sizeof(struct mic_bootparam
);
227 i
< MIC_DP_SIZE
- mic_total_desc_size(argp
);
228 i
+= mic_total_desc_size(devp
)) {
229 devp
= vpdev
->hw_ops
->get_dp(vpdev
) + i
;
230 if (devp
->type
== 0 || devp
->type
== -1) {
237 dev_err(vop_dev(vdev
), "%s %d err %d\n",
238 __func__
, __LINE__
, ret
);
242 * Save off the type before doing the memcpy. Type will be set in the
243 * end after completing all initialization for the new device.
247 memcpy(devp
, argp
, mic_desc_size(argp
));
254 static void vop_init_device_ctrl(struct vop_vdev
*vdev
,
255 struct mic_device_desc
*devpage
)
257 struct mic_device_ctrl
*dc
;
259 dc
= (void *)devpage
+ mic_aligned_desc_size(devpage
);
261 dc
->config_change
= 0;
265 dc
->used_address_updated
= 0;
266 dc
->c2h_vdev_db
= -1;
267 dc
->h2c_vdev_db
= -1;
271 static int vop_virtio_add_device(struct vop_vdev
*vdev
,
272 struct mic_device_desc
*argp
)
274 struct vop_info
*vi
= vdev
->vi
;
275 struct vop_device
*vpdev
= vi
->vpdev
;
276 struct mic_device_desc
*dd
= NULL
;
277 struct mic_vqconfig
*vqconfig
;
278 int vr_size
, i
, j
, ret
;
282 struct mic_bootparam
*bootparam
;
286 bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
287 init_waitqueue_head(&vdev
->waitq
);
288 INIT_LIST_HEAD(&vdev
->list
);
291 ret
= vop_copy_dp_entry(vdev
, argp
, &type
, &dd
);
293 dev_err(vop_dev(vdev
), "%s %d err %d\n",
294 __func__
, __LINE__
, ret
);
298 vop_init_device_ctrl(vdev
, dd
);
301 vdev
->virtio_id
= type
;
302 vqconfig
= mic_vq_config(dd
);
303 INIT_WORK(&vdev
->virtio_bh_work
, vop_bh_handler
);
305 for (i
= 0; i
< dd
->num_vq
; i
++) {
306 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
307 struct mic_vring
*vr
= &vdev
->vvr
[i
].vring
;
309 num
= le16_to_cpu(vqconfig
[i
].num
);
310 mutex_init(&vvr
->vr_mutex
);
311 vr_size
= PAGE_ALIGN(vring_size(num
, MIC_VIRTIO_RING_ALIGN
) +
312 sizeof(struct _mic_vring_info
));
314 __get_free_pages(GFP_KERNEL
| __GFP_ZERO
,
318 dev_err(vop_dev(vdev
), "%s %d err %d\n",
319 __func__
, __LINE__
, ret
);
323 vr
->info
= vr
->va
+ vring_size(num
, MIC_VIRTIO_RING_ALIGN
);
324 vr
->info
->magic
= cpu_to_le32(MIC_MAGIC
+ vdev
->virtio_id
+ i
);
325 vr_addr
= dma_map_single(&vpdev
->dev
, vr
->va
, vr_size
,
327 if (dma_mapping_error(&vpdev
->dev
, vr_addr
)) {
328 free_pages((unsigned long)vr
->va
, get_order(vr_size
));
330 dev_err(vop_dev(vdev
), "%s %d err %d\n",
331 __func__
, __LINE__
, ret
);
334 vqconfig
[i
].address
= cpu_to_le64(vr_addr
);
336 vring_init(&vr
->vr
, num
, vr
->va
, MIC_VIRTIO_RING_ALIGN
);
337 ret
= vringh_init_kern(&vvr
->vrh
,
338 *(u32
*)mic_vq_features(vdev
->dd
),
339 num
, false, vr
->vr
.desc
, vr
->vr
.avail
,
342 dev_err(vop_dev(vdev
), "%s %d err %d\n",
343 __func__
, __LINE__
, ret
);
346 vringh_kiov_init(&vvr
->riov
, NULL
, 0);
347 vringh_kiov_init(&vvr
->wiov
, NULL
, 0);
348 vvr
->head
= USHRT_MAX
;
350 vvr
->vrh
.notify
= _vop_notify
;
352 "%s %d index %d va %p info %p vr_size 0x%x\n",
353 __func__
, __LINE__
, i
, vr
->va
, vr
->info
, vr_size
);
354 vvr
->buf
= (void *)__get_free_pages(GFP_KERNEL
,
355 get_order(VOP_INT_DMA_BUF_SIZE
));
356 vvr
->buf_da
= dma_map_single(&vpdev
->dev
,
357 vvr
->buf
, VOP_INT_DMA_BUF_SIZE
,
361 snprintf(irqname
, sizeof(irqname
), "vop%dvirtio%d", vpdev
->index
,
363 vdev
->virtio_db
= vpdev
->hw_ops
->next_db(vpdev
);
364 vdev
->virtio_cookie
= vpdev
->hw_ops
->request_irq(vpdev
,
365 _vop_virtio_intr_handler
, irqname
, vdev
,
367 if (IS_ERR(vdev
->virtio_cookie
)) {
368 ret
= PTR_ERR(vdev
->virtio_cookie
);
369 dev_dbg(&vpdev
->dev
, "request irq failed\n");
373 vdev
->dc
->c2h_vdev_db
= vdev
->virtio_db
;
376 * Order the type update with previous stores. This write barrier
377 * is paired with the corresponding read barrier before the uncached
378 * system memory read of the type, on the card while scanning the
386 db
= bootparam
->h2c_config_db
;
388 vpdev
->hw_ops
->send_intr(vpdev
, db
);
390 dev_dbg(&vpdev
->dev
, "Added virtio id %d db %d\n", dd
->type
, db
);
393 vqconfig
= mic_vq_config(dd
);
394 for (j
= 0; j
< i
; j
++) {
395 struct vop_vringh
*vvr
= &vdev
->vvr
[j
];
397 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[j
].address
),
398 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
399 free_pages((unsigned long)vvr
->vring
.va
,
400 get_order(vvr
->vring
.len
));
405 static void vop_dev_remove(struct vop_info
*pvi
, struct mic_device_ctrl
*devp
,
406 struct vop_device
*vpdev
)
408 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
411 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
413 devp
->config_change
= MIC_VIRTIO_PARAM_DEV_REMOVE
;
414 db
= bootparam
->h2c_config_db
;
416 vpdev
->hw_ops
->send_intr(vpdev
, db
);
419 for (retry
= 15; retry
--;) {
420 ret
= wait_event_timeout(wake
, devp
->guest_ack
,
421 msecs_to_jiffies(1000));
426 devp
->config_change
= 0;
430 static void vop_virtio_del_device(struct vop_vdev
*vdev
)
432 struct vop_info
*vi
= vdev
->vi
;
433 struct vop_device
*vpdev
= vdev
->vpdev
;
435 struct mic_vqconfig
*vqconfig
;
436 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
439 goto skip_hot_remove
;
440 vop_dev_remove(vi
, vdev
->dc
, vpdev
);
442 vpdev
->hw_ops
->free_irq(vpdev
, vdev
->virtio_cookie
, vdev
);
443 flush_work(&vdev
->virtio_bh_work
);
444 vqconfig
= mic_vq_config(vdev
->dd
);
445 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
446 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
448 dma_unmap_single(&vpdev
->dev
,
449 vvr
->buf_da
, VOP_INT_DMA_BUF_SIZE
,
451 free_pages((unsigned long)vvr
->buf
,
452 get_order(VOP_INT_DMA_BUF_SIZE
));
453 vringh_kiov_cleanup(&vvr
->riov
);
454 vringh_kiov_cleanup(&vvr
->wiov
);
455 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[i
].address
),
456 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
457 free_pages((unsigned long)vvr
->vring
.va
,
458 get_order(vvr
->vring
.len
));
461 * Order the type update with previous stores. This write barrier
462 * is paired with the corresponding read barrier before the uncached
463 * system memory read of the type, on the card while scanning the
471 * vop_sync_dma - Wrapper for synchronous DMAs.
473 * @dev - The address of the pointer to the device instance used
474 * for DMA registration.
475 * @dst - destination DMA address.
476 * @src - source DMA address.
477 * @len - size of the transfer.
479 * Return DMA_SUCCESS on success
481 static int vop_sync_dma(struct vop_vdev
*vdev
, dma_addr_t dst
, dma_addr_t src
,
485 struct dma_device
*ddev
;
486 struct dma_async_tx_descriptor
*tx
;
487 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
488 struct dma_chan
*vop_ch
= vi
->dma_ch
;
494 ddev
= vop_ch
->device
;
495 tx
= ddev
->device_prep_dma_memcpy(vop_ch
, dst
, src
, len
,
503 cookie
= tx
->tx_submit(tx
);
504 if (dma_submit_error(cookie
)) {
508 dma_async_issue_pending(vop_ch
);
509 err
= dma_sync_wait(vop_ch
, cookie
);
513 dev_err(&vi
->vpdev
->dev
, "%s %d err %d\n",
514 __func__
, __LINE__
, err
);
518 #define VOP_USE_DMA true
521 * Initiates the copies across the PCIe bus from card memory to a user
522 * space buffer. When transfers are done using DMA, source/destination
523 * addresses and transfer length must follow the alignment requirements of
524 * the MIC DMA engine.
526 static int vop_virtio_copy_to_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
527 size_t len
, u64 daddr
, size_t dlen
,
530 struct vop_device
*vpdev
= vdev
->vpdev
;
531 void __iomem
*dbuf
= vpdev
->hw_ops
->remap(vpdev
, daddr
, len
);
532 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
533 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
534 size_t dma_alignment
;
536 size_t dma_offset
, partlen
;
539 if (!VOP_USE_DMA
|| !vi
->dma_ch
) {
540 if (copy_to_user(ubuf
, (void __force
*)dbuf
, len
)) {
542 dev_err(vop_dev(vdev
), "%s %d err %d\n",
543 __func__
, __LINE__
, err
);
546 vdev
->in_bytes
+= len
;
551 dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
552 x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
554 dma_offset
= daddr
- round_down(daddr
, dma_alignment
);
558 * X100 uses DMA addresses as seen by the card so adding
559 * the aperture base is not required for DMA. However x200
560 * requires DMA addresses to be an offset into the bar so
561 * add the aperture base for x200.
564 daddr
+= vpdev
->aper
->pa
;
566 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
567 err
= vop_sync_dma(vdev
, vvr
->buf_da
, daddr
,
568 ALIGN(partlen
, dma_alignment
));
570 dev_err(vop_dev(vdev
), "%s %d err %d\n",
571 __func__
, __LINE__
, err
);
574 if (copy_to_user(ubuf
, vvr
->buf
+ dma_offset
,
575 partlen
- dma_offset
)) {
577 dev_err(vop_dev(vdev
), "%s %d err %d\n",
578 __func__
, __LINE__
, err
);
584 vdev
->in_bytes_dma
+= partlen
;
585 vdev
->in_bytes
+= partlen
;
591 vpdev
->hw_ops
->unmap(vpdev
, dbuf
);
592 dev_dbg(vop_dev(vdev
),
593 "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
594 __func__
, ubuf
, dbuf
, len
, vr_idx
);
599 * Initiates copies across the PCIe bus from a user space buffer to card
600 * memory. When transfers are done using DMA, source/destination addresses
601 * and transfer length must follow the alignment requirements of the MIC
604 static int vop_virtio_copy_from_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
605 size_t len
, u64 daddr
, size_t dlen
,
608 struct vop_device
*vpdev
= vdev
->vpdev
;
609 void __iomem
*dbuf
= vpdev
->hw_ops
->remap(vpdev
, daddr
, len
);
610 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
611 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
612 size_t dma_alignment
;
615 bool dma
= VOP_USE_DMA
&& vi
->dma_ch
;
619 dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
620 x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
622 if (daddr
& (dma_alignment
- 1)) {
623 vdev
->tx_dst_unaligned
+= len
;
625 } else if (ALIGN(len
, dma_alignment
) > dlen
) {
626 vdev
->tx_len_unaligned
+= len
;
635 * X100 uses DMA addresses as seen by the card so adding
636 * the aperture base is not required for DMA. However x200
637 * requires DMA addresses to be an offset into the bar so
638 * add the aperture base for x200.
641 daddr
+= vpdev
->aper
->pa
;
643 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
645 if (copy_from_user(vvr
->buf
, ubuf
, partlen
)) {
647 dev_err(vop_dev(vdev
), "%s %d err %d\n",
648 __func__
, __LINE__
, err
);
651 err
= vop_sync_dma(vdev
, daddr
, vvr
->buf_da
,
652 ALIGN(partlen
, dma_alignment
));
654 dev_err(vop_dev(vdev
), "%s %d err %d\n",
655 __func__
, __LINE__
, err
);
661 vdev
->out_bytes_dma
+= partlen
;
662 vdev
->out_bytes
+= partlen
;
667 * We are copying to IO below and should ideally use something
668 * like copy_from_user_toio(..) if it existed.
670 if (copy_from_user((void __force
*)dbuf
, ubuf
, len
)) {
672 dev_err(vop_dev(vdev
), "%s %d err %d\n",
673 __func__
, __LINE__
, err
);
676 vdev
->out_bytes
+= len
;
679 vpdev
->hw_ops
->unmap(vpdev
, dbuf
);
680 dev_dbg(vop_dev(vdev
),
681 "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
682 __func__
, ubuf
, dbuf
, len
, vr_idx
);
686 #define MIC_VRINGH_READ true
688 /* Determine the total number of bytes consumed in a VRINGH KIOV */
689 static inline u32
vop_vringh_iov_consumed(struct vringh_kiov
*iov
)
692 u32 total
= iov
->consumed
;
694 for (i
= 0; i
< iov
->i
; i
++)
695 total
+= iov
->iov
[i
].iov_len
;
700 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
701 * This API is heavily based on the vringh_iov_xfer(..) implementation
702 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
703 * and vringh_iov_push_kern(..) directly is because there is no
704 * way to override the VRINGH xfer(..) routines as of v3.10.
706 static int vop_vringh_copy(struct vop_vdev
*vdev
, struct vringh_kiov
*iov
,
707 void __user
*ubuf
, size_t len
, bool read
, int vr_idx
,
711 size_t partlen
, tot_len
= 0;
713 while (len
&& iov
->i
< iov
->used
) {
714 struct kvec
*kiov
= &iov
->iov
[iov
->i
];
715 unsigned long daddr
= (unsigned long)kiov
->iov_base
;
717 partlen
= min(kiov
->iov_len
, len
);
719 ret
= vop_virtio_copy_to_user(vdev
, ubuf
, partlen
,
724 ret
= vop_virtio_copy_from_user(vdev
, ubuf
, partlen
,
729 dev_err(vop_dev(vdev
), "%s %d err %d\n",
730 __func__
, __LINE__
, ret
);
736 iov
->consumed
+= partlen
;
737 kiov
->iov_len
-= partlen
;
738 kiov
->iov_base
+= partlen
;
739 if (!kiov
->iov_len
) {
740 /* Fix up old iov element then increment. */
741 kiov
->iov_len
= iov
->consumed
;
742 kiov
->iov_base
-= iov
->consumed
;
753 * Use the standard VRINGH infrastructure in the kernel to fetch new
754 * descriptors, initiate the copies and update the used ring.
756 static int _vop_virtio_copy(struct vop_vdev
*vdev
, struct mic_copy_desc
*copy
)
759 u32 iovcnt
= copy
->iovcnt
;
761 struct iovec __user
*u_iov
= copy
->iov
;
762 void __user
*ubuf
= NULL
;
763 struct vop_vringh
*vvr
= &vdev
->vvr
[copy
->vr_idx
];
764 struct vringh_kiov
*riov
= &vvr
->riov
;
765 struct vringh_kiov
*wiov
= &vvr
->wiov
;
766 struct vringh
*vrh
= &vvr
->vrh
;
767 u16
*head
= &vvr
->head
;
768 struct mic_vring
*vr
= &vvr
->vring
;
769 size_t len
= 0, out_len
;
772 /* Fetch a new IOVEC if all previous elements have been processed */
773 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
) {
774 ret
= vringh_getdesc_kern(vrh
, riov
, wiov
,
776 /* Check if there are available descriptors */
782 /* Copy over a new iovec from user space. */
783 ret
= copy_from_user(&iov
, u_iov
, sizeof(*u_iov
));
786 dev_err(vop_dev(vdev
), "%s %d err %d\n",
787 __func__
, __LINE__
, ret
);
793 /* Issue all the read descriptors first */
794 ret
= vop_vringh_copy(vdev
, riov
, ubuf
, len
,
795 MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
797 dev_err(vop_dev(vdev
), "%s %d err %d\n",
798 __func__
, __LINE__
, ret
);
803 copy
->out_len
+= out_len
;
804 /* Issue the write descriptors next */
805 ret
= vop_vringh_copy(vdev
, wiov
, ubuf
, len
,
806 !MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
808 dev_err(vop_dev(vdev
), "%s %d err %d\n",
809 __func__
, __LINE__
, ret
);
814 copy
->out_len
+= out_len
;
816 /* One user space iovec is now completed */
820 /* Exit loop if all elements in KIOVs have been processed. */
821 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
)
825 * Update the used ring if a descriptor was available and some data was
826 * copied in/out and the user asked for a used ring update.
828 if (*head
!= USHRT_MAX
&& copy
->out_len
&& copy
->update_used
) {
831 /* Determine the total data consumed */
832 total
+= vop_vringh_iov_consumed(riov
);
833 total
+= vop_vringh_iov_consumed(wiov
);
834 vringh_complete_kern(vrh
, *head
, total
);
836 if (vringh_need_notify_kern(vrh
) > 0)
838 vringh_kiov_cleanup(riov
);
839 vringh_kiov_cleanup(wiov
);
840 /* Update avail idx for user space */
841 vr
->info
->avail_idx
= vrh
->last_avail_idx
;
846 static inline int vop_verify_copy_args(struct vop_vdev
*vdev
,
847 struct mic_copy_desc
*copy
)
849 if (!vdev
|| copy
->vr_idx
>= vdev
->dd
->num_vq
)
854 /* Copy a specified number of virtio descriptors in a chain */
855 static int vop_virtio_copy_desc(struct vop_vdev
*vdev
,
856 struct mic_copy_desc
*copy
)
859 struct vop_vringh
*vvr
;
861 err
= vop_verify_copy_args(vdev
, copy
);
865 vvr
= &vdev
->vvr
[copy
->vr_idx
];
866 mutex_lock(&vvr
->vr_mutex
);
867 if (!vop_vdevup(vdev
)) {
869 dev_err(vop_dev(vdev
), "%s %d err %d\n",
870 __func__
, __LINE__
, err
);
873 err
= _vop_virtio_copy(vdev
, copy
);
875 dev_err(vop_dev(vdev
), "%s %d err %d\n",
876 __func__
, __LINE__
, err
);
879 mutex_unlock(&vvr
->vr_mutex
);
883 static int vop_open(struct inode
*inode
, struct file
*f
)
885 struct vop_vdev
*vdev
;
886 struct vop_info
*vi
= container_of(f
->private_data
,
887 struct vop_info
, miscdev
);
889 vdev
= kzalloc(sizeof(*vdev
), GFP_KERNEL
);
893 mutex_init(&vdev
->vdev_mutex
);
894 f
->private_data
= vdev
;
895 init_completion(&vdev
->destroy
);
896 complete(&vdev
->destroy
);
900 static int vop_release(struct inode
*inode
, struct file
*f
)
902 struct vop_vdev
*vdev
= f
->private_data
, *vdev_tmp
;
903 struct vop_info
*vi
= vdev
->vi
;
904 struct list_head
*pos
, *tmp
;
907 mutex_lock(&vdev
->vdev_mutex
);
910 mutex_lock(&vi
->vop_mutex
);
911 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
912 vdev_tmp
= list_entry(pos
, struct vop_vdev
, list
);
913 if (vdev
== vdev_tmp
) {
914 vop_virtio_del_device(vdev
);
920 mutex_unlock(&vi
->vop_mutex
);
922 mutex_unlock(&vdev
->vdev_mutex
);
924 wait_for_completion(&vdev
->destroy
);
925 f
->private_data
= NULL
;
930 static long vop_ioctl(struct file
*f
, unsigned int cmd
, unsigned long arg
)
932 struct vop_vdev
*vdev
= f
->private_data
;
933 struct vop_info
*vi
= vdev
->vi
;
934 void __user
*argp
= (void __user
*)arg
;
938 case MIC_VIRTIO_ADD_DEVICE
:
940 struct mic_device_desc dd
, *dd_config
;
942 if (copy_from_user(&dd
, argp
, sizeof(dd
)))
945 if (mic_aligned_desc_size(&dd
) > MIC_MAX_DESC_BLK_SIZE
||
946 dd
.num_vq
> MIC_MAX_VRINGS
)
949 dd_config
= memdup_user(argp
, mic_desc_size(&dd
));
950 if (IS_ERR(dd_config
))
951 return PTR_ERR(dd_config
);
953 /* Ensure desc has not changed between the two reads */
954 if (memcmp(&dd
, dd_config
, sizeof(dd
))) {
958 mutex_lock(&vdev
->vdev_mutex
);
959 mutex_lock(&vi
->vop_mutex
);
960 ret
= vop_virtio_add_device(vdev
, dd_config
);
963 list_add_tail(&vdev
->list
, &vi
->vdev_list
);
965 mutex_unlock(&vi
->vop_mutex
);
966 mutex_unlock(&vdev
->vdev_mutex
);
971 case MIC_VIRTIO_COPY_DESC
:
973 struct mic_copy_desc copy
;
975 mutex_lock(&vdev
->vdev_mutex
);
976 ret
= vop_vdev_inited(vdev
);
980 if (copy_from_user(©
, argp
, sizeof(copy
))) {
985 ret
= vop_virtio_copy_desc(vdev
, ©
);
989 &((struct mic_copy_desc __user
*)argp
)->out_len
,
990 ©
.out_len
, sizeof(copy
.out_len
)))
993 mutex_unlock(&vdev
->vdev_mutex
);
996 case MIC_VIRTIO_CONFIG_CHANGE
:
1000 mutex_lock(&vdev
->vdev_mutex
);
1001 ret
= vop_vdev_inited(vdev
);
1004 buf
= memdup_user(argp
, vdev
->dd
->config_len
);
1009 ret
= vop_virtio_config_change(vdev
, buf
);
1012 mutex_unlock(&vdev
->vdev_mutex
);
1016 return -ENOIOCTLCMD
;
1022 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
1023 * not when previously enqueued buffers may be available. This means that
1024 * in the card->host (TX) path, when userspace is unblocked by poll it
1025 * must drain all available descriptors or it can stall.
1027 static __poll_t
vop_poll(struct file
*f
, poll_table
*wait
)
1029 struct vop_vdev
*vdev
= f
->private_data
;
1032 mutex_lock(&vdev
->vdev_mutex
);
1033 if (vop_vdev_inited(vdev
)) {
1037 poll_wait(f
, &vdev
->waitq
, wait
);
1038 if (vop_vdev_inited(vdev
)) {
1040 } else if (vdev
->poll_wake
) {
1041 vdev
->poll_wake
= 0;
1042 mask
= EPOLLIN
| EPOLLOUT
;
1045 mutex_unlock(&vdev
->vdev_mutex
);
1050 vop_query_offset(struct vop_vdev
*vdev
, unsigned long offset
,
1051 unsigned long *size
, unsigned long *pa
)
1053 struct vop_device
*vpdev
= vdev
->vpdev
;
1054 unsigned long start
= MIC_DP_SIZE
;
1058 * MMAP interface is as follows:
1060 * 0x0 virtio device_page
1061 * 0x1000 first vring
1062 * 0x1000 + size of 1st vring second vring
1066 *pa
= virt_to_phys(vpdev
->hw_ops
->get_dp(vpdev
));
1067 *size
= MIC_DP_SIZE
;
1071 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
1072 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
1074 if (offset
== start
) {
1075 *pa
= virt_to_phys(vvr
->vring
.va
);
1076 *size
= vvr
->vring
.len
;
1079 start
+= vvr
->vring
.len
;
1085 * Maps the device page and virtio rings to user space for readonly access.
1087 static int vop_mmap(struct file
*f
, struct vm_area_struct
*vma
)
1089 struct vop_vdev
*vdev
= f
->private_data
;
1090 unsigned long offset
= vma
->vm_pgoff
<< PAGE_SHIFT
;
1091 unsigned long pa
, size
= vma
->vm_end
- vma
->vm_start
, size_rem
= size
;
1094 err
= vop_vdev_inited(vdev
);
1097 if (vma
->vm_flags
& VM_WRITE
) {
1102 i
= vop_query_offset(vdev
, offset
, &size
, &pa
);
1107 err
= remap_pfn_range(vma
, vma
->vm_start
+ offset
,
1108 pa
>> PAGE_SHIFT
, size
,
1119 static const struct file_operations vop_fops
= {
1121 .release
= vop_release
,
1122 .unlocked_ioctl
= vop_ioctl
,
1125 .owner
= THIS_MODULE
,
1128 int vop_host_init(struct vop_info
*vi
)
1131 struct miscdevice
*mdev
;
1132 struct vop_device
*vpdev
= vi
->vpdev
;
1134 INIT_LIST_HEAD(&vi
->vdev_list
);
1135 vi
->dma_ch
= vpdev
->dma_ch
;
1136 mdev
= &vi
->miscdev
;
1137 mdev
->minor
= MISC_DYNAMIC_MINOR
;
1138 snprintf(vi
->name
, sizeof(vi
->name
), "vop_virtio%d", vpdev
->index
);
1139 mdev
->name
= vi
->name
;
1140 mdev
->fops
= &vop_fops
;
1141 mdev
->parent
= &vpdev
->dev
;
1143 rc
= misc_register(mdev
);
1145 dev_err(&vpdev
->dev
, "%s failed rc %d\n", __func__
, rc
);
1149 void vop_host_uninit(struct vop_info
*vi
)
1151 struct list_head
*pos
, *tmp
;
1152 struct vop_vdev
*vdev
;
1154 mutex_lock(&vi
->vop_mutex
);
1155 vop_virtio_reset_devices(vi
);
1156 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
1157 vdev
= list_entry(pos
, struct vop_vdev
, list
);
1159 reinit_completion(&vdev
->destroy
);
1160 mutex_unlock(&vi
->vop_mutex
);
1161 mutex_lock(&vdev
->vdev_mutex
);
1162 vop_virtio_del_device(vdev
);
1163 vdev
->deleted
= true;
1164 mutex_unlock(&vdev
->vdev_mutex
);
1165 complete(&vdev
->destroy
);
1166 mutex_lock(&vi
->vop_mutex
);
1168 mutex_unlock(&vi
->vop_mutex
);
1169 misc_deregister(&vi
->miscdev
);