/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
21 #include <linux/sched.h>
22 #include <linux/poll.h>
23 #include <linux/dma-mapping.h>
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
28 #include <linux/mic_ioctl.h>
31 /* Helper API to obtain the VOP PCIe device */
32 static inline struct device
*vop_dev(struct vop_vdev
*vdev
)
34 return vdev
->vpdev
->dev
.parent
;
37 /* Helper API to check if a virtio device is initialized */
38 static inline int vop_vdev_inited(struct vop_vdev
*vdev
)
42 /* Device has not been created yet */
43 if (!vdev
->dd
|| !vdev
->dd
->type
) {
44 dev_err(vop_dev(vdev
), "%s %d err %d\n",
45 __func__
, __LINE__
, -EINVAL
);
48 /* Device has been removed/deleted */
49 if (vdev
->dd
->type
== -1) {
50 dev_dbg(vop_dev(vdev
), "%s %d err %d\n",
51 __func__
, __LINE__
, -ENODEV
);
57 static void _vop_notify(struct vringh
*vrh
)
59 struct vop_vringh
*vvrh
= container_of(vrh
, struct vop_vringh
, vrh
);
60 struct vop_vdev
*vdev
= vvrh
->vdev
;
61 struct vop_device
*vpdev
= vdev
->vpdev
;
62 s8 db
= vdev
->dc
->h2c_vdev_db
;
65 vpdev
->hw_ops
->send_intr(vpdev
, db
);
68 static void vop_virtio_init_post(struct vop_vdev
*vdev
)
70 struct mic_vqconfig
*vqconfig
= mic_vq_config(vdev
->dd
);
71 struct vop_device
*vpdev
= vdev
->vpdev
;
74 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
75 used_size
= PAGE_ALIGN(sizeof(u16
) * 3 +
76 sizeof(struct vring_used_elem
) *
77 le16_to_cpu(vqconfig
->num
));
78 if (!le64_to_cpu(vqconfig
[i
].used_address
)) {
79 dev_warn(vop_dev(vdev
), "used_address zero??\n");
82 vdev
->vvr
[i
].vrh
.vring
.used
=
83 (void __force
*)vpdev
->hw_ops
->ioremap(
85 le64_to_cpu(vqconfig
[i
].used_address
),
89 vdev
->dc
->used_address_updated
= 0;
91 dev_info(vop_dev(vdev
), "%s: device type %d LINKUP\n",
92 __func__
, vdev
->virtio_id
);
95 static inline void vop_virtio_device_reset(struct vop_vdev
*vdev
)
99 dev_dbg(vop_dev(vdev
), "%s: status %d device type %d RESET\n",
100 __func__
, vdev
->dd
->status
, vdev
->virtio_id
);
102 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
104 * Avoid lockdep false positive. The + 1 is for the vop
105 * mutex which is held in the reset devices code path.
107 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
109 /* 0 status means "reset" */
110 vdev
->dd
->status
= 0;
111 vdev
->dc
->vdev_reset
= 0;
112 vdev
->dc
->host_ack
= 1;
114 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
115 struct vringh
*vrh
= &vdev
->vvr
[i
].vrh
;
117 vdev
->vvr
[i
].vring
.info
->avail_idx
= 0;
119 vrh
->last_avail_idx
= 0;
120 vrh
->last_used_idx
= 0;
123 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
124 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
127 static void vop_virtio_reset_devices(struct vop_info
*vi
)
129 struct list_head
*pos
, *tmp
;
130 struct vop_vdev
*vdev
;
132 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
133 vdev
= list_entry(pos
, struct vop_vdev
, list
);
134 vop_virtio_device_reset(vdev
);
136 wake_up(&vdev
->waitq
);
140 static void vop_bh_handler(struct work_struct
*work
)
142 struct vop_vdev
*vdev
= container_of(work
, struct vop_vdev
,
145 if (vdev
->dc
->used_address_updated
)
146 vop_virtio_init_post(vdev
);
148 if (vdev
->dc
->vdev_reset
)
149 vop_virtio_device_reset(vdev
);
152 wake_up(&vdev
->waitq
);
155 static irqreturn_t
_vop_virtio_intr_handler(int irq
, void *data
)
157 struct vop_vdev
*vdev
= data
;
158 struct vop_device
*vpdev
= vdev
->vpdev
;
160 vpdev
->hw_ops
->ack_interrupt(vpdev
, vdev
->virtio_db
);
161 schedule_work(&vdev
->virtio_bh_work
);
165 static int vop_virtio_config_change(struct vop_vdev
*vdev
, void *argp
)
167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
168 int ret
= 0, retry
, i
;
169 struct vop_device
*vpdev
= vdev
->vpdev
;
170 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
171 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
172 s8 db
= bootparam
->h2c_config_db
;
174 mutex_lock(&vi
->vop_mutex
);
175 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
176 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
178 if (db
== -1 || vdev
->dd
->type
== -1) {
183 memcpy(mic_vq_configspace(vdev
->dd
), argp
, vdev
->dd
->config_len
);
184 vdev
->dc
->config_change
= MIC_VIRTIO_PARAM_CONFIG_CHANGED
;
185 vpdev
->hw_ops
->send_intr(vpdev
, db
);
187 for (retry
= 100; retry
--;) {
188 ret
= wait_event_timeout(wake
, vdev
->dc
->guest_ack
,
189 msecs_to_jiffies(100));
194 dev_dbg(vop_dev(vdev
),
195 "%s %d retry: %d\n", __func__
, __LINE__
, retry
);
196 vdev
->dc
->config_change
= 0;
197 vdev
->dc
->guest_ack
= 0;
199 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
200 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
201 mutex_unlock(&vi
->vop_mutex
);
205 static int vop_copy_dp_entry(struct vop_vdev
*vdev
,
206 struct mic_device_desc
*argp
, __u8
*type
,
207 struct mic_device_desc
**devpage
)
209 struct vop_device
*vpdev
= vdev
->vpdev
;
210 struct mic_device_desc
*devp
;
211 struct mic_vqconfig
*vqconfig
;
213 bool slot_found
= false;
215 vqconfig
= mic_vq_config(argp
);
216 for (i
= 0; i
< argp
->num_vq
; i
++) {
217 if (le16_to_cpu(vqconfig
[i
].num
) > MIC_MAX_VRING_ENTRIES
) {
219 dev_err(vop_dev(vdev
), "%s %d err %d\n",
220 __func__
, __LINE__
, ret
);
225 /* Find the first free device page entry */
226 for (i
= sizeof(struct mic_bootparam
);
227 i
< MIC_DP_SIZE
- mic_total_desc_size(argp
);
228 i
+= mic_total_desc_size(devp
)) {
229 devp
= vpdev
->hw_ops
->get_dp(vpdev
) + i
;
230 if (devp
->type
== 0 || devp
->type
== -1) {
237 dev_err(vop_dev(vdev
), "%s %d err %d\n",
238 __func__
, __LINE__
, ret
);
242 * Save off the type before doing the memcpy. Type will be set in the
243 * end after completing all initialization for the new device.
247 memcpy(devp
, argp
, mic_desc_size(argp
));
254 static void vop_init_device_ctrl(struct vop_vdev
*vdev
,
255 struct mic_device_desc
*devpage
)
257 struct mic_device_ctrl
*dc
;
259 dc
= (void *)devpage
+ mic_aligned_desc_size(devpage
);
261 dc
->config_change
= 0;
265 dc
->used_address_updated
= 0;
266 dc
->c2h_vdev_db
= -1;
267 dc
->h2c_vdev_db
= -1;
271 static int vop_virtio_add_device(struct vop_vdev
*vdev
,
272 struct mic_device_desc
*argp
)
274 struct vop_info
*vi
= vdev
->vi
;
275 struct vop_device
*vpdev
= vi
->vpdev
;
276 struct mic_device_desc
*dd
= NULL
;
277 struct mic_vqconfig
*vqconfig
;
278 int vr_size
, i
, j
, ret
;
282 struct mic_bootparam
*bootparam
;
286 bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
287 init_waitqueue_head(&vdev
->waitq
);
288 INIT_LIST_HEAD(&vdev
->list
);
291 ret
= vop_copy_dp_entry(vdev
, argp
, &type
, &dd
);
293 dev_err(vop_dev(vdev
), "%s %d err %d\n",
294 __func__
, __LINE__
, ret
);
298 vop_init_device_ctrl(vdev
, dd
);
301 vdev
->virtio_id
= type
;
302 vqconfig
= mic_vq_config(dd
);
303 INIT_WORK(&vdev
->virtio_bh_work
, vop_bh_handler
);
305 for (i
= 0; i
< dd
->num_vq
; i
++) {
306 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
307 struct mic_vring
*vr
= &vdev
->vvr
[i
].vring
;
309 num
= le16_to_cpu(vqconfig
[i
].num
);
310 mutex_init(&vvr
->vr_mutex
);
311 vr_size
= PAGE_ALIGN(vring_size(num
, MIC_VIRTIO_RING_ALIGN
) +
312 sizeof(struct _mic_vring_info
));
314 __get_free_pages(GFP_KERNEL
| __GFP_ZERO
,
318 dev_err(vop_dev(vdev
), "%s %d err %d\n",
319 __func__
, __LINE__
, ret
);
323 vr
->info
= vr
->va
+ vring_size(num
, MIC_VIRTIO_RING_ALIGN
);
324 vr
->info
->magic
= cpu_to_le32(MIC_MAGIC
+ vdev
->virtio_id
+ i
);
325 vr_addr
= dma_map_single(&vpdev
->dev
, vr
->va
, vr_size
,
327 if (dma_mapping_error(&vpdev
->dev
, vr_addr
)) {
328 free_pages((unsigned long)vr
->va
, get_order(vr_size
));
330 dev_err(vop_dev(vdev
), "%s %d err %d\n",
331 __func__
, __LINE__
, ret
);
334 vqconfig
[i
].address
= cpu_to_le64(vr_addr
);
336 vring_init(&vr
->vr
, num
, vr
->va
, MIC_VIRTIO_RING_ALIGN
);
337 ret
= vringh_init_kern(&vvr
->vrh
,
338 *(u32
*)mic_vq_features(vdev
->dd
),
339 num
, false, vr
->vr
.desc
, vr
->vr
.avail
,
342 dev_err(vop_dev(vdev
), "%s %d err %d\n",
343 __func__
, __LINE__
, ret
);
346 vringh_kiov_init(&vvr
->riov
, NULL
, 0);
347 vringh_kiov_init(&vvr
->wiov
, NULL
, 0);
348 vvr
->head
= USHRT_MAX
;
350 vvr
->vrh
.notify
= _vop_notify
;
352 "%s %d index %d va %p info %p vr_size 0x%x\n",
353 __func__
, __LINE__
, i
, vr
->va
, vr
->info
, vr_size
);
354 vvr
->buf
= (void *)__get_free_pages(GFP_KERNEL
,
355 get_order(VOP_INT_DMA_BUF_SIZE
));
356 vvr
->buf_da
= dma_map_single(&vpdev
->dev
,
357 vvr
->buf
, VOP_INT_DMA_BUF_SIZE
,
361 snprintf(irqname
, sizeof(irqname
), "vop%dvirtio%d", vpdev
->index
,
363 vdev
->virtio_db
= vpdev
->hw_ops
->next_db(vpdev
);
364 vdev
->virtio_cookie
= vpdev
->hw_ops
->request_irq(vpdev
,
365 _vop_virtio_intr_handler
, irqname
, vdev
,
367 if (IS_ERR(vdev
->virtio_cookie
)) {
368 ret
= PTR_ERR(vdev
->virtio_cookie
);
369 dev_dbg(&vpdev
->dev
, "request irq failed\n");
373 vdev
->dc
->c2h_vdev_db
= vdev
->virtio_db
;
376 * Order the type update with previous stores. This write barrier
377 * is paired with the corresponding read barrier before the uncached
378 * system memory read of the type, on the card while scanning the
386 db
= bootparam
->h2c_config_db
;
388 vpdev
->hw_ops
->send_intr(vpdev
, db
);
390 dev_dbg(&vpdev
->dev
, "Added virtio id %d db %d\n", dd
->type
, db
);
393 vqconfig
= mic_vq_config(dd
);
394 for (j
= 0; j
< i
; j
++) {
395 struct vop_vringh
*vvr
= &vdev
->vvr
[j
];
397 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[j
].address
),
398 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
399 free_pages((unsigned long)vvr
->vring
.va
,
400 get_order(vvr
->vring
.len
));
405 static void vop_dev_remove(struct vop_info
*pvi
, struct mic_device_ctrl
*devp
,
406 struct vop_device
*vpdev
)
408 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
411 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
413 devp
->config_change
= MIC_VIRTIO_PARAM_DEV_REMOVE
;
414 db
= bootparam
->h2c_config_db
;
416 vpdev
->hw_ops
->send_intr(vpdev
, db
);
419 for (retry
= 15; retry
--;) {
420 ret
= wait_event_timeout(wake
, devp
->guest_ack
,
421 msecs_to_jiffies(1000));
426 devp
->config_change
= 0;
430 static void vop_virtio_del_device(struct vop_vdev
*vdev
)
432 struct vop_info
*vi
= vdev
->vi
;
433 struct vop_device
*vpdev
= vdev
->vpdev
;
435 struct mic_vqconfig
*vqconfig
;
436 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
439 goto skip_hot_remove
;
440 vop_dev_remove(vi
, vdev
->dc
, vpdev
);
442 vpdev
->hw_ops
->free_irq(vpdev
, vdev
->virtio_cookie
, vdev
);
443 flush_work(&vdev
->virtio_bh_work
);
444 vqconfig
= mic_vq_config(vdev
->dd
);
445 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
446 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
448 dma_unmap_single(&vpdev
->dev
,
449 vvr
->buf_da
, VOP_INT_DMA_BUF_SIZE
,
451 free_pages((unsigned long)vvr
->buf
,
452 get_order(VOP_INT_DMA_BUF_SIZE
));
453 vringh_kiov_cleanup(&vvr
->riov
);
454 vringh_kiov_cleanup(&vvr
->wiov
);
455 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[i
].address
),
456 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
457 free_pages((unsigned long)vvr
->vring
.va
,
458 get_order(vvr
->vring
.len
));
461 * Order the type update with previous stores. This write barrier
462 * is paired with the corresponding read barrier before the uncached
463 * system memory read of the type, on the card while scanning the
471 * vop_sync_dma - Wrapper for synchronous DMAs.
473 * @dev - The address of the pointer to the device instance used
474 * for DMA registration.
475 * @dst - destination DMA address.
476 * @src - source DMA address.
477 * @len - size of the transfer.
479 * Return DMA_SUCCESS on success
481 static int vop_sync_dma(struct vop_vdev
*vdev
, dma_addr_t dst
, dma_addr_t src
,
485 struct dma_device
*ddev
;
486 struct dma_async_tx_descriptor
*tx
;
487 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
488 struct dma_chan
*vop_ch
= vi
->dma_ch
;
494 ddev
= vop_ch
->device
;
495 tx
= ddev
->device_prep_dma_memcpy(vop_ch
, dst
, src
, len
,
503 cookie
= tx
->tx_submit(tx
);
504 if (dma_submit_error(cookie
)) {
508 dma_async_issue_pending(vop_ch
);
509 err
= dma_sync_wait(vop_ch
, cookie
);
513 dev_err(&vi
->vpdev
->dev
, "%s %d err %d\n",
514 __func__
, __LINE__
, err
);
/* Initial value of the local `dma` flag in the user-space copy helpers. */
#define VOP_USE_DMA true
521 * Initiates the copies across the PCIe bus from card memory to a user
522 * space buffer. When transfers are done using DMA, source/destination
523 * addresses and transfer length must follow the alignment requirements of
524 * the MIC DMA engine.
526 static int vop_virtio_copy_to_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
527 size_t len
, u64 daddr
, size_t dlen
,
530 struct vop_device
*vpdev
= vdev
->vpdev
;
531 void __iomem
*dbuf
= vpdev
->hw_ops
->ioremap(vpdev
, daddr
, len
);
532 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
533 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
534 size_t dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
535 bool x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
536 size_t dma_offset
, partlen
;
540 if (copy_to_user(ubuf
, (void __force
*)dbuf
, len
)) {
542 dev_err(vop_dev(vdev
), "%s %d err %d\n",
543 __func__
, __LINE__
, err
);
546 vdev
->in_bytes
+= len
;
551 dma_offset
= daddr
- round_down(daddr
, dma_alignment
);
555 * X100 uses DMA addresses as seen by the card so adding
556 * the aperture base is not required for DMA. However x200
557 * requires DMA addresses to be an offset into the bar so
558 * add the aperture base for x200.
561 daddr
+= vpdev
->aper
->pa
;
563 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
564 err
= vop_sync_dma(vdev
, vvr
->buf_da
, daddr
,
565 ALIGN(partlen
, dma_alignment
));
567 dev_err(vop_dev(vdev
), "%s %d err %d\n",
568 __func__
, __LINE__
, err
);
571 if (copy_to_user(ubuf
, vvr
->buf
+ dma_offset
,
572 partlen
- dma_offset
)) {
574 dev_err(vop_dev(vdev
), "%s %d err %d\n",
575 __func__
, __LINE__
, err
);
581 vdev
->in_bytes_dma
+= partlen
;
582 vdev
->in_bytes
+= partlen
;
588 vpdev
->hw_ops
->iounmap(vpdev
, dbuf
);
589 dev_dbg(vop_dev(vdev
),
590 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
591 __func__
, ubuf
, dbuf
, len
, vr_idx
);
596 * Initiates copies across the PCIe bus from a user space buffer to card
597 * memory. When transfers are done using DMA, source/destination addresses
598 * and transfer length must follow the alignment requirements of the MIC
601 static int vop_virtio_copy_from_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
602 size_t len
, u64 daddr
, size_t dlen
,
605 struct vop_device
*vpdev
= vdev
->vpdev
;
606 void __iomem
*dbuf
= vpdev
->hw_ops
->ioremap(vpdev
, daddr
, len
);
607 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
608 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
609 size_t dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
610 bool x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
612 bool dma
= VOP_USE_DMA
;
615 if (daddr
& (dma_alignment
- 1)) {
616 vdev
->tx_dst_unaligned
+= len
;
618 } else if (ALIGN(len
, dma_alignment
) > dlen
) {
619 vdev
->tx_len_unaligned
+= len
;
627 * X100 uses DMA addresses as seen by the card so adding
628 * the aperture base is not required for DMA. However x200
629 * requires DMA addresses to be an offset into the bar so
630 * add the aperture base for x200.
633 daddr
+= vpdev
->aper
->pa
;
635 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
637 if (copy_from_user(vvr
->buf
, ubuf
, partlen
)) {
639 dev_err(vop_dev(vdev
), "%s %d err %d\n",
640 __func__
, __LINE__
, err
);
643 err
= vop_sync_dma(vdev
, daddr
, vvr
->buf_da
,
644 ALIGN(partlen
, dma_alignment
));
646 dev_err(vop_dev(vdev
), "%s %d err %d\n",
647 __func__
, __LINE__
, err
);
653 vdev
->out_bytes_dma
+= partlen
;
654 vdev
->out_bytes
+= partlen
;
659 * We are copying to IO below and should ideally use something
660 * like copy_from_user_toio(..) if it existed.
662 if (copy_from_user((void __force
*)dbuf
, ubuf
, len
)) {
664 dev_err(vop_dev(vdev
), "%s %d err %d\n",
665 __func__
, __LINE__
, err
);
668 vdev
->out_bytes
+= len
;
671 vpdev
->hw_ops
->iounmap(vpdev
, dbuf
);
672 dev_dbg(vop_dev(vdev
),
673 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
674 __func__
, ubuf
, dbuf
, len
, vr_idx
);
/*
 * Value of the `read` argument of vop_vringh_copy() when processing read
 * descriptors; write descriptors pass !MIC_VRINGH_READ.
 */
#define MIC_VRINGH_READ true
680 /* Determine the total number of bytes consumed in a VRINGH KIOV */
681 static inline u32
vop_vringh_iov_consumed(struct vringh_kiov
*iov
)
684 u32 total
= iov
->consumed
;
686 for (i
= 0; i
< iov
->i
; i
++)
687 total
+= iov
->iov
[i
].iov_len
;
692 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
693 * This API is heavily based on the vringh_iov_xfer(..) implementation
694 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
695 * and vringh_iov_push_kern(..) directly is because there is no
696 * way to override the VRINGH xfer(..) routines as of v3.10.
698 static int vop_vringh_copy(struct vop_vdev
*vdev
, struct vringh_kiov
*iov
,
699 void __user
*ubuf
, size_t len
, bool read
, int vr_idx
,
703 size_t partlen
, tot_len
= 0;
705 while (len
&& iov
->i
< iov
->used
) {
706 struct kvec
*kiov
= &iov
->iov
[iov
->i
];
708 partlen
= min(kiov
->iov_len
, len
);
710 ret
= vop_virtio_copy_to_user(vdev
, ubuf
, partlen
,
715 ret
= vop_virtio_copy_from_user(vdev
, ubuf
, partlen
,
720 dev_err(vop_dev(vdev
), "%s %d err %d\n",
721 __func__
, __LINE__
, ret
);
727 iov
->consumed
+= partlen
;
728 kiov
->iov_len
-= partlen
;
729 kiov
->iov_base
+= partlen
;
730 if (!kiov
->iov_len
) {
731 /* Fix up old iov element then increment. */
732 kiov
->iov_len
= iov
->consumed
;
733 kiov
->iov_base
-= iov
->consumed
;
744 * Use the standard VRINGH infrastructure in the kernel to fetch new
745 * descriptors, initiate the copies and update the used ring.
747 static int _vop_virtio_copy(struct vop_vdev
*vdev
, struct mic_copy_desc
*copy
)
750 u32 iovcnt
= copy
->iovcnt
;
752 struct iovec __user
*u_iov
= copy
->iov
;
753 void __user
*ubuf
= NULL
;
754 struct vop_vringh
*vvr
= &vdev
->vvr
[copy
->vr_idx
];
755 struct vringh_kiov
*riov
= &vvr
->riov
;
756 struct vringh_kiov
*wiov
= &vvr
->wiov
;
757 struct vringh
*vrh
= &vvr
->vrh
;
758 u16
*head
= &vvr
->head
;
759 struct mic_vring
*vr
= &vvr
->vring
;
760 size_t len
= 0, out_len
;
763 /* Fetch a new IOVEC if all previous elements have been processed */
764 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
) {
765 ret
= vringh_getdesc_kern(vrh
, riov
, wiov
,
767 /* Check if there are available descriptors */
773 /* Copy over a new iovec from user space. */
774 ret
= copy_from_user(&iov
, u_iov
, sizeof(*u_iov
));
777 dev_err(vop_dev(vdev
), "%s %d err %d\n",
778 __func__
, __LINE__
, ret
);
784 /* Issue all the read descriptors first */
785 ret
= vop_vringh_copy(vdev
, riov
, ubuf
, len
,
786 MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
788 dev_err(vop_dev(vdev
), "%s %d err %d\n",
789 __func__
, __LINE__
, ret
);
794 copy
->out_len
+= out_len
;
795 /* Issue the write descriptors next */
796 ret
= vop_vringh_copy(vdev
, wiov
, ubuf
, len
,
797 !MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
799 dev_err(vop_dev(vdev
), "%s %d err %d\n",
800 __func__
, __LINE__
, ret
);
805 copy
->out_len
+= out_len
;
807 /* One user space iovec is now completed */
811 /* Exit loop if all elements in KIOVs have been processed. */
812 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
)
816 * Update the used ring if a descriptor was available and some data was
817 * copied in/out and the user asked for a used ring update.
819 if (*head
!= USHRT_MAX
&& copy
->out_len
&& copy
->update_used
) {
822 /* Determine the total data consumed */
823 total
+= vop_vringh_iov_consumed(riov
);
824 total
+= vop_vringh_iov_consumed(wiov
);
825 vringh_complete_kern(vrh
, *head
, total
);
827 if (vringh_need_notify_kern(vrh
) > 0)
829 vringh_kiov_cleanup(riov
);
830 vringh_kiov_cleanup(wiov
);
831 /* Update avail idx for user space */
832 vr
->info
->avail_idx
= vrh
->last_avail_idx
;
837 static inline int vop_verify_copy_args(struct vop_vdev
*vdev
,
838 struct mic_copy_desc
*copy
)
840 if (!vdev
|| copy
->vr_idx
>= vdev
->dd
->num_vq
)
845 /* Copy a specified number of virtio descriptors in a chain */
846 static int vop_virtio_copy_desc(struct vop_vdev
*vdev
,
847 struct mic_copy_desc
*copy
)
850 struct vop_vringh
*vvr
;
852 err
= vop_verify_copy_args(vdev
, copy
);
856 vvr
= &vdev
->vvr
[copy
->vr_idx
];
857 mutex_lock(&vvr
->vr_mutex
);
858 if (!vop_vdevup(vdev
)) {
860 dev_err(vop_dev(vdev
), "%s %d err %d\n",
861 __func__
, __LINE__
, err
);
864 err
= _vop_virtio_copy(vdev
, copy
);
866 dev_err(vop_dev(vdev
), "%s %d err %d\n",
867 __func__
, __LINE__
, err
);
870 mutex_unlock(&vvr
->vr_mutex
);
874 static int vop_open(struct inode
*inode
, struct file
*f
)
876 struct vop_vdev
*vdev
;
877 struct vop_info
*vi
= container_of(f
->private_data
,
878 struct vop_info
, miscdev
);
880 vdev
= kzalloc(sizeof(*vdev
), GFP_KERNEL
);
884 mutex_init(&vdev
->vdev_mutex
);
885 f
->private_data
= vdev
;
886 init_completion(&vdev
->destroy
);
887 complete(&vdev
->destroy
);
891 static int vop_release(struct inode
*inode
, struct file
*f
)
893 struct vop_vdev
*vdev
= f
->private_data
, *vdev_tmp
;
894 struct vop_info
*vi
= vdev
->vi
;
895 struct list_head
*pos
, *tmp
;
898 mutex_lock(&vdev
->vdev_mutex
);
901 mutex_lock(&vi
->vop_mutex
);
902 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
903 vdev_tmp
= list_entry(pos
, struct vop_vdev
, list
);
904 if (vdev
== vdev_tmp
) {
905 vop_virtio_del_device(vdev
);
911 mutex_unlock(&vi
->vop_mutex
);
913 mutex_unlock(&vdev
->vdev_mutex
);
915 wait_for_completion(&vdev
->destroy
);
916 f
->private_data
= NULL
;
921 static long vop_ioctl(struct file
*f
, unsigned int cmd
, unsigned long arg
)
923 struct vop_vdev
*vdev
= f
->private_data
;
924 struct vop_info
*vi
= vdev
->vi
;
925 void __user
*argp
= (void __user
*)arg
;
929 case MIC_VIRTIO_ADD_DEVICE
:
931 struct mic_device_desc dd
, *dd_config
;
933 if (copy_from_user(&dd
, argp
, sizeof(dd
)))
936 if (mic_aligned_desc_size(&dd
) > MIC_MAX_DESC_BLK_SIZE
||
937 dd
.num_vq
> MIC_MAX_VRINGS
)
940 dd_config
= kzalloc(mic_desc_size(&dd
), GFP_KERNEL
);
943 if (copy_from_user(dd_config
, argp
, mic_desc_size(&dd
))) {
947 /* Ensure desc has not changed between the two reads */
948 if (memcmp(&dd
, dd_config
, sizeof(dd
))) {
952 mutex_lock(&vdev
->vdev_mutex
);
953 mutex_lock(&vi
->vop_mutex
);
954 ret
= vop_virtio_add_device(vdev
, dd_config
);
957 list_add_tail(&vdev
->list
, &vi
->vdev_list
);
959 mutex_unlock(&vi
->vop_mutex
);
960 mutex_unlock(&vdev
->vdev_mutex
);
965 case MIC_VIRTIO_COPY_DESC
:
967 struct mic_copy_desc copy
;
969 mutex_lock(&vdev
->vdev_mutex
);
970 ret
= vop_vdev_inited(vdev
);
974 if (copy_from_user(©
, argp
, sizeof(copy
))) {
979 ret
= vop_virtio_copy_desc(vdev
, ©
);
983 &((struct mic_copy_desc __user
*)argp
)->out_len
,
984 ©
.out_len
, sizeof(copy
.out_len
)))
987 mutex_unlock(&vdev
->vdev_mutex
);
990 case MIC_VIRTIO_CONFIG_CHANGE
:
994 mutex_lock(&vdev
->vdev_mutex
);
995 ret
= vop_vdev_inited(vdev
);
998 buf
= kzalloc(vdev
->dd
->config_len
, GFP_KERNEL
);
1003 if (copy_from_user(buf
, argp
, vdev
->dd
->config_len
)) {
1007 ret
= vop_virtio_config_change(vdev
, buf
);
1011 mutex_unlock(&vdev
->vdev_mutex
);
1015 return -ENOIOCTLCMD
;
1021 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
1022 * not when previously enqueued buffers may be available. This means that
1023 * in the card->host (TX) path, when userspace is unblocked by poll it
1024 * must drain all available descriptors or it can stall.
1026 static unsigned int vop_poll(struct file
*f
, poll_table
*wait
)
1028 struct vop_vdev
*vdev
= f
->private_data
;
1031 mutex_lock(&vdev
->vdev_mutex
);
1032 if (vop_vdev_inited(vdev
)) {
1036 poll_wait(f
, &vdev
->waitq
, wait
);
1037 if (vop_vdev_inited(vdev
)) {
1039 } else if (vdev
->poll_wake
) {
1040 vdev
->poll_wake
= 0;
1041 mask
= POLLIN
| POLLOUT
;
1044 mutex_unlock(&vdev
->vdev_mutex
);
1049 vop_query_offset(struct vop_vdev
*vdev
, unsigned long offset
,
1050 unsigned long *size
, unsigned long *pa
)
1052 struct vop_device
*vpdev
= vdev
->vpdev
;
1053 unsigned long start
= MIC_DP_SIZE
;
1057 * MMAP interface is as follows:
1059 * 0x0 virtio device_page
1060 * 0x1000 first vring
1061 * 0x1000 + size of 1st vring second vring
1065 *pa
= virt_to_phys(vpdev
->hw_ops
->get_dp(vpdev
));
1066 *size
= MIC_DP_SIZE
;
1070 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
1071 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
1073 if (offset
== start
) {
1074 *pa
= virt_to_phys(vvr
->vring
.va
);
1075 *size
= vvr
->vring
.len
;
1078 start
+= vvr
->vring
.len
;
1084 * Maps the device page and virtio rings to user space for readonly access.
1086 static int vop_mmap(struct file
*f
, struct vm_area_struct
*vma
)
1088 struct vop_vdev
*vdev
= f
->private_data
;
1089 unsigned long offset
= vma
->vm_pgoff
<< PAGE_SHIFT
;
1090 unsigned long pa
, size
= vma
->vm_end
- vma
->vm_start
, size_rem
= size
;
1093 err
= vop_vdev_inited(vdev
);
1096 if (vma
->vm_flags
& VM_WRITE
) {
1101 i
= vop_query_offset(vdev
, offset
, &size
, &pa
);
1106 err
= remap_pfn_range(vma
, vma
->vm_start
+ offset
,
1107 pa
>> PAGE_SHIFT
, size
,
1118 static const struct file_operations vop_fops
= {
1120 .release
= vop_release
,
1121 .unlocked_ioctl
= vop_ioctl
,
1124 .owner
= THIS_MODULE
,
1127 int vop_host_init(struct vop_info
*vi
)
1130 struct miscdevice
*mdev
;
1131 struct vop_device
*vpdev
= vi
->vpdev
;
1133 INIT_LIST_HEAD(&vi
->vdev_list
);
1134 vi
->dma_ch
= vpdev
->dma_ch
;
1135 mdev
= &vi
->miscdev
;
1136 mdev
->minor
= MISC_DYNAMIC_MINOR
;
1137 snprintf(vi
->name
, sizeof(vi
->name
), "vop_virtio%d", vpdev
->index
);
1138 mdev
->name
= vi
->name
;
1139 mdev
->fops
= &vop_fops
;
1140 mdev
->parent
= &vpdev
->dev
;
1142 rc
= misc_register(mdev
);
1144 dev_err(&vpdev
->dev
, "%s failed rc %d\n", __func__
, rc
);
1148 void vop_host_uninit(struct vop_info
*vi
)
1150 struct list_head
*pos
, *tmp
;
1151 struct vop_vdev
*vdev
;
1153 mutex_lock(&vi
->vop_mutex
);
1154 vop_virtio_reset_devices(vi
);
1155 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
1156 vdev
= list_entry(pos
, struct vop_vdev
, list
);
1158 reinit_completion(&vdev
->destroy
);
1159 mutex_unlock(&vi
->vop_mutex
);
1160 mutex_lock(&vdev
->vdev_mutex
);
1161 vop_virtio_del_device(vdev
);
1162 vdev
->deleted
= true;
1163 mutex_unlock(&vdev
->vdev_mutex
);
1164 complete(&vdev
->destroy
);
1165 mutex_lock(&vi
->vop_mutex
);
1167 mutex_unlock(&vi
->vop_mutex
);
1168 misc_deregister(&vi
->miscdev
);