/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
31 /* Helper API to obtain the VOP PCIe device */
32 static inline struct device
*vop_dev(struct vop_vdev
*vdev
)
34 return vdev
->vpdev
->dev
.parent
;
37 /* Helper API to check if a virtio device is initialized */
38 static inline int vop_vdev_inited(struct vop_vdev
*vdev
)
42 /* Device has not been created yet */
43 if (!vdev
->dd
|| !vdev
->dd
->type
) {
44 dev_err(vop_dev(vdev
), "%s %d err %d\n",
45 __func__
, __LINE__
, -EINVAL
);
48 /* Device has been removed/deleted */
49 if (vdev
->dd
->type
== -1) {
50 dev_dbg(vop_dev(vdev
), "%s %d err %d\n",
51 __func__
, __LINE__
, -ENODEV
);
57 static void _vop_notify(struct vringh
*vrh
)
59 struct vop_vringh
*vvrh
= container_of(vrh
, struct vop_vringh
, vrh
);
60 struct vop_vdev
*vdev
= vvrh
->vdev
;
61 struct vop_device
*vpdev
= vdev
->vpdev
;
62 s8 db
= vdev
->dc
->h2c_vdev_db
;
65 vpdev
->hw_ops
->send_intr(vpdev
, db
);
68 static void vop_virtio_init_post(struct vop_vdev
*vdev
)
70 struct mic_vqconfig
*vqconfig
= mic_vq_config(vdev
->dd
);
71 struct vop_device
*vpdev
= vdev
->vpdev
;
74 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
75 used_size
= PAGE_ALIGN(sizeof(u16
) * 3 +
76 sizeof(struct vring_used_elem
) *
77 le16_to_cpu(vqconfig
->num
));
78 if (!le64_to_cpu(vqconfig
[i
].used_address
)) {
79 dev_warn(vop_dev(vdev
), "used_address zero??\n");
82 vdev
->vvr
[i
].vrh
.vring
.used
=
83 (void __force
*)vpdev
->hw_ops
->ioremap(
85 le64_to_cpu(vqconfig
[i
].used_address
),
89 vdev
->dc
->used_address_updated
= 0;
91 dev_info(vop_dev(vdev
), "%s: device type %d LINKUP\n",
92 __func__
, vdev
->virtio_id
);
95 static inline void vop_virtio_device_reset(struct vop_vdev
*vdev
)
99 dev_dbg(vop_dev(vdev
), "%s: status %d device type %d RESET\n",
100 __func__
, vdev
->dd
->status
, vdev
->virtio_id
);
102 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
104 * Avoid lockdep false positive. The + 1 is for the vop
105 * mutex which is held in the reset devices code path.
107 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
109 /* 0 status means "reset" */
110 vdev
->dd
->status
= 0;
111 vdev
->dc
->vdev_reset
= 0;
112 vdev
->dc
->host_ack
= 1;
114 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
115 struct vringh
*vrh
= &vdev
->vvr
[i
].vrh
;
117 vdev
->vvr
[i
].vring
.info
->avail_idx
= 0;
119 vrh
->last_avail_idx
= 0;
120 vrh
->last_used_idx
= 0;
123 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
124 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
127 static void vop_virtio_reset_devices(struct vop_info
*vi
)
129 struct list_head
*pos
, *tmp
;
130 struct vop_vdev
*vdev
;
132 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
133 vdev
= list_entry(pos
, struct vop_vdev
, list
);
134 vop_virtio_device_reset(vdev
);
136 wake_up(&vdev
->waitq
);
140 static void vop_bh_handler(struct work_struct
*work
)
142 struct vop_vdev
*vdev
= container_of(work
, struct vop_vdev
,
145 if (vdev
->dc
->used_address_updated
)
146 vop_virtio_init_post(vdev
);
148 if (vdev
->dc
->vdev_reset
)
149 vop_virtio_device_reset(vdev
);
152 wake_up(&vdev
->waitq
);
155 static irqreturn_t
_vop_virtio_intr_handler(int irq
, void *data
)
157 struct vop_vdev
*vdev
= data
;
158 struct vop_device
*vpdev
= vdev
->vpdev
;
160 vpdev
->hw_ops
->ack_interrupt(vpdev
, vdev
->virtio_db
);
161 schedule_work(&vdev
->virtio_bh_work
);
165 static int vop_virtio_config_change(struct vop_vdev
*vdev
, void *argp
)
167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
168 int ret
= 0, retry
, i
;
169 struct vop_device
*vpdev
= vdev
->vpdev
;
170 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
171 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
172 s8 db
= bootparam
->h2c_config_db
;
174 mutex_lock(&vi
->vop_mutex
);
175 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
176 mutex_lock_nested(&vdev
->vvr
[i
].vr_mutex
, i
+ 1);
178 if (db
== -1 || vdev
->dd
->type
== -1) {
183 memcpy(mic_vq_configspace(vdev
->dd
), argp
, vdev
->dd
->config_len
);
184 vdev
->dc
->config_change
= MIC_VIRTIO_PARAM_CONFIG_CHANGED
;
185 vpdev
->hw_ops
->send_intr(vpdev
, db
);
187 for (retry
= 100; retry
--;) {
188 ret
= wait_event_timeout(wake
, vdev
->dc
->guest_ack
,
189 msecs_to_jiffies(100));
194 dev_dbg(vop_dev(vdev
),
195 "%s %d retry: %d\n", __func__
, __LINE__
, retry
);
196 vdev
->dc
->config_change
= 0;
197 vdev
->dc
->guest_ack
= 0;
199 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++)
200 mutex_unlock(&vdev
->vvr
[i
].vr_mutex
);
201 mutex_unlock(&vi
->vop_mutex
);
205 static int vop_copy_dp_entry(struct vop_vdev
*vdev
,
206 struct mic_device_desc
*argp
, __u8
*type
,
207 struct mic_device_desc
**devpage
)
209 struct vop_device
*vpdev
= vdev
->vpdev
;
210 struct mic_device_desc
*devp
;
211 struct mic_vqconfig
*vqconfig
;
213 bool slot_found
= false;
215 vqconfig
= mic_vq_config(argp
);
216 for (i
= 0; i
< argp
->num_vq
; i
++) {
217 if (le16_to_cpu(vqconfig
[i
].num
) > MIC_MAX_VRING_ENTRIES
) {
219 dev_err(vop_dev(vdev
), "%s %d err %d\n",
220 __func__
, __LINE__
, ret
);
225 /* Find the first free device page entry */
226 for (i
= sizeof(struct mic_bootparam
);
227 i
< MIC_DP_SIZE
- mic_total_desc_size(argp
);
228 i
+= mic_total_desc_size(devp
)) {
229 devp
= vpdev
->hw_ops
->get_dp(vpdev
) + i
;
230 if (devp
->type
== 0 || devp
->type
== -1) {
237 dev_err(vop_dev(vdev
), "%s %d err %d\n",
238 __func__
, __LINE__
, ret
);
242 * Save off the type before doing the memcpy. Type will be set in the
243 * end after completing all initialization for the new device.
247 memcpy(devp
, argp
, mic_desc_size(argp
));
254 static void vop_init_device_ctrl(struct vop_vdev
*vdev
,
255 struct mic_device_desc
*devpage
)
257 struct mic_device_ctrl
*dc
;
259 dc
= (void *)devpage
+ mic_aligned_desc_size(devpage
);
261 dc
->config_change
= 0;
265 dc
->used_address_updated
= 0;
266 dc
->c2h_vdev_db
= -1;
267 dc
->h2c_vdev_db
= -1;
271 static int vop_virtio_add_device(struct vop_vdev
*vdev
,
272 struct mic_device_desc
*argp
)
274 struct vop_info
*vi
= vdev
->vi
;
275 struct vop_device
*vpdev
= vi
->vpdev
;
276 struct mic_device_desc
*dd
= NULL
;
277 struct mic_vqconfig
*vqconfig
;
278 int vr_size
, i
, j
, ret
;
282 struct mic_bootparam
*bootparam
;
286 bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
287 init_waitqueue_head(&vdev
->waitq
);
288 INIT_LIST_HEAD(&vdev
->list
);
291 ret
= vop_copy_dp_entry(vdev
, argp
, &type
, &dd
);
293 dev_err(vop_dev(vdev
), "%s %d err %d\n",
294 __func__
, __LINE__
, ret
);
299 vop_init_device_ctrl(vdev
, dd
);
302 vdev
->virtio_id
= type
;
303 vqconfig
= mic_vq_config(dd
);
304 INIT_WORK(&vdev
->virtio_bh_work
, vop_bh_handler
);
306 for (i
= 0; i
< dd
->num_vq
; i
++) {
307 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
308 struct mic_vring
*vr
= &vdev
->vvr
[i
].vring
;
310 num
= le16_to_cpu(vqconfig
[i
].num
);
311 mutex_init(&vvr
->vr_mutex
);
312 vr_size
= PAGE_ALIGN(vring_size(num
, MIC_VIRTIO_RING_ALIGN
) +
313 sizeof(struct _mic_vring_info
));
315 __get_free_pages(GFP_KERNEL
| __GFP_ZERO
,
319 dev_err(vop_dev(vdev
), "%s %d err %d\n",
320 __func__
, __LINE__
, ret
);
324 vr
->info
= vr
->va
+ vring_size(num
, MIC_VIRTIO_RING_ALIGN
);
325 vr
->info
->magic
= cpu_to_le32(MIC_MAGIC
+ vdev
->virtio_id
+ i
);
326 vr_addr
= dma_map_single(&vpdev
->dev
, vr
->va
, vr_size
,
328 if (dma_mapping_error(&vpdev
->dev
, vr_addr
)) {
329 free_pages((unsigned long)vr
->va
, get_order(vr_size
));
331 dev_err(vop_dev(vdev
), "%s %d err %d\n",
332 __func__
, __LINE__
, ret
);
335 vqconfig
[i
].address
= cpu_to_le64(vr_addr
);
337 vring_init(&vr
->vr
, num
, vr
->va
, MIC_VIRTIO_RING_ALIGN
);
338 ret
= vringh_init_kern(&vvr
->vrh
,
339 *(u32
*)mic_vq_features(vdev
->dd
),
340 num
, false, vr
->vr
.desc
, vr
->vr
.avail
,
343 dev_err(vop_dev(vdev
), "%s %d err %d\n",
344 __func__
, __LINE__
, ret
);
347 vringh_kiov_init(&vvr
->riov
, NULL
, 0);
348 vringh_kiov_init(&vvr
->wiov
, NULL
, 0);
349 vvr
->head
= USHRT_MAX
;
351 vvr
->vrh
.notify
= _vop_notify
;
353 "%s %d index %d va %p info %p vr_size 0x%x\n",
354 __func__
, __LINE__
, i
, vr
->va
, vr
->info
, vr_size
);
355 vvr
->buf
= (void *)__get_free_pages(GFP_KERNEL
,
356 get_order(VOP_INT_DMA_BUF_SIZE
));
357 vvr
->buf_da
= dma_map_single(&vpdev
->dev
,
358 vvr
->buf
, VOP_INT_DMA_BUF_SIZE
,
362 snprintf(irqname
, sizeof(irqname
), "vop%dvirtio%d", vpdev
->index
,
364 vdev
->virtio_db
= vpdev
->hw_ops
->next_db(vpdev
);
365 vdev
->virtio_cookie
= vpdev
->hw_ops
->request_irq(vpdev
,
366 _vop_virtio_intr_handler
, irqname
, vdev
,
368 if (IS_ERR(vdev
->virtio_cookie
)) {
369 ret
= PTR_ERR(vdev
->virtio_cookie
);
370 dev_dbg(&vpdev
->dev
, "request irq failed\n");
374 vdev
->dc
->c2h_vdev_db
= vdev
->virtio_db
;
377 * Order the type update with previous stores. This write barrier
378 * is paired with the corresponding read barrier before the uncached
379 * system memory read of the type, on the card while scanning the
387 db
= bootparam
->h2c_config_db
;
389 vpdev
->hw_ops
->send_intr(vpdev
, db
);
391 dev_dbg(&vpdev
->dev
, "Added virtio id %d db %d\n", dd
->type
, db
);
394 vqconfig
= mic_vq_config(dd
);
395 for (j
= 0; j
< i
; j
++) {
396 struct vop_vringh
*vvr
= &vdev
->vvr
[j
];
398 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[j
].address
),
399 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
400 free_pages((unsigned long)vvr
->vring
.va
,
401 get_order(vvr
->vring
.len
));
406 static void vop_dev_remove(struct vop_info
*pvi
, struct mic_device_ctrl
*devp
,
407 struct vop_device
*vpdev
)
409 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
412 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
414 devp
->config_change
= MIC_VIRTIO_PARAM_DEV_REMOVE
;
415 db
= bootparam
->h2c_config_db
;
417 vpdev
->hw_ops
->send_intr(vpdev
, db
);
420 for (retry
= 15; retry
--;) {
421 ret
= wait_event_timeout(wake
, devp
->guest_ack
,
422 msecs_to_jiffies(1000));
427 devp
->config_change
= 0;
431 static void vop_virtio_del_device(struct vop_vdev
*vdev
)
433 struct vop_info
*vi
= vdev
->vi
;
434 struct vop_device
*vpdev
= vdev
->vpdev
;
436 struct mic_vqconfig
*vqconfig
;
437 struct mic_bootparam
*bootparam
= vpdev
->hw_ops
->get_dp(vpdev
);
440 goto skip_hot_remove
;
441 vop_dev_remove(vi
, vdev
->dc
, vpdev
);
443 vpdev
->hw_ops
->free_irq(vpdev
, vdev
->virtio_cookie
, vdev
);
444 flush_work(&vdev
->virtio_bh_work
);
445 vqconfig
= mic_vq_config(vdev
->dd
);
446 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
447 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
449 dma_unmap_single(&vpdev
->dev
,
450 vvr
->buf_da
, VOP_INT_DMA_BUF_SIZE
,
452 free_pages((unsigned long)vvr
->buf
,
453 get_order(VOP_INT_DMA_BUF_SIZE
));
454 vringh_kiov_cleanup(&vvr
->riov
);
455 vringh_kiov_cleanup(&vvr
->wiov
);
456 dma_unmap_single(&vpdev
->dev
, le64_to_cpu(vqconfig
[i
].address
),
457 vvr
->vring
.len
, DMA_BIDIRECTIONAL
);
458 free_pages((unsigned long)vvr
->vring
.va
,
459 get_order(vvr
->vring
.len
));
462 * Order the type update with previous stores. This write barrier
463 * is paired with the corresponding read barrier before the uncached
464 * system memory read of the type, on the card while scanning the
472 * vop_sync_dma - Wrapper for synchronous DMAs.
474 * @dev - The address of the pointer to the device instance used
475 * for DMA registration.
476 * @dst - destination DMA address.
477 * @src - source DMA address.
478 * @len - size of the transfer.
480 * Return DMA_SUCCESS on success
482 static int vop_sync_dma(struct vop_vdev
*vdev
, dma_addr_t dst
, dma_addr_t src
,
486 struct dma_device
*ddev
;
487 struct dma_async_tx_descriptor
*tx
;
488 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
489 struct dma_chan
*vop_ch
= vi
->dma_ch
;
495 ddev
= vop_ch
->device
;
496 tx
= ddev
->device_prep_dma_memcpy(vop_ch
, dst
, src
, len
,
504 cookie
= tx
->tx_submit(tx
);
505 if (dma_submit_error(cookie
)) {
509 dma_async_issue_pending(vop_ch
);
510 err
= dma_sync_wait(vop_ch
, cookie
);
514 dev_err(&vi
->vpdev
->dev
, "%s %d err %d\n",
515 __func__
, __LINE__
, err
);
519 #define VOP_USE_DMA true
522 * Initiates the copies across the PCIe bus from card memory to a user
523 * space buffer. When transfers are done using DMA, source/destination
524 * addresses and transfer length must follow the alignment requirements of
525 * the MIC DMA engine.
527 static int vop_virtio_copy_to_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
528 size_t len
, u64 daddr
, size_t dlen
,
531 struct vop_device
*vpdev
= vdev
->vpdev
;
532 void __iomem
*dbuf
= vpdev
->hw_ops
->ioremap(vpdev
, daddr
, len
);
533 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
534 struct vop_info
*vi
= dev_get_drvdata(&vpdev
->dev
);
535 size_t dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
536 bool x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
537 size_t dma_offset
, partlen
;
541 if (copy_to_user(ubuf
, (void __force
*)dbuf
, len
)) {
543 dev_err(vop_dev(vdev
), "%s %d err %d\n",
544 __func__
, __LINE__
, err
);
547 vdev
->in_bytes
+= len
;
552 dma_offset
= daddr
- round_down(daddr
, dma_alignment
);
556 * X100 uses DMA addresses as seen by the card so adding
557 * the aperture base is not required for DMA. However x200
558 * requires DMA addresses to be an offset into the bar so
559 * add the aperture base for x200.
562 daddr
+= vpdev
->aper
->pa
;
564 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
565 err
= vop_sync_dma(vdev
, vvr
->buf_da
, daddr
,
566 ALIGN(partlen
, dma_alignment
));
568 dev_err(vop_dev(vdev
), "%s %d err %d\n",
569 __func__
, __LINE__
, err
);
572 if (copy_to_user(ubuf
, vvr
->buf
+ dma_offset
,
573 partlen
- dma_offset
)) {
575 dev_err(vop_dev(vdev
), "%s %d err %d\n",
576 __func__
, __LINE__
, err
);
582 vdev
->in_bytes_dma
+= partlen
;
583 vdev
->in_bytes
+= partlen
;
589 vpdev
->hw_ops
->iounmap(vpdev
, dbuf
);
590 dev_dbg(vop_dev(vdev
),
591 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
592 __func__
, ubuf
, dbuf
, len
, vr_idx
);
597 * Initiates copies across the PCIe bus from a user space buffer to card
598 * memory. When transfers are done using DMA, source/destination addresses
599 * and transfer length must follow the alignment requirements of the MIC
602 static int vop_virtio_copy_from_user(struct vop_vdev
*vdev
, void __user
*ubuf
,
603 size_t len
, u64 daddr
, size_t dlen
,
606 struct vop_device
*vpdev
= vdev
->vpdev
;
607 void __iomem
*dbuf
= vpdev
->hw_ops
->ioremap(vpdev
, daddr
, len
);
608 struct vop_vringh
*vvr
= &vdev
->vvr
[vr_idx
];
609 struct vop_info
*vi
= dev_get_drvdata(&vdev
->vpdev
->dev
);
610 size_t dma_alignment
= 1 << vi
->dma_ch
->device
->copy_align
;
611 bool x200
= is_dma_copy_aligned(vi
->dma_ch
->device
, 1, 1, 1);
613 bool dma
= VOP_USE_DMA
;
616 if (daddr
& (dma_alignment
- 1)) {
617 vdev
->tx_dst_unaligned
+= len
;
619 } else if (ALIGN(len
, dma_alignment
) > dlen
) {
620 vdev
->tx_len_unaligned
+= len
;
628 * X100 uses DMA addresses as seen by the card so adding
629 * the aperture base is not required for DMA. However x200
630 * requires DMA addresses to be an offset into the bar so
631 * add the aperture base for x200.
634 daddr
+= vpdev
->aper
->pa
;
636 partlen
= min_t(size_t, len
, VOP_INT_DMA_BUF_SIZE
);
638 if (copy_from_user(vvr
->buf
, ubuf
, partlen
)) {
640 dev_err(vop_dev(vdev
), "%s %d err %d\n",
641 __func__
, __LINE__
, err
);
644 err
= vop_sync_dma(vdev
, daddr
, vvr
->buf_da
,
645 ALIGN(partlen
, dma_alignment
));
647 dev_err(vop_dev(vdev
), "%s %d err %d\n",
648 __func__
, __LINE__
, err
);
654 vdev
->out_bytes_dma
+= partlen
;
655 vdev
->out_bytes
+= partlen
;
660 * We are copying to IO below and should ideally use something
661 * like copy_from_user_toio(..) if it existed.
663 if (copy_from_user((void __force
*)dbuf
, ubuf
, len
)) {
665 dev_err(vop_dev(vdev
), "%s %d err %d\n",
666 __func__
, __LINE__
, err
);
669 vdev
->out_bytes
+= len
;
672 vpdev
->hw_ops
->iounmap(vpdev
, dbuf
);
673 dev_dbg(vop_dev(vdev
),
674 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
675 __func__
, ubuf
, dbuf
, len
, vr_idx
);
679 #define MIC_VRINGH_READ true
681 /* Determine the total number of bytes consumed in a VRINGH KIOV */
682 static inline u32
vop_vringh_iov_consumed(struct vringh_kiov
*iov
)
685 u32 total
= iov
->consumed
;
687 for (i
= 0; i
< iov
->i
; i
++)
688 total
+= iov
->iov
[i
].iov_len
;
693 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
694 * This API is heavily based on the vringh_iov_xfer(..) implementation
695 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
696 * and vringh_iov_push_kern(..) directly is because there is no
697 * way to override the VRINGH xfer(..) routines as of v3.10.
699 static int vop_vringh_copy(struct vop_vdev
*vdev
, struct vringh_kiov
*iov
,
700 void __user
*ubuf
, size_t len
, bool read
, int vr_idx
,
704 size_t partlen
, tot_len
= 0;
706 while (len
&& iov
->i
< iov
->used
) {
707 struct kvec
*kiov
= &iov
->iov
[iov
->i
];
709 partlen
= min(kiov
->iov_len
, len
);
711 ret
= vop_virtio_copy_to_user(vdev
, ubuf
, partlen
,
716 ret
= vop_virtio_copy_from_user(vdev
, ubuf
, partlen
,
721 dev_err(vop_dev(vdev
), "%s %d err %d\n",
722 __func__
, __LINE__
, ret
);
728 iov
->consumed
+= partlen
;
729 kiov
->iov_len
-= partlen
;
730 kiov
->iov_base
+= partlen
;
731 if (!kiov
->iov_len
) {
732 /* Fix up old iov element then increment. */
733 kiov
->iov_len
= iov
->consumed
;
734 kiov
->iov_base
-= iov
->consumed
;
745 * Use the standard VRINGH infrastructure in the kernel to fetch new
746 * descriptors, initiate the copies and update the used ring.
748 static int _vop_virtio_copy(struct vop_vdev
*vdev
, struct mic_copy_desc
*copy
)
751 u32 iovcnt
= copy
->iovcnt
;
753 struct iovec __user
*u_iov
= copy
->iov
;
754 void __user
*ubuf
= NULL
;
755 struct vop_vringh
*vvr
= &vdev
->vvr
[copy
->vr_idx
];
756 struct vringh_kiov
*riov
= &vvr
->riov
;
757 struct vringh_kiov
*wiov
= &vvr
->wiov
;
758 struct vringh
*vrh
= &vvr
->vrh
;
759 u16
*head
= &vvr
->head
;
760 struct mic_vring
*vr
= &vvr
->vring
;
761 size_t len
= 0, out_len
;
764 /* Fetch a new IOVEC if all previous elements have been processed */
765 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
) {
766 ret
= vringh_getdesc_kern(vrh
, riov
, wiov
,
768 /* Check if there are available descriptors */
774 /* Copy over a new iovec from user space. */
775 ret
= copy_from_user(&iov
, u_iov
, sizeof(*u_iov
));
778 dev_err(vop_dev(vdev
), "%s %d err %d\n",
779 __func__
, __LINE__
, ret
);
785 /* Issue all the read descriptors first */
786 ret
= vop_vringh_copy(vdev
, riov
, ubuf
, len
,
787 MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
789 dev_err(vop_dev(vdev
), "%s %d err %d\n",
790 __func__
, __LINE__
, ret
);
795 copy
->out_len
+= out_len
;
796 /* Issue the write descriptors next */
797 ret
= vop_vringh_copy(vdev
, wiov
, ubuf
, len
,
798 !MIC_VRINGH_READ
, copy
->vr_idx
, &out_len
);
800 dev_err(vop_dev(vdev
), "%s %d err %d\n",
801 __func__
, __LINE__
, ret
);
806 copy
->out_len
+= out_len
;
808 /* One user space iovec is now completed */
812 /* Exit loop if all elements in KIOVs have been processed. */
813 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
)
817 * Update the used ring if a descriptor was available and some data was
818 * copied in/out and the user asked for a used ring update.
820 if (*head
!= USHRT_MAX
&& copy
->out_len
&& copy
->update_used
) {
823 /* Determine the total data consumed */
824 total
+= vop_vringh_iov_consumed(riov
);
825 total
+= vop_vringh_iov_consumed(wiov
);
826 vringh_complete_kern(vrh
, *head
, total
);
828 if (vringh_need_notify_kern(vrh
) > 0)
830 vringh_kiov_cleanup(riov
);
831 vringh_kiov_cleanup(wiov
);
832 /* Update avail idx for user space */
833 vr
->info
->avail_idx
= vrh
->last_avail_idx
;
838 static inline int vop_verify_copy_args(struct vop_vdev
*vdev
,
839 struct mic_copy_desc
*copy
)
841 if (!vdev
|| copy
->vr_idx
>= vdev
->dd
->num_vq
)
846 /* Copy a specified number of virtio descriptors in a chain */
847 static int vop_virtio_copy_desc(struct vop_vdev
*vdev
,
848 struct mic_copy_desc
*copy
)
851 struct vop_vringh
*vvr
;
853 err
= vop_verify_copy_args(vdev
, copy
);
857 vvr
= &vdev
->vvr
[copy
->vr_idx
];
858 mutex_lock(&vvr
->vr_mutex
);
859 if (!vop_vdevup(vdev
)) {
861 dev_err(vop_dev(vdev
), "%s %d err %d\n",
862 __func__
, __LINE__
, err
);
865 err
= _vop_virtio_copy(vdev
, copy
);
867 dev_err(vop_dev(vdev
), "%s %d err %d\n",
868 __func__
, __LINE__
, err
);
871 mutex_unlock(&vvr
->vr_mutex
);
875 static int vop_open(struct inode
*inode
, struct file
*f
)
877 struct vop_vdev
*vdev
;
878 struct vop_info
*vi
= container_of(f
->private_data
,
879 struct vop_info
, miscdev
);
881 vdev
= kzalloc(sizeof(*vdev
), GFP_KERNEL
);
885 mutex_init(&vdev
->vdev_mutex
);
886 f
->private_data
= vdev
;
887 init_completion(&vdev
->destroy
);
888 complete(&vdev
->destroy
);
892 static int vop_release(struct inode
*inode
, struct file
*f
)
894 struct vop_vdev
*vdev
= f
->private_data
, *vdev_tmp
;
895 struct vop_info
*vi
= vdev
->vi
;
896 struct list_head
*pos
, *tmp
;
899 mutex_lock(&vdev
->vdev_mutex
);
902 mutex_lock(&vi
->vop_mutex
);
903 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
904 vdev_tmp
= list_entry(pos
, struct vop_vdev
, list
);
905 if (vdev
== vdev_tmp
) {
906 vop_virtio_del_device(vdev
);
912 mutex_unlock(&vi
->vop_mutex
);
914 mutex_unlock(&vdev
->vdev_mutex
);
916 wait_for_completion(&vdev
->destroy
);
917 f
->private_data
= NULL
;
922 static long vop_ioctl(struct file
*f
, unsigned int cmd
, unsigned long arg
)
924 struct vop_vdev
*vdev
= f
->private_data
;
925 struct vop_info
*vi
= vdev
->vi
;
926 void __user
*argp
= (void __user
*)arg
;
930 case MIC_VIRTIO_ADD_DEVICE
:
932 struct mic_device_desc dd
, *dd_config
;
934 if (copy_from_user(&dd
, argp
, sizeof(dd
)))
937 if (mic_aligned_desc_size(&dd
) > MIC_MAX_DESC_BLK_SIZE
||
938 dd
.num_vq
> MIC_MAX_VRINGS
)
941 dd_config
= kzalloc(mic_desc_size(&dd
), GFP_KERNEL
);
944 if (copy_from_user(dd_config
, argp
, mic_desc_size(&dd
))) {
948 /* Ensure desc has not changed between the two reads */
949 if (memcmp(&dd
, dd_config
, sizeof(dd
))) {
953 mutex_lock(&vdev
->vdev_mutex
);
954 mutex_lock(&vi
->vop_mutex
);
955 ret
= vop_virtio_add_device(vdev
, dd_config
);
958 list_add_tail(&vdev
->list
, &vi
->vdev_list
);
960 mutex_unlock(&vi
->vop_mutex
);
961 mutex_unlock(&vdev
->vdev_mutex
);
966 case MIC_VIRTIO_COPY_DESC
:
968 struct mic_copy_desc copy
;
970 mutex_lock(&vdev
->vdev_mutex
);
971 ret
= vop_vdev_inited(vdev
);
975 if (copy_from_user(©
, argp
, sizeof(copy
))) {
980 ret
= vop_virtio_copy_desc(vdev
, ©
);
984 &((struct mic_copy_desc __user
*)argp
)->out_len
,
985 ©
.out_len
, sizeof(copy
.out_len
)))
988 mutex_unlock(&vdev
->vdev_mutex
);
991 case MIC_VIRTIO_CONFIG_CHANGE
:
995 mutex_lock(&vdev
->vdev_mutex
);
996 ret
= vop_vdev_inited(vdev
);
999 buf
= kzalloc(vdev
->dd
->config_len
, GFP_KERNEL
);
1004 if (copy_from_user(buf
, argp
, vdev
->dd
->config_len
)) {
1008 ret
= vop_virtio_config_change(vdev
, buf
);
1012 mutex_unlock(&vdev
->vdev_mutex
);
1016 return -ENOIOCTLCMD
;
1022 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
1023 * not when previously enqueued buffers may be available. This means that
1024 * in the card->host (TX) path, when userspace is unblocked by poll it
1025 * must drain all available descriptors or it can stall.
1027 static unsigned int vop_poll(struct file
*f
, poll_table
*wait
)
1029 struct vop_vdev
*vdev
= f
->private_data
;
1032 mutex_lock(&vdev
->vdev_mutex
);
1033 if (vop_vdev_inited(vdev
)) {
1037 poll_wait(f
, &vdev
->waitq
, wait
);
1038 if (vop_vdev_inited(vdev
)) {
1040 } else if (vdev
->poll_wake
) {
1041 vdev
->poll_wake
= 0;
1042 mask
= POLLIN
| POLLOUT
;
1045 mutex_unlock(&vdev
->vdev_mutex
);
1050 vop_query_offset(struct vop_vdev
*vdev
, unsigned long offset
,
1051 unsigned long *size
, unsigned long *pa
)
1053 struct vop_device
*vpdev
= vdev
->vpdev
;
1054 unsigned long start
= MIC_DP_SIZE
;
1058 * MMAP interface is as follows:
1060 * 0x0 virtio device_page
1061 * 0x1000 first vring
1062 * 0x1000 + size of 1st vring second vring
1066 *pa
= virt_to_phys(vpdev
->hw_ops
->get_dp(vpdev
));
1067 *size
= MIC_DP_SIZE
;
1071 for (i
= 0; i
< vdev
->dd
->num_vq
; i
++) {
1072 struct vop_vringh
*vvr
= &vdev
->vvr
[i
];
1074 if (offset
== start
) {
1075 *pa
= virt_to_phys(vvr
->vring
.va
);
1076 *size
= vvr
->vring
.len
;
1079 start
+= vvr
->vring
.len
;
1085 * Maps the device page and virtio rings to user space for readonly access.
1087 static int vop_mmap(struct file
*f
, struct vm_area_struct
*vma
)
1089 struct vop_vdev
*vdev
= f
->private_data
;
1090 unsigned long offset
= vma
->vm_pgoff
<< PAGE_SHIFT
;
1091 unsigned long pa
, size
= vma
->vm_end
- vma
->vm_start
, size_rem
= size
;
1094 err
= vop_vdev_inited(vdev
);
1097 if (vma
->vm_flags
& VM_WRITE
) {
1102 i
= vop_query_offset(vdev
, offset
, &size
, &pa
);
1107 err
= remap_pfn_range(vma
, vma
->vm_start
+ offset
,
1108 pa
>> PAGE_SHIFT
, size
,
1119 static const struct file_operations vop_fops
= {
1121 .release
= vop_release
,
1122 .unlocked_ioctl
= vop_ioctl
,
1125 .owner
= THIS_MODULE
,
1128 int vop_host_init(struct vop_info
*vi
)
1131 struct miscdevice
*mdev
;
1132 struct vop_device
*vpdev
= vi
->vpdev
;
1134 INIT_LIST_HEAD(&vi
->vdev_list
);
1135 vi
->dma_ch
= vpdev
->dma_ch
;
1136 mdev
= &vi
->miscdev
;
1137 mdev
->minor
= MISC_DYNAMIC_MINOR
;
1138 snprintf(vi
->name
, sizeof(vi
->name
), "vop_virtio%d", vpdev
->index
);
1139 mdev
->name
= vi
->name
;
1140 mdev
->fops
= &vop_fops
;
1141 mdev
->parent
= &vpdev
->dev
;
1143 rc
= misc_register(mdev
);
1145 dev_err(&vpdev
->dev
, "%s failed rc %d\n", __func__
, rc
);
1149 void vop_host_uninit(struct vop_info
*vi
)
1151 struct list_head
*pos
, *tmp
;
1152 struct vop_vdev
*vdev
;
1154 mutex_lock(&vi
->vop_mutex
);
1155 vop_virtio_reset_devices(vi
);
1156 list_for_each_safe(pos
, tmp
, &vi
->vdev_list
) {
1157 vdev
= list_entry(pos
, struct vop_vdev
, list
);
1159 reinit_completion(&vdev
->destroy
);
1160 mutex_unlock(&vi
->vop_mutex
);
1161 mutex_lock(&vdev
->vdev_mutex
);
1162 vop_virtio_del_device(vdev
);
1163 vdev
->deleted
= true;
1164 mutex_unlock(&vdev
->vdev_mutex
);
1165 complete(&vdev
->destroy
);
1166 mutex_lock(&vi
->vop_mutex
);
1168 mutex_unlock(&vi
->vop_mutex
);
1169 misc_deregister(&vi
->miscdev
);