/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Host driver.
 *
 */
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"
#include "mic_device.h"
#include "mic_smpt.h"
#include "mic_virtio.h"

/*
 * Initiates the copies across the PCIe bus from card memory to
 * a user space buffer.
 */
static int mic_virtio_copy_to_user(struct mic_vdev *mvdev,
		void __user *ubuf, size_t len, u64 addr)
{
	int err;
	void __iomem *dbuf = mvdev->mdev->aper.va + addr;
	/*
	 * We are copying from IO below and should ideally use something
	 * like copy_to_user_fromio(..) if it existed.
	 */
	if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
		err = -EFAULT;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	mvdev->in_bytes += len;
	err = 0;
err:
	return err;
}

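/*
 * For illustration only: a minimal sketch of what the missing
 * copy_to_user_fromio(..) helper mentioned above might look like if it
 * existed, bouncing through a kernel buffer so the IO side of the transfer
 * goes through memcpy_fromio(). The helper name and the bounce buffer size
 * are hypothetical; this is not part of the driver.
 *
 *	static int copy_to_user_fromio(void __user *ubuf,
 *				       void __iomem *src, size_t len)
 *	{
 *		char tmp[256];
 *
 *		while (len) {
 *			size_t n = min(len, sizeof(tmp));
 *
 *			memcpy_fromio(tmp, src, n);
 *			if (copy_to_user(ubuf, tmp, n))
 *				return -EFAULT;
 *			ubuf += n;
 *			src += n;
 *			len -= n;
 *		}
 *		return 0;
 *	}
 */
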
/*
 * Initiates copies across the PCIe bus from a user space
 * buffer to card memory.
 */
static int mic_virtio_copy_from_user(struct mic_vdev *mvdev,
		void __user *ubuf, size_t len, u64 addr)
{
	int err;
	void __iomem *dbuf = mvdev->mdev->aper.va + addr;
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	mvdev->out_bytes += len;
	err = 0;
err:
	return err;
}

#define MIC_VRINGH_READ true

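/*
 * MIC_VRINGH_READ names the direction flag taken by mic_vringh_copy(..)
 * below: MIC_VRINGH_READ moves data card -> user space (the read
 * descriptors), while !MIC_VRINGH_READ moves data user space -> card
 * (the write descriptors). See the two calls in _mic_virtio_copy(..).
 */
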
/* The function to call to notify the card about added buffers */
static void mic_notify(struct vringh *vrh)
{
	struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh);
	struct mic_vdev *mvdev = mvrh->mvdev;
	s8 db = mvdev->dc->h2c_vdev_db;

	if (db != -1)
		mvdev->mdev->ops->send_intr(mvdev->mdev, db);
}

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
	void __user *ubuf, size_t len, bool read, size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		partlen = min(iov->iov[iov->i].iov_len, len);
		if (read)
			ret = mic_virtio_copy_to_user(mvdev,
				ubuf, partlen,
				(u64)iov->iov[iov->i].iov_base);
		else
			ret = mic_virtio_copy_from_user(mvdev,
				ubuf, partlen,
				(u64)iov->iov[iov->i].iov_base);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;
		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;
			/* Switch to the next element. */
			iov->i++;
			iov->consumed = 0;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _mic_virtio_copy(struct mic_vdev *mvdev,
	struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
	struct vringh_kiov *riov = &mvr->riov;
	struct vringh_kiov *wiov = &mvr->wiov;
	struct vringh *vrh = &mvr->vrh;
	u16 *head = &mvr->head;
	struct mic_vring *vr = &mvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
				head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(mic_dev(mvdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = mic_vringh_copy(mvdev, riov, ubuf, len,
			MIC_VRINGH_READ, &out_len);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = mic_vringh_copy(mvdev, wiov, ubuf, len,
			!MIC_VRINGH_READ, &out_len);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += mic_vringh_iov_consumed(riov);
		total += mic_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

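/*
 * In short, one _mic_virtio_copy(..) call: fetch a descriptor chain
 * (vringh_getdesc_kern), shuttle data between the user iovecs and the
 * read/write KIOVs across the PCIe aperture, then optionally retire the
 * chain (vringh_complete_kern) and kick the card via mic_notify(..).
 */
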
static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
		struct mic_copy_desc *copy)
{
	if (copy->vr_idx >= mvdev->dd->num_vq) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
int mic_virtio_copy_desc(struct mic_vdev *mvdev,
		struct mic_copy_desc *copy)
{
	int err;
	struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];

	err = mic_verify_copy_args(mvdev, copy);
	if (err)
		return err;

	mutex_lock(&mvr->vr_mutex);
	if (!mic_vdevup(mvdev)) {
		err = -ENODEV;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _mic_virtio_copy(mvdev, copy);
	if (err) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&mvr->vr_mutex);
	return err;
}

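/*
 * A rough sketch of the user space side, assuming an ioctl such as
 * MIC_VIRTIO_COPY_DESC routes here (the ioctl name and fd origin are
 * assumptions; only the mic_copy_desc fields used above come from this
 * file):
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = buf_len };
 *	struct mic_copy_desc copy = {
 *		.iov = &iov,
 *		.iovcnt = 1,
 *		.vr_idx = 0,		// which virtqueue to use
 *		.update_used = 1,	// retire the descriptor chain
 *	};
 *	ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *	// copy.out_len now holds the number of bytes transferred
 */
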
static void mic_virtio_init_post(struct mic_vdev *mvdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
	int i;

	for (i = 0; i < mvdev->dd->num_vq; i++) {
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(mic_dev(mvdev), "used_address zero??\n");
			continue;
		}
		mvdev->mvr[i].vrh.vring.used =
			(void __force *)mvdev->mdev->aper.va +
			le64_to_cpu(vqconfig[i].used_address);
	}

	mvdev->dc->used_address_updated = 0;

	dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
		__func__, mvdev->virtio_id);
}

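/*
 * After init_post, the host side vringh operates on a used ring that
 * lives in card memory: the card publishes its used ring address via
 * vqconfig and the host rebases vrh.vring.used into the PCIe aperture
 * (aper.va). The descriptor and avail rings remain in host memory.
 */
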
static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
{
	int i;

	dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
		__func__, mvdev->dd->status, mvdev->virtio_id);

	for (i = 0; i < mvdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the mic
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	mvdev->dd->status = 0;
	mvdev->dc->vdev_reset = 0;
	mvdev->dc->host_ack = 1;

	for (i = 0; i < mvdev->dd->num_vq; i++) {
		struct vringh *vrh = &mvdev->mvr[i].vrh;

		mvdev->mvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_unlock(&mvdev->mvr[i].vr_mutex);
}

void mic_virtio_reset_devices(struct mic_device *mdev)
{
	struct list_head *pos, *tmp;
	struct mic_vdev *mvdev;

	dev_dbg(mdev->sdev->parent, "%s\n", __func__);

	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
		mvdev = list_entry(pos, struct mic_vdev, list);
		mic_virtio_device_reset(mvdev);
		mvdev->poll_wake = 1;
		wake_up(&mvdev->waitq);
	}
}

void mic_bh_handler(struct work_struct *work)
{
	struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
			virtio_bh_work);

	if (mvdev->dc->used_address_updated)
		mic_virtio_init_post(mvdev);

	if (mvdev->dc->vdev_reset)
		mic_virtio_device_reset(mvdev);

	mvdev->poll_wake = 1;
	wake_up(&mvdev->waitq);
}

static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
{
	struct mic_vdev *mvdev = data;
	struct mic_device *mdev = mvdev->mdev;

	mdev->ops->intr_workarounds(mdev);
	schedule_work(&mvdev->virtio_bh_work);
	return IRQ_HANDLED;
}

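/*
 * Interrupt handling is split in two: the hard IRQ handler above only
 * applies the interrupt workarounds and schedules virtio_bh_work, while
 * mic_bh_handler(..) runs in workqueue context where it is safe to take
 * the vring mutexes for the init-post and reset paths.
 */
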
int mic_virtio_config_change(struct mic_vdev *mvdev,
			void __user *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct mic_bootparam *bootparam = mvdev->mdev->dp;
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&mvdev->mdev->mic_mutex);
	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);

	if (db == -1 || mvdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	if (copy_from_user(mic_vq_configspace(mvdev->dd),
			   argp, mvdev->dd->config_len)) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EFAULT);
		ret = -EFAULT;
		goto exit;
	}
	mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	mvdev->mdev->ops->send_intr(mvdev->mdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake,
			mvdev->dc->guest_ack, msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(mic_dev(mvdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	mvdev->dc->config_change = 0;
	mvdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_unlock(&mvdev->mvr[i].vr_mutex);
	mutex_unlock(&mvdev->mdev->mic_mutex);
	return ret;
}

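/*
 * The handshake above polls for up to ~10s (100 retries x 100ms).
 * wait_event_timeout(..) returns 0 on timeout, so the loop keeps waiting
 * until the card sets guest_ack or the retries run out; the on-stack wait
 * queue is never explicitly woken, the periodic timeout combined with the
 * guest_ack re-check does the work.
 */
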
static int mic_copy_dp_entry(struct mic_vdev *mvdev,
					void __user *argp,
					u8 *type,
					struct mic_device_desc **devpage)
{
	struct mic_device *mdev = mvdev->mdev;
	struct mic_device_desc dd, *dd_config, *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	if (copy_from_user(&dd, argp, sizeof(dd))) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EFAULT);
		return -EFAULT;
	}

	if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
	    dd.num_vq > MIC_MAX_VRINGS) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}

	dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
	if (dd_config == NULL) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -ENOMEM);
		return -ENOMEM;
	}
	if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
		ret = -EFAULT;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}

	vqconfig = mic_vq_config(dd_config);
	for (i = 0; i < dd.num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
		i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
		i += mic_total_desc_size(devp)) {
		devp = mdev->dp + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = dd_config->type;
	dd_config->type = 0;
	memcpy(devp, dd_config, mic_desc_size(dd_config));

	*devpage = devp;
exit:
	kfree(dd_config);
	return ret;
}

static void mic_init_device_ctrl(struct mic_vdev *mvdev,
				struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;

	mvdev->dc = dc;
}

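/*
 * Layout of one device page entry, as implied by the pointer math above:
 * the mic_device_desc (header plus vqconfig and config space) comes first
 * and the mic_device_ctrl block follows at the next aligned offset, which
 * is why dc is computed as devpage + mic_aligned_desc_size(devpage).
 */
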
int mic_virtio_add_device(struct mic_vdev *mvdev,
			void __user *argp)
{
	struct mic_device *mdev = mvdev->mdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db;
	char irqname[10];
	struct mic_bootparam *bootparam = mdev->dp;
	u16 num;
	dma_addr_t vr_addr;

	mutex_lock(&mdev->mic_mutex);

	ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
	if (ret) {
		mutex_unlock(&mdev->mic_mutex);
		return ret;
	}

	mic_init_device_ctrl(mvdev, dd);

	mvdev->dd = dd;
	mvdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct mic_vringh *mvr = &mvdev->mvr[i];
		struct mic_vring *vr = &mvdev->mvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&mvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
				get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i);
		vr_addr = mic_map_single(mdev, vr->va, vr_size);
		if (mic_map_error(vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&mvr->vrh,
			*(u32 *)mic_vq_features(mvdev->dd), num, false,
			vr->vr.desc, vr->vr.avail, vr->vr.used);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&mvr->riov, NULL, 0);
		vringh_kiov_init(&mvr->wiov, NULL, 0);
		mvr->head = USHRT_MAX;
		mvr->mvdev = mvdev;
		mvr->vrh.notify = mic_notify;
		dev_dbg(mdev->sdev->parent,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
	}

	snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
		 mvdev->virtio_id);
	mvdev->virtio_db = mic_next_db(mdev);
	mvdev->virtio_cookie = mic_request_irq(mdev, mic_virtio_intr_handler,
			irqname, mvdev, mvdev->virtio_db, MIC_INTR_DB);
	if (IS_ERR(mvdev->virtio_cookie)) {
		ret = PTR_ERR(mvdev->virtio_cookie);
		dev_dbg(mdev->sdev->parent, "request irq failed\n");
		goto err;
	}

	mvdev->dc->c2h_vdev_db = mvdev->virtio_db;

	list_add_tail(&mvdev->list, &mdev->vdev_list);
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;

	dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);

	db = bootparam->h2c_config_db;
	if (db != -1)
		mdev->ops->send_intr(mdev, db);
	mutex_unlock(&mdev->mic_mutex);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct mic_vringh *mvr = &mvdev->mvr[j];

		mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
				 mvr->vring.len);
		free_pages((unsigned long)mvr->vring.va,
			   get_order(mvr->vring.len));
	}
	mutex_unlock(&mdev->mic_mutex);
	return ret;
}

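/*
 * Error unwind in mic_virtio_add_device(..): the err label walks rings
 * 0..i-1, i.e. those fully mapped and allocated before the failing
 * iteration; a ring whose own DMA mapping fails is freed inline before
 * the goto.
 */
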
void mic_virtio_del_device(struct mic_vdev *mvdev)
{
	struct list_head *pos, *tmp;
	struct mic_vdev *tmp_mvdev;
	struct mic_device *mdev = mvdev->mdev;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int i, ret, retry;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = mdev->dp;
	s8 db;

	mutex_lock(&mdev->mic_mutex);
	db = bootparam->h2c_config_db;
	if (db == -1)
		goto skip_hot_remove;
	dev_dbg(mdev->sdev->parent,
		"Requesting hot remove id %d\n", mvdev->virtio_id);
	mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	mdev->ops->send_intr(mdev, db);
	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake,
			mvdev->dc->guest_ack, msecs_to_jiffies(100));
		if (ret)
			break;
	}
	dev_dbg(mdev->sdev->parent,
		"Device id %d config_change %d guest_ack %d retry %d\n",
		mvdev->virtio_id, mvdev->dc->config_change,
		mvdev->dc->guest_ack, retry);
	mvdev->dc->config_change = 0;
	mvdev->dc->guest_ack = 0;
skip_hot_remove:
	mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
	flush_work(&mvdev->virtio_bh_work);
	vqconfig = mic_vq_config(mvdev->dd);
	for (i = 0; i < mvdev->dd->num_vq; i++) {
		struct mic_vringh *mvr = &mvdev->mvr[i];

		vringh_kiov_cleanup(&mvr->riov);
		vringh_kiov_cleanup(&mvr->wiov);
		mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
				 mvr->vring.len);
		free_pages((unsigned long)mvr->vring.va,
			   get_order(mvr->vring.len));
	}

	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
		tmp_mvdev = list_entry(pos, struct mic_vdev, list);
		if (tmp_mvdev == mvdev) {
			list_del(pos);
			dev_dbg(mdev->sdev->parent,
				"Removing virtio device id %d\n",
				mvdev->virtio_id);
			break;
		}
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	mvdev->dd->type = -1;
	mutex_unlock(&mdev->mic_mutex);
}