// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;

		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}
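
/*
 * Worked example (illustrative, not from the original source): with a ring
 * of num = 8, *last_avail_idx = 3 and avail->idx = 5, the ring is not empty,
 * so head is read from avail->ring[3 & 7]; on success *last_avail_idx is
 * advanced to 4 and that head is returned.  Had avail->idx also been 3, the
 * function would return vrh->vring.num (8) to signal "empty".
 */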

/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;

	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}
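
/*
 * Worked example (illustrative, not from the original source): for a range
 * covering [0x1000, 0x1fff] (end_incl == 0x1fff), a descriptor at addr
 * 0x1800 with *len 0x1000 runs past the range, so *len is reduced to
 * 0x1fff + 1 - 0x1800 = 0x800 and the caller handles the remainder with a
 * later lookup.  A descriptor whose addr + *len wraps past zero is rejected
 * outright.
 */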

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num,
				     sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = 0;
	if (wiov)
		wiov->i = wiov->used = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}

static inline int __vringh_need_notify(struct vringh *vrh,
					int (*getu16)(const struct vringh *vrh,
						      u16 *val,
						      const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work. */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
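
/*
 * Usage sketch (illustrative only, not part of the original source): a
 * typical host-side consumer of the userspace API.  The getrange callback,
 * the ring pointers and the notify_guest() helper are assumptions here, and
 * the riov/wiov allocations still need cleaning up when done.
 *
 *	struct vringh vrh;
 *	struct vringh_iov riov, wiov;
 *	char buf[128];
 *	u16 head;
 *	int err;
 *
 *	err = vringh_init_user(&vrh, features, num, true, desc, avail, used);
 *	...
 *	vringh_iov_init(&riov, NULL, 0);
 *	vringh_iov_init(&wiov, NULL, 0);
 *	err = vringh_getdesc_user(&vrh, &riov, &wiov, my_getrange, &head);
 *	if (err == 1) {
 *		vringh_iov_pull_user(&riov, buf, sizeof(buf));
 *		vringh_complete_user(&vrh, head, 0);
 *		if (vringh_need_notify_user(&vrh) > 0)
 *			notify_guest();
 *	}
 */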

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);
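
/*
 * Usage sketch (illustrative only, not part of the original source): the
 * kernelspace flavour uses caller-provided kvec arrays; the array sizes,
 * buf and notify_other_side() are assumptions here.
 *
 *	struct vringh_kiov riov, wiov;
 *	struct kvec rkvec[8], wkvec[8];
 *	u16 head;
 *	int err;
 *
 *	vringh_kiov_init(&riov, rkvec, ARRAY_SIZE(rkvec));
 *	vringh_kiov_init(&wiov, wkvec, ARRAY_SIZE(wkvec));
 *	err = vringh_getdesc_kern(&vrh, &riov, &wiov, &head, GFP_KERNEL);
 *	if (err == 1) {
 *		vringh_iov_pull_kern(&riov, buf, sizeof(buf));
 *		vringh_complete_kern(&vrh, head, 0);
 *		if (vringh_need_notify_kern(&vrh) > 0)
 *			notify_other_side();
 *	}
 */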

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

#if IS_REACHABLE(CONFIG_VHOST_IOTLB)

static int iotlb_translate(const struct vringh *vrh,
			   u64 addr, u64 len, struct bio_vec iov[],
			   int iov_size, u32 perm)
{
	struct vhost_iotlb_map *map;
	struct vhost_iotlb *iotlb = vrh->iotlb;
	int ret = 0;
	u64 s = 0;

	while (len > s) {
		u64 size, pa, pfn;

		if (unlikely(ret >= iov_size)) {
			ret = -ENOBUFS;
			break;
		}

		map = vhost_iotlb_itree_first(iotlb, addr,
					      addr + len - 1);
		if (!map || map->start > addr) {
			ret = -EINVAL;
			break;
		} else if (!(map->perm & perm)) {
			ret = -EPERM;
			break;
		}

		size = map->size - addr + map->start;
		pa = map->addr + addr - map->start;
		pfn = pa >> PAGE_SHIFT;
		iov[ret].bv_page = pfn_to_page(pfn);
		iov[ret].bv_len = min(len - s, size);
		iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
		s += size;
		addr += size;
		++ret;
	}

	return ret;
}

static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
				  void *src, size_t len)
{
	struct iov_iter iter;
	struct bio_vec iov[16];
	int ret;

	ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
			      len, iov, 16, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	iov_iter_bvec(&iter, READ, iov, ret, len);

	ret = copy_from_iter(dst, len, &iter);

	return ret;
}

static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
				void *src, size_t len)
{
	struct iov_iter iter;
	struct bio_vec iov[16];
	int ret;

	ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
			      len, iov, 16, VHOST_MAP_WO);
	if (ret < 0)
		return ret;

	iov_iter_bvec(&iter, WRITE, iov, ret, len);

	return copy_to_iter(src, len, &iter);
}

static inline int getu16_iotlb(const struct vringh *vrh,
			       u16 *val, const __virtio16 *p)
{
	struct bio_vec iov;
	void *kaddr, *from;
	int ret;

	/* Atomic read is needed for getu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      &iov, 1, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(iov.bv_page);
	from = kaddr + iov.bv_offset;
	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
	kunmap_atomic(kaddr);

	return 0;
}

static inline int putu16_iotlb(const struct vringh *vrh,
			       __virtio16 *p, u16 val)
{
	struct bio_vec iov;
	void *kaddr, *to;
	int ret;

	/* Atomic write is needed for putu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      &iov, 1, VHOST_MAP_WO);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(iov.bv_page);
	to = kaddr + iov.bv_offset;
	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
	kunmap_atomic(kaddr);

	return 0;
}

static inline int copydesc_iotlb(const struct vringh *vrh,
				 void *dst, const void *src, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
				  void *dst, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_to_iotlb(const struct vringh *vrh,
				void *dst, void *src, size_t len)
{
	int ret;

	ret = copy_to_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int putused_iotlb(const struct vringh *vrh,
				struct vring_used_elem *dst,
				const struct vring_used_elem *src,
				unsigned int num)
{
	int size = num * sizeof(*dst);
	int ret;

	ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
	if (ret != size)
		return -EFAULT;

	return 0;
}

/**
 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb(struct vringh *vrh, u64 features,
		      unsigned int num, bool weak_barriers,
		      struct vring_desc *desc,
		      struct vring_avail *avail,
		      struct vring_used *used)
{
	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb);

/**
 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vring
 * @iotlb: iotlb associated with this vring
 */
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
{
	vrh->iotlb = iotlb;
}
EXPORT_SYMBOL(vringh_set_iotlb);

/**
 * vringh_getdesc_iotlb - get next available descriptor from ring with
 *			  IOTLB.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_iotlb().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_iotlb(struct vringh *vrh,
			 struct vringh_kiov *riov,
			 struct vringh_kiov *wiov,
			 u16 *head,
			 gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_iotlb);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_iotlb);
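
/*
 * Usage sketch (illustrative only, not part of the original source): the
 * IOTLB flavour is set up like the kernelspace one, plus an iotlb; the
 * my_iotlb, buf and notify_other_side() names are assumptions here.
 *
 *	vringh_init_iotlb(&vrh, features, num, true, desc, avail, used);
 *	vringh_set_iotlb(&vrh, my_iotlb);
 *
 *	vringh_kiov_init(&riov, rkvec, ARRAY_SIZE(rkvec));
 *	vringh_kiov_init(&wiov, wkvec, ARRAY_SIZE(wkvec));
 *	err = vringh_getdesc_iotlb(&vrh, &riov, &wiov, &head, GFP_ATOMIC);
 *	if (err == 1) {
 *		vringh_iov_pull_iotlb(&vrh, &riov, buf, sizeof(buf));
 *		vringh_complete_iotlb(&vrh, head, 0);
 *		if (vringh_need_notify_iotlb(&vrh) > 0)
 *			notify_other_side();
 *	}
 */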

/**
 * vringh_iov_pull_iotlb - copy bytes from vring_iov.
 * @vrh: the vring.
 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
			      struct vringh_kiov *riov,
			      void *dst, size_t len)
{
	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
}
EXPORT_SYMBOL(vringh_iov_pull_iotlb);

/**
 * vringh_iov_push_iotlb - copy bytes into vring_iov.
 * @vrh: the vring.
 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
			      struct vringh_kiov *wiov,
			      const void *src, size_t len)
{
	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
}
EXPORT_SYMBOL(vringh_iov_push_iotlb);

/**
 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_iotlb() to undo).
 *
 * The next vringh_get_iotlb() will return the old descriptor(s) again.
 */
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet.
	 */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_iotlb);

/**
 * vringh_complete_iotlb - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_iotlb.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_iotlb() after one or more calls
 * to this function.
 */
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
}
EXPORT_SYMBOL(vringh_complete_iotlb);

/**
 * vringh_notify_enable_iotlb - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_iotlb(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_enable_iotlb);

/**
 * vringh_notify_disable_iotlb - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_iotlb(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_disable_iotlb);

/**
 * vringh_need_notify_iotlb - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_iotlb() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_iotlb(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_iotlb);
}
EXPORT_SYMBOL(vringh_need_notify_iotlb);

#endif

MODULE_LICENSE("GPL");