// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <uapi/linux/virtio_config.h>
static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
        static DEFINE_RATELIMIT_STATE(vringh_rs,
                                      DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
        if (__ratelimit(&vringh_rs)) {
                va_list ap;
                va_start(ap, fmt);
                printk(KERN_NOTICE "vringh:");
                vprintk(fmt, ap);
                va_end(ap);
        }
}
/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
                                    int (*getu16)(const struct vringh *vrh,
                                                  u16 *val,
                                                  const __virtio16 *p),
                                    u16 *last_avail_idx)
{
        u16 avail_idx, i, head;
        int err;

        err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
        if (err) {
                vringh_bad("Failed to access avail idx at %p",
                           &vrh->vring.avail->idx);
                return err;
        }

        if (*last_avail_idx == avail_idx)
                return vrh->vring.num;

        /* Only get avail ring entries after they have been exposed by guest. */
        virtio_rmb(vrh->weak_barriers);

        i = *last_avail_idx & (vrh->vring.num - 1);

        err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
        if (err) {
                vringh_bad("Failed to read head: idx %d address %p",
                           *last_avail_idx, &vrh->vring.avail->ring[i]);
                return err;
        }

        if (head >= vrh->vring.num) {
                vringh_bad("Guest says index %u > %u is available",
                           head, vrh->vring.num);
                return -EINVAL;
        }

        (*last_avail_idx)++;
        return head;
}
/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
                                      void *ptr, size_t len,
                                      int (*xfer)(void *addr, void *ptr,
                                                  size_t len))
{
        int err, done = 0;

        while (len && iov->i < iov->used) {
                size_t partlen;

                partlen = min(iov->iov[iov->i].iov_len, len);
                err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
                if (err)
                        return err;
                done += partlen;
                len -= partlen;
                ptr += partlen;
                iov->consumed += partlen;
                iov->iov[iov->i].iov_len -= partlen;
                iov->iov[iov->i].iov_base += partlen;

                if (!iov->iov[iov->i].iov_len) {
                        /* Fix up old iov element then increment. */
                        iov->iov[iov->i].iov_len = iov->consumed;
                        iov->iov[iov->i].iov_base -= iov->consumed;

                        iov->consumed = 0;
                        iov->i++;
                }
        }
        return done;
}
/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
                               struct vringh_range *range,
                               bool (*getrange)(struct vringh *,
                                                u64, struct vringh_range *))
{
        if (addr < range->start || addr > range->end_incl) {
                if (!getrange(vrh, addr, range))
                        return false;
        }
        BUG_ON(addr < range->start || addr > range->end_incl);

        /* To end of memory? */
        if (unlikely(addr + *len == 0)) {
                if (range->end_incl == -1ULL)
                        return true;
                goto truncate;
        }

        /* Otherwise, don't wrap. */
        if (addr + *len < addr) {
                vringh_bad("Wrapping descriptor %zu@0x%llx",
                           *len, (unsigned long long)addr);
                return false;
        }

        if (unlikely(addr + *len - 1 > range->end_incl))
                goto truncate;

        return true;

truncate:
        *len = range->end_incl + 1 - addr;
        return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
                                  struct vringh_range *range,
                                  bool (*getrange)(struct vringh *,
                                                   u64, struct vringh_range *))
{
        return true;
}
/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
                            int *up_next, u16 *i, void *addr,
                            const struct vring_desc *desc,
                            struct vring_desc **descs, int *desc_max)
{
        u32 len;

        /* Indirect tables can't have indirect. */
        if (*up_next != -1) {
                vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
                return -EINVAL;
        }

        len = vringh32_to_cpu(vrh, desc->len);
        if (unlikely(len % sizeof(struct vring_desc))) {
                vringh_bad("Strange indirect len %u", desc->len);
                return -EINVAL;
        }

        /* We will check this when we follow it! */
        if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
                *up_next = vringh16_to_cpu(vrh, desc->next);
        else
                *up_next = -2;
        *descs = addr;
        *desc_max = len / sizeof(struct vring_desc);

        /* Now, start at the first indirect. */
        *i = 0;
        return 0;
}
static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
        struct kvec *new;
        unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

        if (new_num < 8)
                new_num = 8;

        flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
        if (flag)
                new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
        else {
                new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
                if (new) {
                        memcpy(new, iov->iov,
                               iov->max_num * sizeof(struct iovec));
                        flag = VRINGH_IOV_ALLOCATED;
                }
        }
        if (!new)
                return -ENOMEM;
        iov->iov = new;
        iov->max_num = (new_num | flag);
        return 0;
}
static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
                                       struct vring_desc **descs, int *desc_max)
{
        u16 i = *up_next;

        *up_next = -1;
        *descs = vrh->vring.desc;
        *desc_max = vrh->vring.num;
        return i;
}
static int slow_copy(struct vringh *vrh, void *dst, const void *src,
                     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
                                    struct vringh_range *range,
                                    bool (*getrange)(struct vringh *vrh,
                                                     u64,
                                                     struct vringh_range *)),
                     bool (*getrange)(struct vringh *vrh,
                                      u64 addr,
                                      struct vringh_range *r),
                     struct vringh_range *range,
                     int (*copy)(void *dst, const void *src, size_t len))
{
        size_t part, len = sizeof(struct vring_desc);

        do {
                u64 addr;
                int err;

                part = len;
                addr = (u64)(unsigned long)src - range->offset;

                if (!rcheck(vrh, addr, &part, range, getrange))
                        return -EINVAL;

                err = copy(dst, src, part);
                if (err)
                        return err;

                dst += part;
                src += part;
                len -= part;
        } while (len);
        return 0;
}
static inline int
__vringh_iov(struct vringh *vrh, u16 i,
             struct vringh_kiov *riov,
             struct vringh_kiov *wiov,
             bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
                            struct vringh_range *range,
                            bool (*getrange)(struct vringh *, u64,
                                             struct vringh_range *)),
             bool (*getrange)(struct vringh *, u64, struct vringh_range *),
             gfp_t gfp,
             int (*copy)(void *dst, const void *src, size_t len))
{
        int err, count = 0, up_next, desc_max;
        struct vring_desc desc, *descs;
        struct vringh_range range = { -1ULL, 0 }, slowrange;
        bool slow = false;

        /* We start traversing vring's descriptor table. */
        descs = vrh->vring.desc;
        desc_max = vrh->vring.num;
        up_next = -1;

        if (riov)
                riov->i = riov->used = 0;
        if (wiov)
                wiov->i = wiov->used = 0;
        /* You must want something! */
        if (WARN_ON(!riov && !wiov))
                return -EINVAL;

        for (;;) {
                void *addr;
                struct vringh_kiov *iov;
                size_t len;

                if (unlikely(slow))
                        err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
                                        &slowrange, copy);
                else
                        err = copy(&desc, &descs[i], sizeof(desc));
                if (unlikely(err))
                        goto fail;

                if (unlikely(desc.flags &
                             cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
                        u64 a = vringh64_to_cpu(vrh, desc.addr);

                        /* Make sure it's OK, and get offset. */
                        len = vringh32_to_cpu(vrh, desc.len);
                        if (!rcheck(vrh, a, &len, &range, getrange)) {
                                err = -EINVAL;
                                goto fail;
                        }

                        if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
                                slow = true;
                                /* We need to save this range to use offset */
                                slowrange = range;
                        }

                        addr = (void *)(long)(a + range.offset);
                        err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
                                               &descs, &desc_max);
                        if (err)
                                goto fail;
                        continue;
                }

                if (count++ == vrh->vring.num) {
                        vringh_bad("Descriptor loop in %p", descs);
                        err = -ELOOP;
                        goto fail;
                }

                if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
                        iov = wiov;
                else {
                        iov = riov;
                        if (unlikely(wiov && wiov->i)) {
                                vringh_bad("Readable desc %p after writable",
                                           &descs[i]);
                                err = -EINVAL;
                                goto fail;
                        }
                }

                if (!iov) {
                        vringh_bad("Unexpected %s desc",
                                   !wiov ? "writable" : "readable");
                        err = -EPROTO;
                        goto fail;
                }

        again:
                /* Make sure it's OK, and get offset. */
                len = vringh32_to_cpu(vrh, desc.len);
                if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
                            getrange)) {
                        err = -EINVAL;
                        goto fail;
                }
                addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
                                               range.offset);

                if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
                        err = resize_iovec(iov, gfp);
                        if (err)
                                goto fail;
                }

                iov->iov[iov->used].iov_base = addr;
                iov->iov[iov->used].iov_len = len;
                iov->used++;

                if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
                        desc.len = cpu_to_vringh32(vrh,
                                   vringh32_to_cpu(vrh, desc.len) - len);
                        desc.addr = cpu_to_vringh64(vrh,
                                    vringh64_to_cpu(vrh, desc.addr) + len);
                        goto again;
                }

                if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
                        i = vringh16_to_cpu(vrh, desc.next);
                } else {
                        /* Just in case we need to finish traversing above. */
                        if (unlikely(up_next > 0)) {
                                i = return_from_indirect(vrh, &up_next,
                                                         &descs, &desc_max);
                                slow = false;
                        } else
                                break;
                }

                if (i >= desc_max) {
                        vringh_bad("Chained index %u > %u", i, desc_max);
                        err = -EINVAL;
                        goto fail;
                }
        }

        return 0;

fail:
        return err;
}
static inline int __vringh_complete(struct vringh *vrh,
                                    const struct vring_used_elem *used,
                                    unsigned int num_used,
                                    int (*putu16)(const struct vringh *vrh,
                                                  __virtio16 *p, u16 val),
                                    int (*putused)(struct vring_used_elem *dst,
                                                   const struct vring_used_elem
                                                   *src, unsigned num))
{
        struct vring_used *used_ring;
        int err;
        u16 used_idx, off;

        used_ring = vrh->vring.used;
        used_idx = vrh->last_used_idx + vrh->completed;

        off = used_idx % vrh->vring.num;

        /* Compiler knows num_used == 1 sometimes, hence extra check */
        if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
                u16 part = vrh->vring.num - off;
                err = putused(&used_ring->ring[off], used, part);
                if (!err)
                        err = putused(&used_ring->ring[0], used + part,
                                      num_used - part);
        } else
                err = putused(&used_ring->ring[off], used, num_used);

        if (err) {
                vringh_bad("Failed to write %u used entries %u at %p",
                           num_used, off, &used_ring->ring[off]);
                return err;
        }

        /* Make sure buffer is written before we update index. */
        virtio_wmb(vrh->weak_barriers);

        err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
        if (err) {
                vringh_bad("Failed to update used index at %p",
                           &vrh->vring.used->idx);
                return err;
        }

        vrh->completed += num_used;
        return 0;
}
static inline int __vringh_need_notify(struct vringh *vrh,
                                       int (*getu16)(const struct vringh *vrh,
                                                     u16 *val,
                                                     const __virtio16 *p))
{
        bool notify;
        u16 used_event;
        int err;

        /* Flush out used index update. This is paired with the
         * barrier that the Guest executes when enabling
         * interrupts. */
        virtio_mb(vrh->weak_barriers);

        /* Old-style, without event indices. */
        if (!vrh->event_indices) {
                u16 flags;
                err = getu16(vrh, &flags, &vrh->vring.avail->flags);
                if (err) {
                        vringh_bad("Failed to get flags at %p",
                                   &vrh->vring.avail->flags);
                        return err;
                }
                return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
        }

        /* Modern: we know when other side wants to know. */
        err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
        if (err) {
                vringh_bad("Failed to get used event idx at %p",
                           &vring_used_event(&vrh->vring));
                return err;
        }

        /* Just in case we added so many that we wrap. */
        if (unlikely(vrh->completed > 0xffff))
                notify = true;
        else
                notify = vring_need_event(used_event,
                                          vrh->last_used_idx + vrh->completed,
                                          vrh->last_used_idx);

        vrh->last_used_idx += vrh->completed;
        vrh->completed = 0;
        return notify;
}
static inline bool __vringh_notify_enable(struct vringh *vrh,
                                          int (*getu16)(const struct vringh *vrh,
                                                        u16 *val,
                                                        const __virtio16 *p),
                                          int (*putu16)(const struct vringh *vrh,
                                                        __virtio16 *p, u16 val))
{
        u16 avail;

        if (!vrh->event_indices) {
                /* Old-school; update flags. */
                if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
                        vringh_bad("Clearing used flags %p",
                                   &vrh->vring.used->flags);
                        return true;
                }
        } else {
                if (putu16(vrh, &vring_avail_event(&vrh->vring),
                           vrh->last_avail_idx) != 0) {
                        vringh_bad("Updating avail event index %p",
                                   &vring_avail_event(&vrh->vring));
                        return true;
                }
        }

        /* They could have slipped one in as we were doing that: make
         * sure it's written, then check again. */
        virtio_mb(vrh->weak_barriers);

        if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
                vringh_bad("Failed to check avail idx at %p",
                           &vrh->vring.avail->idx);
                return true;
        }

        /* This is unlikely, so we just leave notifications enabled
         * (if we're using event_indices, we'll only get one
         * notification anyway). */
        return avail == vrh->last_avail_idx;
}
static inline void __vringh_notify_disable(struct vringh *vrh,
                                           int (*putu16)(const struct vringh *vrh,
                                                         __virtio16 *p, u16 val))
{
        if (!vrh->event_indices) {
                /* Old-school; update flags. */
                if (putu16(vrh, &vrh->vring.used->flags,
                           VRING_USED_F_NO_NOTIFY)) {
                        vringh_bad("Setting used flags %p",
                                   &vrh->vring.used->flags);
                }
        }
}
/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
        __virtio16 v = 0;
        int rc = get_user(v, (__force __virtio16 __user *)p);
        *val = vringh16_to_cpu(vrh, v);
        return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
        __virtio16 v = cpu_to_vringh16(vrh, val);
        return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(void *dst, const void *src, size_t len)
{
        return copy_from_user(dst, (__force void __user *)src, len) ?
                -EFAULT : 0;
}

static inline int putused_user(struct vring_used_elem *dst,
                               const struct vring_used_elem *src,
                               unsigned int num)
{
        return copy_to_user((__force void __user *)dst, src,
                            sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(void *src, void *dst, size_t len)
{
        return copy_from_user(dst, (__force void __user *)src, len) ?
                -EFAULT : 0;
}

static inline int xfer_to_user(void *dst, void *src, size_t len)
{
        return copy_to_user((__force void __user *)dst, src, len) ?
                -EFAULT : 0;
}
/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
                     unsigned int num, bool weak_barriers,
                     struct vring_desc __user *desc,
                     struct vring_avail __user *avail,
                     struct vring_used __user *used)
{
        /* Sane power of 2 please! */
        if (!num || num > 0xffff || (num & (num - 1))) {
                vringh_bad("Bad ring size %u", num);
                return -EINVAL;
        }

        vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
        vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
        vrh->weak_barriers = weak_barriers;
        vrh->completed = 0;
        vrh->last_avail_idx = 0;
        vrh->last_used_idx = 0;
        vrh->vring.num = num;
        /* vring expects kernel addresses, but only used via accessors. */
        vrh->vring.desc = (__force struct vring_desc *)desc;
        vrh->vring.avail = (__force struct vring_avail *)avail;
        vrh->vring.used = (__force struct vring_used *)used;
        return 0;
}
EXPORT_SYMBOL(vringh_init_user);
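
/*
 * A minimal usage sketch (illustrative only, not part of this file): a host
 * process that has the guest's ring mapped at known userspace addresses might
 * set up its vringh like this.  The names guest_features, desc_addr,
 * avail_addr and used_addr are assumptions of the example.
 *
 *      struct vringh vrh;
 *      int err = vringh_init_user(&vrh, guest_features, 256, true,
 *                                 (struct vring_desc __user *)desc_addr,
 *                                 (struct vring_avail __user *)avail_addr,
 *                                 (struct vring_used __user *)used_addr);
 *      if (err)
 *              return err;     // only num is validated; check pointers yourself
 */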
/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
                        struct vringh_iov *riov,
                        struct vringh_iov *wiov,
                        bool (*getrange)(struct vringh *vrh,
                                         u64 addr, struct vringh_range *r),
                        u16 *head)
{
        int err;

        *head = vrh->vring.num;
        err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
        if (err < 0)
                return err;

        /* Empty... */
        if (err == vrh->vring.num)
                return 0;

        /* We need the layouts to be identical for this to work */
        BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
        BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
                     offsetof(struct vringh_iov, iov));
        BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
                     offsetof(struct vringh_iov, i));
        BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
                     offsetof(struct vringh_iov, used));
        BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
                     offsetof(struct vringh_iov, max_num));
        BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
        BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
                     offsetof(struct kvec, iov_base));
        BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
                     offsetof(struct kvec, iov_len));
        BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
                     != sizeof(((struct kvec *)NULL)->iov_base));
        BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
                     != sizeof(((struct kvec *)NULL)->iov_len));

        *head = err;
        err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
                           (struct vringh_kiov *)wiov,
                           range_check, getrange, GFP_KERNEL, copydesc_user);
        if (err)
                return err;

        return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
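
/*
 * Sketch of a getrange callback (hypothetical; a real driver derives this
 * from its guest memory map).  range_check() uses the returned range to
 * bound descriptor addresses, and range->offset to turn a guest address
 * into the userspace address the copy helpers dereference:
 *
 *      static bool my_getrange(struct vringh *vrh, u64 addr,
 *                              struct vringh_range *r)
 *      {
 *              if (addr >= guest_mem_size)     // assumed bound
 *                      return false;
 *              r->start = 0;
 *              r->end_incl = guest_mem_size - 1;
 *              r->offset = guest_map_base;     // guest addr 0 maps here
 *              return true;
 *      }
 *
 *      err = vringh_getdesc_user(&vrh, &riov, &wiov, my_getrange, &head);
 *      if (err <= 0)
 *              return err;     // 0 == ring empty, -ve == invalid desc/ring
 */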
/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
        return vringh_iov_xfer((struct vringh_kiov *)riov,
                               dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);
/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
                             const void *src, size_t len)
{
        return vringh_iov_xfer((struct vringh_kiov *)wiov,
                               (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);
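
/*
 * Sketch of a request/reply round trip (process_request() is a hypothetical
 * helper; riov/wiov/head come from a prior vringh_getdesc_user()):
 *
 *      char req[128];
 *      ssize_t in = vringh_iov_pull_user(&riov, req, sizeof(req));
 *      if (in >= 0) {
 *              ssize_t out = process_request(req, in);
 *              if (out > 0 && vringh_iov_push_user(&wiov, req, out) == out)
 *                      vringh_complete_user(&vrh, head, out);
 *      }
 */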
/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *       vringh_getdesc_user() calls to undo).
 *
 * The next vringh_getdesc_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
        /* We only update vring_avail_event(vr) when we want to be notified,
         * so we haven't changed that yet. */
        vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);
/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
        struct vring_used_elem used;

        used.id = cpu_to_vringh32(vrh, head);
        used.len = cpu_to_vringh32(vrh, len);
        return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);
/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
                               const struct vring_used_elem used[],
                               unsigned num_used)
{
        return __vringh_complete(vrh, used, num_used,
                                 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);
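
/*
 * Sketch of batching completions (head0/len0 etc. are whatever the caller
 * collected from earlier vringh_getdesc_user() calls):
 *
 *      struct vring_used_elem batch[2] = {
 *              { .id = cpu_to_vringh32(&vrh, head0),
 *                .len = cpu_to_vringh32(&vrh, len0) },
 *              { .id = cpu_to_vringh32(&vrh, head1),
 *                .len = cpu_to_vringh32(&vrh, len1) },
 *      };
 *      err = vringh_complete_multi_user(&vrh, batch, 2);
 */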
/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
        return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);
/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
        __vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);
/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
        return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
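
/*
 * Sketch of the completion/notification pattern; eventfd_signal() stands in
 * for however the host actually kicks the guest, and call_ctx is assumed:
 *
 *      vringh_complete_user(&vrh, head, written);
 *      err = vringh_need_notify_user(&vrh);
 *      if (err == 1)
 *              eventfd_signal(call_ctx, 1);    // guest asked to be told
 *      else if (err < 0)
 *              handle_error(err);              // hypothetical: flags unreadable
 */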
/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
                              u16 *val, const __virtio16 *p)
{
        *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
        return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
        WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
        return 0;
}

static inline int copydesc_kern(void *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}

static inline int putused_kern(struct vring_used_elem *dst,
                               const struct vring_used_elem *src,
                               unsigned int num)
{
        memcpy(dst, src, num * sizeof(*dst));
        return 0;
}

static inline int xfer_kern(void *src, void *dst, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}

static inline int kern_xfer(void *dst, void *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}
/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
                     unsigned int num, bool weak_barriers,
                     struct vring_desc *desc,
                     struct vring_avail *avail,
                     struct vring_used *used)
{
        /* Sane power of 2 please! */
        if (!num || num > 0xffff || (num & (num - 1))) {
                vringh_bad("Bad ring size %u", num);
                return -EINVAL;
        }

        vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
        vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
        vrh->weak_barriers = weak_barriers;
        vrh->completed = 0;
        vrh->last_avail_idx = 0;
        vrh->last_used_idx = 0;
        vrh->vring.num = num;
        vrh->vring.desc = desc;
        vrh->vring.avail = avail;
        vrh->vring.used = used;
        return 0;
}
EXPORT_SYMBOL(vringh_init_kern);
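
/*
 * Sketch of setting up a kernelspace ring the driver itself owns, using the
 * standard vring layout helpers (the 4096 alignment and ring size of 256 are
 * assumptions of the example):
 *
 *      struct vring vring;
 *      struct vringh vrh;
 *      void *mem = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 *                                   get_order(vring_size(256, 4096)));
 *
 *      if (!mem)
 *              return -ENOMEM;
 *      vring_init(&vring, 256, mem, 4096);
 *      err = vringh_init_kern(&vrh, features, 256, true,
 *                             vring.desc, vring.avail, vring.used);
 */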
/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
                        struct vringh_kiov *riov,
                        struct vringh_kiov *wiov,
                        u16 *head,
                        gfp_t gfp)
{
        int err;

        err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
        if (err < 0)
                return err;

        /* Empty... */
        if (err == vrh->vring.num)
                return 0;

        *head = err;
        err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
                           gfp, copydesc_kern);
        if (err)
                return err;

        return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);
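
/*
 * Sketch of a service loop draining readable buffers (the on-stack kvec
 * array is the caller's choice; __vringh_iov() grows it via resize_iovec()
 * if a chain needs more than 16 elements):
 *
 *      struct vringh_kiov riov;
 *      struct kvec kvec[16];
 *      char buf[64];
 *      u16 head;
 *
 *      vringh_kiov_init(&riov, kvec, ARRAY_SIZE(kvec));
 *      while (vringh_getdesc_kern(&vrh, &riov, NULL, &head, GFP_KERNEL) == 1) {
 *              ssize_t got = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
 *              if (got < 0)
 *                      break;
 *              vringh_complete_kern(&vrh, head, 0);    // read-only: len 0
 *      }
 */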
/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
        return vringh_iov_xfer(riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);
/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
                             const void *src, size_t len)
{
        return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);
/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *       vringh_getdesc_kern() calls to undo).
 *
 * The next vringh_getdesc_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
        /* We only update vring_avail_event(vr) when we want to be notified,
         * so we haven't changed that yet. */
        vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);
/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
        struct vring_used_elem used;

        used.id = cpu_to_vringh32(vrh, head);
        used.len = cpu_to_vringh32(vrh, len);

        return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);
/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
        return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);
/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
        __vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);
/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
        return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);
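
/*
 * Sketch of the poll-then-sleep idiom the notify helpers support
 * (service_ring() and wait_for_guest_kick() are hypothetical):
 *
 *      for (;;) {
 *              vringh_notify_disable_kern(&vrh);
 *              service_ring(&vrh);
 *              if (!vringh_notify_enable_kern(&vrh))
 *                      continue;       // more work slipped in; keep going
 *              wait_for_guest_kick();
 *      }
 */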

MODULE_LICENSE("GPL");