// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio vhost-user driver
 *
 * Copyright(c) 2019 Intel Corporation
 *
 * This driver allows virtio devices to be used over a vhost-user socket.
 *
 * Guest devices can be instantiated by kernel module or command line
 * parameters. One device will be created for each parameter. Syntax:
 *
 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
 * where:
 *		<socket>	:= vhost-user socket path to connect
 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
 *		<platform_id>	:= (optional) platform device id
 *
 * example:
 *		virtio_uml.device=/var/uml.socket:1
 *
 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/time-internal.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
#include <os.h>
#include "vhost_user.h"
#define MAX_SUPPORTED_QUEUE_SIZE	256

#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
struct virtio_uml_platform_data {
	u32 virtio_device_id;
	const char *socket_path;
	struct work_struct conn_broken_wk;
	struct platform_device *pdev;
};
struct virtio_uml_device {
	struct virtio_device vdev;
	struct platform_device *pdev;

	spinlock_t sock_lock;
	int sock, req_fd, irq;
	u64 features;
	u64 protocol_features;
	u8 status;
	u8 registered;
};
struct virtio_uml_vq_info {
	int kick_fd, call_fd;
	char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	struct virtqueue *vq;
	vq_callback_t *callback;
	struct time_travel_event defer;
#endif
};
extern unsigned long long physmem_size, highmem;

#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)

/* Vhost-user protocol */
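
/*
 * A rough sketch of the wire format the helpers below deal with, for
 * orientation only; the authoritative definitions live in vhost_user.h
 * and the vhost-user protocol specification:
 *
 *	header:  u32 request   - VHOST_USER_* request code
 *	         u32 flags     - version bits plus REPLY/NEED_REPLY
 *	         u32 size      - payload size in bytes
 *	payload: 'size' bytes, layout depending on the request
 *
 * Messages are always transferred whole: the send/read helpers retry
 * until header plus payload have gone through.
 */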
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int rc;

	do {
		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (rc > 0) {
			buf += rc;
			len -= rc;
			/* the fds only need to go out once */
			fds = NULL;
			fds_num = 0;
		}
	} while (len && (rc >= 0 || rc == -EINTR));

	if (rc < 0)
		return rc;
	return 0;
}
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR ||
			 (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}
static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
{
	return full_read(fd, msg, sizeof(msg->header), true);
}
static int vhost_user_recv(struct virtio_uml_device *vu_dev,
			   int fd, struct vhost_user_msg *msg,
			   size_t max_payload_size, bool wait)
{
	size_t size;
	int rc;

	/*
	 * In virtio time-travel mode, we're handling all the vhost-user
	 * FDs by polling them whenever appropriate. However, we may get
	 * into a situation where we're sending out an interrupt message
	 * to a device (e.g. a net device) and need to handle a simulation
	 * time message while doing so, e.g. one that tells us to update
	 * our idea of how long we can run without scheduling.
	 *
	 * Thus, we need to not just read() from the given fd, but need
	 * to also handle messages for the simulation time - this function
	 * does that for us while waiting for the given fd to be readable.
	 */
	if (wait)
		time_travel_wait_readable(fd);

	rc = vhost_user_recv_header(fd, msg);

	if (rc == -ECONNRESET && vu_dev->registered) {
		struct virtio_uml_platform_data *pdata;

		pdata = vu_dev->pdev->dev.platform_data;

		virtio_break_device(&vu_dev->vdev);
		schedule_work(&pdata->conn_broken_wk);
	}
	if (rc)
		return rc;
	size = msg->header.size;
	if (size > max_payload_size)
		return -EPROTO;
	return full_read(fd, &msg->payload, size, false);
}
static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
				struct vhost_user_msg *msg,
				size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
				 max_payload_size, true);

	if (rc)
		return rc;

	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
		return -EPROTO;

	return 0;
}
static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg msg;
	int rc = vhost_user_recv_resp(vu_dev, &msg,
				      sizeof(msg.payload.integer));

	if (rc)
		return rc;
	if (msg.header.size != sizeof(msg.payload.integer))
		return -EPROTO;
	*value = msg.payload.integer;
	return 0;
}
static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
			       struct vhost_user_msg *msg,
			       size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
				 max_payload_size, false);

	if (rc)
		return rc;

	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
			VHOST_USER_VERSION)
		return -EPROTO;

	return 0;
}
static int vhost_user_send(struct virtio_uml_device *vu_dev,
			   bool need_response, struct vhost_user_msg *msg,
			   int *fds, size_t num_fds)
{
	size_t size = sizeof(msg->header) + msg->header.size;
	unsigned long flags;
	bool request_ack;
	int rc;

	msg->header.flags |= VHOST_USER_VERSION;

	/*
	 * The need_response flag indicates that we already need a response,
	 * e.g. to read the features. In these cases, don't request an ACK as
	 * it is meaningless. Also request an ACK only if supported.
	 */
	request_ack = !need_response;
	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
		request_ack = false;

	if (request_ack)
		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;

	spin_lock_irqsave(&vu_dev->sock_lock, flags);
	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
	if (rc < 0)
		goto out;

	if (request_ack) {
		uint64_t status;

		rc = vhost_user_recv_u64(vu_dev, &status);
		if (rc)
			goto out;

		if (status) {
			vu_err(vu_dev, "slave reports error: %llu\n", status);
			rc = -EIO;
			goto out;
		}
	}

out:
	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	return rc;
}
static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      bool need_response, u32 request)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
}
static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
					 u32 request, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}
static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = value,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
{
	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
}
static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, features);
}
static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
}
static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
			VHOST_USER_GET_PROTOCOL_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, protocol_features);
}
static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 protocol_features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
				   protocol_features);
}
static void vhost_user_reply(struct virtio_uml_device *vu_dev,
			     struct vhost_user_msg *msg, int response)
{
	struct vhost_user_msg reply = {
		.payload.integer = response,
	};
	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
	int rc;

	reply.header = msg->header;
	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
	reply.header.flags |= VHOST_USER_FLAG_REPLY;
	reply.header.size = sizeof(reply.payload.integer);

	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);

	if (rc)
		vu_err(vu_dev,
		       "sending reply to slave request failed: %d (size %zu)\n",
		       rc, size);
}
static irqreturn_t vu_req_interrupt(int irq, void *data)
{
	struct virtio_uml_device *vu_dev = data;
	struct virtqueue *vq;
	int response = 1;
	struct {
		struct vhost_user_msg msg;
		u8 extra_payload[512];
	} msg;
	int rc;

	rc = vhost_user_recv_req(vu_dev, &msg.msg,
				 sizeof(msg.msg.payload) +
				 sizeof(msg.extra_payload));

	if (rc)
		return IRQ_NONE;

	switch (msg.msg.header.request) {
	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
		virtio_config_changed(&vu_dev->vdev);
		response = 0;
		break;
	case VHOST_USER_SLAVE_VRING_CALL:
		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vq->index == msg.msg.payload.vring_state.index) {
				response = 0;
				vring_interrupt(0 /* ignored */, vq);
				break;
			}
		}
		break;
	case VHOST_USER_SLAVE_IOTLB_MSG:
		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
	default:
		vu_err(vu_dev, "unexpected slave request %d\n",
		       msg.msg.header.request);
	}

	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
		vhost_user_reply(vu_dev, &msg.msg, response);

	return IRQ_HANDLED;
}
static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
{
	int rc, req_fds[2];

	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
	rc = os_pipe(req_fds, true, true);
	if (rc < 0)
		return rc;
	vu_dev->req_fd = req_fds[0];

	rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
			    vu_req_interrupt, IRQF_SHARED,
			    vu_dev->pdev->name, vu_dev);
	if (rc < 0)
		goto err_close;

	vu_dev->irq = rc;

	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
					   vu_dev->req_fd);
	if (rc)
		goto err_free_irq;

	goto out;

err_free_irq:
	um_free_irq(vu_dev->irq, vu_dev);
err_close:
	os_close_file(req_fds[0]);
out:
	/* Close unused write end of request fds */
	os_close_file(req_fds[1]);
	return rc;
}
static int vhost_user_init(struct virtio_uml_device *vu_dev)
{
	int rc = vhost_user_set_owner(vu_dev);

	if (rc)
		return rc;
	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
	if (rc)
		return rc;

	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = vhost_user_get_protocol_features(vu_dev,
				&vu_dev->protocol_features);
		if (rc)
			return rc;
		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
		rc = vhost_user_set_protocol_features(vu_dev,
				vu_dev->protocol_features);
		if (rc)
			return rc;
	}

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		rc = vhost_user_init_slave_req(vu_dev);
		if (rc)
			return rc;
	}

	return 0;
}
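
/*
 * To summarise vhost_user_init() above: the handshake is SET_OWNER, then
 * GET_FEATURES; if VHOST_USER_F_PROTOCOL_FEATURES was offered, GET/SET
 * of the protocol features (masked by what we support); and finally
 * SET_SLAVE_REQ_FD when the slave supports the slave-request channel.
 */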
static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}

	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}
static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + len;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_SET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = offset;
	msg->payload.config.size = len;
	memcpy(msg->payload.config.payload, buf, len);

	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
	if (rc)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       rc);

	kfree(msg);
}
static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long mem_offset;
	int rc = phys_mapping(addr, &mem_offset);

	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
		return -EFAULT;
	*fd_out = rc;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = mem_offset;

	/* Ensure mapping is valid for the entire region */
	rc = phys_mapping(addr + size - 1, &mem_offset);
	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, rc, *fd_out))
		return -EFAULT;

	return 0;
}
static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_MEM_TABLE,
		.header.size = sizeof(msg.payload.mem_regions),
		.payload.mem_regions.num = 1,
	};
	unsigned long reserved = uml_reserved - uml_physmem;
	int fds[2];
	int rc;

	/*
	 * This is a bit tricky, see also the comment with setup_physmem().
	 *
	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
	 * but the code and data we *already* have is omitted. To us, this
	 * is no difference, since they both become part of our address
	 * space and memory consumption. To somebody looking in from the
	 * outside, however, it is different because the part of our memory
	 * consumption that's already part of the binary (code/data) is not
	 * mapped from the file, so it's not visible to another mmap from
	 * the file descriptor.
	 *
	 * Thus, don't advertise this space to the vhost-user slave. This
	 * means that the slave will likely abort or similar when we give
	 * it an address from the hidden range, since it's not marked as
	 * a valid address, but at least that way we detect the issue and
	 * don't just have the slave read an all-zeroes buffer from the
	 * shared memory file, or write something there that we can never
	 * see (depending on the direction of the virtqueue traffic.)
	 *
	 * Since we usually don't want to use .text for virtio buffers,
	 * this effectively means that you cannot use
	 *  1) global variables, which are in the .bss and not in the shm
	 *     file-backed memory
	 *  2) the stack in some processes, depending on where they have
	 *     their stack (or maybe only no interrupt stack?)
	 *
	 * The stack is already not typically valid for DMA, so this isn't
	 * much of a restriction, but global variables might be encountered.
	 *
	 * It might be possible to fix it by copying around the data that's
	 * between bss_start and where we map the file now, but it's not
	 * something that you typically encounter with virtio drivers, so
	 * it didn't seem worthwhile.
	 */
	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
					&fds[0],
					&msg.payload.mem_regions.regions[0]);
	if (rc < 0)
		return rc;
	if (highmem) {
		msg.payload.mem_regions.num++;
		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
				&fds[1], &msg.payload.mem_regions.regions[1]);
		if (rc < 0)
			return rc;
	}

	return vhost_user_send(vu_dev, false, &msg, fds,
			       msg.payload.mem_regions.num);
}
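
/*
 * Resulting physical memory layout as advertised above (sketch only):
 *
 *	phys 0 .. reserved              kernel code/data already in the
 *	                                binary - hidden from the slave
 *	phys reserved .. physmem_size   region 0, file-backed, advertised
 *	__pa(end_iomem) .. + highmem    region 1, only when highmem != 0
 */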
static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.vring_state),
		.payload.vring_state.index = index,
		.payload.vring_state.num = num,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
					  index, num);
}
static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
					  index, offset);
}
static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_VRING_ADDR,
		.header.size = sizeof(msg.payload.vring_addr),
		.payload.vring_addr.index = index,
		.payload.vring_addr.desc = desc,
		.payload.vring_addr.used = used,
		.payload.vring_addr.avail = avail,
		.payload.vring_addr.log = log,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = index,
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;
	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
	}
	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}
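
/*
 * Note on the fd < 0 case above: the message then carries
 * VHOST_USER_VRING_POLL_MASK in the index payload and attaches no file
 * descriptor, telling the slave that this vring has no kick/call fd and
 * must be driven via in-band messages instead.
 */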
static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
				       index, fd);
}
static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
				       index, fd);
}
static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
				       u32 index, bool enable)
{
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
					  index, enable);
}
/* Virtio interface */
static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t n = 1;
	int rc;

	time_travel_propagate_time();

	if (info->kick_fd < 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
						  vq->index, 0) == 0;
	}

	do {
		rc = os_write_file(info->kick_fd, &n, sizeof(n));
	} while (rc == -EINTR);

	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
}
static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	uint64_t n;
	int rc;
	irqreturn_t ret = IRQ_NONE;

	/* drain the call fd completely, handling each event */
	do {
		rc = os_read_file(info->call_fd, &n, sizeof(n));
		if (rc == sizeof(n))
			ret |= vring_interrupt(irq, vq);
	} while (rc == sizeof(n) || rc == -EINTR);
	WARN(rc != -EAGAIN, "read returned %d\n", rc);
	return ret;
}
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_get_config(vu_dev, offset, buf, len);
}
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_set_config(vu_dev, offset, buf, len);
}
static u8 vu_get_status(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->status;
}
static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = status;
}
static void vu_reset(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = 0;
}
static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	if (info->call_fd >= 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}

	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(info);
}
static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *n;
	u64 features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/* Ensure previous messages have been processed */
	WARN_ON(vhost_user_get_features(vu_dev, &features));

	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
		vu_del_vq(vq);
}
static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int call_fds[2];
	int rc;

	/* no call FD needed/desired in this case */
	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
	    vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		info->call_fd = -1;
		return 0;
	}

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(call_fds, true, true);
	if (rc < 0)
		return rc;

	info->call_fd = call_fds[0];
	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
			    vu_interrupt, IRQF_SHARED, info->name, vq);
	if (rc < 0)
		goto close_both;

	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
	if (rc)
		goto release_irq;

	goto out;

release_irq:
	um_free_irq(vu_dev->irq, vq);
close_both:
	os_close_file(call_fds[0]);
out:
	/* Close (unused) write end of call fds */
	os_close_file(call_fds[1]);

	return rc;
}
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void vu_defer_irq_handle(struct time_travel_event *d)
{
	struct virtio_uml_vq_info *info;

	info = container_of(d, struct virtio_uml_vq_info, defer);
	info->callback(info->vq);
}

static void vu_defer_irq_callback(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	time_travel_add_irq_event(&info->defer);
}
#endif
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int num = MAX_SUPPORTED_QUEUE_SIZE;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto error_kzalloc;
	}
	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/*
	 * When we get an interrupt, we must bounce it through the simulation
	 * calendar (the simtime device), except for the simtime device itself
	 * since that's part of the simulation control.
	 */
	if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
		info->callback = callback;
		callback = vu_defer_irq_callback;
		time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
	}
#endif

	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
				    ctx, vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto error_create;
	}
	vq->priv = info;
	num = virtqueue_get_vring_size(vq);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	info->vq = vq;
#endif

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
		info->kick_fd = -1;
	} else {
		rc = os_eventfd(0, 0);
		if (rc < 0)
			goto error_kick;
		info->kick_fd = rc;
	}

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto error_call;

	rc = vhost_user_set_vring_num(vu_dev, index, num);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_addr(vu_dev, index,
				       virtqueue_get_desc_addr(vq),
				       virtqueue_get_used_addr(vq),
				       virtqueue_get_avail_addr(vq),
				       (u64) -1);
	if (rc)
		goto error_setup;

	return vq;

error_setup:
	if (info->call_fd >= 0) {
		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}
error_call:
	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);
error_kick:
	vring_del_virtqueue(vq);
error_create:
	kfree(info);
error_kzalloc:
	return ERR_PTR(rc);
}
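
/*
 * The per-queue setup above follows the order the vhost-user protocol
 * expects: create the ring locally first, then SET_VRING_NUM,
 * SET_VRING_BASE and SET_VRING_ADDR. The kick fd and SET_VRING_ENABLE
 * are sent later from vu_find_vqs(), once every queue exists.
 */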
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	int i, queue_idx = 0, rc;
	struct virtqueue *vq;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false);
		if (IS_ERR(vqs[i])) {
			rc = PTR_ERR(vqs[i]);
			goto error_setup;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		if (info->kick_fd >= 0) {
			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
						       info->kick_fd);
			if (rc)
				goto error_setup;
		}

		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (rc)
			goto error_setup;
	}

	return 0;

error_setup:
	vu_del_vqs(vdev);
	return rc;
}
static u64 vu_get_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->features;
}
static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = vdev->features | supported;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}
static const char *vu_bus_name(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->pdev->name;
}
static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};
static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
		container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	/* might not have been opened due to not negotiating the feature */
	if (vu_dev->req_fd >= 0) {
		um_free_irq(vu_dev->irq, vu_dev);
		os_close_file(vu_dev->req_fd);
	}

	os_close_file(vu_dev->sock);
}
/* Platform device */
static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata)
		return -EINVAL;

	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;
	vu_dev->req_fd = -1;

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0)
		return rc;
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc)
		put_device(&vu_dev->vdev.dev);
	vu_dev->registered = 1;
	return rc;

error_init:
	os_close_file(vu_dev->sock);
	return rc;
}
static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	unregister_virtio_device(&vu_dev->vdev);
	return 0;
}
/* Command line device list */
static void vu_cmdline_release_dev(struct device *d)
{
}

static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};
static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;
static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;

	kfree(pdata->socket_path);
	platform_device_unregister(pdev);
	return 0;
}
static void vu_conn_broken(struct work_struct *wk)
{
	struct virtio_uml_platform_data *pdata;

	pdata = container_of(wk, struct virtio_uml_platform_data,
			     conn_broken_wk);
	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
}
static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata, *ppdata;
	struct platform_device *pdev;

	if (!ids || ids == device)
		return -EINVAL;

	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;

	ppdata = pdev->dev.platform_data;
	ppdata->pdev = pdev;
	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);

	return 0;

free:
	kfree(socket_path);
	return err;
}
static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}
static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	buffer[0] = '\0';
	if (vu_cmdline_parent_registered)
		device_for_each_child(&vu_cmdline_parent, buffer,
				      vu_cmdline_get_device);
	return strlen(buffer) + 1;
}
static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};

device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);
static void vu_unregister_cmdline_devices(void)
{
	if (vu_cmdline_parent_registered) {
		device_for_each_child(&vu_cmdline_parent, NULL,
				      vu_unregister_cmdline_device);
		device_unregister(&vu_cmdline_parent);
		vu_cmdline_parent_registered = false;
	}
}
/* Platform driver */

static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);
static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
};
static int __init virtio_uml_init(void)
{
	return platform_driver_register(&virtio_uml_driver);
}
static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}
module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");