1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
6 #include <linux/inet.h>
7 #include <linux/kthread.h>
8 #include <linux/list.h>
9 #include <linux/radix-tree.h>
10 #include <linux/module.h>
11 #include <linux/semaphore.h>
12 #include <linux/wait.h>
14 #include <net/inet_common.h>
15 #include <net/inet_connection_sock.h>
16 #include <net/request_sock.h>
18 #include <xen/events.h>
19 #include <xen/grant_table.h>
21 #include <xen/xenbus.h>
22 #include <xen/interface/io/pvcalls.h>
/*
 * PVCALLS_VERSIONS: NOTE(review) presumably the string written to the
 * xenstore "versions" key by pvcalls_back_probe; the argument is not
 * visible in this view - confirm.
 * MAX_RING_ORDER: upper bound for the ring_order a frontend may request;
 * checked in pvcalls_new_active_socket.
 */
24 #define PVCALLS_VERSIONS "1"
25 #define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
/*
 * Global backend state: the list of connected frontends (struct
 * pvcalls_fedata entries linked through their "list" member) and the
 * semaphore taken around additions to and teardown of that list.
 */
27 struct pvcalls_back_global
{
28 struct list_head frontends
;
29 struct semaphore frontends_lock
;
30 } pvcalls_back_global
;
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
/*
 * Members visible here:
 *   list                - links this frontend into pvcalls_back_global.frontends
 *   dev                 - the xenbus device this state is attached to (drvdata)
 *   sring / ring        - mapped shared command ring and its back-ring view
 *   socket_mappings     - list of active sockets (struct sock_mapping)
 *   socketpass_mappings - radix tree of passive sockets, keyed by socket id
 *   socket_lock         - semaphore taken around accesses to both containers
 * The code also uses fedata->irq; that member's declaration is not visible
 * in this view.
 */
37 struct pvcalls_fedata
{
38 struct list_head list
;
39 struct xenbus_device
*dev
;
40 struct xen_pvcalls_sring
*sring
;
41 struct xen_pvcalls_back_ring ring
;
43 struct list_head socket_mappings
;
44 struct radix_tree_root socketpass_mappings
;
45 struct semaphore socket_lock
;
/*
 * Per-active-socket I/O worker: the work item (initialised to run
 * pvcalls_back_ioworker) and the workqueue it is queued on.
 */
48 struct pvcalls_ioworker
{
49 struct work_struct register_work
;
50 struct workqueue_struct
*wq
;
/*
 * Members of the per-active-socket mapping (accessed elsewhere in this file
 * through "struct sock_mapping *map"; the struct's opening line and several
 * members such as sock, irq, id, bytes, ring_order and the atomic counters
 * are not visible in this view).
 *   list             - entry in fedata->socket_mappings
 *   fedata           - owning frontend
 *   sockpass         - passive socket this connection was accepted from
 *   ring             - mapped data-ring interface page (indexes, errors)
 *   data             - in/out pointers into the mapped data ring
 *   saved_data_ready - original sk_data_ready, restored on release
 *   ioworker         - work item / workqueue driving the data transfer
 */
54 struct list_head list
;
55 struct pvcalls_fedata
*fedata
;
56 struct sockpass_mapping
*sockpass
;
60 struct pvcalls_data_intf
*ring
;
62 struct pvcalls_data data
;
69 void (*saved_data_ready
)(struct sock
*sk
);
70 struct pvcalls_ioworker ioworker
;
/*
 * State for one passive (bound/listening) socket.
 *   list             - list linkage (no use visible in this view)
 *   fedata           - owning frontend
 *   reqcopy          - copy of the pending accept/poll request; cmd == 0
 *                      means no request is outstanding
 *   wq               - workqueue on which register_work is queued
 *   register_work    - work item running __pvcalls_back_accept
 *   saved_data_ready - original sk_data_ready, restored on release
 * Other members used by the code (sock, id, copy_lock) are not visible in
 * this view.
 */
73 struct sockpass_mapping
{
74 struct list_head list
;
75 struct pvcalls_fedata
*fedata
;
78 struct xen_pvcalls_request reqcopy
;
80 struct workqueue_struct
*wq
;
81 struct work_struct register_work
;
82 void (*saved_data_ready
)(struct sock
*sk
);
/*
 * Forward declarations: both functions are referenced by
 * pvcalls_new_active_socket before their definitions appear.
 */
85 static irqreturn_t
pvcalls_back_conn_event(int irq
, void *sock_map
);
86 static int pvcalls_back_release_active(struct xenbus_device
*dev
,
87 struct pvcalls_fedata
*fedata
,
88 struct sock_mapping
*map
);
/*
 * Move data from the kernel socket into the "in" data ring.
 *
 * @opaque: the struct sock_mapping of the active socket.
 *
 * Reads the in_cons/in_prod indexes, receives up to the free space of the
 * ring from the socket with MSG_DONTWAIT directly into the ring buffer,
 * then publishes the new in_prod (or in_error) and notifies the frontend
 * through map->irq. Several statements (local declarations, returns and
 * memory barriers) are not visible in this view.
 */
90 static void pvcalls_conn_back_read(void *opaque
)
92 struct sock_mapping
*map
= (struct sock_mapping
*)opaque
;
95 RING_IDX cons
, prod
, size
, wanted
, array_size
, masked_prod
, masked_cons
;
97 struct pvcalls_data_intf
*intf
= map
->ring
;
98 struct pvcalls_data
*data
= &map
->data
;
102 array_size
= XEN_FLEX_RING_SIZE(map
->ring_order
);
103 cons
= intf
->in_cons
;
104 prod
= intf
->in_prod
;
105 error
= intf
->in_error
;
106 /* read the indexes first, then deal with the data */
112 size
= pvcalls_queued(prod
, cons
, array_size
);
/* size >= array_size: the queued bytes already fill the whole ring */
113 if (size
>= array_size
)
/* nothing queued on the socket: clear the pending-read counter */
115 spin_lock_irqsave(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
116 if (skb_queue_empty(&map
->sock
->sk
->sk_receive_queue
)) {
117 atomic_set(&map
->read
, 0);
118 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
,
122 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
/* wanted = free space left in the ring */
123 wanted
= array_size
- size
;
124 masked_prod
= pvcalls_mask(prod
, array_size
);
125 masked_cons
= pvcalls_mask(cons
, array_size
);
127 memset(&msg
, 0, sizeof(msg
));
/*
 * Producer behind consumer: the free space is one contiguous segment.
 * Otherwise it runs to the end of the buffer and wraps to its start,
 * so two segments are used.
 */
128 if (masked_prod
< masked_cons
) {
129 vec
[0].iov_base
= data
->in
+ masked_prod
;
130 vec
[0].iov_len
= wanted
;
131 iov_iter_kvec(&msg
.msg_iter
, WRITE
, vec
, 1, wanted
);
133 vec
[0].iov_base
= data
->in
+ masked_prod
;
134 vec
[0].iov_len
= array_size
- masked_prod
;
135 vec
[1].iov_base
= data
->in
;
136 vec
[1].iov_len
= wanted
- vec
[0].iov_len
;
137 iov_iter_kvec(&msg
.msg_iter
, WRITE
, vec
, 2, wanted
);
140 atomic_set(&map
->read
, 0);
141 ret
= inet_recvmsg(map
->sock
, &msg
, wanted
, MSG_DONTWAIT
);
142 WARN_ON(ret
> wanted
);
143 if (ret
== -EAGAIN
) /* shouldn't happen */
/* data was received and more is still queued: re-arm the read counter */
147 spin_lock_irqsave(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
148 if (ret
> 0 && !skb_queue_empty(&map
->sock
->sk
->sk_receive_queue
))
149 atomic_inc(&map
->read
);
150 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
152 /* write the data, then modify the indexes */
155 atomic_set(&map
->read
, 0);
156 intf
->in_error
= ret
;
158 intf
->in_prod
= prod
+ ret
;
159 /* update the indexes, then notify the other end */
161 notify_remote_via_irq(map
->irq
);
/*
 * Send the data queued in the "out" data ring to the kernel socket.
 *
 * @map: the active socket mapping.
 *
 * Reads out_cons/out_prod, hands the queued bytes to inet_sendmsg with
 * MSG_DONTWAIT, then publishes the new out_cons (or out_error) and notifies
 * the frontend through map->irq. Several statements (local declarations,
 * returns and memory barriers) are not visible in this view.
 */
166 static void pvcalls_conn_back_write(struct sock_mapping
*map
)
168 struct pvcalls_data_intf
*intf
= map
->ring
;
169 struct pvcalls_data
*data
= &map
->data
;
172 RING_IDX cons
, prod
, size
, array_size
;
175 cons
= intf
->out_cons
;
176 prod
= intf
->out_prod
;
177 /* read the indexes before dealing with the data */
180 array_size
= XEN_FLEX_RING_SIZE(map
->ring_order
);
181 size
= pvcalls_queued(prod
, cons
, array_size
);
185 memset(&msg
, 0, sizeof(msg
));
186 msg
.msg_flags
|= MSG_DONTWAIT
;
/*
 * Producer ahead of consumer: the queued data is one contiguous segment.
 * Otherwise it runs to the end of the buffer and wraps to its start,
 * so two segments are used.
 */
187 if (pvcalls_mask(prod
, array_size
) > pvcalls_mask(cons
, array_size
)) {
188 vec
[0].iov_base
= data
->out
+ pvcalls_mask(cons
, array_size
);
189 vec
[0].iov_len
= size
;
190 iov_iter_kvec(&msg
.msg_iter
, READ
, vec
, 1, size
);
192 vec
[0].iov_base
= data
->out
+ pvcalls_mask(cons
, array_size
);
193 vec
[0].iov_len
= array_size
- pvcalls_mask(cons
, array_size
);
194 vec
[1].iov_base
= data
->out
;
195 vec
[1].iov_len
= size
- vec
[0].iov_len
;
196 iov_iter_kvec(&msg
.msg_iter
, READ
, vec
, 2, size
);
199 atomic_set(&map
->write
, 0);
200 ret
= inet_sendmsg(map
->sock
, &msg
, size
);
/* -EAGAIN or a short send: bump the write and io counters again */
201 if (ret
== -EAGAIN
|| (ret
>= 0 && ret
< size
)) {
202 atomic_inc(&map
->write
);
203 atomic_inc(&map
->io
);
208 /* write the data, then update the indexes */
211 intf
->out_error
= ret
;
214 intf
->out_cons
= cons
+ ret
;
215 prod
= intf
->out_prod
;
217 /* update the indexes, then notify the other end */
/* the ring still holds unsent data after this pass: keep write pending */
219 if (prod
!= cons
+ ret
)
220 atomic_inc(&map
->write
);
221 notify_remote_via_irq(map
->irq
);
/*
 * Work handler for an active socket's ioworker.
 *
 * @work: the register_work member embedded in the socket's ioworker.
 *
 * While map->io is positive: a pending release request is acknowledged by
 * clearing map->release (what follows is not visible in this view); then
 * pending reads and writes are serviced and map->io is decremented.
 */
224 static void pvcalls_back_ioworker(struct work_struct
*work
)
226 struct pvcalls_ioworker
*ioworker
= container_of(work
,
227 struct pvcalls_ioworker
, register_work
);
228 struct sock_mapping
*map
= container_of(ioworker
, struct sock_mapping
,
231 while (atomic_read(&map
->io
) > 0) {
232 if (atomic_read(&map
->release
) > 0) {
233 atomic_set(&map
->release
, 0);
237 if (atomic_read(&map
->read
) > 0)
238 pvcalls_conn_back_read(map
);
239 if (atomic_read(&map
->write
) > 0)
240 pvcalls_conn_back_write(map
);
242 atomic_dec(&map
->io
);
/*
 * Handle a socket request.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Validates the requested domain/type/protocol and fills in a response
 * carrying the request id and socket id. No kernel socket is created here
 * (see the "leave the actual socket allocation for later" comment below).
 * The assignments to the response's cmd/ret fields and the return
 * statement are not visible in this view.
 */
246 static int pvcalls_back_socket(struct xenbus_device
*dev
,
247 struct xen_pvcalls_request
*req
)
249 struct pvcalls_fedata
*fedata
;
251 struct xen_pvcalls_response
*rsp
;
253 fedata
= dev_get_drvdata(&dev
->dev
);
/*
 * NOTE(review): the last comparison checks the protocol field against
 * AF_INET, which is an address-family constant - confirm this is intended.
 */
255 if (req
->u
.socket
.domain
!= AF_INET
||
256 req
->u
.socket
.type
!= SOCK_STREAM
||
257 (req
->u
.socket
.protocol
!= IPPROTO_IP
&&
258 req
->u
.socket
.protocol
!= AF_INET
))
263 /* leave the actual socket allocation for later */
265 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
266 rsp
->req_id
= req
->req_id
;
268 rsp
->u
.socket
.id
= req
->u
.socket
.id
;
/*
 * sk_state_change callback installed on active sockets.
 *
 * @sock: the kernel sock whose state changed; its sk_user_data holds the
 *        sock_mapping.
 *
 * Marks a read as pending and notifies the frontend through map->irq.
 * Any check on map between these statements is not visible in this view.
 */
274 static void pvcalls_sk_state_change(struct sock
*sock
)
276 struct sock_mapping
*map
= sock
->sk_user_data
;
281 atomic_inc(&map
->read
);
282 notify_remote_via_irq(map
->irq
);
/*
 * sk_data_ready callback installed on active sockets.
 *
 * @sock: the kernel sock with new data; its sk_user_data holds the
 *        sock_mapping.
 *
 * Marks a read as pending, bumps the io counter and queues the socket's
 * ioworker. Any check on map between these statements is not visible in
 * this view.
 */
285 static void pvcalls_sk_data_ready(struct sock
*sock
)
287 struct sock_mapping
*map
= sock
->sk_user_data
;
288 struct pvcalls_ioworker
*iow
;
293 iow
= &map
->ioworker
;
294 atomic_inc(&map
->read
);
295 atomic_inc(&map
->io
);
296 queue_work(iow
->wq
, &iow
->register_work
);
/*
 * Allocate and wire up the mapping for a new active (connected) socket.
 *
 * @fedata: owning frontend. The remaining parameters are not visible in
 *          this view.
 *
 * Steps visible here: allocate the mapping, map the data-ring interface
 * page, validate the frontend-supplied ring_order against MAX_RING_ORDER,
 * map the data ring itself, bind the event channel to
 * pvcalls_back_conn_event, set up the in/out buffer pointers, create the
 * ioworker workqueue, add the mapping to fedata->socket_mappings and
 * install the socket callbacks. The final statements unlink the mapping
 * and release it via pvcalls_back_release_active (presumably the error
 * path; its label is not visible).
 */
299 static struct sock_mapping
*pvcalls_new_active_socket(
300 struct pvcalls_fedata
*fedata
,
307 struct sock_mapping
*map
;
310 map
= kzalloc(sizeof(*map
), GFP_KERNEL
);
314 map
->fedata
= fedata
;
319 ret
= xenbus_map_ring_valloc(fedata
->dev
, &ref
, 1, &page
);
323 map
->ring_order
= map
->ring
->ring_order
;
324 /* first read the order, then map the data ring */
326 if (map
->ring_order
> MAX_RING_ORDER
) {
327 pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
328 __func__
, map
->ring_order
, MAX_RING_ORDER
);
/* the data ring spans 1 << ring_order grant references */
331 ret
= xenbus_map_ring_valloc(fedata
->dev
, map
->ring
->ref
,
332 (1 << map
->ring_order
), &page
);
337 ret
= bind_interdomain_evtchn_to_irqhandler(fedata
->dev
->otherend_id
,
339 pvcalls_back_conn_event
,
/* the "out" buffer starts one ring size after the "in" buffer */
347 map
->data
.in
= map
->bytes
;
348 map
->data
.out
= map
->bytes
+ XEN_FLEX_RING_SIZE(map
->ring_order
);
350 map
->ioworker
.wq
= alloc_workqueue("pvcalls_io", WQ_UNBOUND
, 1);
351 if (!map
->ioworker
.wq
)
353 atomic_set(&map
->io
, 1);
354 INIT_WORK(&map
->ioworker
.register_work
, pvcalls_back_ioworker
);
356 down(&fedata
->socket_lock
);
357 list_add_tail(&map
->list
, &fedata
->socket_mappings
);
358 up(&fedata
->socket_lock
);
/* save the original data_ready callback so release can restore it */
360 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
361 map
->saved_data_ready
= map
->sock
->sk
->sk_data_ready
;
362 map
->sock
->sk
->sk_user_data
= map
;
363 map
->sock
->sk
->sk_data_ready
= pvcalls_sk_data_ready
;
364 map
->sock
->sk
->sk_state_change
= pvcalls_sk_state_change
;
365 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
369 down(&fedata
->socket_lock
);
370 list_del(&map
->list
);
371 pvcalls_back_release_active(fedata
->dev
, fedata
, map
);
372 up(&fedata
->socket_lock
);
/*
 * Handle a connect request.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Validates the supplied address length and family, creates an AF_INET
 * stream socket, connects it, registers it as an active socket and fills
 * in the response. Error handling and the return statement are not
 * visible in this view.
 */
376 static int pvcalls_back_connect(struct xenbus_device
*dev
,
377 struct xen_pvcalls_request
*req
)
379 struct pvcalls_fedata
*fedata
;
382 struct sock_mapping
*map
;
383 struct xen_pvcalls_response
*rsp
;
384 struct sockaddr
*sa
= (struct sockaddr
*)&req
->u
.connect
.addr
;
386 fedata
= dev_get_drvdata(&dev
->dev
);
/* len must cover sa_family, fit the request's addr field, and be AF_INET */
388 if (req
->u
.connect
.len
< sizeof(sa
->sa_family
) ||
389 req
->u
.connect
.len
> sizeof(req
->u
.connect
.addr
) ||
390 sa
->sa_family
!= AF_INET
)
393 ret
= sock_create(AF_INET
, SOCK_STREAM
, 0, &sock
);
396 ret
= inet_stream_connect(sock
, sa
, req
->u
.connect
.len
, 0);
402 map
= pvcalls_new_active_socket(fedata
,
405 req
->u
.connect
.evtchn
,
413 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
414 rsp
->req_id
= req
->req_id
;
416 rsp
->u
.connect
.id
= req
->u
.connect
.id
;
/*
 * Tear down an active socket mapping.
 *
 * @dev:    xenbus device of the frontend.
 * @fedata: owning frontend.
 * @map:    mapping to release; callers unlink it from
 *          fedata->socket_mappings before calling.
 *
 * Disables the event-channel irq, restores the socket's original
 * callbacks, sets map->release and flushes the ioworker work item, then
 * unmaps both rings, unbinds the irq and releases the socket. Freeing of
 * map and the return statement are not visible in this view.
 */
422 static int pvcalls_back_release_active(struct xenbus_device
*dev
,
423 struct pvcalls_fedata
*fedata
,
424 struct sock_mapping
*map
)
426 disable_irq(map
->irq
);
427 if (map
->sock
->sk
!= NULL
) {
428 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
429 map
->sock
->sk
->sk_user_data
= NULL
;
430 map
->sock
->sk
->sk_data_ready
= map
->saved_data_ready
;
431 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
/* the ioworker checks this flag; wait for any queued run to finish */
434 atomic_set(&map
->release
, 1);
435 flush_work(&map
->ioworker
.register_work
);
437 xenbus_unmap_ring_vfree(dev
, map
->bytes
);
438 xenbus_unmap_ring_vfree(dev
, (void *)map
->ring
);
439 unbind_from_irqhandler(map
->irq
, map
);
441 sock_release(map
->sock
);
/*
 * Tear down a passive socket mapping.
 *
 * @dev:     xenbus device of the frontend.
 * @fedata:  owning frontend.
 * @mappass: mapping to release; callers remove it from the radix tree
 *           before calling.
 *
 * Restores the socket's original callbacks, releases the socket, then
 * flushes and destroys the mapping's workqueue. Freeing of mappass and the
 * return statement are not visible in this view.
 */
447 static int pvcalls_back_release_passive(struct xenbus_device
*dev
,
448 struct pvcalls_fedata
*fedata
,
449 struct sockpass_mapping
*mappass
)
451 if (mappass
->sock
->sk
!= NULL
) {
452 write_lock_bh(&mappass
->sock
->sk
->sk_callback_lock
);
453 mappass
->sock
->sk
->sk_user_data
= NULL
;
454 mappass
->sock
->sk
->sk_data_ready
= mappass
->saved_data_ready
;
455 write_unlock_bh(&mappass
->sock
->sk
->sk_callback_lock
);
457 sock_release(mappass
->sock
);
458 flush_workqueue(mappass
->wq
);
459 destroy_workqueue(mappass
->wq
);
/*
 * Handle a release request.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Looks the id up first among the active sockets, then among the passive
 * ones. A match is unlinked under socket_lock and torn down after the lock
 * has been dropped. A response carrying the request id and socket id is
 * then filled in. Jumps between these steps and the return statement are
 * not visible in this view.
 */
465 static int pvcalls_back_release(struct xenbus_device
*dev
,
466 struct xen_pvcalls_request
*req
)
468 struct pvcalls_fedata
*fedata
;
469 struct sock_mapping
*map
, *n
;
470 struct sockpass_mapping
*mappass
;
472 struct xen_pvcalls_response
*rsp
;
474 fedata
= dev_get_drvdata(&dev
->dev
);
476 down(&fedata
->socket_lock
);
477 list_for_each_entry_safe(map
, n
, &fedata
->socket_mappings
, list
) {
478 if (map
->id
== req
->u
.release
.id
) {
479 list_del(&map
->list
);
480 up(&fedata
->socket_lock
);
481 ret
= pvcalls_back_release_active(dev
, fedata
, map
);
/* not an active socket: try the passive-socket tree */
485 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
487 if (mappass
!= NULL
) {
488 radix_tree_delete(&fedata
->socketpass_mappings
, mappass
->id
);
489 up(&fedata
->socket_lock
);
490 ret
= pvcalls_back_release_passive(dev
, fedata
, mappass
);
492 up(&fedata
->socket_lock
);
495 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
496 rsp
->req_id
= req
->req_id
;
497 rsp
->u
.release
.id
= req
->u
.release
.id
;
/*
 * Work handler that performs a pending accept on a passive socket.
 *
 * @work: the register_work member embedded in the sockpass_mapping.
 *
 * Proceeds only if the stored request copy is a PVCALLS_ACCEPT. It then
 * performs a non-blocking inet_accept into a new socket that inherits type
 * and ops from the listening socket, registers the result as an active
 * socket, kicks its ioworker, pushes the accept response to the frontend
 * and clears reqcopy.cmd so a new accept/poll can be stored. Allocation of
 * the new socket, the -EAGAIN handling and other error paths are not
 * visible in this view.
 */
503 static void __pvcalls_back_accept(struct work_struct
*work
)
505 struct sockpass_mapping
*mappass
= container_of(
506 work
, struct sockpass_mapping
, register_work
);
507 struct sock_mapping
*map
;
508 struct pvcalls_ioworker
*iow
;
509 struct pvcalls_fedata
*fedata
;
511 struct xen_pvcalls_response
*rsp
;
512 struct xen_pvcalls_request
*req
;
517 fedata
= mappass
->fedata
;
519 * __pvcalls_back_accept can race against pvcalls_back_accept.
520 * We only need to check the value of "cmd" on read. It could be
521 * done atomically, but to simplify the code on the write side, we
524 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
525 req
= &mappass
->reqcopy
;
526 if (req
->cmd
!= PVCALLS_ACCEPT
) {
527 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
530 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
535 sock
->type
= mappass
->sock
->type
;
536 sock
->ops
= mappass
->sock
->ops
;
538 ret
= inet_accept(mappass
->sock
, sock
, O_NONBLOCK
, true);
539 if (ret
== -EAGAIN
) {
544 map
= pvcalls_new_active_socket(fedata
,
545 req
->u
.accept
.id_new
,
547 req
->u
.accept
.evtchn
,
/* kick the new connection's ioworker so it performs a first read */
555 map
->sockpass
= mappass
;
556 iow
= &map
->ioworker
;
557 atomic_inc(&map
->read
);
558 atomic_inc(&map
->io
);
559 queue_work(iow
->wq
, &iow
->register_work
);
562 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
563 rsp
->req_id
= req
->req_id
;
565 rsp
->u
.accept
.id
= req
->u
.accept
.id
;
567 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata
->ring
, notify
);
569 notify_remote_via_irq(fedata
->irq
);
/* cmd == 0 marks the request slot as free again */
571 mappass
->reqcopy
.cmd
= 0;
/*
 * sk_data_ready callback installed on passive sockets.
 *
 * @sock: the listening kernel sock; its sk_user_data holds the
 *        sockpass_mapping.
 *
 * With a poll request pending, answers it directly: builds the response
 * from the stored request copy, clears reqcopy.cmd, pushes the response
 * and notifies the frontend. The final unlock and queue_work of the accept
 * work item are presumably the alternative branch; the branch structure is
 * not fully visible in this view.
 */
574 static void pvcalls_pass_sk_data_ready(struct sock
*sock
)
576 struct sockpass_mapping
*mappass
= sock
->sk_user_data
;
577 struct pvcalls_fedata
*fedata
;
578 struct xen_pvcalls_response
*rsp
;
585 fedata
= mappass
->fedata
;
586 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
587 if (mappass
->reqcopy
.cmd
== PVCALLS_POLL
) {
588 rsp
= RING_GET_RESPONSE(&fedata
->ring
,
589 fedata
->ring
.rsp_prod_pvt
++);
590 rsp
->req_id
= mappass
->reqcopy
.req_id
;
591 rsp
->u
.poll
.id
= mappass
->reqcopy
.u
.poll
.id
;
592 rsp
->cmd
= mappass
->reqcopy
.cmd
;
595 mappass
->reqcopy
.cmd
= 0;
596 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
598 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata
->ring
, notify
);
600 notify_remote_via_irq(mappass
->fedata
->irq
);
602 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
603 queue_work(mappass
->wq
, &mappass
->register_work
);
/*
 * Handle a bind request: create a passive socket.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Allocates a sockpass_mapping with its accept work item, lock and
 * workqueue, creates and binds an AF_INET stream socket, inserts the
 * mapping into the passive-socket tree under socket_lock and installs
 * pvcalls_pass_sk_data_ready on the socket. The sock_release and
 * destroy_workqueue calls near the end are presumably the error path. A
 * response carrying the request id and socket id is filled in last.
 * Result checks and the return statement are not visible in this view.
 */
607 static int pvcalls_back_bind(struct xenbus_device
*dev
,
608 struct xen_pvcalls_request
*req
)
610 struct pvcalls_fedata
*fedata
;
612 struct sockpass_mapping
*map
;
613 struct xen_pvcalls_response
*rsp
;
615 fedata
= dev_get_drvdata(&dev
->dev
);
617 map
= kzalloc(sizeof(*map
), GFP_KERNEL
);
623 INIT_WORK(&map
->register_work
, __pvcalls_back_accept
);
624 spin_lock_init(&map
->copy_lock
);
625 map
->wq
= alloc_workqueue("pvcalls_wq", WQ_UNBOUND
, 1);
631 ret
= sock_create(AF_INET
, SOCK_STREAM
, 0, &map
->sock
);
635 ret
= inet_bind(map
->sock
, (struct sockaddr
*)&req
->u
.bind
.addr
,
640 map
->fedata
= fedata
;
641 map
->id
= req
->u
.bind
.id
;
643 down(&fedata
->socket_lock
);
644 ret
= radix_tree_insert(&fedata
->socketpass_mappings
, map
->id
,
646 up(&fedata
->socket_lock
);
/* save the original data_ready callback so release can restore it */
650 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
651 map
->saved_data_ready
= map
->sock
->sk
->sk_data_ready
;
652 map
->sock
->sk
->sk_user_data
= map
;
653 map
->sock
->sk
->sk_data_ready
= pvcalls_pass_sk_data_ready
;
654 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
658 if (map
&& map
->sock
)
659 sock_release(map
->sock
);
661 destroy_workqueue(map
->wq
);
664 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
665 rsp
->req_id
= req
->req_id
;
667 rsp
->u
.bind
.id
= req
->u
.bind
.id
;
/*
 * Handle a listen request.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Looks up the passive socket by id under socket_lock, calls inet_listen
 * with the requested backlog and fills in the response. The handling of a
 * failed lookup and the return statement are not visible in this view.
 */
672 static int pvcalls_back_listen(struct xenbus_device
*dev
,
673 struct xen_pvcalls_request
*req
)
675 struct pvcalls_fedata
*fedata
;
677 struct sockpass_mapping
*map
;
678 struct xen_pvcalls_response
*rsp
;
680 fedata
= dev_get_drvdata(&dev
->dev
);
682 down(&fedata
->socket_lock
);
683 map
= radix_tree_lookup(&fedata
->socketpass_mappings
, req
->u
.listen
.id
);
684 up(&fedata
->socket_lock
);
688 ret
= inet_listen(map
->sock
, req
->u
.listen
.backlog
);
691 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
692 rsp
->req_id
= req
->req_id
;
694 rsp
->u
.listen
.id
= req
->u
.listen
.id
;
/*
 * Handle an accept request.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Looks up the passive socket and, if no accept/poll is already
 * outstanding (reqcopy.cmd == 0), stores a copy of the request and queues
 * the accept work item; in that case the response is produced later by
 * __pvcalls_back_accept. The response built at the end of the function is
 * presumably for the immediate (error) path; the jumps and return values
 * are not visible in this view.
 */
699 static int pvcalls_back_accept(struct xenbus_device
*dev
,
700 struct xen_pvcalls_request
*req
)
702 struct pvcalls_fedata
*fedata
;
703 struct sockpass_mapping
*mappass
;
705 struct xen_pvcalls_response
*rsp
;
708 fedata
= dev_get_drvdata(&dev
->dev
);
710 down(&fedata
->socket_lock
);
711 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
713 up(&fedata
->socket_lock
);
718 * Limitation of the current implementation: only support one
719 * concurrent accept or poll call on one socket.
721 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
722 if (mappass
->reqcopy
.cmd
!= 0) {
723 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
728 mappass
->reqcopy
= *req
;
729 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
730 queue_work(mappass
->wq
, &mappass
->register_work
);
732 /* Tell the caller we don't need to send back a notification yet */
736 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
737 rsp
->req_id
= req
->req_id
;
739 rsp
->u
.accept
.id
= req
->u
.accept
.id
;
/*
 * Handle a poll request on a passive socket.
 *
 * @dev: xenbus device of the frontend.
 * @req: the request taken from the command ring.
 *
 * Looks up the passive socket and, under copy_lock, stores a copy of the
 * request unless another accept/poll is outstanding. It then checks
 * whether the listening socket's accept queue already has an entry. The
 * visible statements clear reqcopy.cmd again in one branch and defer the
 * reply in another ("we don't need to send back a notification yet");
 * which condition selects which branch, and the return values, are not
 * visible in this view.
 */
744 static int pvcalls_back_poll(struct xenbus_device
*dev
,
745 struct xen_pvcalls_request
*req
)
747 struct pvcalls_fedata
*fedata
;
748 struct sockpass_mapping
*mappass
;
749 struct xen_pvcalls_response
*rsp
;
750 struct inet_connection_sock
*icsk
;
751 struct request_sock_queue
*queue
;
756 fedata
= dev_get_drvdata(&dev
->dev
);
758 down(&fedata
->socket_lock
);
759 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
761 up(&fedata
->socket_lock
);
766 * Limitation of the current implementation: only support one
767 * concurrent accept or poll call on one socket.
769 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
770 if (mappass
->reqcopy
.cmd
!= 0) {
775 mappass
->reqcopy
= *req
;
/* data != 0 when the accept queue head is non-NULL */
776 icsk
= inet_csk(mappass
->sock
->sk
);
777 queue
= &icsk
->icsk_accept_queue
;
778 data
= READ_ONCE(queue
->rskq_accept_head
) != NULL
;
780 mappass
->reqcopy
.cmd
= 0;
784 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
786 /* Tell the caller we don't need to send back a notification yet */
790 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
792 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
793 rsp
->req_id
= req
->req_id
;
795 rsp
->u
.poll
.id
= req
->u
.poll
.id
;
/*
 * Dispatch one request to its per-command handler.
 *
 * @dev: xenbus device of the frontend.
 * @req: private copy of the request taken from the command ring.
 *
 * The handlers for socket, connect, release, bind, listen, accept and poll
 * are called from here. The last visible statements build a response with
 * ret = -ENOTSUPP (presumably for an unrecognised command; the switch and
 * most case labels are not visible in this view). pvcalls_back_work pushes
 * responses when this function returns 0.
 */
800 static int pvcalls_back_handle_cmd(struct xenbus_device
*dev
,
801 struct xen_pvcalls_request
*req
)
807 ret
= pvcalls_back_socket(dev
, req
);
809 case PVCALLS_CONNECT
:
810 ret
= pvcalls_back_connect(dev
, req
);
812 case PVCALLS_RELEASE
:
813 ret
= pvcalls_back_release(dev
, req
);
816 ret
= pvcalls_back_bind(dev
, req
);
819 ret
= pvcalls_back_listen(dev
, req
);
822 ret
= pvcalls_back_accept(dev
, req
);
825 ret
= pvcalls_back_poll(dev
, req
);
829 struct pvcalls_fedata
*fedata
;
830 struct xen_pvcalls_response
*rsp
;
832 fedata
= dev_get_drvdata(&dev
->dev
);
833 rsp
= RING_GET_RESPONSE(
834 &fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
835 rsp
->req_id
= req
->req_id
;
837 rsp
->ret
= -ENOTSUPP
;
/*
 * Drain the command ring of one frontend.
 *
 * @fedata: the frontend whose ring is processed.
 *
 * Copies each unconsumed request out of the shared ring before handling
 * it. When the handler returns 0 the produced response is pushed and the
 * need for a notification is accumulated in notify_all. The frontend is
 * notified through fedata->irq, and a final check for requests that
 * arrived in the meantime is made. The outer loop on "more" and the
 * condition guarding the notification are not visible in this view.
 */
844 static void pvcalls_back_work(struct pvcalls_fedata
*fedata
)
846 int notify
, notify_all
= 0, more
= 1;
847 struct xen_pvcalls_request req
;
848 struct xenbus_device
*dev
= fedata
->dev
;
851 while (RING_HAS_UNCONSUMED_REQUESTS(&fedata
->ring
)) {
852 RING_COPY_REQUEST(&fedata
->ring
,
853 fedata
->ring
.req_cons
++,
856 if (!pvcalls_back_handle_cmd(dev
, &req
)) {
857 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
858 &fedata
->ring
, notify
);
859 notify_all
+= notify
;
864 notify_remote_via_irq(fedata
->irq
);
868 RING_FINAL_CHECK_FOR_REQUESTS(&fedata
->ring
, more
);
/*
 * Interrupt handler for the command-ring event channel (registered with
 * request_threaded_irq in backend_connect).
 *
 * @irq:    interrupt number.
 * @dev_id: the xenbus device passed at registration.
 *
 * Fetches the per-frontend data from the device and processes the command
 * ring. NULL checks and the return value are not visible in this view.
 */
872 static irqreturn_t
pvcalls_back_event(int irq
, void *dev_id
)
874 struct xenbus_device
*dev
= dev_id
;
875 struct pvcalls_fedata
*fedata
= NULL
;
880 fedata
= dev_get_drvdata(&dev
->dev
);
884 pvcalls_back_work(fedata
);
/*
 * Interrupt handler for an active socket's data-ring event channel.
 *
 * @irq:      interrupt number.
 * @sock_map: the struct sock_mapping bound to this event channel.
 *
 * Checks that the mapping, its socket and its sock are non-NULL and that
 * the sock's sk_user_data still points at this mapping. It then marks a
 * write as pending, bumps the io counter and queues the ioworker. What the
 * handler does when the check fails, and its return value, are not visible
 * in this view.
 */
888 static irqreturn_t
pvcalls_back_conn_event(int irq
, void *sock_map
)
890 struct sock_mapping
*map
= sock_map
;
891 struct pvcalls_ioworker
*iow
;
893 if (map
== NULL
|| map
->sock
== NULL
|| map
->sock
->sk
== NULL
||
894 map
->sock
->sk
->sk_user_data
!= map
)
897 iow
= &map
->ioworker
;
899 atomic_inc(&map
->write
);
900 atomic_inc(&map
->io
);
901 queue_work(iow
->wq
, &iow
->register_work
);
/*
 * Set up the per-frontend state when a frontend connects.
 *
 * @dev: xenbus device of the frontend.
 *
 * Allocates the pvcalls_fedata, reads the event-channel port and ring
 * reference from the frontend's xenstore directory, binds the event
 * channel and registers pvcalls_back_event as a threaded handler, maps the
 * one-page command ring and initialises the back ring, the socket
 * containers and their lock. The state is then attached to the device and
 * added to the global frontend list under frontends_lock. The unbind/unmap
 * statements at the end are presumably the error path. Result checks and
 * return statements are not visible in this view.
 */
906 static int backend_connect(struct xenbus_device
*dev
)
909 grant_ref_t ring_ref
;
910 struct pvcalls_fedata
*fedata
= NULL
;
912 fedata
= kzalloc(sizeof(struct pvcalls_fedata
), GFP_KERNEL
);
917 err
= xenbus_scanf(XBT_NIL
, dev
->otherend
, "port", "%u",
/*
 * NOTE(review): the message below names "event-channel" while the key
 * read above is "port" - confirm which is intended.
 */
921 xenbus_dev_fatal(dev
, err
, "reading %s/event-channel",
926 err
= xenbus_scanf(XBT_NIL
, dev
->otherend
, "ring-ref", "%u", &ring_ref
);
929 xenbus_dev_fatal(dev
, err
, "reading %s/ring-ref",
934 err
= bind_interdomain_evtchn_to_irq(dev
->otherend_id
, evtchn
);
939 err
= request_threaded_irq(fedata
->irq
, NULL
, pvcalls_back_event
,
940 IRQF_ONESHOT
, "pvcalls-back", dev
);
944 err
= xenbus_map_ring_valloc(dev
, &ring_ref
, 1,
945 (void **)&fedata
->sring
);
949 BACK_RING_INIT(&fedata
->ring
, fedata
->sring
, XEN_PAGE_SIZE
* 1);
952 INIT_LIST_HEAD(&fedata
->socket_mappings
);
953 INIT_RADIX_TREE(&fedata
->socketpass_mappings
, GFP_KERNEL
);
954 sema_init(&fedata
->socket_lock
, 1);
955 dev_set_drvdata(&dev
->dev
, fedata
);
957 down(&pvcalls_back_global
.frontends_lock
);
958 list_add_tail(&fedata
->list
, &pvcalls_back_global
.frontends
);
959 up(&pvcalls_back_global
.frontends_lock
);
964 if (fedata
->irq
>= 0)
965 unbind_from_irqhandler(fedata
->irq
, dev
);
966 if (fedata
->sring
!= NULL
)
967 xenbus_unmap_ring_vfree(dev
, fedata
->sring
);
/*
 * Tear down all state of one frontend.
 *
 * @dev: xenbus device of the frontend.
 *
 * Under socket_lock, releases every active socket and every passive socket
 * found in the radix tree. It then unbinds the command-ring irq, unmaps
 * the command ring, unlinks the frontend from the global list and clears
 * the device's drvdata. Both visible callers hold
 * pvcalls_back_global.frontends_lock around this call. Freeing of fedata
 * and the return statement are not visible in this view.
 */
972 static int backend_disconnect(struct xenbus_device
*dev
)
974 struct pvcalls_fedata
*fedata
;
975 struct sock_mapping
*map
, *n
;
976 struct sockpass_mapping
*mappass
;
977 struct radix_tree_iter iter
;
981 fedata
= dev_get_drvdata(&dev
->dev
);
983 down(&fedata
->socket_lock
);
984 list_for_each_entry_safe(map
, n
, &fedata
->socket_mappings
, list
) {
985 list_del(&map
->list
);
986 pvcalls_back_release_active(dev
, fedata
, map
);
989 radix_tree_for_each_slot(slot
, &fedata
->socketpass_mappings
, &iter
, 0) {
990 mappass
= radix_tree_deref_slot(slot
);
993 if (radix_tree_exception(mappass
)) {
994 if (radix_tree_deref_retry(mappass
))
995 slot
= radix_tree_iter_retry(&iter
);
997 radix_tree_delete(&fedata
->socketpass_mappings
,
999 pvcalls_back_release_passive(dev
, fedata
, mappass
);
1002 up(&fedata
->socket_lock
);
1004 unbind_from_irqhandler(fedata
->irq
, dev
);
1005 xenbus_unmap_ring_vfree(dev
, fedata
->sring
);
1007 list_del(&fedata
->list
);
1009 dev_set_drvdata(&dev
->dev
, NULL
);
/*
 * Probe callback: advertise the backend's capabilities in xenstore.
 *
 * @dev: the new xenbus device.
 * @id:  matching device id entry.
 *
 * Within a xenstore transaction, writes the "versions", "max-page-order"
 * and "function-calls" keys under the device's node, warning on each
 * failure. The -EAGAIN check after xenbus_transaction_end presumably
 * restarts the transaction (the jump is not visible). Finally switches the
 * device to XenbusStateInitWait. Error returns are not visible in this
 * view.
 */
1014 static int pvcalls_back_probe(struct xenbus_device
*dev
,
1015 const struct xenbus_device_id
*id
)
1018 struct xenbus_transaction xbt
;
1023 err
= xenbus_transaction_start(&xbt
);
1025 pr_warn("%s cannot create xenstore transaction\n", __func__
);
1029 err
= xenbus_printf(xbt
, dev
->nodename
, "versions", "%s",
1032 pr_warn("%s write out 'versions' failed\n", __func__
);
1036 err
= xenbus_printf(xbt
, dev
->nodename
, "max-page-order", "%u",
1039 pr_warn("%s write out 'max-page-order' failed\n", __func__
);
1043 err
= xenbus_printf(xbt
, dev
->nodename
, "function-calls",
1044 XENBUS_FUNCTIONS_CALLS
);
1046 pr_warn("%s write out 'function-calls' failed\n", __func__
);
1052 err
= xenbus_transaction_end(xbt
, abort
);
1054 if (err
== -EAGAIN
&& !abort
)
1056 pr_warn("%s cannot complete xenstore transaction\n", __func__
);
1063 xenbus_switch_state(dev
, XenbusStateInitWait
);
/*
 * Step the backend's xenbus state towards a target state.
 *
 * @dev:   xenbus device of the frontend.
 * @state: the state to reach.
 *
 * Loops until dev->state equals the target, switching state one step per
 * iteration. backend_connect() is called right before switching to
 * Connected. backend_disconnect() is called under frontends_lock right
 * before one of the switches to Closing (presumably when leaving
 * Connected; the outer case labels are only partly visible). The breaks
 * and default cases are not visible in this view.
 */
1068 static void set_backend_state(struct xenbus_device
*dev
,
1069 enum xenbus_state state
)
1071 while (dev
->state
!= state
) {
1072 switch (dev
->state
) {
1073 case XenbusStateClosed
:
1075 case XenbusStateInitWait
:
1076 case XenbusStateConnected
:
1077 xenbus_switch_state(dev
, XenbusStateInitWait
);
1079 case XenbusStateClosing
:
1080 xenbus_switch_state(dev
, XenbusStateClosing
);
1086 case XenbusStateInitWait
:
1087 case XenbusStateInitialised
:
1089 case XenbusStateConnected
:
/*
 * NOTE(review): the result of backend_connect() is discarded here; the
 * state is switched to Connected regardless - confirm this is intended.
 */
1090 backend_connect(dev
);
1091 xenbus_switch_state(dev
, XenbusStateConnected
);
1093 case XenbusStateClosing
:
1094 case XenbusStateClosed
:
1095 xenbus_switch_state(dev
, XenbusStateClosing
);
1101 case XenbusStateConnected
:
1103 case XenbusStateInitWait
:
1104 case XenbusStateClosing
:
1105 case XenbusStateClosed
:
1106 down(&pvcalls_back_global
.frontends_lock
);
1107 backend_disconnect(dev
);
1108 up(&pvcalls_back_global
.frontends_lock
);
1109 xenbus_switch_state(dev
, XenbusStateClosing
);
1115 case XenbusStateClosing
:
1117 case XenbusStateInitWait
:
1118 case XenbusStateConnected
:
1119 case XenbusStateClosed
:
1120 xenbus_switch_state(dev
, XenbusStateClosed
);
/*
 * otherend_changed callback: react to a frontend state change.
 *
 * @dev:            xenbus device of the frontend.
 * @frontend_state: the frontend's new state.
 *
 * Maps the frontend state to a backend target state and drives the backend
 * there with set_backend_state(). On Closed the device is unregistered
 * after a xenbus_dev_is_online() check (what that check skips is not
 * visible); on Unknown it is unregistered directly. The final
 * xenbus_dev_fatal reports an unexpected state (presumably the default
 * case; its label is not visible in this view).
 */
1132 static void pvcalls_back_changed(struct xenbus_device
*dev
,
1133 enum xenbus_state frontend_state
)
1135 switch (frontend_state
) {
1136 case XenbusStateInitialising
:
1137 set_backend_state(dev
, XenbusStateInitWait
);
1140 case XenbusStateInitialised
:
1141 case XenbusStateConnected
:
1142 set_backend_state(dev
, XenbusStateConnected
);
1145 case XenbusStateClosing
:
1146 set_backend_state(dev
, XenbusStateClosing
);
1149 case XenbusStateClosed
:
1150 set_backend_state(dev
, XenbusStateClosed
);
1151 if (xenbus_dev_is_online(dev
))
1153 device_unregister(&dev
->dev
);
1155 case XenbusStateUnknown
:
1156 set_backend_state(dev
, XenbusStateClosed
);
1157 device_unregister(&dev
->dev
);
1161 xenbus_dev_fatal(dev
, -EINVAL
, "saw state %d at frontend",
/*
 * remove callback of the xenbus driver (see pvcalls_back_driver).
 * The function body is not visible in this view.
 */
1167 static int pvcalls_back_remove(struct xenbus_device
*dev
)
/*
 * uevent callback of the xenbus driver (see pvcalls_back_driver).
 * The function body is not visible in this view.
 */
1172 static int pvcalls_back_uevent(struct xenbus_device
*xdev
,
1173 struct kobj_uevent_env
*env
)
/*
 * Device id table referenced by pvcalls_back_driver.ids.
 * The initializer entries are not visible in this view.
 */
1178 static const struct xenbus_device_id pvcalls_back_ids
[] = {
/*
 * xenbus driver description: ties the id table and the probe, remove,
 * uevent and frontend-state-change callbacks together. Registered in
 * pvcalls_back_init and unregistered in pvcalls_back_fin.
 */
1183 static struct xenbus_driver pvcalls_back_driver
= {
1184 .ids
= pvcalls_back_ids
,
1185 .probe
= pvcalls_back_probe
,
1186 .remove
= pvcalls_back_remove
,
1187 .uevent
= pvcalls_back_uevent
,
1188 .otherend_changed
= pvcalls_back_changed
,
/*
 * Module init: register the xenbus backend driver and initialise the
 * global frontend list and its lock. Result checks and the return
 * statement are not visible in this view.
 *
 * NOTE(review): in the visible order the lock and list are initialised
 * after xenbus_register_backend(); confirm no callback can reach them
 * before that point.
 */
1191 static int __init
pvcalls_back_init(void)
1198 ret
= xenbus_register_backend(&pvcalls_back_driver
);
1202 sema_init(&pvcalls_back_global
.frontends_lock
, 1);
1203 INIT_LIST_HEAD(&pvcalls_back_global
.frontends
);
1206 module_init(pvcalls_back_init
);
/*
 * Module exit: disconnect every frontend still on the global list while
 * holding frontends_lock, then unregister the xenbus driver. The _safe
 * iterator is used because backend_disconnect() unlinks the entry.
 */
1208 static void __exit
pvcalls_back_fin(void)
1210 struct pvcalls_fedata
*fedata
, *nfedata
;
1212 down(&pvcalls_back_global
.frontends_lock
);
1213 list_for_each_entry_safe(fedata
, nfedata
,
1214 &pvcalls_back_global
.frontends
, list
) {
1215 backend_disconnect(fedata
->dev
);
1217 up(&pvcalls_back_global
.frontends_lock
);
1219 xenbus_unregister_driver(&pvcalls_back_driver
);
1222 module_exit(pvcalls_back_fin
);
1224 MODULE_DESCRIPTION("Xen PV Calls backend driver");
1225 MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
1226 MODULE_LICENSE("GPL");