/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/wait.h>

#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/request_sock.h>

#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pvcalls.h>
#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
struct pvcalls_back_global {
        struct list_head frontends;
        struct semaphore frontends_lock;
} pvcalls_back_global;
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
        struct list_head list;
        struct xenbus_device *dev;
        struct xen_pvcalls_sring *sring;
        struct xen_pvcalls_back_ring ring;
        struct list_head socket_mappings;
        struct radix_tree_root socketpass_mappings;
        struct semaphore socket_lock;
};
struct pvcalls_ioworker {
        struct work_struct register_work;
        struct workqueue_struct *wq;
};
struct sock_mapping {
        struct list_head list;
        struct pvcalls_fedata *fedata;
        struct sockpass_mapping *sockpass;
        struct pvcalls_data_intf *ring;
        struct pvcalls_data data;
        void (*saved_data_ready)(struct sock *sk);
        struct pvcalls_ioworker ioworker;
};
struct sockpass_mapping {
        struct list_head list;
        struct pvcalls_fedata *fedata;
        struct xen_pvcalls_request reqcopy;
        struct workqueue_struct *wq;
        struct work_struct register_work;
        void (*saved_data_ready)(struct sock *sk);
};
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
static int pvcalls_back_release_active(struct xenbus_device *dev,
                                       struct pvcalls_fedata *fedata,
                                       struct sock_mapping *map);
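
/*
 * Move data from the kernel socket into the shared "in" data ring.
 * A sketch of the ring accounting, assuming the usual XEN_FLEX_RING
 * helpers: pvcalls_queued(prod, cons, array_size) is the number of bytes
 * already queued and pvcalls_mask() wraps an index into the ring. E.g.
 * with array_size = 4096, cons = 100 and prod = 4000 there are 3900 bytes
 * queued and 196 bytes free; because the free region wraps past the end
 * of the buffer, the recvmsg below needs two kvecs (96 bytes at offset
 * 4000, then 100 bytes at offset 0).
 */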
static void pvcalls_conn_back_read(void *opaque)
{
        struct sock_mapping *map = (struct sock_mapping *)opaque;
        struct msghdr msg;
        struct kvec vec[2];
        RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
        int32_t error;
        struct pvcalls_data_intf *intf = map->ring;
        struct pvcalls_data *data = &map->data;
        unsigned long flags;
        int ret;

        array_size = XEN_FLEX_RING_SIZE(map->ring_order);
        cons = intf->in_cons;
        prod = intf->in_prod;
        error = intf->in_error;
        /* read the indexes first, then deal with the data */
        virt_mb();

        if (error)
                return;

        size = pvcalls_queued(prod, cons, array_size);
        if (size >= array_size)
                return;
        spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
        if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
                atomic_set(&map->read, 0);
                spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
                                       flags);
                return;
        }
        spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
        wanted = array_size - size;
        masked_prod = pvcalls_mask(prod, array_size);
        masked_cons = pvcalls_mask(cons, array_size);

        memset(&msg, 0, sizeof(msg));
        if (masked_prod < masked_cons) {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = wanted;
                iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
        } else {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = array_size - masked_prod;
                vec[1].iov_base = data->in;
                vec[1].iov_len = wanted - vec[0].iov_len;
                iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
        }

        atomic_set(&map->read, 0);
        ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
        WARN_ON(ret > wanted);
        if (ret == -EAGAIN) /* shouldn't happen */
                return;
        spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
        if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
                atomic_inc(&map->read);
        spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);

        /* write the data, then modify the indexes */
        virt_wmb();
        if (ret < 0) {
                atomic_set(&map->read, 0);
                intf->in_error = ret;
        } else
                intf->in_prod = prod + ret;
        /* update the indexes, then notify the other end */
        virt_wmb();
        notify_remote_via_irq(map->irq);
}
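
/*
 * Drain the shared "out" data ring into the kernel socket with
 * inet_sendmsg(). On a short or failed send the worker re-arms itself so
 * that the remaining bytes are retried on a later pass.
 */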
static void pvcalls_conn_back_write(struct sock_mapping *map)
{
        struct pvcalls_data_intf *intf = map->ring;
        struct pvcalls_data *data = &map->data;
        struct msghdr msg;
        struct kvec vec[2];
        RING_IDX cons, prod, size, array_size;
        int ret;

        cons = intf->out_cons;
        prod = intf->out_prod;
        /* read the indexes before dealing with the data */
        virt_mb();

        array_size = XEN_FLEX_RING_SIZE(map->ring_order);
        size = pvcalls_queued(prod, cons, array_size);
        if (size == 0)
                return;

        memset(&msg, 0, sizeof(msg));
        msg.msg_flags |= MSG_DONTWAIT;
        if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = size;
                iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
        } else {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
                vec[1].iov_base = data->out;
                vec[1].iov_len = size - vec[0].iov_len;
                iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
        }

        atomic_set(&map->write, 0);
        ret = inet_sendmsg(map->sock, &msg, size);
        if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
                atomic_inc(&map->write);
                atomic_inc(&map->io);
        }
        if (ret == -EAGAIN)
                return;

        /* write the data, then update the indexes */
        virt_wmb();
        if (ret < 0) {
                intf->out_error = ret;
        } else {
                intf->out_cons = cons + ret;
                prod = intf->out_prod;
        }
        /* update the indexes, then notify the other end */
        virt_wmb();
        if (prod != cons + ret)
                atomic_inc(&map->write);
        notify_remote_via_irq(map->irq);
}
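
/*
 * Per-socket work item: runs the deferred socket I/O (reads and writes
 * scheduled from the sk callbacks and from the connection event handler)
 * outside of atomic context, looping while map->io is elevated.
 */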
static void pvcalls_back_ioworker(struct work_struct *work)
{
        struct pvcalls_ioworker *ioworker = container_of(work,
                struct pvcalls_ioworker, register_work);
        struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
                ioworker);

        while (atomic_read(&map->io) > 0) {
                if (atomic_read(&map->release) > 0) {
                        atomic_set(&map->release, 0);
                        return;
                }

                if (atomic_read(&map->read) > 0)
                        pvcalls_conn_back_read(map);
                if (atomic_read(&map->write) > 0)
                        pvcalls_conn_back_write(map);

                atomic_dec(&map->io);
        }
}
static int pvcalls_back_socket(struct xenbus_device *dev,
                struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        int ret;
        struct xen_pvcalls_response *rsp;

        fedata = dev_get_drvdata(&dev->dev);

        if (req->u.socket.domain != AF_INET ||
            req->u.socket.type != SOCK_STREAM ||
            (req->u.socket.protocol != IPPROTO_IP &&
             req->u.socket.protocol != AF_INET))
                ret = -EOPNOTSUPP;
        else
                ret = 0;

        /* leave the actual socket allocation for later */

        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.socket.id = req->u.socket.id;
        rsp->ret = ret;

        return 0;
}
static void pvcalls_sk_state_change(struct sock *sock)
{
        struct sock_mapping *map = sock->sk_user_data;

        if (map == NULL)
                return;

        atomic_inc(&map->read);
        notify_remote_via_irq(map->irq);
}
static void pvcalls_sk_data_ready(struct sock *sock)
{
        struct sock_mapping *map = sock->sk_user_data;
        struct pvcalls_ioworker *iow;

        if (map == NULL)
                return;

        iow = &map->ioworker;
        atomic_inc(&map->read);
        atomic_inc(&map->io);
        queue_work(iow->wq, &iow->register_work);
}
static struct sock_mapping *pvcalls_new_active_socket(
                struct pvcalls_fedata *fedata,
                uint64_t id,
                grant_ref_t ref,
                uint32_t evtchn,
                struct socket *sock)
{
        struct sock_mapping *map;
        void *page;
        int ret;

        map = kzalloc(sizeof(*map), GFP_KERNEL);
        if (map == NULL)
                return NULL;

        map->fedata = fedata;
        map->sock = sock;
        map->id = id;
        map->ref = ref;

        ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
        if (ret < 0)
                goto out;
        map->ring = page;
        map->ring_order = map->ring->ring_order;
        /* first read the order, then map the data ring */
        virt_rmb();
        if (map->ring_order > MAX_RING_ORDER) {
                pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
                        __func__, map->ring_order, MAX_RING_ORDER);
                goto out;
        }
        ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
                                     (1 << map->ring_order), &page);
        if (ret < 0)
                goto out;
        map->bytes = page;

        ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
                                                    evtchn,
                                                    pvcalls_back_conn_event,
                                                    0,
                                                    "pvcalls-backend",
                                                    map);
        if (ret < 0)
                goto out;
        map->irq = ret;

        map->data.in = map->bytes;
        map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);

        map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
        if (!map->ioworker.wq)
                goto out;
        atomic_set(&map->io, 1);
        INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);

        down(&fedata->socket_lock);
        list_add_tail(&map->list, &fedata->socket_mappings);
        up(&fedata->socket_lock);

        write_lock_bh(&map->sock->sk->sk_callback_lock);
        map->saved_data_ready = map->sock->sk->sk_data_ready;
        map->sock->sk->sk_user_data = map;
        map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
        map->sock->sk->sk_state_change = pvcalls_sk_state_change;
        write_unlock_bh(&map->sock->sk->sk_callback_lock);

        return map;

out:
        down(&fedata->socket_lock);
        list_del(&map->list);
        pvcalls_back_release_active(fedata->dev, fedata, map);
        up(&fedata->socket_lock);
        return NULL;
}
static int pvcalls_back_connect(struct xenbus_device *dev,
                                struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        int ret = -EINVAL;
        struct socket *sock;
        struct sock_mapping *map;
        struct xen_pvcalls_response *rsp;
        struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;

        fedata = dev_get_drvdata(&dev->dev);

        if (req->u.connect.len < sizeof(sa->sa_family) ||
            req->u.connect.len > sizeof(req->u.connect.addr) ||
            sa->sa_family != AF_INET)
                goto out;

        ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
        if (ret < 0)
                goto out;
        ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
        if (ret < 0) {
                sock_release(sock);
                goto out;
        }

        map = pvcalls_new_active_socket(fedata,
                                        req->u.connect.id,
                                        req->u.connect.ref,
                                        req->u.connect.evtchn,
                                        sock);
        if (!map) {
                ret = -EFAULT;
                sock_release(sock);
        }

out:
        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.connect.id = req->u.connect.id;
        rsp->ret = ret;

        return 0;
}
static int pvcalls_back_release_active(struct xenbus_device *dev,
                                       struct pvcalls_fedata *fedata,
                                       struct sock_mapping *map)
{
        disable_irq(map->irq);
        if (map->sock->sk != NULL) {
                write_lock_bh(&map->sock->sk->sk_callback_lock);
                map->sock->sk->sk_user_data = NULL;
                map->sock->sk->sk_data_ready = map->saved_data_ready;
                write_unlock_bh(&map->sock->sk->sk_callback_lock);
        }

        atomic_set(&map->release, 1);
        flush_work(&map->ioworker.register_work);

        xenbus_unmap_ring_vfree(dev, map->bytes);
        xenbus_unmap_ring_vfree(dev, (void *)map->ring);
        unbind_from_irqhandler(map->irq, map);

        sock_release(map->sock);
        kfree(map);

        return 0;
}
static int pvcalls_back_release_passive(struct xenbus_device *dev,
                                        struct pvcalls_fedata *fedata,
                                        struct sockpass_mapping *mappass)
{
        if (mappass->sock->sk != NULL) {
                write_lock_bh(&mappass->sock->sk->sk_callback_lock);
                mappass->sock->sk->sk_user_data = NULL;
                mappass->sock->sk->sk_data_ready = mappass->saved_data_ready;
                write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
        }
        sock_release(mappass->sock);
        flush_workqueue(mappass->wq);
        destroy_workqueue(mappass->wq);
        kfree(mappass);

        return 0;
}
static int pvcalls_back_release(struct xenbus_device *dev,
                                struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        struct sock_mapping *map, *n;
        struct sockpass_mapping *mappass;
        int ret = 0;
        struct xen_pvcalls_response *rsp;

        fedata = dev_get_drvdata(&dev->dev);

        down(&fedata->socket_lock);
        list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
                if (map->id == req->u.release.id) {
                        list_del(&map->list);
                        up(&fedata->socket_lock);
                        ret = pvcalls_back_release_active(dev, fedata, map);
                        goto out;
                }
        }
        mappass = radix_tree_lookup(&fedata->socketpass_mappings,
                                    req->u.release.id);
        if (mappass != NULL) {
                radix_tree_delete(&fedata->socketpass_mappings, mappass->id);
                up(&fedata->socket_lock);
                ret = pvcalls_back_release_passive(dev, fedata, mappass);
        } else
                up(&fedata->socket_lock);

out:
        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.release.id = req->u.release.id;
        rsp->ret = ret;

        return 0;
}
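
/*
 * Deferred accept: runs from the per-listener workqueue once
 * pvcalls_back_accept() has stashed the request in mappass->reqcopy,
 * calls inet_accept() and, on success, turns the new connection into an
 * active sock_mapping before pushing the response.
 */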
static void __pvcalls_back_accept(struct work_struct *work)
{
        struct sockpass_mapping *mappass = container_of(
                work, struct sockpass_mapping, register_work);
        struct sock_mapping *map;
        struct pvcalls_ioworker *iow;
        struct pvcalls_fedata *fedata;
        struct socket *sock;
        struct xen_pvcalls_response *rsp;
        struct xen_pvcalls_request *req;
        int notify;
        int ret = -EINVAL;
        unsigned long flags;

        fedata = mappass->fedata;
        /*
         * __pvcalls_back_accept can race against pvcalls_back_accept.
         * We only need to check the value of "cmd" on read. It could be
         * done atomically, but to simplify the code on the write side, we
         * use a spinlock.
         */
        spin_lock_irqsave(&mappass->copy_lock, flags);
        req = &mappass->reqcopy;
        if (req->cmd != PVCALLS_ACCEPT) {
                spin_unlock_irqrestore(&mappass->copy_lock, flags);
                return;
        }
        spin_unlock_irqrestore(&mappass->copy_lock, flags);

        sock = sock_alloc();
        if (sock == NULL)
                goto out_error;
        sock->type = mappass->sock->type;
        sock->ops = mappass->sock->ops;

        ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
        if (ret == -EAGAIN) {
                sock_release(sock);
                goto out_error;
        }

        map = pvcalls_new_active_socket(fedata,
                                        req->u.accept.id_new,
                                        req->u.accept.ref,
                                        req->u.accept.evtchn,
                                        sock);
        if (!map) {
                ret = -EFAULT;
                sock_release(sock);
                goto out_error;
        }

        map->sockpass = mappass;
        iow = &map->ioworker;
        atomic_inc(&map->read);
        atomic_inc(&map->io);
        queue_work(iow->wq, &iow->register_work);

out_error:
        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.accept.id = req->u.accept.id;
        rsp->ret = ret;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
        if (notify)
                notify_remote_via_irq(fedata->irq);

        mappass->reqcopy.cmd = 0;
}
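
/*
 * sk_data_ready callback of a listening socket: if a PVCALLS_POLL request
 * is pending, answer it directly from here; otherwise kick the accept
 * worker on the listener's workqueue.
 */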
static void pvcalls_pass_sk_data_ready(struct sock *sock)
{
        struct sockpass_mapping *mappass = sock->sk_user_data;
        struct pvcalls_fedata *fedata;
        struct xen_pvcalls_response *rsp;
        unsigned long flags;
        int notify;

        if (mappass == NULL)
                return;

        fedata = mappass->fedata;
        spin_lock_irqsave(&mappass->copy_lock, flags);
        if (mappass->reqcopy.cmd == PVCALLS_POLL) {
                rsp = RING_GET_RESPONSE(&fedata->ring,
                                        fedata->ring.rsp_prod_pvt++);
                rsp->req_id = mappass->reqcopy.req_id;
                rsp->u.poll.id = mappass->reqcopy.u.poll.id;
                rsp->cmd = mappass->reqcopy.cmd;
                rsp->ret = 0;

                mappass->reqcopy.cmd = 0;
                spin_unlock_irqrestore(&mappass->copy_lock, flags);

                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
                if (notify)
                        notify_remote_via_irq(mappass->fedata->irq);
        } else {
                spin_unlock_irqrestore(&mappass->copy_lock, flags);
                queue_work(mappass->wq, &mappass->register_work);
        }
}
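
/*
 * PVCALLS_BIND: create the passive socket, bind it, hook its data_ready
 * callback and track it in the per-frontend radix tree keyed by the
 * frontend-chosen socket id.
 */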
static int pvcalls_back_bind(struct xenbus_device *dev,
                             struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        int ret;
        struct sockpass_mapping *map;
        struct xen_pvcalls_response *rsp;

        fedata = dev_get_drvdata(&dev->dev);

        map = kzalloc(sizeof(*map), GFP_KERNEL);
        if (map == NULL) {
                ret = -ENOMEM;
                goto out;
        }

        INIT_WORK(&map->register_work, __pvcalls_back_accept);
        spin_lock_init(&map->copy_lock);
        map->wq = alloc_workqueue("pvcalls_wq", WQ_UNBOUND, 1);
        if (!map->wq) {
                ret = -ENOMEM;
                goto out;
        }

        ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock);
        if (ret < 0)
                goto out;

        ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr,
                        req->u.bind.len);
        if (ret < 0)
                goto out;

        map->fedata = fedata;
        map->id = req->u.bind.id;

        down(&fedata->socket_lock);
        ret = radix_tree_insert(&fedata->socketpass_mappings, map->id,
                                map);
        up(&fedata->socket_lock);
        if (ret)
                goto out;

        write_lock_bh(&map->sock->sk->sk_callback_lock);
        map->saved_data_ready = map->sock->sk->sk_data_ready;
        map->sock->sk->sk_user_data = map;
        map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready;
        write_unlock_bh(&map->sock->sk->sk_callback_lock);

out:
        if (ret) {
                if (map && map->sock)
                        sock_release(map->sock);
                if (map && map->wq)
                        destroy_workqueue(map->wq);
                kfree(map);
        }

        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.bind.id = req->u.bind.id;
        rsp->ret = ret;

        return 0;
}
static int pvcalls_back_listen(struct xenbus_device *dev,
                               struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        int ret = -EINVAL;
        struct sockpass_mapping *map;
        struct xen_pvcalls_response *rsp;

        fedata = dev_get_drvdata(&dev->dev);

        down(&fedata->socket_lock);
        map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id);
        up(&fedata->socket_lock);
        if (map == NULL)
                goto out;

        ret = inet_listen(map->sock, req->u.listen.backlog);

out:
        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.listen.id = req->u.listen.id;
        rsp->ret = ret;

        return 0;
}
static int pvcalls_back_accept(struct xenbus_device *dev,
                               struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        struct sockpass_mapping *mappass;
        int ret = -EINVAL;
        struct xen_pvcalls_response *rsp;
        unsigned long flags;

        fedata = dev_get_drvdata(&dev->dev);

        down(&fedata->socket_lock);
        mappass = radix_tree_lookup(&fedata->socketpass_mappings,
                req->u.accept.id);
        up(&fedata->socket_lock);
        if (mappass == NULL)
                goto out_error;

        /*
         * Limitation of the current implementation: only support one
         * concurrent accept or poll call on one socket.
         */
        spin_lock_irqsave(&mappass->copy_lock, flags);
        if (mappass->reqcopy.cmd != 0) {
                spin_unlock_irqrestore(&mappass->copy_lock, flags);
                ret = -EINTR;
                goto out_error;
        }

        mappass->reqcopy = *req;
        spin_unlock_irqrestore(&mappass->copy_lock, flags);
        queue_work(mappass->wq, &mappass->register_work);

        /* Tell the caller we don't need to send back a notification yet */
        return -1;

out_error:
        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.accept.id = req->u.accept.id;
        rsp->ret = ret;

        return 0;
}
static int pvcalls_back_poll(struct xenbus_device *dev,
                             struct xen_pvcalls_request *req)
{
        struct pvcalls_fedata *fedata;
        struct sockpass_mapping *mappass;
        struct xen_pvcalls_response *rsp;
        struct inet_connection_sock *icsk;
        struct request_sock_queue *queue;
        unsigned long flags;
        int ret;
        bool data;

        fedata = dev_get_drvdata(&dev->dev);

        down(&fedata->socket_lock);
        mappass = radix_tree_lookup(&fedata->socketpass_mappings,
                                    req->u.poll.id);
        up(&fedata->socket_lock);
        if (mappass == NULL)
                return -EINVAL;

        /*
         * Limitation of the current implementation: only support one
         * concurrent accept or poll call on one socket.
         */
        spin_lock_irqsave(&mappass->copy_lock, flags);
        if (mappass->reqcopy.cmd != 0) {
                ret = -EINTR;
                goto out;
        }

        mappass->reqcopy = *req;
        icsk = inet_csk(mappass->sock->sk);
        queue = &icsk->icsk_accept_queue;
        data = READ_ONCE(queue->rskq_accept_head) != NULL;
        if (data) {
                mappass->reqcopy.cmd = 0;
                ret = 0;
                goto out;
        }
        spin_unlock_irqrestore(&mappass->copy_lock, flags);

        /* Tell the caller we don't need to send back a notification yet */
        return -1;

out:
        spin_unlock_irqrestore(&mappass->copy_lock, flags);

        rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
        rsp->req_id = req->req_id;
        rsp->u.poll.id = req->u.poll.id;
        rsp->ret = ret;

        return 0;
}
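
/*
 * Dispatch one request from the command ring. A return value other than
 * zero means the handler will reply later on its own (accept/poll), so
 * the caller does not push a response here.
 */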
static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
                                   struct xen_pvcalls_request *req)
{
        int ret = 0;

        switch (req->cmd) {
        case PVCALLS_SOCKET:
                ret = pvcalls_back_socket(dev, req);
                break;
        case PVCALLS_CONNECT:
                ret = pvcalls_back_connect(dev, req);
                break;
        case PVCALLS_RELEASE:
                ret = pvcalls_back_release(dev, req);
                break;
        case PVCALLS_BIND:
                ret = pvcalls_back_bind(dev, req);
                break;
        case PVCALLS_LISTEN:
                ret = pvcalls_back_listen(dev, req);
                break;
        case PVCALLS_ACCEPT:
                ret = pvcalls_back_accept(dev, req);
                break;
        case PVCALLS_POLL:
                ret = pvcalls_back_poll(dev, req);
                break;
        default:
        {
                struct pvcalls_fedata *fedata;
                struct xen_pvcalls_response *rsp;

                fedata = dev_get_drvdata(&dev->dev);
                rsp = RING_GET_RESPONSE(
                                &fedata->ring, fedata->ring.rsp_prod_pvt++);
                rsp->req_id = req->req_id;
                rsp->ret = -ENOTSUPP;
                break;
        }
        }
        return ret;
}
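
/*
 * Main request loop for one frontend: copy each request off the command
 * ring (RING_COPY_REQUEST guards against the frontend modifying it while
 * it is being processed), dispatch it, and batch notifications for the
 * responses that were pushed synchronously.
 */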
static void pvcalls_back_work(struct pvcalls_fedata *fedata)
{
        int notify, notify_all = 0, more = 1;
        struct xen_pvcalls_request req;
        struct xenbus_device *dev = fedata->dev;

        while (more) {
                while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
                        RING_COPY_REQUEST(&fedata->ring,
                                          fedata->ring.req_cons++,
                                          &req);

                        if (!pvcalls_back_handle_cmd(dev, &req)) {
                                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
                                        &fedata->ring, notify);
                                notify_all += notify;
                        }
                }

                if (notify_all) {
                        notify_remote_via_irq(fedata->irq);
                        notify_all = 0;
                }

                RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
        }
}
static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
        struct xenbus_device *dev = dev_id;
        struct pvcalls_fedata *fedata = NULL;

        if (dev == NULL)
                return IRQ_HANDLED;

        fedata = dev_get_drvdata(&dev->dev);
        if (fedata == NULL)
                return IRQ_HANDLED;

        pvcalls_back_work(fedata);
        return IRQ_HANDLED;
}
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
{
        struct sock_mapping *map = sock_map;
        struct pvcalls_ioworker *iow;

        if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
                map->sock->sk->sk_user_data != map)
                return IRQ_HANDLED;

        iow = &map->ioworker;

        atomic_inc(&map->write);
        atomic_inc(&map->io);
        queue_work(iow->wq, &iow->register_work);

        return IRQ_HANDLED;
}
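
/*
 * Connect to a frontend: read the event channel ("port") and "ring-ref"
 * from the frontend's xenstore directory, bind the event channel, map the
 * command ring and register the new pvcalls_fedata on the global
 * frontends list.
 */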
static int backend_connect(struct xenbus_device *dev)
{
        int err, evtchn;
        grant_ref_t ring_ref;
        struct pvcalls_fedata *fedata = NULL;

        fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
        if (!fedata)
                return -ENOMEM;

        fedata->irq = -1;
        err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
                           &evtchn);
        if (err != 1) {
                err = -EINVAL;
                xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                 dev->otherend);
                goto error;
        }

        err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
        if (err != 1) {
                err = -EINVAL;
                xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
                                 dev->otherend);
                goto error;
        }

        err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
        if (err < 0)
                goto error;
        fedata->irq = err;

        err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
                                   IRQF_ONESHOT, "pvcalls-back", dev);
        if (err < 0)
                goto error;

        err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
                                     (void **)&fedata->sring);
        if (err < 0)
                goto error;

        BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
        fedata->dev = dev;

        INIT_LIST_HEAD(&fedata->socket_mappings);
        INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
        sema_init(&fedata->socket_lock, 1);
        dev_set_drvdata(&dev->dev, fedata);

        down(&pvcalls_back_global.frontends_lock);
        list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
        up(&pvcalls_back_global.frontends_lock);

        return 0;

error:
        if (fedata->irq >= 0)
                unbind_from_irqhandler(fedata->irq, dev);
        if (fedata->sring != NULL)
                xenbus_unmap_ring_vfree(dev, fedata->sring);
        kfree(fedata);
        return err;
}
static int backend_disconnect(struct xenbus_device *dev)
{
        struct pvcalls_fedata *fedata;
        struct sock_mapping *map, *n;
        struct sockpass_mapping *mappass;
        struct radix_tree_iter iter;
        void **slot;

        fedata = dev_get_drvdata(&dev->dev);

        down(&fedata->socket_lock);
        list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
                list_del(&map->list);
                pvcalls_back_release_active(dev, fedata, map);
        }

        radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) {
                mappass = radix_tree_deref_slot(slot);
                if (!mappass)
                        continue;
                if (radix_tree_exception(mappass)) {
                        if (radix_tree_deref_retry(mappass))
                                slot = radix_tree_iter_retry(&iter);
                } else {
                        radix_tree_delete(&fedata->socketpass_mappings,
                                          mappass->id);
                        pvcalls_back_release_passive(dev, fedata, mappass);
                }
        }
        up(&fedata->socket_lock);

        unbind_from_irqhandler(fedata->irq, dev);
        xenbus_unmap_ring_vfree(dev, fedata->sring);

        list_del(&fedata->list);
        kfree(fedata);
        dev_set_drvdata(&dev->dev, NULL);

        return 0;
}
static int pvcalls_back_probe(struct xenbus_device *dev,
                              const struct xenbus_device_id *id)
{
        int err, abort;
        struct xenbus_transaction xbt;

again:
        abort = 1;

        err = xenbus_transaction_start(&xbt);
        if (err) {
                pr_warn("%s cannot create xenstore transaction\n", __func__);
                return err;
        }

        err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
                            PVCALLS_VERSIONS);
        if (err) {
                pr_warn("%s write out 'versions' failed\n", __func__);
                goto abort;
        }

        err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
                            MAX_RING_ORDER);
        if (err) {
                pr_warn("%s write out 'max-page-order' failed\n", __func__);
                goto abort;
        }

        err = xenbus_printf(xbt, dev->nodename, "function-calls",
                            XENBUS_FUNCTIONS_CALLS);
        if (err) {
                pr_warn("%s write out 'function-calls' failed\n", __func__);
                goto abort;
        }

        abort = 0;
abort:
        err = xenbus_transaction_end(xbt, abort);
        if (err) {
                if (err == -EAGAIN && !abort)
                        goto again;
                pr_warn("%s cannot complete xenstore transaction\n", __func__);
                return err;
        }

        if (abort)
                return -EFAULT;

        xenbus_switch_state(dev, XenbusStateInitWait);

        return 0;
}
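
/*
 * Walk the backend through the xenbus state machine one step at a time
 * until it reaches the requested state, connecting or disconnecting the
 * frontend as needed along the way.
 */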
static void set_backend_state(struct xenbus_device *dev,
                              enum xenbus_state state)
{
        while (dev->state != state) {
                switch (dev->state) {
                case XenbusStateClosed:
                        switch (state) {
                        case XenbusStateInitWait:
                        case XenbusStateConnected:
                                xenbus_switch_state(dev, XenbusStateInitWait);
                                break;
                        case XenbusStateClosing:
                                xenbus_switch_state(dev, XenbusStateClosing);
                                break;
                        default:
                                WARN_ON(1);
                        }
                        break;
                case XenbusStateInitWait:
                case XenbusStateInitialised:
                        switch (state) {
                        case XenbusStateConnected:
                                if (backend_connect(dev))
                                        return;
                                xenbus_switch_state(dev, XenbusStateConnected);
                                break;
                        case XenbusStateClosing:
                        case XenbusStateClosed:
                                xenbus_switch_state(dev, XenbusStateClosing);
                                break;
                        default:
                                WARN_ON(1);
                        }
                        break;
                case XenbusStateConnected:
                        switch (state) {
                        case XenbusStateInitWait:
                        case XenbusStateClosing:
                        case XenbusStateClosed:
                                down(&pvcalls_back_global.frontends_lock);
                                backend_disconnect(dev);
                                up(&pvcalls_back_global.frontends_lock);
                                xenbus_switch_state(dev, XenbusStateClosing);
                                break;
                        default:
                                WARN_ON(1);
                        }
                        break;
                case XenbusStateClosing:
                        switch (state) {
                        case XenbusStateInitWait:
                        case XenbusStateConnected:
                        case XenbusStateClosed:
                                xenbus_switch_state(dev, XenbusStateClosed);
                                break;
                        default:
                                WARN_ON(1);
                        }
                        break;
                default:
                        WARN_ON(1);
                }
        }
}
static void pvcalls_back_changed(struct xenbus_device *dev,
                                 enum xenbus_state frontend_state)
{
        switch (frontend_state) {
        case XenbusStateInitialising:
                set_backend_state(dev, XenbusStateInitWait);
                break;

        case XenbusStateInitialised:
        case XenbusStateConnected:
                set_backend_state(dev, XenbusStateConnected);
                break;

        case XenbusStateClosing:
                set_backend_state(dev, XenbusStateClosing);
                break;

        case XenbusStateClosed:
                set_backend_state(dev, XenbusStateClosed);
                if (xenbus_dev_is_online(dev))
                        break;
                device_unregister(&dev->dev);
                break;

        case XenbusStateUnknown:
                set_backend_state(dev, XenbusStateClosed);
                device_unregister(&dev->dev);
                break;

        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
                break;
        }
}
static int pvcalls_back_remove(struct xenbus_device *dev)
{
        return 0;
}

static int pvcalls_back_uevent(struct xenbus_device *xdev,
                               struct kobj_uevent_env *env)
{
        return 0;
}
static const struct xenbus_device_id pvcalls_back_ids[] = {
        { "pvcalls" },
        { "" }
};

static struct xenbus_driver pvcalls_back_driver = {
        .ids = pvcalls_back_ids,
        .probe = pvcalls_back_probe,
        .remove = pvcalls_back_remove,
        .uevent = pvcalls_back_uevent,
        .otherend_changed = pvcalls_back_changed,
};
static int __init pvcalls_back_init(void)
{
        int ret;

        ret = xenbus_register_backend(&pvcalls_back_driver);
        if (ret < 0)
                return ret;

        sema_init(&pvcalls_back_global.frontends_lock, 1);
        INIT_LIST_HEAD(&pvcalls_back_global.frontends);
        return 0;
}
module_init(pvcalls_back_init);
static void __exit pvcalls_back_fin(void)
{
        struct pvcalls_fedata *fedata, *nfedata;

        down(&pvcalls_back_global.frontends_lock);
        list_for_each_entry_safe(fedata, nfedata,
                                 &pvcalls_back_global.frontends, list) {
                backend_disconnect(fedata->dev);
        }
        up(&pvcalls_back_global.frontends_lock);

        xenbus_unregister_driver(&pvcalls_back_driver);
}
module_exit(pvcalls_back_fin);
MODULE_DESCRIPTION("Xen PV Calls backend driver");
MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
MODULE_LICENSE("GPL");