/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/request_sock.h>

#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pvcalls.h>
/* Protocol version string written to the "versions" xenstore node. */
#define PVCALLS_VERSIONS "1"
/* Largest data-ring order accepted from a frontend. */
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
36 struct pvcalls_back_global
{
37 struct list_head frontends
;
38 struct semaphore frontends_lock
;
39 } pvcalls_back_global
;
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
46 struct pvcalls_fedata
{
47 struct list_head list
;
48 struct xenbus_device
*dev
;
49 struct xen_pvcalls_sring
*sring
;
50 struct xen_pvcalls_back_ring ring
;
52 struct list_head socket_mappings
;
53 struct radix_tree_root socketpass_mappings
;
54 struct semaphore socket_lock
;
57 struct pvcalls_ioworker
{
58 struct work_struct register_work
;
59 struct workqueue_struct
*wq
;
63 struct list_head list
;
64 struct pvcalls_fedata
*fedata
;
65 struct sockpass_mapping
*sockpass
;
69 struct pvcalls_data_intf
*ring
;
71 struct pvcalls_data data
;
78 void (*saved_data_ready
)(struct sock
*sk
);
79 struct pvcalls_ioworker ioworker
;
82 struct sockpass_mapping
{
83 struct list_head list
;
84 struct pvcalls_fedata
*fedata
;
87 struct xen_pvcalls_request reqcopy
;
89 struct workqueue_struct
*wq
;
90 struct work_struct register_work
;
91 void (*saved_data_ready
)(struct sock
*sk
);
94 static irqreturn_t
pvcalls_back_conn_event(int irq
, void *sock_map
);
95 static int pvcalls_back_release_active(struct xenbus_device
*dev
,
96 struct pvcalls_fedata
*fedata
,
97 struct sock_mapping
*map
);
99 static void pvcalls_conn_back_read(void *opaque
)
101 struct sock_mapping
*map
= (struct sock_mapping
*)opaque
;
104 RING_IDX cons
, prod
, size
, wanted
, array_size
, masked_prod
, masked_cons
;
106 struct pvcalls_data_intf
*intf
= map
->ring
;
107 struct pvcalls_data
*data
= &map
->data
;
111 array_size
= XEN_FLEX_RING_SIZE(map
->ring_order
);
112 cons
= intf
->in_cons
;
113 prod
= intf
->in_prod
;
114 error
= intf
->in_error
;
115 /* read the indexes first, then deal with the data */
121 size
= pvcalls_queued(prod
, cons
, array_size
);
122 if (size
>= array_size
)
124 spin_lock_irqsave(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
125 if (skb_queue_empty(&map
->sock
->sk
->sk_receive_queue
)) {
126 atomic_set(&map
->read
, 0);
127 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
,
131 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
132 wanted
= array_size
- size
;
133 masked_prod
= pvcalls_mask(prod
, array_size
);
134 masked_cons
= pvcalls_mask(cons
, array_size
);
136 memset(&msg
, 0, sizeof(msg
));
137 if (masked_prod
< masked_cons
) {
138 vec
[0].iov_base
= data
->in
+ masked_prod
;
139 vec
[0].iov_len
= wanted
;
140 iov_iter_kvec(&msg
.msg_iter
, ITER_KVEC
|WRITE
, vec
, 1, wanted
);
142 vec
[0].iov_base
= data
->in
+ masked_prod
;
143 vec
[0].iov_len
= array_size
- masked_prod
;
144 vec
[1].iov_base
= data
->in
;
145 vec
[1].iov_len
= wanted
- vec
[0].iov_len
;
146 iov_iter_kvec(&msg
.msg_iter
, ITER_KVEC
|WRITE
, vec
, 2, wanted
);
149 atomic_set(&map
->read
, 0);
150 ret
= inet_recvmsg(map
->sock
, &msg
, wanted
, MSG_DONTWAIT
);
151 WARN_ON(ret
> wanted
);
152 if (ret
== -EAGAIN
) /* shouldn't happen */
156 spin_lock_irqsave(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
157 if (ret
> 0 && !skb_queue_empty(&map
->sock
->sk
->sk_receive_queue
))
158 atomic_inc(&map
->read
);
159 spin_unlock_irqrestore(&map
->sock
->sk
->sk_receive_queue
.lock
, flags
);
161 /* write the data, then modify the indexes */
164 intf
->in_error
= ret
;
166 intf
->in_prod
= prod
+ ret
;
167 /* update the indexes, then notify the other end */
169 notify_remote_via_irq(map
->irq
);
174 static void pvcalls_conn_back_write(struct sock_mapping
*map
)
176 struct pvcalls_data_intf
*intf
= map
->ring
;
177 struct pvcalls_data
*data
= &map
->data
;
180 RING_IDX cons
, prod
, size
, array_size
;
183 cons
= intf
->out_cons
;
184 prod
= intf
->out_prod
;
185 /* read the indexes before dealing with the data */
188 array_size
= XEN_FLEX_RING_SIZE(map
->ring_order
);
189 size
= pvcalls_queued(prod
, cons
, array_size
);
193 memset(&msg
, 0, sizeof(msg
));
194 msg
.msg_flags
|= MSG_DONTWAIT
;
195 if (pvcalls_mask(prod
, array_size
) > pvcalls_mask(cons
, array_size
)) {
196 vec
[0].iov_base
= data
->out
+ pvcalls_mask(cons
, array_size
);
197 vec
[0].iov_len
= size
;
198 iov_iter_kvec(&msg
.msg_iter
, ITER_KVEC
|READ
, vec
, 1, size
);
200 vec
[0].iov_base
= data
->out
+ pvcalls_mask(cons
, array_size
);
201 vec
[0].iov_len
= array_size
- pvcalls_mask(cons
, array_size
);
202 vec
[1].iov_base
= data
->out
;
203 vec
[1].iov_len
= size
- vec
[0].iov_len
;
204 iov_iter_kvec(&msg
.msg_iter
, ITER_KVEC
|READ
, vec
, 2, size
);
207 atomic_set(&map
->write
, 0);
208 ret
= inet_sendmsg(map
->sock
, &msg
, size
);
209 if (ret
== -EAGAIN
|| (ret
>= 0 && ret
< size
)) {
210 atomic_inc(&map
->write
);
211 atomic_inc(&map
->io
);
216 /* write the data, then update the indexes */
219 intf
->out_error
= ret
;
222 intf
->out_cons
= cons
+ ret
;
223 prod
= intf
->out_prod
;
225 /* update the indexes, then notify the other end */
227 if (prod
!= cons
+ ret
)
228 atomic_inc(&map
->write
);
229 notify_remote_via_irq(map
->irq
);
232 static void pvcalls_back_ioworker(struct work_struct
*work
)
234 struct pvcalls_ioworker
*ioworker
= container_of(work
,
235 struct pvcalls_ioworker
, register_work
);
236 struct sock_mapping
*map
= container_of(ioworker
, struct sock_mapping
,
239 while (atomic_read(&map
->io
) > 0) {
240 if (atomic_read(&map
->release
) > 0) {
241 atomic_set(&map
->release
, 0);
245 if (atomic_read(&map
->read
) > 0)
246 pvcalls_conn_back_read(map
);
247 if (atomic_read(&map
->write
) > 0)
248 pvcalls_conn_back_write(map
);
250 atomic_dec(&map
->io
);
254 static int pvcalls_back_socket(struct xenbus_device
*dev
,
255 struct xen_pvcalls_request
*req
)
257 struct pvcalls_fedata
*fedata
;
259 struct xen_pvcalls_response
*rsp
;
261 fedata
= dev_get_drvdata(&dev
->dev
);
263 if (req
->u
.socket
.domain
!= AF_INET
||
264 req
->u
.socket
.type
!= SOCK_STREAM
||
265 (req
->u
.socket
.protocol
!= IPPROTO_IP
&&
266 req
->u
.socket
.protocol
!= AF_INET
))
271 /* leave the actual socket allocation for later */
273 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
274 rsp
->req_id
= req
->req_id
;
276 rsp
->u
.socket
.id
= req
->u
.socket
.id
;
282 static void pvcalls_sk_state_change(struct sock
*sock
)
284 struct sock_mapping
*map
= sock
->sk_user_data
;
285 struct pvcalls_data_intf
*intf
;
291 intf
->in_error
= -ENOTCONN
;
292 notify_remote_via_irq(map
->irq
);
295 static void pvcalls_sk_data_ready(struct sock
*sock
)
297 struct sock_mapping
*map
= sock
->sk_user_data
;
298 struct pvcalls_ioworker
*iow
;
303 iow
= &map
->ioworker
;
304 atomic_inc(&map
->read
);
305 atomic_inc(&map
->io
);
306 queue_work(iow
->wq
, &iow
->register_work
);
309 static struct sock_mapping
*pvcalls_new_active_socket(
310 struct pvcalls_fedata
*fedata
,
317 struct sock_mapping
*map
;
320 map
= kzalloc(sizeof(*map
), GFP_KERNEL
);
324 map
->fedata
= fedata
;
329 ret
= xenbus_map_ring_valloc(fedata
->dev
, &ref
, 1, &page
);
333 map
->ring_order
= map
->ring
->ring_order
;
334 /* first read the order, then map the data ring */
336 if (map
->ring_order
> MAX_RING_ORDER
) {
337 pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
338 __func__
, map
->ring_order
, MAX_RING_ORDER
);
341 ret
= xenbus_map_ring_valloc(fedata
->dev
, map
->ring
->ref
,
342 (1 << map
->ring_order
), &page
);
347 ret
= bind_interdomain_evtchn_to_irqhandler(fedata
->dev
->otherend_id
,
349 pvcalls_back_conn_event
,
357 map
->data
.in
= map
->bytes
;
358 map
->data
.out
= map
->bytes
+ XEN_FLEX_RING_SIZE(map
->ring_order
);
360 map
->ioworker
.wq
= alloc_workqueue("pvcalls_io", WQ_UNBOUND
, 1);
361 if (!map
->ioworker
.wq
)
363 atomic_set(&map
->io
, 1);
364 INIT_WORK(&map
->ioworker
.register_work
, pvcalls_back_ioworker
);
366 down(&fedata
->socket_lock
);
367 list_add_tail(&map
->list
, &fedata
->socket_mappings
);
368 up(&fedata
->socket_lock
);
370 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
371 map
->saved_data_ready
= map
->sock
->sk
->sk_data_ready
;
372 map
->sock
->sk
->sk_user_data
= map
;
373 map
->sock
->sk
->sk_data_ready
= pvcalls_sk_data_ready
;
374 map
->sock
->sk
->sk_state_change
= pvcalls_sk_state_change
;
375 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
379 down(&fedata
->socket_lock
);
380 list_del(&map
->list
);
381 pvcalls_back_release_active(fedata
->dev
, fedata
, map
);
382 up(&fedata
->socket_lock
);
386 static int pvcalls_back_connect(struct xenbus_device
*dev
,
387 struct xen_pvcalls_request
*req
)
389 struct pvcalls_fedata
*fedata
;
392 struct sock_mapping
*map
;
393 struct xen_pvcalls_response
*rsp
;
394 struct sockaddr
*sa
= (struct sockaddr
*)&req
->u
.connect
.addr
;
396 fedata
= dev_get_drvdata(&dev
->dev
);
398 if (req
->u
.connect
.len
< sizeof(sa
->sa_family
) ||
399 req
->u
.connect
.len
> sizeof(req
->u
.connect
.addr
) ||
400 sa
->sa_family
!= AF_INET
)
403 ret
= sock_create(AF_INET
, SOCK_STREAM
, 0, &sock
);
406 ret
= inet_stream_connect(sock
, sa
, req
->u
.connect
.len
, 0);
412 map
= pvcalls_new_active_socket(fedata
,
415 req
->u
.connect
.evtchn
,
419 sock_release(map
->sock
);
423 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
424 rsp
->req_id
= req
->req_id
;
426 rsp
->u
.connect
.id
= req
->u
.connect
.id
;
432 static int pvcalls_back_release_active(struct xenbus_device
*dev
,
433 struct pvcalls_fedata
*fedata
,
434 struct sock_mapping
*map
)
436 disable_irq(map
->irq
);
437 if (map
->sock
->sk
!= NULL
) {
438 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
439 map
->sock
->sk
->sk_user_data
= NULL
;
440 map
->sock
->sk
->sk_data_ready
= map
->saved_data_ready
;
441 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
444 atomic_set(&map
->release
, 1);
445 flush_work(&map
->ioworker
.register_work
);
447 xenbus_unmap_ring_vfree(dev
, map
->bytes
);
448 xenbus_unmap_ring_vfree(dev
, (void *)map
->ring
);
449 unbind_from_irqhandler(map
->irq
, map
);
451 sock_release(map
->sock
);
457 static int pvcalls_back_release_passive(struct xenbus_device
*dev
,
458 struct pvcalls_fedata
*fedata
,
459 struct sockpass_mapping
*mappass
)
461 if (mappass
->sock
->sk
!= NULL
) {
462 write_lock_bh(&mappass
->sock
->sk
->sk_callback_lock
);
463 mappass
->sock
->sk
->sk_user_data
= NULL
;
464 mappass
->sock
->sk
->sk_data_ready
= mappass
->saved_data_ready
;
465 write_unlock_bh(&mappass
->sock
->sk
->sk_callback_lock
);
467 sock_release(mappass
->sock
);
468 flush_workqueue(mappass
->wq
);
469 destroy_workqueue(mappass
->wq
);
475 static int pvcalls_back_release(struct xenbus_device
*dev
,
476 struct xen_pvcalls_request
*req
)
478 struct pvcalls_fedata
*fedata
;
479 struct sock_mapping
*map
, *n
;
480 struct sockpass_mapping
*mappass
;
482 struct xen_pvcalls_response
*rsp
;
484 fedata
= dev_get_drvdata(&dev
->dev
);
486 down(&fedata
->socket_lock
);
487 list_for_each_entry_safe(map
, n
, &fedata
->socket_mappings
, list
) {
488 if (map
->id
== req
->u
.release
.id
) {
489 list_del(&map
->list
);
490 up(&fedata
->socket_lock
);
491 ret
= pvcalls_back_release_active(dev
, fedata
, map
);
495 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
497 if (mappass
!= NULL
) {
498 radix_tree_delete(&fedata
->socketpass_mappings
, mappass
->id
);
499 up(&fedata
->socket_lock
);
500 ret
= pvcalls_back_release_passive(dev
, fedata
, mappass
);
502 up(&fedata
->socket_lock
);
505 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
506 rsp
->req_id
= req
->req_id
;
507 rsp
->u
.release
.id
= req
->u
.release
.id
;
513 static void __pvcalls_back_accept(struct work_struct
*work
)
515 struct sockpass_mapping
*mappass
= container_of(
516 work
, struct sockpass_mapping
, register_work
);
517 struct sock_mapping
*map
;
518 struct pvcalls_ioworker
*iow
;
519 struct pvcalls_fedata
*fedata
;
521 struct xen_pvcalls_response
*rsp
;
522 struct xen_pvcalls_request
*req
;
527 fedata
= mappass
->fedata
;
529 * __pvcalls_back_accept can race against pvcalls_back_accept.
530 * We only need to check the value of "cmd" on read. It could be
531 * done atomically, but to simplify the code on the write side, we
534 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
535 req
= &mappass
->reqcopy
;
536 if (req
->cmd
!= PVCALLS_ACCEPT
) {
537 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
540 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
545 sock
->type
= mappass
->sock
->type
;
546 sock
->ops
= mappass
->sock
->ops
;
548 ret
= inet_accept(mappass
->sock
, sock
, O_NONBLOCK
, true);
549 if (ret
== -EAGAIN
) {
554 map
= pvcalls_new_active_socket(fedata
,
555 req
->u
.accept
.id_new
,
557 req
->u
.accept
.evtchn
,
565 map
->sockpass
= mappass
;
566 iow
= &map
->ioworker
;
567 atomic_inc(&map
->read
);
568 atomic_inc(&map
->io
);
569 queue_work(iow
->wq
, &iow
->register_work
);
572 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
573 rsp
->req_id
= req
->req_id
;
575 rsp
->u
.accept
.id
= req
->u
.accept
.id
;
577 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata
->ring
, notify
);
579 notify_remote_via_irq(fedata
->irq
);
581 mappass
->reqcopy
.cmd
= 0;
584 static void pvcalls_pass_sk_data_ready(struct sock
*sock
)
586 struct sockpass_mapping
*mappass
= sock
->sk_user_data
;
587 struct pvcalls_fedata
*fedata
;
588 struct xen_pvcalls_response
*rsp
;
595 fedata
= mappass
->fedata
;
596 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
597 if (mappass
->reqcopy
.cmd
== PVCALLS_POLL
) {
598 rsp
= RING_GET_RESPONSE(&fedata
->ring
,
599 fedata
->ring
.rsp_prod_pvt
++);
600 rsp
->req_id
= mappass
->reqcopy
.req_id
;
601 rsp
->u
.poll
.id
= mappass
->reqcopy
.u
.poll
.id
;
602 rsp
->cmd
= mappass
->reqcopy
.cmd
;
605 mappass
->reqcopy
.cmd
= 0;
606 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
608 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata
->ring
, notify
);
610 notify_remote_via_irq(mappass
->fedata
->irq
);
612 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
613 queue_work(mappass
->wq
, &mappass
->register_work
);
617 static int pvcalls_back_bind(struct xenbus_device
*dev
,
618 struct xen_pvcalls_request
*req
)
620 struct pvcalls_fedata
*fedata
;
622 struct sockpass_mapping
*map
;
623 struct xen_pvcalls_response
*rsp
;
625 fedata
= dev_get_drvdata(&dev
->dev
);
627 map
= kzalloc(sizeof(*map
), GFP_KERNEL
);
633 INIT_WORK(&map
->register_work
, __pvcalls_back_accept
);
634 spin_lock_init(&map
->copy_lock
);
635 map
->wq
= alloc_workqueue("pvcalls_wq", WQ_UNBOUND
, 1);
641 ret
= sock_create(AF_INET
, SOCK_STREAM
, 0, &map
->sock
);
645 ret
= inet_bind(map
->sock
, (struct sockaddr
*)&req
->u
.bind
.addr
,
650 map
->fedata
= fedata
;
651 map
->id
= req
->u
.bind
.id
;
653 down(&fedata
->socket_lock
);
654 ret
= radix_tree_insert(&fedata
->socketpass_mappings
, map
->id
,
656 up(&fedata
->socket_lock
);
660 write_lock_bh(&map
->sock
->sk
->sk_callback_lock
);
661 map
->saved_data_ready
= map
->sock
->sk
->sk_data_ready
;
662 map
->sock
->sk
->sk_user_data
= map
;
663 map
->sock
->sk
->sk_data_ready
= pvcalls_pass_sk_data_ready
;
664 write_unlock_bh(&map
->sock
->sk
->sk_callback_lock
);
668 if (map
&& map
->sock
)
669 sock_release(map
->sock
);
671 destroy_workqueue(map
->wq
);
674 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
675 rsp
->req_id
= req
->req_id
;
677 rsp
->u
.bind
.id
= req
->u
.bind
.id
;
682 static int pvcalls_back_listen(struct xenbus_device
*dev
,
683 struct xen_pvcalls_request
*req
)
685 struct pvcalls_fedata
*fedata
;
687 struct sockpass_mapping
*map
;
688 struct xen_pvcalls_response
*rsp
;
690 fedata
= dev_get_drvdata(&dev
->dev
);
692 down(&fedata
->socket_lock
);
693 map
= radix_tree_lookup(&fedata
->socketpass_mappings
, req
->u
.listen
.id
);
694 up(&fedata
->socket_lock
);
698 ret
= inet_listen(map
->sock
, req
->u
.listen
.backlog
);
701 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
702 rsp
->req_id
= req
->req_id
;
704 rsp
->u
.listen
.id
= req
->u
.listen
.id
;
709 static int pvcalls_back_accept(struct xenbus_device
*dev
,
710 struct xen_pvcalls_request
*req
)
712 struct pvcalls_fedata
*fedata
;
713 struct sockpass_mapping
*mappass
;
715 struct xen_pvcalls_response
*rsp
;
718 fedata
= dev_get_drvdata(&dev
->dev
);
720 down(&fedata
->socket_lock
);
721 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
723 up(&fedata
->socket_lock
);
728 * Limitation of the current implementation: only support one
729 * concurrent accept or poll call on one socket.
731 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
732 if (mappass
->reqcopy
.cmd
!= 0) {
733 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
738 mappass
->reqcopy
= *req
;
739 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
740 queue_work(mappass
->wq
, &mappass
->register_work
);
742 /* Tell the caller we don't need to send back a notification yet */
746 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
747 rsp
->req_id
= req
->req_id
;
749 rsp
->u
.accept
.id
= req
->u
.accept
.id
;
754 static int pvcalls_back_poll(struct xenbus_device
*dev
,
755 struct xen_pvcalls_request
*req
)
757 struct pvcalls_fedata
*fedata
;
758 struct sockpass_mapping
*mappass
;
759 struct xen_pvcalls_response
*rsp
;
760 struct inet_connection_sock
*icsk
;
761 struct request_sock_queue
*queue
;
766 fedata
= dev_get_drvdata(&dev
->dev
);
768 down(&fedata
->socket_lock
);
769 mappass
= radix_tree_lookup(&fedata
->socketpass_mappings
,
771 up(&fedata
->socket_lock
);
776 * Limitation of the current implementation: only support one
777 * concurrent accept or poll call on one socket.
779 spin_lock_irqsave(&mappass
->copy_lock
, flags
);
780 if (mappass
->reqcopy
.cmd
!= 0) {
785 mappass
->reqcopy
= *req
;
786 icsk
= inet_csk(mappass
->sock
->sk
);
787 queue
= &icsk
->icsk_accept_queue
;
788 data
= queue
->rskq_accept_head
!= NULL
;
790 mappass
->reqcopy
.cmd
= 0;
794 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
796 /* Tell the caller we don't need to send back a notification yet */
800 spin_unlock_irqrestore(&mappass
->copy_lock
, flags
);
802 rsp
= RING_GET_RESPONSE(&fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
803 rsp
->req_id
= req
->req_id
;
805 rsp
->u
.poll
.id
= req
->u
.poll
.id
;
810 static int pvcalls_back_handle_cmd(struct xenbus_device
*dev
,
811 struct xen_pvcalls_request
*req
)
817 ret
= pvcalls_back_socket(dev
, req
);
819 case PVCALLS_CONNECT
:
820 ret
= pvcalls_back_connect(dev
, req
);
822 case PVCALLS_RELEASE
:
823 ret
= pvcalls_back_release(dev
, req
);
826 ret
= pvcalls_back_bind(dev
, req
);
829 ret
= pvcalls_back_listen(dev
, req
);
832 ret
= pvcalls_back_accept(dev
, req
);
835 ret
= pvcalls_back_poll(dev
, req
);
839 struct pvcalls_fedata
*fedata
;
840 struct xen_pvcalls_response
*rsp
;
842 fedata
= dev_get_drvdata(&dev
->dev
);
843 rsp
= RING_GET_RESPONSE(
844 &fedata
->ring
, fedata
->ring
.rsp_prod_pvt
++);
845 rsp
->req_id
= req
->req_id
;
847 rsp
->ret
= -ENOTSUPP
;
854 static void pvcalls_back_work(struct pvcalls_fedata
*fedata
)
856 int notify
, notify_all
= 0, more
= 1;
857 struct xen_pvcalls_request req
;
858 struct xenbus_device
*dev
= fedata
->dev
;
861 while (RING_HAS_UNCONSUMED_REQUESTS(&fedata
->ring
)) {
862 RING_COPY_REQUEST(&fedata
->ring
,
863 fedata
->ring
.req_cons
++,
866 if (!pvcalls_back_handle_cmd(dev
, &req
)) {
867 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
868 &fedata
->ring
, notify
);
869 notify_all
+= notify
;
874 notify_remote_via_irq(fedata
->irq
);
878 RING_FINAL_CHECK_FOR_REQUESTS(&fedata
->ring
, more
);
882 static irqreturn_t
pvcalls_back_event(int irq
, void *dev_id
)
884 struct xenbus_device
*dev
= dev_id
;
885 struct pvcalls_fedata
*fedata
= NULL
;
890 fedata
= dev_get_drvdata(&dev
->dev
);
894 pvcalls_back_work(fedata
);
898 static irqreturn_t
pvcalls_back_conn_event(int irq
, void *sock_map
)
900 struct sock_mapping
*map
= sock_map
;
901 struct pvcalls_ioworker
*iow
;
903 if (map
== NULL
|| map
->sock
== NULL
|| map
->sock
->sk
== NULL
||
904 map
->sock
->sk
->sk_user_data
!= map
)
907 iow
= &map
->ioworker
;
909 atomic_inc(&map
->write
);
910 atomic_inc(&map
->io
);
911 queue_work(iow
->wq
, &iow
->register_work
);
916 static int backend_connect(struct xenbus_device
*dev
)
919 grant_ref_t ring_ref
;
920 struct pvcalls_fedata
*fedata
= NULL
;
922 fedata
= kzalloc(sizeof(struct pvcalls_fedata
), GFP_KERNEL
);
927 err
= xenbus_scanf(XBT_NIL
, dev
->otherend
, "port", "%u",
931 xenbus_dev_fatal(dev
, err
, "reading %s/event-channel",
936 err
= xenbus_scanf(XBT_NIL
, dev
->otherend
, "ring-ref", "%u", &ring_ref
);
939 xenbus_dev_fatal(dev
, err
, "reading %s/ring-ref",
944 err
= bind_interdomain_evtchn_to_irq(dev
->otherend_id
, evtchn
);
949 err
= request_threaded_irq(fedata
->irq
, NULL
, pvcalls_back_event
,
950 IRQF_ONESHOT
, "pvcalls-back", dev
);
954 err
= xenbus_map_ring_valloc(dev
, &ring_ref
, 1,
955 (void **)&fedata
->sring
);
959 BACK_RING_INIT(&fedata
->ring
, fedata
->sring
, XEN_PAGE_SIZE
* 1);
962 INIT_LIST_HEAD(&fedata
->socket_mappings
);
963 INIT_RADIX_TREE(&fedata
->socketpass_mappings
, GFP_KERNEL
);
964 sema_init(&fedata
->socket_lock
, 1);
965 dev_set_drvdata(&dev
->dev
, fedata
);
967 down(&pvcalls_back_global
.frontends_lock
);
968 list_add_tail(&fedata
->list
, &pvcalls_back_global
.frontends
);
969 up(&pvcalls_back_global
.frontends_lock
);
974 if (fedata
->irq
>= 0)
975 unbind_from_irqhandler(fedata
->irq
, dev
);
976 if (fedata
->sring
!= NULL
)
977 xenbus_unmap_ring_vfree(dev
, fedata
->sring
);
982 static int backend_disconnect(struct xenbus_device
*dev
)
984 struct pvcalls_fedata
*fedata
;
985 struct sock_mapping
*map
, *n
;
986 struct sockpass_mapping
*mappass
;
987 struct radix_tree_iter iter
;
991 fedata
= dev_get_drvdata(&dev
->dev
);
993 down(&fedata
->socket_lock
);
994 list_for_each_entry_safe(map
, n
, &fedata
->socket_mappings
, list
) {
995 list_del(&map
->list
);
996 pvcalls_back_release_active(dev
, fedata
, map
);
999 radix_tree_for_each_slot(slot
, &fedata
->socketpass_mappings
, &iter
, 0) {
1000 mappass
= radix_tree_deref_slot(slot
);
1003 if (radix_tree_exception(mappass
)) {
1004 if (radix_tree_deref_retry(mappass
))
1005 slot
= radix_tree_iter_retry(&iter
);
1007 radix_tree_delete(&fedata
->socketpass_mappings
,
1009 pvcalls_back_release_passive(dev
, fedata
, mappass
);
1012 up(&fedata
->socket_lock
);
1014 unbind_from_irqhandler(fedata
->irq
, dev
);
1015 xenbus_unmap_ring_vfree(dev
, fedata
->sring
);
1017 list_del(&fedata
->list
);
1019 dev_set_drvdata(&dev
->dev
, NULL
);
1024 static int pvcalls_back_probe(struct xenbus_device
*dev
,
1025 const struct xenbus_device_id
*id
)
1028 struct xenbus_transaction xbt
;
1033 err
= xenbus_transaction_start(&xbt
);
1035 pr_warn("%s cannot create xenstore transaction\n", __func__
);
1039 err
= xenbus_printf(xbt
, dev
->nodename
, "versions", "%s",
1042 pr_warn("%s write out 'versions' failed\n", __func__
);
1046 err
= xenbus_printf(xbt
, dev
->nodename
, "max-page-order", "%u",
1049 pr_warn("%s write out 'max-page-order' failed\n", __func__
);
1053 err
= xenbus_printf(xbt
, dev
->nodename
, "function-calls",
1054 XENBUS_FUNCTIONS_CALLS
);
1056 pr_warn("%s write out 'function-calls' failed\n", __func__
);
1062 err
= xenbus_transaction_end(xbt
, abort
);
1064 if (err
== -EAGAIN
&& !abort
)
1066 pr_warn("%s cannot complete xenstore transaction\n", __func__
);
1073 xenbus_switch_state(dev
, XenbusStateInitWait
);
1078 static void set_backend_state(struct xenbus_device
*dev
,
1079 enum xenbus_state state
)
1081 while (dev
->state
!= state
) {
1082 switch (dev
->state
) {
1083 case XenbusStateClosed
:
1085 case XenbusStateInitWait
:
1086 case XenbusStateConnected
:
1087 xenbus_switch_state(dev
, XenbusStateInitWait
);
1089 case XenbusStateClosing
:
1090 xenbus_switch_state(dev
, XenbusStateClosing
);
1096 case XenbusStateInitWait
:
1097 case XenbusStateInitialised
:
1099 case XenbusStateConnected
:
1100 backend_connect(dev
);
1101 xenbus_switch_state(dev
, XenbusStateConnected
);
1103 case XenbusStateClosing
:
1104 case XenbusStateClosed
:
1105 xenbus_switch_state(dev
, XenbusStateClosing
);
1111 case XenbusStateConnected
:
1113 case XenbusStateInitWait
:
1114 case XenbusStateClosing
:
1115 case XenbusStateClosed
:
1116 down(&pvcalls_back_global
.frontends_lock
);
1117 backend_disconnect(dev
);
1118 up(&pvcalls_back_global
.frontends_lock
);
1119 xenbus_switch_state(dev
, XenbusStateClosing
);
1125 case XenbusStateClosing
:
1127 case XenbusStateInitWait
:
1128 case XenbusStateConnected
:
1129 case XenbusStateClosed
:
1130 xenbus_switch_state(dev
, XenbusStateClosed
);
1142 static void pvcalls_back_changed(struct xenbus_device
*dev
,
1143 enum xenbus_state frontend_state
)
1145 switch (frontend_state
) {
1146 case XenbusStateInitialising
:
1147 set_backend_state(dev
, XenbusStateInitWait
);
1150 case XenbusStateInitialised
:
1151 case XenbusStateConnected
:
1152 set_backend_state(dev
, XenbusStateConnected
);
1155 case XenbusStateClosing
:
1156 set_backend_state(dev
, XenbusStateClosing
);
1159 case XenbusStateClosed
:
1160 set_backend_state(dev
, XenbusStateClosed
);
1161 if (xenbus_dev_is_online(dev
))
1163 device_unregister(&dev
->dev
);
1165 case XenbusStateUnknown
:
1166 set_backend_state(dev
, XenbusStateClosed
);
1167 device_unregister(&dev
->dev
);
1171 xenbus_dev_fatal(dev
, -EINVAL
, "saw state %d at frontend",
/* xenbus remove callback; does nothing and returns 0. */
static int pvcalls_back_remove(struct xenbus_device *dev)
{
	return 0;
}
/* xenbus uevent callback; adds nothing to @env and returns 0. */
static int pvcalls_back_uevent(struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}
1188 static const struct xenbus_device_id pvcalls_back_ids
[] = {
1193 static struct xenbus_driver pvcalls_back_driver
= {
1194 .ids
= pvcalls_back_ids
,
1195 .probe
= pvcalls_back_probe
,
1196 .remove
= pvcalls_back_remove
,
1197 .uevent
= pvcalls_back_uevent
,
1198 .otherend_changed
= pvcalls_back_changed
,
1201 static int __init
pvcalls_back_init(void)
1208 ret
= xenbus_register_backend(&pvcalls_back_driver
);
1212 sema_init(&pvcalls_back_global
.frontends_lock
, 1);
1213 INIT_LIST_HEAD(&pvcalls_back_global
.frontends
);
1216 module_init(pvcalls_back_init
);
1218 static void __exit
pvcalls_back_fin(void)
1220 struct pvcalls_fedata
*fedata
, *nfedata
;
1222 down(&pvcalls_back_global
.frontends_lock
);
1223 list_for_each_entry_safe(fedata
, nfedata
,
1224 &pvcalls_back_global
.frontends
, list
) {
1225 backend_disconnect(fedata
->dev
);
1227 up(&pvcalls_back_global
.frontends_lock
);
1229 xenbus_unregister_driver(&pvcalls_back_driver
);
1232 module_exit(pvcalls_back_fin
);
1234 MODULE_DESCRIPTION("Xen PV Calls backend driver");
1235 MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
1236 MODULE_LICENSE("GPL");