// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 */

#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/request_sock.h>
#include <trace/events/sock.h>

#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pvcalls.h>

#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
static struct pvcalls_back_global {
	struct list_head frontends;
	struct semaphore frontends_lock;
} pvcalls_back_global;
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
	struct list_head list;
	struct xenbus_device *dev;
	struct xen_pvcalls_sring *sring;
	struct xen_pvcalls_back_ring ring;
	int irq;
	struct list_head socket_mappings;
	struct radix_tree_root socketpass_mappings;
	struct semaphore socket_lock;
};
struct pvcalls_ioworker {
	struct work_struct register_work;
	struct workqueue_struct *wq;
};
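
/*
 * Per active connection state: the local kernel socket, the mapped
 * data ring shared with the frontend, the bound event channel irq and
 * the flags that drive the ioworker.
 */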
struct sock_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *sockpass;
	struct socket *sock;
	uint64_t id;
	grant_ref_t ref;
	struct pvcalls_data_intf *ring;
	void *bytes;
	struct pvcalls_data data;
	uint32_t ring_order;
	int irq;
	atomic_t read;
	atomic_t write;
	atomic_t io;
	atomic_t release;
	atomic_t eoi;
	void (*saved_data_ready)(struct sock *sk);
	struct pvcalls_ioworker ioworker;
};
struct sockpass_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	uint64_t id;
	struct xen_pvcalls_request reqcopy;
	spinlock_t copy_lock;
	struct workqueue_struct *wq;
	struct work_struct register_work;
	void (*saved_data_ready)(struct sock *sk);
};
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map);
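
/*
 * Drain data from the local kernel socket into the shared "in" data
 * ring: snapshot the ring indexes, receive as much as fits into the
 * free space with inet_recvmsg(), then publish the new producer index
 * and notify the frontend over the per-connection event channel.
 */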
static bool pvcalls_conn_back_read(void *opaque)
{
	struct sock_mapping *map = (struct sock_mapping *)opaque;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
	int32_t error;
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	unsigned long flags;
	int ret;

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	cons = intf->in_cons;
	prod = intf->in_prod;
	error = intf->in_error;
	/* read the indexes first, then deal with the data */
	virt_mb();

	if (error)
		return false;

	size = pvcalls_queued(prod, cons, array_size);
	if (size >= array_size)
		return false;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
		atomic_set(&map->read, 0);
		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
				       flags);
		return true;
	}
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
	wanted = array_size - size;
	masked_prod = pvcalls_mask(prod, array_size);
	masked_cons = pvcalls_mask(cons, array_size);

	memset(&msg, 0, sizeof(msg));
	if (masked_prod < masked_cons) {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = wanted;
		iov_iter_kvec(&msg.msg_iter, ITER_DEST, vec, 1, wanted);
	} else {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = array_size - masked_prod;
		vec[1].iov_base = data->in;
		vec[1].iov_len = wanted - vec[0].iov_len;
		iov_iter_kvec(&msg.msg_iter, ITER_DEST, vec, 2, wanted);
	}

	atomic_set(&map->read, 0);
	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
	WARN_ON(ret > wanted);
	if (ret == -EAGAIN) /* shouldn't happen */
		return true;
	if (!ret)
		ret = -ENOTCONN;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
		atomic_inc(&map->read);
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);

	/* write the data, then modify the indexes */
	virt_wmb();
	if (ret < 0) {
		atomic_set(&map->read, 0);
		intf->in_error = ret;
	} else
		intf->in_prod = prod + ret;
	/* update the indexes, then notify the other end */
	virt_wmb();
	notify_remote_via_irq(map->irq);

	return true;
}
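
/*
 * Mirror of the read path: push bytes queued in the shared "out" data
 * ring into the local kernel socket with inet_sendmsg(), advance the
 * consumer index and notify the frontend. Re-arms itself through the
 * write and io counters if the socket would block or data is left over.
 */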
static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, array_size;
	int ret;

	atomic_set(&map->write, 0);

	cons = intf->out_cons;
	prod = intf->out_prod;
	/* read the indexes before dealing with the data */
	virt_mb();

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	size = pvcalls_queued(prod, cons, array_size);
	if (size == 0)
		return false;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags |= MSG_DONTWAIT;
	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = size;
		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, size);
	} else {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
		vec[1].iov_base = data->out;
		vec[1].iov_len = size - vec[0].iov_len;
		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 2, size);
	}

	ret = inet_sendmsg(map->sock, &msg, size);
	if (ret == -EAGAIN) {
		atomic_inc(&map->write);
		atomic_inc(&map->io);
		return true;
	}

	/* write the data, then update the indexes */
	virt_wmb();
	if (ret < 0) {
		intf->out_error = ret;
	} else {
		intf->out_error = 0;
		intf->out_cons = cons + ret;
		prod = intf->out_prod;
	}
	/* update the indexes, then notify the other end */
	virt_mb();
	if (prod != cons + ret) {
		atomic_inc(&map->write);
		atomic_inc(&map->io);
	}
	notify_remote_via_irq(map->irq);

	return true;
}
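
/*
 * Per-connection worker: loops while I/O is outstanding, servicing the
 * read and write flags set by the socket callbacks and the connection
 * event channel, and issues the delayed EOI once the write side has
 * been drained.
 */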
static void pvcalls_back_ioworker(struct work_struct *work)
{
	struct pvcalls_ioworker *ioworker = container_of(work,
		struct pvcalls_ioworker, register_work);
	struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
		ioworker);
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	while (atomic_read(&map->io) > 0) {
		if (atomic_read(&map->release) > 0) {
			atomic_set(&map->release, 0);
			return;
		}

		if (atomic_read(&map->read) > 0 &&
		    pvcalls_conn_back_read(map))
			eoi_flags = 0;
		if (atomic_read(&map->write) > 0 &&
		    pvcalls_conn_back_write(map))
			eoi_flags = 0;

		if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
			atomic_set(&map->eoi, 0);
			xen_irq_lateeoi(map->irq, eoi_flags);
			eoi_flags = XEN_EOI_FLAG_SPURIOUS;
		}

		atomic_dec(&map->io);
	}
}
static int pvcalls_back_socket(struct xenbus_device *dev,
		struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.socket.domain != AF_INET ||
	    req->u.socket.type != SOCK_STREAM ||
	    (req->u.socket.protocol != IPPROTO_IP &&
	     req->u.socket.protocol != AF_INET))
		ret = -EAFNOSUPPORT;
	else
		ret = 0;

	/* leave the actual socket allocation for later */

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.socket.id = req->u.socket.id;
	rsp->ret = ret;

	return 0;
}
static void pvcalls_sk_state_change(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;

	if (map == NULL)
		return;

	atomic_inc(&map->read);
	notify_remote_via_irq(map->irq);
}
static void pvcalls_sk_data_ready(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;
	struct pvcalls_ioworker *iow;

	trace_sk_data_ready(sock);

	if (map == NULL)
		return;

	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);
}
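
/*
 * Create the state for one active connection: map the data ring granted
 * by the frontend, bind its event channel, set up the ioworker and hook
 * the socket callbacks so incoming data wakes the worker up.
 */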
static struct sock_mapping *pvcalls_new_active_socket(
		struct pvcalls_fedata *fedata,
		uint64_t id,
		grant_ref_t ref,
		evtchn_port_t evtchn,
		struct socket *sock)
{
	int ret;
	struct sock_mapping *map;
	void *page;

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL) {
		sock_release(sock);
		return NULL;
	}

	map->fedata = fedata;
	map->sock = sock;
	map->id = id;
	map->ref = ref;

	ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
	if (ret < 0)
		goto out;
	map->ring = page;
	map->ring_order = map->ring->ring_order;
	/* first read the order, then map the data ring */
	virt_rmb();
	if (map->ring_order > MAX_RING_ORDER) {
		pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
			__func__, map->ring_order, MAX_RING_ORDER);
		goto out;
	}
	ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
				     (1 << map->ring_order), &page);
	if (ret < 0)
		goto out;
	map->bytes = page;

	ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
			fedata->dev, evtchn,
			pvcalls_back_conn_event, 0, "pvcalls-backend", map);
	if (ret < 0)
		goto out;
	map->irq = ret;

	map->data.in = map->bytes;
	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);

	map->ioworker.wq = alloc_ordered_workqueue("pvcalls_io", 0);
	if (!map->ioworker.wq)
		goto out;
	atomic_set(&map->io, 1);
	INIT_WORK(&map->ioworker.register_work,	pvcalls_back_ioworker);

	down(&fedata->socket_lock);
	list_add_tail(&map->list, &fedata->socket_mappings);
	up(&fedata->socket_lock);

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
	map->sock->sk->sk_state_change = pvcalls_sk_state_change;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

	return map;

out:
	down(&fedata->socket_lock);
	list_del(&map->list);
	pvcalls_back_release_active(fedata->dev, fedata, map);
	up(&fedata->socket_lock);
	return NULL;
}
static int pvcalls_back_connect(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct socket *sock;
	struct sock_mapping *map;
	struct xen_pvcalls_response *rsp;
	struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.connect.len < sizeof(sa->sa_family) ||
	    req->u.connect.len > sizeof(req->u.connect.addr) ||
	    sa->sa_family != AF_INET)
		goto out;

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
	if (ret < 0)
		goto out;
	ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
	if (ret < 0) {
		sock_release(sock);
		goto out;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.connect.id,
					req->u.connect.ref,
					req->u.connect.evtchn,
					sock);
	if (!map)
		ret = -EFAULT;

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.connect.id = req->u.connect.id;
	rsp->ret = ret;

	return 0;
}
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map)
{
	disable_irq(map->irq);
	if (map->sock->sk != NULL) {
		write_lock_bh(&map->sock->sk->sk_callback_lock);
		map->sock->sk->sk_user_data = NULL;
		map->sock->sk->sk_data_ready = map->saved_data_ready;
		write_unlock_bh(&map->sock->sk->sk_callback_lock);
	}

	atomic_set(&map->release, 1);
	flush_work(&map->ioworker.register_work);

	xenbus_unmap_ring_vfree(dev, map->bytes);
	xenbus_unmap_ring_vfree(dev, (void *)map->ring);
	unbind_from_irqhandler(map->irq, map);

	sock_release(map->sock);
	kfree(map);

	return 0;
}
static int pvcalls_back_release_passive(struct xenbus_device *dev,
					struct pvcalls_fedata *fedata,
					struct sockpass_mapping *mappass)
{
	if (mappass->sock->sk != NULL) {
		write_lock_bh(&mappass->sock->sk->sk_callback_lock);
		mappass->sock->sk->sk_user_data = NULL;
		mappass->sock->sk->sk_data_ready = mappass->saved_data_ready;
		write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
	}
	sock_release(mappass->sock);
	destroy_workqueue(mappass->wq);
	kfree(mappass);

	return 0;
}
static int pvcalls_back_release(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	int ret = 0;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		if (map->id == req->u.release.id) {
			list_del(&map->list);
			up(&fedata->socket_lock);
			ret = pvcalls_back_release_active(dev, fedata, map);
			goto out;
		}
	}
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.release.id);
	if (mappass != NULL) {
		radix_tree_delete(&fedata->socketpass_mappings, mappass->id);
		up(&fedata->socket_lock);
		ret = pvcalls_back_release_passive(dev, fedata, mappass);
	} else
		up(&fedata->socket_lock);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->u.release.id = req->u.release.id;
	rsp->cmd = req->cmd;
	rsp->ret = ret;
	return 0;
}
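
/*
 * Deferred accept: runs from the sockpass workqueue once
 * pvcalls_back_accept() has stashed the request in reqcopy, completes
 * inet_accept() on the listening socket and promotes the new socket to
 * an active sock_mapping before replying to the frontend.
 */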
static void __pvcalls_back_accept(struct work_struct *work)
{
	struct sockpass_mapping *mappass = container_of(
		work, struct sockpass_mapping, register_work);
	struct proto_accept_arg arg = {
		.flags = O_NONBLOCK,
		.kern = true,
	};
	struct sock_mapping *map;
	struct pvcalls_ioworker *iow;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	struct xen_pvcalls_response *rsp;
	struct xen_pvcalls_request *req;
	int notify;
	int ret = -EINVAL;
	unsigned long flags;

	fedata = mappass->fedata;
	/*
	 * __pvcalls_back_accept can race against pvcalls_back_accept.
	 * We only need to check the value of "cmd" on read. It could be
	 * done atomically, but to simplify the code on the write side, we
	 * use a spinlock.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	req = &mappass->reqcopy;
	if (req->cmd != PVCALLS_ACCEPT) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		return;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	sock = sock_alloc();
	if (sock == NULL)
		goto out_error;
	sock->type = mappass->sock->type;
	sock->ops = mappass->sock->ops;

	ret = inet_accept(mappass->sock, sock, &arg);
	if (ret == -EAGAIN) {
		sock_release(sock);
		return;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.accept.id_new,
					req->u.accept.ref,
					req->u.accept.evtchn,
					sock);
	if (!map) {
		ret = -EFAULT;
		goto out_error;
	}

	map->sockpass = mappass;
	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
	if (notify)
		notify_remote_via_irq(fedata->irq);

	mappass->reqcopy.cmd = 0;
}
static void pvcalls_pass_sk_data_ready(struct sock *sock)
{
	struct sockpass_mapping *mappass = sock->sk_user_data;
	struct pvcalls_fedata *fedata;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;
	int notify;

	trace_sk_data_ready(sock);

	if (mappass == NULL)
		return;

	fedata = mappass->fedata;
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd == PVCALLS_POLL) {
		rsp = RING_GET_RESPONSE(&fedata->ring,
					fedata->ring.rsp_prod_pvt++);
		rsp->req_id = mappass->reqcopy.req_id;
		rsp->u.poll.id = mappass->reqcopy.u.poll.id;
		rsp->cmd = mappass->reqcopy.cmd;
		rsp->ret = 0;

		mappass->reqcopy.cmd = 0;
		spin_unlock_irqrestore(&mappass->copy_lock, flags);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
		if (notify)
			notify_remote_via_irq(mappass->fedata->irq);
	} else {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		queue_work(mappass->wq, &mappass->register_work);
	}
}
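
/*
 * Handle a bind request: create a bound kernel socket and the passive
 * mapping (workqueue, reqcopy slot, data_ready hook) used later to
 * complete accept and poll requests asynchronously.
 */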
static int pvcalls_back_bind(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&map->register_work, __pvcalls_back_accept);
	spin_lock_init(&map->copy_lock);
	map->wq = alloc_ordered_workqueue("pvcalls_wq", 0);
	if (!map->wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock);
	if (ret < 0)
		goto out;

	ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr,
			req->u.bind.len);
	if (ret < 0)
		goto out;

	map->fedata = fedata;
	map->id = req->u.bind.id;

	down(&fedata->socket_lock);
	ret = radix_tree_insert(&fedata->socketpass_mappings, map->id,
				map);
	up(&fedata->socket_lock);
	if (ret)
		goto out;

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

out:
	if (ret) {
		if (map && map->sock)
			sock_release(map->sock);
		if (map && map->wq)
			destroy_workqueue(map->wq);
		kfree(map);
	}

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.bind.id = req->u.bind.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_listen(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id);
	up(&fedata->socket_lock);
	if (map == NULL)
		goto out;

	ret = inet_listen(map->sock, req->u.listen.backlog);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.listen.id = req->u.listen.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_accept(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	int ret = -EINVAL;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
		req->u.accept.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		goto out_error;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		ret = -EINTR;
		goto out_error;
	}

	mappass->reqcopy = *req;
	spin_unlock_irqrestore(&mappass->copy_lock, flags);
	queue_work(mappass->wq, &mappass->register_work);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_poll(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	struct xen_pvcalls_response *rsp;
	struct inet_connection_sock *icsk;
	struct request_sock_queue *queue;
	unsigned long flags;
	int ret;
	bool data;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.poll.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		return -EINVAL;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		ret = -EINTR;
		goto out;
	}

	mappass->reqcopy = *req;
	icsk = inet_csk(mappass->sock->sk);
	queue = &icsk->icsk_accept_queue;
	data = READ_ONCE(queue->rskq_accept_head) != NULL;
	if (data) {
		mappass->reqcopy.cmd = 0;
		ret = 0;
		goto out;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out:
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.poll.id = req->u.poll.id;
	rsp->ret = ret;
	return 0;
}
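
/*
 * Dispatch one command ring request. A return value of 0 means the
 * handler already queued a response; accept and poll return -1 when the
 * reply will be produced later from a workqueue or socket callback.
 */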
static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
				   struct xen_pvcalls_request *req)
{
	int ret = 0;

	switch (req->cmd) {
	case PVCALLS_SOCKET:
		ret = pvcalls_back_socket(dev, req);
		break;
	case PVCALLS_CONNECT:
		ret = pvcalls_back_connect(dev, req);
		break;
	case PVCALLS_RELEASE:
		ret = pvcalls_back_release(dev, req);
		break;
	case PVCALLS_BIND:
		ret = pvcalls_back_bind(dev, req);
		break;
	case PVCALLS_LISTEN:
		ret = pvcalls_back_listen(dev, req);
		break;
	case PVCALLS_ACCEPT:
		ret = pvcalls_back_accept(dev, req);
		break;
	case PVCALLS_POLL:
		ret = pvcalls_back_poll(dev, req);
		break;
	default:
	{
		struct pvcalls_fedata *fedata;
		struct xen_pvcalls_response *rsp;

		fedata = dev_get_drvdata(&dev->dev);
		rsp = RING_GET_RESPONSE(
				&fedata->ring, fedata->ring.rsp_prod_pvt++);
		rsp->req_id = req->req_id;
		rsp->cmd = req->cmd;
		rsp->ret = -ENOTSUPP;
		break;
	}
	}
	return ret;
}
static void pvcalls_back_work(struct pvcalls_fedata *fedata)
{
	int notify, notify_all = 0, more = 1;
	struct xen_pvcalls_request req;
	struct xenbus_device *dev = fedata->dev;

	while (more) {
		while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
			RING_COPY_REQUEST(&fedata->ring,
					  fedata->ring.req_cons++,
					  &req);

			if (!pvcalls_back_handle_cmd(dev, &req)) {
				RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
					&fedata->ring, notify);
				notify_all += notify;
			}
		}

		if (notify_all) {
			notify_remote_via_irq(fedata->irq);
			notify_all = 0;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
	}
}
static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
	struct xenbus_device *dev = dev_id;
	struct pvcalls_fedata *fedata = NULL;
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	if (dev) {
		fedata = dev_get_drvdata(&dev->dev);
		if (fedata) {
			pvcalls_back_work(fedata);
			eoi_flags = 0;
		}
	}

	xen_irq_lateeoi(irq, eoi_flags);

	return IRQ_HANDLED;
}
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
{
	struct sock_mapping *map = sock_map;
	struct pvcalls_ioworker *iow;

	if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
		map->sock->sk->sk_user_data != map) {
		xen_irq_lateeoi(irq, 0);
		return IRQ_HANDLED;
	}

	iow = &map->ioworker;

	atomic_inc(&map->write);
	atomic_inc(&map->eoi);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

	return IRQ_HANDLED;
}
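
/*
 * Bring up one frontend: read the command ring grant reference and
 * event channel from xenstore, map the ring, bind the interrupt and add
 * the per-frontend state to the global frontends list.
 */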
static int backend_connect(struct xenbus_device *dev)
{
	int err;
	evtchn_port_t evtchn;
	grant_ref_t ring_ref;
	struct pvcalls_fedata *fedata = NULL;

	fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
	if (!fedata)
		return -ENOMEM;

	fedata->irq = -1;
	err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
			   &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		goto error;
	}

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
				 dev->otherend);
		goto error;
	}

	err = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
	if (err < 0)
		goto error;
	fedata->irq = err;

	err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
				   IRQF_ONESHOT, "pvcalls-back", dev);
	if (err < 0)
		goto error;

	err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
				     (void **)&fedata->sring);
	if (err < 0)
		goto error;

	BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
	fedata->dev = dev;

	INIT_LIST_HEAD(&fedata->socket_mappings);
	INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
	sema_init(&fedata->socket_lock, 1);
	dev_set_drvdata(&dev->dev, fedata);

	down(&pvcalls_back_global.frontends_lock);
	list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
	up(&pvcalls_back_global.frontends_lock);

	return 0;

error:
	if (fedata->irq >= 0)
		unbind_from_irqhandler(fedata->irq, dev);
	if (fedata->sring != NULL)
		xenbus_unmap_ring_vfree(dev, fedata->sring);
	kfree(fedata);
	return err;
}
static int backend_disconnect(struct xenbus_device *dev)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	struct radix_tree_iter iter;
	void **slot;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		list_del(&map->list);
		pvcalls_back_release_active(dev, fedata, map);
	}

	radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) {
		mappass = radix_tree_deref_slot(slot);
		if (!mappass)
			continue;
		if (radix_tree_exception(mappass)) {
			if (radix_tree_deref_retry(mappass))
				slot = radix_tree_iter_retry(&iter);
		} else {
			radix_tree_delete(&fedata->socketpass_mappings,
					  mappass->id);
			pvcalls_back_release_passive(dev, fedata, mappass);
		}
	}
	up(&fedata->socket_lock);

	unbind_from_irqhandler(fedata->irq, dev);
	xenbus_unmap_ring_vfree(dev, fedata->sring);

	list_del(&fedata->list);
	kfree(fedata);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
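
/*
 * Advertise the backend's capabilities (protocol versions, maximum data
 * ring order, supported function calls) in xenstore inside a single
 * transaction, retrying on -EAGAIN.
 */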
static int pvcalls_back_probe(struct xenbus_device *dev,
			      const struct xenbus_device_id *id)
{
	int err, abort;
	struct xenbus_transaction xbt;

again:
	abort = 1;

	err = xenbus_transaction_start(&xbt);
	if (err) {
		pr_warn("%s cannot create xenstore transaction\n", __func__);
		return err;
	}

	err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
			    PVCALLS_VERSIONS);
	if (err) {
		pr_warn("%s write out 'versions' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
			    MAX_RING_ORDER);
	if (err) {
		pr_warn("%s write out 'max-page-order' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "function-calls",
			    XENBUS_FUNCTIONS_CALLS);
	if (err) {
		pr_warn("%s write out 'function-calls' failed\n", __func__);
		goto abort;
	}

	abort = 0;
abort:
	err = xenbus_transaction_end(xbt, abort);
	if (err) {
		if (err == -EAGAIN && !abort)
			goto again;
		pr_warn("%s cannot complete xenstore transaction\n", __func__);
		return err;
	}

	if (abort)
		return -EFAULT;

	xenbus_switch_state(dev, XenbusStateInitWait);

	return 0;
}
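
/*
 * Step the backend through the xenbus state machine until it reaches
 * the requested state, connecting or disconnecting the frontend along
 * the way.
 */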
static void set_backend_state(struct xenbus_device *dev,
			      enum xenbus_state state)
{
	while (dev->state != state) {
		switch (dev->state) {
		case XenbusStateClosed:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
				xenbus_switch_state(dev, XenbusStateInitWait);
				break;
			case XenbusStateClosing:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateInitWait:
		case XenbusStateInitialised:
			switch (state) {
			case XenbusStateConnected:
				if (backend_connect(dev))
					return;
				xenbus_switch_state(dev, XenbusStateConnected);
				break;
			case XenbusStateClosing:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateConnected:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateClosing:
			case XenbusStateClosed:
				down(&pvcalls_back_global.frontends_lock);
				backend_disconnect(dev);
				up(&pvcalls_back_global.frontends_lock);
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateClosing:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosed);
				break;
			default:
				WARN_ON(1);
			}
			break;
		default:
			WARN_ON(1);
		}
	}
}
static void pvcalls_back_changed(struct xenbus_device *dev,
				 enum xenbus_state frontend_state)
{
	switch (frontend_state) {
	case XenbusStateInitialising:
		set_backend_state(dev, XenbusStateInitWait);
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		set_backend_state(dev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		set_backend_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		set_backend_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		device_unregister(&dev->dev);
		break;
	case XenbusStateUnknown:
		set_backend_state(dev, XenbusStateClosed);
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}
static void pvcalls_back_remove(struct xenbus_device *dev)
{
}

static int pvcalls_back_uevent(const struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}

static const struct xenbus_device_id pvcalls_back_ids[] = {
	{ "pvcalls" },
	{ "" }
};

static struct xenbus_driver pvcalls_back_driver = {
	.ids = pvcalls_back_ids,
	.probe = pvcalls_back_probe,
	.remove = pvcalls_back_remove,
	.uevent = pvcalls_back_uevent,
	.otherend_changed = pvcalls_back_changed,
};
static int __init pvcalls_back_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	ret = xenbus_register_backend(&pvcalls_back_driver);
	if (ret < 0)
		return ret;

	sema_init(&pvcalls_back_global.frontends_lock, 1);
	INIT_LIST_HEAD(&pvcalls_back_global.frontends);
	return 0;
}
module_init(pvcalls_back_init);
static void __exit pvcalls_back_fin(void)
{
	struct pvcalls_fedata *fedata, *nfedata;

	down(&pvcalls_back_global.frontends_lock);
	list_for_each_entry_safe(fedata, nfedata,
				 &pvcalls_back_global.frontends, list) {
		backend_disconnect(fedata->dev);
	}
	up(&pvcalls_back_global.frontends_lock);

	xenbus_unregister_driver(&pvcalls_back_driver);
}
module_exit(pvcalls_back_fin);
MODULE_DESCRIPTION("Xen PV Calls backend driver");
MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
MODULE_LICENSE("GPL");