2 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
15 #include <linux/module.h>
16 #include <linux/net.h>
17 #include <linux/socket.h>
21 #include <xen/events.h>
22 #include <xen/grant_table.h>
24 #include <xen/xenbus.h>
25 #include <xen/interface/io/pvcalls.h>
27 #include "pvcalls-front.h"
29 #define PVCALLS_INVALID_ID UINT_MAX
30 #define PVCALLS_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
31 #define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
32 #define PVCALLS_FRONT_MAX_SPIN 5000
34 struct pvcalls_bedata
{
35 struct xen_pvcalls_front_ring ring
;
39 struct list_head socket_mappings
;
40 spinlock_t socket_lock
;
42 wait_queue_head_t inflight_req
;
43 struct xen_pvcalls_response rsp
[PVCALLS_NR_RSP_PER_RING
];
45 /* Only one front/back connection supported. */
46 static struct xenbus_device
*pvcalls_front_dev
;
47 static atomic_t pvcalls_refcount
;
49 /* first increment refcount, then proceed */
50 #define pvcalls_enter() { \
51 atomic_inc(&pvcalls_refcount); \
54 /* first complete other operations, then decrement refcount */
55 #define pvcalls_exit() { \
56 atomic_dec(&pvcalls_refcount); \
61 struct list_head list
;
67 struct pvcalls_data_intf
*ring
;
68 struct pvcalls_data data
;
69 struct mutex in_mutex
;
70 struct mutex out_mutex
;
72 wait_queue_head_t inflight_conn_req
;
76 #define PVCALLS_STATUS_UNINITALIZED 0
77 #define PVCALLS_STATUS_BIND 1
78 #define PVCALLS_STATUS_LISTEN 2
81 * Internal state-machine flags.
82 * Only one accept operation can be inflight for a socket.
83 * Only one poll operation can be inflight for a given socket.
85 #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
86 #define PVCALLS_FLAG_POLL_INFLIGHT 1
87 #define PVCALLS_FLAG_POLL_RET 2
89 uint32_t inflight_req_id
;
90 struct sock_mapping
*accept_map
;
91 wait_queue_head_t inflight_accept_req
;
96 static inline int get_request(struct pvcalls_bedata
*bedata
, int *req_id
)
98 *req_id
= bedata
->ring
.req_prod_pvt
& (RING_SIZE(&bedata
->ring
) - 1);
99 if (RING_FULL(&bedata
->ring
) ||
100 bedata
->rsp
[*req_id
].req_id
!= PVCALLS_INVALID_ID
)
105 static bool pvcalls_front_write_todo(struct sock_mapping
*map
)
107 struct pvcalls_data_intf
*intf
= map
->active
.ring
;
108 RING_IDX cons
, prod
, size
= XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
);
111 error
= intf
->out_error
;
112 if (error
== -ENOTCONN
)
117 cons
= intf
->out_cons
;
118 prod
= intf
->out_prod
;
119 return !!(size
- pvcalls_queued(prod
, cons
, size
));
122 static bool pvcalls_front_read_todo(struct sock_mapping
*map
)
124 struct pvcalls_data_intf
*intf
= map
->active
.ring
;
128 cons
= intf
->in_cons
;
129 prod
= intf
->in_prod
;
130 error
= intf
->in_error
;
131 return (error
!= 0 ||
132 pvcalls_queued(prod
, cons
,
133 XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
)) != 0);
136 static irqreturn_t
pvcalls_front_event_handler(int irq
, void *dev_id
)
138 struct xenbus_device
*dev
= dev_id
;
139 struct pvcalls_bedata
*bedata
;
140 struct xen_pvcalls_response
*rsp
;
142 int req_id
= 0, more
= 0, done
= 0;
148 bedata
= dev_get_drvdata(&dev
->dev
);
149 if (bedata
== NULL
) {
155 while (RING_HAS_UNCONSUMED_RESPONSES(&bedata
->ring
)) {
156 rsp
= RING_GET_RESPONSE(&bedata
->ring
, bedata
->ring
.rsp_cons
);
158 req_id
= rsp
->req_id
;
159 if (rsp
->cmd
== PVCALLS_POLL
) {
160 struct sock_mapping
*map
= (struct sock_mapping
*)(uintptr_t)
163 clear_bit(PVCALLS_FLAG_POLL_INFLIGHT
,
164 (void *)&map
->passive
.flags
);
166 * clear INFLIGHT, then set RET. It pairs with
167 * the checks at the beginning of
168 * pvcalls_front_poll_passive.
171 set_bit(PVCALLS_FLAG_POLL_RET
,
172 (void *)&map
->passive
.flags
);
174 dst
= (uint8_t *)&bedata
->rsp
[req_id
] +
176 src
= (uint8_t *)rsp
+ sizeof(rsp
->req_id
);
177 memcpy(dst
, src
, sizeof(*rsp
) - sizeof(rsp
->req_id
));
179 * First copy the rest of the data, then req_id. It is
180 * paired with the barrier when accessing bedata->rsp.
183 bedata
->rsp
[req_id
].req_id
= req_id
;
187 bedata
->ring
.rsp_cons
++;
190 RING_FINAL_CHECK_FOR_RESPONSES(&bedata
->ring
, more
);
194 wake_up(&bedata
->inflight_req
);
199 static void pvcalls_front_free_map(struct pvcalls_bedata
*bedata
,
200 struct sock_mapping
*map
)
204 unbind_from_irqhandler(map
->active
.irq
, map
);
206 spin_lock(&bedata
->socket_lock
);
207 if (!list_empty(&map
->list
))
208 list_del_init(&map
->list
);
209 spin_unlock(&bedata
->socket_lock
);
211 for (i
= 0; i
< (1 << PVCALLS_RING_ORDER
); i
++)
212 gnttab_end_foreign_access(map
->active
.ring
->ref
[i
], 0, 0);
213 gnttab_end_foreign_access(map
->active
.ref
, 0, 0);
214 free_page((unsigned long)map
->active
.ring
);
219 static irqreturn_t
pvcalls_front_conn_handler(int irq
, void *sock_map
)
221 struct sock_mapping
*map
= sock_map
;
226 wake_up_interruptible(&map
->active
.inflight_conn_req
);
231 int pvcalls_front_socket(struct socket
*sock
)
233 struct pvcalls_bedata
*bedata
;
234 struct sock_mapping
*map
= NULL
;
235 struct xen_pvcalls_request
*req
;
236 int notify
, req_id
, ret
;
239 * PVCalls only supports domain AF_INET,
240 * type SOCK_STREAM and protocol 0 sockets for now.
242 * Check socket type here, AF_INET and protocol checks are done
245 if (sock
->type
!= SOCK_STREAM
)
249 if (!pvcalls_front_dev
) {
253 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
255 map
= kzalloc(sizeof(*map
), GFP_KERNEL
);
261 spin_lock(&bedata
->socket_lock
);
263 ret
= get_request(bedata
, &req_id
);
266 spin_unlock(&bedata
->socket_lock
);
272 * sock->sk->sk_send_head is not used for ip sockets: reuse the
273 * field to store a pointer to the struct sock_mapping
274 * corresponding to the socket. This way, we can easily get the
275 * struct sock_mapping from the struct socket.
277 sock
->sk
->sk_send_head
= (void *)map
;
278 list_add_tail(&map
->list
, &bedata
->socket_mappings
);
280 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
281 req
->req_id
= req_id
;
282 req
->cmd
= PVCALLS_SOCKET
;
283 req
->u
.socket
.id
= (uintptr_t) map
;
284 req
->u
.socket
.domain
= AF_INET
;
285 req
->u
.socket
.type
= SOCK_STREAM
;
286 req
->u
.socket
.protocol
= IPPROTO_IP
;
288 bedata
->ring
.req_prod_pvt
++;
289 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
290 spin_unlock(&bedata
->socket_lock
);
292 notify_remote_via_irq(bedata
->irq
);
294 wait_event(bedata
->inflight_req
,
295 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
);
297 /* read req_id, then the content */
299 ret
= bedata
->rsp
[req_id
].ret
;
300 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
306 static int create_active(struct sock_mapping
*map
, int *evtchn
)
309 int ret
= -ENOMEM
, irq
= -1, i
;
312 init_waitqueue_head(&map
->active
.inflight_conn_req
);
314 map
->active
.ring
= (struct pvcalls_data_intf
*)
315 __get_free_page(GFP_KERNEL
| __GFP_ZERO
);
316 if (map
->active
.ring
== NULL
)
318 map
->active
.ring
->ring_order
= PVCALLS_RING_ORDER
;
319 bytes
= (void *)__get_free_pages(GFP_KERNEL
| __GFP_ZERO
,
323 for (i
= 0; i
< (1 << PVCALLS_RING_ORDER
); i
++)
324 map
->active
.ring
->ref
[i
] = gnttab_grant_foreign_access(
325 pvcalls_front_dev
->otherend_id
,
326 pfn_to_gfn(virt_to_pfn(bytes
) + i
), 0);
328 map
->active
.ref
= gnttab_grant_foreign_access(
329 pvcalls_front_dev
->otherend_id
,
330 pfn_to_gfn(virt_to_pfn((void *)map
->active
.ring
)), 0);
332 map
->active
.data
.in
= bytes
;
333 map
->active
.data
.out
= bytes
+
334 XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
);
336 ret
= xenbus_alloc_evtchn(pvcalls_front_dev
, evtchn
);
339 irq
= bind_evtchn_to_irqhandler(*evtchn
, pvcalls_front_conn_handler
,
340 0, "pvcalls-frontend", map
);
346 map
->active
.irq
= irq
;
347 map
->active_socket
= true;
348 mutex_init(&map
->active
.in_mutex
);
349 mutex_init(&map
->active
.out_mutex
);
355 xenbus_free_evtchn(pvcalls_front_dev
, *evtchn
);
356 kfree(map
->active
.data
.in
);
357 kfree(map
->active
.ring
);
361 int pvcalls_front_connect(struct socket
*sock
, struct sockaddr
*addr
,
362 int addr_len
, int flags
)
364 struct pvcalls_bedata
*bedata
;
365 struct sock_mapping
*map
= NULL
;
366 struct xen_pvcalls_request
*req
;
367 int notify
, req_id
, ret
, evtchn
;
369 if (addr
->sa_family
!= AF_INET
|| sock
->type
!= SOCK_STREAM
)
373 if (!pvcalls_front_dev
) {
378 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
380 map
= (struct sock_mapping
*)sock
->sk
->sk_send_head
;
386 spin_lock(&bedata
->socket_lock
);
387 ret
= get_request(bedata
, &req_id
);
389 spin_unlock(&bedata
->socket_lock
);
393 ret
= create_active(map
, &evtchn
);
395 spin_unlock(&bedata
->socket_lock
);
400 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
401 req
->req_id
= req_id
;
402 req
->cmd
= PVCALLS_CONNECT
;
403 req
->u
.connect
.id
= (uintptr_t)map
;
404 req
->u
.connect
.len
= addr_len
;
405 req
->u
.connect
.flags
= flags
;
406 req
->u
.connect
.ref
= map
->active
.ref
;
407 req
->u
.connect
.evtchn
= evtchn
;
408 memcpy(req
->u
.connect
.addr
, addr
, sizeof(*addr
));
412 bedata
->ring
.req_prod_pvt
++;
413 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
414 spin_unlock(&bedata
->socket_lock
);
417 notify_remote_via_irq(bedata
->irq
);
419 wait_event(bedata
->inflight_req
,
420 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
);
422 /* read req_id, then the content */
424 ret
= bedata
->rsp
[req_id
].ret
;
425 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
430 static int __write_ring(struct pvcalls_data_intf
*intf
,
431 struct pvcalls_data
*data
,
432 struct iov_iter
*msg_iter
,
435 RING_IDX cons
, prod
, size
, masked_prod
, masked_cons
;
436 RING_IDX array_size
= XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
);
439 error
= intf
->out_error
;
442 cons
= intf
->out_cons
;
443 prod
= intf
->out_prod
;
444 /* read indexes before continuing */
447 size
= pvcalls_queued(prod
, cons
, array_size
);
448 if (size
>= array_size
)
450 if (len
> array_size
- size
)
451 len
= array_size
- size
;
453 masked_prod
= pvcalls_mask(prod
, array_size
);
454 masked_cons
= pvcalls_mask(cons
, array_size
);
456 if (masked_prod
< masked_cons
) {
457 len
= copy_from_iter(data
->out
+ masked_prod
, len
, msg_iter
);
459 if (len
> array_size
- masked_prod
) {
460 int ret
= copy_from_iter(data
->out
+ masked_prod
,
461 array_size
- masked_prod
, msg_iter
);
462 if (ret
!= array_size
- masked_prod
) {
466 len
= ret
+ copy_from_iter(data
->out
, len
- ret
, msg_iter
);
468 len
= copy_from_iter(data
->out
+ masked_prod
, len
, msg_iter
);
472 /* write to ring before updating pointer */
474 intf
->out_prod
+= len
;
479 int pvcalls_front_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
482 struct pvcalls_bedata
*bedata
;
483 struct sock_mapping
*map
;
484 int sent
, tot_sent
= 0;
485 int count
= 0, flags
;
487 flags
= msg
->msg_flags
;
488 if (flags
& (MSG_CONFIRM
|MSG_DONTROUTE
|MSG_EOR
|MSG_OOB
))
492 if (!pvcalls_front_dev
) {
496 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
498 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
504 mutex_lock(&map
->active
.out_mutex
);
505 if ((flags
& MSG_DONTWAIT
) && !pvcalls_front_write_todo(map
)) {
506 mutex_unlock(&map
->active
.out_mutex
);
515 sent
= __write_ring(map
->active
.ring
,
516 &map
->active
.data
, &msg
->msg_iter
,
521 notify_remote_via_irq(map
->active
.irq
);
523 if (sent
>= 0 && len
> 0 && count
< PVCALLS_FRONT_MAX_SPIN
)
528 mutex_unlock(&map
->active
.out_mutex
);
533 static int __read_ring(struct pvcalls_data_intf
*intf
,
534 struct pvcalls_data
*data
,
535 struct iov_iter
*msg_iter
,
536 size_t len
, int flags
)
538 RING_IDX cons
, prod
, size
, masked_prod
, masked_cons
;
539 RING_IDX array_size
= XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
);
542 cons
= intf
->in_cons
;
543 prod
= intf
->in_prod
;
544 error
= intf
->in_error
;
545 /* get pointers before reading from the ring */
550 size
= pvcalls_queued(prod
, cons
, array_size
);
551 masked_prod
= pvcalls_mask(prod
, array_size
);
552 masked_cons
= pvcalls_mask(cons
, array_size
);
560 if (masked_prod
> masked_cons
) {
561 len
= copy_to_iter(data
->in
+ masked_cons
, len
, msg_iter
);
563 if (len
> (array_size
- masked_cons
)) {
564 int ret
= copy_to_iter(data
->in
+ masked_cons
,
565 array_size
- masked_cons
, msg_iter
);
566 if (ret
!= array_size
- masked_cons
) {
570 len
= ret
+ copy_to_iter(data
->in
, len
- ret
, msg_iter
);
572 len
= copy_to_iter(data
->in
+ masked_cons
, len
, msg_iter
);
576 /* read data from the ring before increasing the index */
578 if (!(flags
& MSG_PEEK
))
579 intf
->in_cons
+= len
;
584 int pvcalls_front_recvmsg(struct socket
*sock
, struct msghdr
*msg
, size_t len
,
587 struct pvcalls_bedata
*bedata
;
589 struct sock_mapping
*map
;
591 if (flags
& (MSG_CMSG_CLOEXEC
|MSG_ERRQUEUE
|MSG_OOB
|MSG_TRUNC
))
595 if (!pvcalls_front_dev
) {
599 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
601 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
607 mutex_lock(&map
->active
.in_mutex
);
608 if (len
> XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
))
609 len
= XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER
);
611 while (!(flags
& MSG_DONTWAIT
) && !pvcalls_front_read_todo(map
)) {
612 wait_event_interruptible(map
->active
.inflight_conn_req
,
613 pvcalls_front_read_todo(map
));
615 ret
= __read_ring(map
->active
.ring
, &map
->active
.data
,
616 &msg
->msg_iter
, len
, flags
);
619 notify_remote_via_irq(map
->active
.irq
);
621 ret
= (flags
& MSG_DONTWAIT
) ? -EAGAIN
: 0;
622 if (ret
== -ENOTCONN
)
625 mutex_unlock(&map
->active
.in_mutex
);
630 int pvcalls_front_bind(struct socket
*sock
, struct sockaddr
*addr
, int addr_len
)
632 struct pvcalls_bedata
*bedata
;
633 struct sock_mapping
*map
= NULL
;
634 struct xen_pvcalls_request
*req
;
635 int notify
, req_id
, ret
;
637 if (addr
->sa_family
!= AF_INET
|| sock
->type
!= SOCK_STREAM
)
641 if (!pvcalls_front_dev
) {
645 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
647 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
653 spin_lock(&bedata
->socket_lock
);
654 ret
= get_request(bedata
, &req_id
);
656 spin_unlock(&bedata
->socket_lock
);
660 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
661 req
->req_id
= req_id
;
663 req
->cmd
= PVCALLS_BIND
;
664 req
->u
.bind
.id
= (uintptr_t)map
;
665 memcpy(req
->u
.bind
.addr
, addr
, sizeof(*addr
));
666 req
->u
.bind
.len
= addr_len
;
668 init_waitqueue_head(&map
->passive
.inflight_accept_req
);
670 map
->active_socket
= false;
672 bedata
->ring
.req_prod_pvt
++;
673 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
674 spin_unlock(&bedata
->socket_lock
);
676 notify_remote_via_irq(bedata
->irq
);
678 wait_event(bedata
->inflight_req
,
679 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
);
681 /* read req_id, then the content */
683 ret
= bedata
->rsp
[req_id
].ret
;
684 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
686 map
->passive
.status
= PVCALLS_STATUS_BIND
;
691 int pvcalls_front_listen(struct socket
*sock
, int backlog
)
693 struct pvcalls_bedata
*bedata
;
694 struct sock_mapping
*map
;
695 struct xen_pvcalls_request
*req
;
696 int notify
, req_id
, ret
;
699 if (!pvcalls_front_dev
) {
703 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
705 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
711 if (map
->passive
.status
!= PVCALLS_STATUS_BIND
) {
716 spin_lock(&bedata
->socket_lock
);
717 ret
= get_request(bedata
, &req_id
);
719 spin_unlock(&bedata
->socket_lock
);
723 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
724 req
->req_id
= req_id
;
725 req
->cmd
= PVCALLS_LISTEN
;
726 req
->u
.listen
.id
= (uintptr_t) map
;
727 req
->u
.listen
.backlog
= backlog
;
729 bedata
->ring
.req_prod_pvt
++;
730 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
731 spin_unlock(&bedata
->socket_lock
);
733 notify_remote_via_irq(bedata
->irq
);
735 wait_event(bedata
->inflight_req
,
736 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
);
738 /* read req_id, then the content */
740 ret
= bedata
->rsp
[req_id
].ret
;
741 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
743 map
->passive
.status
= PVCALLS_STATUS_LISTEN
;
748 int pvcalls_front_accept(struct socket
*sock
, struct socket
*newsock
, int flags
)
750 struct pvcalls_bedata
*bedata
;
751 struct sock_mapping
*map
;
752 struct sock_mapping
*map2
= NULL
;
753 struct xen_pvcalls_request
*req
;
754 int notify
, req_id
, ret
, evtchn
, nonblock
;
757 if (!pvcalls_front_dev
) {
761 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
763 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
769 if (map
->passive
.status
!= PVCALLS_STATUS_LISTEN
) {
774 nonblock
= flags
& SOCK_NONBLOCK
;
776 * Backend only supports 1 inflight accept request, will return
777 * errors for the others
779 if (test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
780 (void *)&map
->passive
.flags
)) {
781 req_id
= READ_ONCE(map
->passive
.inflight_req_id
);
782 if (req_id
!= PVCALLS_INVALID_ID
&&
783 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
) {
784 map2
= map
->passive
.accept_map
;
791 if (wait_event_interruptible(map
->passive
.inflight_accept_req
,
792 !test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
793 (void *)&map
->passive
.flags
))) {
799 spin_lock(&bedata
->socket_lock
);
800 ret
= get_request(bedata
, &req_id
);
802 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
803 (void *)&map
->passive
.flags
);
804 spin_unlock(&bedata
->socket_lock
);
808 map2
= kzalloc(sizeof(*map2
), GFP_KERNEL
);
810 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
811 (void *)&map
->passive
.flags
);
812 spin_unlock(&bedata
->socket_lock
);
816 ret
= create_active(map2
, &evtchn
);
819 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
820 (void *)&map
->passive
.flags
);
821 spin_unlock(&bedata
->socket_lock
);
825 list_add_tail(&map2
->list
, &bedata
->socket_mappings
);
827 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
828 req
->req_id
= req_id
;
829 req
->cmd
= PVCALLS_ACCEPT
;
830 req
->u
.accept
.id
= (uintptr_t) map
;
831 req
->u
.accept
.ref
= map2
->active
.ref
;
832 req
->u
.accept
.id_new
= (uintptr_t) map2
;
833 req
->u
.accept
.evtchn
= evtchn
;
834 map
->passive
.accept_map
= map2
;
836 bedata
->ring
.req_prod_pvt
++;
837 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
838 spin_unlock(&bedata
->socket_lock
);
840 notify_remote_via_irq(bedata
->irq
);
841 /* We could check if we have received a response before returning. */
843 WRITE_ONCE(map
->passive
.inflight_req_id
, req_id
);
848 if (wait_event_interruptible(bedata
->inflight_req
,
849 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
)) {
853 /* read req_id, then the content */
857 map2
->sock
= newsock
;
858 newsock
->sk
= kzalloc(sizeof(*newsock
->sk
), GFP_KERNEL
);
860 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
861 map
->passive
.inflight_req_id
= PVCALLS_INVALID_ID
;
862 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
863 (void *)&map
->passive
.flags
);
864 pvcalls_front_free_map(bedata
, map2
);
868 newsock
->sk
->sk_send_head
= (void *)map2
;
870 ret
= bedata
->rsp
[req_id
].ret
;
871 bedata
->rsp
[req_id
].req_id
= PVCALLS_INVALID_ID
;
872 map
->passive
.inflight_req_id
= PVCALLS_INVALID_ID
;
874 clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
, (void *)&map
->passive
.flags
);
875 wake_up(&map
->passive
.inflight_accept_req
);
881 static unsigned int pvcalls_front_poll_passive(struct file
*file
,
882 struct pvcalls_bedata
*bedata
,
883 struct sock_mapping
*map
,
886 int notify
, req_id
, ret
;
887 struct xen_pvcalls_request
*req
;
889 if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT
,
890 (void *)&map
->passive
.flags
)) {
891 uint32_t req_id
= READ_ONCE(map
->passive
.inflight_req_id
);
893 if (req_id
!= PVCALLS_INVALID_ID
&&
894 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
)
895 return POLLIN
| POLLRDNORM
;
897 poll_wait(file
, &map
->passive
.inflight_accept_req
, wait
);
901 if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET
,
902 (void *)&map
->passive
.flags
))
903 return POLLIN
| POLLRDNORM
;
906 * First check RET, then INFLIGHT. No barriers necessary to
907 * ensure execution ordering because of the conditional
908 * instructions creating control dependencies.
911 if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT
,
912 (void *)&map
->passive
.flags
)) {
913 poll_wait(file
, &bedata
->inflight_req
, wait
);
917 spin_lock(&bedata
->socket_lock
);
918 ret
= get_request(bedata
, &req_id
);
920 spin_unlock(&bedata
->socket_lock
);
923 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
924 req
->req_id
= req_id
;
925 req
->cmd
= PVCALLS_POLL
;
926 req
->u
.poll
.id
= (uintptr_t) map
;
928 bedata
->ring
.req_prod_pvt
++;
929 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
930 spin_unlock(&bedata
->socket_lock
);
932 notify_remote_via_irq(bedata
->irq
);
934 poll_wait(file
, &bedata
->inflight_req
, wait
);
938 static unsigned int pvcalls_front_poll_active(struct file
*file
,
939 struct pvcalls_bedata
*bedata
,
940 struct sock_mapping
*map
,
943 unsigned int mask
= 0;
944 int32_t in_error
, out_error
;
945 struct pvcalls_data_intf
*intf
= map
->active
.ring
;
947 out_error
= intf
->out_error
;
948 in_error
= intf
->in_error
;
950 poll_wait(file
, &map
->active
.inflight_conn_req
, wait
);
951 if (pvcalls_front_write_todo(map
))
952 mask
|= POLLOUT
| POLLWRNORM
;
953 if (pvcalls_front_read_todo(map
))
954 mask
|= POLLIN
| POLLRDNORM
;
955 if (in_error
!= 0 || out_error
!= 0)
961 unsigned int pvcalls_front_poll(struct file
*file
, struct socket
*sock
,
964 struct pvcalls_bedata
*bedata
;
965 struct sock_mapping
*map
;
969 if (!pvcalls_front_dev
) {
973 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
975 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
980 if (map
->active_socket
)
981 ret
= pvcalls_front_poll_active(file
, bedata
, map
, wait
);
983 ret
= pvcalls_front_poll_passive(file
, bedata
, map
, wait
);
988 int pvcalls_front_release(struct socket
*sock
)
990 struct pvcalls_bedata
*bedata
;
991 struct sock_mapping
*map
;
992 int req_id
, notify
, ret
;
993 struct xen_pvcalls_request
*req
;
995 if (sock
->sk
== NULL
)
999 if (!pvcalls_front_dev
) {
1004 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
1006 map
= (struct sock_mapping
*) sock
->sk
->sk_send_head
;
1012 spin_lock(&bedata
->socket_lock
);
1013 ret
= get_request(bedata
, &req_id
);
1015 spin_unlock(&bedata
->socket_lock
);
1019 sock
->sk
->sk_send_head
= NULL
;
1021 req
= RING_GET_REQUEST(&bedata
->ring
, req_id
);
1022 req
->req_id
= req_id
;
1023 req
->cmd
= PVCALLS_RELEASE
;
1024 req
->u
.release
.id
= (uintptr_t)map
;
1026 bedata
->ring
.req_prod_pvt
++;
1027 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata
->ring
, notify
);
1028 spin_unlock(&bedata
->socket_lock
);
1030 notify_remote_via_irq(bedata
->irq
);
1032 wait_event(bedata
->inflight_req
,
1033 READ_ONCE(bedata
->rsp
[req_id
].req_id
) == req_id
);
1035 if (map
->active_socket
) {
1037 * Set in_error and wake up inflight_conn_req to force
1038 * recvmsg waiters to exit.
1040 map
->active
.ring
->in_error
= -EBADF
;
1041 wake_up_interruptible(&map
->active
.inflight_conn_req
);
1044 * We need to make sure that sendmsg/recvmsg on this socket have
1045 * not started before we've cleared sk_send_head here. The
1046 * easiest (though not optimal) way to guarantee this is to see
1047 * that no pvcall (other than us) is in progress.
1049 while (atomic_read(&pvcalls_refcount
) > 1)
1052 pvcalls_front_free_map(bedata
, map
);
1054 spin_lock(&bedata
->socket_lock
);
1055 list_del(&map
->list
);
1056 spin_unlock(&bedata
->socket_lock
);
1057 if (READ_ONCE(map
->passive
.inflight_req_id
) !=
1058 PVCALLS_INVALID_ID
) {
1059 pvcalls_front_free_map(bedata
,
1060 map
->passive
.accept_map
);
1064 WRITE_ONCE(bedata
->rsp
[req_id
].req_id
, PVCALLS_INVALID_ID
);
1070 static const struct xenbus_device_id pvcalls_front_ids
[] = {
1075 static int pvcalls_front_remove(struct xenbus_device
*dev
)
1077 struct pvcalls_bedata
*bedata
;
1078 struct sock_mapping
*map
= NULL
, *n
;
1080 bedata
= dev_get_drvdata(&pvcalls_front_dev
->dev
);
1081 dev_set_drvdata(&dev
->dev
, NULL
);
1082 pvcalls_front_dev
= NULL
;
1083 if (bedata
->irq
>= 0)
1084 unbind_from_irqhandler(bedata
->irq
, dev
);
1086 list_for_each_entry_safe(map
, n
, &bedata
->socket_mappings
, list
) {
1087 map
->sock
->sk
->sk_send_head
= NULL
;
1088 if (map
->active_socket
) {
1089 map
->active
.ring
->in_error
= -EBADF
;
1090 wake_up_interruptible(&map
->active
.inflight_conn_req
);
1095 while (atomic_read(&pvcalls_refcount
) > 0)
1097 list_for_each_entry_safe(map
, n
, &bedata
->socket_mappings
, list
) {
1098 if (map
->active_socket
) {
1099 /* No need to lock, refcount is 0 */
1100 pvcalls_front_free_map(bedata
, map
);
1102 list_del(&map
->list
);
1106 if (bedata
->ref
!= -1)
1107 gnttab_end_foreign_access(bedata
->ref
, 0, 0);
1108 kfree(bedata
->ring
.sring
);
1110 xenbus_switch_state(dev
, XenbusStateClosed
);
1114 static int pvcalls_front_probe(struct xenbus_device
*dev
,
1115 const struct xenbus_device_id
*id
)
1117 int ret
= -ENOMEM
, evtchn
, i
;
1118 unsigned int max_page_order
, function_calls
, len
;
1120 grant_ref_t gref_head
= 0;
1121 struct xenbus_transaction xbt
;
1122 struct pvcalls_bedata
*bedata
= NULL
;
1123 struct xen_pvcalls_sring
*sring
;
1125 if (pvcalls_front_dev
!= NULL
) {
1126 dev_err(&dev
->dev
, "only one PV Calls connection supported\n");
1130 versions
= xenbus_read(XBT_NIL
, dev
->otherend
, "versions", &len
);
1131 if (IS_ERR(versions
))
1132 return PTR_ERR(versions
);
1135 if (strcmp(versions
, "1")) {
1140 max_page_order
= xenbus_read_unsigned(dev
->otherend
,
1141 "max-page-order", 0);
1142 if (max_page_order
< PVCALLS_RING_ORDER
)
1144 function_calls
= xenbus_read_unsigned(dev
->otherend
,
1145 "function-calls", 0);
1146 /* See XENBUS_FUNCTIONS_CALLS in pvcalls.h */
1147 if (function_calls
!= 1)
1149 pr_info("%s max-page-order is %u\n", __func__
, max_page_order
);
1151 bedata
= kzalloc(sizeof(struct pvcalls_bedata
), GFP_KERNEL
);
1155 dev_set_drvdata(&dev
->dev
, bedata
);
1156 pvcalls_front_dev
= dev
;
1157 init_waitqueue_head(&bedata
->inflight_req
);
1158 INIT_LIST_HEAD(&bedata
->socket_mappings
);
1159 spin_lock_init(&bedata
->socket_lock
);
1163 for (i
= 0; i
< PVCALLS_NR_RSP_PER_RING
; i
++)
1164 bedata
->rsp
[i
].req_id
= PVCALLS_INVALID_ID
;
1166 sring
= (struct xen_pvcalls_sring
*) __get_free_page(GFP_KERNEL
|
1170 SHARED_RING_INIT(sring
);
1171 FRONT_RING_INIT(&bedata
->ring
, sring
, XEN_PAGE_SIZE
);
1173 ret
= xenbus_alloc_evtchn(dev
, &evtchn
);
1177 bedata
->irq
= bind_evtchn_to_irqhandler(evtchn
,
1178 pvcalls_front_event_handler
,
1179 0, "pvcalls-frontend", dev
);
1180 if (bedata
->irq
< 0) {
1185 ret
= gnttab_alloc_grant_references(1, &gref_head
);
1188 ret
= gnttab_claim_grant_reference(&gref_head
);
1192 gnttab_grant_foreign_access_ref(bedata
->ref
, dev
->otherend_id
,
1193 virt_to_gfn((void *)sring
), 0);
1196 ret
= xenbus_transaction_start(&xbt
);
1198 xenbus_dev_fatal(dev
, ret
, "starting transaction");
1201 ret
= xenbus_printf(xbt
, dev
->nodename
, "version", "%u", 1);
1204 ret
= xenbus_printf(xbt
, dev
->nodename
, "ring-ref", "%d", bedata
->ref
);
1207 ret
= xenbus_printf(xbt
, dev
->nodename
, "port", "%u",
1211 ret
= xenbus_transaction_end(xbt
, 0);
1215 xenbus_dev_fatal(dev
, ret
, "completing transaction");
1218 xenbus_switch_state(dev
, XenbusStateInitialised
);
1223 xenbus_transaction_end(xbt
, 1);
1224 xenbus_dev_fatal(dev
, ret
, "writing xenstore");
1226 pvcalls_front_remove(dev
);
1230 static void pvcalls_front_changed(struct xenbus_device
*dev
,
1231 enum xenbus_state backend_state
)
1233 switch (backend_state
) {
1234 case XenbusStateReconfiguring
:
1235 case XenbusStateReconfigured
:
1236 case XenbusStateInitialising
:
1237 case XenbusStateInitialised
:
1238 case XenbusStateUnknown
:
1241 case XenbusStateInitWait
:
1244 case XenbusStateConnected
:
1245 xenbus_switch_state(dev
, XenbusStateConnected
);
1248 case XenbusStateClosed
:
1249 if (dev
->state
== XenbusStateClosed
)
1251 /* Missed the backend's CLOSING state */
1253 case XenbusStateClosing
:
1254 xenbus_frontend_closed(dev
);
1259 static struct xenbus_driver pvcalls_front_driver
= {
1260 .ids
= pvcalls_front_ids
,
1261 .probe
= pvcalls_front_probe
,
1262 .remove
= pvcalls_front_remove
,
1263 .otherend_changed
= pvcalls_front_changed
,
1266 static int __init
pvcalls_frontend_init(void)
1271 pr_info("Initialising Xen pvcalls frontend driver\n");
1273 return xenbus_register_frontend(&pvcalls_front_driver
);
1276 module_init(pvcalls_frontend_init
);
1278 MODULE_DESCRIPTION("Xen PV Calls frontend driver");
1279 MODULE_AUTHOR("Stefano Stabellini <sstabellini@kernel.org>");
1280 MODULE_LICENSE("GPL");