/* Socket event dispatching library - by D.C. van Moolenbroek */

#include <minix/drivers.h>
#include <minix/sockdriver.h>
#include <minix/sockevent.h>

#include "sockevent_proc.h"

#define US		1000000UL	/* microseconds per second */

#define SOCKHASH_SLOTS	256		/* # slots in ID-to-sock hash table */

static SLIST_HEAD(, sock) sockhash[SOCKHASH_SLOTS];

static SLIST_HEAD(, sock) socktimer;

static minix_timer_t sockevent_timer;

static SIMPLEQ_HEAD(, sock) sockevent_pending;

static sockevent_socket_cb_t sockevent_socket_cb = NULL;

static int sockevent_working;

static void socktimer_del(struct sock * sock);
static void sockevent_cancel_send(struct sock * sock,
	struct sockevent_proc * spr, int err);
static void sockevent_cancel_recv(struct sock * sock,
	struct sockevent_proc * spr, int err);
/*
 * Initialize the hash table of sock objects.
 */
	for (slot = 0; slot < __arraycount(sockhash); slot++)
		SLIST_INIT(&sockhash[slot]);
/*
 * Given a socket identifier, return a hash table slot number.
 */
sockhash_slot(sockid_t id)

	/*
	 * The idea of the shift is that a socket driver may offer multiple
	 * classes of sockets, and put the class in the higher bits.  The shift
	 * aims to prevent that all classes' first sockets end up in the same
	 * hash slot.
	 */
	return (id + (id >> 16)) % SOCKHASH_SLOTS;
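	/*
	 * Worked example (added for illustration, not in the original code):
	 * with a socket class stored in the upper 16 bits of the ID, the
	 * first sockets of three classes, 0x00000001, 0x00010001 and
	 * 0x00020001, map to slots 1, 2 and 3 respectively rather than all
	 * colliding in slot 1.
	 */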
/*
 * Obtain a sock object from the hash table using its unique identifier.
 * Return a pointer to the object if found, or NULL otherwise.
 */
sockhash_get(sockid_t id)

	slot = sockhash_slot(id);

	SLIST_FOREACH(sock, &sockhash[slot], sock_hash) {
		if (sock->sock_id == id)
/*
 * Add a sock object to the hash table.  The sock object must have a valid ID
 * in its 'sock_id' field, and must not be in the hash table already.
 */
sockhash_add(struct sock * sock)

	slot = sockhash_slot(sock->sock_id);

	SLIST_INSERT_HEAD(&sockhash[slot], sock, sock_hash);
/*
 * Remove a sock object from the hash table.  The sock object must be in the
 * hash table.
 */
sockhash_del(struct sock * sock)

	slot = sockhash_slot(sock->sock_id);

	/* This macro is O(n). */
	SLIST_REMOVE(&sockhash[slot], sock, sock, sock_hash);
/*
 * Reset a socket object to a proper initial state, with a particular socket
 * identifier, a SOCK_ type, and a socket operations table.  The socket is
 * added to the ID-to-object hash table.  This function always succeeds.
 */
sockevent_reset(struct sock * sock, sockid_t id, int domain, int type,
	const struct sockevent_ops * ops)

	assert(sock != NULL);

	memset(sock, 0, sizeof(*sock));

	sock->sock_domain = domain;
	sock->sock_type = type;

	sock->sock_slowat = 1;
	sock->sock_rlowat = 1;

	sock->sock_ops = ops;
	sock->sock_proc = NULL;
	sock->sock_select.ss_endpt = NONE;
/*
 * Initialize a new socket that will serve as an accepted socket on the given
 * listening socket 'sock'.  The new socket is given as 'newsock', and its new
 * socket identifier is given as 'newid'.  This function always succeeds.
 */
sockevent_clone(struct sock * sock, struct sock * newsock, sockid_t newid)

	sockevent_reset(newsock, newid, (int)sock->sock_domain,
	    sock->sock_type, sock->sock_ops);

	/* These are the settings that are currently inherited. */
	newsock->sock_opt = sock->sock_opt & ~SO_ACCEPTCONN;
	newsock->sock_linger = sock->sock_linger;
	newsock->sock_stimeo = sock->sock_stimeo;
	newsock->sock_rtimeo = sock->sock_rtimeo;
	newsock->sock_slowat = sock->sock_slowat;
	newsock->sock_rlowat = sock->sock_rlowat;

	newsock->sock_flags |= SFL_CLONED;
/*
 * A new socket has just been accepted.  The corresponding listening socket is
 * given as 'sock'.  The new socket has ID 'newid', and if it had not already
 * been added to the hash table through sockevent_clone() before, 'newsock' is
 * a non-NULL pointer which identifies the socket object to clone into.
 */
sockevent_accepted(struct sock * sock, struct sock * newsock, sockid_t newid)

	if (newsock == NULL) {
		if ((newsock = sockhash_get(newid)) == NULL)
			panic("libsockdriver: socket driver returned unknown "
			    "ID %d from accept callback", newid);
	} else
		sockevent_clone(sock, newsock, newid);

	assert(newsock->sock_flags & SFL_CLONED);
	newsock->sock_flags &= ~SFL_CLONED;
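	/*
	 * Recap (added comment, not in the original code): a socket driver
	 * that already ran the new object through sockevent_clone() at
	 * connection time returns only the new ID and leaves the 'newsock'
	 * pointer NULL, in which case the object is looked up by ID above;
	 * a driver that hands over a fresh object passes it in 'newsock' and
	 * it is cloned from the listening socket here.
	 */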
/*
 * Allocate a sock object, by asking the socket driver for one.  On success,
 * return OK, with a pointer to the new object stored in 'sockp'.  This new
 * object has all its fields set to initial values, in part based on the given
 * parameters.  On failure, return an error code.  Failure has two typical
 * causes: either the given domain, type, protocol combination is not
 * supported, or the socket driver is out of sockets (globally or for this
 * combination).
 */
sockevent_alloc(int domain, int type, int protocol, endpoint_t user_endpt,
	struct sock ** sockp)

	const struct sockevent_ops *ops;

	/*
	 * Verify that the given domain is sane.  Unlike the type and protocol,
	 * the domain is already verified by VFS, so we do not limit ourselves
	 * here.  The result is that we can store the domain in just a byte.
	 */
	if (domain < 0 || domain > UINT8_MAX)

	/* Make sure that the library has actually been initialized. */
	if (sockevent_socket_cb == NULL)
		panic("libsockevent: not initialized");

	/*
	 * Ask the socket driver to create a socket for the given combination
	 * of domain, type, and protocol.  If so, let it return a new sock
	 * object, a unique socket identifier for that object, and an
	 * operations table for it.
	 */
	if ((r = sockevent_socket_cb(domain, type, protocol, user_endpt, &sock,

	assert(sock != NULL);

	sockevent_reset(sock, r, domain, type, ops);
/*
 * Free a previously allocated sock object.
 */
sockevent_free(struct sock * sock)

	const struct sockevent_ops *ops;

	assert(sock->sock_proc == NULL);

	/*
	 * Invalidate the operations table on the socket, before freeing the
	 * socket.  This allows us to detect cases where sockevent functions
	 * are called on sockets that have already been freed.
	 */
	ops = sock->sock_ops;
	sock->sock_ops = NULL;

	assert(ops->sop_free != NULL);
/*
 * Create a new socket.
 */
sockevent_socket(int domain, int type, int protocol, endpoint_t user_endpt)

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,

	return sock->sock_id;
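/*
 * Summary of the call flow (added comment, not in the original code): a
 * socket(2) request arriving through libsockdriver ends up in
 * sockevent_socket(), which calls sockevent_alloc(); that in turn asks the
 * socket driver for a sock object through the callback registered at
 * initialization time, and the resulting socket identifier is returned to
 * the caller.
 */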
/*
 * Create a pair of connected sockets.
 */
sockevent_socketpair(int domain, int type, int protocol, endpoint_t user_endpt,

	struct sock *sock1, *sock2;

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,

	/* Creating socket pairs is not always supported. */
	if (sock1->sock_ops->sop_pair == NULL) {
		sockevent_free(sock1);

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
		sockevent_free(sock1);

	assert(sock1->sock_ops == sock2->sock_ops);

	r = sock1->sock_ops->sop_pair(sock1, sock2, user_endpt);

		sockevent_free(sock2);
		sockevent_free(sock1);

	id[0] = sock1->sock_id;
	id[1] = sock2->sock_id;
/*
 * A send request returned EPIPE.  If desired, send a SIGPIPE signal to the
 * user process that issued the request.
 */
sockevent_sigpipe(struct sock * sock, endpoint_t user_endpt, int flags)

	/*
	 * POSIX says that pipe signals should be generated for SOCK_STREAM
	 * sockets.  Linux does just this, NetBSD raises signals for all socket
	 * types.
	 */
	if (sock->sock_type != SOCK_STREAM)

	/*
	 * Why would there be fewer than four ways to do the same thing?
	 * O_NOSIGPIPE, MSG_NOSIGNAL, SO_NOSIGPIPE, and of course blocking
	 * SIGPIPE.  VFS already sets MSG_NOSIGNAL for calls on sockets with
	 * O_NOSIGPIPE.  The fact that SO_NOSIGPIPE is a thing, is also the
	 * reason why we cannot let VFS handle signal generation altogether.
	 */
	if (flags & MSG_NOSIGNAL)

	if (sock->sock_opt & SO_NOSIGPIPE)

	/*
	 * Send a SIGPIPE signal to the user process.  Unfortunately we cannot
	 * guarantee that the SIGPIPE reaches the user process before the send
	 * call returns.  Usually, the scheduling priorities of system services
	 * are such that the signal is likely to arrive first anyway, but if
	 * timely arrival of the signal is required, a more fundamental change
	 * to the system would be needed.
	 */
	sys_kill(user_endpt, SIGPIPE);
/*
 * Suspend a request without data, that is, a bind, connect, accept, or close
 * request.
 */
sockevent_suspend(struct sock * sock, unsigned int event,
	const struct sockdriver_call * __restrict call, endpoint_t user_endpt)

	struct sockevent_proc *spr, **sprp;

	/* There is one slot for each process, so this should never fail. */
	if ((spr = sockevent_proc_alloc()) == NULL)
		panic("libsockevent: too many suspended processes");

	spr->spr_next = NULL;
	spr->spr_event = event;
	spr->spr_timer = FALSE;
	spr->spr_call = *call;
	spr->spr_endpt = user_endpt;

	/*
	 * Add the request to the tail of the queue.  This operation is O(n),
	 * but the number of suspended requests per socket is expected to be
	 * low.
	 */
	for (sprp = &sock->sock_proc; *sprp != NULL;
	    sprp = &(*sprp)->spr_next);
/*
 * Suspend a request with data, that is, a send or receive request.
 */
sockevent_suspend_data(struct sock * sock, unsigned int event, int timer,
	const struct sockdriver_call * __restrict call, endpoint_t user_endpt,
	const struct sockdriver_data * __restrict data, size_t len, size_t off,
	const struct sockdriver_data * __restrict ctl, socklen_t ctl_len,
	socklen_t ctl_off, int flags, int rflags, clock_t time)

	struct sockevent_proc *spr, **sprp;

	/* There is one slot for each process, so this should never fail. */
	if ((spr = sockevent_proc_alloc()) == NULL)
		panic("libsockevent: too many suspended processes");

	spr->spr_next = NULL;
	spr->spr_event = event;
	spr->spr_timer = timer;
	spr->spr_call = *call;
	spr->spr_endpt = user_endpt;
	sockdriver_pack_data(&spr->spr_data, call, data, len);
	spr->spr_datalen = len;
	spr->spr_dataoff = off;
	sockdriver_pack_data(&spr->spr_ctl, call, ctl, ctl_len);
	spr->spr_ctllen = ctl_len;
	spr->spr_ctloff = ctl_off;
	spr->spr_flags = flags;
	spr->spr_rflags = rflags;
	spr->spr_time = time;

	/*
	 * Add the request to the tail of the queue.  This operation is O(n),
	 * but the number of suspended requests per socket is expected to be
	 * low.
	 */
	for (sprp = &sock->sock_proc; *sprp != NULL;
	    sprp = &(*sprp)->spr_next);
/*
 * Return TRUE if there are any suspended requests on the given socket's queue
 * that match any of the events in the given event mask, or FALSE otherwise.
 */
sockevent_has_suspended(struct sock * sock, unsigned int mask)

	struct sockevent_proc *spr;

	for (spr = sock->sock_proc; spr != NULL; spr = spr->spr_next)
		if (spr->spr_event & mask)
/*
 * Check whether the given call is on the given socket's queue of suspended
 * requests.  If so, remove it from the queue and return a pointer to the
 * suspension data structure.  The caller is then responsible for freeing that
 * data structure using sockevent_proc_free().  If the call was not found, the
 * function returns NULL.
 */
static struct sockevent_proc *
sockevent_unsuspend(struct sock * sock, const struct sockdriver_call * call)

	struct sockevent_proc *spr, **sprp;

	/* Find the suspended request being canceled. */
	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL;
	    sprp = &spr->spr_next) {
		if (spr->spr_call.sc_endpt == call->sc_endpt &&
		    spr->spr_call.sc_req == call->sc_req) {
			/* Found; remove and return it. */
			*sprp = spr->spr_next;
/*
 * Attempt to resume the given suspended request for the given socket object.
 * Return TRUE if the suspended request has been fully resumed and can be
 * removed from the queue of suspended requests, or FALSE if it has not been
 * fully resumed and should stay on the queue.  In the latter case, no
 * resumption will be attempted for other suspended requests of the same type.
 */
sockevent_resume(struct sock * sock, struct sockevent_proc * spr)

	struct sock *newsock;
	struct sockdriver_data data, ctl;
	char addr[SOCKADDR_MAX];

	switch (spr->spr_event) {
		/*
		 * If the connect call was suspended for the purpose of
		 * intercepting resumption, simply remove it from the queue.
		 */
		if (spr->spr_call.sc_endpt == NONE)

		if ((r = sock->sock_err) != OK)

		sockdriver_reply_generic(&spr->spr_call, r);

		/*
		 * A previous accept call may not have blocked on a socket that
		 * was not in listening mode.
		 */
		assert(sock->sock_opt & SO_ACCEPTCONN);

		/*
		 * This call is suspended, which implies that the call table
		 * pointer has already tested to be non-NULL.
		 */
		if ((r = sock->sock_ops->sop_accept(sock,
		    (struct sockaddr *)&addr, &addr_len, spr->spr_endpt,
		    &newsock)) == SUSPEND)

		assert(addr_len <= sizeof(addr));

		sockevent_accepted(sock, newsock, r);

		sockdriver_reply_accept(&spr->spr_call, r,
		    (struct sockaddr *)&addr, addr_len);

		if (sock->sock_err != OK || (sock->sock_flags & SFL_SHUT_WR)) {
			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
				r = (int)spr->spr_dataoff;
			else if ((r = sock->sock_err) != OK)

			sockdriver_unpack_data(&data, &spr->spr_call,
			    &spr->spr_data, spr->spr_datalen);
			sockdriver_unpack_data(&ctl, &spr->spr_call,
			    &spr->spr_ctl, spr->spr_ctllen);

			len = spr->spr_datalen - spr->spr_dataoff;

			min = sock->sock_slowat;

			/*
			 * As mentioned elsewhere, we do not save the address
			 * upon suspension so we cannot supply it anymore here.
			 */
			r = sock->sock_ops->sop_send(sock, &data, len,
			    &spr->spr_dataoff, &ctl,
			    spr->spr_ctllen - spr->spr_ctloff,
			    &spr->spr_ctloff, NULL, 0, spr->spr_endpt,
			    spr->spr_flags, min);

			/*
			 * If an error occurred but some data were already
			 * sent, return the progress rather than the error.
			 * Note that if the socket driver detects an
			 * asynchronous error during the send, it itself must
			 * perform this check and call sockevent_set_error() as
			 * needed, to make sure the error does not get lost.
			 */
			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
				r = spr->spr_dataoff;

			sockevent_sigpipe(sock, spr->spr_endpt,

		sockdriver_reply_generic(&spr->spr_call, r);

		if (sock->sock_flags & SFL_SHUT_RD)

		len = spr->spr_datalen - spr->spr_dataoff;

		if (sock->sock_err == OK) {
			min = sock->sock_rlowat;

		sockdriver_unpack_data(&data, &spr->spr_call,
		    &spr->spr_data, spr->spr_datalen);
		sockdriver_unpack_data(&ctl, &spr->spr_call,
		    &spr->spr_ctl, spr->spr_ctllen);

		r = sock->sock_ops->sop_recv(sock, &data, len,
		    &spr->spr_dataoff, &ctl,
		    spr->spr_ctllen - spr->spr_ctloff,
		    &spr->spr_ctloff, (struct sockaddr *)&addr,
		    &addr_len, spr->spr_endpt, spr->spr_flags, min,

		/*
		 * If the call remains suspended but a socket error is
		 * pending, return the pending socket error instead.
		 */
		if (sock->sock_err == OK)

		assert(addr_len <= sizeof(addr));

		/*
		 * If the receive call reported success, or if some data were
		 * already received, return the (partial) result.  Otherwise,
		 * return a pending error if any, or otherwise a regular error
		 * or EOF.
		 */
		if (r == OK || spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
			r = (int)spr->spr_dataoff;
		else if (sock->sock_err != OK) {

		} else if (r == SOCKEVENT_EOF)

		sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff,
		    (struct sockaddr *)&addr, addr_len, spr->spr_rflags);

		sockdriver_reply_generic(&spr->spr_call, OK);

		panic("libsockevent: process suspended on unknown event 0x%x",
/*
 * Return TRUE if the given socket is ready for reading for a select call, or
 * FALSE otherwise.
 */
sockevent_test_readable(struct sock * sock)

	/*
	 * The meaning of "ready-to-read" depends on whether the socket is a
	 * listening socket or not.  For the former, it is a test on whether
	 * there are any new sockets to accept.  However, shutdown flags take
	 * precedence in both cases.
	 */
	if (sock->sock_flags & SFL_SHUT_RD)

	if (sock->sock_err != OK)

	/*
	 * Depending on whether this is a listening-mode socket, test whether
	 * either accepts or receives would block.
	 */
	if (sock->sock_opt & SO_ACCEPTCONN) {
		if (sock->sock_ops->sop_test_accept == NULL)

		r = sock->sock_ops->sop_test_accept(sock);

		if (sock->sock_ops->sop_test_recv == NULL)

		r = sock->sock_ops->sop_test_recv(sock, sock->sock_rlowat,

	return (r != SUSPEND);
/*
 * Return TRUE if the given socket is ready for writing for a select call, or
 * FALSE otherwise.
 */
sockevent_test_writable(struct sock * sock)

	if (sock->sock_err != OK)

	if (sock->sock_flags & SFL_SHUT_WR)

	if (sock->sock_ops->sop_test_send == NULL)

	/*
	 * Test whether sends would block.  The low send watermark is relevant
	 * for stream-type sockets only.
	 */
	r = sock->sock_ops->sop_test_send(sock, sock->sock_slowat);

	return (r != SUSPEND);
/*
 * Test whether any of the given select operations are ready on the given
 * socket.  Return the subset of ready operations; zero if none.
 */
sockevent_test_select(struct sock * sock, unsigned int ops)

	unsigned int ready_ops;

	assert(!(ops & ~(SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR)));

	/*
	 * We do not support the "bind in progress" case here.  If a blocking
	 * bind call is in progress, the file descriptor should not be ready
	 * for either reading or writing.  Currently, socket drivers will have
	 * to cover this case themselves.  Otherwise we would have to check the
	 * queue of suspended calls, or create a custom flag for this.
	 */

	if ((ops & SDEV_OP_RD) && sockevent_test_readable(sock))
		ready_ops |= SDEV_OP_RD;

	if ((ops & SDEV_OP_WR) && sockevent_test_writable(sock))
		ready_ops |= SDEV_OP_WR;

	/* TODO: OOB receive support. */
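	/*
	 * Illustrative use (added comment; an assumption about typical use,
	 * not from the original code): a select query for both readability
	 * and writability would be tested as
	 *
	 *	ready = sockevent_test_select(sock, SDEV_OP_RD | SDEV_OP_WR);
	 *
	 * after which 'ready' holds the subset of those operations that would
	 * not block, or zero if neither is ready.
	 */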
/*
 * Fire the given mask of events on the given socket object now.
 */
sockevent_fire(struct sock * sock, unsigned int mask)

	struct sockevent_proc *spr, **sprp;
	unsigned int r, flag, ops;

	/*
	 * A completed connection attempt (successful or not) also always
	 * implies that the socket becomes writable.  For convenience we
	 * enforce this rule here, because it is easy to forget.  Note that in
	 * any case, a suspended connect request should be the first in the
	 * list, so we do not risk returning 0 from a connect call as a result
	 * of sock_err getting eaten by another resumed call.
	 */
	if (mask & SEV_CONNECT)

	/*
	 * First try resuming regular system calls.
	 */
	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
		flag = spr->spr_event;

		if ((mask & flag) && sockevent_resume(sock, spr)) {
			*sprp = spr->spr_next;

			sockevent_proc_free(spr);

			sprp = &spr->spr_next;

	/*
	 * Then see if we can satisfy pending select queries.
	 */
	if ((mask & (SEV_ACCEPT | SEV_SEND | SEV_RECV)) &&
	    sock->sock_select.ss_endpt != NONE) {
		assert(sock->sock_selops != 0);

		/*
		 * Only retest select operations that, based on the given event
		 * mask, could possibly be satisfied now.
		 */
		ops = sock->sock_selops;
		if (!(mask & (SEV_ACCEPT | SEV_RECV)))
		if (!(mask & SEV_SEND))
		if (!(0)) /* TODO: OOB receive support */

		/* Are there any operations to test? */

		/* Test those operations. */
		r = sockevent_test_select(sock, ops);

		/* Were any satisfied? */

		/* Let the caller know. */
		sockdriver_reply_select(&sock->sock_select,

		sock->sock_selops &= ~r;

		/* Are there any saved operations left now? */
		if (sock->sock_selops == 0)
			sock->sock_select.ss_endpt = NONE;

	/*
	 * Finally, a SEV_CLOSE event unconditionally frees the sock object.
	 * This event should be fired only for sockets that are either not yet,
	 * or not anymore, in use by userland.
	 */
	if (mask & SEV_CLOSE) {
		assert(sock->sock_flags & (SFL_CLONED | SFL_CLOSING));

		sockevent_free(sock);
/*
 * Process all pending events.  Events must still be blocked, so that if
 * handling one event generates a new event, that event is handled from here
 * rather than immediately.
 */

	assert(sockevent_working);

	while (!SIMPLEQ_EMPTY(&sockevent_pending)) {
		sock = SIMPLEQ_FIRST(&sockevent_pending);
		SIMPLEQ_REMOVE_HEAD(&sockevent_pending, sock_next);

		mask = sock->sock_events;

		sock->sock_events = 0;

		sockevent_fire(sock, mask);
		/*
		 * At this point, the sock object may already have been readded
		 * to the event list, or even be deallocated altogether.
		 */
/*
 * Return TRUE if any events are pending on any sockets, or FALSE otherwise.
 */
sockevent_has_events(void)

	return (!SIMPLEQ_EMPTY(&sockevent_pending));
/*
 * Raise the given bitwise-OR'ed set of events on the given socket object.
 * Depending on the context of the call, the events may or may not be
 * processed immediately.
 */
sockevent_raise(struct sock * sock, unsigned int mask)

	assert(sock->sock_ops != NULL);

	/*
	 * Handle SEV_CLOSE first.  This event must not be deferred, so as to
	 * let socket drivers recycle sock objects as they are needed.  For
	 * example, a user-closed TCP socket may stay open to transmit the
	 * remainder of its send buffer, until the TCP driver runs out of
	 * sockets, in which case the connection is aborted.  The driver would
	 * then raise SEV_CLOSE on the sock object so as to clean it up, and
	 * immediately reuse it afterward.  If the close event were to be
	 * deferred, this immediate reuse would not be possible.
	 *
	 * The sop_free() callback routine may not raise new events, and thus,
	 * the state of 'sockevent_working' need not be checked or set here.
	 */
	if (mask & SEV_CLOSE) {
		assert(mask == SEV_CLOSE);

		sockevent_fire(sock, mask);

	/*
	 * If we are currently processing a socket message, store the event for
	 * later.  If not, this call is not coming from inside libsockevent,
	 * and we must handle the event immediately.
	 */
	if (sockevent_working) {
		assert(mask <= UCHAR_MAX);	/* sock_events field size check */

		if (sock->sock_events == 0)
			SIMPLEQ_INSERT_TAIL(&sockevent_pending, sock,

		sock->sock_events |= mask;

		sockevent_working = TRUE;

		sockevent_fire(sock, mask);

		if (sockevent_has_events())

		sockevent_working = FALSE;
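/*
 * Illustrative use (added comment, a sketch rather than part of the original
 * code): a socket driver that has just queued incoming data on a connection
 * would typically call
 *
 *	sockevent_raise(sock, SEV_RECV);
 *
 * which either handles the event immediately or, if the library is already
 * processing a socket message, defers it until that work completes.
 */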
/*
 * Set a pending error on the socket object, and wake up any suspended
 * operations that are affected by this.
 */
sockevent_set_error(struct sock * sock, int err)

	assert(sock->sock_ops != NULL);

	/* If an error was set already, it will be overridden. */
	sock->sock_err = err;

	sockevent_raise(sock, SEV_BIND | SEV_CONNECT | SEV_SEND | SEV_RECV);
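/*
 * Illustrative use (added comment, a sketch rather than part of the original
 * code): a connection-oriented driver that detects a connection reset could
 * call
 *
 *	sockevent_set_error(sock, ECONNRESET);
 *
 * so that any suspended bind, connect, send, or receive calls on the socket
 * are woken up and the pending error is eventually reported to userland.
 */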
/*
 * Initialize timer-related data structures.
 */

	SLIST_INIT(&socktimer);

	init_timer(&sockevent_timer);
/*
 * Check whether the given socket object has any suspended requests that have
 * now expired.  If so, cancel them.  Also, if the socket object has any
 * suspended requests with a timeout that has not yet expired, return the
 * earliest (relative) timeout of all of them, or TMR_NEVER if no such requests
 * are left.
 */
sockevent_expire(struct sock * sock, clock_t now)

	struct sockevent_proc *spr, **sprp;
	clock_t lowest, left;

	/*
	 * First handle the case that the socket is closed.  In this case,
	 * there may be a linger timer, although the socket may also simply
	 * still be on the timer list because of a request that did not time
	 * out right before the socket was closed.
	 */
	if (sock->sock_flags & SFL_CLOSING) {
		/* Was there a linger timer and has it expired? */
		if ((sock->sock_opt & SO_LINGER) &&
		    tmr_is_first(sock->sock_linger, now)) {
			assert(sock->sock_ops->sop_close != NULL);

			/*
			 * Whatever happens next, we must now resume the
			 * pending close operation, if it was not canceled
			 * earlier.  As before, we return OK rather than the
			 * standardized EWOULDBLOCK, to ensure that the user
			 * process knows the file descriptor has been closed.
			 */
			if ((spr = sock->sock_proc) != NULL) {
				assert(spr->spr_event == SEV_CLOSE);
				assert(spr->spr_next == NULL);

				sock->sock_proc = NULL;

				sockdriver_reply_generic(&spr->spr_call, OK);

				sockevent_proc_free(spr);

			/*
			 * Tell the socket driver that closing the socket is
			 * now a bit more desired than the last time we asked.
			 */
			r = sock->sock_ops->sop_close(sock, TRUE /*force*/);

			assert(r == OK || r == SUSPEND);

			/*
			 * The linger timer fires once.  After that, the socket
			 * driver is free to decide that it still will not
			 * close the socket.  If it does, do not fire the
			 * linger timer again.
			 */
			sock->sock_opt &= ~SO_LINGER;

			sockevent_free(sock);

	/*
	 * Then see if any send and/or receive requests have expired.  Also see
	 * if there are any send and/or receive requests left that have not yet
	 * expired but do have a timeout, so that we can return the lowest of
	 * them.
	 */
	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
		/* Skip requests without a timeout. */
		if (spr->spr_timer == 0) {
			sprp = &spr->spr_next;

		assert(spr->spr_event == SEV_SEND ||
		    spr->spr_event == SEV_RECV);

		/*
		 * If the request has expired, cancel it and remove it from the
		 * list.  Otherwise, see if the request has the lowest number
		 * of ticks until its timeout so far.
		 */
		if (tmr_is_first(spr->spr_time, now)) {
			*sprp = spr->spr_next;

			if (spr->spr_event == SEV_SEND)
				sockevent_cancel_send(sock, spr, EWOULDBLOCK);
			else
				sockevent_cancel_recv(sock, spr, EWOULDBLOCK);

			sockevent_proc_free(spr);
		} else {
			left = spr->spr_time - now;

			if (lowest == TMR_NEVER || lowest > left)

			sprp = &spr->spr_next;
/*
 * The socket event alarm went off.  Go through the set of socket objects with
 * timers, and see if any of their requests have now expired.  Set a new alarm
 * if necessary.
 */
socktimer_expire(int arg __unused)

	SLIST_HEAD(, sock) oldtimer;
	struct sock *sock, *tsock;
	clock_t now, lowest, left;

	/*
	 * This function may or may not be called from a context where we are
	 * already deferring events, so we have to cover both cases here.
	 */
	if ((working = sockevent_working) == FALSE)
		sockevent_working = TRUE;

	/* Start a new list. */
	memcpy(&oldtimer, &socktimer, sizeof(oldtimer));
	SLIST_INIT(&socktimer);

	/*
	 * Go through all sockets that have or had a request with a timeout,
	 * canceling any expired requests and building a new list of sockets
	 * that still have requests with timeouts as we go.
	 */
	SLIST_FOREACH_SAFE(sock, &oldtimer, sock_timer, tsock) {
		assert(sock->sock_flags & SFL_TIMER);
		sock->sock_flags &= ~SFL_TIMER;

		left = sockevent_expire(sock, now);
		/*
		 * The sock object may already have been deallocated now.
		 * If 'left' is TMR_NEVER, do not touch 'sock' anymore.
		 */
		if (left != TMR_NEVER) {
			if (lowest == TMR_NEVER || lowest > left)

			SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);

			sock->sock_flags |= SFL_TIMER;

	/* If there is a new lowest timeout at all, set a new timer. */
	if (lowest != TMR_NEVER)
		set_timer(&sockevent_timer, lowest, socktimer_expire, 0);

	/* If any new events were raised, process them now. */
	if (sockevent_has_events())

	sockevent_working = FALSE;
/*
 * Set a timer for the given (relative) number of clock ticks, adding the
 * associated socket object to the set of socket objects with timers, if it was
 * not already in that set.  Set a new alarm if necessary, and return the
 * absolute timeout for the timer.  Since the timers list is maintained lazily,
 * the caller need not take the object off the set if the call was canceled
 * later; see also socktimer_del().
 */
socktimer_add(struct sock * sock, clock_t ticks)

	/*
	 * Relative time comparisons require that any two times are no more
	 * than half the comparison space (clock_t, unsigned long) apart.
	 */
	assert(ticks <= TMRDIFF_MAX);

	/* If the socket was not already on the timers list, put it on. */
	if (!(sock->sock_flags & SFL_TIMER)) {
		SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);

		sock->sock_flags |= SFL_TIMER;

	/*
	 * (Re)set the timer if either it was not running at all or this new
	 * timeout will occur sooner than the currently scheduled alarm.  Note
	 * that setting a timer that was already set is allowed.
	 */
	if (!tmr_is_set(&sockevent_timer) ||
	    tmr_is_first(now + ticks, tmr_exp_time(&sockevent_timer)))
		set_timer(&sockevent_timer, ticks, socktimer_expire, 0);

	/* Return the absolute timeout. */
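	/*
	 * Worked example (added for illustration, assuming a 100Hz system
	 * clock): a 5-second receive timeout is passed in as ticks=500; if
	 * 'now' is 12000, the absolute timeout returned here is 12500, which
	 * is what sockevent_expire() later compares against the current time.
	 */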
/*
 * Remove a socket object from the set of socket objects with timers.  Since
 * the timer list is maintained lazily, this needs to be done only right before
 * the socket object is freed.
 */
socktimer_del(struct sock * sock)

	if (sock->sock_flags & SFL_TIMER) {
		/* This macro is O(n). */
		SLIST_REMOVE(&socktimer, sock, sock, sock_timer);

		sock->sock_flags &= ~SFL_TIMER;
/*
 * Bind a socket to a local address.
 */
sockevent_bind(sockid_t id, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call)

	if ((sock = sockhash_get(id)) == NULL)

	if (sock->sock_ops->sop_bind == NULL)

	/* Binding a socket in listening mode is never supported. */
	if (sock->sock_opt & SO_ACCEPTCONN)

	r = sock->sock_ops->sop_bind(sock, addr, addr_len, user_endpt);

		sockevent_suspend(sock, SEV_BIND, call, user_endpt);
/*
 * Connect a socket to a remote address.
 */
sockevent_connect(sockid_t id, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * call)

	struct sockdriver_call fakecall;
	struct sockevent_proc *spr;

	if ((sock = sockhash_get(id)) == NULL)

	if (sock->sock_ops->sop_connect == NULL)

	/* Connecting a socket in listening mode is never supported. */
	if (sock->sock_opt & SO_ACCEPTCONN)

	/*
	 * The upcoming connect call may fire an accept event for which the
	 * handler may in turn fire a connect event on this socket.  Since we
	 * delay event processing until after processing calls, this would
	 * create the problem that even if the connection is accepted right
	 * away, non-blocking connect requests would return EINPROGRESS.  For
	 * UDS, this is undesirable behavior.  To remedy this, we use a hack:
	 * we temporarily suspend the connect even if non-blocking, then
	 * process events, and then cancel the connect request again.  If the
	 * connection was accepted immediately, the cancellation will have no
	 * effect, since the request has already been replied to.  In order not
	 * to violate libsockdriver rules with this hack, we fabricate a fake
	 * call structure.
	 */
	r = sock->sock_ops->sop_connect(sock, addr, addr_len, user_endpt);

	if (call != NULL || sockevent_has_events()) {
			fakecall.sc_endpt = NONE;

		assert(!sockevent_has_suspended(sock,
		    SEV_SEND | SEV_RECV));

		sockevent_suspend(sock, SEV_CONNECT, call, user_endpt);

		if (call == &fakecall) {
			/* Process any pending events first now. */

			/*
			 * If the connect request has not been resumed
			 * yet now, we must remove it from the queue
			 * again, and return EINPROGRESS ourselves.
			 * Otherwise, return OK or a pending error.
			 */
			spr = sockevent_unsuspend(sock, call);

				sockevent_proc_free(spr);

	} else if ((r = sock->sock_err) != OK)
		sock->sock_err = OK;

		/*
		 * A completed connection attempt also always implies that the
		 * socket becomes writable.  For convenience we enforce this
		 * rule here, because it is easy to forget.
		 */
		sockevent_raise(sock, SEV_SEND);
/*
 * Put a socket in listening mode.
 */
sockevent_listen(sockid_t id, int backlog)

	if ((sock = sockhash_get(id)) == NULL)

	if (sock->sock_ops->sop_listen == NULL)

	/*
	 * Perform a general adjustment on the backlog value, applying the
	 * customary BSD "fudge factor" of 1.5x.  Keep the value within bounds
	 * though.  POSIX imposes that a negative backlog value is equal to a
	 * backlog value of zero.  A backlog value of zero, in turn, may mean
	 * anything; we take it to be one.  POSIX also imposes that all socket
	 * drivers accept up to at least SOMAXCONN connections on the queue.
	 */
	if (backlog < SOMAXCONN)
		backlog += 1 + ((unsigned int)backlog >> 1);
	if (backlog > SOMAXCONN)
		backlog = SOMAXCONN;
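	/*
	 * Worked example (added for illustration, not in the original code):
	 * with the adjustment above, a backlog of 0 becomes 0 + 1 + 0 = 1 and
	 * a backlog of 16 becomes 16 + 1 + 8 = 25; any value that ends up
	 * above SOMAXCONN is clamped to SOMAXCONN.
	 */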
	r = sock->sock_ops->sop_listen(sock, backlog);

	/*
	 * On success, the socket is now in listening mode.  As part of that,
	 * a select(2) ready-to-read condition now indicates that a connection
	 * may be accepted on the socket, rather than that data may be read.
	 * Since libsockevent is responsible for this distinction, we keep
	 * track of the listening mode at this level.  Conveniently, there is a
	 * socket option for this, which we support out of the box as a result.
	 */
	sock->sock_opt |= SO_ACCEPTCONN;

	/*
	 * For the extremely unlikely case that right after the socket
	 * is put into listening mode, it has a connection ready to
	 * accept, we retest blocked ready-to-read select queries now.
	 */
	sockevent_raise(sock, SEV_ACCEPT);
/*
 * Accept a connection on a listening socket, creating a new socket.
 */
sockevent_accept(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call)

	struct sock *sock, *newsock;

	if ((sock = sockhash_get(id)) == NULL)

	if (sock->sock_ops->sop_accept == NULL)

	/*
	 * Attempt to accept a connection.  The socket driver is responsible
	 * for allocating a sock object (and identifier) on success.  It may
	 * already have done so before, in which case it should leave newsock
	 * filled with NULL; otherwise, the returned sock object is cloned from
	 * the listening socket.  The socket driver is also responsible for
	 * failing the call if the socket is not in listening mode, because it
	 * must specify the error to return: EOPNOTSUPP or EINVAL.
	 */
	if ((r = sock->sock_ops->sop_accept(sock, addr, addr_len, user_endpt,
	    &newsock)) == SUSPEND) {
		assert(sock->sock_opt & SO_ACCEPTCONN);

		sockevent_suspend(sock, SEV_ACCEPT, call, user_endpt);

	sockevent_accepted(sock, newsock, r);
/*
 * Send regular and/or control data.
 */
sockevent_send(sockid_t id, const struct sockdriver_data * __restrict data,
	size_t len, const struct sockdriver_data * __restrict ctl_data,
	socklen_t ctl_len, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt, int flags,
	const struct sockdriver_call * __restrict call)

	if ((sock = sockhash_get(id)) == NULL)

	/*
	 * The order of the following checks is not necessarily fixed, and may
	 * be changed later.  As far as applicable, they should match the order
	 * of the checks during call resumption, though.
	 */
	if ((r = sock->sock_err) != OK) {
		sock->sock_err = OK;

	if (sock->sock_flags & SFL_SHUT_WR) {
		sockevent_sigpipe(sock, user_endpt, flags);

	/*
	 * Translate the sticky SO_DONTROUTE option to a per-request
	 * MSG_DONTROUTE flag.  This achieves two purposes: socket drivers have
	 * to check only one flag, and socket drivers that do not support the
	 * flag will fail send requests in a consistent way.
	 */
	if (sock->sock_opt & SO_DONTROUTE)
		flags |= MSG_DONTROUTE;

	/*
	 * Check if this is a valid send request as far as the socket driver is
	 * concerned.  We do this separately from sop_send for the reason that
	 * this send request may immediately be queued behind other pending
	 * send requests (without a call to sop_send), which means even invalid
	 * requests would be queued and not return failure until much later.
	 */
	if (sock->sock_ops->sop_pre_send != NULL &&
	    (r = sock->sock_ops->sop_pre_send(sock, len, ctl_len, addr,
	    addr_len, user_endpt,
	    flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)

	if (sock->sock_ops->sop_send == NULL)

	/*
	 * Sending out-of-band data is treated differently from regular data:
	 *
	 * - sop_send is called immediately, even if a partial non-OOB send
	 *   operation is currently suspended (TODO: it may have to be aborted
	 *   in order to maintain atomicity guarantees - that should be easy);
	 * - sop_send must not return SUSPEND; instead, if it cannot process
	 *   the OOB data immediately, it must return an appropriate error;
	 * - the send low watermark is ignored.
	 *
	 * Given that none of the current socket drivers support OOB data at
	 * all, more sophisticated approaches would have no added value now.
	 */
	if (flags & MSG_OOB) {
		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, 0);

			panic("libsockevent: MSG_OOB send calls may not be "

		return (r == OK) ? (int)off : r;

	/*
	 * Only call the actual sop_send function now if no other send calls
	 * are suspended already.
	 *
	 * Call sop_send with 'min' set to the minimum of the request size and
	 * the socket's send low water mark, but only if the call is non-
	 * blocking.  For stream-oriented sockets, this should have the effect
	 * that non-blocking calls fail with EWOULDBLOCK if not at least that
	 * much can be sent immediately.  For consistency, we choose to apply
	 * the same threshold to blocking calls.  For datagram-oriented
	 * sockets, the minimum is not a factor to be considered.
	 */
	if (!sockevent_has_suspended(sock, SEV_SEND)) {
		min = sock->sock_slowat;

		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, min);

		/*
		 * We do not store the target's address on suspension, because
		 * that would add significantly to the per-process suspension
		 * state.  As a result, we disallow socket drivers from
		 * suspending send calls with addresses, because we would no
		 * longer have the address for proper call resumption.
		 * However, we do not know here whether the socket is in
		 * connection-oriented mode; if it is, the address is to be
		 * ignored altogether.  Therefore, there is no test on 'addr'
		 * here.  Resumed calls will get a NULL address pointer, and
		 * the socket driver is expected to do the right thing.
		 */

		/*
		 * For non-blocking socket calls, return an error only if we
		 * were not able to send anything at all.  If only control data
		 * were sent, the return value is therefore zero.
		 */
		if (sock->sock_stimeo != 0) {
			time = socktimer_add(sock, sock->sock_stimeo);

			sockevent_suspend_data(sock, SEV_SEND, timer, call,
			    user_endpt, data, len, off, ctl_data, ctl_len,
			    ctl_off, flags, 0, time);

			r = (off > 0 || ctl_off > 0) ? OK : EWOULDBLOCK;
	} else if (r == EPIPE)
		sockevent_sigpipe(sock, user_endpt, flags);

	return (r == OK) ? (int)off : r;
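	/*
	 * Illustrative note on the return value (added comment, not in the
	 * original code): a non-blocking send of 1024 bytes of which the
	 * driver immediately accepts only 512 returns 512 to the caller;
	 * only when no regular or control data could be sent at all does the
	 * call fail with EWOULDBLOCK.
	 */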
/*
 * The inner part of the receive request handler.  An error returned from here
 * may be overridden by an error pending on the socket, although data returned
 * from here trumps such pending errors.
 */
sockevent_recv_inner(struct sock * sock,
	const struct sockdriver_data * __restrict data,
	size_t len, size_t * __restrict off,
	const struct sockdriver_data * __restrict ctl_data,
	socklen_t ctl_len, socklen_t * __restrict ctl_off,
	struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	int * __restrict flags, const struct sockdriver_call * __restrict call)

	int r, oob, inflags, timer;

	/*
	 * Check if this is a valid receive request as far as the socket driver
	 * is concerned.  We do this separately from sop_recv for the reason
	 * that this receive request may immediately be queued behind other
	 * pending receive requests (without a call to sop_recv), which means
	 * even invalid requests would be queued and not return failure until
	 * much later.
	 */
	if (sock->sock_ops->sop_pre_recv != NULL &&
	    (r = sock->sock_ops->sop_pre_recv(sock, user_endpt,
	    inflags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)

	/*
	 * The order of the following checks is not necessarily fixed, and may
	 * be changed later.  As far as applicable, they should match the order
	 * of the checks during call resumption, though.
	 */
	if (sock->sock_flags & SFL_SHUT_RD)
		return SOCKEVENT_EOF;

	if (sock->sock_ops->sop_recv == NULL)

	/*
	 * Receiving out-of-band data is treated differently from regular data:
	 *
	 * - sop_recv is called immediately, even if a partial non-OOB receive
	 *   operation is currently suspended (TODO: it may have to be aborted
	 *   in order to maintain atomicity guarantees - that should be easy);
	 * - sop_recv must not return SUSPEND; instead, if it cannot return any
	 *   of the OOB data immediately, it must return an appropriate error;
	 * - the receive low watermark is ignored.
	 *
	 * Given that none of the current socket drivers support OOB data at
	 * all, more sophisticated approaches would have no added value now.
	 */
	oob = (inflags & MSG_OOB);

	if (oob && (sock->sock_opt & SO_OOBINLINE))

	/*
	 * Only call the actual sop_recv function now if no other receive
	 * calls are suspended already.
	 *
	 * Call sop_recv with 'min' set to the minimum of the request size and
	 * the socket's receive low water mark, unless there is a pending
	 * error.  As a result, blocking calls will block, and non-blocking
	 * calls will yield EWOULDBLOCK, if not at least that much can be
	 * received, unless another condition (EOF or that pending error)
	 * prevents more from being received anyway.  For datagram-oriented
	 * sockets, the minimum is not a factor to be considered.
	 */
	if (oob || !sockevent_has_suspended(sock, SEV_RECV)) {
		if (!oob && sock->sock_err == OK) {
			min = sock->sock_rlowat;

			min = 0;	/* receive even no-data segments */

		r = sock->sock_ops->sop_recv(sock, data, len, off, ctl_data,
		    ctl_len, ctl_off, addr, addr_len, user_endpt, inflags, min,

	assert(r <= 0 || r == SOCKEVENT_EOF);

		panic("libsockevent: MSG_OOB receive calls may not be "

	/*
	 * For non-blocking socket calls, return EWOULDBLOCK only if we
	 * did not receive anything at all.  If only control data were
	 * received, the return value is therefore zero.  Suspension
	 * implies that there is nothing to read.  For the purpose of
	 * the calling wrapper function, never suspend a call when
	 * there is a pending error.
	 */
	if (call != NULL && sock->sock_err == OK) {
		if (sock->sock_rtimeo != 0) {
			time = socktimer_add(sock, sock->sock_rtimeo);

		sockevent_suspend_data(sock, SEV_RECV, timer, call,
		    user_endpt, data, len, *off, ctl_data,
		    ctl_len, *ctl_off, inflags, *flags, time);
/*
 * Receive regular and/or control data.
 */
sockevent_recv(sockid_t id, const struct sockdriver_data * __restrict data,
	size_t len, const struct sockdriver_data * __restrict ctl_data,
	socklen_t * __restrict ctl_len, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	int * __restrict flags, const struct sockdriver_call * __restrict call)

	socklen_t ctl_inlen;

	if ((sock = sockhash_get(id)) == NULL)

	/*
	 * This function is a wrapper around the actual receive functionality.
	 * The reason for this is that receiving data should take precedence
	 * over a pending socket error, while a pending socket error should
	 * take precedence over both regular errors as well as EOF.  In other
	 * words: if there is a pending error, we must try to receive anything
	 * at all; if receiving does not work, we must fail the call with the
	 * pending error.  However, until we call the receive callback, we have
	 * no way of telling whether any data can be received.  So we must try
	 * that before we can decide whether to return a pending error.
	 */
	ctl_inlen = *ctl_len;

	/*
	 * Attempt to perform the actual receive call.
	 */
	r = sockevent_recv_inner(sock, data, len, &off, ctl_data, ctl_inlen,
	    ctl_len, addr, addr_len, user_endpt, flags, call);

	/*
	 * If the receive request succeeded, or it failed but yielded a partial
	 * result, then return the (partial) result.  Otherwise, if an error is
	 * pending, return that error.  Otherwise, return either a regular
	 * error or 0 for EOF.
	 */
	if (r == OK || (r != SUSPEND && (off > 0 || *ctl_len > 0)))
	else if (sock->sock_err != OK) {
		assert(r != SUSPEND);

		sock->sock_err = OK;
	} else if (r == SOCKEVENT_EOF)
/*
 * Process an I/O control call.
 */
sockevent_ioctl(sockid_t id, unsigned long request,
	const struct sockdriver_data * __restrict data, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call __unused)

	if ((sock = sockhash_get(id)) == NULL)

	/* We handle a very small subset of generic IOCTLs here. */
		if (!(sock->sock_flags & SFL_SHUT_RD) &&
		    sock->sock_ops->sop_test_recv != NULL)
			(void)sock->sock_ops->sop_test_recv(sock, 0, &size);

		return sockdriver_copyout(data, 0, &val, sizeof(val));

	if (sock->sock_ops->sop_ioctl == NULL)

	r = sock->sock_ops->sop_ioctl(sock, request, data, user_endpt);

	/*
	 * Suspending IOCTL requests is not currently supported by this
	 * library, even though the VFS protocol and libsockdriver do support
	 * it.  The reason is that IOCTLs do not match our process suspension
	 * model: they could be neither queued nor repeated.  For now, it seems
	 * that this feature is not needed by the socket drivers either.  Thus,
	 * even though there are possible solutions, we defer implementing them
	 * until we know what exactly is needed.
	 */
		panic("libsockevent: socket driver suspended IOCTL 0x%lx",
/*
 * Set socket options.
 */
sockevent_setsockopt(sockid_t id, int level, int name,
	const struct sockdriver_data * data, socklen_t len)

	struct linger linger;
	clock_t secs, ticks;

	if ((sock = sockhash_get(id)) == NULL)

	if (level == SOL_SOCKET) {
		/*
		 * Handle a subset of the socket-level options here.  For most
		 * of them, this means that the socket driver itself need not
		 * handle changing or returning the options, but still needs to
		 * implement the correct behavior based on them where needed.
		 * A few of them are handled exclusively in this library:
		 * SO_ACCEPTCONN, SO_NOSIGPIPE, SO_ERROR, SO_TYPE, SO_LINGER,
		 * SO_SNDLOWAT, SO_RCVLOWAT, SO_SNDTIMEO, and SO_RCVTIMEO.
		 * The SO_USELOOPBACK option is explicitly absent, as it is
		 * valid for routing sockets only and is set by default there.
		 */

			/*
			 * Simple on-off options.  Changing them does not
			 * involve the socket driver.
			 */
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),

				sock->sock_opt |= (unsigned int)name;
			else
				sock->sock_opt &= ~(unsigned int)name;

			/*
			 * In principle these on-off options are maintained in
			 * this library, but some socket drivers may need to
			 * apply the options elsewhere, so we notify them that
			 * something has changed.  Using the sop_setsockopt
			 * callback would be inconvenient for this for two
			 * reasons: multiple value copy-ins and default errors.
			 */
			if (sock->sock_ops->sop_setsockmask != NULL)
				sock->sock_ops->sop_setsockmask(sock,

			/*
			 * The inlining of OOB data may make new data available
			 * through regular receive calls.  Thus, see if we can
			 * wake up any suspended receive calls now.
			 */
			if (name == SO_OOBINLINE && val)
				sockevent_raise(sock, SEV_RECV);

			/* The only on-off option with an associated value. */
			if ((r = sockdriver_copyin_opt(data, &linger,
			    sizeof(linger), len)) != OK)

			if (linger.l_onoff) {
				if (linger.l_linger < 0)

				/* EDOM is the closest applicable error.. */
				secs = (clock_t)linger.l_linger;
				if (secs >= TMRDIFF_MAX / sys_hz())

				sock->sock_opt |= SO_LINGER;
				sock->sock_linger = secs * sys_hz();
			} else {
				sock->sock_opt &= ~SO_LINGER;
				sock->sock_linger = 0;
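			/*
			 * Worked example (added for illustration, assuming a
			 * 100Hz system clock): an l_linger value of 5 seconds
			 * is stored as 5 * sys_hz() = 500 ticks in
			 * sock_linger; switching the option off clears both
			 * the flag and the stored value.
			 */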
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),

			/*
			 * Setting these values may allow suspended operations
			 * (send, recv, select) to be resumed, so recheck.
			 */
			if (name == SO_SNDLOWAT) {
				sock->sock_slowat = (size_t)val;

				sockevent_raise(sock, SEV_SEND);
			} else {
				sock->sock_rlowat = (size_t)val;

				sockevent_raise(sock, SEV_RECV);
			}

			if ((r = sockdriver_copyin_opt(data, &tv, sizeof(tv),

			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
			    (unsigned long)tv.tv_usec >= US)

			if (tv.tv_sec >= TMRDIFF_MAX / sys_hz())

			ticks = tv.tv_sec * sys_hz() +
			    (tv.tv_usec * sys_hz() + US - 1) / US;

			if (name == SO_SNDTIMEO)
				sock->sock_stimeo = ticks;
			else
				sock->sock_rtimeo = ticks;

			/*
			 * The timeouts for any calls already in progress for
			 * this socket are left as is.
			 */
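			/*
			 * Worked example (added for illustration, assuming a
			 * 100Hz system clock): a timeout of { tv_sec = 2,
			 * tv_usec = 500000 } converts to 2 * 100 +
			 * (500000 * 100 + 999999) / 1000000 = 250 ticks, with
			 * the microseconds part rounded upward.
			 */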
			/* These options may be retrieved but not set. */

			/*
			 * The remaining options either cannot be handled in a
			 * generic way, or are not recognized altogether.  Pass
			 * them to the socket driver, which should handle what
			 * it knows and reject the rest.
			 */

	if (sock->sock_ops->sop_setsockopt == NULL)

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not support.
	 */
	return sock->sock_ops->sop_setsockopt(sock, level, name, data, len);
/*
 * Retrieve socket options.
 */
sockevent_getsockopt(sockid_t id, int level, int name,
	const struct sockdriver_data * __restrict data,
	socklen_t * __restrict len)

	struct linger linger;

	if ((sock = sockhash_get(id)) == NULL)

	if (level == SOL_SOCKET) {
		/*
		 * As with setting, handle a subset of the socket-level options
		 * here.  The rest is to be taken care of by the socket driver.
		 */

			val = !!(sock->sock_opt & (unsigned int)name);

			return sockdriver_copyout_opt(data, &val, sizeof(val),

			linger.l_onoff = !!(sock->sock_opt & SO_LINGER);
			linger.l_linger = sock->sock_linger / sys_hz();

			return sockdriver_copyout_opt(data, &linger,
			    sizeof(linger), len);

			if ((val = -sock->sock_err) != OK)
				sock->sock_err = OK;

			return sockdriver_copyout_opt(data, &val, sizeof(val),

			val = sock->sock_type;

			return sockdriver_copyout_opt(data, &val, sizeof(val),

			val = (int)sock->sock_slowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),

			val = (int)sock->sock_rlowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),

			if (name == SO_SNDTIMEO)
				ticks = sock->sock_stimeo;
			else
				ticks = sock->sock_rtimeo;

			tv.tv_sec = ticks / sys_hz();
			tv.tv_usec = (ticks % sys_hz()) * US / sys_hz();

			return sockdriver_copyout_opt(data, &tv, sizeof(tv),

	if (sock->sock_ops->sop_getsockopt == NULL)

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not support.
	 */
	return sock->sock_ops->sop_getsockopt(sock, level, name, data, len);
/*
 * Retrieve a socket's local address.
 */
sockevent_getsockname(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)

	if ((sock = sockhash_get(id)) == NULL)

	if (sock->sock_ops->sop_getsockname == NULL)

	return sock->sock_ops->sop_getsockname(sock, addr, addr_len);
/*
 * Retrieve a socket's remote address.
 */
sockevent_getpeername(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)

	if ((sock = sockhash_get(id)) == NULL)

	/* Listening-mode sockets cannot possibly have a peer address. */
	if (sock->sock_opt & SO_ACCEPTCONN)

	if (sock->sock_ops->sop_getpeername == NULL)

	return sock->sock_ops->sop_getpeername(sock, addr, addr_len);
/*
 * Mark the socket object as shut down for sending and/or receiving.  The flags
 * parameter may be a bitwise-OR'ed combination of SFL_SHUT_RD and SFL_SHUT_WR.
 * This function will wake up any suspended requests affected by this change,
 * but it will not invoke the sop_shutdown() callback function on the socket.
 * The function may in fact be called from sop_shutdown() before completion to
 * mark the socket as shut down as reflected by sockevent_is_shutdown().
 */
sockevent_set_shutdown(struct sock * sock, unsigned int flags)

	assert(sock->sock_ops != NULL);
	assert(!(flags & ~(SFL_SHUT_RD | SFL_SHUT_WR)));

	/* Look at the newly set flags only. */
	flags &= ~(unsigned int)sock->sock_flags;

		sock->sock_flags |= flags;

		/*
		 * Wake up any blocked calls that are affected by the shutdown.
		 * Shutting down listening sockets causes ongoing accept calls
		 * to fail.
		 */
		if (flags & SFL_SHUT_RD)
		if (flags & SFL_SHUT_WR)
		if (sock->sock_opt & SO_ACCEPTCONN)

		sockevent_raise(sock, mask);
/*
 * Shut down socket send and receive operations.
 */
sockevent_shutdown(sockid_t id, int how)

	if ((sock = sockhash_get(id)) == NULL)

	/* Convert the request to a set of flags. */
	if (how == SHUT_RD || how == SHUT_RDWR)
		flags |= SFL_SHUT_RD;
	if (how == SHUT_WR || how == SHUT_RDWR)
		flags |= SFL_SHUT_WR;
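	/*
	 * For example, SHUT_RD maps to SFL_SHUT_RD, SHUT_WR to SFL_SHUT_WR,
	 * and SHUT_RDWR to both flags at once (comment added for
	 * illustration).
	 */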
	if (sock->sock_ops->sop_shutdown != NULL)
		r = sock->sock_ops->sop_shutdown(sock, flags);

	/* On success, update our internal state as well. */
		sockevent_set_shutdown(sock, flags);
/*
 * Close a socket.
 */
static int
sockevent_close(sockid_t id, const struct sockdriver_call * call)
{
	struct sock *sock;
	int r, force;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	assert(sock->sock_proc == NULL);
	sock->sock_select.ss_endpt = NONE;

	/*
	 * There are several scenarios when it comes to closing sockets. First
	 * of all, we never actually force the socket driver to close a socket.
	 * The driver may always suspend the close call and take as long as it
	 * wants.  After a suspension, it signals its completion of the close
	 * through the SEV_CLOSE socket event.
	 *
	 * With that said, we offer two levels of urgency regarding the close
	 * request: regular and forced.  The former allows for a graceful
	 * close; the latter urges the socket driver to close the socket as
	 * soon as possible.  A socket that has been requested to be closed
	 * gracefully can, as long as it is still open (i.e., no SEV_CLOSE was
	 * fired yet), later be requested to be closed forcefully.  This is how
	 * SO_LINGER with a nonzero timeout is implemented.  If SO_LINGER is
	 * set with a zero timeout, the socket is force-closed immediately.
	 * Finally, if SO_LINGER is not set, the socket will be closed normally
	 * and never be forced--akin to SO_LINGER with an infinite timeout.
	 *
	 * The return value of the caller's close(2) may only ever be either
	 * OK or EINPROGRESS, to ensure that the caller knows that the file
	 * descriptor is freed up, as per Austin Group Defect #529.  In fact,
	 * EINPROGRESS is to be returned only on signal interruption (i.e.,
	 * cancel).  For that reason, this function only ever returns OK.
	 */
	force = ((sock->sock_opt & SO_LINGER) && sock->sock_linger == 0);

	if (sock->sock_ops->sop_close != NULL)
		r = sock->sock_ops->sop_close(sock, force);
	else
		r = OK;

	assert(r == OK || r == SUSPEND);

	if (r == SUSPEND) {
		sock->sock_flags |= SFL_CLOSING;

		/*
		 * If we were requested to force-close the socket immediately,
		 * but the socket driver needs more time anyway, then tell the
		 * caller that the socket was closed right away.
		 */
		if (force)
			return OK;

		/*
		 * If we are to force-close the socket only after a specific
		 * linger timeout, set the timer for that now, even if the call
		 * is non-blocking.  This also means that we cannot associate
		 * the linger timeout with the close call.  Instead, we convert
		 * the sock_linger value from a (relative) duration to an
		 * (absolute) timeout time, and use the SFL_CLOSING flag (along
		 * with SFL_TIMER) to tell the difference.  Since the socket is
		 * otherwise unreachable from userland at this point, the
		 * conversion is never visible in any way.
		 *
		 * The socket may already be in the timers list, so we must
		 * always check the SO_LINGER flag before checking sock_linger.
		 *
		 * If SO_LINGER is not set, we must never suspend the call.
		 */
		if (sock->sock_opt & SO_LINGER) {
			socktimer_add(sock, sock->sock_linger);
		} else
			call = NULL;

		/*
		 * A non-blocking close is completed asynchronously.  The
		 * caller is not told about this with EWOULDBLOCK as usual, for
		 * the reasons mentioned above.
		 */
		if (call != NULL)
			sockevent_suspend(sock, SEV_CLOSE, call, NONE);
	} else
		sockevent_free(sock);

	return OK;
}
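
/*
 * Usage sketch: from the application's point of view, the close behaviors
 * described above map onto the standard SO_LINGER option roughly as follows
 * (ordinary POSIX calls, shown for illustration only):
 *
 *	struct linger l;
 *
 *	l.l_onoff = 1;
 *	l.l_linger = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(fd);		(socket is force-closed immediately)
 *
 *	l.l_onoff = 1;
 *	l.l_linger = 10;
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(fd);		(graceful close, forced after the timeout)
 *
 *	close(fd);		(SO_LINGER unset: graceful, never forced)
 */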
/*
 * Cancel a suspended send request.
 */
static void
sockevent_cancel_send(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were sent, return the number of data
	 * bytes sent--possibly zero.  Otherwise return the given error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	sockdriver_reply_generic(&spr->spr_call, r);

	/*
	 * In extremely rare circumstances, one send may be queued behind
	 * another send even though the former can actually be sent on the
	 * socket right away.  For this reason, we retry sending when canceling
	 * a send.  We need to do this only when the first send in the queue
	 * was canceled, but multiple blocked sends on a single socket should
	 * be rare anyway.
	 */
	sockevent_raise(sock, SEV_SEND);
}
/*
 * Cancel a suspended receive request.
 */
static void
sockevent_cancel_recv(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were received, return the number of
	 * data bytes received--possibly zero.  Otherwise return the given
	 * error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	/*
	 * Also return any flags set for the data received so far, e.g.
	 * MSG_CTRUNC.  Do not return an address: receive calls on unconnected
	 * sockets must never block after receiving some data--instead, they
	 * are supposed to return MSG_TRUNC if not all data were copied out.
	 */
	sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff, NULL, 0,
	    spr->spr_rflags);

	/*
	 * The same story as for sends (see above) applies to receives,
	 * although this case should be even more rare in practice.
	 */
	sockevent_raise(sock, SEV_RECV);
}
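
/*
 * Note on the semantics above: when a blocked send or receive call is
 * interrupted by a signal, the caller sees EINTR only if no data were
 * transferred yet; otherwise it sees the partial byte count, much as with a
 * short read(2) or write(2).
 */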
/*
 * Cancel a previous request that may currently be suspended.  The cancel
 * operation itself does not have a reply.  Instead, if the given request was
 * found to be suspended, that request must be aborted and an appropriate reply
 * must be sent for the request.  If no matching request was found, no reply
 * must be sent at all.
 */
static void
sockevent_cancel(sockid_t id, const struct sockdriver_call * call)
{
	struct sockevent_proc *spr;
	struct sock *sock;

	/*
	 * Due to asynchronous close(2) operations, not even the sock object
	 * may be found.  In this (entirely legitimate) case, do not send any
	 * reply.
	 */
	if ((sock = sockhash_get(id)) == NULL)
		return;

	/*
	 * The request may already have completed by the time we receive the
	 * cancel request, in which case we can not find it.  In this (entirely
	 * legitimate) case, do not send any reply.
	 */
	if ((spr = sockevent_unsuspend(sock, call)) == NULL)
		return;

	/*
	 * We found the operation.  Cancel it according to its call type.
	 * Then, once fully done with it, free the suspension data structure.
	 *
	 * Note that we have to use the call structure from the suspension data
	 * structure rather than the given 'call' pointer: only the former
	 * includes all the information necessary to resume the request!
	 */
	switch (spr->spr_event) {
	case SEV_BIND:
	case SEV_CONNECT:
		assert(spr->spr_call.sc_endpt != NONE);

		sockdriver_reply_generic(&spr->spr_call, EINTR);

		break;

	case SEV_ACCEPT:
		sockdriver_reply_accept(&spr->spr_call, EINTR, NULL, 0);

		break;

	case SEV_SEND:
		sockevent_cancel_send(sock, spr, EINTR);

		break;

	case SEV_RECV:
		sockevent_cancel_recv(sock, spr, EINTR);

		break;

	case SEV_CLOSE:
		/*
		 * Return EINPROGRESS rather than EINTR, so that the user
		 * process can tell from the close(2) result that the file
		 * descriptor has in fact been closed.
		 */
		sockdriver_reply_generic(&spr->spr_call, EINPROGRESS);

		/*
		 * Do not free the sock object here: the socket driver will
		 * complete the close in the background, and fire SEV_CLOSE
		 * once it is done.  Only then is the sock object freed.
		 */
		break;

	default:
		panic("libsockevent: process suspended on unknown event 0x%x",
		    spr->spr_event);
	}

	sockevent_proc_free(spr);
}
/*
 * Process a select request.
 */
static int
sockevent_select(sockid_t id, unsigned int ops,
	const struct sockdriver_select * sel)
{
	struct sock *sock;
	unsigned int r, notify;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	notify = (ops & SDEV_NOTIFY);
	ops &= (SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR);

	/*
	 * See if any of the requested select operations can be satisfied
	 * immediately.
	 */
	r = sockevent_test_select(sock, ops);

	/*
	 * If select operations were pending, the new results must not indicate
	 * that any of those were satisfied, as that would indicate an internal
	 * logic error: the socket driver is supposed to update its state
	 * proactively, and thus, discovering that things have changed here is
	 * not something that should ever happen.
	 */
	assert(!(sock->sock_selops & r));

	/*
	 * If any select operations are not satisfied immediately, and we are
	 * asked to notify the caller when they are satisfied later, save them
	 * for later retesting.
	 */
	ops &= ~r;

	if (notify && ops != 0) {
		/*
		 * For now, we support only one caller when it comes to select
		 * queries: VFS.  If we want to support a networked file system
		 * (or so) directly calling select as well, this library will
		 * have to be extended accordingly (should not be too hard).
		 */
		if (sock->sock_select.ss_endpt != NONE) {
			if (sock->sock_select.ss_endpt != sel->ss_endpt) {
				printf("libsockevent: no support for multiple "
				    "select callers yet\n");

				return EIO;
			}

			/*
			 * If a select query was already pending for this
			 * caller, we must simply merge in the new operations.
			 */
			sock->sock_selops |= ops;
		} else {
			assert(sel->ss_endpt != NONE);

			sock->sock_select = *sel;
			sock->sock_selops = ops;
		}
	}

	return r;
}
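
/*
 * Usage sketch: the caller composes a select request as a set of SDEV_OP_*
 * bits, optionally combined with SDEV_NOTIFY.  For example, a query such as
 *
 *	ops = SDEV_OP_RD | SDEV_OP_WR | SDEV_NOTIFY;
 *
 * asks which of the read and write operations are ready right now, and
 * requests a later notification for those that are not.  The return value of
 * sockevent_select() is the subset of the given operations that are already
 * satisfied.
 */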
/*
 * An alarm has triggered.  Expire any timers.  Socket drivers that do not pass
 * clock notification messages to libsockevent must call expire_timers(3)
 * themselves instead.
 */
static void
sockevent_alarm(clock_t now)
{

	expire_timers(now);
}
static const struct sockdriver sockevent_tab = {
	.sdr_socket		= sockevent_socket,
	.sdr_socketpair		= sockevent_socketpair,
	.sdr_bind		= sockevent_bind,
	.sdr_connect		= sockevent_connect,
	.sdr_listen		= sockevent_listen,
	.sdr_accept		= sockevent_accept,
	.sdr_send		= sockevent_send,
	.sdr_recv		= sockevent_recv,
	.sdr_ioctl		= sockevent_ioctl,
	.sdr_setsockopt		= sockevent_setsockopt,
	.sdr_getsockopt		= sockevent_getsockopt,
	.sdr_getsockname	= sockevent_getsockname,
	.sdr_getpeername	= sockevent_getpeername,
	.sdr_shutdown		= sockevent_shutdown,
	.sdr_close		= sockevent_close,
	.sdr_cancel		= sockevent_cancel,
	.sdr_select		= sockevent_select,
	.sdr_alarm		= sockevent_alarm
};
/*
 * Initialize the socket event library.
 */
void
sockevent_init(sockevent_socket_cb_t socket_cb)
{

	sockhash_init();

	socktimer_init();

	sockevent_proc_init();

	SIMPLEQ_INIT(&sockevent_pending);

	assert(socket_cb != NULL);
	sockevent_socket_cb = socket_cb;

	/* Announce we are up. */
	sockdriver_announce();

	sockevent_working = FALSE;
}
/*
 * Process a socket driver request message.
 */
void
sockevent_process(const message * m_ptr, int ipc_status)
{

	/* Block events until after we have processed the request. */
	assert(!sockevent_working);
	sockevent_working = TRUE;

	/* Actually process the request. */
	sockdriver_process(&sockevent_tab, m_ptr, ipc_status);

	/*
	 * If any events were fired while processing the request, they will
	 * have been queued for later.  Go through them now.
	 */
	if (sockevent_has_events())
		sockevent_pump();

	sockevent_working = FALSE;
}
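
/*
 * Usage sketch: a socket driver built on this library typically initializes
 * it once and then hands every incoming message to sockevent_process() from
 * its main loop.  The callback name "mydrv_socket" is hypothetical and the
 * loop below is only a minimal outline of the usual MINIX service pattern:
 *
 *	int
 *	main(void)
 *	{
 *		message m;
 *		int r, ipc_status;
 *
 *		sockevent_init(mydrv_socket);
 *
 *		for (;;) {
 *			if ((r = sef_receive_status(ANY, &m,
 *			    &ipc_status)) != OK)
 *				panic("sef_receive_status: %d", r);
 *
 *			sockevent_process(&m, ipc_status);
 *		}
 *	}
 */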