/* Socket event dispatching library - by D.C. van Moolenbroek */

#include <minix/drivers.h>
#include <minix/sockdriver.h>
#include <minix/sockevent.h>
#include <sys/ioctl.h>

#include "sockevent_proc.h"

#define US		1000000UL	/* microseconds per second */

#define SOCKHASH_SLOTS	256	/* # slots in ID-to-sock hash table */

static SLIST_HEAD(, sock) sockhash[SOCKHASH_SLOTS];

static SLIST_HEAD(, sock) socktimer;

static minix_timer_t sockevent_timer;

static SIMPLEQ_HEAD(, sock) sockevent_pending;

static sockevent_socket_cb_t sockevent_socket_cb = NULL;

static int sockevent_working;

static void socktimer_del(struct sock * sock);
static void sockevent_cancel_send(struct sock * sock,
	struct sockevent_proc * spr, int err);
static void sockevent_cancel_recv(struct sock * sock,
	struct sockevent_proc * spr, int err);

/*
 * Initialize the hash table of sock objects.
 */
static void
sockhash_init(void)
{
	unsigned int slot;

	for (slot = 0; slot < __arraycount(sockhash); slot++)
		SLIST_INIT(&sockhash[slot]);
}

/*
 * Given a socket identifier, return a hash table slot number.
 */
static unsigned int
sockhash_slot(sockid_t id)
{

	/*
	 * The idea of the shift is that a socket driver may offer multiple
	 * classes of sockets, and put the class in the higher bits.  The
	 * shift aims to prevent that all classes' first sockets end up in
	 * the same hash slot.
	 */
	return (id + (id >> 16)) % SOCKHASH_SLOTS;
}
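/*
 * Worked example (illustration only, not in the original source): if a
 * driver keeps a class number in the upper 16 bits of its identifiers, the
 * first socket of classes 0, 1, and 2 (IDs 0x00000, 0x10000, and 0x20000)
 * maps to slots 0, 1, and 2 respectively, instead of all three colliding in
 * slot 0.
 */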
/*
 * Obtain a sock object from the hash table using its unique identifier.
 * Return a pointer to the object if found, or NULL otherwise.
 */
static struct sock *
sockhash_get(sockid_t id)
{
	struct sock *sock;
	unsigned int slot;

	slot = sockhash_slot(id);

	SLIST_FOREACH(sock, &sockhash[slot], sock_hash) {
		if (sock->sock_id == id)
			return sock;
	}

	return NULL;
}

/*
 * Add a sock object to the hash table.  The sock object must have a valid ID
 * in its 'sock_id' field, and must not be in the hash table already.
 */
static void
sockhash_add(struct sock * sock)
{
	unsigned int slot;

	slot = sockhash_slot(sock->sock_id);

	SLIST_INSERT_HEAD(&sockhash[slot], sock, sock_hash);
}

/*
 * Remove a sock object from the hash table.  The sock object must be in the
 * hash table.
 */
static void
sockhash_del(struct sock * sock)
{
	unsigned int slot;

	slot = sockhash_slot(sock->sock_id);

	/* This macro is O(n). */
	SLIST_REMOVE(&sockhash[slot], sock, sock, sock_hash);
}

/*
 * Reset a socket object to a proper initial state, with a particular socket
 * identifier, a SOCK_ type, and a socket operations table.  The socket is
 * added to the ID-to-object hash table.  This function always succeeds.
 */
static void
sockevent_reset(struct sock * sock, sockid_t id, int domain, int type,
	const struct sockevent_ops * ops)
{

	assert(sock != NULL);

	memset(sock, 0, sizeof(*sock));

	sock->sock_id = id;
	sock->sock_domain = domain;
	sock->sock_type = type;

	sock->sock_slowat = 1;
	sock->sock_rlowat = 1;

	sock->sock_ops = ops;
	sock->sock_proc = NULL;
	sock->sock_select.ss_endpt = NONE;

	sockhash_add(sock);
}

/*
 * Initialize a new socket that will serve as an accepted socket on the given
 * listening socket 'sock'.  The new socket is given as 'newsock', and its
 * new socket identifier is given as 'newid'.  This function always succeeds.
 */
void
sockevent_clone(struct sock * sock, struct sock * newsock, sockid_t newid)
{

	sockevent_reset(newsock, newid, (int)sock->sock_domain,
	    sock->sock_type, sock->sock_ops);

	/* These are the settings that are currently inherited. */
	newsock->sock_opt = sock->sock_opt & ~SO_ACCEPTCONN;
	newsock->sock_linger = sock->sock_linger;
	newsock->sock_stimeo = sock->sock_stimeo;
	newsock->sock_rtimeo = sock->sock_rtimeo;
	newsock->sock_slowat = sock->sock_slowat;
	newsock->sock_rlowat = sock->sock_rlowat;

	newsock->sock_flags |= SFL_CLONED;
}
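/*
 * A minimal sketch (hypothetical driver code, not part of this library) of
 * the intended use: the driver's accept callback hands back a fresh sock
 * object, and libsockevent clones the inherited settings into it via
 * sockevent_clone() from sockevent_accepted().  The mydrv_* names are
 * assumptions for illustration only.
 *
 *	static sockid_t
 *	mydrv_accept(struct sock * sock, struct sockaddr * addr,
 *		socklen_t * addr_len, endpoint_t user_endpt,
 *		struct sock ** newsockp)
 *	{
 *		struct mydrv_conn *conn;
 *
 *		if ((conn = mydrv_dequeue_conn(sock)) == NULL)
 *			return SUSPEND;
 *
 *		*newsockp = &conn->mc_sock;
 *		return conn->mc_id;
 *	}
 *
 * A driver that needs the new sock object earlier, for example when the
 * connection first arrives, may instead call sockevent_clone() itself at
 * that point and leave *newsockp set to NULL.
 */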
/*
 * A new socket has just been accepted.  The corresponding listening socket
 * is given as 'sock'.  The new socket has ID 'newid', and if it had not
 * already been added to the hash table through sockevent_clone() before,
 * 'newsock' is a non-NULL pointer which identifies the socket object to
 * clone into.
 */
static void
sockevent_accepted(struct sock * sock, struct sock * newsock, sockid_t newid)
{

	if (newsock == NULL) {
		if ((newsock = sockhash_get(newid)) == NULL)
			panic("libsockevent: socket driver returned unknown "
			    "ID %d from accept callback", newid);
	} else
		sockevent_clone(sock, newsock, newid);

	assert(newsock->sock_flags & SFL_CLONED);
	newsock->sock_flags &= ~SFL_CLONED;
}

/*
 * Allocate a sock object, by asking the socket driver for one.  On success,
 * return OK, with a pointer to the new object stored in 'sockp'.  This new
 * object has all its fields set to initial values, in part based on the
 * given parameters.  On failure, return an error code.  Failure has two
 * typical causes: either the given domain, type, protocol combination is not
 * supported, or the socket driver is out of sockets (globally or for this
 * combination).
 */
static int
sockevent_alloc(int domain, int type, int protocol, endpoint_t user_endpt,
	struct sock ** sockp)
{
	struct sock *sock;
	const struct sockevent_ops *ops;
	sockid_t r;

	/*
	 * Verify that the given domain is sane.  Unlike the type and
	 * protocol, the domain is already verified by VFS, so we do not
	 * limit ourselves here.  The result is that we can store the domain
	 * in just a byte.
	 */
	if (domain < 0 || domain > UINT8_MAX)
		return EAFNOSUPPORT;

	/* Make sure that the library has actually been initialized. */
	if (sockevent_socket_cb == NULL)
		panic("libsockevent: not initialized");

	sock = NULL;
	ops = NULL;

	/*
	 * Ask the socket driver to create a socket for the given combination
	 * of domain, type, and protocol.  If so, let it return a new sock
	 * object, a unique socket identifier for that object, and an
	 * operations table for it.
	 */
	if ((r = sockevent_socket_cb(domain, type, protocol, user_endpt,
	    &sock, &ops)) < 0)
		return r;

	assert(sock != NULL);
	assert(ops != NULL);

	sockevent_reset(sock, r, domain, type, ops);

	*sockp = sock;
	return OK;
}
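/*
 * A minimal sketch (hypothetical, for illustration) of the driver-side
 * callback invoked through sockevent_socket_cb above.  The callback must
 * either fill in a sock object plus operations table and return a
 * nonnegative socket identifier, or return a negative error.  The mydrv_*
 * names are assumptions.
 *
 *	static sockid_t
 *	mydrv_socket(int domain, int type, int protocol,
 *		endpoint_t user_endpt, struct sock ** sockp,
 *		const struct sockevent_ops ** opsp)
 *	{
 *		struct mydrv_sock *msock;
 *
 *		if (type != SOCK_DGRAM)
 *			return EPROTOTYPE;
 *		if ((msock = mydrv_get_free_sock()) == NULL)
 *			return ENOBUFS;
 *
 *		*sockp = &msock->ms_sock;
 *		*opsp = &mydrv_ops;
 *		return mydrv_sock_id(msock);
 *	}
 */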
/*
 * Free a previously allocated sock object.
 */
static void
sockevent_free(struct sock * sock)
{
	const struct sockevent_ops *ops;

	assert(sock->sock_proc == NULL);

	socktimer_del(sock);

	sockhash_del(sock);

	/*
	 * Invalidate the operations table on the socket, before freeing the
	 * socket.  This allows us to detect cases where sockevent functions
	 * are called on sockets that have already been freed.
	 */
	ops = sock->sock_ops;
	sock->sock_ops = NULL;

	assert(ops != NULL);
	assert(ops->sop_free != NULL);

	ops->sop_free(sock);
}

/*
 * Create a new socket.
 */
static sockid_t
sockevent_socket(int domain, int type, int protocol, endpoint_t user_endpt)
{
	struct sock *sock;
	int r;

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
	    &sock)) != OK)
		return r;

	return sock->sock_id;
}

/*
 * Create a pair of connected sockets.
 */
static int
sockevent_socketpair(int domain, int type, int protocol, endpoint_t user_endpt,
	sockid_t id[2])
{
	struct sock *sock1, *sock2;
	int r;

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
	    &sock1)) != OK)
		return r;

	/* Creating socket pairs is not always supported. */
	if (sock1->sock_ops->sop_pair == NULL) {
		sockevent_free(sock1);

		return EOPNOTSUPP;
	}

	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
	    &sock2)) != OK) {
		sockevent_free(sock1);

		return r;
	}

	assert(sock1->sock_ops == sock2->sock_ops);

	r = sock1->sock_ops->sop_pair(sock1, sock2, user_endpt);

	if (r != OK) {
		sockevent_free(sock2);
		sockevent_free(sock1);

		return r;
	}

	id[0] = sock1->sock_id;
	id[1] = sock2->sock_id;
	return OK;
}
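/*
 * Usage note (illustration only): for a domain and type combination whose
 * driver implements sop_pair, such as UNIX domain stream sockets, a
 * successful call fills id[2] with the two new socket identifiers, and the
 * driver's sop_pair callback has already connected the two sockets to each
 * other.
 */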
/*
 * A send request returned EPIPE.  If desired, send a SIGPIPE signal to the
 * user process that issued the request.
 */
static void
sockevent_sigpipe(struct sock * sock, endpoint_t user_endpt, int flags)
{

	/*
	 * POSIX says that pipe signals should be generated for SOCK_STREAM
	 * sockets.  Linux does just this, NetBSD raises signals for all
	 * socket types.
	 */
	if (sock->sock_type != SOCK_STREAM)
		return;

	/*
	 * Why would there be fewer than four ways to do the same thing?
	 * O_NOSIGPIPE, MSG_NOSIGNAL, SO_NOSIGPIPE, and of course blocking
	 * SIGPIPE.  VFS already sets MSG_NOSIGNAL for calls on sockets with
	 * O_NOSIGPIPE.  The fact that SO_NOSIGPIPE is a thing, is also the
	 * reason why we cannot let VFS handle signal generation altogether.
	 */
	if (flags & MSG_NOSIGNAL)
		return;
	if (sock->sock_opt & SO_NOSIGPIPE)
		return;

	/*
	 * Send a SIGPIPE signal to the user process.  Unfortunately we
	 * cannot guarantee that the SIGPIPE reaches the user process before
	 * the send call returns.  Usually, the scheduling priorities of
	 * system services are such that the signal is likely to arrive first
	 * anyway, but if timely arrival of the signal is required, a more
	 * fundamental change to the system would be needed.
	 */
	sys_kill(user_endpt, SIGPIPE);
}
/*
 * Suspend a request without data, that is, a bind, connect, accept, or close
 * request.
 */
static void
sockevent_suspend(struct sock * sock, unsigned int event,
	const struct sockdriver_call * __restrict call, endpoint_t user_endpt)
{
	struct sockevent_proc *spr, **sprp;

	/* There is one slot for each process, so this should never fail. */
	if ((spr = sockevent_proc_alloc()) == NULL)
		panic("libsockevent: too many suspended processes");

	spr->spr_next = NULL;
	spr->spr_event = event;
	spr->spr_timer = FALSE;
	spr->spr_call = *call;
	spr->spr_endpt = user_endpt;

	/*
	 * Add the request to the tail of the queue.  This operation is O(n),
	 * but the number of suspended requests per socket is expected to be
	 * low at all times.
	 */
	for (sprp = &sock->sock_proc; *sprp != NULL;
	    sprp = &(*sprp)->spr_next);
	*sprp = spr;
}
/*
 * Suspend a request with data, that is, a send or receive request.
 */
static void
sockevent_suspend_data(struct sock * sock, unsigned int event, int timer,
	const struct sockdriver_call * __restrict call, endpoint_t user_endpt,
	const struct sockdriver_data * __restrict data, size_t len, size_t off,
	const struct sockdriver_data * __restrict ctl, socklen_t ctl_len,
	socklen_t ctl_off, int flags, int rflags, clock_t time)
{
	struct sockevent_proc *spr, **sprp;

	/* There is one slot for each process, so this should never fail. */
	if ((spr = sockevent_proc_alloc()) == NULL)
		panic("libsockevent: too many suspended processes");

	spr->spr_next = NULL;
	spr->spr_event = event;
	spr->spr_timer = timer;
	spr->spr_call = *call;
	spr->spr_endpt = user_endpt;
	sockdriver_pack_data(&spr->spr_data, call, data, len);
	spr->spr_datalen = len;
	spr->spr_dataoff = off;
	sockdriver_pack_data(&spr->spr_ctl, call, ctl, ctl_len);
	spr->spr_ctllen = ctl_len;
	spr->spr_ctloff = ctl_off;
	spr->spr_flags = flags;
	spr->spr_rflags = rflags;
	spr->spr_time = time;

	/*
	 * Add the request to the tail of the queue.  This operation is O(n),
	 * but the number of suspended requests per socket is expected to be
	 * low at all times.
	 */
	for (sprp = &sock->sock_proc; *sprp != NULL;
	    sprp = &(*sprp)->spr_next);
	*sprp = spr;
}
/*
 * Return TRUE if there are any suspended requests on the given socket's
 * queue that match any of the events in the given event mask, or FALSE
 * otherwise.
 */
static int
sockevent_has_suspended(struct sock * sock, unsigned int mask)
{
	struct sockevent_proc *spr;

	for (spr = sock->sock_proc; spr != NULL; spr = spr->spr_next)
		if (spr->spr_event & mask)
			return TRUE;

	return FALSE;
}

/*
 * Check whether the given call is on the given socket's queue of suspended
 * requests.  If so, remove it from the queue and return a pointer to the
 * suspension data structure.  The caller is then responsible for freeing
 * that data structure using sockevent_proc_free().  If the call was not
 * found, the function returns NULL.
 */
static struct sockevent_proc *
sockevent_unsuspend(struct sock * sock, const struct sockdriver_call * call)
{
	struct sockevent_proc *spr, **sprp;

	/* Find the suspended request being canceled. */
	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL;
	    sprp = &spr->spr_next) {
		if (spr->spr_call.sc_endpt == call->sc_endpt &&
		    spr->spr_call.sc_req == call->sc_req) {
			/* Found; remove and return it. */
			*sprp = spr->spr_next;

			return spr;
		}
	}

	return NULL;
}
/*
 * Attempt to resume the given suspended request for the given socket object.
 * Return TRUE if the suspended request has been fully resumed and can be
 * removed from the queue of suspended requests, or FALSE if it has not been
 * fully resumed and should stay on the queue.  In the latter case, no
 * resumption will be attempted for other suspended requests of the same
 * type.
 */
static int
sockevent_resume(struct sock * sock, struct sockevent_proc * spr)
{
	struct sock *newsock;
	struct sockdriver_data data, ctl;
	char addr[SOCKADDR_MAX];
	socklen_t addr_len;
	size_t len, min;
	sockid_t r;

	switch (spr->spr_event) {
	case SEV_CONNECT:
		/*
		 * If the connect call was suspended for the purpose of
		 * intercepting resumption, simply remove it from the queue.
		 */
		if (spr->spr_call.sc_endpt == NONE)
			return TRUE;

		/* FALLTHROUGH */
	case SEV_BIND:
		if ((r = sock->sock_err) != OK)
			sock->sock_err = OK;

		sockdriver_reply_generic(&spr->spr_call, r);

		return TRUE;

	case SEV_ACCEPT:
		/*
		 * A previous accept call may not have blocked on a socket
		 * that was not in listening mode.
		 */
		assert(sock->sock_opt & SO_ACCEPTCONN);

		addr_len = 0;
		newsock = NULL;

		/*
		 * This call is suspended, which implies that the call table
		 * pointer has already been tested to be non-NULL.
		 */
		if ((r = sock->sock_ops->sop_accept(sock,
		    (struct sockaddr *)&addr, &addr_len, spr->spr_endpt,
		    &newsock)) == SUSPEND)
			return FALSE;

		if (r >= 0) {
			assert(addr_len <= sizeof(addr));

			sockevent_accepted(sock, newsock, r);
		}

		sockdriver_reply_accept(&spr->spr_call, r,
		    (struct sockaddr *)&addr, addr_len);

		return TRUE;

	case SEV_SEND:
		if (sock->sock_err != OK || (sock->sock_flags & SFL_SHUT_WR)) {
			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
				r = (int)spr->spr_dataoff;
			else if ((r = sock->sock_err) != OK)
				sock->sock_err = OK;
			else
				r = EPIPE;
		} else {
			sockdriver_unpack_data(&data, &spr->spr_call,
			    &spr->spr_data, spr->spr_datalen);
			sockdriver_unpack_data(&ctl, &spr->spr_call,
			    &spr->spr_ctl, spr->spr_ctllen);

			len = spr->spr_datalen - spr->spr_dataoff;

			min = sock->sock_slowat;
			if (min > len)
				min = len;

			/*
			 * As mentioned elsewhere, we do not save the address
			 * upon suspension so we cannot supply it anymore
			 * here.
			 */
			r = sock->sock_ops->sop_send(sock, &data, len,
			    &spr->spr_dataoff, &ctl,
			    spr->spr_ctllen - spr->spr_ctloff,
			    &spr->spr_ctloff, NULL, 0, spr->spr_endpt,
			    spr->spr_flags, min);

			assert(r <= 0);

			if (r == SUSPEND)
				return FALSE;

			/*
			 * If an error occurred but some data were already
			 * sent, return the progress rather than the error.
			 * Note that if the socket driver detects an
			 * asynchronous error during the send, it itself must
			 * perform this check and call sockevent_set_error()
			 * as needed, to make sure the error does not get
			 * lost.
			 */
			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
				r = spr->spr_dataoff;
		}

		if (r == EPIPE)
			sockevent_sigpipe(sock, spr->spr_endpt,
			    spr->spr_flags);

		sockdriver_reply_generic(&spr->spr_call, r);

		return TRUE;

	case SEV_RECV:
		addr_len = 0;

		if (sock->sock_flags & SFL_SHUT_RD)
			r = SOCKEVENT_EOF;
		else {
			len = spr->spr_datalen - spr->spr_dataoff;

			if (sock->sock_err == OK) {
				min = sock->sock_rlowat;
				if (min > len)
					min = len;
			} else
				min = 0;

			sockdriver_unpack_data(&data, &spr->spr_call,
			    &spr->spr_data, spr->spr_datalen);
			sockdriver_unpack_data(&ctl, &spr->spr_call,
			    &spr->spr_ctl, spr->spr_ctllen);

			r = sock->sock_ops->sop_recv(sock, &data, len,
			    &spr->spr_dataoff, &ctl,
			    spr->spr_ctllen - spr->spr_ctloff,
			    &spr->spr_ctloff, (struct sockaddr *)&addr,
			    &addr_len, spr->spr_endpt, spr->spr_flags, min,
			    &spr->spr_rflags);

			/*
			 * If the call remains suspended but a socket error is
			 * pending, return the pending socket error instead.
			 */
			if (r == SUSPEND) {
				if (sock->sock_err == OK)
					return FALSE;

				r = SOCKEVENT_EOF;
			}

			assert(addr_len <= sizeof(addr));
		}

		/*
		 * If the receive call reported success, or if some data were
		 * already received, return the (partial) result.  Otherwise,
		 * return a pending error if any, or otherwise a regular
		 * error or 0 for EOF.
		 */
		if (r == OK || spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
			r = (int)spr->spr_dataoff;
		else if (sock->sock_err != OK) {
			r = sock->sock_err;

			sock->sock_err = OK;
		} else if (r == SOCKEVENT_EOF)
			r = 0;	/* EOF */

		sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff,
		    (struct sockaddr *)&addr, addr_len, spr->spr_rflags);

		return TRUE;

	case SEV_CLOSE:
		sockdriver_reply_generic(&spr->spr_call, OK);

		return TRUE;

	default:
		panic("libsockevent: process suspended on unknown event 0x%x",
		    spr->spr_event);
	}
}
/*
 * Return TRUE if the given socket is ready for reading for a select call, or
 * FALSE otherwise.
 */
static int
sockevent_test_readable(struct sock * sock)
{
	int r;

	/*
	 * The meaning of "ready-to-read" depends on whether the socket is a
	 * listening socket or not.  For the former, it is a test on whether
	 * there are any new sockets to accept.  However, shutdown flags take
	 * precedence in both cases.
	 */
	if (sock->sock_flags & SFL_SHUT_RD)
		return TRUE;

	if (sock->sock_err != OK)
		return TRUE;

	/*
	 * Depending on whether this is a listening-mode socket, test whether
	 * either accepts or receives would block.
	 */
	if (sock->sock_opt & SO_ACCEPTCONN) {
		if (sock->sock_ops->sop_test_accept == NULL)
			return TRUE;

		r = sock->sock_ops->sop_test_accept(sock);
	} else {
		if (sock->sock_ops->sop_test_recv == NULL)
			return TRUE;

		r = sock->sock_ops->sop_test_recv(sock, sock->sock_rlowat,
		    NULL);
	}

	return (r != SUSPEND);
}

/*
 * Return TRUE if the given socket is ready for writing for a select call, or
 * FALSE otherwise.
 */
static int
sockevent_test_writable(struct sock * sock)
{
	int r;

	if (sock->sock_err != OK)
		return TRUE;

	if (sock->sock_flags & SFL_SHUT_WR)
		return TRUE;

	if (sock->sock_ops->sop_test_send == NULL)
		return TRUE;

	/*
	 * Test whether sends would block.  The low send watermark is
	 * relevant for stream-type sockets only.
	 */
	r = sock->sock_ops->sop_test_send(sock, sock->sock_slowat);

	return (r != SUSPEND);
}

/*
 * Test whether any of the given select operations are ready on the given
 * socket.  Return the subset of ready operations; zero if none.
 */
static unsigned int
sockevent_test_select(struct sock * sock, unsigned int ops)
{
	unsigned int ready_ops;

	assert(!(ops & ~(SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR)));

	/*
	 * We do not support the "bind in progress" case here.  If a blocking
	 * bind call is in progress, the file descriptor should not be ready
	 * for either reading or writing.  Currently, socket drivers will
	 * have to cover this case themselves.  Otherwise we would have to
	 * check the queue of suspended calls, or create a custom flag for
	 * this.
	 */

	ready_ops = 0;

	if ((ops & SDEV_OP_RD) && sockevent_test_readable(sock))
		ready_ops |= SDEV_OP_RD;

	if ((ops & SDEV_OP_WR) && sockevent_test_writable(sock))
		ready_ops |= SDEV_OP_WR;

	/* TODO: OOB receive support. */

	return ready_ops;
}
/*
 * Fire the given mask of events on the given socket object now.
 */
static void
sockevent_fire(struct sock * sock, unsigned int mask)
{
	struct sockevent_proc *spr, **sprp;
	unsigned int r, flag, ops;

	/*
	 * A completed connection attempt (successful or not) also always
	 * implies that the socket becomes writable.  For convenience we
	 * enforce this rule here, because it is easy to forget.  Note that
	 * in any case, a suspended connect request should be the first in
	 * the list, so we do not risk returning 0 from a connect call as a
	 * result of sock_err getting eaten by another resumed call.
	 */
	if (mask & SEV_CONNECT)
		mask |= SEV_SEND;

	/*
	 * First try resuming regular system calls.
	 */
	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
		flag = spr->spr_event;

		if ((mask & flag) && sockevent_resume(sock, spr)) {
			*sprp = spr->spr_next;

			sockevent_proc_free(spr);
		} else {
			mask &= ~flag;

			sprp = &spr->spr_next;
		}
	}

	/*
	 * Then see if we can satisfy pending select queries.
	 */
	if ((mask & (SEV_ACCEPT | SEV_SEND | SEV_RECV)) &&
	    sock->sock_select.ss_endpt != NONE) {
		assert(sock->sock_selops != 0);

		/*
		 * Only retest select operations that, based on the given
		 * event mask, could possibly be satisfied now.
		 */
		ops = sock->sock_selops;
		if (!(mask & (SEV_ACCEPT | SEV_RECV)))
			ops &= ~SDEV_OP_RD;
		if (!(mask & SEV_SEND))
			ops &= ~SDEV_OP_WR;
		if (!(0))	/* TODO: OOB receive support */
			ops &= ~SDEV_OP_ERR;

		/* Are there any operations to test? */
		if (ops != 0) {
			/* Test those operations. */
			r = sockevent_test_select(sock, ops);

			/* Were any satisfied? */
			if (r != 0) {
				/* Let the caller know. */
				sockdriver_reply_select(&sock->sock_select,
				    sock->sock_id, r);

				sock->sock_selops &= ~r;

				/* Are there any saved operations left now? */
				if (sock->sock_selops == 0)
					sock->sock_select.ss_endpt = NONE;
			}
		}
	}

	/*
	 * Finally, a SEV_CLOSE event unconditionally frees the sock object.
	 * This event should be fired only for sockets that are either not
	 * yet, or not anymore, in use by userland.
	 */
	if (mask & SEV_CLOSE) {
		assert(sock->sock_flags & (SFL_CLONED | SFL_CLOSING));

		sockevent_free(sock);
	}
}
/*
 * Process all pending events.  Events must still be blocked, so that if
 * handling one event generates a new event, that event is handled from here
 * rather than immediately.
 */
static void
sockevent_pump(void)
{
	struct sock *sock;
	unsigned int mask;

	assert(sockevent_working);

	while (!SIMPLEQ_EMPTY(&sockevent_pending)) {
		sock = SIMPLEQ_FIRST(&sockevent_pending);
		SIMPLEQ_REMOVE_HEAD(&sockevent_pending, sock_next);

		mask = sock->sock_events;
		assert(mask != 0);
		sock->sock_events = 0;

		sockevent_fire(sock, mask);
		/*
		 * At this point, the sock object may already have been
		 * readded to the event list, or even be deallocated
		 * altogether.
		 */
	}
}

/*
 * Return TRUE if any events are pending on any sockets, or FALSE otherwise.
 */
static int
sockevent_has_events(void)
{

	return (!SIMPLEQ_EMPTY(&sockevent_pending));
}
/*
 * Raise the given bitwise-OR'ed set of events on the given socket object.
 * Depending on the context of the call, the events may or may not be
 * processed immediately.
 */
void
sockevent_raise(struct sock * sock, unsigned int mask)
{

	assert(sock->sock_ops != NULL);

	/*
	 * Handle SEV_CLOSE first.  This event must not be deferred, so as to
	 * let socket drivers recycle sock objects as they are needed.  For
	 * example, a user-closed TCP socket may stay open to transmit the
	 * remainder of its send buffer, until the TCP driver runs out of
	 * sockets, in which case the connection is aborted.  The driver
	 * would then raise SEV_CLOSE on the sock object so as to clean it
	 * up, and immediately reuse it afterward.  If the close event were
	 * to be deferred, this immediate reuse would not be possible.
	 *
	 * The sop_free() callback routine may not raise new events, and
	 * thus, the state of 'sockevent_working' need not be checked or set
	 * here.
	 */
	if (mask & SEV_CLOSE) {
		assert(mask == SEV_CLOSE);

		sockevent_fire(sock, mask);

		return;
	}

	/*
	 * If we are currently processing a socket message, store the event
	 * for later.  If not, this call is not coming from inside
	 * libsockevent, and we must handle the event immediately.
	 */
	if (sockevent_working) {
		assert(mask != 0);
		assert(mask <= UCHAR_MAX);	/* sock_events field size check */

		if (sock->sock_events == 0)
			SIMPLEQ_INSERT_TAIL(&sockevent_pending, sock,
			    sock_next);

		sock->sock_events |= mask;
	} else {
		sockevent_working = TRUE;

		sockevent_fire(sock, mask);

		if (sockevent_has_events())
			sockevent_pump();

		sockevent_working = FALSE;
	}
}
/*
 * Set a pending error on the socket object, and wake up any suspended
 * operations that are affected by this.
 */
void
sockevent_set_error(struct sock * sock, int err)
{

	assert(err < 0);
	assert(sock->sock_ops != NULL);

	/* If an error was set already, it will be overridden. */
	sock->sock_err = err;

	sockevent_raise(sock, SEV_BIND | SEV_CONNECT | SEV_SEND | SEV_RECV);
}
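/*
 * Usage sketch (hypothetical driver code): a driver that detects an
 * asynchronous failure, say a connection reset seen outside any ongoing
 * call, would report it as
 *
 *	sockevent_set_error(sock, ECONNRESET);
 *
 * which, given the _SYSTEM convention of negative error values used
 * throughout this file, satisfies the assert above and wakes up any calls
 * currently suspended on the socket.
 */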
/*
 * Initialize timer-related data structures.
 */
static void
socktimer_init(void)
{

	SLIST_INIT(&socktimer);

	init_timer(&sockevent_timer);
}

/*
 * Check whether the given socket object has any suspended requests that have
 * now expired.  If so, cancel them.  Also, if the socket object has any
 * suspended requests with a timeout that has not yet expired, return the
 * earliest (relative) timeout of all of them, or TMR_NEVER if no such
 * requests are present.
 */
static clock_t
sockevent_expire(struct sock * sock, clock_t now)
{
	struct sockevent_proc *spr, **sprp;
	clock_t lowest, left;
	int r;

	/*
	 * First handle the case that the socket is closed.  In this case,
	 * there may be a linger timer, although the socket may also simply
	 * still be on the timer list because of a request that did not time
	 * out right before the socket was closed.
	 */
	if (sock->sock_flags & SFL_CLOSING) {
		/* Was there a linger timer and has it expired? */
		if ((sock->sock_opt & SO_LINGER) &&
		    tmr_is_first(sock->sock_linger, now)) {
			assert(sock->sock_ops->sop_close != NULL);

			/*
			 * Whatever happens next, we must now resume the
			 * pending close operation, if it was not canceled
			 * earlier.  As before, we return OK rather than the
			 * standardized EWOULDBLOCK, to ensure that the user
			 * process knows the file descriptor has been closed.
			 */
			if ((spr = sock->sock_proc) != NULL) {
				assert(spr->spr_event == SEV_CLOSE);
				assert(spr->spr_next == NULL);

				sock->sock_proc = NULL;

				sockdriver_reply_generic(&spr->spr_call, OK);

				sockevent_proc_free(spr);
			}

			/*
			 * Tell the socket driver that closing the socket is
			 * now a bit more desired than the last time we
			 * asked.
			 */
			r = sock->sock_ops->sop_close(sock, TRUE /*force*/);

			assert(r == OK || r == SUSPEND);

			/*
			 * The linger timer fires once.  After that, the
			 * socket driver is free to decide that it still will
			 * not close the socket.  If it does, do not fire the
			 * linger timer again.
			 */
			if (r == SUSPEND)
				sock->sock_opt &= ~SO_LINGER;
			else
				sockevent_free(sock);
		}

		return TMR_NEVER;
	}

	/*
	 * Then see if any send and/or receive requests have expired.  Also
	 * see if there are any send and/or receive requests left that have
	 * not yet expired but do have a timeout, so that we can return the
	 * lowest of those timeouts.
	 */
	lowest = TMR_NEVER;

	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
		/* Skip requests without a timeout. */
		if (spr->spr_timer == 0) {
			sprp = &spr->spr_next;

			continue;
		}

		assert(spr->spr_event == SEV_SEND ||
		    spr->spr_event == SEV_RECV);

		/*
		 * If the request has expired, cancel it and remove it from
		 * the list.  Otherwise, see if the request has the lowest
		 * number of ticks until its timeout so far.
		 */
		if (tmr_is_first(spr->spr_time, now)) {
			*sprp = spr->spr_next;

			if (spr->spr_event == SEV_SEND)
				sockevent_cancel_send(sock, spr, EWOULDBLOCK);
			else
				sockevent_cancel_recv(sock, spr, EWOULDBLOCK);

			sockevent_proc_free(spr);
		} else {
			left = spr->spr_time - now;

			if (lowest == TMR_NEVER || lowest > left)
				lowest = left;

			sprp = &spr->spr_next;
		}
	}

	return lowest;
}
/*
 * The socket event alarm went off.  Go through the set of socket objects
 * with timers, and see if any of their requests have now expired.  Set a new
 * alarm as necessary.
 */
static void
socktimer_expire(int arg __unused)
{
	SLIST_HEAD(, sock) oldtimer;
	struct sock *sock, *tsock;
	clock_t now, lowest, left;
	int working;

	/*
	 * This function may or may not be called from a context where we are
	 * already deferring events, so we have to cover both cases here.
	 */
	if ((working = sockevent_working) == FALSE)
		sockevent_working = TRUE;

	/* Start a new list. */
	memcpy(&oldtimer, &socktimer, sizeof(oldtimer));
	SLIST_INIT(&socktimer);

	now = getticks();
	lowest = TMR_NEVER;

	/*
	 * Go through all sockets that have or had a request with a timeout,
	 * canceling any expired requests and building a new list of sockets
	 * that still have requests with timeouts as we go.
	 */
	SLIST_FOREACH_SAFE(sock, &oldtimer, sock_timer, tsock) {
		assert(sock->sock_flags & SFL_TIMER);
		sock->sock_flags &= ~SFL_TIMER;

		left = sockevent_expire(sock, now);
		/*
		 * The sock object may already have been deallocated now.
		 * If 'left' is TMR_NEVER, do not touch 'sock' anymore.
		 */

		if (left != TMR_NEVER) {
			if (lowest == TMR_NEVER || lowest > left)
				lowest = left;

			SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);

			sock->sock_flags |= SFL_TIMER;
		}
	}

	/* If there is a new lowest timeout at all, set a new timer. */
	if (lowest != TMR_NEVER)
		set_timer(&sockevent_timer, lowest, socktimer_expire, 0);

	if (!working) {
		/* If any new events were raised, process them now. */
		if (sockevent_has_events())
			sockevent_pump();

		sockevent_working = FALSE;
	}
}
/*
 * Set a timer for the given (relative) number of clock ticks, adding the
 * associated socket object to the set of socket objects with timers, if it
 * was not already in that set.  Set a new alarm if necessary, and return the
 * absolute timeout for the timer.  Since the timers list is maintained
 * lazily, the caller need not take the object off the set if the call was
 * canceled later; see also socktimer_del().
 */
static clock_t
socktimer_add(struct sock * sock, clock_t ticks)
{
	clock_t now;

	/*
	 * Relative time comparisons require that any two times are no more
	 * than half the comparison space (clock_t, unsigned long) apart.
	 */
	assert(ticks <= TMRDIFF_MAX);

	/* If the socket was not already on the timers list, put it on. */
	if (!(sock->sock_flags & SFL_TIMER)) {
		SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);

		sock->sock_flags |= SFL_TIMER;
	}

	/*
	 * (Re)set the timer if either it was not running at all or this new
	 * timeout will occur sooner than the currently scheduled alarm.
	 * Note that setting a timer that was already set is allowed.
	 */
	now = getticks();

	if (!tmr_is_set(&sockevent_timer) ||
	    tmr_is_first(now + ticks, tmr_exp_time(&sockevent_timer)))
		set_timer(&sockevent_timer, ticks, socktimer_expire, 0);

	/* Return the absolute timeout. */
	return now + ticks;
}
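/*
 * Worked example (illustration only): with sys_hz() at 100 and a receive
 * timeout (sock_rtimeo) of 250 ticks, a receive call suspended at tick 1000
 * gets an absolute timeout of 1250 from socktimer_add(), and the pending
 * alarm is moved up if it was set to fire later than that.
 */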
/*
 * Remove a socket object from the set of socket objects with timers.  Since
 * the timer list is maintained lazily, this needs to be done only right
 * before the socket object is freed.
 */
static void
socktimer_del(struct sock * sock)
{

	if (sock->sock_flags & SFL_TIMER) {
		/* This macro is O(n). */
		SLIST_REMOVE(&socktimer, sock, sock, sock_timer);

		sock->sock_flags &= ~SFL_TIMER;
	}
}

/*
 * Bind a socket to a local address.
 */
static int
sockevent_bind(sockid_t id, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call)
{
	struct sock *sock;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_bind == NULL)
		return EOPNOTSUPP;

	/* Binding a socket in listening mode is never supported. */
	if (sock->sock_opt & SO_ACCEPTCONN)
		return EINVAL;

	r = sock->sock_ops->sop_bind(sock, addr, addr_len, user_endpt);

	if (r == SUSPEND) {
		if (call == NULL)
			return EINPROGRESS;

		sockevent_suspend(sock, SEV_BIND, call, user_endpt);
	}

	return r;
}
/*
 * Connect a socket to a remote address.
 */
static int
sockevent_connect(sockid_t id, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * call)
{
	struct sockdriver_call fakecall;
	struct sockevent_proc *spr;
	struct sock *sock;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_connect == NULL)
		return EOPNOTSUPP;

	/* Connecting a socket in listening mode is never supported. */
	if (sock->sock_opt & SO_ACCEPTCONN)
		return EOPNOTSUPP;

	/*
	 * The upcoming connect call may fire an accept event for which the
	 * handler may in turn fire a connect event on this socket.  Since we
	 * delay event processing until after processing calls, this would
	 * create the problem that even if the connection is accepted right
	 * away, non-blocking connect requests would return EINPROGRESS.  For
	 * UDS, this is undesirable behavior.  To remedy this, we use a hack:
	 * we temporarily suspend the connect even if non-blocking, then
	 * process events, and then cancel the connect request again.  If the
	 * connection was accepted immediately, the cancellation will have no
	 * effect, since the request has already been replied to.  In order
	 * not to violate libsockdriver rules with this hack, we fabricate a
	 * fake 'call' object.
	 */
	r = sock->sock_ops->sop_connect(sock, addr, addr_len, user_endpt);

	if (r == SUSPEND) {
		if (call != NULL || sockevent_has_events()) {
			if (call == NULL) {
				fakecall.sc_endpt = NONE;

				call = &fakecall;
			}

			assert(!sockevent_has_suspended(sock,
			    SEV_SEND | SEV_RECV));

			sockevent_suspend(sock, SEV_CONNECT, call, user_endpt);

			if (call == &fakecall) {
				/* Process any pending events first now. */
				sockevent_pump();

				/*
				 * If the connect request has not been
				 * resumed yet now, we must remove it from
				 * the queue again, and return EINPROGRESS
				 * ourselves.  Otherwise, return OK or a
				 * pending error.
				 */
				spr = sockevent_unsuspend(sock, call);
				if (spr != NULL) {
					sockevent_proc_free(spr);

					r = EINPROGRESS;
				} else if ((r = sock->sock_err) != OK)
					sock->sock_err = OK;
			}
		} else
			r = EINPROGRESS;
	}

	if (r == OK) {
		/*
		 * A completed connection attempt also always implies that
		 * the socket becomes writable.  For convenience we enforce
		 * this rule here, because it is easy to forget.
		 */
		sockevent_raise(sock, SEV_SEND);
	}

	return r;
}
/*
 * Put a socket in listening mode.
 */
static int
sockevent_listen(sockid_t id, int backlog)
{
	struct sock *sock;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_listen == NULL)
		return EOPNOTSUPP;

	/*
	 * Perform a general adjustment on the backlog value, applying the
	 * customary BSD "fudge factor" of 1.5x.  Keep the value within
	 * bounds though.  POSIX imposes that a negative backlog value is
	 * equal to a backlog value of zero.  A backlog value of zero, in
	 * turn, may mean anything; we take it to be one.  POSIX also imposes
	 * that all socket drivers accept up to at least SOMAXCONN
	 * connections on the queue.
	 */
	if (backlog < 0)
		backlog = 0;
	if (backlog < SOMAXCONN)
		backlog += 1 + ((unsigned int)backlog >> 1);
	if (backlog > SOMAXCONN)
		backlog = SOMAXCONN;
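	/*
	 * For illustration (not in the original source): assuming SOMAXCONN
	 * is 128 as on NetBSD, the adjustment above maps a backlog of 0 to
	 * 1, 4 to 7, and 100 to 151, with the last then clamped to 128.
	 */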
	r = sock->sock_ops->sop_listen(sock, backlog);

	/*
	 * On success, the socket is now in listening mode.  As part of that,
	 * a select(2) ready-to-read condition now indicates that a
	 * connection may be accepted on the socket, rather than that data
	 * may be read.  Since libsockevent is responsible for this
	 * distinction, we keep track of the listening mode at this level.
	 * Conveniently, there is a socket option for this, which we support
	 * out of the box as a result.
	 */
	if (r == OK) {
		sock->sock_opt |= SO_ACCEPTCONN;

		/*
		 * For the extremely unlikely case that right after the
		 * socket is put into listening mode, it has a connection
		 * ready to accept, we retest blocked ready-to-read select
		 * queries now.
		 */
		sockevent_raise(sock, SEV_ACCEPT);
	}

	return r;
}

/*
 * Accept a connection on a listening socket, creating a new socket.
 */
static sockid_t
sockevent_accept(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call)
{
	struct sock *sock, *newsock;
	sockid_t r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_accept == NULL)
		return EOPNOTSUPP;

	/*
	 * Attempt to accept a connection.  The socket driver is responsible
	 * for allocating a sock object (and identifier) on success.  It may
	 * already have done so before, in which case it should leave newsock
	 * filled with NULL; otherwise, the returned sock object is cloned
	 * from the listening socket.  The socket driver is also responsible
	 * for failing the call if the socket is not in listening mode,
	 * because it must specify the error to return: EOPNOTSUPP or EINVAL.
	 */
	newsock = NULL;

	if ((r = sock->sock_ops->sop_accept(sock, addr, addr_len, user_endpt,
	    &newsock)) == SUSPEND) {
		assert(sock->sock_opt & SO_ACCEPTCONN);

		if (call == NULL)
			return EWOULDBLOCK;

		sockevent_suspend(sock, SEV_ACCEPT, call, user_endpt);

		return SUSPEND;
	}

	if (r >= 0)
		sockevent_accepted(sock, newsock, r);

	return r;
}
/*
 * Send regular and/or control data.
 */
static int
sockevent_send(sockid_t id, const struct sockdriver_data * __restrict data,
	size_t len, const struct sockdriver_data * __restrict ctl_data,
	socklen_t ctl_len, const struct sockaddr * __restrict addr,
	socklen_t addr_len, endpoint_t user_endpt, int flags,
	const struct sockdriver_call * __restrict call)
{
	struct sock *sock;
	clock_t time;
	size_t min, off;
	socklen_t ctl_off;
	int r, timer;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/*
	 * The order of the following checks is not necessarily fixed, and
	 * may be changed later.  As far as applicable, they should match the
	 * order of the checks during call resumption, though.
	 */
	if ((r = sock->sock_err) != OK) {
		sock->sock_err = OK;

		return r;
	}

	if (sock->sock_flags & SFL_SHUT_WR) {
		sockevent_sigpipe(sock, user_endpt, flags);

		return EPIPE;
	}

	/*
	 * Translate the sticky SO_DONTROUTE option to a per-request
	 * MSG_DONTROUTE flag.  This achieves two purposes: socket drivers
	 * have to check only one flag, and socket drivers that do not
	 * support the flag will fail send requests in a consistent way.
	 */
	if (sock->sock_opt & SO_DONTROUTE)
		flags |= MSG_DONTROUTE;

	/*
	 * Check if this is a valid send request as far as the socket driver
	 * is concerned.  We do this separately from sop_send for the reason
	 * that this send request may immediately be queued behind other
	 * pending send requests (without a call to sop_send), which means
	 * even invalid requests would be queued and not return failure until
	 * much later.
	 */
	if (sock->sock_ops->sop_pre_send != NULL &&
	    (r = sock->sock_ops->sop_pre_send(sock, len, ctl_len, addr,
	    addr_len, user_endpt,
	    flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
		return r;

	if (sock->sock_ops->sop_send == NULL)
		return EOPNOTSUPP;

	off = 0;
	ctl_off = 0;

	/*
	 * Sending out-of-band data is treated differently from regular data:
	 *
	 * - sop_send is called immediately, even if a partial non-OOB send
	 *   operation is currently suspended (TODO: it may have to be
	 *   aborted in order to maintain atomicity guarantees - that should
	 *   be easy);
	 * - sop_send must not return SUSPEND; instead, if it cannot process
	 *   the OOB data immediately, it must return an appropriate error;
	 * - the send low watermark is ignored.
	 *
	 * Given that none of the current socket drivers support OOB data at
	 * all, more sophisticated approaches would have no added value now.
	 */
	if (flags & MSG_OOB) {
		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, 0);

		if (r == SUSPEND)
			panic("libsockevent: MSG_OOB send calls may not be "
			    "suspended");

		return (r == OK) ? (int)off : r;
	}

	/*
	 * Only call the actual sop_send function now if no other send calls
	 * are suspended already.
	 *
	 * Call sop_send with 'min' set to the minimum of the request size
	 * and the socket's send low water mark, but only if the call is non-
	 * blocking.  For stream-oriented sockets, this should have the
	 * effect that non-blocking calls fail with EWOULDBLOCK if not at
	 * least that much can be sent immediately.  For consistency, we
	 * choose to apply the same threshold to blocking calls.  For
	 * datagram-oriented sockets, the minimum is not a factor to be
	 * considered.
	 */
	if (!sockevent_has_suspended(sock, SEV_SEND)) {
		min = sock->sock_slowat;
		if (min > len)
			min = len;

		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags,
		    min);
	} else
		r = SUSPEND;

	if (r == SUSPEND) {
		/*
		 * We do not store the target's address on suspension,
		 * because that would add significantly to the per-process
		 * suspension state.  As a result, we disallow socket drivers
		 * from suspending send calls with addresses, because we
		 * would no longer have the address for proper call
		 * resumption.  However, we do not know here whether the
		 * socket is in connection-oriented mode; if it is, the
		 * address is to be ignored altogether.  Therefore, there is
		 * no test on 'addr' here.  Resumed calls will get a NULL
		 * address pointer, and the socket driver is expected to do
		 * the right thing.
		 */

		/*
		 * For non-blocking socket calls, return an error only if we
		 * were not able to send anything at all.  If only control
		 * data were sent, the return value is therefore zero.
		 */
		if (call != NULL) {
			if (sock->sock_stimeo != 0) {
				timer = TRUE;
				time = socktimer_add(sock, sock->sock_stimeo);
			} else {
				timer = FALSE;
				time = 0;
			}

			sockevent_suspend_data(sock, SEV_SEND, timer, call,
			    user_endpt, data, len, off, ctl_data, ctl_len,
			    ctl_off, flags, 0, time);
		} else
			r = (off > 0 || ctl_off > 0) ? OK : EWOULDBLOCK;
	} else if (r == EPIPE)
		sockevent_sigpipe(sock, user_endpt, flags);

	return (r == OK) ? (int)off : r;
}
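/*
 * Resulting call semantics, sketched for illustration: a non-blocking send
 * of 1000 bytes on a socket with room for only 400 bytes returns 400, since
 * partial progress takes precedence over EWOULDBLOCK; a second non-blocking
 * send while the buffer is still full returns EWOULDBLOCK, as nothing at all
 * could be sent.  These outcomes assume the default send low watermark of 1.
 */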
/*
 * The inner part of the receive request handler.  An error returned from
 * here may be overridden by an error pending on the socket, although data
 * returned from here trumps such pending errors.
 */
static int
sockevent_recv_inner(struct sock * sock,
	const struct sockdriver_data * __restrict data,
	size_t len, size_t * __restrict off,
	const struct sockdriver_data * __restrict ctl_data,
	socklen_t ctl_len, socklen_t * __restrict ctl_off,
	struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	int * __restrict flags, const struct sockdriver_call * __restrict call)
{
	clock_t time;
	size_t min;
	int r, oob, inflags, timer;

	/*
	 * Check if this is a valid receive request as far as the socket
	 * driver is concerned.  We do this separately from sop_recv for the
	 * reason that this receive request may immediately be queued behind
	 * other pending receive requests (without a call to sop_recv), which
	 * means even invalid requests would be queued and not return failure
	 * until much later.
	 */
	inflags = *flags;
	*flags = 0;

	if (sock->sock_ops->sop_pre_recv != NULL &&
	    (r = sock->sock_ops->sop_pre_recv(sock, user_endpt,
	    inflags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
		return r;

	/*
	 * The order of the following checks is not necessarily fixed, and
	 * may be changed later.  As far as applicable, they should match the
	 * order of the checks during call resumption, though.
	 */
	if (sock->sock_flags & SFL_SHUT_RD)
		return SOCKEVENT_EOF;

	if (sock->sock_ops->sop_recv == NULL)
		return EOPNOTSUPP;

	/*
	 * Receiving out-of-band data is treated differently from regular
	 * data:
	 *
	 * - sop_recv is called immediately, even if a partial non-OOB
	 *   receive operation is currently suspended (TODO: it may have to
	 *   be aborted in order to maintain atomicity guarantees - that
	 *   should be easy);
	 * - sop_recv must not return SUSPEND; instead, if it cannot return
	 *   any of the OOB data immediately, it must return an appropriate
	 *   error;
	 * - the receive low watermark is ignored.
	 *
	 * Given that none of the current socket drivers support OOB data at
	 * all, more sophisticated approaches would have no added value now.
	 */
	oob = (inflags & MSG_OOB);

	if (oob && (sock->sock_opt & SO_OOBINLINE))
		return EINVAL;

	/*
	 * Only call the actual sop_recv function now if no other receive
	 * calls are suspended already.
	 *
	 * Call sop_recv with 'min' set to the minimum of the request size
	 * and the socket's receive low water mark, unless there is a pending
	 * error.  As a result, blocking calls will block, and non-blocking
	 * calls will yield EWOULDBLOCK, if not at least that much can be
	 * received, unless another condition (EOF or that pending error)
	 * prevents more from being received anyway.  For datagram-oriented
	 * sockets, the minimum is not a factor to be considered.
	 */
	if (oob || !sockevent_has_suspended(sock, SEV_RECV)) {
		if (!oob && sock->sock_err == OK) {
			min = sock->sock_rlowat;
			if (min > len)
				min = len;
		} else
			min = 0;	/* receive even no-data segments */

		r = sock->sock_ops->sop_recv(sock, data, len, off, ctl_data,
		    ctl_len, ctl_off, addr, addr_len, user_endpt, inflags,
		    min, flags);
	} else
		r = SUSPEND;

	assert(r <= 0 || r == SOCKEVENT_EOF);

	if (r == SUSPEND) {
		if (oob)
			panic("libsockevent: MSG_OOB receive calls may not be "
			    "suspended");

		/*
		 * For non-blocking socket calls, return EWOULDBLOCK only if
		 * we did not receive anything at all.  If only control data
		 * were received, the return value is therefore zero.
		 * Suspension implies that there is nothing to read.  For the
		 * purpose of the calling wrapper function, never suspend a
		 * call when there is a pending error.
		 */
		if (call != NULL && sock->sock_err == OK) {
			if (sock->sock_rtimeo != 0) {
				timer = TRUE;
				time = socktimer_add(sock, sock->sock_rtimeo);
			} else {
				timer = FALSE;
				time = 0;
			}

			sockevent_suspend_data(sock, SEV_RECV, timer, call,
			    user_endpt, data, len, *off, ctl_data,
			    ctl_len, *ctl_off, inflags, *flags, time);
		} else
			r = EWOULDBLOCK;
	}

	return r;
}
/*
 * Receive regular and/or control data.
 */
static int
sockevent_recv(sockid_t id, const struct sockdriver_data * __restrict data,
	size_t len, const struct sockdriver_data * __restrict ctl_data,
	socklen_t * __restrict ctl_len, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len, endpoint_t user_endpt,
	int * __restrict flags, const struct sockdriver_call * __restrict call)
{
	struct sock *sock;
	size_t off;
	socklen_t ctl_inlen;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/*
	 * This function is a wrapper around the actual receive
	 * functionality.  The reason for this is that receiving data should
	 * take precedence over a pending socket error, while a pending
	 * socket error should take precedence over both regular errors as
	 * well as EOF.  In other words: if there is a pending error, we must
	 * try to receive anything at all; if receiving does not work, we
	 * must fail the call with the pending error.  However, until we call
	 * the receive callback, we have no way of telling whether any data
	 * can be received.  So we must try that before we can decide whether
	 * to return a pending error.
	 */
	off = 0;
	ctl_inlen = *ctl_len;
	*ctl_len = 0;

	/*
	 * Attempt to perform the actual receive call.
	 */
	r = sockevent_recv_inner(sock, data, len, &off, ctl_data, ctl_inlen,
	    ctl_len, addr, addr_len, user_endpt, flags, call);

	/*
	 * If the receive request succeeded, or it failed but yielded a
	 * partial result, then return the (partial) result.  Otherwise, if
	 * an error is pending, return that error.  Otherwise, return either
	 * a regular error or 0 for EOF.
	 */
	if (r == OK || (r != SUSPEND && (off > 0 || *ctl_len > 0)))
		r = (int)off;
	else if (sock->sock_err != OK) {
		assert(r != SUSPEND);

		r = sock->sock_err;

		sock->sock_err = OK;
	} else if (r == SOCKEVENT_EOF)
		r = 0;

	return r;
}
/*
 * Process an I/O control call.
 */
static int
sockevent_ioctl(sockid_t id, unsigned long request,
	const struct sockdriver_data * __restrict data, endpoint_t user_endpt,
	const struct sockdriver_call * __restrict call __unused)
{
	struct sock *sock;
	size_t size;
	int r, val;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/* We handle a very small subset of generic IOCTLs here. */
	switch (request) {
	case FIONREAD:
		size = 0;
		if (!(sock->sock_flags & SFL_SHUT_RD) &&
		    sock->sock_ops->sop_test_recv != NULL)
			(void)sock->sock_ops->sop_test_recv(sock, 0, &size);

		val = (int)size;

		return sockdriver_copyout(data, 0, &val, sizeof(val));
	}

	if (sock->sock_ops->sop_ioctl == NULL)
		return ENOTTY;

	r = sock->sock_ops->sop_ioctl(sock, request, data, user_endpt);

	/*
	 * Suspending IOCTL requests is not currently supported by this
	 * library, even though the VFS protocol and libsockdriver do support
	 * it.  The reason is that IOCTLs do not match our process suspension
	 * model: they can be neither queued nor repeated.  For now, it seems
	 * that this feature is not needed by the socket drivers either.
	 * Thus, even though there are possible solutions, we defer
	 * implementing them until we know what exactly is needed.
	 */
	if (r == SUSPEND)
		panic("libsockevent: socket driver suspended IOCTL 0x%lx",
		    request);

	return r;
}
/*
 * Set socket options.
 */
static int
sockevent_setsockopt(sockid_t id, int level, int name,
	const struct sockdriver_data * data, socklen_t len)
{
	struct sock *sock;
	struct linger linger;
	struct timeval tv;
	clock_t secs, ticks;
	int r, val;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (level == SOL_SOCKET) {
		/*
		 * Handle a subset of the socket-level options here.  For
		 * most of them, this means that the socket driver itself
		 * need not handle changing or returning the options, but
		 * still needs to implement the correct behavior based on
		 * them where needed.  A few of them are handled exclusively
		 * in this library: SO_ACCEPTCONN, SO_NOSIGPIPE, SO_ERROR,
		 * SO_TYPE, SO_LINGER, SO_SNDLOWAT, SO_RCVLOWAT, SO_SNDTIMEO,
		 * and SO_RCVTIMEO.  The SO_USELOOPBACK option is explicitly
		 * absent, as it is valid for routing sockets only and is set
		 * by default there.
		 */
		switch (name) {
		case SO_DEBUG:
		case SO_REUSEADDR:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_REUSEPORT:
		case SO_NOSIGPIPE:
		case SO_TIMESTAMP:
			/*
			 * Simple on-off options.  Changing them does not
			 * involve the socket driver.
			 */
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val)
				sock->sock_opt |= (unsigned int)name;
			else
				sock->sock_opt &= ~(unsigned int)name;

			/*
			 * In principle these on-off options are maintained
			 * in this library, but some socket drivers may need
			 * to apply the options elsewhere, so we notify them
			 * that something has changed.  Using the
			 * sop_setsockopt callback would be inconvenient for
			 * this for two reasons: multiple value copy-ins and
			 * default errors.
			 */
			if (sock->sock_ops->sop_setsockmask != NULL)
				sock->sock_ops->sop_setsockmask(sock,
				    sock->sock_opt);

			/*
			 * The inlining of OOB data may make new data
			 * available through regular receive calls.  Thus,
			 * see if we can wake up any suspended receive calls
			 * now.
			 */
			if (name == SO_OOBINLINE && val)
				sockevent_raise(sock, SEV_RECV);

			return OK;

		case SO_LINGER:
			/* The only on-off option with an associated value. */
			if ((r = sockdriver_copyin_opt(data, &linger,
			    sizeof(linger), len)) != OK)
				return r;

			if (linger.l_onoff) {
				if (linger.l_linger < 0)
					return EINVAL;
				/* EDOM is the closest applicable error. */
				secs = (clock_t)linger.l_linger;
				if (secs >= TMRDIFF_MAX / sys_hz())
					return EDOM;

				sock->sock_opt |= SO_LINGER;
				sock->sock_linger = secs * sys_hz();
			} else {
				sock->sock_opt &= ~SO_LINGER;
				sock->sock_linger = 0;
			}

			return OK;
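		/*
		 * Example (illustration only): a user process enabling a
		 * five-second linger period would call
		 *
		 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
		 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
		 *
		 * which, with sys_hz() at 100, stores 500 ticks in
		 * sock_linger above.
		 */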
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val <= 0)
				return EINVAL;

			/*
			 * Setting these values may allow suspended
			 * operations (send, recv, select) to be resumed, so
			 * recheck.
			 */
			if (name == SO_SNDLOWAT) {
				sock->sock_slowat = (size_t)val;

				sockevent_raise(sock, SEV_SEND);
			} else {
				sock->sock_rlowat = (size_t)val;

				sockevent_raise(sock, SEV_RECV);
			}

			return OK;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			if ((r = sockdriver_copyin_opt(data, &tv, sizeof(tv),
			    len)) != OK)
				return r;

			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
			    (unsigned long)tv.tv_usec >= US)
				return EINVAL;
			if (tv.tv_sec >= TMRDIFF_MAX / sys_hz())
				return EDOM;

			ticks = tv.tv_sec * sys_hz() +
			    (tv.tv_usec * sys_hz() + US - 1) / US;
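			/*
			 * Worked example (illustration only): with sys_hz()
			 * at 100, a timeout of 2.5 seconds (tv_sec = 2,
			 * tv_usec = 500000) yields 2 * 100 +
			 * (500000 * 100 + 999999) / 1000000 = 250 ticks; the
			 * rounding up ensures that a nonzero tv_usec never
			 * gets lost entirely.
			 */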
			if (name == SO_SNDTIMEO)
				sock->sock_stimeo = ticks;
			else
				sock->sock_rtimeo = ticks;

			/*
			 * The timeouts for any calls already in progress for
			 * this socket are left as is.
			 */
			return OK;

		case SO_ACCEPTCONN:
		case SO_ERROR:
		case SO_TYPE:
			/* These options may be retrieved but not set. */
			return ENOPROTOOPT;

		default:
			/*
			 * The remaining options either cannot be handled in
			 * a generic way, or are not recognized altogether.
			 * Pass them to the socket driver, which should
			 * handle what it knows and reject the rest.
			 */
			break;
		}
	}

	if (sock->sock_ops->sop_setsockopt == NULL)
		return ENOPROTOOPT;

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not recognize.
	 */
	return sock->sock_ops->sop_setsockopt(sock, level, name, data, len);
}
/*
 * Retrieve socket options.
 */
static int
sockevent_getsockopt(sockid_t id, int level, int name,
	const struct sockdriver_data * __restrict data,
	socklen_t * __restrict len)
{
	struct sock *sock;
	struct linger linger;
	struct timeval tv;
	clock_t ticks;
	int val;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (level == SOL_SOCKET) {
		/*
		 * As with setting, handle a subset of the socket-level options
		 * here. The rest is to be taken care of by the socket driver.
		 */
		switch (name) {
		case SO_DEBUG:
		case SO_ACCEPTCONN:
		case SO_REUSEADDR:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_REUSEPORT:
		case SO_NOSIGPIPE:
		case SO_TIMESTAMP:
			val = !!(sock->sock_opt & (unsigned int)name);

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_LINGER:
			linger.l_onoff = !!(sock->sock_opt & SO_LINGER);
			linger.l_linger = sock->sock_linger / sys_hz();

			return sockdriver_copyout_opt(data, &linger,
			    sizeof(linger), len);

		case SO_ERROR:
			if ((val = -sock->sock_err) != OK)
				sock->sock_err = OK;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
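		/*
		 * Illustrative sketch, not part of this library: the classic
		 * user-side consumer of the SO_ERROR logic above--checking
		 * the result of a non-blocking connect(2). Note that the
		 * code above clears the pending error once retrieved. The
		 * descriptor 'fd' is hypothetical, and <err.h> and
		 * <string.h> are assumed.
		 */
#if 0
	int error;
	socklen_t elen = sizeof(error);

	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &elen) != 0)
		err(EXIT_FAILURE, "getsockopt");
	if (error != 0)
		errx(EXIT_FAILURE, "connect: %s", strerror(error));
#endif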
		case SO_TYPE:
			val = sock->sock_type;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_SNDLOWAT:
			val = (int)sock->sock_slowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_RCVLOWAT:
			val = (int)sock->sock_rlowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			if (name == SO_SNDTIMEO)
				ticks = sock->sock_stimeo;
			else
				ticks = sock->sock_rtimeo;

			tv.tv_sec = ticks / sys_hz();
			tv.tv_usec = (ticks % sys_hz()) * US / sys_hz();

			return sockdriver_copyout_opt(data, &tv, sizeof(tv),
			    len);

		default:
			break;
		}
	}

	if (sock->sock_ops->sop_getsockopt == NULL)
		return ENOPROTOOPT;

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not recognize.
	 */
	return sock->sock_ops->sop_getsockopt(sock, level, name, data, len);
}
/*
 * Retrieve a socket's local address.
 */
static int
sockevent_getsockname(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)
{
	struct sock *sock;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_getsockname == NULL)
		return EOPNOTSUPP;

	return sock->sock_ops->sop_getsockname(sock, addr, addr_len);
}

/*
 * Retrieve a socket's remote address.
 */
static int
sockevent_getpeername(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)
{
	struct sock *sock;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/* Listening-mode sockets cannot possibly have a peer address. */
	if (sock->sock_opt & SO_ACCEPTCONN)
		return ENOTCONN;

	if (sock->sock_ops->sop_getpeername == NULL)
		return EOPNOTSUPP;

	return sock->sock_ops->sop_getpeername(sock, addr, addr_len);
}
/*
 * Mark the socket object as shut down for sending and/or receiving. The flags
 * parameter may be a bitwise-OR'ed combination of SFL_SHUT_RD and SFL_SHUT_WR.
 * This function will wake up any suspended requests affected by this change,
 * but it will not invoke the sop_shutdown() callback function on the socket.
 * The function may in fact be called from sop_shutdown() before completion to
 * mark the socket as shut down as reflected by sockevent_is_shutdown().
 */
void
sockevent_set_shutdown(struct sock * sock, unsigned int flags)
{
	unsigned int mask;

	assert(sock->sock_ops != NULL);
	assert(!(flags & ~(SFL_SHUT_RD | SFL_SHUT_WR)));

	/* Look at the newly set flags only. */
	flags &= ~(unsigned int)sock->sock_flags;

	if (flags != 0) {
		sock->sock_flags |= flags;

		/*
		 * Wake up any blocked calls that are affected by the shutdown.
		 * Shutting down listening sockets causes ongoing accept calls
		 * to be rechecked.
		 */
		mask = 0;
		if (flags & SFL_SHUT_RD)
			mask |= SEV_RECV;
		if (flags & SFL_SHUT_WR)
			mask |= SEV_SEND;
		if (sock->sock_opt & SO_ACCEPTCONN)
			mask |= SEV_ACCEPT;

		assert(mask != 0);
		sockevent_raise(sock, mask);
	}
}
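/*
 * Illustrative sketch, not part of this library: a socket driver marking a
 * connection-mode socket as no longer writable once it learns that the peer
 * has gone away, so that blocked senders wake up as described above. The
 * function name and the way the driver detects the condition are
 * hypothetical.
 */
#if 0
static void
mysock_peer_closed(struct sock * sock)
{

	/* Wakes any sends suspended on this socket (SEV_SEND). */
	sockevent_set_shutdown(sock, SFL_SHUT_WR);
}
#endif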
/*
 * Shut down socket send and receive operations.
 */
static int
sockevent_shutdown(sockid_t id, int how)
{
	struct sock *sock;
	unsigned int flags;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/* Convert the request to a set of flags. */
	flags = 0;
	if (how == SHUT_RD || how == SHUT_RDWR)
		flags |= SFL_SHUT_RD;
	if (how == SHUT_WR || how == SHUT_RDWR)
		flags |= SFL_SHUT_WR;

	if (sock->sock_ops->sop_shutdown != NULL)
		r = sock->sock_ops->sop_shutdown(sock, flags);
	else
		r = OK;

	/* On success, update our internal state as well. */
	if (r == OK)
		sockevent_set_shutdown(sock, flags);

	return r;
}
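/*
 * Illustrative sketch, not part of this library: the user-side call that
 * ends up in sockevent_shutdown() above. SHUT_RDWR maps to both SFL_SHUT_RD
 * and SFL_SHUT_WR. The descriptor 'fd' is hypothetical.
 */
#if 0
	if (shutdown(fd, SHUT_RDWR) != 0)
		err(EXIT_FAILURE, "shutdown");
#endif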
/*
 * Close a socket.
 */
static int
sockevent_close(sockid_t id, const struct sockdriver_call * call)
{
	struct sock *sock;
	int r, force;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	assert(sock->sock_proc == NULL);
	sock->sock_select.ss_endpt = NONE;

	/*
	 * There are several scenarios when it comes to closing sockets. First
	 * of all, we never actually force the socket driver to close a socket.
	 * The driver may always suspend the close call and take as long as it
	 * wants. After a suspension, it signals its completion of the close
	 * through the SEV_CLOSE socket event.
	 *
	 * With that said, we offer two levels of urgency regarding the close
	 * request: regular and forced. The former allows for a graceful
	 * close; the latter urges the socket driver to close the socket as
	 * soon as possible. A socket that has been requested to be closed
	 * gracefully can, as long as it is still open (i.e., no SEV_CLOSE was
	 * fired yet), later be requested to be closed forcefully. This is how
	 * SO_LINGER with a nonzero timeout is implemented. If SO_LINGER is
	 * set with a zero timeout, the socket is force-closed immediately.
	 * Finally, if SO_LINGER is not set, the socket will be closed normally
	 * and never be forced--akin to SO_LINGER with an infinite timeout.
	 *
	 * The return value of the caller's close(2) may only ever be either
	 * OK or EINPROGRESS, to ensure that the caller knows that the file
	 * descriptor is freed up, as per Austin Group Defect #529. In fact,
	 * EINPROGRESS is to be returned only on signal interruption (i.e.,
	 * cancel). For that reason, this function only ever returns OK.
	 */
	force = ((sock->sock_opt & SO_LINGER) && sock->sock_linger == 0);

	if (sock->sock_ops->sop_close != NULL)
		r = sock->sock_ops->sop_close(sock, force);
	else
		r = OK;

	assert(r == OK || r == SUSPEND);

	if (r == SUSPEND) {
		sock->sock_flags |= SFL_CLOSING;

		/*
		 * If we were requested to force-close the socket immediately,
		 * but the socket driver needs more time anyway, then tell the
		 * caller that the socket was closed right away.
		 */
		if (force)
			return OK;

		/*
		 * If we are to force-close the socket only after a specific
		 * linger timeout, set the timer for that now, even if the call
		 * is non-blocking. This also means that we cannot associate
		 * the linger timeout with the close call. Instead, we convert
		 * the sock_linger value from a (relative) duration to an
		 * (absolute) timeout time, and use the SFL_CLOSING flag (along
		 * with SFL_TIMER) to tell the difference. Since the socket is
		 * otherwise unreachable from userland at this point, the
		 * conversion is never visible in any way.
		 *
		 * The socket may already be in the timers list, so we must
		 * always check the SO_LINGER flag before checking sock_linger.
		 *
		 * If SO_LINGER is not set, we must never suspend the call.
		 */
		if (sock->sock_opt & SO_LINGER) {
			sock->sock_linger =
			    socktimer_add(sock, sock->sock_linger);
		} else
			call = NULL;

		/*
		 * A non-blocking close is completed asynchronously. The
		 * caller is not told about this with EWOULDBLOCK as usual, for
		 * the reasons mentioned above.
		 */
		if (call != NULL)
			sockevent_suspend(sock, SEV_CLOSE, call, NONE);
		else
			r = OK;
	} else if (r == OK)
		sockevent_free(sock);

	return r;
}
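/*
 * Illustrative sketch, not part of this library: requesting the forced
 * close described above from a user process, by setting SO_LINGER with a
 * zero timeout before close(2). Per the comment above, close(2) still
 * returns 0 even if the driver completes the close in the background. The
 * descriptor 'fd' is hypothetical.
 */
#if 0
	struct linger linger;

	linger.l_onoff = 1;
	linger.l_linger = 0;	/* force-close immediately */
	(void)setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
	if (close(fd) != 0)
		err(EXIT_FAILURE, "close");
#endif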
/*
 * Cancel a suspended send request.
 */
static void
sockevent_cancel_send(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were sent, return the number of data
	 * bytes sent--possibly zero. Otherwise return the given error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	sockdriver_reply_generic(&spr->spr_call, r);

	/*
	 * In extremely rare circumstances, one send may be queued behind
	 * another send even though the former can actually be sent on the
	 * socket right away. For this reason, we retry sending when canceling
	 * a send. We need to do this only when the first send in the queue
	 * was canceled, but multiple blocked sends on a single socket should
	 * be rare anyway.
	 */
	sockevent_raise(sock, SEV_SEND);
}

/*
 * Cancel a suspended receive request.
 */
static void
sockevent_cancel_recv(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were received, return the number of
	 * data bytes received--possibly zero. Otherwise return the given
	 * error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	/*
	 * Also return any flags set for the data received so far, e.g.
	 * MSG_CTRUNC. Do not return an address: receive calls on unconnected
	 * sockets must never block after receiving some data--instead, they
	 * are supposed to return MSG_TRUNC if not all data were copied out.
	 */
	sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff, NULL, 0,
	    spr->spr_rflags);

	/*
	 * The same story as for sends (see above) applies to receives,
	 * although this case should be even more rare in practice.
	 */
	sockevent_raise(sock, SEV_RECV);
}
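/*
 * Illustrative sketch, not part of this library: what the cancellation
 * policy above means for a user process. If a signal interrupts a blocked
 * recv(2) after some bytes have already been gathered, the call returns
 * that (short) byte count rather than failing with EINTR. The descriptor
 * 'fd' and the buffer are hypothetical.
 */
#if 0
	char buf[512];
	ssize_t n;

	n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
	if (n > 0) {
		/* Possibly a short count: the call may have been canceled. */
	} else if (n < 0 && errno == EINTR) {
		/* Interrupted before any data arrived. */
	}
#endif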
/*
 * Cancel a previous request that may currently be suspended. The cancel
 * operation itself does not have a reply. Instead, if the given request was
 * found to be suspended, that request must be aborted and an appropriate reply
 * must be sent for the request. If no matching request was found, no reply
 * must be sent at all.
 */
static void
sockevent_cancel(sockid_t id, const struct sockdriver_call * call)
{
	struct sockevent_proc *spr;
	struct sock *sock;

	/*
	 * Due to asynchronous close(2) operations, not even the sock object
	 * may be found. In this (entirely legitimate) case, do not send any
	 * reply.
	 */
	if ((sock = sockhash_get(id)) == NULL)
		return;

	/*
	 * The request may already have completed by the time we receive the
	 * cancel request, in which case we cannot find it. In this (entirely
	 * legitimate) case, do not send any reply.
	 */
	if ((spr = sockevent_unsuspend(sock, call)) == NULL)
		return;

	/*
	 * We found the operation. Cancel it according to its call type.
	 * Then, once fully done with it, free the suspension data structure.
	 *
	 * Note that we have to use the call structure from the suspension data
	 * structure rather than the given 'call' pointer: only the former
	 * includes all the information necessary to resume the request!
	 */
	switch (spr->spr_event) {
	case SEV_BIND:
	case SEV_CONNECT:
		assert(spr->spr_call.sc_endpt != NONE);

		sockdriver_reply_generic(&spr->spr_call, EINTR);

		break;

	case SEV_ACCEPT:
		sockdriver_reply_accept(&spr->spr_call, EINTR, NULL, 0);

		break;

	case SEV_SEND:
		sockevent_cancel_send(sock, spr, EINTR);

		break;

	case SEV_RECV:
		sockevent_cancel_recv(sock, spr, EINTR);

		break;

	case SEV_CLOSE:
		/*
		 * Return EINPROGRESS rather than EINTR, so that the user
		 * process can tell from the close(2) result that the file
		 * descriptor has in fact been closed.
		 */
		sockdriver_reply_generic(&spr->spr_call, EINPROGRESS);

		/*
		 * Do not free the sock object here: the socket driver will
		 * complete the close in the background, and fire SEV_CLOSE
		 * once it is done. Only then is the sock object freed.
		 */
		break;

	default:
		panic("libsockevent: process suspended on unknown event 0x%x",
		    spr->spr_event);
	}

	sockevent_proc_free(spr);
}
/*
 * Process a select request.
 */
static int
sockevent_select(sockid_t id, unsigned int ops,
	const struct sockdriver_select * sel)
{
	struct sock *sock;
	unsigned int r, notify;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	notify = (ops & SDEV_NOTIFY);
	ops &= (SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR);

	/*
	 * See if any of the requested select operations can be satisfied
	 * immediately.
	 */
	r = sockevent_test_select(sock, ops);

	/*
	 * If select operations were pending, the new results must not indicate
	 * that any of those were satisfied, as that would indicate an internal
	 * logic error: the socket driver is supposed to update its state
	 * proactively, and thus, discovering that things have changed here is
	 * not something that should ever happen.
	 */
	assert(!(sock->sock_selops & r));

	/*
	 * If any select operations are not satisfied immediately, and we are
	 * asked to notify the caller when they are satisfied later, save them
	 * for later retesting.
	 */
	ops &= ~r;

	if (notify && ops != 0) {
		/*
		 * For now, we support only one caller when it comes to select
		 * queries: VFS. If we want to support a networked file system
		 * (or so) directly calling select as well, this library will
		 * have to be extended accordingly (should not be too hard).
		 */
		if (sock->sock_select.ss_endpt != NONE) {
			if (sock->sock_select.ss_endpt != sel->ss_endpt) {
				printf("libsockevent: no support for multiple "
				    "select callers yet\n");

				return EIO;
			}

			/*
			 * If a select query was already pending for this
			 * caller, we must simply merge in the new operations.
			 */
			sock->sock_selops |= ops;
		} else {
			assert(sel->ss_endpt != NONE);

			sock->sock_select = *sel;
			sock->sock_selops = ops;
		}
	}

	return r;
}
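/*
 * Illustrative sketch, not part of this library: the driver-side
 * counterpart to the select bookkeeping above. When new data arrives, the
 * driver raises SEV_RECV; libsockevent then retests the saved select
 * operations and notifies the caller (VFS) of any that are now satisfied.
 * The function name and the arrival hook are hypothetical.
 */
#if 0
static void
mysock_data_arrived(struct sock * sock)
{

	/* Rechecks suspended receives and pending select queries. */
	sockevent_raise(sock, SEV_RECV);
}
#endif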
/*
 * An alarm has triggered. Expire any timers. Socket drivers that do not pass
 * clock notification messages to libsockevent must call expire_timers(3)
 * themselves instead.
 */
static void
sockevent_alarm(clock_t now)
{

	expire_timers(now);
}

static const struct sockdriver sockevent_tab = {
	.sdr_socket = sockevent_socket,
	.sdr_socketpair = sockevent_socketpair,
	.sdr_bind = sockevent_bind,
	.sdr_connect = sockevent_connect,
	.sdr_listen = sockevent_listen,
	.sdr_accept = sockevent_accept,
	.sdr_send = sockevent_send,
	.sdr_recv = sockevent_recv,
	.sdr_ioctl = sockevent_ioctl,
	.sdr_setsockopt = sockevent_setsockopt,
	.sdr_getsockopt = sockevent_getsockopt,
	.sdr_getsockname = sockevent_getsockname,
	.sdr_getpeername = sockevent_getpeername,
	.sdr_shutdown = sockevent_shutdown,
	.sdr_close = sockevent_close,
	.sdr_cancel = sockevent_cancel,
	.sdr_select = sockevent_select,
	.sdr_alarm = sockevent_alarm
};
/*
 * Initialize the socket event library.
 */
void
sockevent_init(sockevent_socket_cb_t socket_cb)
{

	sockhash_init();

	socktimer_init();

	sockevent_proc_init();

	SIMPLEQ_INIT(&sockevent_pending);

	assert(socket_cb != NULL);
	sockevent_socket_cb = socket_cb;

	/* Announce we are up. */
	sockdriver_announce();

	sockevent_working = FALSE;
}

/*
 * Process a socket driver request message.
 */
void
sockevent_process(const message * m_ptr, int ipc_status)
{

	/* Block events until after we have processed the request. */
	assert(!sockevent_working);
	sockevent_working = TRUE;

	/* Actually process the request. */
	sockdriver_process(&sockevent_tab, m_ptr, ipc_status);

	/*
	 * If any events were fired while processing the request, they will
	 * have been queued for later. Go through them now.
	 */
	if (sockevent_has_events())
		sockevent_pump();

	sockevent_working = FALSE;
}
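/*
 * Illustrative sketch, not part of this library: the message loop of a
 * socket driver built on the two functions above. The driver name and its
 * sef_local_startup() routine (which would call sockevent_init() with the
 * driver's socket-creation callback) are hypothetical; sef_receive_status()
 * is the regular MINIX3 way of receiving request messages. Since clock
 * notifications are passed to sockevent_process() here, the library's own
 * sockevent_alarm() handles timer expiry.
 */
#if 0
int
main(void)
{
	message m;
	int r, ipc_status;

	sef_local_startup();	/* calls sockevent_init(mysock_socket) */

	for (;;) {
		if ((r = sef_receive_status(ANY, &m, &ipc_status)) != OK)
			panic("mysock: sef_receive_status failed: %d", r);

		sockevent_process(&m, ipc_status);
	}
}
#endif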