2 * This file implements the lower socket layer of VFS: communication with
3 * socket drivers. Socket driver communication evolved out of character driver
4 * communication, and the two have many similarities between them. Most
5 * importantly, socket driver communication also has the distinction between
6 * short-lived and long-lived requests.
8 * Short-lived requests are expected to be replied to by the socket driver
9 * immediately in all cases. For such requests, VFS keeps the worker thread
10 * for the calling process alive until the reply arrives. In contrast,
11 * long-lived requests may block. For such requests, VFS suspends the calling
12 * process until a reply comes in, or until a signal interrupts the request.
13 * Both short-lived and long-lived requests may be aborted if VFS finds that
14 * the corresponding socket driver has died. Even though long-lived requests
15 * may be marked as nonblocking, nonblocking calls are still handled as
16 * long-lived in terms of VFS processing.
18 * For an overview of the socket driver requests and replies, message layouts,
19 * and which requests are long-lived or short-lived (i.e. may suspend or not),
20 * please refer to the corresponding table in the libsockdriver source code.
22 * For most long-lived socket requests, the main VFS thread processes the reply
23 * from the socket driver. This typically consists of waking up the user
24 * process that originally issued the system call on the socket by simply
25 * relaying the call's result code. Some socket calls require a specific reply
26 * message and/or additional post-call actions; for those, resume_*() calls are
27 * made back into the upper socket layer.
29 * If a process is interrupted by a signal, any ongoing long-lived socket
30 * request must be canceled. This is done by sending a one-way cancel request
31 * to the socket driver, and waiting for it to reply to the original request.
32 * In this case, the reply will be processed from the worker thread that is
33 * handling the cancel operation. Canceling does not imply call failure: the
34 * cancellation may result in a partial I/O reply, and a successful reply may
35 * cross the cancel request.
37 * One main exception is the reply to an accept request. Once a connection has
38 * been accepted, a new socket has to be created for it. This requires actions
39 * that require the ability to block the current thread, and so, a worker
40 * thread is spawned for processing successful accept replies, unless the reply
41 * was received from a worker thread already (as may be the case if the accept
42 * request was being canceled).
46 #include <sys/socket.h>
47 #include <minix/callnr.h>
50 * Send a short-lived request message to the given socket driver, and suspend
51 * the current worker thread until a reply message has been received. On
52 * success, the function will return OK, and the reply message will be stored
53 * in the message structure pointed to by 'm_ptr'. The function may fail if
54 * the socket driver dies before sending a reply. In that case, the function
55 * will return a negative error code, and also store the same negative error
56 * code in the m_type field of the 'm_ptr' message structure.
59 sdev_sendrec(struct smap
* sp
, message
* m_ptr
)
63 /* Send the request to the driver. */
64 if ((r
= asynsend3(sp
->smap_endpt
, m_ptr
, AMF_NOREPLY
)) != OK
)
65 panic("VFS: asynsend in sdev_sendrec failed: %d", r
);
67 /* Suspend this thread until we have received the response. */
68 self
->w_task
= sp
->smap_endpt
;
69 self
->w_drv_sendrec
= m_ptr
;
74 assert(self
->w_drv_sendrec
== NULL
);
76 return (!IS_SDEV_RS(m_ptr
->m_type
)) ? m_ptr
->m_type
: OK
;
80 * Suspend the current process for later completion of its system call.
83 sdev_suspend(dev_t dev
, cp_grant_id_t grant0
, cp_grant_id_t grant1
,
84 cp_grant_id_t grant2
, int fd
, vir_bytes buf
)
87 fp
->fp_sdev
.dev
= dev
;
88 fp
->fp_sdev
.callnr
= job_call_nr
;
89 fp
->fp_sdev
.grant
[0] = grant0
;
90 fp
->fp_sdev
.grant
[1] = grant1
;
91 fp
->fp_sdev
.grant
[2] = grant2
;
93 if (job_call_nr
== VFS_ACCEPT
) {
96 fp
->fp_sdev
.aux
.fd
= fd
;
97 } else if (job_call_nr
== VFS_RECVMSG
) {
100 * TODO: we are not yet consistent enough in dealing with
101 * mapped NULL pages to have an assert(buf != 0) here..
103 fp
->fp_sdev
.aux
.buf
= buf
;
109 suspend(FP_BLOCKED_ON_SDEV
);
114 * Create a socket or socket pair. Return OK on success, with the new socket
115 * device identifier(s) stored in the 'dev' array. Return an error code upon failure.
119 sdev_socket(int domain
, int type
, int protocol
, dev_t
* dev
, int pair
)
123 sockid_t sock_id
, sock_id2
;
126 /* We could return EAFNOSUPPORT, but the caller should have checked. */
127 if ((sp
= get_smap_by_domain(domain
)) == NULL
)
128 panic("VFS: sdev_socket for unknown domain");
130 /* Prepare the request message. */
131 memset(&m
, 0, sizeof(m
));
132 m
.m_type
= pair
? SDEV_SOCKETPAIR
: SDEV_SOCKET
;
133 m
.m_vfs_lsockdriver_socket
.req_id
= (sockid_t
)who_e
;
134 m
.m_vfs_lsockdriver_socket
.domain
= domain
;
135 m
.m_vfs_lsockdriver_socket
.type
= type
;
136 m
.m_vfs_lsockdriver_socket
.protocol
= protocol
;
137 m
.m_vfs_lsockdriver_socket
.user_endpt
= who_e
;
139 /* Send the request, and wait for the reply. */
140 if ((r
= sdev_sendrec(sp
, &m
)) != OK
)
141 return r
; /* socket driver died */
143 /* Parse the reply message, and check for protocol errors. */
144 if (m
.m_type
!= SDEV_SOCKET_REPLY
) {
145 printf("VFS: %d sent bad reply type %d for call %d\n",
146 sp
->smap_endpt
, m
.m_type
, job_call_nr
);
150 sock_id
= m
.m_lsockdriver_vfs_socket_reply
.sock_id
;
151 sock_id2
= m
.m_lsockdriver_vfs_socket_reply
.sock_id2
;
153 /* Check for regular errors. Upon success, return the socket(s). */
157 dev
[0] = make_smap_dev(sp
, sock_id
);
160 /* Okay, one more protocol error. */
162 printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n",
163 sp
->smap_endpt
, sock_id2
);
164 (void)sdev_close(dev
[0], FALSE
/*may_suspend*/);
168 dev
[1] = make_smap_dev(sp
, sock_id2
);
175 * Bind or connect a socket to a particular address. These calls may block, so
176 * suspend the current process instead of making the thread wait for the reply.
179 sdev_bindconn(dev_t dev
, int type
, vir_bytes addr
, unsigned int addr_len
,
188 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
191 /* Allocate resources. */
192 grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, addr
, addr_len
,
194 if (!GRANT_VALID(grant
))
195 panic("VFS: cpf_grant_magic failed");
197 /* Prepare the request message. */
198 memset(&m
, 0, sizeof(m
));
200 m
.m_vfs_lsockdriver_addr
.req_id
= (sockid_t
)who_e
;
201 m
.m_vfs_lsockdriver_addr
.sock_id
= sock_id
;
202 m
.m_vfs_lsockdriver_addr
.grant
= grant
;
203 m
.m_vfs_lsockdriver_addr
.len
= addr_len
;
204 m
.m_vfs_lsockdriver_addr
.user_endpt
= who_e
;
205 m
.m_vfs_lsockdriver_addr
.sflags
=
206 (filp_flags
& O_NONBLOCK
) ? SDEV_NONBLOCK
: 0;
208 /* Send the request to the driver. */
209 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
210 panic("VFS: asynsend in sdev_bindconn failed: %d", r
);
212 /* Suspend the process until the reply arrives. */
213 return sdev_suspend(dev
, grant
, GRANT_INVALID
, GRANT_INVALID
, -1, 0);
217 * Bind a socket to a local address.
220 sdev_bind(dev_t dev
, vir_bytes addr
, unsigned int addr_len
, int filp_flags
)
223 return sdev_bindconn(dev
, SDEV_BIND
, addr
, addr_len
, filp_flags
);
227 * Connect a socket to a remote address.
230 sdev_connect(dev_t dev
, vir_bytes addr
, unsigned int addr_len
, int filp_flags
)
233 return sdev_bindconn(dev
, SDEV_CONNECT
, addr
, addr_len
, filp_flags
);
237 * Send and receive a "simple" request: listen, shutdown, or close. Note that
238 * while cancel requests use the same request format, they require a different
239 * way of handling their replies.
242 sdev_simple(dev_t dev
, int type
, int param
)
249 assert(type
== SDEV_LISTEN
|| type
== SDEV_SHUTDOWN
||
252 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
255 /* Prepare the request message. */
256 memset(&m
, 0, sizeof(m
));
258 m
.m_vfs_lsockdriver_simple
.req_id
= (sockid_t
)who_e
;
259 m
.m_vfs_lsockdriver_simple
.sock_id
= sock_id
;
260 m
.m_vfs_lsockdriver_simple
.param
= param
;
262 /* Send the request, and wait for the reply. */
263 if ((r
= sdev_sendrec(sp
, &m
)) != OK
)
264 return r
; /* socket driver died */
266 /* Parse and return the reply. */
267 if (m
.m_type
!= SDEV_REPLY
) {
268 printf("VFS: %d sent bad reply type %d for call %d\n",
269 sp
->smap_endpt
, m
.m_type
, job_call_nr
);
273 return m
.m_lsockdriver_vfs_reply
.status
;
277 * Put a socket in listening mode.
280 sdev_listen(dev_t dev
, int backlog
)
283 assert(backlog
>= 0);
285 return sdev_simple(dev
, SDEV_LISTEN
, backlog
);
289 * Accept a new connection on a socket.
292 sdev_accept(dev_t dev
, vir_bytes addr
, unsigned int addr_len
, int filp_flags
,
301 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
304 /* Allocate resources. */
306 grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, addr
, addr_len
,
308 if (!GRANT_VALID(grant
))
309 panic("VFS: cpf_grant_magic failed");
311 grant
= GRANT_INVALID
;
313 /* Prepare the request message. */
314 memset(&m
, 0, sizeof(m
));
315 m
.m_type
= SDEV_ACCEPT
;
316 m
.m_vfs_lsockdriver_addr
.req_id
= (sockid_t
)who_e
;
317 m
.m_vfs_lsockdriver_addr
.sock_id
= sock_id
;
318 m
.m_vfs_lsockdriver_addr
.grant
= grant
;
319 m
.m_vfs_lsockdriver_addr
.len
= addr_len
;
320 m
.m_vfs_lsockdriver_addr
.user_endpt
= who_e
;
321 m
.m_vfs_lsockdriver_addr
.sflags
=
322 (filp_flags
& O_NONBLOCK
) ? SDEV_NONBLOCK
: 0;
324 /* Send the request to the driver. */
325 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
326 panic("VFS: asynsend in sdev_accept failed: %d", r
);
328 /* Suspend the process until the reply arrives. */
329 return sdev_suspend(dev
, grant
, GRANT_INVALID
, GRANT_INVALID
,
334 * Send or receive a message on a socket. All read (read(2), recvfrom(2), and
335 * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on
336 * sockets pass through this function. The function is named sdev_readwrite
337 * rather than sdev_sendrecv to avoid confusion with sdev_sendrec.
340 sdev_readwrite(dev_t dev
, vir_bytes data_buf
, size_t data_len
,
341 vir_bytes ctl_buf
, unsigned int ctl_len
, vir_bytes addr_buf
,
342 unsigned int addr_len
, int flags
, int rw_flag
, int filp_flags
,
347 cp_grant_id_t data_grant
, ctl_grant
, addr_grant
;
351 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
354 /* Allocate resources. */
355 data_grant
= GRANT_INVALID
;
356 ctl_grant
= GRANT_INVALID
;
357 addr_grant
= GRANT_INVALID
;
358 bits
= (rw_flag
== WRITING
) ? CPF_READ
: CPF_WRITE
;
361 * Supposedly it is allowed to send or receive zero data bytes, even
362 * though it is a bad idea as the return value will then be zero, which
363 * may also indicate EOF (as per W. Richard Stevens).
366 data_grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, data_buf
,
368 if (!GRANT_VALID(data_grant
))
369 panic("VFS: cpf_grant_magic failed");
373 ctl_grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, ctl_buf
,
375 if (!GRANT_VALID(ctl_grant
))
376 panic("VFS: cpf_grant_magic failed");
380 addr_grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, addr_buf
,
382 if (!GRANT_VALID(addr_grant
))
383 panic("VFS: cpf_grant_magic failed");
386 /* Prepare the request message. */
387 memset(&m
, 0, sizeof(m
));
388 m
.m_type
= (rw_flag
== WRITING
) ? SDEV_SEND
: SDEV_RECV
;
389 m
.m_vfs_lsockdriver_sendrecv
.req_id
= (sockid_t
)who_e
;
390 m
.m_vfs_lsockdriver_sendrecv
.sock_id
= sock_id
;
391 m
.m_vfs_lsockdriver_sendrecv
.data_grant
= data_grant
;
392 m
.m_vfs_lsockdriver_sendrecv
.data_len
= data_len
;
393 m
.m_vfs_lsockdriver_sendrecv
.ctl_grant
= ctl_grant
;
394 m
.m_vfs_lsockdriver_sendrecv
.ctl_len
= ctl_len
;
395 m
.m_vfs_lsockdriver_sendrecv
.addr_grant
= addr_grant
;
396 m
.m_vfs_lsockdriver_sendrecv
.addr_len
= addr_len
;
397 m
.m_vfs_lsockdriver_sendrecv
.user_endpt
= who_e
;
398 m
.m_vfs_lsockdriver_sendrecv
.flags
= flags
;
399 if (filp_flags
& O_NONBLOCK
)
400 m
.m_vfs_lsockdriver_sendrecv
.flags
|= MSG_DONTWAIT
;
401 if (rw_flag
== WRITING
&& (filp_flags
& O_NOSIGPIPE
))
402 m
.m_vfs_lsockdriver_sendrecv
.flags
|= MSG_NOSIGNAL
;
404 /* Send the request to the driver. */
405 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
406 panic("VFS: asynsend in sdev_readwrite failed: %d", r
);
408 /* Suspend the process until the reply arrives. */
409 return sdev_suspend(dev
, data_grant
, ctl_grant
, addr_grant
, -1,
414 * Perform I/O control.
417 sdev_ioctl(dev_t dev
, unsigned long request
, vir_bytes buf
, int filp_flags
)
425 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
428 /* Allocate resources. */
429 grant
= make_ioctl_grant(sp
->smap_endpt
, who_e
, buf
, request
);
431 /* Prepare the request message. */
432 memset(&m
, 0, sizeof(m
));
433 m
.m_type
= SDEV_IOCTL
;
434 m
.m_vfs_lsockdriver_ioctl
.req_id
= (sockid_t
)who_e
;
435 m
.m_vfs_lsockdriver_ioctl
.sock_id
= sock_id
;
436 m
.m_vfs_lsockdriver_ioctl
.request
= request
;
437 m
.m_vfs_lsockdriver_ioctl
.grant
= grant
;
438 m
.m_vfs_lsockdriver_ioctl
.user_endpt
= who_e
;
439 m
.m_vfs_lsockdriver_ioctl
.sflags
=
440 (filp_flags
& O_NONBLOCK
) ? SDEV_NONBLOCK
: 0;
442 /* Send the request to the driver. */
443 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
444 panic("VFS: asynsend in sdev_ioctl failed: %d", r
);
446 /* Suspend the process until the reply arrives. */
447 return sdev_suspend(dev
, grant
, GRANT_INVALID
, GRANT_INVALID
, -1, 0);
451 * Set socket options.
454 sdev_setsockopt(dev_t dev
, int level
, int name
, vir_bytes addr
,
463 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
466 /* Allocate resources. */
467 grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, addr
, len
, CPF_READ
);
468 if (!GRANT_VALID(grant
))
469 panic("VFS: cpf_grant_magic failed");
471 /* Prepare the request message. */
472 memset(&m
, 0, sizeof(m
));
473 m
.m_type
= SDEV_SETSOCKOPT
;
474 m
.m_vfs_lsockdriver_getset
.req_id
= (sockid_t
)who_e
;
475 m
.m_vfs_lsockdriver_getset
.sock_id
= sock_id
;
476 m
.m_vfs_lsockdriver_getset
.level
= level
;
477 m
.m_vfs_lsockdriver_getset
.name
= name
;
478 m
.m_vfs_lsockdriver_getset
.grant
= grant
;
479 m
.m_vfs_lsockdriver_getset
.len
= len
;
481 /* Send the request, and wait for the reply. */
482 r
= sdev_sendrec(sp
, &m
);
484 /* Free resources. */
485 (void)cpf_revoke(grant
);
488 return r
; /* socket driver died */
490 /* Parse and return the reply. */
491 if (m
.m_type
!= SDEV_REPLY
) {
492 printf("VFS: %d sent bad reply type %d for call %d\n",
493 sp
->smap_endpt
, m
.m_type
, job_call_nr
);
497 return m
.m_lsockdriver_vfs_reply
.status
;
501 * Send and receive a "get" request: getsockopt, getsockname, or getpeername.
504 sdev_get(dev_t dev
, int type
, int level
, int name
, vir_bytes addr
,
513 assert(type
== SDEV_GETSOCKOPT
|| type
== SDEV_GETSOCKNAME
||
514 type
== SDEV_GETPEERNAME
);
516 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
519 /* Allocate resources. */
520 grant
= cpf_grant_magic(sp
->smap_endpt
, who_e
, addr
, *len
, CPF_WRITE
);
521 if (!GRANT_VALID(grant
))
522 panic("VFS: cpf_grant_magic failed");
524 /* Prepare the request message. */
525 memset(&m
, 0, sizeof(m
));
527 m
.m_vfs_lsockdriver_getset
.req_id
= (sockid_t
)who_e
;
528 m
.m_vfs_lsockdriver_getset
.sock_id
= sock_id
;
529 m
.m_vfs_lsockdriver_getset
.level
= level
;
530 m
.m_vfs_lsockdriver_getset
.name
= name
;
531 m
.m_vfs_lsockdriver_getset
.grant
= grant
;
532 m
.m_vfs_lsockdriver_getset
.len
= *len
;
534 /* Send the request, and wait for the reply. */
535 r
= sdev_sendrec(sp
, &m
);
537 /* Free resources. */
538 (void)cpf_revoke(grant
);
541 return r
; /* socket driver died */
543 /* Parse and return the reply. */
544 if (m
.m_type
!= SDEV_REPLY
) {
545 printf("VFS: %d sent bad reply type %d for call %d\n",
546 sp
->smap_endpt
, m
.m_type
, job_call_nr
);
550 if ((r
= m
.m_lsockdriver_vfs_reply
.status
) < 0)
553 *len
= (unsigned int)r
;
558 * Get socket options.
561 sdev_getsockopt(dev_t dev
, int level
, int name
, vir_bytes addr
,
565 return sdev_get(dev
, SDEV_GETSOCKOPT
, level
, name
, addr
, len
);
569 * Get the local address of a socket.
572 sdev_getsockname(dev_t dev
, vir_bytes addr
, unsigned int * addr_len
)
575 return sdev_get(dev
, SDEV_GETSOCKNAME
, 0, 0, addr
, addr_len
);
579 * Get the remote address of a socket.
582 sdev_getpeername(dev_t dev
, vir_bytes addr
, unsigned int * addr_len
)
585 return sdev_get(dev
, SDEV_GETPEERNAME
, 0, 0, addr
, addr_len
);
589 * Shut down socket send and receive operations.
592 sdev_shutdown(dev_t dev
, int how
)
595 assert(how
== SHUT_RD
|| how
== SHUT_WR
|| how
== SHUT_RDWR
);
597 return sdev_simple(dev
, SDEV_SHUTDOWN
, how
);
601 * Close the socket identified by the given socket device number.
604 sdev_close(dev_t dev
, int may_suspend
)
612 * Originally, all close requests were blocking the calling thread, but
613 * the new support for SO_LINGER has changed that. In a very strictly
614 * limited subset of cases - namely, the user process calling close(2),
615 * we suspend the close request and handle it asynchronously. In all
616 * other cases, including close-on-exit, close-on-exec, and even dup2,
617 * the close is issued as a thread-synchronous request instead.
620 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
623 /* Prepare the request message. */
624 memset(&m
, 0, sizeof(m
));
625 m
.m_type
= SDEV_CLOSE
;
626 m
.m_vfs_lsockdriver_simple
.req_id
= (sockid_t
)who_e
;
627 m
.m_vfs_lsockdriver_simple
.sock_id
= sock_id
;
628 m
.m_vfs_lsockdriver_simple
.param
= 0;
630 /* Send the request to the driver. */
631 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
632 panic("VFS: asynsend in sdev_bindconn failed: %d", r
);
634 /* Suspend the process until the reply arrives. */
635 return sdev_suspend(dev
, GRANT_INVALID
, GRANT_INVALID
,
636 GRANT_INVALID
, -1, 0);
638 /* Block the calling thread until the socket is closed. */
639 return sdev_simple(dev
, SDEV_CLOSE
, SDEV_NONBLOCK
);
643 * Initiate a select call on a socket device. Return OK iff the request was
644 * sent, without suspending the process.
647 sdev_select(dev_t dev
, int ops
)
654 if ((sp
= get_smap_by_dev(dev
, &sock_id
)) == NULL
)
657 /* Prepare the request message. */
658 memset(&m
, 0, sizeof(m
));
659 m
.m_type
= SDEV_SELECT
;
660 m
.m_vfs_lsockdriver_select
.sock_id
= sock_id
;
661 m
.m_vfs_lsockdriver_select
.ops
= ops
;
663 /* Send the request to the driver. */
664 if ((r
= asynsend3(sp
->smap_endpt
, &m
, AMF_NOREPLY
)) != OK
)
665 panic("VFS: asynsend in sdev_select failed: %d", r
);
671 * A reply has arrived for a previous socket accept request, and the reply
672 * indicates that a socket has been accepted. A status is also returned;
673 * usually, this status is OK, but if not, the newly accepted socket must be
674 * closed immediately again. Process the low-level aspects of the reply, and
675 * call resume_accept() to let the upper socket layer handle the rest. This
676 * function is always called from a worker thread, and may thus block.
679 sdev_finish_accept(struct fproc
* rfp
, message
* m_ptr
)
687 assert(rfp
->fp_sdev
.callnr
== VFS_ACCEPT
);
688 assert(m_ptr
->m_type
== SDEV_ACCEPT_REPLY
);
689 assert(m_ptr
->m_lsockdriver_vfs_accept_reply
.sock_id
>= 0);
691 /* Free resources. Accept requests use up to one grant. */
692 if (GRANT_VALID(rfp
->fp_sdev
.grant
[0]))
693 cpf_revoke(rfp
->fp_sdev
.grant
[0]);
694 assert(!GRANT_VALID(rfp
->fp_sdev
.grant
[1]));
695 assert(!GRANT_VALID(rfp
->fp_sdev
.grant
[2]));
697 sock_id
= m_ptr
->m_lsockdriver_vfs_accept_reply
.sock_id
;
698 status
= m_ptr
->m_lsockdriver_vfs_accept_reply
.status
;
699 len
= m_ptr
->m_lsockdriver_vfs_accept_reply
.len
;
702 * We do not want the upper socket layer (socket.c) to deal with smap
703 * and socket ID details, so we construct the new socket device number
704 * here. We won't use the saved listen FD to determine the smap entry
705 * here, since that involves file pointers and other upper-layer-only
706 * stuff. So we have to look it up by the source endpoint. As a
707 * result, we detect some driver deaths here (but not all: see below).
709 if ((sp
= get_smap_by_endpt(m_ptr
->m_source
)) != NULL
) {
710 /* Leave 'status' as is, regardless of whether it is OK. */
711 dev
= make_smap_dev(sp
, sock_id
);
714 * The driver must have died while the thread was blocked on
715 * activation. Extremely rare, but theoretically possible.
716 * Some driver deaths are indicated only by a driver-up
717 * announcement though; resume_accept() will detect this by
718 * checking that the listening socket has not been invalidated.
724 /* Let the upper socket layer handle the rest. */
725 resume_accept(rfp
, status
, dev
, len
, rfp
->fp_sdev
.aux
.fd
);
729 * Worker thread stub for finishing successful accept requests.
732 do_accept_reply(void)
735 sdev_finish_accept(fp
, &job_m_in
);
739 * With the exception of successful accept requests, this function is called
740 * whenever a reply is received for a socket driver request for which the
741 * corresponding user process was suspended (as opposed to requests which just
742 * suspend the worker thread), i.e., for long-lasting socket calls. This
743 * function is also called if the socket driver has died during a long-lasting
744 * socket call, in which case the given message's m_type is a negative error code.
747 * The division between the upper socket layer (socket.c) and the lower socket
748 * layer (this file) here is roughly: if resuming the system call involves no
749 * more than a simple replycode() call, do that here; otherwise call into the
750 * upper socket layer to handle the details. In any case, do not ever let the
751 * upper socket layer deal with reply message parsing or suspension state.
753 * This function may or may not be called from a worker thread; as such, it
754 * MUST NOT block its calling thread. This function is called for failed
755 * accept requests; successful accept requests have their replies routed
756 * through sdev_finish_accept() instead, because those require a worker thread.
759 sdev_finish(struct fproc
* rfp
, message
* m_ptr
)
761 unsigned int ctl_len
, addr_len
;
762 int callnr
, status
, flags
;
764 /* The suspension status must just have been cleared by the caller. */
765 assert(rfp
->fp_blocked_on
== FP_BLOCKED_ON_NONE
);
768 * Free resources. Every suspending call sets all grant fields, so we
769 * can safely revoke all of them without testing the original call.
771 if (GRANT_VALID(rfp
->fp_sdev
.grant
[0]))
772 cpf_revoke(rfp
->fp_sdev
.grant
[0]);
773 if (GRANT_VALID(rfp
->fp_sdev
.grant
[1]))
774 cpf_revoke(rfp
->fp_sdev
.grant
[1]);
775 if (GRANT_VALID(rfp
->fp_sdev
.grant
[2]))
776 cpf_revoke(rfp
->fp_sdev
.grant
[2]);
779 * Now that the socket driver call has finished (or been stopped due to
780 * driver death), we need to finish the corresponding system call from
781 * the user process. The action to take depends on the system call.
783 callnr
= rfp
->fp_sdev
.callnr
;
794 * These calls all use the same SDEV_REPLY reply type and only
795 * need to reply an OK-or-error status code back to userland.
797 if (m_ptr
->m_type
== SDEV_REPLY
) {
798 status
= m_ptr
->m_lsockdriver_vfs_reply
.status
;
801 * For close(2) calls, the return value must indicate
802 * that the file descriptor has been closed.
804 if (callnr
== VFS_CLOSE
&&
805 status
!= OK
&& status
!= EINPROGRESS
)
807 } else if (m_ptr
->m_type
< 0) {
808 status
= m_ptr
->m_type
;
810 printf("VFS: %d sent bad reply type %d for call %d\n",
811 m_ptr
->m_source
, m_ptr
->m_type
, callnr
);
814 replycode(rfp
->fp_endpoint
, status
);
821 * These calls use SDEV_RECV_REPLY. The action to take depends on the exact call.
824 ctl_len
= addr_len
= 0;
826 if (m_ptr
->m_type
== SDEV_RECV_REPLY
) {
827 status
= m_ptr
->m_lsockdriver_vfs_recv_reply
.status
;
828 ctl_len
= m_ptr
->m_lsockdriver_vfs_recv_reply
.ctl_len
;
830 m_ptr
->m_lsockdriver_vfs_recv_reply
.addr_len
;
831 flags
= m_ptr
->m_lsockdriver_vfs_recv_reply
.flags
;
832 } else if (m_ptr
->m_type
< 0) {
833 status
= m_ptr
->m_type
;
835 printf("VFS: %d sent bad reply type %d for call %d\n",
836 m_ptr
->m_source
, m_ptr
->m_type
, callnr
);
842 replycode(rfp
->fp_endpoint
, status
);
845 resume_recvfrom(rfp
, status
, addr_len
);
848 resume_recvmsg(rfp
, status
, ctl_len
, addr_len
, flags
,
849 rfp
->fp_sdev
.aux
.buf
);
856 * This call uses SDEV_ACCEPT_REPLY. We only get here if the
857 * accept call has failed without creating a new socket, in
858 * which case we can simply call replycode() with the error.
859 * For nothing other than consistency, we let resume_accept()
860 * handle this case too.
863 if (m_ptr
->m_type
== SDEV_ACCEPT_REPLY
) {
864 assert(m_ptr
->m_lsockdriver_vfs_accept_reply
.sock_id
<
866 status
= m_ptr
->m_lsockdriver_vfs_accept_reply
.status
;
867 addr_len
= m_ptr
->m_lsockdriver_vfs_accept_reply
.len
;
868 } else if (m_ptr
->m_type
< 0) {
869 status
= m_ptr
->m_type
;
871 printf("VFS: %d sent bad reply type %d for call %d\n",
872 m_ptr
->m_source
, m_ptr
->m_type
, callnr
);
876 * Quick rundown of m_lsockdriver_vfs_accept_reply cases:
878 * - sock_id >= 0, status == OK: new socket accepted
879 * - sock_id >= 0, status != OK: new socket must be closed
880 * - sock_id < 0, status != OK: failure accepting socket
881 * - sock_id < 0, status == OK: invalid, covered right here
883 * See libsockdriver for why there are two reply fields at all.
886 printf("VFS: %d sent bad status %d for call %d\n",
887 m_ptr
->m_source
, status
, callnr
);
890 resume_accept(rfp
, status
, NO_DEV
, addr_len
,
891 rfp
->fp_sdev
.aux
.fd
);
896 * Ultimately, enumerating all system calls that may cause
897 * socket I/O may prove too cumbersome. In that case, the
898 * callnr field could be replaced by a field that stores the
899 * combination of the expected reply type and the action to take.
902 panic("VFS: socket reply %d for unknown call %d from %d",
903 m_ptr
->m_type
, callnr
, rfp
->fp_endpoint
);
908 * Abort the suspended socket call for the given process, because the
909 * corresponding socket driver has died.
912 sdev_stop(struct fproc
* rfp
)
916 assert(rfp
->fp_blocked_on
== FP_BLOCKED_ON_SDEV
);
918 rfp
->fp_blocked_on
= FP_BLOCKED_ON_NONE
;
921 * We use one single approach both here and when stopping worker
922 * threads: the reply message's m_type is set to an error code (always
923 * EIO for now) instead of an actual SDEV_ reply code. We test for
924 * this case in non-suspending calls as well as in sdev_finish().
927 sdev_finish(rfp
, &m
);
931 * Cancel the ongoing long-lasting socket call, because the calling process has
932 * received a caught or terminating signal. This function is always called
933 * from a worker thread (as part of PM work), with 'fp' set to the process that
934 * issued the original system call. The calling function has just unsuspended
935 * the process out of _SDEV blocking state. The job of this function is to
936 * issue a cancel request and then block until a reply comes in; the reply may
937 * indicate success, in which case it must be handled accordingly.
946 /* The suspension status must just have been cleared by the caller. */
947 assert(fp
->fp_blocked_on
== FP_BLOCKED_ON_NONE
);
949 if ((sp
= get_smap_by_dev(fp
->fp_sdev
.dev
, &sock_id
)) != NULL
) {
950 /* Prepare the request message. */
951 memset(&m
, 0, sizeof(m
));
952 m
.m_type
= SDEV_CANCEL
;
953 m
.m_vfs_lsockdriver_simple
.req_id
= (sockid_t
)who_e
;
954 m
.m_vfs_lsockdriver_simple
.sock_id
= sock_id
;
957 * Send the cancel request, and wait for a reply. The reply
958 * will be for the original request and must be processed
959 * accordingly. It is possible that the original request
960 * actually succeeded, because 1) the cancel request resulted
961 * in partial success or 2) the original reply and the cancel
962 * request crossed each other. It is because of the second
963 * case that a socket driver must not respond at all to a
964 * cancel operation for an unknown request.
966 sdev_sendrec(sp
, &m
);
971 * Successful accept requests require special processing, but since we
972 * are already operating from a worker thread here, we need not spawn
973 * an additional worker thread for this case.
975 if (m
.m_type
== SDEV_ACCEPT_REPLY
&&
976 m
.m_lsockdriver_vfs_accept_reply
.sock_id
>= 0)
977 sdev_finish_accept(fp
, &m
);
983 * A socket driver has sent a reply to a socket request. Process it, by either
984 * waking up an active worker thread, finishing the system call from here, or
985 * (in the exceptional case of accept calls) spawning a new worker thread to
986 * process the reply. This function MUST NOT block its calling thread.
993 struct worker_thread
*wp
;
994 sockid_t req_id
= -1;
998 if ((sp
= get_smap_by_endpt(who_e
)) == NULL
) {
999 printf("VFS: ignoring sock dev reply from unknown driver %d\n",
1006 req_id
= m_in
.m_lsockdriver_vfs_reply
.req_id
;
1008 case SDEV_SOCKET_REPLY
:
1009 req_id
= m_in
.m_lsockdriver_vfs_socket_reply
.req_id
;
1011 case SDEV_ACCEPT_REPLY
:
1012 req_id
= m_in
.m_lsockdriver_vfs_accept_reply
.req_id
;
1014 case SDEV_RECV_REPLY
:
1015 req_id
= m_in
.m_lsockdriver_vfs_recv_reply
.req_id
;
1017 case SDEV_SELECT1_REPLY
:
1018 dev
= make_smap_dev(sp
,
1019 m_in
.m_lsockdriver_vfs_select_reply
.sock_id
);
1020 select_sdev_reply1(dev
,
1021 m_in
.m_lsockdriver_vfs_select_reply
.status
);
1023 case SDEV_SELECT2_REPLY
:
1024 dev
= make_smap_dev(sp
,
1025 m_in
.m_lsockdriver_vfs_select_reply
.sock_id
);
1026 select_sdev_reply2(dev
,
1027 m_in
.m_lsockdriver_vfs_select_reply
.status
);
1030 printf("VFS: ignoring unknown sock dev reply %d from %d\n",
1035 if (isokendpt((endpoint_t
)req_id
, &slot
) != OK
) {
1036 printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
1042 wp
= rfp
->fp_worker
;
1043 if (wp
!= NULL
&& wp
->w_task
== who_e
&& wp
->w_drv_sendrec
!= NULL
) {
1044 assert(!fp_is_blocked(rfp
));
1045 *wp
->w_drv_sendrec
= m_in
;
1046 wp
->w_drv_sendrec
= NULL
;
1047 worker_signal(wp
); /* resume suspended thread */
1049 * It is up to the worker thread to 1) check that the reply is
1050 * of the right type for the request, and 2) keep in mind that
1051 * the reply type may be EIO in case the socket driver died.
1053 } else if (rfp
->fp_blocked_on
!= FP_BLOCKED_ON_SDEV
||
1054 get_smap_by_dev(rfp
->fp_sdev
.dev
, NULL
) != sp
) {
1055 printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
1056 rfp
->fp_endpoint
, who_e
);
1058 } else if (call_nr
== SDEV_ACCEPT_REPLY
&&
1059 m_in
.m_lsockdriver_vfs_accept_reply
.sock_id
>= 0) {
1061 * For accept replies that return a new socket, we need to
1062 * spawn a worker thread, because accept calls may block (so
1063 * there will no longer be a worker thread) and processing the
1064 * reply requires additional blocking calls (which we cannot
1065 * issue from the main thread). This is tricky. Under no
1066 * circumstances may we "lose" a legitimate reply, because this
1067 * would lead to resource leaks in the socket driver. To this
1068 * end, we rely on the current worker thread model to
1069 * prioritize regular work over PM work. Still, sdev_cancel()
1070 * may end up receiving the accept reply if it was already
1071 * blocked waiting for the reply message, and it must then
1072 * perform the same tasks.
1075 * It is possible that if all threads are in use, there is a
1076 * "gap" between starting the thread and its activation. The
1077 * main problem for this case is that the socket driver dies
1078 * within that gap. For accepts, we address this with no less
1079 * than two checks: 1) in this file, by looking up the smap
1080 * entry by the reply source endpoint again - if the entry is
1081 * no longer valid, the socket driver must have died; 2) in
1082 * socket.c, by revalidating the original listening socket - if
1083 * the listening socket has been invalidated, the driver died.
1085 * Since we unsuspend the process now, a socket driver sending
1086 * two accept replies in a row may never cause VFS to attempt
1087 * spawning two threads; the second reply should be ignored.
1089 assert(fp
->fp_func
== NULL
);
1091 worker_start(rfp
, do_accept_reply
, &m_in
, FALSE
/*use_spare*/);
1094 * TODO: I just introduced the notion of not using the fp_u
1095 * union across yields after unsuspension, but for socket calls
1096 * we have a lot of socket state to carry over, so I'm now
1097 * immediately violating my own rule again here. Possible
1098 * solutions: 1) introduce another blocking state just to mark
1099 * the fp_u union in use (this has side effects though), 2)
1100 * introduce a pseudo message type which covers both the accept
1101 * reply fields and the fp_u state (do_pending_pipe does this),
1102 * or 3) add a fp_flags flag for this purpose. In any case,
1103 * the whole point is that we catch any attempts to reuse fp_u
1104 * for other purposes and thus cause state corruption. This
1105 * should not happen anyway, but it's too dangerous to leave
1106 * entirely unchecked. --dcvmoole
1108 rfp
->fp_blocked_on
= FP_BLOCKED_ON_NONE
;
1110 rfp
->fp_blocked_on
= FP_BLOCKED_ON_NONE
;
1112 sdev_finish(rfp
, &m_in
);