Sync with cat.c from netbsd-8
[minix3.git] / minix / servers / vfs / sdev.c
blobcc3cd4e50bcd97854887360a1b800735a24cbae3
/*
 * This file implements the lower socket layer of VFS: communication with
 * socket drivers. Socket driver communication evolved out of character driver
 * communication, and the two have many similarities between them. Most
 * importantly, socket driver communication also has the distinction between
 * short-lived and long-lived requests.
 *
 * Short-lived requests are expected to be replied to by the socket driver
 * immediately in all cases. For such requests, VFS keeps the worker thread
 * for the calling process alive until the reply arrives. In contrast,
 * long-lived requests may block. For such requests, VFS suspends the calling
 * process until a reply comes in, or until a signal interrupts the request.
 * Both short-lived and long-lived requests may be aborted if VFS finds that
 * the corresponding socket driver has died. Even though long-lived requests
 * may be marked as nonblocking, nonblocking calls are still handled as
 * long-lived in terms of VFS processing.
 *
 * For an overview of the socket driver requests and replies, message layouts,
 * and which requests are long-lived or short-lived (i.e. may suspend or not),
 * please refer to the corresponding table in the libsockdriver source code.
 *
 * For most long-lived socket requests, the main VFS thread processes the reply
 * from the socket driver. This typically consists of waking up the user
 * process that originally issued the system call on the socket by simply
 * relaying the call's result code. Some socket calls require a specific reply
 * message and/or additional post-call actions; for those, resume_*() calls are
 * made back into the upper socket layer.
 *
 * If a process is interrupted by a signal, any ongoing long-lived socket
 * request must be canceled. This is done by sending a one-way cancel request
 * to the socket driver, and waiting for it to reply to the original request.
 * In this case, the reply will be processed from the worker thread that is
 * handling the cancel operation. Canceling does not imply call failure: the
 * cancellation may result in a partial I/O reply, and a successful reply may
 * cross the cancel request.
 *
 * One main exception is the reply to an accept request. Once a connection has
 * been accepted, a new socket has to be created for it. This requires actions
 * that require the ability to block the current thread, and so, a worker
 * thread is spawned for processing successful accept replies, unless the reply
 * was received from a worker thread already (as may be the case if the accept
 * request was being canceled).
 */

45 #include "fs.h"
46 #include <sys/socket.h>
47 #include <minix/callnr.h>
50 * Send a short-lived request message to the given socket driver, and suspend
51 * the current worker thread until a reply message has been received. On
52 * success, the function will return OK, and the reply message will be stored
53 * in the message structure pointed to by 'm_ptr'. The function may fail if
54 * the socket driver dies before sending a reply. In that case, the function
55 * will return a negative error code, and also store the same negative error
56 * code in the m_type field of the 'm_ptr' message structure.
58 static int
59 sdev_sendrec(struct smap * sp, message * m_ptr)
61 int r;
63 /* Send the request to the driver. */
64 if ((r = asynsend3(sp->smap_endpt, m_ptr, AMF_NOREPLY)) != OK)
65 panic("VFS: asynsend in sdev_sendrec failed: %d", r);
67 /* Suspend this thread until we have received the response. */
68 self->w_task = sp->smap_endpt;
69 self->w_drv_sendrec = m_ptr;
71 worker_wait();
73 self->w_task = NONE;
74 assert(self->w_drv_sendrec == NULL);
76 return (!IS_SDEV_RS(m_ptr->m_type)) ? m_ptr->m_type : OK;
80 * Suspend the current process for later completion of its system call.
82 int
83 sdev_suspend(dev_t dev, cp_grant_id_t grant0, cp_grant_id_t grant1,
84 cp_grant_id_t grant2, int fd, vir_bytes buf)
87 fp->fp_sdev.dev = dev;
88 fp->fp_sdev.callnr = job_call_nr;
89 fp->fp_sdev.grant[0] = grant0;
90 fp->fp_sdev.grant[1] = grant1;
91 fp->fp_sdev.grant[2] = grant2;
93 if (job_call_nr == VFS_ACCEPT) {
94 assert(fd != -1);
95 assert(buf == 0);
96 fp->fp_sdev.aux.fd = fd;
97 } else if (job_call_nr == VFS_RECVMSG) {
98 assert(fd == -1);
100 * TODO: we are not yet consistent enough in dealing with
101 * mapped NULL pages to have an assert(buf != 0) here..
103 fp->fp_sdev.aux.buf = buf;
104 } else {
105 assert(fd == -1);
106 assert(buf == 0);
109 suspend(FP_BLOCKED_ON_SDEV);
110 return SUSPEND;
114 * Create a socket or socket pair. Return OK on success, with the new socket
115 * device identifier(s) stored in the 'dev' array. Return an error code upon
116 * failure.
119 sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair)
121 struct smap *sp;
122 message m;
123 sockid_t sock_id, sock_id2;
124 int r;
126 /* We could return EAFNOSUPPORT, but the caller should have checked. */
127 if ((sp = get_smap_by_domain(domain)) == NULL)
128 panic("VFS: sdev_socket for unknown domain");
130 /* Prepare the request message. */
131 memset(&m, 0, sizeof(m));
132 m.m_type = pair ? SDEV_SOCKETPAIR : SDEV_SOCKET;
133 m.m_vfs_lsockdriver_socket.req_id = (sockid_t)who_e;
134 m.m_vfs_lsockdriver_socket.domain = domain;
135 m.m_vfs_lsockdriver_socket.type = type;
136 m.m_vfs_lsockdriver_socket.protocol = protocol;
137 m.m_vfs_lsockdriver_socket.user_endpt = who_e;
139 /* Send the request, and wait for the reply. */
140 if ((r = sdev_sendrec(sp, &m)) != OK)
141 return r; /* socket driver died */
143 /* Parse the reply message, and check for protocol errors. */
144 if (m.m_type != SDEV_SOCKET_REPLY) {
145 printf("VFS: %d sent bad reply type %d for call %d\n",
146 sp->smap_endpt, m.m_type, job_call_nr);
147 return EIO;
150 sock_id = m.m_lsockdriver_vfs_socket_reply.sock_id;
151 sock_id2 = m.m_lsockdriver_vfs_socket_reply.sock_id2;
153 /* Check for regular errors. Upon success, return the socket(s). */
154 if (sock_id < 0)
155 return sock_id;
157 dev[0] = make_smap_dev(sp, sock_id);
159 if (pair) {
160 /* Okay, one more protocol error. */
161 if (sock_id2 < 0) {
162 printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n",
163 sp->smap_endpt, sock_id2);
164 (void)sdev_close(dev[0], FALSE /*may_suspend*/);
165 return EIO;
168 dev[1] = make_smap_dev(sp, sock_id2);
171 return OK;
175 * Bind or connect a socket to a particular address. These calls may block, so
176 * suspend the current process instead of making the thread wait for the reply.
178 static int
179 sdev_bindconn(dev_t dev, int type, vir_bytes addr, unsigned int addr_len,
180 int filp_flags)
182 struct smap *sp;
183 sockid_t sock_id;
184 cp_grant_id_t grant;
185 message m;
186 int r;
188 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
189 return EIO;
191 /* Allocate resources. */
192 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
193 CPF_READ);
194 if (!GRANT_VALID(grant))
195 panic("VFS: cpf_grant_magic failed");
197 /* Prepare the request message. */
198 memset(&m, 0, sizeof(m));
199 m.m_type = type;
200 m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
201 m.m_vfs_lsockdriver_addr.sock_id = sock_id;
202 m.m_vfs_lsockdriver_addr.grant = grant;
203 m.m_vfs_lsockdriver_addr.len = addr_len;
204 m.m_vfs_lsockdriver_addr.user_endpt = who_e;
205 m.m_vfs_lsockdriver_addr.sflags =
206 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
208 /* Send the request to the driver. */
209 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
210 panic("VFS: asynsend in sdev_bindconn failed: %d", r);
212 /* Suspend the process until the reply arrives. */
213 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
217 * Bind a socket to a local address.
220 sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
223 return sdev_bindconn(dev, SDEV_BIND, addr, addr_len, filp_flags);
227 * Connect a socket to a remote address.
230 sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags)
233 return sdev_bindconn(dev, SDEV_CONNECT, addr, addr_len, filp_flags);
237 * Send and receive a "simple" request: listen, shutdown, or close. Note that
238 * while cancel requests use the same request format, they require a different
239 * way of handling their replies.
241 static int
242 sdev_simple(dev_t dev, int type, int param)
244 struct smap *sp;
245 sockid_t sock_id;
246 message m;
247 int r;
249 assert(type == SDEV_LISTEN || type == SDEV_SHUTDOWN ||
250 type == SDEV_CLOSE);
252 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
253 return EIO;
255 /* Prepare the request message. */
256 memset(&m, 0, sizeof(m));
257 m.m_type = type;
258 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
259 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
260 m.m_vfs_lsockdriver_simple.param = param;
262 /* Send the request, and wait for the reply. */
263 if ((r = sdev_sendrec(sp, &m)) != OK)
264 return r; /* socket driver died */
266 /* Parse and return the reply. */
267 if (m.m_type != SDEV_REPLY) {
268 printf("VFS: %d sent bad reply type %d for call %d\n",
269 sp->smap_endpt, m.m_type, job_call_nr);
270 return EIO;
273 return m.m_lsockdriver_vfs_reply.status;
277 * Put a socket in listening mode.
280 sdev_listen(dev_t dev, int backlog)
283 assert(backlog >= 0);
285 return sdev_simple(dev, SDEV_LISTEN, backlog);
289 * Accept a new connection on a socket.
292 sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags,
293 int listen_fd)
295 struct smap *sp;
296 sockid_t sock_id;
297 cp_grant_id_t grant;
298 message m;
299 int r;
301 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
302 return EIO;
304 /* Allocate resources. */
305 if (addr != 0) {
306 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len,
307 CPF_WRITE);
308 if (!GRANT_VALID(grant))
309 panic("VFS: cpf_grant_magic failed");
310 } else
311 grant = GRANT_INVALID;
313 /* Prepare the request message. */
314 memset(&m, 0, sizeof(m));
315 m.m_type = SDEV_ACCEPT;
316 m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e;
317 m.m_vfs_lsockdriver_addr.sock_id = sock_id;
318 m.m_vfs_lsockdriver_addr.grant = grant;
319 m.m_vfs_lsockdriver_addr.len = addr_len;
320 m.m_vfs_lsockdriver_addr.user_endpt = who_e;
321 m.m_vfs_lsockdriver_addr.sflags =
322 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
324 /* Send the request to the driver. */
325 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
326 panic("VFS: asynsend in sdev_accept failed: %d", r);
328 /* Suspend the process until the reply arrives. */
329 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID,
330 listen_fd, 0);
334 * Send or receive a message on a socket. All read (read(2), recvfrom(2), and
335 * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on
336 * sockets pass through this function. The function is named sdev_readwrite
337 * rather than sdev_sendrecv to avoid confusion with sdev_sendrec.
340 sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len,
341 vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf,
342 unsigned int addr_len, int flags, int rw_flag, int filp_flags,
343 vir_bytes user_buf)
345 struct smap *sp;
346 sockid_t sock_id;
347 cp_grant_id_t data_grant, ctl_grant, addr_grant;
348 message m;
349 int r, bits;
351 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
352 return EIO;
354 /* Allocate resources. */
355 data_grant = GRANT_INVALID;
356 ctl_grant = GRANT_INVALID;
357 addr_grant = GRANT_INVALID;
358 bits = (rw_flag == WRITING) ? CPF_READ : CPF_WRITE;
361 * Supposedly it is allowed to send or receive zero data bytes, even
362 * though it is a bad idea as the return value will then be zero, which
363 * may also indicate EOF (as per W. Richard Stevens).
365 if (data_buf != 0) {
366 data_grant = cpf_grant_magic(sp->smap_endpt, who_e, data_buf,
367 data_len, bits);
368 if (!GRANT_VALID(data_grant))
369 panic("VFS: cpf_grant_magic failed");
372 if (ctl_buf != 0) {
373 ctl_grant = cpf_grant_magic(sp->smap_endpt, who_e, ctl_buf,
374 ctl_len, bits);
375 if (!GRANT_VALID(ctl_grant))
376 panic("VFS: cpf_grant_magic failed");
379 if (addr_buf != 0) {
380 addr_grant = cpf_grant_magic(sp->smap_endpt, who_e, addr_buf,
381 addr_len, bits);
382 if (!GRANT_VALID(addr_grant))
383 panic("VFS: cpf_grant_magic failed");
386 /* Prepare the request message. */
387 memset(&m, 0, sizeof(m));
388 m.m_type = (rw_flag == WRITING) ? SDEV_SEND : SDEV_RECV;
389 m.m_vfs_lsockdriver_sendrecv.req_id = (sockid_t)who_e;
390 m.m_vfs_lsockdriver_sendrecv.sock_id = sock_id;
391 m.m_vfs_lsockdriver_sendrecv.data_grant = data_grant;
392 m.m_vfs_lsockdriver_sendrecv.data_len = data_len;
393 m.m_vfs_lsockdriver_sendrecv.ctl_grant = ctl_grant;
394 m.m_vfs_lsockdriver_sendrecv.ctl_len = ctl_len;
395 m.m_vfs_lsockdriver_sendrecv.addr_grant = addr_grant;
396 m.m_vfs_lsockdriver_sendrecv.addr_len = addr_len;
397 m.m_vfs_lsockdriver_sendrecv.user_endpt = who_e;
398 m.m_vfs_lsockdriver_sendrecv.flags = flags;
399 if (filp_flags & O_NONBLOCK)
400 m.m_vfs_lsockdriver_sendrecv.flags |= MSG_DONTWAIT;
401 if (rw_flag == WRITING && (filp_flags & O_NOSIGPIPE))
402 m.m_vfs_lsockdriver_sendrecv.flags |= MSG_NOSIGNAL;
404 /* Send the request to the driver. */
405 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
406 panic("VFS: asynsend in sdev_readwrite failed: %d", r);
408 /* Suspend the process until the reply arrives. */
409 return sdev_suspend(dev, data_grant, ctl_grant, addr_grant, -1,
410 user_buf);
414 * Perform I/O control.
417 sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf, int filp_flags)
419 struct smap *sp;
420 sockid_t sock_id;
421 cp_grant_id_t grant;
422 message m;
423 int r;
425 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
426 return EIO;
428 /* Allocate resources. */
429 grant = make_ioctl_grant(sp->smap_endpt, who_e, buf, request);
431 /* Prepare the request message. */
432 memset(&m, 0, sizeof(m));
433 m.m_type = SDEV_IOCTL;
434 m.m_vfs_lsockdriver_ioctl.req_id = (sockid_t)who_e;
435 m.m_vfs_lsockdriver_ioctl.sock_id = sock_id;
436 m.m_vfs_lsockdriver_ioctl.request = request;
437 m.m_vfs_lsockdriver_ioctl.grant = grant;
438 m.m_vfs_lsockdriver_ioctl.user_endpt = who_e;
439 m.m_vfs_lsockdriver_ioctl.sflags =
440 (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0;
442 /* Send the request to the driver. */
443 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
444 panic("VFS: asynsend in sdev_ioctl failed: %d", r);
446 /* Suspend the process until the reply arrives. */
447 return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0);
451 * Set socket options.
454 sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr,
455 unsigned int len)
457 struct smap *sp;
458 sockid_t sock_id;
459 cp_grant_id_t grant;
460 message m;
461 int r;
463 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
464 return EIO;
466 /* Allocate resources. */
467 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, len, CPF_READ);
468 if (!GRANT_VALID(grant))
469 panic("VFS: cpf_grant_magic failed");
471 /* Prepare the request message. */
472 memset(&m, 0, sizeof(m));
473 m.m_type = SDEV_SETSOCKOPT;
474 m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
475 m.m_vfs_lsockdriver_getset.sock_id = sock_id;
476 m.m_vfs_lsockdriver_getset.level = level;
477 m.m_vfs_lsockdriver_getset.name = name;
478 m.m_vfs_lsockdriver_getset.grant = grant;
479 m.m_vfs_lsockdriver_getset.len = len;
481 /* Send the request, and wait for the reply. */
482 r = sdev_sendrec(sp, &m);
484 /* Free resources. */
485 (void)cpf_revoke(grant);
487 if (r != OK)
488 return r; /* socket driver died */
490 /* Parse and return the reply. */
491 if (m.m_type != SDEV_REPLY) {
492 printf("VFS: %d sent bad reply type %d for call %d\n",
493 sp->smap_endpt, m.m_type, job_call_nr);
494 return EIO;
497 return m.m_lsockdriver_vfs_reply.status;
501 * Send and receive a "get" request: getsockopt, getsockname, or getpeername.
503 static int
504 sdev_get(dev_t dev, int type, int level, int name, vir_bytes addr,
505 unsigned int * len)
507 struct smap *sp;
508 sockid_t sock_id;
509 cp_grant_id_t grant;
510 message m;
511 int r;
513 assert(type == SDEV_GETSOCKOPT || type == SDEV_GETSOCKNAME ||
514 type == SDEV_GETPEERNAME);
516 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
517 return EIO;
519 /* Allocate resources. */
520 grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, *len, CPF_WRITE);
521 if (!GRANT_VALID(grant))
522 panic("VFS: cpf_grant_magic failed");
524 /* Prepare the request message. */
525 memset(&m, 0, sizeof(m));
526 m.m_type = type;
527 m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e;
528 m.m_vfs_lsockdriver_getset.sock_id = sock_id;
529 m.m_vfs_lsockdriver_getset.level = level;
530 m.m_vfs_lsockdriver_getset.name = name;
531 m.m_vfs_lsockdriver_getset.grant = grant;
532 m.m_vfs_lsockdriver_getset.len = *len;
534 /* Send the request, and wait for the reply. */
535 r = sdev_sendrec(sp, &m);
537 /* Free resources. */
538 (void)cpf_revoke(grant);
540 if (r != OK)
541 return r; /* socket driver died */
543 /* Parse and return the reply. */
544 if (m.m_type != SDEV_REPLY) {
545 printf("VFS: %d sent bad reply type %d for call %d\n",
546 sp->smap_endpt, m.m_type, job_call_nr);
547 return EIO;
550 if ((r = m.m_lsockdriver_vfs_reply.status) < 0)
551 return r;
553 *len = (unsigned int)r;
554 return OK;
558 * Get socket options.
561 sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr,
562 unsigned int * len)
565 return sdev_get(dev, SDEV_GETSOCKOPT, level, name, addr, len);
569 * Get the local address of a socket.
572 sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int * addr_len)
575 return sdev_get(dev, SDEV_GETSOCKNAME, 0, 0, addr, addr_len);
579 * Get the remote address of a socket.
582 sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int * addr_len)
585 return sdev_get(dev, SDEV_GETPEERNAME, 0, 0, addr, addr_len);
589 * Shut down socket send and receive operations.
592 sdev_shutdown(dev_t dev, int how)
595 assert(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR);
597 return sdev_simple(dev, SDEV_SHUTDOWN, how);
601 * Close the socket identified by the given socket device number.
604 sdev_close(dev_t dev, int may_suspend)
606 struct smap *sp;
607 sockid_t sock_id;
608 message m;
609 int r;
612 * Originally, all close requests were blocking the calling thread, but
613 * the new support for SO_LINGER has changed that. In a very strictly
614 * limited subset of cases - namely, the user process calling close(2),
615 * we suspend the close request and handle it asynchronously. In all
616 * other cases, including close-on-exit, close-on-exec, and even dup2,
617 * the close is issued as a thread-synchronous request instead.
619 if (may_suspend) {
620 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
621 return EIO;
623 /* Prepare the request message. */
624 memset(&m, 0, sizeof(m));
625 m.m_type = SDEV_CLOSE;
626 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
627 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
628 m.m_vfs_lsockdriver_simple.param = 0;
630 /* Send the request to the driver. */
631 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
632 panic("VFS: asynsend in sdev_bindconn failed: %d", r);
634 /* Suspend the process until the reply arrives. */
635 return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID,
636 GRANT_INVALID, -1, 0);
637 } else
638 /* Block the calling thread until the socket is closed. */
639 return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK);
643 * Initiate a select call on a socket device. Return OK iff the request was
644 * sent, without suspending the process.
647 sdev_select(dev_t dev, int ops)
649 struct smap *sp;
650 sockid_t sock_id;
651 message m;
652 int r;
654 if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL)
655 return EIO;
657 /* Prepare the request message. */
658 memset(&m, 0, sizeof(m));
659 m.m_type = SDEV_SELECT;
660 m.m_vfs_lsockdriver_select.sock_id = sock_id;
661 m.m_vfs_lsockdriver_select.ops = ops;
663 /* Send the request to the driver. */
664 if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK)
665 panic("VFS: asynsend in sdev_select failed: %d", r);
667 return OK;
671 * A reply has arrived for a previous socket accept request, and the reply
672 * indicates that a socket has been accepted. A status is also returned;
673 * usually, this status is OK, but if not, the newly accepted socket must be
674 * closed immediately again. Process the low-level aspects of the reply, and
675 * call resume_accept() to let the upper socket layer handle the rest. This
676 * function is always called from a worker thread, and may thus block.
678 static void
679 sdev_finish_accept(struct fproc * rfp, message * m_ptr)
681 struct smap *sp;
682 sockid_t sock_id;
683 dev_t dev;
684 unsigned int len;
685 int status;
687 assert(rfp->fp_sdev.callnr == VFS_ACCEPT);
688 assert(m_ptr->m_type == SDEV_ACCEPT_REPLY);
689 assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id >= 0);
691 /* Free resources. Accept requests use up to one grant. */
692 if (GRANT_VALID(rfp->fp_sdev.grant[0]))
693 cpf_revoke(rfp->fp_sdev.grant[0]);
694 assert(!GRANT_VALID(rfp->fp_sdev.grant[1]));
695 assert(!GRANT_VALID(rfp->fp_sdev.grant[2]));
697 sock_id = m_ptr->m_lsockdriver_vfs_accept_reply.sock_id;
698 status = m_ptr->m_lsockdriver_vfs_accept_reply.status;
699 len = m_ptr->m_lsockdriver_vfs_accept_reply.len;
702 * We do not want the upper socket layer (socket.c) to deal with smap
703 * and socket ID details, so we construct the new socket device number
704 * here. We won't use the saved listen FD to determine the smap entry
705 * here, since that involves file pointers and other upper-layer-only
706 * stuff. So we have to look it up by the source endpoint. As a
707 * result, we detect some driver deaths here (but not all: see below).
709 if ((sp = get_smap_by_endpt(m_ptr->m_source)) != NULL) {
710 /* Leave 'status' as is, regardless of whether it is OK. */
711 dev = make_smap_dev(sp, sock_id);
712 } else {
714 * The driver must have died while the thread was blocked on
715 * activation. Extremely rare, but theoretically possible.
716 * Some driver deaths are indicated only by a driver-up
717 * announcement though; resume_accept() will detect this by
718 * checking that the listening socket has not been invalidated.
720 status = EIO;
721 dev = NO_DEV;
724 /* Let the upper socket layer handle the rest. */
725 resume_accept(rfp, status, dev, len, rfp->fp_sdev.aux.fd);
729 * Worker thread stub for finishing successful accept requests.
731 static void
732 do_accept_reply(void)
735 sdev_finish_accept(fp, &job_m_in);
739 * With the exception of successful accept requests, this function is called
740 * whenever a reply is received for a socket driver request for which the
741 * corresponding user process was suspended (as opposed to requests which just
742 * suspend the worker thread), i.e., for long-lasting socket calls. This
743 * function is also called if the socket driver has died during a long-lasting
744 * socket call, in which case the given message's m_type is a negative error
745 * code.
747 * The division between the upper socket layer (socket.c) and the lower socket
748 * layer (this file) here is roughly: if resuming the system call involves no
749 * more than a simple replycode() call, do that here; otherwise call into the
750 * upper socket layer to handle the details. In any case, do not ever let the
751 * upper socket layer deal with reply message parsing or suspension state.
753 * This function may or may not be called from a worker thread; as such, it
754 * MUST NOT block its calling thread. This function is called for failed
755 * accept requests; successful accept requests have their replies routed
756 * through sdev_finish_accept() instead, because those require a worker thread.
758 static void
759 sdev_finish(struct fproc * rfp, message * m_ptr)
761 unsigned int ctl_len, addr_len;
762 int callnr, status, flags;
764 /* The suspension status must just have been cleared by the caller. */
765 assert(rfp->fp_blocked_on == FP_BLOCKED_ON_NONE);
768 * Free resources. Every suspending call sets all grant fields, so we
769 * can safely revoke all of them without testing the original call.
771 if (GRANT_VALID(rfp->fp_sdev.grant[0]))
772 cpf_revoke(rfp->fp_sdev.grant[0]);
773 if (GRANT_VALID(rfp->fp_sdev.grant[1]))
774 cpf_revoke(rfp->fp_sdev.grant[1]);
775 if (GRANT_VALID(rfp->fp_sdev.grant[2]))
776 cpf_revoke(rfp->fp_sdev.grant[2]);
779 * Now that the socket driver call has finished (or been stopped due to
780 * driver death), we need to finish the corresponding system call from
781 * the user process. The action to take depends on the system call.
783 callnr = rfp->fp_sdev.callnr;
785 switch (callnr) {
786 case VFS_BIND:
787 case VFS_CONNECT:
788 case VFS_WRITE:
789 case VFS_SENDTO:
790 case VFS_SENDMSG:
791 case VFS_IOCTL:
792 case VFS_CLOSE:
794 * These calls all use the same SDEV_REPLY reply type and only
795 * need to reply an OK-or-error status code back to userland.
797 if (m_ptr->m_type == SDEV_REPLY) {
798 status = m_ptr->m_lsockdriver_vfs_reply.status;
801 * For close(2) calls, the return value must indicate
802 * that the file descriptor has been closed.
804 if (callnr == VFS_CLOSE &&
805 status != OK && status != EINPROGRESS)
806 status = OK;
807 } else if (m_ptr->m_type < 0) {
808 status = m_ptr->m_type;
809 } else {
810 printf("VFS: %d sent bad reply type %d for call %d\n",
811 m_ptr->m_source, m_ptr->m_type, callnr);
812 status = EIO;
814 replycode(rfp->fp_endpoint, status);
815 break;
817 case VFS_READ:
818 case VFS_RECVFROM:
819 case VFS_RECVMSG:
821 * These calls use SDEV_RECV_REPLY. The action to take depends
822 * on the exact call.
824 ctl_len = addr_len = 0;
825 flags = 0;
826 if (m_ptr->m_type == SDEV_RECV_REPLY) {
827 status = m_ptr->m_lsockdriver_vfs_recv_reply.status;
828 ctl_len = m_ptr->m_lsockdriver_vfs_recv_reply.ctl_len;
829 addr_len =
830 m_ptr->m_lsockdriver_vfs_recv_reply.addr_len;
831 flags = m_ptr->m_lsockdriver_vfs_recv_reply.flags;
832 } else if (m_ptr->m_type < 0) {
833 status = m_ptr->m_type;
834 } else {
835 printf("VFS: %d sent bad reply type %d for call %d\n",
836 m_ptr->m_source, m_ptr->m_type, callnr);
837 status = EIO;
840 switch (callnr) {
841 case VFS_READ:
842 replycode(rfp->fp_endpoint, status);
843 break;
844 case VFS_RECVFROM:
845 resume_recvfrom(rfp, status, addr_len);
846 break;
847 case VFS_RECVMSG:
848 resume_recvmsg(rfp, status, ctl_len, addr_len, flags,
849 rfp->fp_sdev.aux.buf);
850 break;
852 break;
854 case VFS_ACCEPT:
856 * This call uses SDEV_ACCEPT_REPLY. We only get here if the
857 * accept call has failed without creating a new socket, in
858 * which case we can simply call replycode() with the error.
859 * For nothing other than consistency, we let resume_accept()
860 * handle this case too.
862 addr_len = 0;
863 if (m_ptr->m_type == SDEV_ACCEPT_REPLY) {
864 assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id <
866 status = m_ptr->m_lsockdriver_vfs_accept_reply.status;
867 addr_len = m_ptr->m_lsockdriver_vfs_accept_reply.len;
868 } else if (m_ptr->m_type < 0) {
869 status = m_ptr->m_type;
870 } else {
871 printf("VFS: %d sent bad reply type %d for call %d\n",
872 m_ptr->m_source, m_ptr->m_type, callnr);
873 status = EIO;
876 * Quick rundown of m_lsockdriver_vfs_accept_reply cases:
878 * - sock_id >= 0, status == OK: new socket accepted
879 * - sock_id >= 0, status != OK: new socket must be closed
880 * - sock_id < 0, status != OK: failure accepting socket
881 * - sock_id < 0, status == OK: invalid, covered right here
883 * See libsockdriver for why there are two reply fields at all.
885 if (status >= 0) {
886 printf("VFS: %d sent bad status %d for call %d\n",
887 m_ptr->m_source, status, callnr);
888 status = EIO;
890 resume_accept(rfp, status, NO_DEV, addr_len,
891 rfp->fp_sdev.aux.fd);
892 break;
894 default:
896 * Ultimately, enumerating all system calls that may cause
897 * socket I/O may prove too cumbersome. In that case, the
898 * callnr field could be replaced by a field that stores the
899 * combination of the expected reply type and the action to
900 * take, for example.
902 panic("VFS: socket reply %d for unknown call %d from %d",
903 m_ptr->m_type, callnr, rfp->fp_endpoint);
908 * Abort the suspended socket call for the given process, because the
909 * corresponding socket driver has died.
911 void
912 sdev_stop(struct fproc * rfp)
914 message m;
916 assert(rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV);
918 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
921 * We use one single approach both here and when stopping worker
922 * threads: the reply message's m_type is set to an error code (always
923 * EIO for now) instead of an actual SDEV_ reply code. We test for
924 * this case in non-suspending calls as well as in sdev_finish().
926 m.m_type = EIO;
927 sdev_finish(rfp, &m);
931 * Cancel the ongoing long-lasting socket call, because the calling process has
932 * received a caught or terminating signal. This function is always called
933 * from a worker thread (as part of PM) work, with 'fp' set to the process that
934 * issued the original system call. The calling function has just unsuspended
935 * the process out of _SDEV blocking state. The job of this function is to
936 * issue a cancel request and then block until a reply comes in; the reply may
937 * indicate success, in which case it must be handled accordingly.
939 void
940 sdev_cancel(void)
942 struct smap *sp;
943 message m;
944 sockid_t sock_id;
946 /* The suspension status must just have been cleared by the caller. */
947 assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
949 if ((sp = get_smap_by_dev(fp->fp_sdev.dev, &sock_id)) != NULL) {
950 /* Prepare the request message. */
951 memset(&m, 0, sizeof(m));
952 m.m_type = SDEV_CANCEL;
953 m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e;
954 m.m_vfs_lsockdriver_simple.sock_id = sock_id;
957 * Send the cancel request, and wait for a reply. The reply
958 * will be for the original request and must be processed
959 * accordingly. It is possible that the original request
960 * actually succeeded, because 1) the cancel request resulted
961 * in partial success or 2) the original reply and the cancel
962 * request crossed each other. It is because of the second
963 * case that a socket driver must not respond at all to a
964 * cancel operation for an unknown request.
966 sdev_sendrec(sp, &m);
967 } else
968 m.m_type = EIO;
971 * Successful accept requests require special processing, but since we
972 * are already operating from a working thread here, we need not spawn
973 * an additional worker thread for this case.
975 if (m.m_type == SDEV_ACCEPT_REPLY &&
976 m.m_lsockdriver_vfs_accept_reply.sock_id >= 0)
977 sdev_finish_accept(fp, &m);
978 else
979 sdev_finish(fp, &m);
983 * A socket driver has sent a reply to a socket request. Process it, by either
984 * waking up an active worker thread, finishing the system call from here, or
985 * (in the exceptional case of accept calls) spawning a new worker thread to
986 * process the reply. This function MUST NOT block its calling thread.
988 void
989 sdev_reply(void)
991 struct fproc *rfp;
992 struct smap *sp;
993 struct worker_thread *wp;
994 sockid_t req_id = -1;
995 dev_t dev;
996 int slot;
998 if ((sp = get_smap_by_endpt(who_e)) == NULL) {
999 printf("VFS: ignoring sock dev reply from unknown driver %d\n",
1000 who_e);
1001 return;
1004 switch (call_nr) {
1005 case SDEV_REPLY:
1006 req_id = m_in.m_lsockdriver_vfs_reply.req_id;
1007 break;
1008 case SDEV_SOCKET_REPLY:
1009 req_id = m_in.m_lsockdriver_vfs_socket_reply.req_id;
1010 break;
1011 case SDEV_ACCEPT_REPLY:
1012 req_id = m_in.m_lsockdriver_vfs_accept_reply.req_id;
1013 break;
1014 case SDEV_RECV_REPLY:
1015 req_id = m_in.m_lsockdriver_vfs_recv_reply.req_id;
1016 break;
1017 case SDEV_SELECT1_REPLY:
1018 dev = make_smap_dev(sp,
1019 m_in.m_lsockdriver_vfs_select_reply.sock_id);
1020 select_sdev_reply1(dev,
1021 m_in.m_lsockdriver_vfs_select_reply.status);
1022 return;
1023 case SDEV_SELECT2_REPLY:
1024 dev = make_smap_dev(sp,
1025 m_in.m_lsockdriver_vfs_select_reply.sock_id);
1026 select_sdev_reply2(dev,
1027 m_in.m_lsockdriver_vfs_select_reply.status);
1028 return;
1029 default:
1030 printf("VFS: ignoring unknown sock dev reply %d from %d\n",
1031 call_nr, who_e);
1032 return;
1035 if (isokendpt((endpoint_t)req_id, &slot) != OK) {
1036 printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
1037 who_e, req_id);
1038 return;
1041 rfp = &fproc[slot];
1042 wp = rfp->fp_worker;
1043 if (wp != NULL && wp->w_task == who_e && wp->w_drv_sendrec != NULL) {
1044 assert(!fp_is_blocked(rfp));
1045 *wp->w_drv_sendrec = m_in;
1046 wp->w_drv_sendrec = NULL;
1047 worker_signal(wp); /* resume suspended thread */
1049 * It is up to the worker thread to 1) check that the reply is
1050 * of the right type for the request, and 2) keep in mind that
1051 * the reply type may be EIO in case the socket driver died.
1053 } else if (rfp->fp_blocked_on != FP_BLOCKED_ON_SDEV ||
1054 get_smap_by_dev(rfp->fp_sdev.dev, NULL) != sp) {
1055 printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
1056 rfp->fp_endpoint, who_e);
1057 return;
1058 } else if (call_nr == SDEV_ACCEPT_REPLY &&
1059 m_in.m_lsockdriver_vfs_accept_reply.sock_id >= 0) {
1061 * For accept replies that return a new socket, we need to
1062 * spawn a worker thread, because accept calls may block (so
1063 * there will no longer be a worker thread) and processing the
1064 * reply requires additional blocking calls (which we cannot
1065 * issue from the main thread). This is tricky. Under no
1066 * circumstances may we "lose" a legitimate reply, because this
1067 * would lead to resource leaks in the socket driver. To this
1068 * end, we rely on the current worker thread model to
1069 * prioritize regular work over PM work. Still, sdev_cancel()
1070 * may end up receiving the accept reply if it was already
1071 * blocked waiting for the reply message, and it must then
1072 * perform the same tasks.
1075 * It is possible that if all threads are in use, there is a
1076 * "gap" between starting the thread and its activation. The
1077 * main problem for this case is that the socket driver dies
1078 * within that gap. For accepts, we address this with no less
1079 * than two checks: 1) in this file, by looking up the smap
1080 * entry by the reply source endpoint again - if the entry is
1081 * no longer valid, the socket driver must have died; 2) in
1082 * socket.c, by revalidating the original listening socket - if
1083 * the listening socket has been invalidated, the driver died.
1085 * Since we unsuspend the process now, a socket driver sending
1086 * two accept replies in a row may never cause VFS to attempt
1087 * spawning two threads; the second reply should be ignored.
1089 assert(fp->fp_func == NULL);
1091 worker_start(rfp, do_accept_reply, &m_in, FALSE /*use_spare*/);
1094 * TODO: I just introduced the notion of not using the fp_u
1095 * union across yields after unsuspension, but for socket calls
1096 * we have a lot of socket state to carry over, so I'm now
1097 * immediately violating my own rule again here. Possible
1098 * solutions: 1) introduce another blocking state just to mark
1099 * the fp_u union in use (this has side effects though), 2)
1100 * introduce a pseudo message type which covers both the accept
1101 * reply fields and the fp_u state (do_pending_pipe does this),
1102 * or 3) add a fp_flags flag for this purpose. In any case,
1103 * the whole point is that we catch any attempts to reuse fp_u
1104 * for other purposes and thus cause state corruption. This
1105 * should not happen anyway, but it's too dangerous to leave
1106 * entirely unchecked. --dcvmoole
1108 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
1109 } else {
1110 rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
1112 sdev_finish(rfp, &m_in);