Sync with cat.c from netbsd-8
[minix3.git] / minix / servers / vfs / select.c
blob9ce614c9b53d4275f28f93b59677d26270e66dd5
1 /* Implement entry point to select system call.
3 * The entry points into this file are
4 * do_select: perform the SELECT system call
5 * select_callback: notify select system of possible fd operation
6 * select_unsuspend_by_endpt: cancel a blocking select on exiting driver
8 * The select code uses minimal locking, so that the replies from character
9 * drivers can be processed without blocking. Filps are locked only for pipes.
10 * We make the assumption that any other structures and fields are safe to
11 * check (and possibly change) as long as we know that a process is blocked on
12 * a select(2) call, meaning that all involved filps are guaranteed to stay
13 * open until either we finish the select call, or the process gets interrupted
14 * by a signal.
17 #include "fs.h"
18 #include <sys/fcntl.h>
19 #include <sys/time.h>
20 #include <sys/select.h>
21 #include <sys/stat.h>
22 #include <minix/callnr.h>
23 #include <minix/u64.h>
24 #include <string.h>
25 #include <assert.h>
27 #include "file.h"
28 #include "vnode.h"
/* Upper bound on the number of select() calls that can be pending at once;
 * this is the size of the select table below.
 */
#define MAXSELECTS	25

/* Copy directions accepted by copy_fdsets() */
#define FROM_PROC	0	/* from the calling process into VFS */
#define TO_PROC		1	/* from VFS back to the calling process */

#define USECPERSEC	1000000	/* number of microseconds in a second */

/* Type of the fd_set addresses received in the select request message; see
 * the vir_*fds fields of struct selectentry.
 */
typedef fd_set *ixfer_fd_set_ptr;
39 static struct selectentry {
40 struct fproc *requestor; /* slot is free iff this is NULL */
41 endpoint_t req_endpt;
42 fd_set readfds, writefds, errorfds;
43 fd_set ready_readfds, ready_writefds, ready_errorfds;
44 ixfer_fd_set_ptr vir_readfds, vir_writefds, vir_errorfds;
45 struct filp *filps[OPEN_MAX];
46 int type[OPEN_MAX];
47 int nfds, nreadyfds;
48 int error;
49 char block;
50 char starting;
51 clock_t expiry;
52 minix_timer_t timer; /* if expiry > 0 */
53 } selecttab[MAXSELECTS];
/* Select table bookkeeping */
static int copy_fdsets(struct selectentry *se, int nfds, int direction);
static int is_deferred(struct selectentry *se);
static void restart_proc(struct selectentry *se);
static int tab2ops(int fd, struct selectentry *e);
static void ops2tab(int ops, int fd, struct selectentry *e);
static void wipe_select(struct selectentry *s);
static void select_return(struct selectentry *);

/* File pointer type tests */
static int is_regular_file(struct filp *f);
static int is_pipe(struct filp *f);
static int is_char_device(struct filp *f);
static int is_sock_device(struct filp *f);

/* Per-type select request handlers; see the fdtypes table */
static int select_request_file(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_sock(struct filp *f, int *ops, int block,
	struct fproc *rfp);
static int select_request_pipe(struct filp *f, int *ops, int block,
	struct fproc *rfp);

/* File pointer state handling */
static void filp_status(struct filp *fp, int status);
static void select_lock_filp(struct filp *f, int ops);
static void select_cancel_all(struct selectentry *e);
static void select_cancel_filp(struct filp *f);
static void select_restart_filps(void);

/* Timer callback; not static */
void select_timeout_check(int s);
81 static struct fdtype {
82 int (*select_request)(struct filp *, int *ops, int block,
83 struct fproc *rfp);
84 int (*type_match)(struct filp *f);
85 } fdtypes[] = {
86 { select_request_char, is_char_device },
87 { select_request_sock, is_sock_device },
88 { select_request_file, is_regular_file },
89 { select_request_pipe, is_pipe },
91 #define SEL_FDS (sizeof(fdtypes) / sizeof(fdtypes[0]))
93 /*===========================================================================*
94 * do_select *
95 *===========================================================================*/
96 int do_select(void)
98 /* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
99 * call. First we copy the arguments and verify their sanity. Then we check
100 * whether there are file descriptors that satisfy the select call right off
101 * the bat. If so, or if there are no ready file descriptors but the process
102 * requested to return immediately, we return the result. Otherwise we set a
103 * timeout and wait for either the file descriptors to become ready or the
104 * timer to go off. If no timeout value was provided, we wait indefinitely.
106 int r, nfds, do_timeout, fd, type, s;
107 struct filp *f;
108 unsigned int ops;
109 struct timeval timeout;
110 struct selectentry *se;
111 vir_bytes vtimeout;
112 clock_t ticks;
114 nfds = job_m_in.m_lc_vfs_select.nfds;
115 vtimeout = job_m_in.m_lc_vfs_select.timeout;
117 /* Sane amount of file descriptors? */
118 if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
120 /* Find a slot to store this select request */
121 for (s = 0; s < MAXSELECTS; s++)
122 if (selecttab[s].requestor == NULL) /* Unused slot */
123 break;
124 if (s >= MAXSELECTS) return(ENOSPC);
126 se = &selecttab[s];
127 wipe_select(se); /* Clear results of previous usage */
128 se->requestor = fp;
129 se->req_endpt = who_e;
130 se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
131 se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
132 se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;
134 /* Copy fdsets from the process */
135 if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
136 se->requestor = NULL;
137 return(r);
140 /* Did the process set a timeout value? If so, retrieve it. */
141 if (vtimeout != 0) {
142 r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
143 sizeof(timeout));
145 /* No nonsense in the timeval */
146 if (r == OK && (timeout.tv_sec < 0 || timeout.tv_usec < 0 ||
147 timeout.tv_usec >= USECPERSEC))
148 r = EINVAL;
150 if (r != OK) {
151 se->requestor = NULL;
152 return(r);
154 do_timeout = 1;
155 } else
156 do_timeout = 0;
158 /* If there is no timeout, we block forever. Otherwise, we block up to the
159 * specified time interval.
161 if (!do_timeout) /* No timeout value set */
162 se->block = 1;
163 else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
164 se->block = 1;
165 else /* timeout set as (0,0) - this effects a poll */
166 se->block = 0;
167 se->expiry = 0; /* no timer set (yet) */
169 /* We are going to lock filps, and that means that while locking a second
170 * filp, we might already get the results for the first one. In that case,
171 * the incoming results must not cause the select call to finish prematurely.
173 se->starting = TRUE;
175 /* Verify that file descriptors are okay to select on */
176 for (fd = 0; fd < nfds; fd++) {
177 /* Because the select() interface implicitly includes file descriptors
178 * you might not want to select on, we have to figure out whether we're
179 * interested in them. Typically, these file descriptors include fd's
180 * inherited from the parent proc and file descriptors that have been
181 * close()d, but had a lower fd than one in the current set.
183 if (!(ops = tab2ops(fd, se)))
184 continue; /* No operations set; nothing to do for this fd */
186 /* Get filp belonging to this fd. If this fails, there are two causes:
187 * either the given file descriptor was bad, or the associated filp is
188 * closed (in the FILP_CLOSED sense) as a result of invalidation. Only
189 * the former is a select error. The latter should result in operations
190 * being returned as ready on the file descriptor, since subsequent
191 * I/O calls are guaranteed to return I/O errors on such descriptors.
193 f = se->filps[fd] = get_filp(fd, VNODE_READ);
194 if (f == NULL && err_code != EIO) {
195 assert(err_code == EBADF);
197 /* We may already have adjusted filp_selectors on previous
198 * file pointers in the set, so do not simply return here.
200 se->error = EBADF;
201 break;
204 /* Check file types. According to POSIX 2008:
205 * "The pselect() and select() functions shall support regular files,
206 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
207 * behavior of pselect() and select() on file descriptors that refer to
208 * other types of file is unspecified."
210 * In our case, terminal and pseudo-terminal devices are handled by the
211 * TTY and PTY character drivers respectively. Sockets are handled by
212 * by their respective socket drivers. Additionally, we give other
213 * character drivers the chance to handle select for any of their
214 * device nodes. Some may not implement support for select and let
215 * libchardriver return EBADF, which we then pass to the calling
216 * process once we receive the reply.
218 * If we could not access the file pointer at all, it will have been
219 * closed due to invalidation after a service crash. In that case, we
220 * skip type matching and simply return pending operations as ready.
222 se->type[fd] = -1;
223 if (f == NULL)
224 continue; /* closed, skip type matching */
225 for (type = 0; type < SEL_FDS; type++) {
226 if (fdtypes[type].type_match(f)) {
227 se->type[fd] = type;
228 se->nfds = fd+1;
229 se->filps[fd]->filp_selectors++;
230 break;
233 unlock_filp(f);
234 if (se->type[fd] == -1) { /* Type not found */
235 se->error = EBADF;
236 break;
240 /* If an error occurred already, undo any changes so far and return. */
241 if (se->error != OK) {
242 select_cancel_all(se);
243 se->requestor = NULL;
244 return(se->error);
247 /* Check all file descriptors in the set whether one is 'ready' now */
248 for (fd = 0; fd < nfds; fd++) {
249 /* Again, check for involuntarily selected fd's */
250 if (!(ops = tab2ops(fd, se)))
251 continue; /* No operations set; nothing to do for this fd */
253 /* File descriptors selected for reading that are not opened for
254 * reading should be marked as readable, as read calls would fail
255 * immediately. The same applies to writing. For file descriptors for
256 * which the file pointer is already closed (f==NULL), return readable
257 * and writable operations (if requested) and skip the rest.
259 f = se->filps[fd];
260 if (f == NULL) {
261 ops2tab(SEL_RD | SEL_WR, fd, se);
262 continue;
264 if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
265 ops2tab(SEL_RD, fd, se);
266 ops &= ~SEL_RD;
268 if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
269 ops2tab(SEL_WR, fd, se);
270 ops &= ~SEL_WR;
272 /* Test filp for select operations if not already done so. e.g.,
273 * processes sharing a filp and both doing a select on that filp. */
274 if ((f->filp_select_ops & ops) != ops) {
275 int wantops;
277 wantops = (f->filp_select_ops |= ops);
278 type = se->type[fd];
279 assert(type >= 0);
280 select_lock_filp(f, wantops);
281 r = fdtypes[type].select_request(f, &wantops, se->block, fp);
282 unlock_filp(f);
283 if (r != OK && r != SUSPEND) {
284 se->error = r;
285 break; /* Error or bogus return code; abort */
288 /* The select request above might have turned on/off some
289 * operations because they were 'ready' or not meaningful.
290 * Either way, we might have a result and we need to store them
291 * in the select table entry. */
292 if (wantops & ops) ops2tab(wantops, fd, se);
296 /* At this point there won't be any blocking calls anymore. */
297 se->starting = FALSE;
299 if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
300 !is_deferred(se)) {
301 /* An error occurred, or fd's were found that were ready to go right
302 * away, and/or we were instructed not to block at all. Must return
303 * immediately. Do not copy FD sets if an error occurred.
305 if (se->error != OK)
306 r = se->error;
307 else
308 r = copy_fdsets(se, se->nfds, TO_PROC);
309 select_cancel_all(se);
310 se->requestor = NULL;
312 if (r != OK)
313 return(r);
314 return(se->nreadyfds);
317 /* Convert timeval to ticks and set the timer. If it fails, undo
318 * all, return error.
320 if (do_timeout && se->block) {
321 /* Open Group:
322 * "If the requested timeout interval requires a finer
323 * granularity than the implementation supports, the
324 * actual timeout interval shall be rounded up to the next
325 * supported value."
327 if (timeout.tv_sec >= (TMRDIFF_MAX - 1) / system_hz) {
328 ticks = TMRDIFF_MAX; /* silently truncate */
329 } else {
330 ticks = timeout.tv_sec * system_hz +
331 (timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
333 assert(ticks != 0 && ticks <= TMRDIFF_MAX);
334 se->expiry = ticks;
335 set_timer(&se->timer, ticks, select_timeout_check, s);
338 /* process now blocked */
339 suspend(FP_BLOCKED_ON_SELECT);
340 return(SUSPEND);
343 /*===========================================================================*
344 * is_deferred *
345 *===========================================================================*/
346 static int is_deferred(struct selectentry *se)
348 /* Find out whether this select has pending initial replies */
350 int fd;
351 struct filp *f;
353 /* The select call must have finished its initialization at all. */
354 if (se->starting) return(TRUE);
356 for (fd = 0; fd < se->nfds; fd++) {
357 if ((f = se->filps[fd]) == NULL) continue;
358 if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
361 return(FALSE);
365 /*===========================================================================*
366 * is_regular_file *
367 *===========================================================================*/
368 static int is_regular_file(struct filp *f)
370 return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
373 /*===========================================================================*
374 * is_pipe *
375 *===========================================================================*/
376 static int is_pipe(struct filp *f)
378 /* Recognize either anonymous pipe or named pipe (FIFO) */
379 return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode));
382 /*===========================================================================*
383 * is_char_device *
384 *===========================================================================*/
385 static int is_char_device(struct filp *f)
387 /* See if this filp is a handle on a character device. This function MUST NOT
388 * block its calling thread. The given filp may or may not be locked.
391 return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
394 /*===========================================================================*
395 * is_sock_device *
396 *===========================================================================*/
397 static int is_sock_device(struct filp *f)
399 /* See if this filp is a handle on a socket device. This function MUST NOT
400 * block its calling thread. The given filp may or may not be locked.
403 return (f && f->filp_vno && S_ISSOCK(f->filp_vno->v_mode));
406 /*===========================================================================*
407 * select_filter *
408 *===========================================================================*/
409 static int select_filter(struct filp *f, int *ops, int block)
411 /* Determine which select operations can be satisfied immediately and which
412 * should be requested. Used for character and socket devices. This function
413 * MUST NOT block its calling thread.
415 int rops;
417 rops = *ops;
419 /* By default, nothing to do */
420 *ops = 0;
423 * If we have previously asked the driver to notify us about certain ready
424 * operations, but it has not notified us yet, then we can safely assume that
425 * those operations are not ready right now. Therefore, if this call is not
426 * supposed to block, we can disregard the pending operations as not ready.
427 * We must make absolutely sure that the flags are "stable" right now though:
428 * we are neither waiting to query the driver about them (FSF_UPDATE) nor
429 * querying the driver about them right now (FSF_BUSY). This is a dangerous
430 * case of premature optimization and may be removed altogether if it proves
431 * to continue to be a source of bugs.
433 if (!block && !(f->filp_select_flags & (FSF_UPDATE | FSF_BUSY)) &&
434 (f->filp_select_flags & FSF_BLOCKED)) {
435 if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
436 rops &= ~SEL_RD;
437 if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
438 rops &= ~SEL_WR;
439 if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
440 rops &= ~SEL_ERR;
441 if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
442 return(0);
445 f->filp_select_flags |= FSF_UPDATE;
446 if (block) {
447 rops |= SEL_NOTIFY;
448 if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK;
449 if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK;
450 if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK;
453 if (f->filp_select_flags & FSF_BUSY)
454 return(SUSPEND);
456 return rops;
459 /*===========================================================================*
460 * select_request_char *
461 *===========================================================================*/
462 static int select_request_char(struct filp *f, int *ops, int block,
463 struct fproc *rfp)
465 /* Check readiness status on a character device. Unless suitable results are
466 * available right now, this will only initiate the polling process, causing
467 * result processing to be deferred. This function MUST NOT block its calling
468 * thread. The given filp may or may not be locked.
470 dev_t dev;
471 int r, rops;
472 struct dmap *dp;
474 /* Start by remapping the device node number to a "real" device number. Those
475 * two are different only for CTTY_MAJOR aka /dev/tty, but that one single
476 * exception requires quite some extra effort here: the select code matches
477 * character driver replies to their requests based on the device number, so
478 * it needs to be aware that device numbers may be mapped. The idea is to
479 * perform the mapping once and store the result in the filp object, so that
480 * at least we don't run into problems when a process loses its controlling
481 * terminal while doing a select (see also free_proc). It should be noted
482 * that it is possible that multiple processes share the same /dev/tty filp,
483 * and they may not all have a controlling terminal. The ctty-less processes
484 * should never pass the mapping; a more problematic case is checked below.
486 * The cdev_map call also checks the major number for rough validity, so that
487 * we can use it to index the dmap array safely a bit later.
489 if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
490 return(ENXIO);
492 if (f->filp_select_dev != NO_DEV && f->filp_select_dev != dev) {
493 /* Currently, this case can occur as follows: a process with a
494 * controlling terminal opens /dev/tty and forks, the new child starts
495 * a new session, opens a new controlling terminal, and both parent and
496 * child call select on the /dev/tty file descriptor. If this case ever
497 * becomes real, a better solution may be to force-close a filp for
498 * /dev/tty when a new controlling terminal is opened.
500 printf("VFS: file pointer has multiple controlling TTYs!\n");
501 return(EIO);
503 f->filp_select_dev = dev; /* set before possibly suspending */
505 if ((rops = select_filter(f, ops, block)) <= 0)
506 return(rops); /* OK or suspend: nothing to do for now */
508 dp = &dmap[major(dev)];
509 if (dp->dmap_sel_busy)
510 return(SUSPEND);
512 f->filp_select_flags &= ~FSF_UPDATE;
513 r = cdev_select(dev, rops);
514 if (r != OK)
515 return(r);
517 dp->dmap_sel_busy = TRUE;
518 dp->dmap_sel_filp = f;
519 f->filp_select_flags |= FSF_BUSY;
521 return(SUSPEND);
524 /*===========================================================================*
525 * select_request_sock *
526 *===========================================================================*/
527 static int select_request_sock(struct filp *f, int *ops, int block,
528 struct fproc *rfp __unused)
530 /* Check readiness status on a socket device. Unless suitable results are
531 * available right now, this will only initiate the polling process, causing
532 * result processing to be deferred. This function MUST NOT block its calling
533 * thread. The given filp may or may not be locked.
535 struct smap *sp;
536 dev_t dev;
537 int r, rops;
539 dev = f->filp_vno->v_sdev;
541 if ((sp = get_smap_by_dev(dev, NULL)) == NULL)
542 return(ENXIO); /* this should not happen */
544 f->filp_select_dev = dev; /* set before possibly suspending */
546 if ((rops = select_filter(f, ops, block)) <= 0)
547 return(rops); /* OK or suspend: nothing to do for now */
549 if (sp->smap_sel_busy)
550 return(SUSPEND);
552 f->filp_select_flags &= ~FSF_UPDATE;
553 r = sdev_select(dev, rops);
554 if (r != OK)
555 return(r);
557 sp->smap_sel_busy = TRUE;
558 sp->smap_sel_filp = f;
559 f->filp_select_flags |= FSF_BUSY;
561 return(SUSPEND);
564 /*===========================================================================*
565 * select_request_file *
566 *===========================================================================*/
567 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops),
568 int UNUSED(block), struct fproc *UNUSED(rfp))
570 /* Files are always ready, so output *ops is input *ops */
571 return(OK);
574 /*===========================================================================*
575 * select_request_pipe *
576 *===========================================================================*/
577 static int select_request_pipe(struct filp *f, int *ops, int block,
578 struct fproc *UNUSED(rfp))
580 /* Check readiness status on a pipe. The given filp is locked. This function
581 * may block its calling thread if necessary.
583 int orig_ops, r = 0, err;
585 orig_ops = *ops;
587 if ((*ops & (SEL_RD|SEL_ERR))) {
588 /* Check if we can read 1 byte */
589 err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
590 1 /* Check only */);
592 if (err != SUSPEND)
593 r |= SEL_RD;
594 if (err < 0 && err != SUSPEND)
595 r |= SEL_ERR;
598 if ((*ops & (SEL_WR|SEL_ERR))) {
599 /* Check if we can write 1 byte */
600 err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
601 1 /* Check only */);
603 if (err != SUSPEND)
604 r |= SEL_WR;
605 if (err < 0 && err != SUSPEND)
606 r |= SEL_ERR;
609 /* Some options we collected might not be requested. */
610 *ops = r & orig_ops;
612 if (!*ops && block)
613 f->filp_pipe_select_ops |= orig_ops;
615 return(OK);
618 /*===========================================================================*
619 * tab2ops *
620 *===========================================================================*/
621 static int tab2ops(int fd, struct selectentry *e)
623 int ops = 0;
624 if (FD_ISSET(fd, &e->readfds)) ops |= SEL_RD;
625 if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
626 if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
628 return(ops);
632 /*===========================================================================*
633 * ops2tab *
634 *===========================================================================*/
635 static void ops2tab(int ops, int fd, struct selectentry *e)
637 if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
638 !FD_ISSET(fd, &e->ready_readfds)) {
639 FD_SET(fd, &e->ready_readfds);
640 e->nreadyfds++;
643 if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
644 !FD_ISSET(fd, &e->ready_writefds)) {
645 FD_SET(fd, &e->ready_writefds);
646 e->nreadyfds++;
649 if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
650 !FD_ISSET(fd, &e->ready_errorfds)) {
651 FD_SET(fd, &e->ready_errorfds);
652 e->nreadyfds++;
657 /*===========================================================================*
658 * copy_fdsets *
659 *===========================================================================*/
660 static int copy_fdsets(struct selectentry *se, int nfds, int direction)
662 /* Copy FD sets from or to the user process calling select(2). This function
663 * MUST NOT block the calling thread.
665 int r;
666 size_t fd_setsize;
667 endpoint_t src_e, dst_e;
668 fd_set *src_fds, *dst_fds;
670 if (nfds < 0 || nfds > OPEN_MAX)
671 panic("select copy_fdsets: nfds wrong: %d", nfds);
673 /* Only copy back as many bits as the user expects. */
674 fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
676 /* Set source and destination endpoints */
677 src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
678 dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
680 /* read set */
681 src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
682 dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
683 if (se->vir_readfds) {
684 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
685 (vir_bytes) dst_fds, fd_setsize);
686 if (r != OK) return(r);
689 /* write set */
690 src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
691 dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
692 if (se->vir_writefds) {
693 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
694 (vir_bytes) dst_fds, fd_setsize);
695 if (r != OK) return(r);
698 /* error set */
699 src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
700 dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
701 if (se->vir_errorfds) {
702 r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
703 (vir_bytes) dst_fds, fd_setsize);
704 if (r != OK) return(r);
707 return(OK);
711 /*===========================================================================*
712 * select_cancel_all *
713 *===========================================================================*/
714 static void select_cancel_all(struct selectentry *se)
716 /* Cancel select, possibly on success. Decrease select usage and cancel timer.
717 * This function MUST NOT block its calling thread.
720 int fd;
721 struct filp *f;
723 for (fd = 0; fd < se->nfds; fd++) {
724 if ((f = se->filps[fd]) == NULL) continue;
725 se->filps[fd] = NULL;
726 select_cancel_filp(f);
729 if (se->expiry > 0) {
730 cancel_timer(&se->timer);
731 se->expiry = 0;
734 se->requestor = NULL;
737 /*===========================================================================*
738 * select_cancel_filp *
739 *===========================================================================*/
740 static void select_cancel_filp(struct filp *f)
742 /* Reduce the number of select users of this filp. This function MUST NOT block
743 * its calling thread.
745 devmajor_t major;
746 struct smap *sp;
748 assert(f);
749 assert(f->filp_selectors > 0);
750 assert(f->filp_count > 0);
752 f->filp_selectors--;
753 if (f->filp_selectors == 0) {
754 /* No one selecting on this filp anymore, forget about select state */
755 f->filp_select_ops = 0;
756 f->filp_select_flags = 0;
757 f->filp_pipe_select_ops = 0;
759 /* If this filp is the subject of an ongoing select query to a
760 * character or socket device, mark the query as stale, so that this
761 * filp will not be checked when the result arrives. The filp select
762 * device may still be NO_DEV if do_select fails on the initial fd
763 * check.
765 if (is_char_device(f) && f->filp_select_dev != NO_DEV) {
766 major = major(f->filp_select_dev);
767 if (dmap[major].dmap_sel_busy &&
768 dmap[major].dmap_sel_filp == f)
769 dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
770 f->filp_select_dev = NO_DEV;
771 } else if (is_sock_device(f) && f->filp_select_dev != NO_DEV) {
772 if ((sp = get_smap_by_dev(f->filp_select_dev, NULL)) != NULL &&
773 sp->smap_sel_busy && sp->smap_sel_filp == f)
774 sp->smap_sel_filp = NULL; /* leave _busy set */
775 f->filp_select_dev = NO_DEV;
780 /*===========================================================================*
781 * select_return *
782 *===========================================================================*/
783 static void select_return(struct selectentry *se)
785 /* Return the results of a select call to the user process and revive the
786 * process. This function MUST NOT block its calling thread.
788 int r;
790 assert(!is_deferred(se)); /* Not done yet, first wait for async reply */
792 select_cancel_all(se);
794 if (se->error != OK)
795 r = se->error;
796 else
797 r = copy_fdsets(se, se->nfds, TO_PROC);
798 if (r == OK)
799 r = se->nreadyfds;
801 revive(se->req_endpt, r);
/*===========================================================================*
 *				select_callback				     *
 *===========================================================================*/
void select_callback(struct filp *f, int status)
{
/* The status of a filp has changed: 'status' carries the ready operations or
 * an error. This function is currently called only for pipes, and holds the
 * lock to the filp. The new status is passed on through filp_status.
 */
  filp_status(f, status);
}
818 /*===========================================================================*
819 * init_select *
820 *===========================================================================*/
821 void init_select(void)
823 int s;
825 for (s = 0; s < MAXSELECTS; s++)
826 init_timer(&selecttab[s].timer);
830 /*===========================================================================*
831 * select_forget *
832 *===========================================================================*/
833 void select_forget(void)
835 /* The calling thread's associated process is expected to be unpaused, due to
836 * a signal that is supposed to interrupt the current system call. Totally
837 * forget about the select(). This function may block its calling thread if
838 * necessary (but it doesn't).
840 int slot;
841 struct selectentry *se;
843 for (slot = 0; slot < MAXSELECTS; slot++) {
844 se = &selecttab[slot];
845 if (se->requestor == fp)
846 break;
849 if (slot >= MAXSELECTS) return; /* Entry not found */
851 assert(se->starting == FALSE);
853 /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */
854 select_cancel_all(se);
858 /*===========================================================================*
859 * select_timeout_check *
860 *===========================================================================*/
861 void select_timeout_check(int s)
863 /* An alarm has gone off for one of the select queries. This function MUST NOT
864 * block its calling thread.
866 struct selectentry *se;
868 if (s < 0 || s >= MAXSELECTS) return; /* Entry does not exist */
870 se = &selecttab[s];
871 if (se->requestor == NULL) return;
872 if (se->expiry == 0) return; /* Strange, did we even ask for a timeout? */
873 se->expiry = 0;
874 if (!is_deferred(se))
875 select_return(se);
876 else
877 se->block = 0; /* timer triggered "too soon", treat as nonblocking */
881 /*===========================================================================*
882 * select_unsuspend_by_endpt *
883 *===========================================================================*/
884 void select_unsuspend_by_endpt(endpoint_t proc_e)
886 /* Revive blocked processes when a driver has disappeared */
887 struct dmap *dp;
888 struct smap *sp;
889 devmajor_t major;
890 int fd, s, is_driver, restart;
891 struct selectentry *se;
892 struct filp *f;
894 /* Either or both of these may be NULL. */
895 dp = get_dmap_by_endpt(proc_e);
896 sp = get_smap_by_endpt(proc_e);
898 is_driver = (dp != NULL || sp != NULL);
900 for (s = 0; s < MAXSELECTS; s++) {
901 se = &selecttab[s];
902 if (se->requestor == NULL) continue;
903 if (se->requestor->fp_endpoint == proc_e) {
904 assert(se->requestor->fp_flags & FP_EXITING);
905 select_cancel_all(se);
906 continue;
909 /* Skip the more expensive "driver died" checks for non-drivers. */
910 if (!is_driver)
911 continue;
913 restart = FALSE;
915 for (fd = 0; fd < se->nfds; fd++) {
916 if ((f = se->filps[fd]) == NULL)
917 continue;
918 if (is_char_device(f)) {
919 assert(f->filp_select_dev != NO_DEV);
920 major = major(f->filp_select_dev);
921 if (dmap_driver_match(proc_e, major)) {
922 ops2tab(SEL_RD | SEL_WR, fd, se);
923 se->filps[fd] = NULL;
924 select_cancel_filp(f);
925 restart = TRUE;
927 } else if (sp != NULL && is_sock_device(f)) {
928 assert(f->filp_select_dev != NO_DEV);
929 if (get_smap_by_dev(f->filp_select_dev, NULL) == sp) {
930 ops2tab(SEL_RD | SEL_WR, fd, se);
931 se->filps[fd] = NULL;
932 select_cancel_filp(f);
933 restart = TRUE;
938 if (restart)
939 restart_proc(se);
942 /* Any outstanding queries will never be answered, so forget about them. */
943 if (dp != NULL) {
944 assert(dp->dmap_sel_filp == NULL);
945 dp->dmap_sel_busy = FALSE;
947 if (sp != NULL) {
948 assert(sp->smap_sel_filp == NULL);
949 sp->smap_sel_busy = FALSE;
953 /*===========================================================================*
954 * select_reply1 *
955 *===========================================================================*/
956 static void select_reply1(struct filp *f, int status)
958 /* Handle the initial reply to a character or socket select request. This
959 * function MUST NOT block its calling thread.
962 assert(f->filp_count >= 1);
963 assert(f->filp_select_flags & FSF_BUSY);
965 f->filp_select_flags &= ~FSF_BUSY;
967 /* The select call is done now, except when
968 * - another process started a select on the same filp with possibly a
969 * different set of operations.
970 * - a process does a select on the same filp but using different file
971 * descriptors.
972 * - the select has a timeout. Upon receiving this reply the operations
973 * might not be ready yet, so we want to wait for that to ultimately
974 * happen.
975 * Therefore we need to keep remembering what the operations are.
977 if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
978 f->filp_select_ops = 0; /* done selecting */
979 else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
980 /* there may be operations pending */
981 f->filp_select_ops &= ~status;
983 /* Record new filp status */
984 if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
985 if (status > 0) { /* operations ready */
986 if (status & SEL_RD)
987 f->filp_select_flags &= ~FSF_RD_BLOCK;
988 if (status & SEL_WR)
989 f->filp_select_flags &= ~FSF_WR_BLOCK;
990 if (status & SEL_ERR)
991 f->filp_select_flags &= ~FSF_ERR_BLOCK;
992 } else if (status < 0) { /* error */
993 /* Always unblock upon error */
994 f->filp_select_flags &= ~FSF_BLOCKED;
998 filp_status(f, status); /* Tell filp owners about the results */
1001 /*===========================================================================*
1002 * select_cdev_reply1 *
1003 *===========================================================================*/
1004 void select_cdev_reply1(endpoint_t driver_e, devminor_t minor, int status)
1006 /* Handle the initial reply to a CDEV_SELECT request. This function MUST NOT
1007 * block its calling thread.
1009 devmajor_t major;
1010 dev_t dev;
1011 struct filp *f;
1012 struct dmap *dp;
1014 /* Figure out which device is replying */
1015 if ((dp = get_dmap_by_endpt(driver_e)) == NULL) return;
1017 major = dp-dmap;
1018 dev = makedev(major, minor);
1020 /* Get filp belonging to character special file */
1021 if (!dp->dmap_sel_busy) {
1022 printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
1023 __FILE__, __LINE__, major);
1024 return;
1027 /* The select filp may have been set to NULL if the requestor has been
1028 * unpaused in the meantime. In that case, we ignore the result, but we do
1029 * look for other filps to restart later.
1031 if ((f = dp->dmap_sel_filp) != NULL) {
1032 /* Find vnode and check we got a reply from the device we expected */
1033 assert(is_char_device(f));
1034 assert(f->filp_select_dev != NO_DEV);
1035 if (f->filp_select_dev != dev) {
1036 /* This should never happen. The driver may be misbehaving.
1037 * For now we assume that the reply we want will arrive later..
1039 printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
1040 __FILE__, __LINE__, f->filp_select_dev, dev);
1041 return;
1045 /* No longer waiting for a reply from this device */
1046 dp->dmap_sel_busy = FALSE;
1047 dp->dmap_sel_filp = NULL;
1049 /* Process the status change, if still applicable. */
1050 if (f != NULL)
1051 select_reply1(f, status);
1053 /* See if we should send a select request for another filp now. */
1054 select_restart_filps();
1057 /*===========================================================================*
1058 * select_sdev_reply1 *
1059 *===========================================================================*/
1060 void select_sdev_reply1(dev_t dev, int status)
1062 /* Handle the initial reply to a SDEV_SELECT request. This function MUST NOT
1063 * block its calling thread.
1065 struct smap *sp;
1066 struct filp *f;
1068 if ((sp = get_smap_by_dev(dev, NULL)) == NULL)
1069 return;
1071 /* Get the file pointer for the socket device. */
1072 if (!sp->smap_sel_busy) {
1073 printf("VFS: was not expecting a SDEV_SELECT reply from %d\n",
1074 sp->smap_endpt);
1075 return;
1078 /* The select filp may have been set to NULL if the requestor has been
1079 * unpaused in the meantime. In that case, we ignore the result, but we do
1080 * look for other filps to restart later.
1082 if ((f = sp->smap_sel_filp) != NULL) {
1083 /* Find vnode and check we got a reply from the device we expected */
1084 assert(is_sock_device(f));
1085 assert(f->filp_select_dev != NO_DEV);
1086 if (f->filp_select_dev != dev) {
1087 /* This should never happen. The driver may be misbehaving.
1088 * For now we assume that the reply we want will arrive later..
1090 printf("VFS: expected reply from sock dev %llx, not %llx\n",
1091 f->filp_select_dev, dev);
1092 return;
1096 /* We are no longer waiting for a reply from this socket driver. */
1097 sp->smap_sel_busy = FALSE;
1098 sp->smap_sel_filp = NULL;
1100 /* Process the status change, if still applicable. */
1101 if (f != NULL)
1102 select_reply1(f, status);
1104 /* See if we should send a select request for another filp now. */
1105 select_restart_filps();
1108 /*===========================================================================*
1109 * select_reply2 *
1110 *===========================================================================*/
1111 static void select_reply2(int is_char, dev_t dev, int status)
1113 /* Find all file descriptors selecting for the given character (is_char==TRUE)
1114 * or socket (is_char==FALSE) device, update their statuses, and resume
1115 * activities accordingly.
1117 int slot, found, fd;
1118 struct filp *f;
1119 struct selectentry *se;
1121 for (slot = 0; slot < MAXSELECTS; slot++) {
1122 se = &selecttab[slot];
1123 if (se->requestor == NULL) continue; /* empty slot */
1125 found = FALSE;
1126 for (fd = 0; fd < se->nfds; fd++) {
1127 if ((f = se->filps[fd]) == NULL) continue;
1128 if (is_char && !is_char_device(f)) continue;
1129 if (!is_char && !is_sock_device(f)) continue;
1130 assert(f->filp_select_dev != NO_DEV);
1131 if (f->filp_select_dev != dev) continue;
1133 if (status > 0) { /* Operations ready */
1134 /* Clear the replied bits from the request
1135 * mask unless FSF_UPDATE is set.
1137 if (!(f->filp_select_flags & FSF_UPDATE))
1138 f->filp_select_ops &= ~status;
1139 if (status & SEL_RD)
1140 f->filp_select_flags &= ~FSF_RD_BLOCK;
1141 if (status & SEL_WR)
1142 f->filp_select_flags &= ~FSF_WR_BLOCK;
1143 if (status & SEL_ERR)
1144 f->filp_select_flags &= ~FSF_ERR_BLOCK;
1146 ops2tab(status, fd, se);
1147 } else {
1148 f->filp_select_flags &= ~FSF_BLOCKED;
1149 se->error = status;
1151 found = TRUE;
1153 /* Even if 'found' is set now, nothing may have changed for this call,
1154 * as it may not have been interested in the operations that were
1155 * reported as ready. Let restart_proc check.
1157 if (found)
1158 restart_proc(se);
1161 select_restart_filps();
1164 /*===========================================================================*
1165 * select_cdev_reply2 *
1166 *===========================================================================*/
1167 void select_cdev_reply2(endpoint_t driver_e, devminor_t minor, int status)
1169 /* Handle a secondary reply to a CDEV_SELECT request. A secondary reply occurs
1170 * when the select request is 'blocking' until an operation becomes ready. This
1171 * function MUST NOT block its calling thread.
1173 devmajor_t major;
1174 struct dmap *dp;
1175 dev_t dev;
1177 if (status == 0) {
1178 printf("VFS (%s:%d): weird status (%d) to report\n",
1179 __FILE__, __LINE__, status);
1180 return;
1183 /* Figure out which device is replying */
1184 if ((dp = get_dmap_by_endpt(driver_e)) == NULL) {
1185 printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
1186 __FILE__, __LINE__, driver_e);
1187 return;
1189 major = dp-dmap;
1190 dev = makedev(major, minor);
1192 select_reply2(TRUE /*is_char*/, dev, status);
1195 /*===========================================================================*
1196 * select_sdev_reply2 *
1197 *===========================================================================*/
1198 void select_sdev_reply2(dev_t dev, int status)
1200 /* Handle a secondary reply to a SDEV_SELECT request. A secondary reply occurs
1201 * when the select request is 'blocking' until an operation becomes ready. This
1202 * function MUST NOT block its calling thread.
1205 if (status == 0) {
1206 printf("VFS: weird socket device status (%d)\n", status);
1208 return;
1211 select_reply2(FALSE /*is_char*/, dev, status);
1214 /*===========================================================================*
1215 * select_restart_filps *
1216 *===========================================================================*/
1217 static void select_restart_filps(void)
1219 /* We got a result from a character driver, and now we need to check if we can
1220 * restart deferred polling operations. This function MUST NOT block its
1221 * calling thread.
1223 int fd, slot;
1224 struct filp *f;
1225 struct selectentry *se;
1227 /* Locate filps that can be restarted */
1228 for (slot = 0; slot < MAXSELECTS; slot++) {
1229 se = &selecttab[slot];
1230 if (se->requestor == NULL) continue; /* empty slot */
1232 /* Only 'deferred' processes are eligible to restart */
1233 if (!is_deferred(se)) continue;
1235 /* Find filps that are not waiting for a reply, but have an updated
1236 * status (i.e., another select on the same filp with possibly a
1237 * different set of operations is to be done), and thus requires the
1238 * select request to be sent again).
1240 for (fd = 0; fd < se->nfds; fd++) {
1241 int r, wantops, ops;
1242 if ((f = se->filps[fd]) == NULL) continue;
1243 if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
1244 continue; /* initial reply */
1245 if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
1246 continue; /* 'update' state */
1248 /* This function is suitable only for character and socket
1249 * devices. In particular, checking pipes the same way would
1250 * introduce a serious locking problem.
1252 assert(is_char_device(f) || is_sock_device(f));
1254 wantops = ops = f->filp_select_ops;
1255 if (is_char_device(f))
1256 r = select_request_char(f, &wantops, se->block,
1257 se->requestor);
1258 else
1259 r = select_request_sock(f, &wantops, se->block,
1260 se->requestor);
1261 if (r != OK && r != SUSPEND) {
1262 se->error = r;
1263 restart_proc(se);
1264 break; /* Error or bogus return code; abort */
1266 if (wantops & ops) ops2tab(wantops, fd, se);
1271 /*===========================================================================*
1272 * filp_status *
1273 *===========================================================================*/
1274 static void
1275 filp_status(struct filp *f, int status)
1277 /* Tell processes that need to know about the status of this filp. This
1278 * function MUST NOT block its calling thread.
1280 int fd, slot, found;
1281 struct selectentry *se;
1283 for (slot = 0; slot < MAXSELECTS; slot++) {
1284 se = &selecttab[slot];
1285 if (se->requestor == NULL) continue; /* empty slot */
1287 found = FALSE;
1288 for (fd = 0; fd < se->nfds; fd++) {
1289 if (se->filps[fd] != f) continue;
1290 if (status < 0)
1291 se->error = status;
1292 else
1293 ops2tab(status, fd, se);
1294 found = TRUE;
1296 if (found)
1297 restart_proc(se);
1301 /*===========================================================================*
1302 * restart_proc *
1303 *===========================================================================*/
1304 static void
1305 restart_proc(struct selectentry *se)
1307 /* Tell process about select results (if any) unless there are still results
1308 * pending. This function MUST NOT block its calling thread.
1311 if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
1312 select_return(se);
1315 /*===========================================================================*
1316 * wipe_select *
1317 *===========================================================================*/
1318 static void wipe_select(struct selectentry *se)
1320 se->nfds = 0;
1321 se->nreadyfds = 0;
1322 se->error = OK;
1323 se->block = 0;
1324 memset(se->filps, 0, sizeof(se->filps));
1326 FD_ZERO(&se->readfds);
1327 FD_ZERO(&se->writefds);
1328 FD_ZERO(&se->errorfds);
1329 FD_ZERO(&se->ready_readfds);
1330 FD_ZERO(&se->ready_writefds);
1331 FD_ZERO(&se->ready_errorfds);
1334 /*===========================================================================*
1335 * select_lock_filp *
1336 *===========================================================================*/
1337 static void select_lock_filp(struct filp *f, int ops)
1339 /* Lock a filp and vnode based on which operations are requested. This function
1340 * may block its calling thread, obviously.
1342 tll_access_t locktype;
1344 locktype = VNODE_READ; /* By default */
1346 if (ops & (SEL_WR|SEL_ERR))
1347 /* Selecting for error or writing requires exclusive access */
1348 locktype = VNODE_WRITE;
1350 lock_filp(f, locktype);
1354 * Dump the state of the entire select table, for debugging purposes.
1356 void
1357 select_dump(void)
1359 struct selectentry *se;
1360 struct filp *f;
1361 struct dmap *dp;
1362 struct smap *sp;
1363 dev_t dev;
1364 sockid_t sockid;
1365 int s, fd;
1367 for (s = 0; s < MAXSELECTS; s++) {
1368 se = &selecttab[s];
1369 if (se->requestor == NULL)
1370 continue;
1372 printf("select %d: endpt %d nfds %d nreadyfds %d error %d "
1373 "block %d starting %d expiry %u is_deferred %d\n",
1374 s, se->req_endpt, se->nfds, se->nreadyfds, se->error,
1375 se->block, se->starting, se->expiry, is_deferred(se));
1377 for (fd = 0; !se->starting && fd < se->nfds; fd++) {
1378 /* Save on output: do not print NULL filps at all. */
1379 if ((f = se->filps[fd]) == NULL)
1380 continue;
1382 printf("- [%d] filp %p flags %x type ", fd, f,
1383 f->filp_select_flags);
1384 if (is_regular_file(f))
1385 printf("regular\n");
1386 else if (is_pipe(f))
1387 printf("pipe\n");
1388 else if (is_char_device(f)) {
1389 dev = cdev_map(f->filp_vno->v_sdev,
1390 se->requestor);
1391 printf("char (dev <%d,%d>, dmap ",
1392 major(dev), minor(dev));
1393 if (dev != NO_DEV) {
1394 dp = &dmap[major(dev)];
1395 printf("busy %d filp %p)\n",
1396 dp->dmap_sel_busy,
1397 dp->dmap_sel_filp);
1398 } else
1399 printf("unknown)\n");
1400 } else if (is_sock_device(f)) {
1401 dev = f->filp_vno->v_sdev;
1402 printf("sock (dev ");
1403 sp = get_smap_by_dev(dev, &sockid);
1404 if (sp != NULL) {
1405 printf("<%d,%d>, smap busy %d filp "
1406 "%p)\n", sp->smap_num, sockid,
1407 sp->smap_sel_busy,
1408 sp->smap_sel_filp);
1409 } else
1410 printf("<0x%"PRIx64">, smap "
1411 "unknown)\n", dev);
1412 } else
1413 printf("unknown\n");