minix/servers/vfs/filedes.c

/* This file contains the procedures that manipulate file descriptors.
 *
 * The entry points into this file are
 *   get_fd:          look for free file descriptor and free filp slots
 *   get_filp:        look up the filp entry for a given file descriptor
 *   find_filp:       find a filp slot that points to a given vnode
 *   invalidate_filp: invalidate a filp and associated fd's, only let close()
 *                    happen on it
 *   do_copyfd:       copies a file descriptor from or to another endpoint
 */
  11
  12 #include <sys/select.h>
  13 #include <minix/callnr.h>
  14 #include <minix/u64.h>
  15 #include <assert.h>
  16 #include <sys/stat.h>
  17 #include "fs.h"
  18 #include "file.h"
  19 #include "vnode.h"
  20
  21
  22 #if LOCK_DEBUG
  23 /*===========================================================================*
  24  *                              check_filp_locks                             *
  25  *===========================================================================*/
  26 void check_filp_locks_by_me(void)
  27 {
  28 /* Check whether this thread still has filp locks held */
  29   struct filp *f;
  30   int r;
  31
  32   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
  33         r = mutex_trylock(&f->filp_lock);
  34         if (r == -EDEADLK)
  35                 panic("Thread %d still holds filp lock on filp %p call_nr=%d\n",
  36                       mthread_self(), f, job_call_nr);
  37         else if (r == 0) {
  38                 /* We just obtained the lock, release it */
  39                 mutex_unlock(&f->filp_lock);
  40         }
  41   }
  42 }
  43 #endif
  44
  45 /*===========================================================================*
  46  *                              check_filp_locks                             *
  47  *===========================================================================*/
  48 void check_filp_locks(void)
  49 {
  50   struct filp *f;
  51   int r, count = 0;
  52
  53   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
  54         r = mutex_trylock(&f->filp_lock);
  55         if (r == -EBUSY) {
  56                 /* Mutex is still locked */
  57                 count++;
  58         } else if (r == 0) {
  59                 /* We just obtained a lock, don't want it */
  60                 mutex_unlock(&f->filp_lock);
  61         } else
  62                 panic("filp_lock weird state");
  63   }
  64   if (count) panic("locked filps");
  65 #if 0
  66   else printf("check_filp_locks OK\n");
  67 #endif
  68 }
  69
  70 /*===========================================================================*
  71  *                              init_filps                                   *
  72  *===========================================================================*/
  73 void init_filps(void)
  74 {
  75 /* Initialize filps */
  76   struct filp *f;
  77
  78   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
  79         if (mutex_init(&f->filp_lock, NULL) != 0)
  80                 panic("Failed to initialize filp mutex");
  81   }
  82
  83 }
  84
  85 /*===========================================================================*
  86  *                              check_fds                                    *
  87  *===========================================================================*/
  88 int check_fds(struct fproc *rfp, int nfds)
  89 {
  90 /* Check whether at least 'nfds' file descriptors can be created in the process
  91  * 'rfp'.  Return OK on success, or otherwise an appropriate error code.
  92  */
  93   int i;
  94
  95   assert(nfds >= 1);
  96
  97   for (i = 0; i < OPEN_MAX; i++) {
  98         if (rfp->fp_filp[i] == NULL) {
  99                 if (--nfds == 0)
 100                         return OK;
 101         }
 102   }
 103
 104   return EMFILE;
 105 }
 106
 107 /*===========================================================================*
 108  *                              get_fd                                       *
 109  *===========================================================================*/
 110 int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, struct filp **fpt)
 111 {
 112 /* Look for a free file descriptor and a free filp slot.  Fill in the mode word
 113  * in the latter, but don't claim either one yet, since the open() or creat()
 114  * may yet fail.
 115  */
 116
 117   register struct filp *f;
 118   register int i;
 119
 120   /* Search the fproc fp_filp table for a free file descriptor. */
 121   for (i = start; i < OPEN_MAX; i++) {
 122         if (rfp->fp_filp[i] == NULL) {
 123                 /* A file descriptor has been located. */
 124                 *k = i;
 125                 break;
 126         }
 127   }
 128
 129   /* Check to see if a file descriptor has been found. */
 130   if (i >= OPEN_MAX) return(EMFILE);
 131
 132   /* If we don't care about a filp, return now */
 133   if (fpt == NULL) return(OK);
 134
 135   /* Now that a file descriptor has been found, look for a free filp slot. */
 136   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 137         assert(f->filp_count >= 0);
 138         if (f->filp_count == 0 && mutex_trylock(&f->filp_lock) == 0) {
 139                 f->filp_mode = bits;
 140                 f->filp_pos = 0;
 141                 f->filp_selectors = 0;
 142                 f->filp_select_ops = 0;
 143                 f->filp_pipe_select_ops = 0;
 144                 f->filp_select_dev = NO_DEV;
 145                 f->filp_flags = 0;
 146                 f->filp_select_flags = 0;
 147                 f->filp_softlock = NULL;
 148                 f->filp_ioctl_fp = NULL;
 149                 *fpt = f;
 150                 return(OK);
 151         }
 152   }
 153
 154   /* If control passes here, the filp table must be full.  Report that back. */
 155   return(ENFILE);
 156 }
 157
 158
/*===========================================================================*
 *                              get_filp                                     *
 *===========================================================================*/
struct filp *
get_filp(
        int fild,                       /* file descriptor to look up */
        tll_access_t locktype           /* passed through to get_filp2() */
)
{
/* Look up file descriptor 'fild' in the process referred to by the global
 * 'fp' and return its filp pointer.  This is a thin wrapper around
 * get_filp2(); see there for the locking behavior and the error cases (NULL
 * is returned and 'err_code' is set on failure).
 */

  return get_filp2(fp, fild, locktype);
}
 172
 173
 174 /*===========================================================================*
 175  *                              get_filp2                                    *
 176  *===========================================================================*/
 177 struct filp *
 178 get_filp2(
 179         register struct fproc *rfp,
 180         int fild,                       /* file descriptor */
 181         tll_access_t locktype
 182 )
 183 {
 184 /* See if 'fild' refers to a valid file descr.  If so, return its filp ptr. */
 185   struct filp *filp;
 186
 187   filp = NULL;
 188   if (fild < 0 || fild >= OPEN_MAX)
 189         err_code = EBADF;
 190   else if (locktype != VNODE_OPCL && rfp->fp_filp[fild] != NULL &&
 191                 rfp->fp_filp[fild]->filp_mode == FILP_CLOSED)
 192         err_code = EIO; /* disallow all use except close(2) */
 193   else if ((filp = rfp->fp_filp[fild]) == NULL)
 194         err_code = EBADF;
 195   else if (locktype != VNODE_NONE)      /* Only lock the filp if requested */
 196         lock_filp(filp, locktype);      /* All is fine */
 197
 198   return(filp); /* may also be NULL */
 199 }
 200
 201
 202 /*===========================================================================*
 203  *                              find_filp                                    *
 204  *===========================================================================*/
 205 struct filp *find_filp(struct vnode *vp, mode_t bits)
 206 {
 207 /* Find a filp slot that refers to the vnode 'vp' in a way as described
 208  * by the mode bit 'bits'. Used for determining whether somebody is still
 209  * interested in either end of a pipe.  Also used when opening a FIFO to
 210  * find partners to share a filp field with (to shared the file position).
 211  * Like 'get_fd' it performs its job by linear search through the filp table.
 212  */
 213
 214   struct filp *f;
 215
 216   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 217         if (f->filp_count != 0 && f->filp_vno == vp && (f->filp_mode & bits)) {
 218                 return(f);
 219         }
 220   }
 221
 222   /* If control passes here, the filp wasn't there.  Report that back. */
 223   return(NULL);
 224 }
 225
 226 /*===========================================================================*
 227  *                              find_filp_by_sock_dev                        *
 228  *===========================================================================*/
 229 struct filp *find_filp_by_sock_dev(dev_t dev)
 230 {
 231 /* See if there is a file pointer for a socket with the given socket device
 232  * number.
 233  */
 234   struct filp *f;
 235
 236   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 237         if (f->filp_count != 0 && f->filp_vno != NULL &&
 238             S_ISSOCK(f->filp_vno->v_mode) && f->filp_vno->v_sdev == dev &&
 239             f->filp_mode != FILP_CLOSED) {
 240                 return f;
 241         }
 242   }
 243
 244   return NULL;
 245 }
 246
/*===========================================================================*
 *                              invalidate_filp                              *
 *===========================================================================*/
void invalidate_filp(struct filp *rfilp)
{
/* Invalidate filp 'rfilp' by setting its mode to FILP_CLOSED.  Nothing else
 * about the filp is changed here: the reference count, the vnode pointer and
 * the locks are all left alone.  get_filp2() refuses a FILP_CLOSED filp with
 * EIO for every lock type other than VNODE_OPCL.
 */

  rfilp->filp_mode = FILP_CLOSED;
}
 256
 257 /*===========================================================================*
 258  *                      invalidate_filp_by_char_major                        *
 259  *===========================================================================*/
 260 void invalidate_filp_by_char_major(devmajor_t major)
 261 {
 262   struct filp *f;
 263
 264   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 265         if (f->filp_count != 0 && f->filp_vno != NULL) {
 266                 if (major(f->filp_vno->v_sdev) == major &&
 267                     S_ISCHR(f->filp_vno->v_mode)) {
 268                         invalidate_filp(f);
 269                 }
 270         }
 271   }
 272 }
 273
 274 /*===========================================================================*
 275  *                      invalidate_filp_by_sock_drv                          *
 276  *===========================================================================*/
 277 void invalidate_filp_by_sock_drv(unsigned int num)
 278 {
 279 /* Invalidate all file pointers for sockets owned by the socket driver with the
 280  * smap number 'num'.
 281  */
 282   struct filp *f;
 283   struct smap *sp;
 284
 285   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 286         if (f->filp_count != 0 && f->filp_vno != NULL) {
 287                 if (S_ISSOCK(f->filp_vno->v_mode) &&
 288                     (sp = get_smap_by_dev(f->filp_vno->v_sdev, NULL)) != NULL
 289                     && sp->smap_num == num)
 290                         invalidate_filp(f);
 291         }
 292   }
 293 }
 294
 295 /*===========================================================================*
 296  *                      invalidate_filp_by_endpt                             *
 297  *===========================================================================*/
 298 void invalidate_filp_by_endpt(endpoint_t proc_e)
 299 {
 300   struct filp *f;
 301
 302   for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
 303         if (f->filp_count != 0 && f->filp_vno != NULL) {
 304                 if (f->filp_vno->v_fs_e == proc_e)
 305                         invalidate_filp(f);
 306         }
 307   }
 308 }
 309
/*===========================================================================*
 *                              lock_filp                                    *
 *===========================================================================*/
void
lock_filp(struct filp *filp, tll_access_t locktype)
{
/* Lock 'filp' and the vnode it refers to.  The vnode is dealt with first: it
 * is locked with 'locktype', unless the calling thread already holds the
 * vnode lock, in which case only a 'soft' lock is recorded on the filp.  The
 * filp's own mutex is acquired afterwards.  The filp must be in use
 * (filp_count > 0) and must have a vnode attached; both are asserted.
 */
  struct worker_thread *org_self;
  struct vnode *vp;

  assert(filp->filp_count > 0);
  vp = filp->filp_vno;
  assert(vp != NULL);

  /* Lock vnode only if we haven't already locked it. If already locked by us,
   * we're allowed to have one additional 'soft' lock. */
  if (tll_locked_by_me(&vp->v_lock)) {
        assert(filp->filp_softlock == NULL);
        /* Record the owner; unlock_filp() asserts on it and then skips the
         * vnode unlock for soft-locked filps.
         */
        filp->filp_softlock = fp;
  } else {
        /* We have to make an exception for vnodes belonging to pipes. Even
         * read(2) operations on pipes change the vnode and therefore require
         * exclusive access.
         */
        if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
                locktype = VNODE_WRITE;
        lock_vnode(vp, locktype);
  }

  assert(vp->v_ref_count > 0);  /* vnode still in use? */
  assert(filp->filp_vno == vp); /* vnode still what we think it is? */

  /* First try to get filp lock right off the bat */
  if (mutex_trylock(&filp->filp_lock) != 0) {

        /* Already in use, let's wait for our turn */
        /* NOTE(review): worker_suspend()/worker_resume() presumably save and
         * restore this worker thread's context around the blocking
         * mutex_lock() -- confirm against their definitions.
         */
        org_self = worker_suspend();

        if (mutex_lock(&filp->filp_lock) != 0)
                panic("unable to obtain lock on filp");

        worker_resume(org_self);
  }
}
 353
 354 /*===========================================================================*
 355  *                              unlock_filp                                  *
 356  *===========================================================================*/
 357 void
 358 unlock_filp(struct filp *filp)
 359 {
 360   /* If this filp holds a soft lock on the vnode, we must be the owner */
 361   if (filp->filp_softlock != NULL)
 362         assert(filp->filp_softlock == fp);
 363
 364   if (filp->filp_count > 0) {
 365         /* Only unlock vnode if filp is still in use */
 366
 367         /* and if we don't hold a soft lock */
 368         if (filp->filp_softlock == NULL) {
 369                 assert(tll_islocked(&(filp->filp_vno->v_lock)));
 370                 unlock_vnode(filp->filp_vno);
 371         }
 372   }
 373
 374   filp->filp_softlock = NULL;
 375   if (mutex_unlock(&filp->filp_lock) != 0)
 376         panic("unable to release lock on filp");
 377 }
 378
/*===========================================================================*
 *                              unlock_filps                                 *
 *===========================================================================*/
void
unlock_filps(struct filp *filp1, struct filp *filp2)
{
/* Unlock two filps that are tied to the same vnode. As a thread can lock a
 * vnode only once, unlocking the vnode twice would result in an error.
 * The shared vnode is therefore unlocked at most once, and only if both
 * filps are still in use.  Both soft lock markers are cleared, after which
 * the filp mutexes are released: 'filp2' first, then 'filp1'.  Failure to
 * release either mutex is fatal.
 */

  /* No NULL pointers and not equal */
  assert(filp1);
  assert(filp2);
  assert(filp1 != filp2);

  /* Must be tied to the same vnode and not NULL */
  assert(filp1->filp_vno == filp2->filp_vno);
  assert(filp1->filp_vno != NULL);

  if (filp1->filp_count > 0 && filp2->filp_count > 0) {
        /* Only unlock vnode if filps are still in use */
        unlock_vnode(filp1->filp_vno);
  }

  /* Drop any soft lock markers before letting go of the mutexes. */
  filp1->filp_softlock = NULL;
  filp2->filp_softlock = NULL;
  if (mutex_unlock(&filp2->filp_lock) != 0)
        panic("unable to release filp lock on filp2");
  if (mutex_unlock(&filp1->filp_lock) != 0)
        panic("unable to release filp lock on filp1");
}
 409
/*===========================================================================*
 *                              close_filp                                   *
 *===========================================================================*/
int
close_filp(struct filp * f, int may_suspend)
{
/* Close a file.  Will also unlock filp when done.  The 'may_suspend' flag
 * indicates whether the current process may be suspended closing a socket.
 * That is currently supported only when the user issued a close(2), and (only)
 * in that case may this function return SUSPEND instead of OK.  In all other
 * cases, this function will always return OK.  It will never return another
 * error code, for reasons explained below.
 *
 * On entry the caller must hold both the filp mutex and the vnode lock (both
 * asserted).  On return the vnode has been unlocked and the filp mutex has
 * been released.  One reference is dropped from the filp; when that was the
 * last one, the vnode reference is released with put_vnode() and the filp is
 * marked FILP_CLOSED.
 */
  int r, rw;
  dev_t dev;
  struct vnode *vp;

  /* Must be locked */
  assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
  assert(tll_islocked(&f->filp_vno->v_lock));

  vp = f->filp_vno;

  r = OK;

  /* Device-level close: only when this call drops the last reference and the
   * filp has not been invalidated before.
   */
  if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
        /* Check to see if the file is special. */
        if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode) ||
            S_ISSOCK(vp->v_mode)) {
                dev = vp->v_sdev;
                if (S_ISBLK(vp->v_mode))  {
                        lock_bsf();
                        if (vp->v_bfs_e == ROOT_FS_E && dev != ROOT_DEV) {
                                /* Invalidate the cache unless the special is
                                 * mounted. Be careful not to flush the root
                                 * file system either.
                                 */
                                (void) req_flush(vp->v_bfs_e, dev);
                        }
                        unlock_bsf();

                        (void) bdev_close(dev); /* Ignore errors */
                } else if (S_ISCHR(vp->v_mode)) {
                        (void) cdev_close(dev); /* Ignore errors */
                } else {
                        /*
                         * Sockets may take a while to be closed (SO_LINGER),
                         * and thus, we may issue a suspending close to a
                         * socket driver.  Getting this working for close(2) is
                         * the easy case, and that is all we support for now.
                         * However, there is also eg dup2(2), which if
                         * interrupted by a signal should technically fail
                         * without closing the file descriptor.  Then there are
                         * cases where the close should never block: process
                         * exit and close-on-exec for example.  Getting all
                         * such cases right is left to future work; currently
                         * they all perform thread-blocking socket closes and
                         * thus cause the socket to perform lingering in the
                         * background if at all.
                         */
                        assert(!may_suspend || job_call_nr == VFS_CLOSE);

                        /* A non-blocking socket never suspends its closer. */
                        if (f->filp_flags & O_NONBLOCK)
                                may_suspend = FALSE;

                        r = sdev_close(dev, may_suspend);

                        /*
                         * Returning a non-OK error is a bad idea, because it
                         * will leave the application wondering whether closing
                         * the file descriptor actually succeeded.
                         */
                        if (r != SUSPEND)
                                r = OK;
                }

                /* The device has been closed; mark the filp accordingly. */
                f->filp_mode = FILP_CLOSED;
        }
  }

  /* If the inode being closed is a pipe, release everyone hanging on it. */
  if (S_ISFIFO(vp->v_mode)) {
        /* Closing a filp with R_BIT set releases with VFS_WRITE, any other
         * filp releases with VFS_READ.
         */
        rw = (f->filp_mode & R_BIT ? VFS_WRITE : VFS_READ);
        release(vp, rw, susp_count);
  }

  /* Drop our reference.  The three outcomes are: last reference gone, a
   * corrupted (negative) count, or other references remaining.
   */
  if (--f->filp_count == 0) {
        if (S_ISFIFO(vp->v_mode)) {
                /* Last reader or writer is going. Tell PFS about latest
                 * pipe size.
                 */
                truncate_vnode(vp, vp->v_size);
        }

        /* Release the vnode lock and our vnode reference, then reset the
         * slot so that it can be reused.
         */
        unlock_vnode(f->filp_vno);
        put_vnode(f->filp_vno);
        f->filp_vno = NULL;
        f->filp_mode = FILP_CLOSED;
        f->filp_count = 0;
  } else if (f->filp_count < 0) {
        panic("VFS: invalid filp count: %d ino %llx/%llu", f->filp_count,
              vp->v_dev, vp->v_inode_nr);
  } else {
        /* Others still use this filp: only give up the vnode lock. */
        unlock_vnode(f->filp_vno);
  }

  /* NOTE(review): the result of mutex_unlock() is ignored here, whereas
   * unlock_filp() and unlock_filps() panic when it fails -- confirm that
   * this difference is intentional.
   */
  mutex_unlock(&f->filp_lock);

  return r;
}
 520
 521 /*===========================================================================*
 522  *                              do_copyfd                                    *
 523  *===========================================================================*/
 524 int do_copyfd(void)
 525 {
 526 /* Copy a file descriptor between processes, or close a remote file descriptor.
 527  * This call is used as back-call by device drivers (UDS, VND), and is expected
 528  * to be used in response to either an IOCTL to VND or a SEND or RECV socket
 529  * request to UDS.
 530  */
 531   struct fproc *rfp;
 532   struct filp *rfilp;
 533   struct vnode *vp;
 534   struct smap *sp;
 535   endpoint_t endpt;
 536   int r, fd, what, flags, slot;
 537
 538   /* This should be replaced with an ACL check. */
 539   if (!super_user) return(EPERM);
 540
 541   endpt = job_m_in.m_lsys_vfs_copyfd.endpt;
 542   fd = job_m_in.m_lsys_vfs_copyfd.fd;
 543   what = job_m_in.m_lsys_vfs_copyfd.what;
 544
 545   flags = what & COPYFD_FLAGS;
 546   what &= ~COPYFD_FLAGS;
 547
 548   if (isokendpt(endpt, &slot) != OK) return(EINVAL);
 549   rfp = &fproc[slot];
 550
 551   /* FIXME: we should now check that the user process is indeed blocked on an
 552    * IOCTL or socket call, so that we can safely mess with its file
 553    * descriptors.  We currently do not have the necessary state to verify this,
 554    * so we assume that the call is always used in the right way.
 555    */
 556
 557   /* Depending on the operation, get the file descriptor from the caller or the
 558    * user process.  Do not lock the filp yet: we first need to make sure that
 559    * locking it will not result in a deadlock.
 560    */
 561   rfilp = get_filp2((what == COPYFD_TO) ? fp : rfp, fd, VNODE_NONE);
 562   if (rfilp == NULL)
 563         return(err_code);
 564
 565   /* If the filp is involved in an IOCTL by the user process, locking the filp
 566    * here would result in a deadlock.  This would happen if a user process
 567    * passes in the file descriptor to the device node on which it is performing
 568    * the IOCTL.  We do not allow manipulation of such device nodes.  In
 569    * practice, this only applies to block-special files (and thus VND), because
 570    * socket files (as used by UDS) are unlocked during the socket operation.
 571    */
 572   if (rfilp->filp_ioctl_fp == rfp)
 573         return(EBADF);
 574
 575   /* Now we can safely lock the filp, copy or close it, and unlock it again. */
 576   lock_filp(rfilp, VNODE_READ);
 577
 578   switch (what) {
 579   case COPYFD_FROM:
 580         /*
 581          * If the caller is a socket driver (namely, UDS) and the file
 582          * descriptor being copied in is a socket for that socket driver, then
 583          * deny the call, because of at least two known issues.  Both issues
 584          * are related to UDS having an in-flight file descriptor that is the
 585          * last reference to a UDS socket:
 586          *
 587          * 1) if UDS tries to close the file descriptor, this will prompt VFS
 588          *    to close the underlying object, which is a UDS socket.  As a
 589          *    result, while UDS is blocked in the close(2), VFS will try to
 590          *    send a request to UDS to close the socket.  This results in a
 591          *    deadlock of the UDS service.
 592          *
 593          * 2) if a file descriptor for a UDS socket is sent across that same
 594          *    UDS socket, the socket will remain referenced by UDS, thus open
 595          *    in VFS, and therefore also open in UDS.  The socket and file
 596          *    descriptor will both remain in use for the rest of UDS' lifetime.
 597          *    This can easily result in denial-of-service in the UDS service.
 598          *    The same problem can be triggered using multiple sockets that
 599          *    have in-flight file descriptors referencing each other.
 600          *
 601          * A proper solution for these problems may consist of some form of
 602          * "soft reference counting" where VFS does not count UDS having a
 603          * filp open as a real reference.  That is tricky business, so for now
 604          * we prevent any such problems with the check here.
 605          */
 606         if ((vp = rfilp->filp_vno) != NULL && S_ISSOCK(vp->v_mode) &&
 607             (sp = get_smap_by_dev(vp->v_sdev, NULL)) != NULL &&
 608             sp->smap_endpt == who_e) {
 609                 r = EDEADLK;
 610
 611                 break;
 612         }
 613
 614         rfp = fp;
 615         flags &= ~COPYFD_CLOEXEC;
 616
 617         /* FALLTHROUGH */
 618   case COPYFD_TO:
 619         /* Find a free file descriptor slot in the local or remote process. */
 620         for (fd = 0; fd < OPEN_MAX; fd++)
 621                 if (rfp->fp_filp[fd] == NULL)
 622                         break;
 623
 624         /* If found, fill the slot and return the slot number. */
 625         if (fd < OPEN_MAX) {
 626                 rfp->fp_filp[fd] = rfilp;
 627                 if (flags & COPYFD_CLOEXEC)
 628                         FD_SET(fd, &rfp->fp_cloexec_set);
 629                 rfilp->filp_count++;
 630                 r = fd;
 631         } else
 632                 r = EMFILE;
 633
 634         break;
 635
 636   case COPYFD_CLOSE:
 637         /* This should be used ONLY to revert a successful copy-to operation,
 638          * and assumes that the filp is still in use by the caller as well.
 639          */
 640         if (rfilp->filp_count > 1) {
 641                 rfilp->filp_count--;
 642                 rfp->fp_filp[fd] = NULL;
 643                 r = OK;
 644         } else
 645                 r = EBADF;
 646
 647         break;
 648
 649   default:
 650         r = EINVAL;
 651   }
 652
 653   unlock_filp(rfilp);
 654
 655   return(r);
 656 }