minix/servers/vfs/main.c

   1 /*
   2  * This file contains the main program of the Virtual File System (VFS): a loop
   3  * that gets messages requesting work, carries out the work, and sends replies.
   4  *
   5  * The entry points into this file are:
   6  *   main:      main program of the Virtual File System
   7  *   reply:     send a reply to a process after the requested work is done
   8  *
   9  */
  10
  11 #include "fs.h"
  12 #include <fcntl.h>
  13 #include <string.h>
  14 #include <stdio.h>
  15 #include <signal.h>
  16 #include <assert.h>
  17 #include <stdlib.h>
  18 #include <sys/ioc_memory.h>
  19 #include <sys/svrctl.h>
  20 #include <sys/select.h>
  21 #include <minix/callnr.h>
  22 #include <minix/com.h>
  23 #include <minix/const.h>
  24 #include <minix/endpoint.h>
  25 #include <minix/safecopies.h>
  26 #include <minix/debug.h>
  27 #include <minix/vfsif.h>
  28 #include "file.h"
  29 #include "vmnt.h"
  30 #include "vnode.h"
  31
  32 #if ENABLE_SYSCALL_STATS
  33 EXTERN unsigned long calls_stats[NR_VFS_CALLS];
  34 #endif
  35
  36 /* Thread related prototypes */
  37 static void do_reply(struct worker_thread *wp);
  38 static void do_work(void);
  39 static void do_init_root(void);
  40 static void handle_work(void (*func)(void));
  41
  42 static int get_work(void);
  43 static void service_pm(void);
  44 static int unblock(struct fproc *rfp);
  45
  46 /* SEF functions and variables. */
  47 static void sef_local_startup(void);
  48 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
  49 static int sef_cb_init_lu(int type, sef_init_info_t *info);
  50
  51 /*===========================================================================*
  52  *                              main                                         *
  53  *===========================================================================*/
  54 int main(void)
  55 {
  56 /* This is the main program of the file system.  The main loop consists of
  57  * three major activities: getting new work, processing the work, and sending
  58  * the reply.  This loop never terminates as long as the file system runs.
  59  */
  60   int transid;
  61   struct worker_thread *wp;
  62
  63   /* SEF local startup. */
  64   sef_local_startup();
  65
  66   printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);
  67
  68   /* This is the main loop that gets work, processes it, and sends replies. */
  69   while (TRUE) {
  70         worker_yield(); /* let other threads run */
  71
  72         send_work();
  73
  74         /* The get_work() function returns TRUE if we have a new message to
  75          * process. It returns FALSE if it spawned other thread activities.
  76          */
  77         if (!get_work())
  78                 continue;
  79
  80         transid = TRNS_GET_ID(m_in.m_type);
  81         if (IS_VFS_FS_TRANSID(transid)) {
  82                 wp = worker_get((thread_t) transid - VFS_TRANSID);
  83                 if (wp == NULL || wp->w_fp == NULL) {
  84                         printf("VFS: spurious message %d from endpoint %d\n",
  85                                 m_in.m_type, m_in.m_source);
  86                         continue;
  87                 }
  88                 m_in.m_type = TRNS_DEL_ID(m_in.m_type);
  89                 do_reply(wp);
  90                 continue;
  91         } else if (who_e == PM_PROC_NR) { /* Calls from PM */
  92                 /* Special control messages from PM */
  93                 service_pm();
  94                 continue;
  95         } else if (is_notify(call_nr)) {
  96                 /* A task ipc_notify()ed us */
  97                 switch (who_e) {
  98                 case DS_PROC_NR:
  99                         /* Start a thread to handle DS events, if no thread
 100                          * is pending or active for it already. DS is not
 101                          * supposed to issue calls to VFS or be the subject of
 102                          * postponed PM requests, so this should be no problem.
 103                          */
 104                         if (worker_can_start(fp))
 105                                 handle_work(ds_event);
 106                         break;
 107                 case KERNEL:
 108                         mthread_stacktraces();
 109                         break;
 110                 case CLOCK:
 111                         /* Timer expired. Used only for select(). Check it. */
 112                         expire_timers(m_in.m_notify.timestamp);
 113                         break;
 114                 default:
 115                         printf("VFS: ignoring notification from %d\n", who_e);
 116                 }
 117                 continue;
 118         } else if (who_p < 0) { /* i.e., message comes from a task */
 119                 /* We're going to ignore this message. Tasks should
 120                  * send ipc_notify()s only.
 121                  */
 122                  printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
 123                  continue;
 124         }
 125
 126         if (IS_BDEV_RS(call_nr)) {
 127                 /* We've got results for a block device request. */
 128                 bdev_reply();
 129         } else if (IS_CDEV_RS(call_nr)) {
 130                 /* We've got results for a character device request. */
 131                 cdev_reply();
 132         } else if (IS_SDEV_RS(call_nr)) {
 133                 /* We've got results for a socket driver request. */
 134                 sdev_reply();
 135         } else {
 136                 /* Normal syscall. This spawns a new thread. */
 137                 handle_work(do_work);
 138         }
 139   }
 140   return(OK);                           /* shouldn't come here */
 141 }
 142
 143 /*===========================================================================*
 144  *                             handle_work                                   *
 145  *===========================================================================*/
 146 static void handle_work(void (*func)(void))
 147 {
 148 /* Handle asynchronous device replies and new system calls. If the originating
 149  * endpoint is an FS endpoint, take extra care not to get in deadlock. */
 150   struct vmnt *vmp = NULL;
 151   endpoint_t proc_e;
 152   int use_spare = FALSE;
 153
 154   proc_e = m_in.m_source;
 155
 156   if (fp->fp_flags & FP_SRV_PROC) {
 157         vmp = find_vmnt(proc_e);
 158         if (vmp != NULL) {
 159                 /* A callback from an FS endpoint. Can do only one at once. */
 160                 if (vmp->m_flags & VMNT_CALLBACK) {
 161                         replycode(proc_e, EAGAIN);
 162                         return;
 163                 }
 164                 /* Already trying to resolve a deadlock? Can't handle more. */
 165                 if (worker_available() == 0) {
 166                         replycode(proc_e, EAGAIN);
 167                         return;
 168                 }
 169                 /* A thread is available. Set callback flag. */
 170                 vmp->m_flags |= VMNT_CALLBACK;
 171                 if (vmp->m_flags & VMNT_MOUNTING) {
 172                         vmp->m_flags |= VMNT_FORCEROOTBSF;
 173                 }
 174         }
 175
 176         /* Use the spare thread to handle this request if needed. */
 177         use_spare = TRUE;
 178   }
 179
 180   worker_start(fp, func, &m_in, use_spare);
 181 }
 182
 183
 184 /*===========================================================================*
 185  *                             do_reply                                      *
 186  *===========================================================================*/
 187 static void do_reply(struct worker_thread *wp)
 188 {
 189   struct vmnt *vmp = NULL;
 190
 191   if(who_e != VM_PROC_NR && (vmp = find_vmnt(who_e)) == NULL)
 192         panic("Couldn't find vmnt for endpoint %d", who_e);
 193
 194   if (wp->w_task != who_e) {
 195         printf("VFS: tid %d: expected %d to reply, not %d\n",
 196                 wp->w_tid, wp->w_task, who_e);
 197         return;
 198   }
 199   /* It should be impossible to trigger the following case, but it is here for
 200    * consistency reasons: worker_stop() resets w_sendrec but not w_task.
 201    */
 202   if (wp->w_sendrec == NULL) {
 203         printf("VFS: tid %d: late reply from %d ignored\n", wp->w_tid, who_e);
 204         return;
 205   }
 206   *wp->w_sendrec = m_in;
 207   wp->w_sendrec = NULL;
 208   wp->w_task = NONE;
 209   if(vmp) vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
 210   worker_signal(wp); /* Continue this thread */
 211 }
 212
 213 /*===========================================================================*
 214  *                             do_pending_pipe                               *
 215  *===========================================================================*/
 216 static void do_pending_pipe(void)
 217 {
 218   vir_bytes buf;
 219   size_t nbytes, cum_io;
 220   int r, op, fd;
 221   struct filp *f;
 222   tll_access_t locktype;
 223
 224   assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
 225
 226   /*
 227    * We take all our needed resumption state from the m_in message, which is
 228    * filled by unblock().  Since this is an internal resumption, there is no
 229    * need to perform extensive checks on the message fields.
 230    */
 231   fd = job_m_in.m_lc_vfs_readwrite.fd;
 232   buf = job_m_in.m_lc_vfs_readwrite.buf;
 233   nbytes = job_m_in.m_lc_vfs_readwrite.len;
 234   cum_io = job_m_in.m_lc_vfs_readwrite.cum_io;
 235
 236   f = fp->fp_filp[fd];
 237   assert(f != NULL);
 238
 239   locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
 240   op = (job_call_nr == VFS_READ) ? READING : WRITING;
 241   lock_filp(f, locktype);
 242
 243   r = rw_pipe(op, who_e, f, job_call_nr, fd, buf, nbytes, cum_io);
 244
 245   if (r != SUSPEND) { /* Do we have results to report? */
 246         /* Process is writing, but there is no reader. Send a SIGPIPE signal.
 247          * This should match the corresponding code in read_write().
 248          */
 249         if (r == EPIPE && op == WRITING) {
 250                 if (!(f->filp_flags & O_NOSIGPIPE))
 251                         sys_kill(fp->fp_endpoint, SIGPIPE);
 252         }
 253
 254         replycode(fp->fp_endpoint, r);
 255   }
 256
 257   unlock_filp(f);
 258 }
 259
 260 /*===========================================================================*
 261  *                             do_work                                       *
 262  *===========================================================================*/
 263 static void do_work(void)
 264 {
 265   unsigned int call_index;
 266   int error;
 267
 268   if (fp->fp_pid == PID_FREE) {
 269         /* Process vanished before we were able to handle request.
 270          * Replying has no use. Just drop it.
 271          */
 272         return;
 273   }
 274
 275   memset(&job_m_out, 0, sizeof(job_m_out));
 276
 277   /* At this point we assume that we're dealing with a call that has been
 278    * made specifically to VFS. Typically it will be a POSIX call from a
 279    * normal process, but we also handle a few calls made by drivers such
 280    * such as UDS and VND through here. Call the internal function that
 281    * does the work.
 282    */
 283   if (IS_VFS_CALL(job_call_nr)) {
 284         call_index = (unsigned int) (job_call_nr - VFS_BASE);
 285
 286         if (call_index < NR_VFS_CALLS && call_vec[call_index] != NULL) {
 287 #if ENABLE_SYSCALL_STATS
 288                 calls_stats[call_index]++;
 289 #endif
 290                 error = (*call_vec[call_index])();
 291         } else
 292                 error = ENOSYS;
 293   } else
 294         error = ENOSYS;
 295
 296   /* Copy the results back to the user and send reply. */
 297   if (error != SUSPEND) reply(&job_m_out, fp->fp_endpoint, error);
 298 }
 299
 300 /*===========================================================================*
 301  *                              sef_cb_lu_prepare                            *
 302  *===========================================================================*/
 303 static int sef_cb_lu_prepare(int state)
 304 {
 305 /* This function is called to decide whether we can enter the given live
 306  * update state, and to prepare for such an update. If we are requested to
 307  * update to a request-free or protocol-free state, make sure there is no work
 308  * pending or being processed, and shut down all worker threads.
 309  */
 310
 311   switch (state) {
 312   case SEF_LU_STATE_REQUEST_FREE:
 313   case SEF_LU_STATE_PROTOCOL_FREE:
 314         if (!worker_idle()) {
 315                 printf("VFS: worker threads not idle, blocking update\n");
 316                 break;
 317         }
 318
 319         worker_cleanup();
 320
 321         return OK;
 322   }
 323
 324   return ENOTREADY;
 325 }
 326
 327 /*===========================================================================*
 328  *                             sef_cb_lu_state_changed                       *
 329  *===========================================================================*/
 330 static void sef_cb_lu_state_changed(int old_state, int state)
 331 {
 332 /* Worker threads (especially their stacks) pose a serious problem for state
 333  * transfer during live update, and therefore, we shut down all worker threads
 334  * during live update and restart them afterwards. This function is called in
 335  * the old VFS instance when the state changed. We use it to restart worker
 336  * threads after a failed live update.
 337  */
 338
 339   if (state != SEF_LU_STATE_NULL)
 340         return;
 341
 342   switch (old_state) {
 343   case SEF_LU_STATE_REQUEST_FREE:
 344   case SEF_LU_STATE_PROTOCOL_FREE:
 345         worker_init();
 346   }
 347 }
 348
 349 /*===========================================================================*
 350  *                              sef_cb_init_lu                               *
 351  *===========================================================================*/
 352 static int sef_cb_init_lu(int type, sef_init_info_t *info)
 353 {
 354 /* This function is called in the new VFS instance during a live update. */
 355   int r;
 356
 357   /* Perform regular state transfer. */
 358   if ((r = SEF_CB_INIT_LU_DEFAULT(type, info)) != OK)
 359         return r;
 360
 361   /* Recreate worker threads, if necessary. */
 362   switch (info->prepare_state) {
 363   case SEF_LU_STATE_REQUEST_FREE:
 364   case SEF_LU_STATE_PROTOCOL_FREE:
 365         worker_init();
 366   }
 367
 368   return OK;
 369 }
 370
 371 /*===========================================================================*
 372  *                             sef_local_startup                             *
 373  *===========================================================================*/
 374 static void sef_local_startup(void)
 375 {
 376   /* Register init callbacks. */
 377   sef_setcb_init_fresh(sef_cb_init_fresh);
 378   sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
 379
 380   /* Register live update callbacks. */
 381   sef_setcb_init_lu(sef_cb_init_lu);
 382   sef_setcb_lu_prepare(sef_cb_lu_prepare);
 383   sef_setcb_lu_state_changed(sef_cb_lu_state_changed);
 384   sef_setcb_lu_state_isvalid(sef_cb_lu_state_isvalid_standard);
 385
 386   /* Let SEF perform startup. */
 387   sef_startup();
 388 }
 389
 390 /*===========================================================================*
 391  *                              sef_cb_init_fresh                            *
 392  *===========================================================================*/
 393 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
 394 {
 395 /* Initialize the virtual file server. */
 396   int s, i;
 397   struct fproc *rfp;
 398   message mess;
 399   struct rprocpub rprocpub[NR_BOOT_PROCS];
 400
 401   self = NULL;
 402   verbose = 0;
 403
 404   /* Initialize proc endpoints to NONE */
 405   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 406         rfp->fp_endpoint = NONE;
 407         rfp->fp_pid = PID_FREE;
 408   }
 409
 410   /* Initialize the process table with help of the process manager messages.
 411    * Expect one message for each system process with its slot number and pid.
 412    * When no more processes follow, the magic process number NONE is sent.
 413    * Then, stop and synchronize with the PM.
 414    */
 415   do {
 416         if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
 417                 panic("VFS: couldn't receive from PM: %d", s);
 418
 419         if (mess.m_type != VFS_PM_INIT)
 420                 panic("unexpected message from PM: %d", mess.m_type);
 421
 422         if (NONE == mess.VFS_PM_ENDPT) break;
 423
 424         rfp = &fproc[mess.VFS_PM_SLOT];
 425         rfp->fp_flags = FP_NOFLAGS;
 426         rfp->fp_pid = mess.VFS_PM_PID;
 427         rfp->fp_endpoint = mess.VFS_PM_ENDPT;
 428         rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
 429         rfp->fp_realuid = (uid_t) SYS_UID;
 430         rfp->fp_effuid = (uid_t) SYS_UID;
 431         rfp->fp_realgid = (gid_t) SYS_GID;
 432         rfp->fp_effgid = (gid_t) SYS_GID;
 433         rfp->fp_umask = ~0;
 434   } while (TRUE);                       /* continue until process NONE */
 435   mess.m_type = OK;                     /* tell PM that we succeeded */
 436   s = ipc_send(PM_PROC_NR, &mess);              /* send synchronization message */
 437
 438   system_hz = sys_hz();
 439
 440   /* Subscribe to block and character driver events. */
 441   s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
 442   if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
 443
 444   /* Initialize worker threads */
 445   worker_init();
 446
 447   /* Initialize global locks */
 448   if (mthread_mutex_init(&bsf_lock, NULL) != 0)
 449         panic("VFS: couldn't initialize block special file lock");
 450
 451   init_dmap();                  /* Initialize device table. */
 452   init_smap();                  /* Initialize socket table. */
 453
 454   /* Map all the services in the boot image. */
 455   if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
 456                             (vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
 457         panic("sys_safecopyfrom failed: %d", s);
 458   }
 459   for (i = 0; i < NR_BOOT_PROCS; i++) {
 460         if (rprocpub[i].in_use) {
 461                 if ((s = map_service(&rprocpub[i])) != OK) {
 462                         panic("VFS: unable to map service: %d", s);
 463                 }
 464         }
 465   }
 466
 467   /* Initialize locks and initial values for all processes. */
 468   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
 469         if (mutex_init(&rfp->fp_lock, NULL) != 0)
 470                 panic("unable to initialize fproc lock");
 471         rfp->fp_worker = NULL;
 472 #if LOCK_DEBUG
 473         rfp->fp_vp_rdlocks = 0;
 474         rfp->fp_vmnt_rdlocks = 0;
 475 #endif
 476
 477         /* Initialize process directories. mount_fs will set them to the
 478          * correct values.
 479          */
 480         for (i = 0; i < OPEN_MAX; i++)
 481                 rfp->fp_filp[i] = NULL;
 482         rfp->fp_rd = NULL;
 483         rfp->fp_wd = NULL;
 484   }
 485
 486   init_vnodes();                /* init vnodes */
 487   init_vmnts();                 /* init vmnt structures */
 488   init_select();                /* init select() structures */
 489   init_filps();                 /* Init filp structures */
 490
 491   /* Mount PFS and initial file system root. */
 492   worker_start(fproc_addr(VFS_PROC_NR), do_init_root, &mess /*unused*/,
 493         FALSE /*use_spare*/);
 494
 495   return(OK);
 496 }
 497
 498 /*===========================================================================*
 499  *                             do_init_root                                  *
 500  *===========================================================================*/
 501 static void do_init_root(void)
 502 {
 503   char *mount_type, *mount_label;
 504   int r;
 505
 506   /* Disallow requests from e.g. init(8) while doing the initial mounting. */
 507   worker_allow(FALSE);
 508
 509   /* Mount the pipe file server. */
 510   mount_pfs();
 511
 512   /* Mount the root file system. */
 513   mount_type = "mfs";       /* FIXME: use boot image process name instead */
 514   mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
 515
 516   r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_type,
 517         mount_label);
 518   if (r != OK)
 519         panic("Failed to initialize root");
 520
 521   /* All done with mounting, allow requests now. */
 522   worker_allow(TRUE);
 523 }
 524
 525 /*===========================================================================*
 526  *                              lock_proc                                    *
 527  *===========================================================================*/
 528 void lock_proc(struct fproc *rfp)
 529 {
 530   int r;
 531   struct worker_thread *org_self;
 532
 533   r = mutex_trylock(&rfp->fp_lock);
 534   if (r == 0) return;
 535
 536   org_self = worker_suspend();
 537
 538   if ((r = mutex_lock(&rfp->fp_lock)) != 0)
 539         panic("unable to lock fproc lock: %d", r);
 540
 541   worker_resume(org_self);
 542 }
 543
 544 /*===========================================================================*
 545  *                              unlock_proc                                  *
 546  *===========================================================================*/
 547 void unlock_proc(struct fproc *rfp)
 548 {
 549   int r;
 550
 551   if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
 552         panic("Failed to unlock: %d", r);
 553 }
 554
 555 /*===========================================================================*
 556  *                              thread_cleanup                               *
 557  *===========================================================================*/
 558 void thread_cleanup(void)
 559 {
 560 /* Perform cleanup actions for a worker thread. */
 561
 562 #if LOCK_DEBUG
 563   check_filp_locks_by_me();
 564   check_vnode_locks_by_me(fp);
 565   check_vmnt_locks_by_me(fp);
 566 #endif
 567
 568   if (fp->fp_flags & FP_SRV_PROC) {
 569         struct vmnt *vmp;
 570
 571         if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL) {
 572                 vmp->m_flags &= ~VMNT_CALLBACK;
 573         }
 574   }
 575 }
 576
 577 /*===========================================================================*
 578  *                              get_work                                     *
 579  *===========================================================================*/
 580 static int get_work(void)
 581 {
 582   /* Normally wait for new input.  However, if 'reviving' is nonzero, a
 583    * suspended process must be awakened.  Return TRUE if there is a message to
 584    * process (usually newly received, but possibly a resumed request), or FALSE
 585    * if a thread for other activities has been spawned instead.
 586    */
 587   int r, proc_p;
 588   register struct fproc *rp;
 589
 590   if (reviving != 0) {
 591         /* Find a suspended process. */
 592         for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
 593                 if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED))
 594                         return unblock(rp); /* So main loop can process job */
 595
 596         panic("VFS: get_work couldn't revive anyone");
 597   }
 598
 599   for(;;) {
 600         /* Normal case.  No one to revive. Get a useful request. */
 601         if ((r = sef_receive(ANY, &m_in)) != OK) {
 602                 panic("VFS: sef_receive error: %d", r);
 603         }
 604
 605         proc_p = _ENDPOINT_P(m_in.m_source);
 606         if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
 607         else fp = &fproc[proc_p];
 608
 609         /* Negative who_p is never used to access the fproc array. Negative
 610          * numbers (kernel tasks) are treated in a special way.
 611          */
 612         if (fp && fp->fp_endpoint == NONE) {
 613                 printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
 614                         m_in.m_source, who_p, m_in.m_type);
 615                 continue;
 616         }
 617
 618         /* Internal consistency check; our mental image of process numbers and
 619          * endpoints must match with how the rest of the system thinks of them.
 620          */
 621         if (fp && fp->fp_endpoint != who_e) {
 622                 if (fproc[who_p].fp_endpoint == NONE)
 623                         printf("slot unknown even\n");
 624
 625                 panic("VFS: receive endpoint inconsistent (source %d, who_p "
 626                         "%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
 627                         fproc[who_p].fp_endpoint, who_e);
 628         }
 629
 630         return TRUE;
 631   }
 632   /* NOTREACHED */
 633 }
 634
 635 /*===========================================================================*
 636  *                              reply                                        *
 637  *===========================================================================*/
 638 void reply(message *m_out, endpoint_t whom, int result)
 639 {
 640 /* Send a reply to a user process.  If the send fails, just ignore it. */
 641   int r;
 642
 643   m_out->m_type = result;
 644   r = ipc_sendnb(whom, m_out);
 645   if (r != OK) {
 646         printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
 647                 result, whom, r);
 648         util_stacktrace();
 649   }
 650 }
 651
 652 /*===========================================================================*
 653  *                              replycode                                    *
 654  *===========================================================================*/
 655 void replycode(endpoint_t whom, int result)
 656 {
 657 /* Send a reply to a user process.  If the send fails, just ignore it. */
 658   message m_out;
 659
 660   memset(&m_out, 0, sizeof(m_out));
 661
 662   reply(&m_out, whom, result);
 663 }
 664
 665 /*===========================================================================*
 666  *                              service_pm_postponed                         *
 667  *===========================================================================*/
 668 void service_pm_postponed(void)
 669 {
 670   int r, term_signal;
 671   vir_bytes core_path;
 672   vir_bytes exec_path, stack_frame, pc, newsp, ps_str;
 673   size_t exec_path_len, stack_frame_len;
 674   endpoint_t proc_e;
 675   message m_out;
 676
 677   memset(&m_out, 0, sizeof(m_out));
 678
 679   switch(job_call_nr) {
 680   case VFS_PM_EXEC:
 681         proc_e = job_m_in.VFS_PM_ENDPT;
 682         exec_path = (vir_bytes) job_m_in.VFS_PM_PATH;
 683         exec_path_len = (size_t) job_m_in.VFS_PM_PATH_LEN;
 684         stack_frame = (vir_bytes) job_m_in.VFS_PM_FRAME;
 685         stack_frame_len = (size_t) job_m_in.VFS_PM_FRAME_LEN;
 686         ps_str = (vir_bytes) job_m_in.VFS_PM_PS_STR;
 687
 688         assert(proc_e == fp->fp_endpoint);
 689
 690         r = pm_exec(exec_path, exec_path_len, stack_frame, stack_frame_len,
 691                 &pc, &newsp, &ps_str);
 692
 693         /* Reply status to PM */
 694         m_out.m_type = VFS_PM_EXEC_REPLY;
 695         m_out.VFS_PM_ENDPT = proc_e;
 696         m_out.VFS_PM_PC = (void *) pc;
 697         m_out.VFS_PM_STATUS = r;
 698         m_out.VFS_PM_NEWSP = (void *) newsp;
 699         m_out.VFS_PM_NEWPS_STR = ps_str;
 700
 701         break;
 702
 703   case VFS_PM_EXIT:
 704         proc_e = job_m_in.VFS_PM_ENDPT;
 705
 706         assert(proc_e == fp->fp_endpoint);
 707
 708         pm_exit();
 709
 710         /* Reply dummy status to PM for synchronization */
 711         m_out.m_type = VFS_PM_EXIT_REPLY;
 712         m_out.VFS_PM_ENDPT = proc_e;
 713
 714         break;
 715
 716   case VFS_PM_DUMPCORE:
 717         proc_e = job_m_in.VFS_PM_ENDPT;
 718         term_signal = job_m_in.VFS_PM_TERM_SIG;
 719         core_path = (vir_bytes) job_m_in.VFS_PM_PATH;
 720
 721         /* A zero signal used to indicate that a coredump should be generated
 722          * without terminating the target process, but this was broken in so
 723          * many ways that we no longer support this. Userland should implement
 724          * this functionality itself, for example through ptrace(2).
 725          */
 726         if (term_signal == 0)
 727                 panic("no termination signal given for coredump!");
 728
 729         assert(proc_e == fp->fp_endpoint);
 730
 731         r = pm_dumpcore(term_signal, core_path);
 732
 733         /* Reply status to PM */
 734         m_out.m_type = VFS_PM_CORE_REPLY;
 735         m_out.VFS_PM_ENDPT = proc_e;
 736         m_out.VFS_PM_STATUS = r;
 737
 738         break;
 739
 740   case VFS_PM_UNPAUSE:
 741         proc_e = job_m_in.VFS_PM_ENDPT;
 742
 743         assert(proc_e == fp->fp_endpoint);
 744
 745         unpause();
 746
 747         m_out.m_type = VFS_PM_UNPAUSE_REPLY;
 748         m_out.VFS_PM_ENDPT = proc_e;
 749
 750         break;
 751
 752   default:
 753         panic("Unhandled postponed PM call %d", job_m_in.m_type);
 754   }
 755
 756   r = ipc_send(PM_PROC_NR, &m_out);
 757   if (r != OK)
 758         panic("service_pm_postponed: ipc_send failed: %d", r);
 759 }
 760
 761 /*===========================================================================*
 762  *                              service_pm                                   *
 763  *===========================================================================*/
 764 static void service_pm(void)
 765 {
 766 /* Process a request from PM. This function is called from the main thread, and
 767  * may therefore not block. Any requests that may require blocking the calling
 768  * thread must be executed in a separate thread. Aside from VFS_PM_REBOOT, all
 769  * requests from PM involve another, target process: for example, PM tells VFS
 770  * that a process is performing a setuid() call. For some requests however,
 771  * that other process may not be idle, and in that case VFS must serialize the
 772  * PM request handling with any operation is it handling for that target
 773  * process. As it happens, the requests that may require blocking are also the
 774  * ones where the target process may not be idle. For both these reasons, such
 775  * requests are run in worker threads associated to the target process.
 776  */
 777   struct fproc *rfp;
 778   int r, slot;
 779   message m_out;
 780
 781   memset(&m_out, 0, sizeof(m_out));
 782
 783   switch (call_nr) {
 784   case VFS_PM_SETUID:
 785         {
 786                 endpoint_t proc_e;
 787                 uid_t euid, ruid;
 788
 789                 proc_e = m_in.VFS_PM_ENDPT;
 790                 euid = m_in.VFS_PM_EID;
 791                 ruid = m_in.VFS_PM_RID;
 792
 793                 pm_setuid(proc_e, euid, ruid);
 794
 795                 m_out.m_type = VFS_PM_SETUID_REPLY;
 796                 m_out.VFS_PM_ENDPT = proc_e;
 797         }
 798         break;
 799
 800   case VFS_PM_SETGID:
 801         {
 802                 endpoint_t proc_e;
 803                 gid_t egid, rgid;
 804
 805                 proc_e = m_in.VFS_PM_ENDPT;
 806                 egid = m_in.VFS_PM_EID;
 807                 rgid = m_in.VFS_PM_RID;
 808
 809                 pm_setgid(proc_e, egid, rgid);
 810
 811                 m_out.m_type = VFS_PM_SETGID_REPLY;
 812                 m_out.VFS_PM_ENDPT = proc_e;
 813         }
 814         break;
 815
 816   case VFS_PM_SETSID:
 817         {
 818                 endpoint_t proc_e;
 819
 820                 proc_e = m_in.VFS_PM_ENDPT;
 821                 pm_setsid(proc_e);
 822
 823                 m_out.m_type = VFS_PM_SETSID_REPLY;
 824                 m_out.VFS_PM_ENDPT = proc_e;
 825         }
 826         break;
 827
 828   case VFS_PM_EXEC:
 829   case VFS_PM_EXIT:
 830   case VFS_PM_DUMPCORE:
 831   case VFS_PM_UNPAUSE:
 832         {
 833                 endpoint_t proc_e = m_in.VFS_PM_ENDPT;
 834
 835                 if(isokendpt(proc_e, &slot) != OK) {
 836                         printf("VFS: proc ep %d not ok\n", proc_e);
 837                         return;
 838                 }
 839
 840                 rfp = &fproc[slot];
 841
 842                 /* PM requests on behalf of a proc are handled after the
 843                  * system call that might be in progress for that proc has
 844                  * finished. If the proc is not busy, we start a new thread.
 845                  */
 846                 worker_start(rfp, NULL, &m_in, FALSE /*use_spare*/);
 847
 848                 return;
 849         }
 850   case VFS_PM_FORK:
 851   case VFS_PM_SRV_FORK:
 852         {
 853                 endpoint_t pproc_e, proc_e;
 854                 pid_t child_pid;
 855                 uid_t reuid;
 856                 gid_t regid;
 857
 858                 pproc_e = m_in.VFS_PM_PENDPT;
 859                 proc_e = m_in.VFS_PM_ENDPT;
 860                 child_pid = m_in.VFS_PM_CPID;
 861                 reuid = m_in.VFS_PM_REUID;
 862                 regid = m_in.VFS_PM_REGID;
 863
 864                 pm_fork(pproc_e, proc_e, child_pid);
 865                 m_out.m_type = VFS_PM_FORK_REPLY;
 866
 867                 if (call_nr == VFS_PM_SRV_FORK) {
 868                         m_out.m_type = VFS_PM_SRV_FORK_REPLY;
 869                         pm_setuid(proc_e, reuid, reuid);
 870                         pm_setgid(proc_e, regid, regid);
 871                 }
 872
 873                 m_out.VFS_PM_ENDPT = proc_e;
 874         }
 875         break;
 876   case VFS_PM_SETGROUPS:
 877         {
 878                 endpoint_t proc_e;
 879                 int group_no;
 880                 gid_t *group_addr;
 881
 882                 proc_e = m_in.VFS_PM_ENDPT;
 883                 group_no = m_in.VFS_PM_GROUP_NO;
 884                 group_addr = (gid_t *) m_in.VFS_PM_GROUP_ADDR;
 885
 886                 pm_setgroups(proc_e, group_no, group_addr);
 887
 888                 m_out.m_type = VFS_PM_SETGROUPS_REPLY;
 889                 m_out.VFS_PM_ENDPT = proc_e;
 890         }
 891         break;
 892
 893   case VFS_PM_REBOOT:
 894         /* Reboot requests are not considered postponed PM work and are instead
 895          * handled from a separate worker thread that is associated with PM's
 896          * process. PM makes no regular VFS calls, and thus, from VFS's
 897          * perspective, PM is always idle. Therefore, we can safely do this.
 898          * We do assume that PM sends us only one VFS_PM_REBOOT message at
 899          * once, or ever for that matter. :)
 900          */
 901         worker_start(fproc_addr(PM_PROC_NR), pm_reboot, &m_in,
 902                 FALSE /*use_spare*/);
 903
 904         return;
 905
 906     default:
 907         printf("VFS: don't know how to handle PM request %d\n", call_nr);
 908
 909         return;
 910   }
 911
 912   r = ipc_send(PM_PROC_NR, &m_out);
 913   if (r != OK)
 914         panic("service_pm: ipc_send failed: %d", r);
 915 }
 916
 917
 918 /*===========================================================================*
 919  *                              unblock                                      *
 920  *===========================================================================*/
 921 static int
 922 unblock(struct fproc *rfp)
 923 {
 924 /* Unblock a process that was previously blocked on a pipe or a lock.  This is
 925  * done by reconstructing the original request and continuing/repeating it.
 926  * This function returns TRUE when it has restored a request for execution, and
 927  * FALSE if the caller should continue looking for work to do.
 928  */
 929   int blocked_on;
 930
 931   blocked_on = rfp->fp_blocked_on;
 932
 933   /* Reconstruct the original request from the saved data. */
 934   memset(&m_in, 0, sizeof(m_in));
 935   m_in.m_source = rfp->fp_endpoint;
 936   switch (blocked_on) {
 937   case FP_BLOCKED_ON_PIPE:
 938         assert(rfp->fp_pipe.callnr == VFS_READ ||
 939             rfp->fp_pipe.callnr == VFS_WRITE);
 940         m_in.m_type = rfp->fp_pipe.callnr;
 941         m_in.m_lc_vfs_readwrite.fd = rfp->fp_pipe.fd;
 942         m_in.m_lc_vfs_readwrite.buf = rfp->fp_pipe.buf;
 943         m_in.m_lc_vfs_readwrite.len = rfp->fp_pipe.nbytes;
 944         m_in.m_lc_vfs_readwrite.cum_io = rfp->fp_pipe.cum_io;
 945         break;
 946   case FP_BLOCKED_ON_FLOCK:
 947         assert(rfp->fp_flock.cmd == F_SETLKW);
 948         m_in.m_type = VFS_FCNTL;
 949         m_in.m_lc_vfs_fcntl.fd = rfp->fp_flock.fd;
 950         m_in.m_lc_vfs_fcntl.cmd = rfp->fp_flock.cmd;
 951         m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_flock.arg;
 952         break;
 953   default:
 954         panic("unblocking call blocked on %d ??", blocked_on);
 955   }
 956
 957   rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;      /* no longer blocked */
 958   rfp->fp_flags &= ~FP_REVIVED;
 959   reviving--;
 960   assert(reviving >= 0);
 961
 962   /* Pending pipe reads/writes cannot be repeated as is, and thus require a
 963    * special resumption procedure.
 964    */
 965   if (blocked_on == FP_BLOCKED_ON_PIPE) {
 966         worker_start(rfp, do_pending_pipe, &m_in, FALSE /*use_spare*/);
 967         return(FALSE);  /* Retrieve more work */
 968   }
 969
 970   /* A lock request. Repeat the original request as though it just came in. */
 971   fp = rfp;
 972   return(TRUE); /* We've unblocked a process */
 973 }