minix/kernel/system.c

   1 /* This task handles the interface between the kernel and user-level servers.
   2  * System services can be accessed by doing a system call. System calls are
   3  * transformed into request messages, which are handled by this task. By
   4  * convention, a sys_call() is transformed in a SYS_CALL request message that
   5  * is handled in a function named do_call().
   6  *
   7  * A private call vector is used to map all system calls to the functions that
   8  * handle them. The actual handler functions are contained in separate files
   9  * to keep this file clean. The call vector is used in the system task's main
  10  * loop to handle all incoming requests.
  11  *
  12  * In addition to the main sys_task() entry point, which starts the main loop,
  13  * there are several other minor entry points:
  14  *   get_priv:          assign privilege structure to user or system process
  15  *   set_sendto_bit:    allow a process to send messages to a new target
  16  *   unset_sendto_bit:  disallow a process from sending messages to a target
  17  *   fill_sendto_mask:  fill the target mask of a given process
  18  *   send_sig:          send a signal directly to a system process
  19  *   cause_sig:         take action to cause a signal to occur via a signal mgr
  20  *   sig_delay_done:    tell PM that a process is not sending
  21  *   send_diag_sig:     send a diagnostics signal to interested processes
  22  *   get_randomness:    accumulate randomness in a buffer
  23  *   clear_endpoint:    remove a process' ability to send and receive messages
  24  *   sched_proc:        schedule a process
  25  *
  26  * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
  28  *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
  29  *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
  30  *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
  31  *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
  32  *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
  33  */
  34
  35 #include "kernel/system.h"
  36 #include "kernel/vm.h"
  37 #include "kernel/clock.h"
  38 #include <stdlib.h>
  39 #include <stddef.h>
  40 #include <assert.h>
  41 #include <signal.h>
  42 #include <unistd.h>
  43 #include <minix/endpoint.h>
  44 #include <minix/safecopies.h>
  45
  46 /* Declaration of the call vector that defines the mapping of system calls
  47  * to handler functions. The vector is initialized in sys_init() with map(),
  48  * which makes sure the system call numbers are ok. No space is allocated,
  49  * because the dummy is declared extern. If an illegal call is given, the
  50  * array size will be negative and this won't compile.
  51  */
  52 static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);
  53
  54 #define map(call_nr, handler)                                   \
  55     {   int call_index = call_nr-KERNEL_CALL;                           \
  56         assert(call_index >= 0 && call_index < NR_SYS_CALLS);                   \
  57     call_vec[call_index] = (handler)  ; }
  58
  59 static void kernel_call_finish(struct proc * caller, message *msg, int result)
  60 {
  61   if(result == VMSUSPEND) {
  62           /* Special case: message has to be saved for handling
  63            * until VM tells us it's allowed. VM has been notified
  64            * and we must wait for its reply to restart the call.
  65            */
  66           assert(RTS_ISSET(caller, RTS_VMREQUEST));
  67           assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
  68           caller->p_vmrequest.saved.reqmsg = *msg;
  69           caller->p_misc_flags |= MF_KCALL_RESUME;
  70   } else {
  71           /*
  72            * call is finished, we could have been suspended because of VM,
  73            * remove the request message
  74            */
  75           caller->p_vmrequest.saved.reqmsg.m_source = NONE;
  76           if (result != EDONTREPLY) {
  77                   /* copy the result as a message to the original user buffer */
  78                   msg->m_source = SYSTEM;
  79                   msg->m_type = result;         /* report status of call */
  80 #if DEBUG_IPC_HOOK
  81         hook_ipc_msgkresult(msg, caller);
  82 #endif
  83                   if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
  84                           printf("WARNING wrong user pointer 0x%08x from "
  85                                           "process %s / %d\n",
  86                                           caller->p_delivermsg_vir,
  87                                           caller->p_name,
  88                                           caller->p_endpoint);
  89                           cause_sig(proc_nr(caller), SIGSEGV);
  90                   }
  91           }
  92   }
  93 }
  94
  95 static int kernel_call_dispatch(struct proc * caller, message *msg)
  96 {
  97   int result = OK;
  98   int call_nr;
  99
 100 #if DEBUG_IPC_HOOK
 101         hook_ipc_msgkcall(msg, caller);
 102 #endif
 103   call_nr = msg->m_type - KERNEL_CALL;
 104
 105   /* See if the caller made a valid request and try to handle it. */
 106   if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */
 107           printf("SYSTEM: illegal request %d from %d.\n",
 108                           call_nr,msg->m_source);
 109           result = EBADREQUEST;                 /* illegal message type */
 110   }
 111   else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
 112           printf("SYSTEM: denied request %d from %d.\n",
 113                           call_nr,msg->m_source);
 114           result = ECALLDENIED;                 /* illegal message type */
 115   } else {
 116           /* handle the system call */
 117           if (call_vec[call_nr])
 118                   result = (*call_vec[call_nr])(caller, msg);
 119           else {
 120                   printf("Unused kernel call %d from %d\n",
 121                                   call_nr, caller->p_endpoint);
 122                   result = EBADREQUEST;
 123           }
 124   }
 125
 126   return result;
 127 }
 128
 129 /*===========================================================================*
 130  *                              kernel_call                                  *
 131  *===========================================================================*/
 132 /*
 133  * this function checks the basic syscall parameters and if accepted it
 134  * dispatches its handling to the right handler
 135  */
 136 void kernel_call(message *m_user, struct proc * caller)
 137 {
 138   int result = OK;
 139   message msg;
 140
 141   caller->p_delivermsg_vir = (vir_bytes) m_user;
 142   /*
 143    * the ldt and cr3 of the caller process is loaded because it just've trapped
 144    * into the kernel or was already set in switch_to_user() before we resume
 145    * execution of an interrupted kernel call
 146    */
 147   if (copy_msg_from_user(m_user, &msg) == 0) {
 148           msg.m_source = caller->p_endpoint;
 149           result = kernel_call_dispatch(caller, &msg);
 150   }
 151   else {
 152           printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
 153                           m_user, caller->p_name, caller->p_endpoint);
 154           cause_sig(proc_nr(caller), SIGSEGV);
 155           return;
 156   }
 157
 158
 159   /* remember who invoked the kcall so we can bill it its time */
 160   kbill_kcall = caller;
 161
 162   kernel_call_finish(caller, &msg, result);
 163 }
 164
 165 /*===========================================================================*
 166  *                              initialize                                   *
 167  *===========================================================================*/
 168 void system_init(void)
 169 {
 170   register struct priv *sp;
 171   int i;
 172
 173   /* Initialize IRQ handler hooks. Mark all hooks available. */
 174   for (i=0; i<NR_IRQ_HOOKS; i++) {
 175       irq_hooks[i].proc_nr_e = NONE;
 176   }
 177
 178   /* Initialize all alarm timers for all processes. */
 179   for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
 180     tmr_inittimer(&(sp->s_alarm_timer));
 181   }
 182
 183   /* Initialize the call vector to a safe default handler. Some system calls
 184    * may be disabled or nonexistant. Then explicitly map known calls to their
 185    * handler functions. This is done with a macro that gives a compile error
 186    * if an illegal call number is used. The ordering is not important here.
 187    */
 188   for (i=0; i<NR_SYS_CALLS; i++) {
 189       call_vec[i] = NULL;
 190   }
 191
 192   /* Process management. */
 193   map(SYS_FORK, do_fork);               /* a process forked a new process */
 194   map(SYS_EXEC, do_exec);               /* update process after execute */
 195   map(SYS_CLEAR, do_clear);             /* clean up after process exit */
 196   map(SYS_EXIT, do_exit);               /* a system process wants to exit */
 197   map(SYS_PRIVCTL, do_privctl);         /* system privileges control */
 198   map(SYS_TRACE, do_trace);             /* request a trace operation */
 199   map(SYS_SETGRANT, do_setgrant);       /* get/set own parameters */
 200   map(SYS_RUNCTL, do_runctl);           /* set/clear stop flag of a process */
 201   map(SYS_UPDATE, do_update);           /* update a process into another */
 202   map(SYS_STATECTL, do_statectl);       /* let a process control its state */
 203
 204   /* Signal handling. */
 205   map(SYS_KILL, do_kill);               /* cause a process to be signaled */
 206   map(SYS_GETKSIG, do_getksig);         /* signal manager checks for signals */
 207   map(SYS_ENDKSIG, do_endksig);         /* signal manager finished signal */
 208   map(SYS_SIGSEND, do_sigsend);         /* start POSIX-style signal */
 209   map(SYS_SIGRETURN, do_sigreturn);     /* return from POSIX-style signal */
 210
 211   /* Device I/O. */
 212   map(SYS_IRQCTL, do_irqctl);           /* interrupt control operations */
 213 #if defined(__i386__)
 214   map(SYS_DEVIO, do_devio);             /* inb, inw, inl, outb, outw, outl */
 215   map(SYS_VDEVIO, do_vdevio);           /* vector with devio requests */
 216 #endif
 217
 218   /* Memory management. */
 219   map(SYS_MEMSET, do_memset);           /* write char to memory area */
 220   map(SYS_VMCTL, do_vmctl);             /* various VM process settings */
 221
 222   /* Copying. */
 223   map(SYS_UMAP, do_umap);               /* map virtual to physical address */
 224   map(SYS_UMAP_REMOTE, do_umap_remote); /* do_umap for non-caller process */
 225   map(SYS_VUMAP, do_vumap);             /* vectored virtual to physical map */
 226   map(SYS_VIRCOPY, do_vircopy);         /* use pure virtual addressing */
 227   map(SYS_PHYSCOPY, do_copy);           /* use physical addressing */
 228   map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
 229   map(SYS_SAFECOPYTO, do_safecopy_to);  /* copy with pre-granted permission */
 230   map(SYS_VSAFECOPY, do_vsafecopy);     /* vectored safecopy */
 231
 232   /* safe memset */
 233   map(SYS_SAFEMEMSET, do_safememset);   /* safememset */
 234
 235   /* Clock functionality. */
 236   map(SYS_TIMES, do_times);             /* get uptime and process times */
 237   map(SYS_SETALARM, do_setalarm);       /* schedule a synchronous alarm */
 238   map(SYS_STIME, do_stime);             /* set the boottime */
 239   map(SYS_SETTIME, do_settime);         /* set the system time (realtime) */
 240   map(SYS_VTIMER, do_vtimer);           /* set or retrieve a virtual timer */
 241
 242   /* System control. */
 243   map(SYS_ABORT, do_abort);             /* abort MINIX */
 244   map(SYS_GETINFO, do_getinfo);         /* request system information */
 245   map(SYS_DIAGCTL, do_diagctl);         /* diagnostics-related functionality */
 246
 247   /* Profiling. */
 248   map(SYS_SPROF, do_sprofile);         /* start/stop statistical profiling */
 249
 250   /* arm-specific. */
 251 #if defined(__arm__)
 252   map(SYS_PADCONF, do_padconf);         /* configure pinmux */
 253 #endif
 254
 255   /* i386-specific. */
 256 #if defined(__i386__)
 257   map(SYS_READBIOS, do_readbios);       /* read from BIOS locations */
 258   map(SYS_IOPENABLE, do_iopenable);     /* Enable I/O */
 259   map(SYS_SDEVIO, do_sdevio);           /* phys_insb, _insw, _outsb, _outsw */
 260 #endif
 261
 262   /* Machine state switching. */
 263   map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
 264   map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */
 265
 266   /* Scheduling */
 267   map(SYS_SCHEDULE, do_schedule);       /* reschedule a process */
 268   map(SYS_SCHEDCTL, do_schedctl);       /* change process scheduler */
 269
 270 }
 271 /*===========================================================================*
 272  *                              get_priv                                     *
 273  *===========================================================================*/
 274 int get_priv(
 275   register struct proc *rc,             /* new (child) process pointer */
 276   int priv_id                           /* privilege id */
 277 )
 278 {
 279 /* Allocate a new privilege structure for a system process. Privilege ids
 280  * can be assigned either statically or dynamically.
 281  */
 282   register struct priv *sp;                 /* privilege structure */
 283
 284   if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
 285       for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
 286           if (sp->s_proc_nr == NONE) break;
 287       if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
 288   }
 289   else {                                    /* allocate slot from id */
 290       if(!is_static_priv_id(priv_id)) {
 291           return EINVAL;                    /* invalid static priv id */
 292       }
 293       if(priv[priv_id].s_proc_nr != NONE) {
 294           return EBUSY;                     /* slot already in use */
 295       }
 296       sp = &priv[priv_id];
 297   }
 298   rc->p_priv = sp;                          /* assign new slot */
 299   rc->p_priv->s_proc_nr = proc_nr(rc);      /* set association */
 300
 301   return(OK);
 302 }
 303
 304 /*===========================================================================*
 305  *                              set_sendto_bit                               *
 306  *===========================================================================*/
 307 void set_sendto_bit(const struct proc *rp, int id)
 308 {
 309 /* Allow a process to send messages to the process(es) associated with the
 310  * system privilege structure with the given ID.
 311  */
 312
 313   /* Disallow the process from sending to a process privilege structure with no
 314    * associated process, and disallow the process from sending to itself.
 315    */
 316   if (id_to_nr(id) == NONE || priv_id(rp) == id) {
 317         unset_sys_bit(priv(rp)->s_ipc_to, id);
 318         return;
 319   }
 320
 321   set_sys_bit(priv(rp)->s_ipc_to, id);
 322
 323   /* The process that this process can now send to, must be able to reply (or
 324    * vice versa). Therefore, its send mask should be updated as well. Ignore
 325    * receivers that don't support traps other than RECEIVE, they can't reply
 326    * or send messages anyway.
 327    */
 328   if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
 329       set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
 330 }
 331
 332 /*===========================================================================*
 333  *                              unset_sendto_bit                             *
 334  *===========================================================================*/
 335 void unset_sendto_bit(const struct proc *rp, int id)
 336 {
 337 /* Prevent a process from sending to another process. Retain the send mask
 338  * symmetry by also unsetting the bit for the other direction.
 339  */
 340
 341   unset_sys_bit(priv(rp)->s_ipc_to, id);
 342
 343   unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
 344 }
 345
 346 /*===========================================================================*
 347  *                            fill_sendto_mask                               *
 348  *===========================================================================*/
 349 void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
 350 {
 351   int i;
 352
 353   for (i=0; i < NR_SYS_PROCS; i++) {
 354         if (get_sys_bit(*map, i))
 355                 set_sendto_bit(rp, i);
 356         else
 357                 unset_sendto_bit(rp, i);
 358   }
 359 }
 360
 361 /*===========================================================================*
 362  *                              send_sig                                     *
 363  *===========================================================================*/
 364 int send_sig(endpoint_t ep, int sig_nr)
 365 {
 366 /* Notify a system process about a signal. This is straightforward. Simply
 367  * set the signal that is to be delivered in the pending signals map and
 368  * send a notification with source SYSTEM.
 369  */
 370   register struct proc *rp;
 371   struct priv *priv;
 372   int proc_nr;
 373
 374   if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
 375         return EINVAL;
 376
 377   rp = proc_addr(proc_nr);
 378   priv = priv(rp);
 379   if(!priv) return ENOENT;
 380   sigaddset(&priv->s_sig_pending, sig_nr);
 381   mini_notify(proc_addr(SYSTEM), rp->p_endpoint);
 382
 383   return OK;
 384 }
 385
 386 /*===========================================================================*
 387  *                              cause_sig                                    *
 388  *===========================================================================*/
 389 void cause_sig(proc_nr_t proc_nr, int sig_nr)
 390 {
 391 /* A system process wants to send signal 'sig_nr' to process 'proc_nr'.
 392  * Examples are:
 393  *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 394  *  - TTY wanting to cause SIGINT upon getting a DEL
 395  *  - FS wanting to cause SIGPIPE for a broken pipe
 396  * Signals are handled by sending a message to the signal manager assigned to
 397  * the process. This function handles the signals and makes sure the signal
 398  * manager gets them by sending a notification. The process being signaled
 399  * is blocked while the signal manager has not finished all signals for it.
 400  * Race conditions between calls to this function and the system calls that
 401  * process pending kernel signals cannot exist. Signal related functions are
 402  * only called when a user process causes a CPU exception and from the kernel
 403  * process level, which runs to completion.
 404  */
 405   register struct proc *rp, *sig_mgr_rp;
 406   endpoint_t sig_mgr;
 407   int sig_mgr_proc_nr;
 408   int s;
 409
 410   /* Lookup signal manager. */
 411   rp = proc_addr(proc_nr);
 412   sig_mgr = priv(rp)->s_sig_mgr;
 413   if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;
 414
 415   /* If the target is the signal manager of itself, send the signal directly. */
 416   if(rp->p_endpoint == sig_mgr) {
 417        if(SIGS_IS_LETHAL(sig_nr)) {
 418            /* If the signal is lethal, see if a backup signal manager exists. */
 419            sig_mgr = priv(rp)->s_bak_sig_mgr;
 420            if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
 421                priv(rp)->s_sig_mgr = sig_mgr;
 422                priv(rp)->s_bak_sig_mgr = NONE;
 423                sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
 424                RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
 425                cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
 426                return;
 427            }
 428            /* We are out of luck. Time to panic. */
 429            proc_stacktrace(rp);
 430            panic("cause_sig: sig manager %d gets lethal signal %d for itself",
 431                 rp->p_endpoint, sig_nr);
 432        }
 433        sigaddset(&priv(rp)->s_sig_pending, sig_nr);
 434        if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
 435         panic("send_sig failed");
 436        return;
 437   }
 438
 439   s = sigismember(&rp->p_pending, sig_nr);
 440   /* Check if the signal is already pending. Process it otherwise. */
 441   if (!s) {
 442       sigaddset(&rp->p_pending, sig_nr);
 443       if (! (RTS_ISSET(rp, RTS_SIGNALED))) {            /* other pending */
 444           RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
 445           if(OK != send_sig(sig_mgr, SIGKSIG))
 446                 panic("send_sig failed");
 447       }
 448   }
 449 }
 450
 451 /*===========================================================================*
 452  *                              sig_delay_done                               *
 453  *===========================================================================*/
 454 void sig_delay_done(struct proc *rp)
 455 {
 456 /* A process is now known not to send any direct messages.
 457  * Tell PM that the stop delay has ended, by sending a signal to the process.
 458  * Used for actual signal delivery.
 459  */
 460
 461   rp->p_misc_flags &= ~MF_SIG_DELAY;
 462
 463   cause_sig(proc_nr(rp), SIGSNDELAY);
 464 }
 465
 466 /*===========================================================================*
 467  *                              send_diag_sig                                *
 468  *===========================================================================*/
 469 void send_diag_sig(void)
 470 {
 471 /* Send a SIGKMESS signal to all processes in receiving updates about new
 472  * diagnostics messages.
 473  */
 474   struct priv *privp;
 475   endpoint_t ep;
 476
 477   for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
 478         if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
 479                 ep = proc_addr(privp->s_proc_nr)->p_endpoint;
 480                 send_sig(ep, SIGKMESS);
 481         }
 482   }
 483 }
 484
 485 /*===========================================================================*
 486  *                               clear_memreq                                *
 487  *===========================================================================*/
 488 static void clear_memreq(struct proc *rp)
 489 {
 490   struct proc **rpp;
 491
 492   if (!RTS_ISSET(rp, RTS_VMREQUEST))
 493         return; /* nothing to do */
 494
 495   for (rpp = &vmrequest; *rpp != NULL;
 496      rpp = &(*rpp)->p_vmrequest.nextrequestor) {
 497         if (*rpp == rp) {
 498                 *rpp = rp->p_vmrequest.nextrequestor;
 499                 break;
 500         }
 501   }
 502
 503   RTS_UNSET(rp, RTS_VMREQUEST);
 504 }
 505
 506 /*===========================================================================*
 507  *                               clear_ipc                                   *
 508  *===========================================================================*/
 509 static void clear_ipc(
 510   register struct proc *rc      /* slot of process to clean up */
 511 )
 512 {
 513 /* Clear IPC data for a given process slot. */
 514   struct proc **xpp;                    /* iterate over caller queue */
 515
 516   if (RTS_ISSET(rc, RTS_SENDING)) {
 517       int target_proc;
 518
 519       okendpt(rc->p_sendto_e, &target_proc);
 520       xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
 521       while (*xpp) {            /* check entire queue */
 522           if (*xpp == rc) {                     /* process is on the queue */
 523               *xpp = (*xpp)->p_q_link;          /* replace by next process */
 524 #if DEBUG_ENABLE_IPC_WARNINGS
 525               printf("endpoint %d / %s removed from queue at %d\n",
 526                   rc->p_endpoint, rc->p_name, rc->p_sendto_e);
 527 #endif
 528               break;                            /* can only be queued once */
 529           }
 530           xpp = &(*xpp)->p_q_link;              /* proceed to next queued */
 531       }
 532       RTS_UNSET(rc, RTS_SENDING);
 533   }
 534   RTS_UNSET(rc, RTS_RECEIVING);
 535 }
 536
 537 /*===========================================================================*
 538  *                               clear_endpoint                              *
 539  *===========================================================================*/
 540 void clear_endpoint(struct proc * rc)
 541 {
 542 /* Clean up the slot of the process given as 'rc'. */
 543   if(isemptyp(rc)) panic("clear_proc: empty process: %d",  rc->p_endpoint);
 544
 545
 546 #if DEBUG_IPC_HOOK
 547   hook_ipc_clear(rc);
 548 #endif
 549
 550   /* Make sure that the exiting process is no longer scheduled. */
 551   RTS_SET(rc, RTS_NO_ENDPOINT);
 552   if (priv(rc)->s_flags & SYS_PROC)
 553   {
 554         priv(rc)->s_asynsize= 0;
 555   }
 556
 557   /* If the process happens to be queued trying to send a
 558    * message, then it must be removed from the message queues.
 559    */
 560   clear_ipc(rc);
 561
 562   /* Likewise, if another process was sending or receive a message to or from
 563    * the exiting process, it must be alerted that process no longer is alive.
 564    * Check all processes.
 565    */
 566   clear_ipc_refs(rc, EDEADSRCDST);
 567
 568   /* Finally, if the process was blocked on a VM request, remove it from the
 569    * queue of processes waiting to be processed by VM.
 570    */
 571   clear_memreq(rc);
 572 }
 573
 574 /*===========================================================================*
 575  *                             clear_ipc_refs                                *
 576  *===========================================================================*/
 577 void clear_ipc_refs(
 578   register struct proc *rc,             /* slot of process to clean up */
 579   int caller_ret                        /* code to return on callers */
 580 )
 581 {
 582 /* Clear IPC references for a given process slot. */
 583   struct proc *rp;                      /* iterate over process table */
 584   int src_id;
 585
 586   /* Tell processes that sent asynchronous messages to 'rc' they are not
 587    * going to be delivered */
 588   while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
 589       cancel_async(proc_addr(id_to_nr(src_id)), rc);
 590
 591   for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
 592       if(isemptyp(rp))
 593         continue;
 594
 595       /* Unset pending notification bits. */
 596       unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
 597
 598       /* Unset pending asynchronous messages */
 599       unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);
 600
 601       /* Check if process depends on given process. */
 602       if (P_BLOCKEDON(rp) == rc->p_endpoint) {
 603           rp->p_reg.retreg = caller_ret;        /* return requested code */
 604           clear_ipc(rp);
 605       }
 606   }
 607 }
 608
 609 /*===========================================================================*
 610  *                              kernel_call_resume                           *
 611  *===========================================================================*/
 612 void kernel_call_resume(struct proc *caller)
 613 {
 614         int result;
 615
 616         assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
 617         assert(!RTS_ISSET(caller, RTS_VMREQUEST));
 618
 619         assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);
 620
 621         /*
 622         printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
 623                         caller->p_name, caller->p_endpoint,
 624                         caller->p_rts_flags, caller->p_misc_flags);
 625          */
 626
 627         /* re-execute the kernel call, with MF_KCALL_RESUME still set so
 628          * the call knows this is a retry.
 629          */
 630         result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
 631         /*
 632          * we are resuming the kernel call so we have to remove this flag so it
 633          * can be set again
 634          */
 635         caller->p_misc_flags &= ~MF_KCALL_RESUME;
 636         kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
 637 }
 638
 639 /*===========================================================================*
 640  *                               sched_proc                                  *
 641  *===========================================================================*/
 642 int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
 643 {
 644         /* Make sure the values given are within the allowed range.*/
 645         if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
 646                 return(EINVAL);
 647
 648         if (quantum < 1 && quantum != -1)
 649                 return(EINVAL);
 650
 651 #ifdef CONFIG_SMP
 652         if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
 653                 return(EINVAL);
 654         if (cpu != -1 && !(cpu_is_ready(cpu)))
 655                 return EBADCPU;
 656 #endif
 657
 658         /* In some cases, we might be rescheduling a runnable process. In such
 659          * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
 660          * flag before the generic unset to dequeue/enqueue the process
 661          */
 662
 663         /* FIXME this preempts the process, do we really want to do that ?*/
 664
 665         /* FIXME this is a problem for SMP if the processes currently runs on a
 666          * different CPU */
 667         if (proc_is_runnable(p)) {
 668 #ifdef CONFIG_SMP
 669                 if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
 670                         smp_schedule_migrate_proc(p, cpu);
 671                 }
 672 #endif
 673
 674                 RTS_SET(p, RTS_NO_QUANTUM);
 675         }
 676
 677         if (proc_is_runnable(p))
 678                 RTS_SET(p, RTS_NO_QUANTUM);
 679
 680         if (priority != -1)
 681                 p->p_priority = priority;
 682         if (quantum != -1) {
 683                 p->p_quantum_size_ms = quantum;
 684                 p->p_cpu_time_left = ms_2_cpu_time(quantum);
 685         }
 686 #ifdef CONFIG_SMP
 687         if (cpu != -1)
 688                 p->p_cpu = cpu;
 689 #endif
 690
 691         if (niced)
 692                 p->p_misc_flags |= MF_NICED;
 693         else
 694                 p->p_misc_flags &= ~MF_NICED;
 695
 696         /* Clear the scheduling bit and enqueue the process */
 697         RTS_UNSET(p, RTS_NO_QUANTUM);
 698
 699         return OK;
 700 }
 701
 702 /*===========================================================================*
 703  *                              add_ipc_filter                               *
 704  *===========================================================================*/
 705 int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
 706         size_t length)
 707 {
 708         int num_elements, r;
 709         ipc_filter_t *ipcf, **ipcfp;
 710
 711         /* Validate arguments. */
 712         if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
 713                 return EINVAL;
 714
 715         if (length % sizeof(ipc_filter_el_t) != 0)
 716                 return EINVAL;
 717
 718         num_elements = length / sizeof(ipc_filter_el_t);
 719         if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
 720                 return E2BIG;
 721
 722         /* Allocate a new IPC filter slot. */
 723         IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
 724         if (ipcf == NULL)
 725                 return ENOMEM;
 726
 727         /* Fill details. */
 728         ipcf->num_elements = num_elements;
 729         ipcf->next = NULL;
 730         r = data_copy(rp->p_endpoint, address,
 731                 KERNEL, (vir_bytes)ipcf->elements, length);
 732         if (r == OK)
 733                 r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
 734         if (r != OK) {
 735                 IPCF_POOL_FREE_SLOT(ipcf);
 736                 return r;
 737         }
 738
 739         /* Add the new filter at the end of the IPC filter chain. */
 740         for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
 741             ipcfp = &(*ipcfp)->next)
 742                 ;
 743         *ipcfp = ipcf;
 744
 745         return OK;
 746 }
 747
 748 /*===========================================================================*
 749  *                              clear_ipc_filters                            *
 750  *===========================================================================*/
 751 void clear_ipc_filters(struct proc *rp)
 752 {
 753         ipc_filter_t *curr_ipcf, *ipcf;
 754
 755         ipcf = priv(rp)->s_ipcf;
 756         while (ipcf != NULL) {
 757                 curr_ipcf = ipcf;
 758                 ipcf = ipcf->next;
 759                 IPCF_POOL_FREE_SLOT(curr_ipcf);
 760         }
 761
 762         priv(rp)->s_ipcf = NULL;
 763
 764         /* VM is a special case here: since the cleared IPC filter may have
 765          * blocked memory handling requests, we may now have to tell VM that
 766          * there are "new" requests pending.
 767          */
 768         if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
 769                 if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
 770                         panic("send_sig failed");
 771 }
 772
 773 /*===========================================================================*
 774  *                              check_ipc_filter                             *
 775  *===========================================================================*/
 776 int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
 777 {
 778         ipc_filter_el_t *ipcf_el;
 779         int i, num_elements, flags;
 780
 781         if (ipcf == NULL)
 782                 return OK;
 783
 784         num_elements = ipcf->num_elements;
 785         flags = 0;
 786         for (i = 0; i < num_elements; i++) {
 787                 ipcf_el = &ipcf->elements[i];
 788                 if (!IPCF_EL_CHECK(ipcf_el))
 789                         return EINVAL;
 790                 flags |= ipcf_el->flags;
 791         }
 792
 793         if (fill_flags)
 794                 ipcf->flags = flags;
 795         else if (ipcf->flags != flags)
 796                 return EINVAL;
 797         return OK;
 798 }
 799
 800 /*===========================================================================*
 801  *                              allow_ipc_filtered_msg                       *
 802  *===========================================================================*/
 803 int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
 804         vir_bytes m_src_v, message *m_src_p)
 805 {
 806         int i, r, num_elements, get_mtype, allow;
 807         ipc_filter_t *ipcf;
 808         ipc_filter_el_t *ipcf_el;
 809         message m_buff;
 810
 811         ipcf = priv(rp)->s_ipcf;
 812         if (ipcf == NULL)
 813                 return TRUE; /* no IPC filters, always allow */
 814
 815         if (m_src_p == NULL) {
 816                 assert(m_src_v != 0);
 817
 818                 /* Should we copy in the message type? */
 819                 get_mtype = FALSE;
 820                 do {
 821 #if DEBUG_DUMPIPCF
 822                         if (TRUE) {
 823 #else
 824                         if (ipcf->flags & IPCF_MATCH_M_TYPE) {
 825 #endif
 826                                 get_mtype = TRUE;
 827                                 break;
 828                         }
 829                         ipcf = ipcf->next;
 830                 } while (ipcf);
 831                 ipcf = priv(rp)->s_ipcf; /* reset to start */
 832
 833                 /* If so, copy it in from the process. */
 834                 if (get_mtype) {
 835                         r = data_copy(src_e,
 836                             m_src_v + offsetof(message, m_type), KERNEL,
 837                             (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
 838                         if (r != OK) {
 839                                 /* allow for now, this will fail later anyway */
 840 #if DEBUG_DUMPIPCF
 841                                 printf("KERNEL: allow_ipc_filtered_msg: data "
 842                                     "copy error %d, allowing message...\n", r);
 843 #endif
 844                                 return TRUE;
 845                         }
 846                 }
 847                 m_src_p = &m_buff;
 848         }
 849
 850         m_src_p->m_source = src_e;
 851
 852         /* See if the message is allowed. */
 853         allow = (ipcf->type == IPCF_BLACKLIST);
 854         do {
 855                 if (allow != (ipcf->type == IPCF_WHITELIST)) {
 856                         num_elements = ipcf->num_elements;
 857                         for (i = 0; i < num_elements; i++) {
 858                                 ipcf_el = &ipcf->elements[i];
 859                                 if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
 860                                         allow = (ipcf->type == IPCF_WHITELIST);
 861                                         break;
 862                                 }
 863                         }
 864                 }
 865                 ipcf = ipcf->next;
 866         } while (ipcf);
 867
 868 #if DEBUG_DUMPIPCF
 869         printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
 870             TRUE /*printparams*/);
 871 #endif
 872
 873         return allow;
 874 }
 875
 876 /*===========================================================================*
 877  *                        allow_ipc_filtered_memreq                          *
 878  *===========================================================================*/
 879 int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
 880 {
 881         /* Determine whether VM should receive a request to handle memory
 882          * that is the result of process 'src_rp' trying to access currently
 883          * unavailable memory in process 'dst_rp'. Return TRUE if VM should
 884          * be given the request, FALSE otherwise.
 885          */
 886
 887         struct proc *vmp;
 888         message m_buf;
 889
 890         vmp = proc_addr(VM_PROC_NR);
 891
 892         /* If VM has no filter in place, all requests should go through. */
 893         if (priv(vmp)->s_ipcf == NULL)
 894                 return TRUE;
 895
 896         /* VM obtains memory requests in response to a SIGKMEM signal, which
 897          * is a notification sent from SYSTEM. Thus, if VM blocks such
 898          * notifications, it also should not get any memory requests. Of
 899          * course, VM should not be asking for requests in that case either,
 900          * but the extra check doesn't hurt.
 901          */
 902         m_buf.m_type = NOTIFY_MESSAGE;
 903         if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
 904                 return FALSE;
 905
 906         /* A more refined policy may be implemented here, for example to
 907          * ensure that both the source and the destination (if different)
 908          * are in the group of processes that VM wants to talk to. Since VM
 909          * is basically not able to handle any memory requests during an
 910          * update, we will not get here, and none of that is needed.
 911          */
 912         return TRUE;
 913 }
 914
 915 /*===========================================================================*
 916  *                             priv_add_irq                                  *
 917  *===========================================================================*/
 918 int priv_add_irq(struct proc *rp, int irq)
 919 {
 920         struct priv *priv = priv(rp);
 921         int i;
 922
 923         priv->s_flags |= CHECK_IRQ;     /* Check IRQ */
 924
 925         /* When restarting a driver, check if it already has the permission */
 926         for (i = 0; i < priv->s_nr_irq; i++) {
 927                 if (priv->s_irq_tab[i] == irq)
 928                         return OK;
 929         }
 930
 931         i= priv->s_nr_irq;
 932         if (i >= NR_IRQ) {
 933                 printf("do_privctl: %d already has %d irq's.\n",
 934                         rp->p_endpoint, i);
 935                 return ENOMEM;
 936         }
 937         priv->s_irq_tab[i]= irq;
 938         priv->s_nr_irq++;
 939         return OK;
 940 }
 941
 942 /*===========================================================================*
 943  *                             priv_add_io                                   *
 944  *===========================================================================*/
 945 int priv_add_io(struct proc *rp, struct io_range *ior)
 946 {
 947         struct priv *priv = priv(rp);
 948         int i;
 949
 950         priv->s_flags |= CHECK_IO_PORT; /* Check I/O accesses */
 951
 952         for (i = 0; i < priv->s_nr_io_range; i++) {
 953                 if (priv->s_io_tab[i].ior_base == ior->ior_base &&
 954                         priv->s_io_tab[i].ior_limit == ior->ior_limit)
 955                         return OK;
 956         }
 957
 958         i= priv->s_nr_io_range;
 959         if (i >= NR_IO_RANGE) {
 960                 printf("do_privctl: %d already has %d i/o ranges.\n",
 961                         rp->p_endpoint, i);
 962                 return ENOMEM;
 963         }
 964
 965         priv->s_io_tab[i] = *ior;
 966         priv->s_nr_io_range++;
 967         return OK;
 968 }
 969
 970 /*===========================================================================*
 971  *                             priv_add_mem                                  *
 972  *===========================================================================*/
 973 int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
 974 {
 975         struct priv *priv = priv(rp);
 976         int i;
 977
 978         priv->s_flags |= CHECK_MEM;     /* Check memory mappings */
 979
 980         /* When restarting a driver, check if it already has the permission */
 981         for (i = 0; i < priv->s_nr_mem_range; i++) {
 982                 if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
 983                         priv->s_mem_tab[i].mr_limit == memr->mr_limit)
 984                         return OK;
 985         }
 986
 987         i= priv->s_nr_mem_range;
 988         if (i >= NR_MEM_RANGE) {
 989                 printf("do_privctl: %d already has %d mem ranges.\n",
 990                         rp->p_endpoint, i);
 991                 return ENOMEM;
 992         }
 993         priv->s_mem_tab[i]= *memr;
 994         priv->s_nr_mem_range++;
 995         return OK;
 996 }
 997