kernel/proc.c

   1 /* This file contains essentially all of the process and message handling.
   2  * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
   3  * There is one entry point from the outside:
   4  *
   5  *   sys_call:        a system call, i.e., the kernel is trapped with an INT
   6  *
   7  * Changes:
   8  *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
   9  *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
  10  *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
  11  *   May 24, 2005     new notification system call  (Jorrit N. Herder)
  12  *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
  13  *
  14  * The code here is critical to make everything work and is important for the
  15  * overall performance of the system. A large fraction of the code deals with
  16  * list manipulation. To make this both easy to understand and fast to execute
  17  * pointer pointers are used throughout the code. Pointer pointers prevent
  18  * exceptions for the head or tail of a linked list.
  19  *
  20  *  node_t *queue, *new_node;   // assume these as global variables
  21  *  node_t **xpp = &queue;      // get pointer pointer to head of queue
  22  *  while (*xpp != NULL)        // find last pointer of the linked list
  23  *      xpp = &(*xpp)->next;    // get pointer to next pointer
  24  *  *xpp = new_node;            // now replace the end (the NULL pointer)
  25  *  new_node->next = NULL;      // and mark the new end of the list
  26  *
  27  * For example, when adding a new node to the end of the list, one normally
  28  * makes an exception for an empty list and looks up the end of the list for
  29  * nonempty lists. As shown above, this is not required with pointer pointers.
  30  */
  31
  32 #include <minix/com.h>
  33 #include <minix/ipcconst.h>
  34 #include <stddef.h>
  35 #include <signal.h>
  36 #include <assert.h>
  37
  38 #include "kernel.h"
  39 #include "vm.h"
  40 #include "clock.h"
  41 #include "spinlock.h"
  42 #include "arch_proto.h"
  43
  44 #include <minix/syslib.h>
  45
  46 /* Scheduling and message passing functions */
  47 static void idle(void);
  48 /**
  49  * Made public for use in clock.c (for user-space scheduling)
  50 static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
  51         *m_ptr, int flags);
  52 */
  53 static int mini_receive(struct proc *caller_ptr, endpoint_t src,
  54         message *m_ptr, int flags);
  55 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
  56         size);
  57 static int deadlock(int function, register struct proc *caller,
  58         endpoint_t src_dst_e);
  59 static int try_async(struct proc *caller_ptr);
  60 static int try_one(struct proc *src_ptr, struct proc *dst_ptr);
  61 static struct proc * pick_proc(void);
  62 static void enqueue_head(struct proc *rp);
  63
  64 /* all idles share the same idle_priv structure */
  65 static struct priv idle_priv;
  66
  67 static void set_idle_name(char * name, int n)
  68 {
  69         int i, c;
  70         int p_z = 0;
  71
  72         if (n > 999)
  73                 n = 999;
  74
  75         name[0] = 'i';
  76         name[1] = 'd';
  77         name[2] = 'l';
  78         name[3] = 'e';
  79
  80         for (i = 4, c = 100; c > 0; c /= 10) {
  81                 int digit;
  82
  83                 digit = n / c;
  84                 n -= digit * c;
  85
  86                 if (p_z || digit != 0 || c == 1) {
  87                         p_z = 1;
  88                         name[i++] = '0' + digit;
  89                 }
  90         }
  91
  92         name[i] = '\0';
  93
  94 }
  95
  96
  97 #define PICK_ANY        1
  98 #define PICK_HIGHERONLY 2
  99
 100 #define BuildNotifyMessage(m_ptr, src, dst_ptr) \
 101         (m_ptr)->m_type = NOTIFY_MESSAGE;                               \
 102         (m_ptr)->NOTIFY_TIMESTAMP = get_uptime();                       \
 103         switch (src) {                                                  \
 104         case HARDWARE:                                                  \
 105                 (m_ptr)->NOTIFY_ARG = priv(dst_ptr)->s_int_pending;     \
 106                 priv(dst_ptr)->s_int_pending = 0;                       \
 107                 break;                                                  \
 108         case SYSTEM:                                                    \
 109                 (m_ptr)->NOTIFY_ARG = priv(dst_ptr)->s_sig_pending;     \
 110                 priv(dst_ptr)->s_sig_pending = 0;                       \
 111                 break;                                                  \
 112         }
 113
 114 void proc_init(void)
 115 {
 116         struct proc * rp;
 117         struct priv *sp;
 118         int i;
 119
 120         /* Clear the process table. Anounce each slot as empty and set up
 121          * mappings for proc_addr() and proc_nr() macros. Do the same for the
 122          * table with privilege structures for the system processes.
 123          */
 124         for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
 125                 rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
 126                 rp->p_magic = PMAGIC;
 127                 rp->p_nr = i;                   /* proc number from ptr */
 128                 rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
 129                 rp->p_scheduler = NULL;         /* no user space scheduler */
 130                 rp->p_priority = 0;             /* no priority */
 131                 rp->p_quantum_size_ms = 0;      /* no quantum size */
 132
 133                 /* arch-specific initialization */
 134                 arch_proc_reset(rp);
 135         }
 136         for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
 137                 sp->s_proc_nr = NONE;           /* initialize as free */
 138                 sp->s_id = (sys_id_t) i;        /* priv structure index */
 139                 ppriv_addr[i] = sp;             /* priv ptr from number */
 140                 sp->s_sig_mgr = NONE;           /* clear signal managers */
 141                 sp->s_bak_sig_mgr = NONE;
 142         }
 143
 144         idle_priv.s_flags = IDL_F;
 145         /* initialize IDLE structures for every CPU */
 146         for (i = 0; i < CONFIG_MAX_CPUS; i++) {
 147                 struct proc * ip = get_cpu_var_ptr(i, idle_proc);
 148                 ip->p_endpoint = IDLE;
 149                 ip->p_priv = &idle_priv;
 150                 /* must not let idle ever get scheduled */
 151                 ip->p_rts_flags |= RTS_PROC_STOP;
 152                 set_idle_name(ip->p_name, i);
 153         }
 154 }
 155
 156 static void switch_address_space_idle(void)
 157 {
 158 #ifdef CONFIG_SMP
 159         /*
 160          * currently we bet that VM is always alive and its pages available so
 161          * when the CPU wakes up the kernel is mapped and no surprises happen.
 162          * This is only a problem if more than 1 cpus are available
 163          */
 164         switch_address_space(proc_addr(VM_PROC_NR));
 165 #endif
 166 }
 167
 168 /*===========================================================================*
 169  *                              idle                                         *
 170  *===========================================================================*/
  171 static void idle(void)
  172 {
  173         struct proc * p;
  174
  175         /* Called whenever there is no work to do on this CPU. Makes the
  176          * per-CPU idle process the current one, then halts the CPU so the
  177          * timestamp counter ticks spent doing nothing can be measured. This
  178          * lets test setups measure CPU utilization with high precision.
  179          */
  180
  181         p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc); /* idle becomes current */
  182         if (priv(p)->s_flags & BILLABLE)
  183                 get_cpulocal_var(bill_ptr) = p; /* also bill it, if billable */
  184
  185         switch_address_space_idle();
  186
  187 #ifdef CONFIG_SMP
  188         get_cpulocal_var(cpu_is_idle) = 1;
  189         /* we don't need to keep time on APs as it is handled on the BSP */
  190         if (cpuid != bsp_cpu_id)
  191                 stop_local_timer();
  192         else
  193 #endif
  194         {
  195                 /*
  196                  * If the timer has expired while in kernel we must
  197                  * rearm it before we go to sleep. Without CONFIG_SMP this
  198                  * block is unconditional; with it, it is the BSP branch.
  199                  */
  200                 restart_local_timer();
  201         }
  202
  203         /* start accounting for the idle time */
  204         context_stop(proc_addr(KERNEL));
  205 #if !SPROFILE
  206         halt_cpu();
  207 #else
  208         if (!sprofiling)
  209                 halt_cpu();
  210         else {
  211                 volatile int * v;
  212
  213                 v = get_cpulocal_var_ptr(idle_interrupted); /* per-CPU flag polled below */
  214                 interrupts_enable();
  215                 while (!*v) /* spin with interrupts on until the flag is set */
  216                         arch_pause();
  217                 interrupts_disable();
  218                 *v = 0; /* reset the flag for the next idle period */
  219         }
  220 #endif
  221         /*
  222          * end of accounting for the idle task does not happen here, the kernel
  223          * is handling stuff for quite a while before it gets back here!
  224          */
  225 }
 225
 226 /*===========================================================================*
 227  *                              switch_to_user                               *
 228  *===========================================================================*/
  229 void switch_to_user(void)
  230 {
  231         /* This function is called an instant before proc_ptr is
  232          * to be scheduled again. It does not return to its caller.
  233          */
  234         struct proc * p;
  235 #ifdef CONFIG_SMP
  236         int tlb_must_refresh = 0;
  237 #endif
  238
  239         p = get_cpulocal_var(proc_ptr);
  240         /*
  241          * if the current process is still runnable check the misc flags and let
  242          * it run unless it becomes not runnable in the meantime
  243          */
  244         if (proc_is_runnable(p))
  245                 goto check_misc_flags;
  246         /*
  247          * if a process becomes not runnable while handling the misc flags, we
  248          * need to pick a new one here and start from scratch. Also if the
  249          * current process wasn't runnable, we pick a new one here
  250          */
  251 not_runnable_pick_new:
  252         if (proc_is_preempted(p)) {
  253                 p->p_rts_flags &= ~RTS_PREEMPTED;
  254                 if (proc_is_runnable(p)) {
  255                         if (!is_zero64(p->p_cpu_time_left))
  256                                 enqueue_head(p); /* CPU time left */
  257                         else
  258                                 enqueue(p); /* no CPU time left */
  259                 }
  260         }
  261
  262         /*
  263          * if we have no process to run, set IDLE as the current process for
  264          * time accounting and put the cpu in an idle state. After the next
  265          * timer interrupt the execution resumes here and we can pick another
  266          * process. If there is still nothing runnable we "schedule" IDLE again
  267          */
  268         while (!(p = pick_proc())) {
  269                 idle();
  270         }
  271
  272         /* update the global variable */
  273         get_cpulocal_var(proc_ptr) = p;
  274
  275 #ifdef CONFIG_SMP
  276         if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
  277                 tlb_must_refresh = 1; /* acted upon just before the mode switch */
  278 #endif
  279         switch_address_space(p);
  280
  281 check_misc_flags:
  282
  283         assert(p);
  284         assert(proc_is_runnable(p));
  285         while (p->p_misc_flags &
  286                 (MF_KCALL_RESUME | MF_DELIVERMSG |
  287                  MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
  288
  289                 assert(proc_is_runnable(p));
  290                 if (p->p_misc_flags & MF_KCALL_RESUME) {
  291                         kernel_call_resume(p);
  292                 }
  293                 else if (p->p_misc_flags & MF_DELIVERMSG) {
  294                         TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
  295                                 p->p_name, p->p_endpoint););
  296                         delivermsg(p);
  297                 }
  298                 else if (p->p_misc_flags & MF_SC_DEFER) {
  299                         /* Perform the system call that we deferred earlier. */
  300
  301                         assert (!(p->p_misc_flags & MF_SC_ACTIVE));
  302
  303                         arch_do_syscall(p);
  304
  305                         /* If the process is stopped for signal delivery, and
  306                          * not blocked sending a message after the system call,
  307                          * inform PM.
  308                          */
  309                         if ((p->p_misc_flags & MF_SIG_DELAY) &&
  310                                         !RTS_ISSET(p, RTS_SENDING))
  311                                 sig_delay_done(p);
  312                 }
  313                 else if (p->p_misc_flags & MF_SC_TRACE) {
  314                         /* Trigger a system call leave event if this was a
  315                          * system call. We must do this after processing the
  316                          * other flags above, both for tracing correctness and
  317                          * to be able to use 'break'.
  318                          */
  319                         if (!(p->p_misc_flags & MF_SC_ACTIVE))
  320                                 break;
  321
  322                         p->p_misc_flags &=
  323                                 ~(MF_SC_TRACE | MF_SC_ACTIVE);
  324
  325                         /* Signal the "leave system call" event.
  326                          * Block the process.
  327                          */
  328                         cause_sig(proc_nr(p), SIGTRAP);
  329                 }
  330                 else if (p->p_misc_flags & MF_SC_ACTIVE) {
  331                         /* If MF_SC_ACTIVE was set, remove it now:
  332                          * we're leaving the system call.
  333                          */
  334                         p->p_misc_flags &= ~MF_SC_ACTIVE;
  335
  336                         break;
  337                 }
  338
  339                 /*
  340                  * the selected process might not be runnable anymore. We have
  341                  * to check it and schedule another one
  342                  */
  343                 if (!proc_is_runnable(p))
  344                         goto not_runnable_pick_new;
  345         }
  346         /*
  347          * check the quantum left before it runs again. We must do it only here
  348          * as we are sure that a possible out-of-quantum message to the
  349          * scheduler will not collide with the regular ipc
  350          */
  351         if (is_zero64(p->p_cpu_time_left))
  352                 proc_no_time(p);
  353         /*
  354          * After handling the misc flags the selected process might not be
  355          * runnable anymore. We have to check it and schedule another one
  356          */
  357         if (!proc_is_runnable(p))
  358                 goto not_runnable_pick_new;
  359
  360         TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
  361                                 "pc 0x%08x\n",
  362                 cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
  363 #if DEBUG_TRACE
  364         p->p_schedules++;
  365 #endif
  366
  367         p = arch_finish_switch_to_user(); /* p is reassigned from the arch layer here */
  368         assert(!is_zero64(p->p_cpu_time_left));
  369
  370         context_stop(proc_addr(KERNEL));
  371
  372         /* If the process isn't the owner of FPU, enable the FPU exception */
  373         if(get_cpulocal_var(fpu_owner) != p)
  374                 enable_fpu_exception();
  375         else
  376                 disable_fpu_exception();
  377
  378         /* If MF_CONTEXT_SET is set, don't clobber process state within
  379          * the kernel. The next kernel entry is OK again though.
  380          */
  381         p->p_misc_flags &= ~MF_CONTEXT_SET;
  382
  383 #if defined(__i386__)
  384         assert(p->p_seg.p_cr3 != 0);
  385 #elif defined(__arm__)
  386         assert(p->p_seg.p_ttbr != 0);
  387 #endif
  388 #ifdef CONFIG_SMP
  389         if (p->p_misc_flags & MF_FLUSH_TLB) {
  390                 if (tlb_must_refresh)
  391                         refresh_tlb();
  392                 p->p_misc_flags &= ~MF_FLUSH_TLB; /* cleared whether or not we refreshed */
  393         }
  394 #endif
  395
  396         restart_local_timer();
  397
  398         /*
  399          * restore_user_context() carries out the actual mode switch from kernel
  400          * to userspace. This function does not return
  401          */
  402         restore_user_context(p);
  403         NOT_REACHABLE;
  404 }
 405
 406 /*
 407  * handler for all synchronous IPC calls
 408  */
 409 static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
 410                         int call_nr,    /* system call number and flags */
 411                         endpoint_t src_dst_e,   /* src or dst of the call */
 412                         message *m_ptr) /* users pointer to a message */
 413 {
 414   int result;                                   /* the system call's result */
 415   int src_dst_p;                                /* Process slot number */
 416   char *callname;
 417
 418   /* Check destination. RECEIVE is the only call that accepts ANY (in addition
 419    * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
 420    * endpoint to corresponds to a process. In addition, it is necessary to check
 421    * whether a process is allowed to send to a given destination.
 422    */
 423   assert(call_nr != SENDA);
 424
 425   /* Only allow non-negative call_nr values less than 32 */
 426   if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
 427       || !(callname = ipc_call_names[call_nr])) {
 428 #if DEBUG_ENABLE_IPC_WARNINGS
 429       printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
 430           call_nr, proc_nr(caller_ptr), src_dst_e);
 431 #endif
 432         return(ETRAPDENIED);            /* trap denied by mask or kernel */
 433   }
 434
 435   if (src_dst_e == ANY)
 436   {
 437         if (call_nr != RECEIVE)
 438         {
 439 #if 0
 440                 printf("sys_call: %s by %d with bad endpoint %d\n",
 441                         callname,
 442                         proc_nr(caller_ptr), src_dst_e);
 443 #endif
 444                 return EINVAL;
 445         }
 446         src_dst_p = (int) src_dst_e;
 447   }
 448   else
 449   {
 450         /* Require a valid source and/or destination process. */
 451         if(!isokendpt(src_dst_e, &src_dst_p)) {
 452 #if 0
 453                 printf("sys_call: %s by %d with bad endpoint %d\n",
 454                         callname,
 455                         proc_nr(caller_ptr), src_dst_e);
 456 #endif
 457                 return EDEADSRCDST;
 458         }
 459
 460         /* If the call is to send to a process, i.e., for SEND, SENDNB,
 461          * SENDREC or NOTIFY, verify that the caller is allowed to send to
 462          * the given destination.
 463          */
 464         if (call_nr != RECEIVE)
 465         {
 466                 if (!may_send_to(caller_ptr, src_dst_p)) {
 467 #if DEBUG_ENABLE_IPC_WARNINGS
 468                         printf(
 469                         "sys_call: ipc mask denied %s from %d to %d\n",
 470                                 callname,
 471                                 caller_ptr->p_endpoint, src_dst_e);
 472 #endif
 473                         return(ECALLDENIED);    /* call denied by ipc mask */
 474                 }
 475         }
 476   }
 477
 478   /* Check if the process has privileges for the requested call. Calls to the
 479    * kernel may only be SENDREC, because tasks always reply and may not block
 480    * if the caller doesn't do receive().
 481    */
 482   if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
 483 #if DEBUG_ENABLE_IPC_WARNINGS
 484       printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
 485           callname, proc_nr(caller_ptr), src_dst_p);
 486 #endif
 487         return(ETRAPDENIED);            /* trap denied by mask or kernel */
 488   }
 489
 490   if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
 491 #if DEBUG_ENABLE_IPC_WARNINGS
 492       printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
 493            callname, proc_nr(caller_ptr), src_dst_e);
 494 #endif
 495         return(ETRAPDENIED);            /* trap denied by mask or kernel */
 496   }
 497
 498   switch(call_nr) {
 499   case SENDREC:
 500         /* A flag is set so that notifications cannot interrupt SENDREC. */
 501         caller_ptr->p_misc_flags |= MF_REPLY_PEND;
 502         /* fall through */
 503   case SEND:
 504         result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
 505         if (call_nr == SEND || result != OK)
 506                 break;                          /* done, or SEND failed */
 507         /* fall through for SENDREC */
 508   case RECEIVE:
 509         if (call_nr == RECEIVE) {
 510                 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
 511                 IPC_STATUS_CLEAR(caller_ptr);  /* clear IPC status code */
 512         }
 513         result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
 514         break;
 515   case NOTIFY:
 516         result = mini_notify(caller_ptr, src_dst_e);
 517         break;
 518   case SENDNB:
 519         result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
 520         break;
 521   default:
 522         result = EBADCALL;                      /* illegal system call */
 523   }
 524
 525   /* Now, return the result of the system call to the caller. */
 526   return(result);
 527 }
 528
 529 int do_ipc(reg_t r1, reg_t r2, reg_t r3)
 530 {
 531   struct proc *const caller_ptr = get_cpulocal_var(proc_ptr);   /* get pointer to caller */
 532   int call_nr = (int) r1;
 533
 534   assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));
 535
 536   /* bill kernel time to this process. */
 537   kbill_ipc = caller_ptr;
 538
 539   /* If this process is subject to system call tracing, handle that first. */
 540   if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
 541         /* Are we tracing this process, and is it the first sys_call entry? */
 542         if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
 543                                                         MF_SC_TRACE) {
 544                 /* We must notify the tracer before processing the actual
 545                  * system call. If we don't, the tracer could not obtain the
 546                  * input message. Postpone the entire system call.
 547                  */
 548                 caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
 549                 caller_ptr->p_misc_flags |= MF_SC_DEFER;
 550
 551                 /* Signal the "enter system call" event. Block the process. */
 552                 cause_sig(proc_nr(caller_ptr), SIGTRAP);
 553
 554                 /* Preserve the return register's value. */
 555                 return caller_ptr->p_reg.retreg;
 556         }
 557
 558         /* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
 559         caller_ptr->p_misc_flags &= ~MF_SC_DEFER;
 560
 561         assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));
 562
 563         /* Set a flag to allow reliable tracing of leaving the system call. */
 564         caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
 565   }
 566
 567   if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
 568         panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
 569                 caller_ptr->p_name, caller_ptr->p_endpoint);
 570   }
 571
 572   /* Now check if the call is known and try to perform the request. The only
 573    * system calls that exist in MINIX are sending and receiving messages.
 574    *   - SENDREC: combines SEND and RECEIVE in a single system call
 575    *   - SEND:    sender blocks until its message has been delivered
 576    *   - RECEIVE: receiver blocks until an acceptable message has arrived
 577    *   - NOTIFY:  asynchronous call; deliver notification or mark pending
 578    *   - SENDA:   list of asynchronous send requests
 579    */
 580   switch(call_nr) {
 581         case SENDREC:
 582         case SEND:
 583         case RECEIVE:
 584         case NOTIFY:
 585         case SENDNB:
 586         {
 587             /* Process accounting for scheduling */
 588             caller_ptr->p_accounting.ipc_sync++;
 589
 590             return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
 591                             (message *) r3);
 592         }
 593         case SENDA:
 594         {
 595             /*
 596              * Get and check the size of the argument in bytes as it is a
 597              * table
 598              */
 599             size_t msg_size = (size_t) r2;
 600
 601             /* Process accounting for scheduling */
 602             caller_ptr->p_accounting.ipc_async++;
 603
 604             /* Limit size to something reasonable. An arbitrary choice is 16
 605              * times the number of process table entries.
 606              */
 607             if (msg_size > 16*(NR_TASKS + NR_PROCS))
 608                 return EDOM;
 609             return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
 610         }
 611         case MINIX_KERNINFO:
 612         {
 613                 /* It might not be initialized yet. */
 614                 if(!minix_kerninfo_user) {
 615                         return EBADCALL;
 616                 }
 617
 618                 arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
 619                 return OK;
 620         }
 621         default:
 622         return EBADCALL;                /* illegal system call */
 623   }
 624 }
 625
 626 /*===========================================================================*
 627  *                              deadlock                                     *
 628  *===========================================================================*/
 629 static int deadlock(function, cp, src_dst_e)
 630 int function;                                   /* trap number */
 631 register struct proc *cp;                       /* pointer to caller */
 632 endpoint_t src_dst_e;                           /* src or dst process */
 633 {
 634 /* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
 635  * a cyclic dependency of blocking send and receive calls. The only cyclic
 636  * depency that is not fatal is if the caller and target directly SEND(REC)
 637  * and RECEIVE to each other. If a deadlock is found, the group size is
 638  * returned. Otherwise zero is returned.
 639  */
 640   register struct proc *xp;                     /* process pointer */
 641   int group_size = 1;                           /* start with only caller */
 642 #if DEBUG_ENABLE_IPC_WARNINGS
 643   static struct proc *processes[NR_PROCS + NR_TASKS];
 644   processes[0] = cp;
 645 #endif
 646
 647   while (src_dst_e != ANY) {                    /* check while process nr */
 648       int src_dst_slot;
 649       okendpt(src_dst_e, &src_dst_slot);
 650       xp = proc_addr(src_dst_slot);             /* follow chain of processes */
 651       assert(proc_ptr_ok(xp));
 652       assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
 653 #if DEBUG_ENABLE_IPC_WARNINGS
 654       processes[group_size] = xp;
 655 #endif
 656       group_size ++;                            /* extra process in group */
 657
 658       /* Check whether the last process in the chain has a dependency. If it
 659        * has not, the cycle cannot be closed and we are done.
 660        */
 661       if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
 662         return 0;
 663
 664       /* Now check if there is a cyclic dependency. For group sizes of two,
 665        * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
 666        * or other combinations indicate a deadlock.
 667        */
 668       if (src_dst_e == cp->p_endpoint) {        /* possible deadlock */
 669           if (group_size == 2) {                /* caller and src_dst */
 670               /* The function number is magically converted to flags. */
 671               if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
 672                   return(0);                    /* not a deadlock */
 673               }
 674           }
 675 #if DEBUG_ENABLE_IPC_WARNINGS
 676           {
 677                 int i;
 678                 printf("deadlock between these processes:\n");
 679                 for(i = 0; i < group_size; i++) {
 680                         printf(" %10s ", processes[i]->p_name);
 681                 }
 682                 printf("\n\n");
 683                 for(i = 0; i < group_size; i++) {
 684                         print_proc(processes[i]);
 685                         proc_stacktrace(processes[i]);
 686                 }
 687           }
 688 #endif
 689           return(group_size);                   /* deadlock found */
 690       }
 691   }
 692   return(0);                                    /* not a deadlock */
 693 }
 694
 695 /*===========================================================================*
 696  *                              has_pending                                  *
 697  *===========================================================================*/
 698 static int has_pending(sys_map_t *map, int src_p, int asynm)
 699 {
 700 /* Check to see if there is a pending message from the desired source
 701  * available.
 702  */
 703
 704   int src_id;
 705   sys_id_t id = NULL_PRIV_ID;
 706 #ifdef CONFIG_SMP
 707   struct proc * p;
 708 #endif
 709
 710   /* Either check a specific bit in the mask map, or find the first bit set in
 711    * it (if any), depending on whether the receive was called on a specific
 712    * source endpoint.
 713    */
 714   if (src_p != ANY) {
 715         src_id = nr_to_id(src_p);
 716         if (get_sys_bit(*map, src_id)) {
 717 #ifdef CONFIG_SMP
 718                 p = proc_addr(id_to_nr(src_id));
 719                 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
 720                         p->p_misc_flags |= MF_SENDA_VM_MISS;
 721                 else
 722 #endif
 723                         id = src_id;
 724         }
 725   } else {
 726         /* Find a source with a pending message */
 727         for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
 728                 if (get_sys_bits(*map, src_id) != 0) {
 729 #ifdef CONFIG_SMP
 730                         while (src_id < NR_SYS_PROCS) {
 731                                 while (!get_sys_bit(*map, src_id)) {
 732                                         if (src_id == NR_SYS_PROCS)
 733                                                 goto quit_search;
 734                                         src_id++;
 735                                 }
 736                                 p = proc_addr(id_to_nr(src_id));
 737                                 /*
 738                                  * We must not let kernel fiddle with pages of a
 739                                  * process which are currently being changed by
 740                                  * VM.  It is dangerous! So do not report such a
 741                                  * process as having pending async messages.
 742                                  * Skip it.
 743                                  */
 744                                 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
 745                                         p->p_misc_flags |= MF_SENDA_VM_MISS;
 746                                         src_id++;
 747                                 } else
 748                                         goto quit_search;
 749                         }
 750 #else
 751                         while (!get_sys_bit(*map, src_id)) src_id++;
 752                         goto quit_search;
 753 #endif
 754                 }
 755         }
 756
 757 quit_search:
 758         if (src_id < NR_SYS_PROCS)      /* Found one */
 759                 id = src_id;
 760   }
 761
 762   return(id);
 763 }
 764
 765 /*===========================================================================*
 766  *                              has_pending_notify                           *
 767  *===========================================================================*/
 768 int has_pending_notify(struct proc * caller, int src_p)
 769 {
 770         sys_map_t * map = &priv(caller)->s_notify_pending;
 771         return has_pending(map, src_p, 0);
 772 }
 773
 774 /*===========================================================================*
 775  *                              has_pending_asend                            *
 776  *===========================================================================*/
 777 int has_pending_asend(struct proc * caller, int src_p)
 778 {
 779         sys_map_t * map = &priv(caller)->s_asyn_pending;
 780         return has_pending(map, src_p, 1);
 781 }
 782
 783 /*===========================================================================*
 784  *                              unset_notify_pending                         *
 785  *===========================================================================*/
 786 void unset_notify_pending(struct proc * caller, int src_p)
 787 {
 788         sys_map_t * map = &priv(caller)->s_notify_pending;
 789         unset_sys_bit(*map, src_p);
 790 }
 791
 792 /*===========================================================================*
 793  *                              mini_send                                    *
 794  *===========================================================================*/
 795 int mini_send(
 796   register struct proc *caller_ptr,     /* who is trying to send a message? */
 797   endpoint_t dst_e,                     /* to whom is message being sent? */
 798   message *m_ptr,                       /* pointer to message buffer */
 799   const int flags
 800 )
 801 {
 802 /* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
 803  * for this message, copy the message to it and unblock 'dst'. If 'dst' is
 804  * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
 805  */
 806   register struct proc *dst_ptr;
 807   register struct proc **xpp;
 808   int dst_p;
 809   dst_p = _ENDPOINT_P(dst_e);
 810   dst_ptr = proc_addr(dst_p);
 811
 812   if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
 813   {
 814         return EDEADSRCDST;
 815   }
 816
 817   /* Check if 'dst' is blocked waiting for this message. The destination's
 818    * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
 819    */
 820   if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
 821         int call;
 822         /* Destination is indeed waiting for this message. */
 823         assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
 824
 825         if (!(flags & FROM_KERNEL)) {
 826                 if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
 827                         return EFAULT;
 828         } else {
 829                 dst_ptr->p_delivermsg = *m_ptr;
 830                 IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
 831         }
 832
 833         dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
 834         dst_ptr->p_misc_flags |= MF_DELIVERMSG;
 835
 836         call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
 837                 : (flags & NON_BLOCKING ? SENDNB : SEND));
 838         IPC_STATUS_ADD_CALL(dst_ptr, call);
 839
 840         if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
 841                 dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
 842
 843         RTS_UNSET(dst_ptr, RTS_RECEIVING);
 844
 845 #if DEBUG_IPC_HOOK
 846         hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
 847         hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
 848 #endif
 849   } else {
 850         if(flags & NON_BLOCKING) {
 851                 return(ENOTREADY);
 852         }
 853
 854         /* Check for a possible deadlock before actually blocking. */
 855         if (deadlock(SEND, caller_ptr, dst_e)) {
 856                 return(ELOCKED);
 857         }
 858
 859         /* Destination is not waiting.  Block and dequeue caller. */
 860         if (!(flags & FROM_KERNEL)) {
 861                 if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
 862                         return EFAULT;
 863         } else {
 864                 caller_ptr->p_sendmsg = *m_ptr;
 865                 /*
 866                  * we need to remember that this message is from kernel so we
 867                  * can set the delivery status flags when the message is
 868                  * actually delivered
 869                  */
 870                 caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
 871         }
 872
 873         RTS_SET(caller_ptr, RTS_SENDING);
 874         caller_ptr->p_sendto_e = dst_e;
 875
 876         /* Process is now blocked.  Put in on the destination's queue. */
 877         assert(caller_ptr->p_q_link == NULL);
 878         xpp = &dst_ptr->p_caller_q;             /* find end of list */
 879         while (*xpp) xpp = &(*xpp)->p_q_link;
 880         *xpp = caller_ptr;                      /* add caller to end */
 881
 882 #if DEBUG_IPC_HOOK
 883         hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
 884 #endif
 885   }
 886   return(OK);
 887 }
 888
 889 /*===========================================================================*
 890  *                              mini_receive                                 *
 891  *===========================================================================*/
 892 static int mini_receive(struct proc * caller_ptr,
 893                         endpoint_t src_e, /* which message source is wanted */
 894                         message * m_buff_usr, /* pointer to message buffer */
 895                         const int flags)
 896 {
 897 /* A process or task wants to get a message.  If a message is already queued,
 898  * acquire it and deblock the sender.  If no message from the desired source
 899  * is available block the caller.
 900  */
 901   register struct proc **xpp;
 902   int r, src_id, src_proc_nr, src_p;
 903
 904   assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
 905
 906   /* This is where we want our message. */
 907   caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
 908
 909   if(src_e == ANY) src_p = ANY;
 910   else
 911   {
 912         okendpt(src_e, &src_p);
 913         if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
 914         {
 915                 return EDEADSRCDST;
 916         }
 917   }
 918
 919
 920   /* Check to see if a message from desired source is already available.  The
 921    * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
 922    * set, the process should be blocked.
 923    */
 924   if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
 925
 926     /* Check if there are pending notifications, except for SENDREC. */
 927     if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
 928
 929         /* Check for pending notifications */
 930         if ((src_id = has_pending_notify(caller_ptr, src_p)) != NULL_PRIV_ID) {
 931             endpoint_t hisep;
 932
 933             src_proc_nr = id_to_nr(src_id);             /* get source proc */
 934 #if DEBUG_ENABLE_IPC_WARNINGS
 935             if(src_proc_nr == NONE) {
 936                 printf("mini_receive: sending notify from NONE\n");
 937             }
 938 #endif
 939             assert(src_proc_nr != NONE);
 940             unset_notify_pending(caller_ptr, src_id);   /* no longer pending */
 941
 942             /* Found a suitable source, deliver the notification message. */
 943             hisep = proc_addr(src_proc_nr)->p_endpoint;
 944             assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
 945             assert(src_e == ANY || hisep == src_e);
 946
 947             /* assemble message */
 948             BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
 949             caller_ptr->p_delivermsg.m_source = hisep;
 950             caller_ptr->p_misc_flags |= MF_DELIVERMSG;
 951
 952             IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
 953
 954             goto receive_done;
 955         }
 956     }
 957
 958     /* Check for pending asynchronous messages */
 959     if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
 960         if (src_p != ANY)
 961                 r = try_one(proc_addr(src_p), caller_ptr);
 962         else
 963                 r = try_async(caller_ptr);
 964
 965         if (r == OK) {
 966             IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
 967             goto receive_done;
 968         }
 969     }
 970
 971     /* Check caller queue. Use pointer pointers to keep code simple. */
 972     xpp = &caller_ptr->p_caller_q;
 973     while (*xpp) {
 974         struct proc * sender = *xpp;
 975
 976         if (src_e == ANY || src_p == proc_nr(sender)) {
 977             int call;
 978             assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
 979             assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
 980
 981             /* Found acceptable message. Copy it and update status. */
 982             assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
 983             caller_ptr->p_delivermsg = sender->p_sendmsg;
 984             caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
 985             caller_ptr->p_misc_flags |= MF_DELIVERMSG;
 986             RTS_UNSET(sender, RTS_SENDING);
 987
 988             call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
 989             IPC_STATUS_ADD_CALL(caller_ptr, call);
 990
 991             /*
 992              * if the message is originaly from the kernel on behalf of this
 993              * process, we must send the status flags accordingly
 994              */
 995             if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
 996                 IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
 997                 /* we can clean the flag now, not need anymore */
 998                 sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
 999             }
1000             if (sender->p_misc_flags & MF_SIG_DELAY)
1001                 sig_delay_done(sender);
1002
1003 #if DEBUG_IPC_HOOK
1004             hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
1005 #endif
1006
1007             *xpp = sender->p_q_link;            /* remove from queue */
1008             sender->p_q_link = NULL;
1009             goto receive_done;
1010         }
1011         xpp = &sender->p_q_link;                /* proceed to next */
1012     }
1013   }
1014
1015   /* No suitable message is available or the caller couldn't send in SENDREC.
1016    * Block the process trying to receive, unless the flags tell otherwise.
1017    */
1018   if ( ! (flags & NON_BLOCKING)) {
1019       /* Check for a possible deadlock before actually blocking. */
1020       if (deadlock(RECEIVE, caller_ptr, src_e)) {
1021           return(ELOCKED);
1022       }
1023
1024       caller_ptr->p_getfrom_e = src_e;
1025       RTS_SET(caller_ptr, RTS_RECEIVING);
1026       return(OK);
1027   } else {
1028         return(ENOTREADY);
1029   }
1030
1031 receive_done:
1032   if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
1033           caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
1034   return OK;
1035 }
1036
1037 /*===========================================================================*
1038  *                              mini_notify                                  *
1039  *===========================================================================*/
1040 int mini_notify(
1041   const struct proc *caller_ptr,        /* sender of the notification */
1042   endpoint_t dst_e                      /* which process to notify */
1043 )
1044 {
1045   register struct proc *dst_ptr;
1046   int src_id;                           /* source id for late delivery */
1047   int dst_p;
1048
1049   if (!isokendpt(dst_e, &dst_p)) {
1050         util_stacktrace();
1051         printf("mini_notify: bogus endpoint %d\n", dst_e);
1052         return EDEADSRCDST;
1053   }
1054
1055   dst_ptr = proc_addr(dst_p);
1056
1057   /* Check to see if target is blocked waiting for this message. A process
1058    * can be both sending and receiving during a SENDREC system call.
1059    */
1060     if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1061       ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
1062       /* Destination is indeed waiting for a message. Assemble a notification
1063        * message and deliver it. Copy from pseudo-source HARDWARE, since the
1064        * message is in the kernel's address space.
1065        */
1066       assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
1067
1068       BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
1069       dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1070       dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1071
1072       IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
1073       RTS_UNSET(dst_ptr, RTS_RECEIVING);
1074
1075       return(OK);
1076   }
1077
1078   /* Destination is not ready to receive the notification. Add it to the
1079    * bit map with pending notifications. Note the indirectness: the privilege id
1080    * instead of the process number is used in the pending bit map.
1081    */
1082   src_id = priv(caller_ptr)->s_id;
1083   set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
1084   return(OK);
1085 }
1086
/* Report a failed copy to or from an asynchronous message table.  'caller' is
 * the process owning the table, 'entry' the index of the table slot involved
 * and 'field' a string naming the part of the slot that was being copied.
 * The owner's registered table size and address are printed as well.  All
 * parameters are parenthesized so that any expression can be passed safely.
 */
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%d, tab 0x%lx)\n",__FILE__,__LINE__,	\
	(field), (caller)->p_name, (entry),	\
	priv(caller)->s_asynsize, priv(caller)->s_asyntab)
1091
/* Fetch a single field of table slot 'slot' from the table owner's address
 * space into the matching field of the local 'tabent'.  On failure the
 * problem is logged, 'r' is set to EFAULT and control jumps to 'asyn_error'.
 * The expansion relies on 'caller_ptr', 'table_v', 'tabent', 'r' and the
 * label 'asyn_error' being available where the macro is used.  Unlike
 * A_RETR it expands to a bare if statement, so it must not be used as the
 * unbraced body of another if/else.
 */
#define A_RETR_FLD(slot, field)						\
  if (data_copy(caller_ptr->p_endpoint,					\
		table_v + (slot)*sizeof(asynmsg_t) +			\
			offsetof(struct asynmsg,field),			\
		KERNEL, (vir_bytes) &tabent.field,			\
		sizeof(tabent.field)) != OK) {				\
	ASCOMPLAIN(caller_ptr, slot, #field);				\
	r = EFAULT;							\
	goto asyn_error;						\
  }
1101
/* Fetch the whole of table slot 'slot' from the table owner's address space
 * into the local 'tabent'.  On failure the problem is logged, 'r' is set to
 * EFAULT and control jumps to 'asyn_error'.  The expansion relies on
 * 'caller_ptr', 'table_v', 'tabent', 'r' and the label 'asyn_error' being
 * available where the macro is used.
 */
#define A_RETR(slot) do {						\
	if (data_copy(caller_ptr->p_endpoint,				\
			table_v + (slot)*sizeof(asynmsg_t),		\
			KERNEL, (vir_bytes) &tabent,			\
			sizeof(tabent)) != OK) {			\
		ASCOMPLAIN(caller_ptr, slot, "message entry");		\
		r = EFAULT;						\
		goto asyn_error;					\
	}								\
} while(0)
1112
/* Store a single field of the local 'tabent' into table slot 'slot' in the
 * table owner's address space.  On failure the problem is logged, 'r' is set
 * to EFAULT and control jumps to 'asyn_error'.  The expansion relies on
 * 'caller_ptr', 'table_v', 'tabent', 'r' and the label 'asyn_error' being
 * available where the macro is used.  Unlike A_INSRT it expands to a bare if
 * statement, so it must not be used as the unbraced body of another if/else.
 */
#define A_INSRT_FLD(slot, field)					\
  if (data_copy(KERNEL, (vir_bytes) &tabent.field,			\
		caller_ptr->p_endpoint,					\
		table_v + (slot)*sizeof(asynmsg_t) +			\
			offsetof(struct asynmsg,field),			\
		sizeof(tabent.field)) != OK) {				\
	ASCOMPLAIN(caller_ptr, slot, #field);				\
	r = EFAULT;							\
	goto asyn_error;						\
  }
1122
/* Store the whole local 'tabent' into table slot 'slot' in the table owner's
 * address space.  On failure the problem is logged, 'r' is set to EFAULT and
 * control jumps to 'asyn_error'.  The expansion relies on 'caller_ptr',
 * 'table_v', 'tabent', 'r' and the label 'asyn_error' being available where
 * the macro is used.
 */
#define A_INSRT(slot) do {						\
	if (data_copy(KERNEL, (vir_bytes) &tabent,			\
			caller_ptr->p_endpoint,				\
			table_v + (slot)*sizeof(asynmsg_t),		\
			sizeof(tabent)) != OK) {			\
		ASCOMPLAIN(caller_ptr, slot, "message entry");		\
		r = EFAULT;						\
		goto asyn_error;					\
	}								\
} while(0)
1132
1133 /*===========================================================================*
1134  *                              try_deliver_senda                            *
1135  *===========================================================================*/
1136 int try_deliver_senda(struct proc *caller_ptr,
1137                                 asynmsg_t *table,
1138                                 size_t size)
1139 {
1140   int r, dst_p, done, do_notify;
1141   unsigned int i;
1142   unsigned flags;
1143   endpoint_t dst;
1144   struct proc *dst_ptr;
1145   struct priv *privp;
1146   asynmsg_t tabent;
1147   const vir_bytes table_v = (vir_bytes) table;
1148
1149   privp = priv(caller_ptr);
1150
1151   /* Clear table */
1152   privp->s_asyntab = -1;
1153   privp->s_asynsize = 0;
1154
1155   if (size == 0) return(OK);  /* Nothing to do, just return */
1156
1157   /* Scan the table */
1158   do_notify = FALSE;
1159   done = TRUE;
1160
1161   /* Limit size to something reasonable. An arbitrary choice is 16
1162    * times the number of process table entries.
1163    *
1164    * (this check has been duplicated in sys_call but is left here
1165    * as a sanity check)
1166    */
1167   if (size > 16*(NR_TASKS + NR_PROCS)) {
1168     r = EDOM;
1169     return r;
1170   }
1171
1172   for (i = 0; i < size; i++) {
1173         /* Process each entry in the table and store the result in the table.
1174          * If we're done handling a message, copy the result to the sender. */
1175
1176         dst = NONE;
1177         /* Copy message to kernel */
1178         A_RETR(i);
1179         flags = tabent.flags;
1180         dst = tabent.dst;
1181
1182         if (flags == 0) continue; /* Skip empty entries */
1183
1184         /* 'flags' field must contain only valid bits */
1185         if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
1186                 r = EINVAL;
1187                 goto asyn_error;
1188         }
1189         if (!(flags & AMF_VALID)) { /* Must contain message */
1190                 r = EINVAL;
1191                 goto asyn_error;
1192         }
1193         if (flags & AMF_DONE) continue; /* Already done processing */
1194
1195         r = OK;
1196         if (!isokendpt(tabent.dst, &dst_p))
1197                 r = EDEADSRCDST; /* Bad destination, report the error */
1198         else if (iskerneln(dst_p))
1199                 r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
1200         else if (!may_send_to(caller_ptr, dst_p))
1201                 r = ECALLDENIED; /* Send denied by IPC mask */
1202         else    /* r == OK */
1203                 dst_ptr = proc_addr(dst_p);
1204
1205         /* XXX: RTS_NO_ENDPOINT should be removed */
1206         if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
1207                 r = EDEADSRCDST;
1208         }
1209
1210         /* Check if 'dst' is blocked waiting for this message.
1211          * If AMF_NOREPLY is set, do not satisfy the receiving part of
1212          * a SENDREC.
1213          */
1214         if (r == OK && WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1215             (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
1216                 /* Destination is indeed waiting for this message. */
1217                 dst_ptr->p_delivermsg = tabent.msg;
1218                 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1219                 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1220                 IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
1221                 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1222         } else if (r == OK) {
1223                 /* Inform receiver that something is pending */
1224                 set_sys_bit(priv(dst_ptr)->s_asyn_pending,
1225                             priv(caller_ptr)->s_id);
1226                 done = FALSE;
1227                 continue;
1228         }
1229
1230         /* Store results */
1231         tabent.result = r;
1232         tabent.flags = flags | AMF_DONE;
1233         if (flags & AMF_NOTIFY)
1234                 do_notify = TRUE;
1235         else if (r != OK && (flags & AMF_NOTIFY_ERR))
1236                 do_notify = TRUE;
1237         A_INSRT(i);     /* Copy results to caller */
1238         continue;
1239
1240 asyn_error:
1241         if (dst != NONE)
1242                 printf("KERNEL senda error %d to %d\n", r, dst);
1243         else
1244                 printf("KERNEL senda error %d\n", r);
1245   }
1246
1247   if (do_notify)
1248         mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
1249
1250   if (!done) {
1251         privp->s_asyntab = (vir_bytes) table;
1252         privp->s_asynsize = size;
1253   }
1254
1255   return(OK);
1256 }
1257
1258 /*===========================================================================*
1259  *                              mini_senda                                   *
1260  *===========================================================================*/
1261 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
1262 {
1263   struct priv *privp;
1264
1265   privp = priv(caller_ptr);
1266   if (!(privp->s_flags & SYS_PROC)) {
1267         printf( "mini_senda: warning caller has no privilege structure\n");
1268         return(EPERM);
1269   }
1270
1271   return try_deliver_senda(caller_ptr, table, size);
1272 }
1273
1274
1275 /*===========================================================================*
1276  *                              try_async                                    *
1277  *===========================================================================*/
1278 static int try_async(caller_ptr)
1279 struct proc *caller_ptr;
1280 {
1281   int r;
1282   struct priv *privp;
1283   struct proc *src_ptr;
1284   sys_map_t *map;
1285
1286   map = &priv(caller_ptr)->s_asyn_pending;
1287
1288   /* Try all privilege structures */
1289   for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp)  {
1290         if (privp->s_proc_nr == NONE)
1291                 continue;
1292
1293         if (!get_sys_bit(*map, privp->s_id))
1294                 continue;
1295
1296         src_ptr = proc_addr(privp->s_proc_nr);
1297
1298 #ifdef CONFIG_SMP
1299         /*
1300          * Do not copy from a process which does not have a stable address space
1301          * due to VM fiddling with it
1302          */
1303         if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
1304                 src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
1305                 continue;
1306         }
1307 #endif
1308
1309         assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1310         if ((r = try_one(src_ptr, caller_ptr)) == OK)
1311                 return(r);
1312   }
1313
1314   return(ESRCH);
1315 }
1316
1317
1318 /*===========================================================================*
1319  *                              try_one                                      *
1320  *===========================================================================*/
1321 static int try_one(struct proc *src_ptr, struct proc *dst_ptr)
1322 {
1323 /* Try to receive an asynchronous message from 'src_ptr' */
1324   int r = EAGAIN, done, do_notify;
1325   unsigned int flags, i;
1326   size_t size;
1327   endpoint_t dst;
1328   struct proc *caller_ptr;
1329   struct priv *privp;
1330   asynmsg_t tabent;
1331   vir_bytes table_v;
1332
1333   privp = priv(src_ptr);
1334   if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1335   size = privp->s_asynsize;
1336   table_v = privp->s_asyntab;
1337
1338   /* Clear table pending message flag. We're done unless we're not. */
1339   unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1340
1341   if (size == 0) return(EAGAIN);
1342   if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1343
1344   caller_ptr = src_ptr; /* Needed for A_ macros later on */
1345
1346   /* Scan the table */
1347   do_notify = FALSE;
1348   done = TRUE;
1349
1350   for (i = 0; i < size; i++) {
1351         /* Process each entry in the table and store the result in the table.
1352          * If we're done handling a message, copy the result to the sender.
1353          * Some checks done in mini_senda are duplicated here, as the sender
1354          * could've altered the contents of the table in the meantime.
1355          */
1356
1357         /* Copy message to kernel */
1358         A_RETR(i);
1359         flags = tabent.flags;
1360         dst = tabent.dst;
1361
1362         if (flags == 0) continue;       /* Skip empty entries */
1363
1364         /* 'flags' field must contain only valid bits */
1365         if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1366                 r = EINVAL;
1367         else if (!(flags & AMF_VALID)) /* Must contain message */
1368                 r = EINVAL;
1369         else if (flags & AMF_DONE) continue; /* Already done processing */
1370
1371         /* Clear done flag. The sender is done sending when all messages in the
1372          * table are marked done or empty. However, we will know that only
1373          * the next time we enter this function or when the sender decides to
1374          * send additional asynchronous messages and manages to deliver them
1375          * all.
1376          */
1377         done = FALSE;
1378
1379         if (r == EINVAL)
1380                 goto store_result;
1381
1382         /* Message must be directed at receiving end */
1383         if (dst != dst_ptr->p_endpoint) continue;
1384
1385         /* If AMF_NOREPLY is set, then this message is not a reply to a
1386          * SENDREC and thus should not satisfy the receiving part of the
1387          * SENDREC. This message is to be delivered later.
1388          */
1389         if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
1390                 continue;
1391
1392         /* Destination is ready to receive the message; deliver it */
1393         r = OK;
1394         dst_ptr->p_delivermsg = tabent.msg;
1395         dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
1396         dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1397
1398 store_result:
1399         /* Store results for sender */
1400         tabent.result = r;
1401         tabent.flags = flags | AMF_DONE;
1402         if (flags & AMF_NOTIFY) do_notify = TRUE;
1403         else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1404         A_INSRT(i);     /* Copy results to sender */
1405
1406         break;
1407   }
1408
1409   if (do_notify)
1410         mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1411
1412   if (done) {
1413         privp->s_asyntab = -1;
1414         privp->s_asynsize = 0;
1415   } else {
1416         set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1417   }
1418
1419 asyn_error:
1420   return(r);
1421 }
1422
1423 /*===========================================================================*
1424  *                              cancel_async                                 *
1425  *===========================================================================*/
1426 int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
1427 {
1428 /* Cancel asynchronous messages from src to dst, because dst is not interested
1429  * in them (e.g., dst has been restarted) */
1430   int done, do_notify;
1431   unsigned int flags, i;
1432   size_t size;
1433   endpoint_t dst;
1434   struct proc *caller_ptr;
1435   struct priv *privp;
1436   asynmsg_t tabent;
1437   vir_bytes table_v;
1438
1439   privp = priv(src_ptr);
1440   if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1441   size = privp->s_asynsize;
1442   table_v = privp->s_asyntab;
1443
1444   /* Clear table pending message flag. We're done unless we're not. */
1445   privp->s_asyntab = -1;
1446   privp->s_asynsize = 0;
1447   unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1448
1449   if (size == 0) return(EAGAIN);
1450   if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1451
1452   caller_ptr = src_ptr; /* Needed for A_ macros later on */
1453
1454   /* Scan the table */
1455   do_notify = FALSE;
1456   done = TRUE;
1457
1458
1459   for (i = 0; i < size; i++) {
1460         /* Process each entry in the table and store the result in the table.
1461          * If we're done handling a message, copy the result to the sender.
1462          * Some checks done in mini_senda are duplicated here, as the sender
1463          * could've altered the contents of the table in the mean time.
1464          */
1465
1466         int r = EDEADSRCDST;    /* Cancel delivery due to dead dst */
1467
1468         /* Copy message to kernel */
1469         A_RETR(i);
1470         flags = tabent.flags;
1471         dst = tabent.dst;
1472
1473         if (flags == 0) continue;       /* Skip empty entries */
1474
1475         /* 'flags' field must contain only valid bits */
1476         if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1477                 r = EINVAL;
1478         else if (!(flags & AMF_VALID)) /* Must contain message */
1479                 r = EINVAL;
1480         else if (flags & AMF_DONE) continue; /* Already done processing */
1481
1482         /* Message must be directed at receiving end */
1483         if (dst != dst_ptr->p_endpoint) {
1484                 done = FALSE;
1485                 continue;
1486         }
1487
1488         /* Store results for sender */
1489         tabent.result = r;
1490         tabent.flags = flags | AMF_DONE;
1491         if (flags & AMF_NOTIFY) do_notify = TRUE;
1492         else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1493         A_INSRT(i);     /* Copy results to sender */
1494   }
1495
1496   if (do_notify)
1497         mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1498
1499   if (!done) {
1500         privp->s_asyntab = table_v;
1501         privp->s_asynsize = size;
1502   }
1503
1504 asyn_error:
1505   return(OK);
1506 }
1507
1508 /*===========================================================================*
1509  *                              enqueue                                      *
1510  *===========================================================================*/
1511 void enqueue(
1512   register struct proc *rp      /* this process is now runnable */
1513 )
1514 {
1515 /* Add 'rp' to one of the queues of runnable processes.  This function is
1516  * responsible for inserting a process into one of the scheduling queues.
1517  * The mechanism is implemented here.   The actual scheduling policy is
1518  * defined in sched() and pick_proc().
1519  *
1520  * This function can be used x-cpu as it always uses the queues of the cpu the
1521  * process is assigned to.
1522  */
1523   int q = rp->p_priority;                       /* scheduling queue to use */
1524   struct proc **rdy_head, **rdy_tail;
1525
1526   assert(proc_is_runnable(rp));
1527
1528   assert(q >= 0);
1529
1530   rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1531   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1532
1533   /* Now add the process to the queue. */
1534   if (!rdy_head[q]) {           /* add to empty queue */
1535       rdy_head[q] = rdy_tail[q] = rp;           /* create a new queue */
1536       rp->p_nextready = NULL;           /* mark new end */
1537   }
1538   else {                                        /* add to tail of queue */
1539       rdy_tail[q]->p_nextready = rp;            /* chain tail of queue */
1540       rdy_tail[q] = rp;                         /* set new queue tail */
1541       rp->p_nextready = NULL;           /* mark new end */
1542   }
1543
1544   if (cpuid == rp->p_cpu) {
1545           /*
1546            * enqueueing a process with a higher priority than the current one,
1547            * it gets preempted. The current process must be preemptible. Testing
1548            * the priority also makes sure that a process does not preempt itself
1549            */
1550           struct proc * p;
1551           p = get_cpulocal_var(proc_ptr);
1552           assert(p);
1553           if((p->p_priority > rp->p_priority) &&
1554                           (priv(p)->s_flags & PREEMPTIBLE))
1555                   RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
1556   }
1557 #ifdef CONFIG_SMP
1558   /*
1559    * if the process was enqueued on a different cpu and the cpu is idle, i.e.
1560    * the time is off, we need to wake up that cpu and let it schedule this new
1561    * process
1562    */
1563   else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
1564           smp_schedule(rp->p_cpu);
1565   }
1566 #endif
1567
1568   /* Make note of when this process was added to queue */
1569   read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue));
1570
1571
1572 #if DEBUG_SANITYCHECKS
1573   assert(runqueues_ok_local());
1574 #endif
1575 }
1576
1577 /*===========================================================================*
1578  *                              enqueue_head                                 *
1579  *===========================================================================*/
1580 /*
1581  * put a process at the front of its run queue. It comes handy when a process is
1582  * preempted and removed from run queue to not to have a currently not-runnable
1583  * process on a run queue. We have to put this process back at the fron to be
1584  * fair
1585  */
1586 static void enqueue_head(struct proc *rp)
1587 {
1588   const int q = rp->p_priority;                 /* scheduling queue to use */
1589
1590   struct proc **rdy_head, **rdy_tail;
1591
1592   assert(proc_ptr_ok(rp));
1593   assert(proc_is_runnable(rp));
1594
1595   /*
1596    * the process was runnable without its quantum expired when dequeued. A
1597    * process with no time left should vahe been handled else and differently
1598    */
1599   assert(!is_zero64(rp->p_cpu_time_left));
1600
1601   assert(q >= 0);
1602
1603
1604   rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1605   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1606
1607   /* Now add the process to the queue. */
1608   if (!rdy_head[q]) {           /* add to empty queue */
1609       rdy_head[q] = rdy_tail[q] = rp;           /* create a new queue */
1610       rp->p_nextready = NULL;           /* mark new end */
1611   }
1612   else                                          /* add to head of queue */
1613       rp->p_nextready = rdy_head[q];            /* chain head of queue */
1614       rdy_head[q] = rp;                         /* set new queue head */
1615
1616   /* Make note of when this process was added to queue */
1617   read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue)));
1618
1619
1620   /* Process accounting for scheduling */
1621   rp->p_accounting.dequeues--;
1622   rp->p_accounting.preempted++;
1623
1624 #if DEBUG_SANITYCHECKS
1625   assert(runqueues_ok_local());
1626 #endif
1627 }
1628
1629 /*===========================================================================*
1630  *                              dequeue                                      *
1631  *===========================================================================*/
1632 void dequeue(struct proc *rp)
1633 /* this process is no longer runnable */
1634 {
1635 /* A process must be removed from the scheduling queues, for example, because
1636  * it has blocked.  If the currently active process is removed, a new process
1637  * is picked to run by calling pick_proc().
1638  *
1639  * This function can operate x-cpu as it always removes the process from the
1640  * queue of the cpu the process is currently assigned to.
1641  */
1642   int q = rp->p_priority;               /* queue to use */
1643   struct proc **xpp;                    /* iterate over queue */
1644   struct proc *prev_xp;
1645   u64_t tsc, tsc_delta;
1646
1647   struct proc **rdy_tail;
1648
1649   assert(proc_ptr_ok(rp));
1650   assert(!proc_is_runnable(rp));
1651
1652   /* Side-effect for kernel: check if the task's stack still is ok? */
1653   assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
1654
1655   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1656
1657   /* Now make sure that the process is not in its ready queue. Remove the
1658    * process if it is found. A process can be made unready even if it is not
1659    * running by being sent a signal that kills it.
1660    */
1661   prev_xp = NULL;
1662   for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
1663                   xpp = &(*xpp)->p_nextready) {
1664       if (*xpp == rp) {                         /* found process to remove */
1665           *xpp = (*xpp)->p_nextready;           /* replace with next chain */
1666           if (rp == rdy_tail[q]) {              /* queue tail removed */
1667               rdy_tail[q] = prev_xp;            /* set new tail */
1668           }
1669
1670           break;
1671       }
1672       prev_xp = *xpp;                           /* save previous in chain */
1673   }
1674
1675
1676   /* Process accounting for scheduling */
1677   rp->p_accounting.dequeues++;
1678
1679   /* this is not all that accurate on virtual machines, especially with
1680      IO bound processes that only spend a short amount of time in the queue
1681      at a time. */
1682   if (!is_zero64(rp->p_accounting.enter_queue)) {
1683         read_tsc_64(&tsc);
1684         tsc_delta = sub64(tsc, rp->p_accounting.enter_queue);
1685         rp->p_accounting.time_in_queue = add64(rp->p_accounting.time_in_queue,
1686                 tsc_delta);
1687         make_zero64(rp->p_accounting.enter_queue);
1688   }
1689
1690
1691 #if DEBUG_SANITYCHECKS
1692   assert(runqueues_ok_local());
1693 #endif
1694 }
1695
1696 /*===========================================================================*
1697  *                              pick_proc                                    *
1698  *===========================================================================*/
1699 static struct proc * pick_proc(void)
1700 {
1701 /* Decide who to run now.  A new process is selected an returned.
1702  * When a billable process is selected, record it in 'bill_ptr', so that the
1703  * clock task can tell who to bill for system time.
1704  *
1705  * This function always uses the run queues of the local cpu!
1706  */
1707   register struct proc *rp;                     /* process to run */
1708   struct proc **rdy_head;
1709   int q;                                /* iterate over queues */
1710
1711   /* Check each of the scheduling queues for ready processes. The number of
1712    * queues is defined in proc.h, and priorities are set in the task table.
1713    * If there are no processes ready to run, return NULL.
1714    */
1715   rdy_head = get_cpulocal_var(run_q_head);
1716   for (q=0; q < NR_SCHED_QUEUES; q++) {
1717         if(!(rp = rdy_head[q])) {
1718                 TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
1719                 continue;
1720         }
1721         assert(proc_is_runnable(rp));
1722         if (priv(rp)->s_flags & BILLABLE)
1723                 get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
1724         return rp;
1725   }
1726   return NULL;
1727 }
1728
1729 /*===========================================================================*
1730  *                              endpoint_lookup                              *
1731  *===========================================================================*/
1732 struct proc *endpoint_lookup(endpoint_t e)
1733 {
1734         int n;
1735
1736         if(!isokendpt(e, &n)) return NULL;
1737
1738         return proc_addr(n);
1739 }
1740
1741 /*===========================================================================*
1742  *                              isokendpt_f                                  *
1743  *===========================================================================*/
1744 #if DEBUG_ENABLE_IPC_WARNINGS
1745 int isokendpt_f(file, line, e, p, fatalflag)
1746 const char *file;
1747 int line;
1748 #else
1749 int isokendpt_f(e, p, fatalflag)
1750 #endif
1751 endpoint_t e;
1752 int *p;
1753 const int fatalflag;
1754 {
1755         int ok = 0;
1756         /* Convert an endpoint number into a process number.
1757          * Return nonzero if the process is alive with the corresponding
1758          * generation number, zero otherwise.
1759          *
1760          * This function is called with file and line number by the
1761          * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
1762          * otherwise without. This allows us to print the where the
1763          * conversion was attempted, making the errors verbose without
1764          * adding code for that at every call.
1765          *
1766          * If fatalflag is nonzero, we must panic if the conversion doesn't
1767          * succeed.
1768          */
1769         *p = _ENDPOINT_P(e);
1770         ok = 0;
1771         if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
1772                 ok = 1;
1773         if(!ok && fatalflag)
1774                 panic("invalid endpoint: %d",  e);
1775         return ok;
1776 }
1777
1778 static void notify_scheduler(struct proc *p)
1779 {
1780         message m_no_quantum;
1781         int err;
1782
1783         assert(!proc_kernel_scheduler(p));
1784
1785         /* dequeue the process */
1786         RTS_SET(p, RTS_NO_QUANTUM);
1787         /*
1788          * Notify the process's scheduler that it has run out of
1789          * quantum. This is done by sending a message to the scheduler
1790          * on the process's behalf
1791          */
1792         m_no_quantum.m_source = p->p_endpoint;
1793         m_no_quantum.m_type   = SCHEDULING_NO_QUANTUM;
1794         m_no_quantum.SCHEDULING_ACNT_QUEUE = cpu_time_2_ms(p->p_accounting.time_in_queue);
1795         m_no_quantum.SCHEDULING_ACNT_DEQS      = p->p_accounting.dequeues;
1796         m_no_quantum.SCHEDULING_ACNT_IPC_SYNC  = p->p_accounting.ipc_sync;
1797         m_no_quantum.SCHEDULING_ACNT_IPC_ASYNC = p->p_accounting.ipc_async;
1798         m_no_quantum.SCHEDULING_ACNT_PREEMPT   = p->p_accounting.preempted;
1799         m_no_quantum.SCHEDULING_ACNT_CPU       = cpuid;
1800         m_no_quantum.SCHEDULING_ACNT_CPU_LOAD  = cpu_load();
1801
1802         /* Reset accounting */
1803         reset_proc_accounting(p);
1804
1805         if ((err = mini_send(p, p->p_scheduler->p_endpoint,
1806                                         &m_no_quantum, FROM_KERNEL))) {
1807                 panic("WARNING: Scheduling: mini_send returned %d\n", err);
1808         }
1809 }
1810
1811 void proc_no_time(struct proc * p)
1812 {
1813         if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
1814                 /* this dequeues the process */
1815                 notify_scheduler(p);
1816         }
1817         else {
1818                 /*
1819                  * non-preemptible processes only need their quantum to
1820                  * be renewed. In fact, they by pass scheduling
1821                  */
1822                 p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
1823 #if DEBUG_RACE
1824                 RTS_SET(p, RTS_PREEMPTED);
1825                 RTS_UNSET(p, RTS_PREEMPTED);
1826 #endif
1827         }
1828 }
1829
1830 void reset_proc_accounting(struct proc *p)
1831 {
1832   p->p_accounting.preempted = 0;
1833   p->p_accounting.ipc_sync  = 0;
1834   p->p_accounting.ipc_async = 0;
1835   p->p_accounting.dequeues  = 0;
1836   make_zero64(p->p_accounting.time_in_queue);
1837   make_zero64(p->p_accounting.enter_queue);
1838 }
1839
1840 void copr_not_available_handler(void)
1841 {
1842         struct proc * p;
1843         struct proc ** local_fpu_owner;
1844         /*
1845          * Disable the FPU exception (both for the kernel and for the process
1846          * once it's scheduled), and initialize or restore the FPU state.
1847          */
1848
1849         disable_fpu_exception();
1850
1851         p = get_cpulocal_var(proc_ptr);
1852
1853         /* if FPU is not owned by anyone, do not store anything */
1854         local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
1855         if (*local_fpu_owner != NULL) {
1856                 assert(*local_fpu_owner != p);
1857                 save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
1858         }
1859
1860         /*
1861          * restore the current process' state and let it run again, do not
1862          * schedule!
1863          */
1864         if (restore_fpu(p) != OK) {
1865                 /* Restoring FPU state failed. This is always the process's own
1866                  * fault. Send a signal, and schedule another process instead.
1867                  */
1868                 *local_fpu_owner = NULL;                /* release FPU */
1869                 cause_sig(proc_nr(p), SIGFPE);
1870                 return;
1871         }
1872
1873         *local_fpu_owner = p;
1874         context_stop(proc_addr(KERNEL));
1875         restore_user_context(p);
1876         NOT_REACHABLE;
1877 }
1878
1879 void release_fpu(struct proc * p) {
1880         struct proc ** fpu_owner_ptr;
1881
1882         fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
1883
1884         if (*fpu_owner_ptr == p)
1885                 *fpu_owner_ptr = NULL;
1886 }