kernel: separate state for trace-deferred syscalls
[minix3.git] / kernel / proc.c
blob3bdabd25ffd7f8936fa092a463224c3dd540dbdb
/* This file contains essentially all of the process and message handling.
 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
 * There is one entry point from the outside:
 *
 *   sys_call:	a system call, i.e., the kernel is trapped with an INT
 *
 * Changes:
 *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
 *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
 *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
 *   May 24, 2005     new notification system call  (Jorrit N. Herder)
 *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
 *
 * The code here is critical to make everything work and is important for the
 * overall performance of the system. A large fraction of the code deals with
 * list manipulation. To make this both easy to understand and fast to execute
 * pointer pointers are used throughout the code. Pointer pointers prevent
 * exceptions for the head or tail of a linked list.
 *
 *  node_t *queue, *new_node;	// assume these as global variables
 *  node_t **xpp = &queue;	// get pointer pointer to head of queue
 *  while (*xpp != NULL)	// find last pointer of the linked list
 *      xpp = &(*xpp)->next;	// get pointer to next pointer
 *  *xpp = new_node;		// now replace the end (the NULL pointer)
 *  new_node->next = NULL;	// and mark the new end of the list
 *
 * For example, when adding a new node to the end of the list, one normally
 * makes an exception for an empty list and looks up the end of the list for
 * nonempty lists. As shown above, this is not required with pointer pointers.
 */
32 #include <minix/com.h>
33 #include <minix/ipcconst.h>
34 #include <stddef.h>
35 #include <signal.h>
36 #include <assert.h>
38 #include "kernel.h"
39 #include "vm.h"
40 #include "clock.h"
41 #include "spinlock.h"
42 #include "arch_proto.h"
44 #include <minix/syslib.h>
46 /* Scheduling and message passing functions */
47 static void idle(void);
48 /**
49 * Made public for use in clock.c (for user-space scheduling)
50 static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
51 *m_ptr, int flags);
53 static int mini_receive(struct proc *caller_ptr, endpoint_t src,
54 message *m_ptr, int flags);
55 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
56 size);
57 static int deadlock(int function, register struct proc *caller,
58 endpoint_t src_dst_e);
59 static int try_async(struct proc *caller_ptr);
60 static int try_one(struct proc *src_ptr, struct proc *dst_ptr);
61 static struct proc * pick_proc(void);
62 static void enqueue_head(struct proc *rp);
64 /* all idles share the same idle_priv structure */
65 static struct priv idle_priv;
/*
 * Write the name of an idle process into 'name': the string "idle" followed
 * by 'n' in decimal without leading zeros. 'n' is clamped to the range
 * 0..999, so at most three digits are produced and 'name' must have room for
 * at least 8 bytes ("idle" + 3 digits + terminating NUL).
 */
static void set_idle_name(char * name, int n)
{
	int i, c;
	int p_z = 0;	/* becomes 1 once the first digit has been written */

	if (n > 999)
		n = 999;
	else if (n < 0)
		n = 0;	/* a negative value would yield non-digit characters */

	name[0] = 'i';
	name[1] = 'd';
	name[2] = 'l';
	name[3] = 'e';

	/* Walk the hundreds, tens and units positions. */
	for (i = 4, c = 100; c > 0; c /= 10) {
		int digit;

		digit = n / c;
		n -= digit * c;

		/* Skip leading zeros, but always print the units digit. */
		if (p_z || digit != 0 || c == 1) {
			p_z = 1;
			name[i++] = '0' + digit;
		}
	}

	name[i] = '\0';
}
#define PICK_ANY		1
#define PICK_HIGHERONLY		2

/*
 * Fill in a notification message at 'm_ptr' destined for 'dst_ptr'.
 * The message type and timestamp are always set. When 'src' is HARDWARE or
 * SYSTEM, the destination's pending interrupt or signal bits, respectively,
 * are copied into the message argument and then cleared; for any other
 * source no argument is filled in.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; invoke it followed by a semicolon. 'm_ptr' and 'dst_ptr'
 * are evaluated more than once and must be free of side effects.
 */
#define BuildNotifyMessage(m_ptr, src, dst_ptr)				\
	do {								\
		(m_ptr)->m_type = NOTIFY_MESSAGE;			\
		(m_ptr)->NOTIFY_TIMESTAMP = get_uptime();		\
		switch (src) {						\
		case HARDWARE:						\
			(m_ptr)->NOTIFY_ARG =				\
				priv(dst_ptr)->s_int_pending;		\
			priv(dst_ptr)->s_int_pending = 0;		\
			break;						\
		case SYSTEM:						\
			(m_ptr)->NOTIFY_ARG =				\
				priv(dst_ptr)->s_sig_pending;		\
			priv(dst_ptr)->s_sig_pending = 0;		\
			break;						\
		default:						\
			break;						\
		}							\
	} while (0)
114 void proc_init(void)
116 struct proc * rp;
117 struct priv *sp;
118 int i;
120 /* Clear the process table. Anounce each slot as empty and set up
121 * mappings for proc_addr() and proc_nr() macros. Do the same for the
122 * table with privilege structures for the system processes.
124 for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
125 rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
126 rp->p_magic = PMAGIC;
127 rp->p_nr = i; /* proc number from ptr */
128 rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
129 rp->p_scheduler = NULL; /* no user space scheduler */
130 rp->p_priority = 0; /* no priority */
131 rp->p_quantum_size_ms = 0; /* no quantum size */
133 /* arch-specific initialization */
134 arch_proc_reset(rp);
136 for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
137 sp->s_proc_nr = NONE; /* initialize as free */
138 sp->s_id = (sys_id_t) i; /* priv structure index */
139 ppriv_addr[i] = sp; /* priv ptr from number */
140 sp->s_sig_mgr = NONE; /* clear signal managers */
141 sp->s_bak_sig_mgr = NONE;
144 idle_priv.s_flags = IDL_F;
145 /* initialize IDLE structures for every CPU */
146 for (i = 0; i < CONFIG_MAX_CPUS; i++) {
147 struct proc * ip = get_cpu_var_ptr(i, idle_proc);
148 ip->p_endpoint = IDLE;
149 ip->p_priv = &idle_priv;
150 /* must not let idle ever get scheduled */
151 ip->p_rts_flags |= RTS_PROC_STOP;
152 set_idle_name(ip->p_name, i);
/*
 * Select the address space to use while a CPU idles. Only does something
 * when the kernel is built with CONFIG_SMP; otherwise it is a no-op.
 */
static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/*
	 * currently we bet that VM is always alive and its pages available so
	 * when the CPU wakes up the kernel is mapped and no surprises happen.
	 * This is only a problem if more than 1 cpus are available
	 */
	switch_address_space(proc_addr(VM_PROC_NR));
#endif
}
168 /*===========================================================================*
169 * idle *
170 *===========================================================================*/
171 static void idle(void)
173 struct proc * p;
175 /* This function is called whenever there is no work to do.
176 * Halt the CPU, and measure how many timestamp counter ticks are
177 * spent not doing anything. This allows test setups to measure
178 * the CPU utiliziation of certain workloads with high precision.
181 p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
182 if (priv(p)->s_flags & BILLABLE)
183 get_cpulocal_var(bill_ptr) = p;
185 switch_address_space_idle();
187 #ifdef CONFIG_SMP
188 get_cpulocal_var(cpu_is_idle) = 1;
189 /* we don't need to keep time on APs as it is handled on the BSP */
190 if (cpuid != bsp_cpu_id)
191 stop_local_timer();
192 else
193 #endif
196 * If the timer has expired while in kernel we must
197 * rearm it before we go to sleep
199 restart_local_timer();
202 /* start accounting for the idle time */
203 context_stop(proc_addr(KERNEL));
204 #if !SPROFILE
205 halt_cpu();
206 #else
207 if (!sprofiling)
208 halt_cpu();
209 else {
210 volatile int * v;
212 v = get_cpulocal_var_ptr(idle_interrupted);
213 interrupts_enable();
214 while (!*v)
215 arch_pause();
216 interrupts_disable();
217 *v = 0;
219 #endif
221 * end of accounting for the idle task does not happen here, the kernel
222 * is handling stuff for quite a while before it gets back here!
226 /*===========================================================================*
227 * switch_to_user *
228 *===========================================================================*/
229 void switch_to_user(void)
231 /* This function is called an instant before proc_ptr is
232 * to be scheduled again.
234 struct proc * p;
235 #ifdef CONFIG_SMP
236 int tlb_must_refresh = 0;
237 #endif
239 p = get_cpulocal_var(proc_ptr);
241 * if the current process is still runnable check the misc flags and let
242 * it run unless it becomes not runnable in the meantime
244 if (proc_is_runnable(p))
245 goto check_misc_flags;
247 * if a process becomes not runnable while handling the misc flags, we
248 * need to pick a new one here and start from scratch. Also if the
249 * current process wasn' runnable, we pick a new one here
251 not_runnable_pick_new:
252 if (proc_is_preempted(p)) {
253 p->p_rts_flags &= ~RTS_PREEMPTED;
254 if (proc_is_runnable(p)) {
255 if (!is_zero64(p->p_cpu_time_left))
256 enqueue_head(p);
257 else
258 enqueue(p);
263 * if we have no process to run, set IDLE as the current process for
264 * time accounting and put the cpu in and idle state. After the next
265 * timer interrupt the execution resumes here and we can pick another
266 * process. If there is still nothing runnable we "schedule" IDLE again
268 while (!(p = pick_proc())) {
269 idle();
272 /* update the global variable */
273 get_cpulocal_var(proc_ptr) = p;
275 #ifdef CONFIG_SMP
276 if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
277 tlb_must_refresh = 1;
278 #endif
279 switch_address_space(p);
281 check_misc_flags:
283 assert(p);
284 assert(proc_is_runnable(p));
285 while (p->p_misc_flags &
286 (MF_KCALL_RESUME | MF_DELIVERMSG |
287 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
289 assert(proc_is_runnable(p));
290 if (p->p_misc_flags & MF_KCALL_RESUME) {
291 kernel_call_resume(p);
293 else if (p->p_misc_flags & MF_DELIVERMSG) {
294 TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
295 p->p_name, p->p_endpoint););
296 delivermsg(p);
298 else if (p->p_misc_flags & MF_SC_DEFER) {
299 /* Perform the system call that we deferred earlier. */
301 assert (!(p->p_misc_flags & MF_SC_ACTIVE));
303 arch_do_syscall(p);
305 /* If the process is stopped for signal delivery, and
306 * not blocked sending a message after the system call,
307 * inform PM.
309 if ((p->p_misc_flags & MF_SIG_DELAY) &&
310 !RTS_ISSET(p, RTS_SENDING))
311 sig_delay_done(p);
313 else if (p->p_misc_flags & MF_SC_TRACE) {
314 /* Trigger a system call leave event if this was a
315 * system call. We must do this after processing the
316 * other flags above, both for tracing correctness and
317 * to be able to use 'break'.
319 if (!(p->p_misc_flags & MF_SC_ACTIVE))
320 break;
322 p->p_misc_flags &=
323 ~(MF_SC_TRACE | MF_SC_ACTIVE);
325 /* Signal the "leave system call" event.
326 * Block the process.
328 cause_sig(proc_nr(p), SIGTRAP);
330 else if (p->p_misc_flags & MF_SC_ACTIVE) {
331 /* If MF_SC_ACTIVE was set, remove it now:
332 * we're leaving the system call.
334 p->p_misc_flags &= ~MF_SC_ACTIVE;
336 break;
340 * the selected process might not be runnable anymore. We have
341 * to checkit and schedule another one
343 if (!proc_is_runnable(p))
344 goto not_runnable_pick_new;
347 * check the quantum left before it runs again. We must do it only here
348 * as we are sure that a possible out-of-quantum message to the
349 * scheduler will not collide with the regular ipc
351 if (is_zero64(p->p_cpu_time_left))
352 proc_no_time(p);
354 * After handling the misc flags the selected process might not be
355 * runnable anymore. We have to checkit and schedule another one
357 if (!proc_is_runnable(p))
358 goto not_runnable_pick_new;
360 TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
361 "pc 0x%08x\n",
362 cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
363 #if DEBUG_TRACE
364 p->p_schedules++;
365 #endif
367 p = arch_finish_switch_to_user();
368 assert(!is_zero64(p->p_cpu_time_left));
370 context_stop(proc_addr(KERNEL));
372 /* If the process isn't the owner of FPU, enable the FPU exception */
373 if(get_cpulocal_var(fpu_owner) != p)
374 enable_fpu_exception();
375 else
376 disable_fpu_exception();
378 /* If MF_CONTEXT_SET is set, don't clobber process state within
379 * the kernel. The next kernel entry is OK again though.
381 p->p_misc_flags &= ~MF_CONTEXT_SET;
383 #if defined(__i386__)
384 assert(p->p_seg.p_cr3 != 0);
385 #elif defined(__arm__)
386 assert(p->p_seg.p_ttbr != 0);
387 #endif
388 #ifdef CONFIG_SMP
389 if (p->p_misc_flags & MF_FLUSH_TLB) {
390 if (tlb_must_refresh)
391 refresh_tlb();
392 p->p_misc_flags &= ~MF_FLUSH_TLB;
394 #endif
396 restart_local_timer();
399 * restore_user_context() carries out the actual mode switch from kernel
400 * to userspace. This function does not return
402 restore_user_context(p);
403 NOT_REACHABLE;
407 * handler for all synchronous IPC calls
409 static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
410 int call_nr, /* system call number and flags */
411 endpoint_t src_dst_e, /* src or dst of the call */
412 message *m_ptr) /* users pointer to a message */
414 int result; /* the system call's result */
415 int src_dst_p; /* Process slot number */
416 char *callname;
418 /* Check destination. RECEIVE is the only call that accepts ANY (in addition
419 * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
420 * endpoint to corresponds to a process. In addition, it is necessary to check
421 * whether a process is allowed to send to a given destination.
423 assert(call_nr != SENDA);
425 /* Only allow non-negative call_nr values less than 32 */
426 if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
427 || !(callname = ipc_call_names[call_nr])) {
428 #if DEBUG_ENABLE_IPC_WARNINGS
429 printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
430 call_nr, proc_nr(caller_ptr), src_dst_e);
431 #endif
432 return(ETRAPDENIED); /* trap denied by mask or kernel */
435 if (src_dst_e == ANY)
437 if (call_nr != RECEIVE)
439 #if 0
440 printf("sys_call: %s by %d with bad endpoint %d\n",
441 callname,
442 proc_nr(caller_ptr), src_dst_e);
443 #endif
444 return EINVAL;
446 src_dst_p = (int) src_dst_e;
448 else
450 /* Require a valid source and/or destination process. */
451 if(!isokendpt(src_dst_e, &src_dst_p)) {
452 #if 0
453 printf("sys_call: %s by %d with bad endpoint %d\n",
454 callname,
455 proc_nr(caller_ptr), src_dst_e);
456 #endif
457 return EDEADSRCDST;
460 /* If the call is to send to a process, i.e., for SEND, SENDNB,
461 * SENDREC or NOTIFY, verify that the caller is allowed to send to
462 * the given destination.
464 if (call_nr != RECEIVE)
466 if (!may_send_to(caller_ptr, src_dst_p)) {
467 #if DEBUG_ENABLE_IPC_WARNINGS
468 printf(
469 "sys_call: ipc mask denied %s from %d to %d\n",
470 callname,
471 caller_ptr->p_endpoint, src_dst_e);
472 #endif
473 return(ECALLDENIED); /* call denied by ipc mask */
478 /* Check if the process has privileges for the requested call. Calls to the
479 * kernel may only be SENDREC, because tasks always reply and may not block
480 * if the caller doesn't do receive().
482 if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
483 #if DEBUG_ENABLE_IPC_WARNINGS
484 printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
485 callname, proc_nr(caller_ptr), src_dst_p);
486 #endif
487 return(ETRAPDENIED); /* trap denied by mask or kernel */
490 if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
491 #if DEBUG_ENABLE_IPC_WARNINGS
492 printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
493 callname, proc_nr(caller_ptr), src_dst_e);
494 #endif
495 return(ETRAPDENIED); /* trap denied by mask or kernel */
498 switch(call_nr) {
499 case SENDREC:
500 /* A flag is set so that notifications cannot interrupt SENDREC. */
501 caller_ptr->p_misc_flags |= MF_REPLY_PEND;
502 /* fall through */
503 case SEND:
504 result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
505 if (call_nr == SEND || result != OK)
506 break; /* done, or SEND failed */
507 /* fall through for SENDREC */
508 case RECEIVE:
509 if (call_nr == RECEIVE) {
510 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
511 IPC_STATUS_CLEAR(caller_ptr); /* clear IPC status code */
513 result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
514 break;
515 case NOTIFY:
516 result = mini_notify(caller_ptr, src_dst_e);
517 break;
518 case SENDNB:
519 result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
520 break;
521 default:
522 result = EBADCALL; /* illegal system call */
525 /* Now, return the result of the system call to the caller. */
526 return(result);
529 int do_ipc(reg_t r1, reg_t r2, reg_t r3)
531 struct proc *const caller_ptr = get_cpulocal_var(proc_ptr); /* get pointer to caller */
532 int call_nr = (int) r1;
534 assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));
536 /* bill kernel time to this process. */
537 kbill_ipc = caller_ptr;
539 /* If this process is subject to system call tracing, handle that first. */
540 if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
541 /* Are we tracing this process, and is it the first sys_call entry? */
542 if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
543 MF_SC_TRACE) {
544 /* We must notify the tracer before processing the actual
545 * system call. If we don't, the tracer could not obtain the
546 * input message. Postpone the entire system call.
548 caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
549 assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
550 caller_ptr->p_misc_flags |= MF_SC_DEFER;
551 caller_ptr->p_defer.r1 = r1;
552 caller_ptr->p_defer.r2 = r2;
553 caller_ptr->p_defer.r3 = r3;
555 /* Signal the "enter system call" event. Block the process. */
556 cause_sig(proc_nr(caller_ptr), SIGTRAP);
558 /* Preserve the return register's value. */
559 return caller_ptr->p_reg.retreg;
562 /* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
563 caller_ptr->p_misc_flags &= ~MF_SC_DEFER;
565 assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));
567 /* Set a flag to allow reliable tracing of leaving the system call. */
568 caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
571 if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
572 panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
573 caller_ptr->p_name, caller_ptr->p_endpoint);
576 /* Now check if the call is known and try to perform the request. The only
577 * system calls that exist in MINIX are sending and receiving messages.
578 * - SENDREC: combines SEND and RECEIVE in a single system call
579 * - SEND: sender blocks until its message has been delivered
580 * - RECEIVE: receiver blocks until an acceptable message has arrived
581 * - NOTIFY: asynchronous call; deliver notification or mark pending
582 * - SENDA: list of asynchronous send requests
584 switch(call_nr) {
585 case SENDREC:
586 case SEND:
587 case RECEIVE:
588 case NOTIFY:
589 case SENDNB:
591 /* Process accounting for scheduling */
592 caller_ptr->p_accounting.ipc_sync++;
594 return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
595 (message *) r3);
597 case SENDA:
600 * Get and check the size of the argument in bytes as it is a
601 * table
603 size_t msg_size = (size_t) r2;
605 /* Process accounting for scheduling */
606 caller_ptr->p_accounting.ipc_async++;
608 /* Limit size to something reasonable. An arbitrary choice is 16
609 * times the number of process table entries.
611 if (msg_size > 16*(NR_TASKS + NR_PROCS))
612 return EDOM;
613 return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
615 case MINIX_KERNINFO:
617 /* It might not be initialized yet. */
618 if(!minix_kerninfo_user) {
619 return EBADCALL;
622 arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
623 return OK;
625 default:
626 return EBADCALL; /* illegal system call */
630 /*===========================================================================*
631 * deadlock *
632 *===========================================================================*/
633 static int deadlock(function, cp, src_dst_e)
634 int function; /* trap number */
635 register struct proc *cp; /* pointer to caller */
636 endpoint_t src_dst_e; /* src or dst process */
638 /* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
639 * a cyclic dependency of blocking send and receive calls. The only cyclic
640 * depency that is not fatal is if the caller and target directly SEND(REC)
641 * and RECEIVE to each other. If a deadlock is found, the group size is
642 * returned. Otherwise zero is returned.
644 register struct proc *xp; /* process pointer */
645 int group_size = 1; /* start with only caller */
646 #if DEBUG_ENABLE_IPC_WARNINGS
647 static struct proc *processes[NR_PROCS + NR_TASKS];
648 processes[0] = cp;
649 #endif
651 while (src_dst_e != ANY) { /* check while process nr */
652 int src_dst_slot;
653 okendpt(src_dst_e, &src_dst_slot);
654 xp = proc_addr(src_dst_slot); /* follow chain of processes */
655 assert(proc_ptr_ok(xp));
656 assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
657 #if DEBUG_ENABLE_IPC_WARNINGS
658 processes[group_size] = xp;
659 #endif
660 group_size ++; /* extra process in group */
662 /* Check whether the last process in the chain has a dependency. If it
663 * has not, the cycle cannot be closed and we are done.
665 if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
666 return 0;
668 /* Now check if there is a cyclic dependency. For group sizes of two,
669 * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
670 * or other combinations indicate a deadlock.
672 if (src_dst_e == cp->p_endpoint) { /* possible deadlock */
673 if (group_size == 2) { /* caller and src_dst */
674 /* The function number is magically converted to flags. */
675 if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
676 return(0); /* not a deadlock */
679 #if DEBUG_ENABLE_IPC_WARNINGS
681 int i;
682 printf("deadlock between these processes:\n");
683 for(i = 0; i < group_size; i++) {
684 printf(" %10s ", processes[i]->p_name);
686 printf("\n\n");
687 for(i = 0; i < group_size; i++) {
688 print_proc(processes[i]);
689 proc_stacktrace(processes[i]);
692 #endif
693 return(group_size); /* deadlock found */
696 return(0); /* not a deadlock */
699 /*===========================================================================*
700 * has_pending *
701 *===========================================================================*/
702 static int has_pending(sys_map_t *map, int src_p, int asynm)
704 /* Check to see if there is a pending message from the desired source
705 * available.
708 int src_id;
709 sys_id_t id = NULL_PRIV_ID;
710 #ifdef CONFIG_SMP
711 struct proc * p;
712 #endif
714 /* Either check a specific bit in the mask map, or find the first bit set in
715 * it (if any), depending on whether the receive was called on a specific
716 * source endpoint.
718 if (src_p != ANY) {
719 src_id = nr_to_id(src_p);
720 if (get_sys_bit(*map, src_id)) {
721 #ifdef CONFIG_SMP
722 p = proc_addr(id_to_nr(src_id));
723 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
724 p->p_misc_flags |= MF_SENDA_VM_MISS;
725 else
726 #endif
727 id = src_id;
729 } else {
730 /* Find a source with a pending message */
731 for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
732 if (get_sys_bits(*map, src_id) != 0) {
733 #ifdef CONFIG_SMP
734 while (src_id < NR_SYS_PROCS) {
735 while (!get_sys_bit(*map, src_id)) {
736 if (src_id == NR_SYS_PROCS)
737 goto quit_search;
738 src_id++;
740 p = proc_addr(id_to_nr(src_id));
742 * We must not let kernel fiddle with pages of a
743 * process which are currently being changed by
744 * VM. It is dangerous! So do not report such a
745 * process as having pending async messages.
746 * Skip it.
748 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
749 p->p_misc_flags |= MF_SENDA_VM_MISS;
750 src_id++;
751 } else
752 goto quit_search;
754 #else
755 while (!get_sys_bit(*map, src_id)) src_id++;
756 goto quit_search;
757 #endif
761 quit_search:
762 if (src_id < NR_SYS_PROCS) /* Found one */
763 id = src_id;
766 return(id);
769 /*===========================================================================*
770 * has_pending_notify *
771 *===========================================================================*/
772 int has_pending_notify(struct proc * caller, int src_p)
774 sys_map_t * map = &priv(caller)->s_notify_pending;
775 return has_pending(map, src_p, 0);
778 /*===========================================================================*
779 * has_pending_asend *
780 *===========================================================================*/
781 int has_pending_asend(struct proc * caller, int src_p)
783 sys_map_t * map = &priv(caller)->s_asyn_pending;
784 return has_pending(map, src_p, 1);
787 /*===========================================================================*
788 * unset_notify_pending *
789 *===========================================================================*/
790 void unset_notify_pending(struct proc * caller, int src_p)
792 sys_map_t * map = &priv(caller)->s_notify_pending;
793 unset_sys_bit(*map, src_p);
796 /*===========================================================================*
797 * mini_send *
798 *===========================================================================*/
799 int mini_send(
800 register struct proc *caller_ptr, /* who is trying to send a message? */
801 endpoint_t dst_e, /* to whom is message being sent? */
802 message *m_ptr, /* pointer to message buffer */
803 const int flags
806 /* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
807 * for this message, copy the message to it and unblock 'dst'. If 'dst' is
808 * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
810 register struct proc *dst_ptr;
811 register struct proc **xpp;
812 int dst_p;
813 dst_p = _ENDPOINT_P(dst_e);
814 dst_ptr = proc_addr(dst_p);
816 if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
818 return EDEADSRCDST;
821 /* Check if 'dst' is blocked waiting for this message. The destination's
822 * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
824 if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
825 int call;
826 /* Destination is indeed waiting for this message. */
827 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
829 if (!(flags & FROM_KERNEL)) {
830 if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
831 return EFAULT;
832 } else {
833 dst_ptr->p_delivermsg = *m_ptr;
834 IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
837 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
838 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
840 call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
841 : (flags & NON_BLOCKING ? SENDNB : SEND));
842 IPC_STATUS_ADD_CALL(dst_ptr, call);
844 if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
845 dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
847 RTS_UNSET(dst_ptr, RTS_RECEIVING);
849 #if DEBUG_IPC_HOOK
850 hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
851 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
852 #endif
853 } else {
854 if(flags & NON_BLOCKING) {
855 return(ENOTREADY);
858 /* Check for a possible deadlock before actually blocking. */
859 if (deadlock(SEND, caller_ptr, dst_e)) {
860 return(ELOCKED);
863 /* Destination is not waiting. Block and dequeue caller. */
864 if (!(flags & FROM_KERNEL)) {
865 if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
866 return EFAULT;
867 } else {
868 caller_ptr->p_sendmsg = *m_ptr;
870 * we need to remember that this message is from kernel so we
871 * can set the delivery status flags when the message is
872 * actually delivered
874 caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
877 RTS_SET(caller_ptr, RTS_SENDING);
878 caller_ptr->p_sendto_e = dst_e;
880 /* Process is now blocked. Put in on the destination's queue. */
881 assert(caller_ptr->p_q_link == NULL);
882 xpp = &dst_ptr->p_caller_q; /* find end of list */
883 while (*xpp) xpp = &(*xpp)->p_q_link;
884 *xpp = caller_ptr; /* add caller to end */
886 #if DEBUG_IPC_HOOK
887 hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
888 #endif
890 return(OK);
893 /*===========================================================================*
894 * mini_receive *
895 *===========================================================================*/
896 static int mini_receive(struct proc * caller_ptr,
897 endpoint_t src_e, /* which message source is wanted */
898 message * m_buff_usr, /* pointer to message buffer */
899 const int flags)
901 /* A process or task wants to get a message. If a message is already queued,
902 * acquire it and deblock the sender. If no message from the desired source
903 * is available block the caller.
905 register struct proc **xpp;
906 int r, src_id, src_proc_nr, src_p;
908 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
910 /* This is where we want our message. */
911 caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
913 if(src_e == ANY) src_p = ANY;
914 else
916 okendpt(src_e, &src_p);
917 if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
919 return EDEADSRCDST;
924 /* Check to see if a message from desired source is already available. The
925 * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
926 * set, the process should be blocked.
928 if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
930 /* Check if there are pending notifications, except for SENDREC. */
931 if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
933 /* Check for pending notifications */
934 if ((src_id = has_pending_notify(caller_ptr, src_p)) != NULL_PRIV_ID) {
935 endpoint_t hisep;
937 src_proc_nr = id_to_nr(src_id); /* get source proc */
938 #if DEBUG_ENABLE_IPC_WARNINGS
939 if(src_proc_nr == NONE) {
940 printf("mini_receive: sending notify from NONE\n");
942 #endif
943 assert(src_proc_nr != NONE);
944 unset_notify_pending(caller_ptr, src_id); /* no longer pending */
946 /* Found a suitable source, deliver the notification message. */
947 hisep = proc_addr(src_proc_nr)->p_endpoint;
948 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
949 assert(src_e == ANY || hisep == src_e);
951 /* assemble message */
952 BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
953 caller_ptr->p_delivermsg.m_source = hisep;
954 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
956 IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
958 goto receive_done;
962 /* Check for pending asynchronous messages */
963 if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
964 if (src_p != ANY)
965 r = try_one(proc_addr(src_p), caller_ptr);
966 else
967 r = try_async(caller_ptr);
969 if (r == OK) {
970 IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
971 goto receive_done;
975 /* Check caller queue. Use pointer pointers to keep code simple. */
976 xpp = &caller_ptr->p_caller_q;
977 while (*xpp) {
978 struct proc * sender = *xpp;
980 if (src_e == ANY || src_p == proc_nr(sender)) {
981 int call;
982 assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
983 assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
985 /* Found acceptable message. Copy it and update status. */
986 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
987 caller_ptr->p_delivermsg = sender->p_sendmsg;
988 caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
989 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
990 RTS_UNSET(sender, RTS_SENDING);
992 call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
993 IPC_STATUS_ADD_CALL(caller_ptr, call);
996 * if the message is originaly from the kernel on behalf of this
997 * process, we must send the status flags accordingly
999 if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
1000 IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
1001 /* we can clean the flag now, not need anymore */
1002 sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
1004 if (sender->p_misc_flags & MF_SIG_DELAY)
1005 sig_delay_done(sender);
1007 #if DEBUG_IPC_HOOK
1008 hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
1009 #endif
1011 *xpp = sender->p_q_link; /* remove from queue */
1012 sender->p_q_link = NULL;
1013 goto receive_done;
1015 xpp = &sender->p_q_link; /* proceed to next */
1019 /* No suitable message is available or the caller couldn't send in SENDREC.
1020 * Block the process trying to receive, unless the flags tell otherwise.
1022 if ( ! (flags & NON_BLOCKING)) {
1023 /* Check for a possible deadlock before actually blocking. */
1024 if (deadlock(RECEIVE, caller_ptr, src_e)) {
1025 return(ELOCKED);
1028 caller_ptr->p_getfrom_e = src_e;
1029 RTS_SET(caller_ptr, RTS_RECEIVING);
1030 return(OK);
1031 } else {
1032 return(ENOTREADY);
1035 receive_done:
1036 if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
1037 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
1038 return OK;
1041 /*===========================================================================*
1042 * mini_notify *
1043 *===========================================================================*/
1044 int mini_notify(
1045 const struct proc *caller_ptr, /* sender of the notification */
1046 endpoint_t dst_e /* which process to notify */
1049 register struct proc *dst_ptr;
1050 int src_id; /* source id for late delivery */
1051 int dst_p;
1053 if (!isokendpt(dst_e, &dst_p)) {
1054 util_stacktrace();
1055 printf("mini_notify: bogus endpoint %d\n", dst_e);
1056 return EDEADSRCDST;
1059 dst_ptr = proc_addr(dst_p);
1061 /* Check to see if target is blocked waiting for this message. A process
1062 * can be both sending and receiving during a SENDREC system call.
1064 if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1065 ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
1066 /* Destination is indeed waiting for a message. Assemble a notification
1067 * message and deliver it. Copy from pseudo-source HARDWARE, since the
1068 * message is in the kernel's address space.
1070 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
1072 BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
1073 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1074 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1076 IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
1077 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1079 return(OK);
1082 /* Destination is not ready to receive the notification. Add it to the
1083 * bit map with pending notifications. Note the indirectness: the privilege id
1084 * instead of the process number is used in the pending bit map.
1086 src_id = priv(caller_ptr)->s_id;
1087 set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
1088 return(OK);
/* Helper macros for walking a process's asynchronous-send table.
 *
 * All of the A_* macros expect the following names in the expanding scope:
 *   caller_ptr  - process that owns the table,
 *   table_v     - virtual address of the table in that process,
 *   tabent      - kernel-side asynmsg_t scratch copy of one entry,
 *   r           - int that receives EFAULT when the copy fails,
 *   asyn_error  - label that is jumped to when the copy fails.
 * Each of them expands to a single statement (do { } while(0)), so they are
 * safe to use in unbraced if/else bodies.
 */

/* Report a failed copy of table slot 'entry' ('field' is a string naming what
 * was being copied). The numeric arguments are cast so that they match the
 * conversion specifiers regardless of how the underlying typedefs are defined.
 */
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%d, tab 0x%lx)\n",__FILE__,__LINE__,	\
	(field), (caller)->p_name, (int) (entry),	\
	(int) priv(caller)->s_asynsize,			\
	(unsigned long) priv(caller)->s_asyntab)

/* Fetch a single field of table slot 'entry' into tabent.field. */
#define A_RETR_FLD(entry, field) do {		\
  if (data_copy(caller_ptr->p_endpoint,		\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	KERNEL, (vir_bytes) &tabent.field,	\
	sizeof(tabent.field)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
} while(0)

/* Fetch the whole table slot 'entry' into tabent. */
#define A_RETR(entry) do {			\
  if (data_copy(				\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	KERNEL, (vir_bytes) &tabent,		\
	sizeof(tabent)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
} while(0)

/* Write tabent.field back into the matching field of table slot 'entry'. */
#define A_INSRT_FLD(entry, field) do {		\
  if (data_copy(KERNEL, (vir_bytes) &tabent.field,	\
	caller_ptr->p_endpoint,			\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	sizeof(tabent.field)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
} while(0)

/* Write the whole of tabent back into table slot 'entry'. */
#define A_INSRT(entry) do {			\
  if (data_copy(KERNEL, (vir_bytes) &tabent,	\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	sizeof(tabent)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
} while(0)
1137 /*===========================================================================*
1138 * try_deliver_senda *
1139 *===========================================================================*/
1140 int try_deliver_senda(struct proc *caller_ptr,
1141 asynmsg_t *table,
1142 size_t size)
1144 int r, dst_p, done, do_notify;
1145 unsigned int i;
1146 unsigned flags;
1147 endpoint_t dst;
1148 struct proc *dst_ptr;
1149 struct priv *privp;
1150 asynmsg_t tabent;
1151 const vir_bytes table_v = (vir_bytes) table;
1153 privp = priv(caller_ptr);
1155 /* Clear table */
1156 privp->s_asyntab = -1;
1157 privp->s_asynsize = 0;
1159 if (size == 0) return(OK); /* Nothing to do, just return */
1161 /* Scan the table */
1162 do_notify = FALSE;
1163 done = TRUE;
1165 /* Limit size to something reasonable. An arbitrary choice is 16
1166 * times the number of process table entries.
1168 * (this check has been duplicated in sys_call but is left here
1169 * as a sanity check)
1171 if (size > 16*(NR_TASKS + NR_PROCS)) {
1172 r = EDOM;
1173 return r;
1176 for (i = 0; i < size; i++) {
1177 /* Process each entry in the table and store the result in the table.
1178 * If we're done handling a message, copy the result to the sender. */
1180 dst = NONE;
1181 /* Copy message to kernel */
1182 A_RETR(i);
1183 flags = tabent.flags;
1184 dst = tabent.dst;
1186 if (flags == 0) continue; /* Skip empty entries */
1188 /* 'flags' field must contain only valid bits */
1189 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
1190 r = EINVAL;
1191 goto asyn_error;
1193 if (!(flags & AMF_VALID)) { /* Must contain message */
1194 r = EINVAL;
1195 goto asyn_error;
1197 if (flags & AMF_DONE) continue; /* Already done processing */
1199 r = OK;
1200 if (!isokendpt(tabent.dst, &dst_p))
1201 r = EDEADSRCDST; /* Bad destination, report the error */
1202 else if (iskerneln(dst_p))
1203 r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
1204 else if (!may_send_to(caller_ptr, dst_p))
1205 r = ECALLDENIED; /* Send denied by IPC mask */
1206 else /* r == OK */
1207 dst_ptr = proc_addr(dst_p);
1209 /* XXX: RTS_NO_ENDPOINT should be removed */
1210 if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
1211 r = EDEADSRCDST;
1214 /* Check if 'dst' is blocked waiting for this message.
1215 * If AMF_NOREPLY is set, do not satisfy the receiving part of
1216 * a SENDREC.
1218 if (r == OK && WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1219 (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
1220 /* Destination is indeed waiting for this message. */
1221 dst_ptr->p_delivermsg = tabent.msg;
1222 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1223 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1224 IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
1225 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1226 } else if (r == OK) {
1227 /* Inform receiver that something is pending */
1228 set_sys_bit(priv(dst_ptr)->s_asyn_pending,
1229 priv(caller_ptr)->s_id);
1230 done = FALSE;
1231 continue;
1234 /* Store results */
1235 tabent.result = r;
1236 tabent.flags = flags | AMF_DONE;
1237 if (flags & AMF_NOTIFY)
1238 do_notify = TRUE;
1239 else if (r != OK && (flags & AMF_NOTIFY_ERR))
1240 do_notify = TRUE;
1241 A_INSRT(i); /* Copy results to caller */
1242 continue;
1244 asyn_error:
1245 if (dst != NONE)
1246 printf("KERNEL senda error %d to %d\n", r, dst);
1247 else
1248 printf("KERNEL senda error %d\n", r);
1251 if (do_notify)
1252 mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
1254 if (!done) {
1255 privp->s_asyntab = (vir_bytes) table;
1256 privp->s_asynsize = size;
1259 return(OK);
1262 /*===========================================================================*
1263 * mini_senda *
1264 *===========================================================================*/
1265 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
1267 struct priv *privp;
1269 privp = priv(caller_ptr);
1270 if (!(privp->s_flags & SYS_PROC)) {
1271 printf( "mini_senda: warning caller has no privilege structure\n");
1272 return(EPERM);
1275 return try_deliver_senda(caller_ptr, table, size);
1279 /*===========================================================================*
1280 * try_async *
1281 *===========================================================================*/
1282 static int try_async(caller_ptr)
1283 struct proc *caller_ptr;
1285 int r;
1286 struct priv *privp;
1287 struct proc *src_ptr;
1288 sys_map_t *map;
1290 map = &priv(caller_ptr)->s_asyn_pending;
1292 /* Try all privilege structures */
1293 for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) {
1294 if (privp->s_proc_nr == NONE)
1295 continue;
1297 if (!get_sys_bit(*map, privp->s_id))
1298 continue;
1300 src_ptr = proc_addr(privp->s_proc_nr);
1302 #ifdef CONFIG_SMP
1304 * Do not copy from a process which does not have a stable address space
1305 * due to VM fiddling with it
1307 if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
1308 src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
1309 continue;
1311 #endif
1313 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1314 if ((r = try_one(src_ptr, caller_ptr)) == OK)
1315 return(r);
1318 return(ESRCH);
1322 /*===========================================================================*
1323 * try_one *
1324 *===========================================================================*/
1325 static int try_one(struct proc *src_ptr, struct proc *dst_ptr)
1327 /* Try to receive an asynchronous message from 'src_ptr' */
1328 int r = EAGAIN, done, do_notify;
1329 unsigned int flags, i;
1330 size_t size;
1331 endpoint_t dst;
1332 struct proc *caller_ptr;
1333 struct priv *privp;
1334 asynmsg_t tabent;
1335 vir_bytes table_v;
1337 privp = priv(src_ptr);
1338 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1339 size = privp->s_asynsize;
1340 table_v = privp->s_asyntab;
1342 /* Clear table pending message flag. We're done unless we're not. */
1343 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1345 if (size == 0) return(EAGAIN);
1346 if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1348 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1350 /* Scan the table */
1351 do_notify = FALSE;
1352 done = TRUE;
1354 for (i = 0; i < size; i++) {
1355 /* Process each entry in the table and store the result in the table.
1356 * If we're done handling a message, copy the result to the sender.
1357 * Some checks done in mini_senda are duplicated here, as the sender
1358 * could've altered the contents of the table in the meantime.
1361 /* Copy message to kernel */
1362 A_RETR(i);
1363 flags = tabent.flags;
1364 dst = tabent.dst;
1366 if (flags == 0) continue; /* Skip empty entries */
1368 /* 'flags' field must contain only valid bits */
1369 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1370 r = EINVAL;
1371 else if (!(flags & AMF_VALID)) /* Must contain message */
1372 r = EINVAL;
1373 else if (flags & AMF_DONE) continue; /* Already done processing */
1375 /* Clear done flag. The sender is done sending when all messages in the
1376 * table are marked done or empty. However, we will know that only
1377 * the next time we enter this function or when the sender decides to
1378 * send additional asynchronous messages and manages to deliver them
1379 * all.
1381 done = FALSE;
1383 if (r == EINVAL)
1384 goto store_result;
1386 /* Message must be directed at receiving end */
1387 if (dst != dst_ptr->p_endpoint) continue;
1389 /* If AMF_NOREPLY is set, then this message is not a reply to a
1390 * SENDREC and thus should not satisfy the receiving part of the
1391 * SENDREC. This message is to be delivered later.
1393 if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
1394 continue;
1396 /* Destination is ready to receive the message; deliver it */
1397 r = OK;
1398 dst_ptr->p_delivermsg = tabent.msg;
1399 dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
1400 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1402 store_result:
1403 /* Store results for sender */
1404 tabent.result = r;
1405 tabent.flags = flags | AMF_DONE;
1406 if (flags & AMF_NOTIFY) do_notify = TRUE;
1407 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1408 A_INSRT(i); /* Copy results to sender */
1410 break;
1413 if (do_notify)
1414 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1416 if (done) {
1417 privp->s_asyntab = -1;
1418 privp->s_asynsize = 0;
1419 } else {
1420 set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1423 asyn_error:
1424 return(r);
1427 /*===========================================================================*
1428 * cancel_async *
1429 *===========================================================================*/
1430 int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
1432 /* Cancel asynchronous messages from src to dst, because dst is not interested
1433 * in them (e.g., dst has been restarted) */
1434 int done, do_notify;
1435 unsigned int flags, i;
1436 size_t size;
1437 endpoint_t dst;
1438 struct proc *caller_ptr;
1439 struct priv *privp;
1440 asynmsg_t tabent;
1441 vir_bytes table_v;
1443 privp = priv(src_ptr);
1444 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1445 size = privp->s_asynsize;
1446 table_v = privp->s_asyntab;
1448 /* Clear table pending message flag. We're done unless we're not. */
1449 privp->s_asyntab = -1;
1450 privp->s_asynsize = 0;
1451 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1453 if (size == 0) return(EAGAIN);
1454 if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1456 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1458 /* Scan the table */
1459 do_notify = FALSE;
1460 done = TRUE;
1463 for (i = 0; i < size; i++) {
1464 /* Process each entry in the table and store the result in the table.
1465 * If we're done handling a message, copy the result to the sender.
1466 * Some checks done in mini_senda are duplicated here, as the sender
1467 * could've altered the contents of the table in the mean time.
1470 int r = EDEADSRCDST; /* Cancel delivery due to dead dst */
1472 /* Copy message to kernel */
1473 A_RETR(i);
1474 flags = tabent.flags;
1475 dst = tabent.dst;
1477 if (flags == 0) continue; /* Skip empty entries */
1479 /* 'flags' field must contain only valid bits */
1480 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1481 r = EINVAL;
1482 else if (!(flags & AMF_VALID)) /* Must contain message */
1483 r = EINVAL;
1484 else if (flags & AMF_DONE) continue; /* Already done processing */
1486 /* Message must be directed at receiving end */
1487 if (dst != dst_ptr->p_endpoint) {
1488 done = FALSE;
1489 continue;
1492 /* Store results for sender */
1493 tabent.result = r;
1494 tabent.flags = flags | AMF_DONE;
1495 if (flags & AMF_NOTIFY) do_notify = TRUE;
1496 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1497 A_INSRT(i); /* Copy results to sender */
1500 if (do_notify)
1501 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1503 if (!done) {
1504 privp->s_asyntab = table_v;
1505 privp->s_asynsize = size;
1508 asyn_error:
1509 return(OK);
1512 /*===========================================================================*
1513 * enqueue *
1514 *===========================================================================*/
1515 void enqueue(
1516 register struct proc *rp /* this process is now runnable */
1519 /* Add 'rp' to one of the queues of runnable processes. This function is
1520 * responsible for inserting a process into one of the scheduling queues.
1521 * The mechanism is implemented here. The actual scheduling policy is
1522 * defined in sched() and pick_proc().
1524 * This function can be used x-cpu as it always uses the queues of the cpu the
1525 * process is assigned to.
1527 int q = rp->p_priority; /* scheduling queue to use */
1528 struct proc **rdy_head, **rdy_tail;
1530 assert(proc_is_runnable(rp));
1532 assert(q >= 0);
1534 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1535 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1537 /* Now add the process to the queue. */
1538 if (!rdy_head[q]) { /* add to empty queue */
1539 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1540 rp->p_nextready = NULL; /* mark new end */
1542 else { /* add to tail of queue */
1543 rdy_tail[q]->p_nextready = rp; /* chain tail of queue */
1544 rdy_tail[q] = rp; /* set new queue tail */
1545 rp->p_nextready = NULL; /* mark new end */
1548 if (cpuid == rp->p_cpu) {
1550 * enqueueing a process with a higher priority than the current one,
1551 * it gets preempted. The current process must be preemptible. Testing
1552 * the priority also makes sure that a process does not preempt itself
1554 struct proc * p;
1555 p = get_cpulocal_var(proc_ptr);
1556 assert(p);
1557 if((p->p_priority > rp->p_priority) &&
1558 (priv(p)->s_flags & PREEMPTIBLE))
1559 RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
1561 #ifdef CONFIG_SMP
1563 * if the process was enqueued on a different cpu and the cpu is idle, i.e.
1564 * the time is off, we need to wake up that cpu and let it schedule this new
1565 * process
1567 else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
1568 smp_schedule(rp->p_cpu);
1570 #endif
1572 /* Make note of when this process was added to queue */
1573 read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue));
1576 #if DEBUG_SANITYCHECKS
1577 assert(runqueues_ok_local());
1578 #endif
1581 /*===========================================================================*
1582 * enqueue_head *
1583 *===========================================================================*/
1585 * put a process at the front of its run queue. It comes handy when a process is
1586 * preempted and removed from run queue to not to have a currently not-runnable
1587 * process on a run queue. We have to put this process back at the fron to be
1588 * fair
1590 static void enqueue_head(struct proc *rp)
1592 const int q = rp->p_priority; /* scheduling queue to use */
1594 struct proc **rdy_head, **rdy_tail;
1596 assert(proc_ptr_ok(rp));
1597 assert(proc_is_runnable(rp));
1600 * the process was runnable without its quantum expired when dequeued. A
1601 * process with no time left should vahe been handled else and differently
1603 assert(!is_zero64(rp->p_cpu_time_left));
1605 assert(q >= 0);
1608 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1609 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1611 /* Now add the process to the queue. */
1612 if (!rdy_head[q]) { /* add to empty queue */
1613 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1614 rp->p_nextready = NULL; /* mark new end */
1616 else /* add to head of queue */
1617 rp->p_nextready = rdy_head[q]; /* chain head of queue */
1618 rdy_head[q] = rp; /* set new queue head */
1620 /* Make note of when this process was added to queue */
1621 read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue)));
1624 /* Process accounting for scheduling */
1625 rp->p_accounting.dequeues--;
1626 rp->p_accounting.preempted++;
1628 #if DEBUG_SANITYCHECKS
1629 assert(runqueues_ok_local());
1630 #endif
1633 /*===========================================================================*
1634 * dequeue *
1635 *===========================================================================*/
1636 void dequeue(struct proc *rp)
1637 /* this process is no longer runnable */
1639 /* A process must be removed from the scheduling queues, for example, because
1640 * it has blocked. If the currently active process is removed, a new process
1641 * is picked to run by calling pick_proc().
1643 * This function can operate x-cpu as it always removes the process from the
1644 * queue of the cpu the process is currently assigned to.
1646 int q = rp->p_priority; /* queue to use */
1647 struct proc **xpp; /* iterate over queue */
1648 struct proc *prev_xp;
1649 u64_t tsc, tsc_delta;
1651 struct proc **rdy_tail;
1653 assert(proc_ptr_ok(rp));
1654 assert(!proc_is_runnable(rp));
1656 /* Side-effect for kernel: check if the task's stack still is ok? */
1657 assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
1659 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1661 /* Now make sure that the process is not in its ready queue. Remove the
1662 * process if it is found. A process can be made unready even if it is not
1663 * running by being sent a signal that kills it.
1665 prev_xp = NULL;
1666 for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
1667 xpp = &(*xpp)->p_nextready) {
1668 if (*xpp == rp) { /* found process to remove */
1669 *xpp = (*xpp)->p_nextready; /* replace with next chain */
1670 if (rp == rdy_tail[q]) { /* queue tail removed */
1671 rdy_tail[q] = prev_xp; /* set new tail */
1674 break;
1676 prev_xp = *xpp; /* save previous in chain */
1680 /* Process accounting for scheduling */
1681 rp->p_accounting.dequeues++;
1683 /* this is not all that accurate on virtual machines, especially with
1684 IO bound processes that only spend a short amount of time in the queue
1685 at a time. */
1686 if (!is_zero64(rp->p_accounting.enter_queue)) {
1687 read_tsc_64(&tsc);
1688 tsc_delta = sub64(tsc, rp->p_accounting.enter_queue);
1689 rp->p_accounting.time_in_queue = add64(rp->p_accounting.time_in_queue,
1690 tsc_delta);
1691 make_zero64(rp->p_accounting.enter_queue);
1695 #if DEBUG_SANITYCHECKS
1696 assert(runqueues_ok_local());
1697 #endif
1700 /*===========================================================================*
1701 * pick_proc *
1702 *===========================================================================*/
1703 static struct proc * pick_proc(void)
1705 /* Decide who to run now. A new process is selected an returned.
1706 * When a billable process is selected, record it in 'bill_ptr', so that the
1707 * clock task can tell who to bill for system time.
1709 * This function always uses the run queues of the local cpu!
1711 register struct proc *rp; /* process to run */
1712 struct proc **rdy_head;
1713 int q; /* iterate over queues */
1715 /* Check each of the scheduling queues for ready processes. The number of
1716 * queues is defined in proc.h, and priorities are set in the task table.
1717 * If there are no processes ready to run, return NULL.
1719 rdy_head = get_cpulocal_var(run_q_head);
1720 for (q=0; q < NR_SCHED_QUEUES; q++) {
1721 if(!(rp = rdy_head[q])) {
1722 TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
1723 continue;
1725 assert(proc_is_runnable(rp));
1726 if (priv(rp)->s_flags & BILLABLE)
1727 get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
1728 return rp;
1730 return NULL;
1733 /*===========================================================================*
1734 * endpoint_lookup *
1735 *===========================================================================*/
1736 struct proc *endpoint_lookup(endpoint_t e)
1738 int n;
1740 if(!isokendpt(e, &n)) return NULL;
1742 return proc_addr(n);
1745 /*===========================================================================*
1746 * isokendpt_f *
1747 *===========================================================================*/
1748 #if DEBUG_ENABLE_IPC_WARNINGS
1749 int isokendpt_f(file, line, e, p, fatalflag)
1750 const char *file;
1751 int line;
1752 #else
1753 int isokendpt_f(e, p, fatalflag)
1754 #endif
1755 endpoint_t e;
1756 int *p;
1757 const int fatalflag;
1759 int ok = 0;
1760 /* Convert an endpoint number into a process number.
1761 * Return nonzero if the process is alive with the corresponding
1762 * generation number, zero otherwise.
1764 * This function is called with file and line number by the
1765 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
1766 * otherwise without. This allows us to print the where the
1767 * conversion was attempted, making the errors verbose without
1768 * adding code for that at every call.
1770 * If fatalflag is nonzero, we must panic if the conversion doesn't
1771 * succeed.
1773 *p = _ENDPOINT_P(e);
1774 ok = 0;
1775 if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
1776 ok = 1;
1777 if(!ok && fatalflag)
1778 panic("invalid endpoint: %d", e);
1779 return ok;
1782 static void notify_scheduler(struct proc *p)
1784 message m_no_quantum;
1785 int err;
1787 assert(!proc_kernel_scheduler(p));
1789 /* dequeue the process */
1790 RTS_SET(p, RTS_NO_QUANTUM);
1792 * Notify the process's scheduler that it has run out of
1793 * quantum. This is done by sending a message to the scheduler
1794 * on the process's behalf
1796 m_no_quantum.m_source = p->p_endpoint;
1797 m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
1798 m_no_quantum.SCHEDULING_ACNT_QUEUE = cpu_time_2_ms(p->p_accounting.time_in_queue);
1799 m_no_quantum.SCHEDULING_ACNT_DEQS = p->p_accounting.dequeues;
1800 m_no_quantum.SCHEDULING_ACNT_IPC_SYNC = p->p_accounting.ipc_sync;
1801 m_no_quantum.SCHEDULING_ACNT_IPC_ASYNC = p->p_accounting.ipc_async;
1802 m_no_quantum.SCHEDULING_ACNT_PREEMPT = p->p_accounting.preempted;
1803 m_no_quantum.SCHEDULING_ACNT_CPU = cpuid;
1804 m_no_quantum.SCHEDULING_ACNT_CPU_LOAD = cpu_load();
1806 /* Reset accounting */
1807 reset_proc_accounting(p);
1809 if ((err = mini_send(p, p->p_scheduler->p_endpoint,
1810 &m_no_quantum, FROM_KERNEL))) {
1811 panic("WARNING: Scheduling: mini_send returned %d\n", err);
1815 void proc_no_time(struct proc * p)
1817 if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
1818 /* this dequeues the process */
1819 notify_scheduler(p);
1821 else {
1823 * non-preemptible processes only need their quantum to
1824 * be renewed. In fact, they by pass scheduling
1826 p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
1827 #if DEBUG_RACE
1828 RTS_SET(p, RTS_PREEMPTED);
1829 RTS_UNSET(p, RTS_PREEMPTED);
1830 #endif
1834 void reset_proc_accounting(struct proc *p)
1836 p->p_accounting.preempted = 0;
1837 p->p_accounting.ipc_sync = 0;
1838 p->p_accounting.ipc_async = 0;
1839 p->p_accounting.dequeues = 0;
1840 make_zero64(p->p_accounting.time_in_queue);
1841 make_zero64(p->p_accounting.enter_queue);
1844 void copr_not_available_handler(void)
1846 struct proc * p;
1847 struct proc ** local_fpu_owner;
1849 * Disable the FPU exception (both for the kernel and for the process
1850 * once it's scheduled), and initialize or restore the FPU state.
1853 disable_fpu_exception();
1855 p = get_cpulocal_var(proc_ptr);
1857 /* if FPU is not owned by anyone, do not store anything */
1858 local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
1859 if (*local_fpu_owner != NULL) {
1860 assert(*local_fpu_owner != p);
1861 save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
1865 * restore the current process' state and let it run again, do not
1866 * schedule!
1868 if (restore_fpu(p) != OK) {
1869 /* Restoring FPU state failed. This is always the process's own
1870 * fault. Send a signal, and schedule another process instead.
1872 *local_fpu_owner = NULL; /* release FPU */
1873 cause_sig(proc_nr(p), SIGFPE);
1874 return;
1877 *local_fpu_owner = p;
1878 context_stop(proc_addr(KERNEL));
1879 restore_user_context(p);
1880 NOT_REACHABLE;
1883 void release_fpu(struct proc * p) {
1884 struct proc ** fpu_owner_ptr;
1886 fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
1888 if (*fpu_owner_ptr == p)
1889 *fpu_owner_ptr = NULL;