<sys/socket.h>: turn off MSG_NOSIGNAL
[minix3.git] / kernel / proc.c
blob3dff67ca562e4c66b6415397121e33be17b4930c
1 /* This file contains essentially all of the process and message handling.
2 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
3 * There is one entry point from the outside:
5 * sys_call: a system call, i.e., the kernel is trapped with an INT
7 * Changes:
8 * Aug 19, 2005 rewrote scheduling code (Jorrit N. Herder)
9 * Jul 25, 2005 rewrote system call handling (Jorrit N. Herder)
10 * May 26, 2005 rewrote message passing functions (Jorrit N. Herder)
11 * May 24, 2005 new notification system call (Jorrit N. Herder)
12 * Oct 28, 2004 nonblocking send and receive calls (Jorrit N. Herder)
14 * The code here is critical to make everything work and is important for the
15 * overall performance of the system. A large fraction of the code deals with
16 * list manipulation. To make this both easy to understand and fast to execute
17 * pointer pointers are used throughout the code. Pointer pointers prevent
18 * exceptions for the head or tail of a linked list.
20 * node_t *queue, *new_node; // assume these as global variables
21 * node_t **xpp = &queue; // get pointer pointer to head of queue
22 * while (*xpp != NULL) // find last pointer of the linked list
23 * xpp = &(*xpp)->next; // get pointer to next pointer
24 * *xpp = new_node; // now replace the end (the NULL pointer)
25 * new_node->next = NULL; // and mark the new end of the list
27 * For example, when adding a new node to the end of the list, one normally
28 * makes an exception for an empty list and looks up the end of the list for
29 * nonempty lists. As shown above, this is not required with pointer pointers.
32 #include <minix/com.h>
33 #include <minix/ipcconst.h>
34 #include <stddef.h>
35 #include <signal.h>
36 #include <assert.h>
37 #include <string.h>
39 #include "kernel/kernel.h"
40 #include "vm.h"
41 #include "clock.h"
42 #include "spinlock.h"
43 #include "arch_proto.h"
45 #include <minix/syslib.h>
47 /* Scheduling and message passing functions */
48 static void idle(void);
49 /**
50 * Made public for use in clock.c (for user-space scheduling)
51 static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
52 *m_ptr, int flags);
54 static int mini_receive(struct proc *caller_ptr, endpoint_t src,
55 message *m_ptr, int flags);
56 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
57 size);
58 static int deadlock(int function, register struct proc *caller,
59 endpoint_t src_dst_e);
60 static int try_async(struct proc *caller_ptr);
61 static int try_one(struct proc *src_ptr, struct proc *dst_ptr);
62 static struct proc * pick_proc(void);
63 static void enqueue_head(struct proc *rp);
65 /* all idles share the same idle_priv structure */
66 static struct priv idle_priv;
/*
 * Build the name of an idle process: the string "idle" followed by the
 * decimal representation of 'n' without leading zeroes ("idle0", "idle12").
 *
 * name: destination buffer; at most 8 bytes are written, including the
 *       terminating NUL.
 * n:    number to append; values outside 0..999 are clamped into that range
 *       so that only the characters '0'..'9' are ever emitted.
 */
static void set_idle_name(char * name, int n)
{
	static const char prefix[] = "idle";
	int pos;
	int divisor;
	int started = 0;	/* set once the first digit has been written */

	/* A negative value would yield characters below '0'; clamp it. */
	if (n < 0)
		n = 0;
	if (n > 999)
		n = 999;

	for (pos = 0; prefix[pos] != '\0'; pos++)
		name[pos] = prefix[pos];

	/* Emit hundreds, tens and units, suppressing leading zeroes. */
	for (divisor = 100; divisor > 0; divisor /= 10) {
		int digit = n / divisor;

		n -= digit * divisor;

		/* The units digit is always written, so 0 gives "idle0". */
		if (started || digit != 0 || divisor == 1) {
			started = 1;
			name[pos++] = (char) ('0' + digit);
		}
	}

	name[pos] = '\0';
}
98 #define PICK_ANY 1
99 #define PICK_HIGHERONLY 2
/*
 * Assemble a notification message in *m_ptr.
 *
 * The message is zeroed, its type is set to NOTIFY_MESSAGE and it is stamped
 * with get_monotonic().  Depending on the source 'src':
 *   HARDWARE: the destination's pending interrupt mask is copied into the
 *             message and then cleared;
 *   SYSTEM:   the destination's pending signal set is copied into the
 *             message and then emptied.
 * Any other source gets no extra payload.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else.  Invoke it
 * with a trailing semicolon.
 */
#define BuildNotifyMessage(m_ptr, src, dst_ptr)				\
	do {								\
		memset((m_ptr), 0, sizeof(*(m_ptr)));			\
		(m_ptr)->m_type = NOTIFY_MESSAGE;			\
		(m_ptr)->m_notify.timestamp = get_monotonic();		\
		switch (src) {						\
		case HARDWARE:						\
			(m_ptr)->m_notify.interrupts =			\
				priv(dst_ptr)->s_int_pending;		\
			priv(dst_ptr)->s_int_pending = 0;		\
			break;						\
		case SYSTEM:						\
			memcpy(&(m_ptr)->m_notify.sigset,		\
				&priv(dst_ptr)->s_sig_pending,		\
				sizeof(sigset_t));			\
			sigemptyset(&priv(dst_ptr)->s_sig_pending);	\
			break;						\
		}							\
	} while (0)
119 void proc_init(void)
121 struct proc * rp;
122 struct priv *sp;
123 int i;
125 /* Clear the process table. Announce each slot as empty and set up
126 * mappings for proc_addr() and proc_nr() macros. Do the same for the
127 * table with privilege structures for the system processes.
129 for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
130 rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
131 rp->p_magic = PMAGIC;
132 rp->p_nr = i; /* proc number from ptr */
133 rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
134 rp->p_scheduler = NULL; /* no user space scheduler */
135 rp->p_priority = 0; /* no priority */
136 rp->p_quantum_size_ms = 0; /* no quantum size */
138 /* arch-specific initialization */
139 arch_proc_reset(rp);
141 for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
142 sp->s_proc_nr = NONE; /* initialize as free */
143 sp->s_id = (sys_id_t) i; /* priv structure index */
144 ppriv_addr[i] = sp; /* priv ptr from number */
145 sp->s_sig_mgr = NONE; /* clear signal managers */
146 sp->s_bak_sig_mgr = NONE;
149 idle_priv.s_flags = IDL_F;
150 /* initialize IDLE structures for every CPU */
151 for (i = 0; i < CONFIG_MAX_CPUS; i++) {
152 struct proc * ip = get_cpu_var_ptr(i, idle_proc);
153 ip->p_endpoint = IDLE;
154 ip->p_priv = &idle_priv;
155 /* must not let idle ever get scheduled */
156 ip->p_rts_flags |= RTS_PROC_STOP;
157 set_idle_name(ip->p_name, i);
/*
 * Select the address space to use while the CPU idles.  This does nothing
 * unless the kernel is built with CONFIG_SMP.
 */
static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/*
	 * Currently we bet that VM is always alive and its pages available,
	 * so when the CPU wakes up the kernel is mapped and no surprises
	 * happen.  This is only a problem if more than one CPU is available.
	 */
	struct proc *vm = proc_addr(VM_PROC_NR);

	switch_address_space(vm);
#endif
}
173 /*===========================================================================*
174 * idle *
175 *===========================================================================*/
176 static void idle(void)
178 struct proc * p;
180 /* This function is called whenever there is no work to do.
181 * Halt the CPU, and measure how many timestamp counter ticks are
182 * spent not doing anything. This allows test setups to measure
183 * the CPU utilization of certain workloads with high precision.
186 p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
187 if (priv(p)->s_flags & BILLABLE)
188 get_cpulocal_var(bill_ptr) = p;
190 switch_address_space_idle();
192 #ifdef CONFIG_SMP
193 get_cpulocal_var(cpu_is_idle) = 1;
194 /* we don't need to keep time on APs as it is handled on the BSP */
195 if (cpuid != bsp_cpu_id)
196 stop_local_timer();
197 else
198 #endif
201 * If the timer has expired while in kernel we must
202 * rearm it before we go to sleep
204 restart_local_timer();
207 /* start accounting for the idle time */
208 context_stop(proc_addr(KERNEL));
209 #if !SPROFILE
210 halt_cpu();
211 #else
212 if (!sprofiling)
213 halt_cpu();
214 else {
215 volatile int * v;
217 v = get_cpulocal_var_ptr(idle_interrupted);
218 interrupts_enable();
219 while (!*v)
220 arch_pause();
221 interrupts_disable();
222 *v = 0;
224 #endif
226 * end of accounting for the idle task does not happen here, the kernel
227 * is handling stuff for quite a while before it gets back here!
231 /*===========================================================================*
232 * switch_to_user *
233 *===========================================================================*/
234 void switch_to_user(void)
236 /* This function is called an instant before proc_ptr is
237 * to be scheduled again.
239 struct proc * p;
240 #ifdef CONFIG_SMP
241 int tlb_must_refresh = 0;
242 #endif
244 p = get_cpulocal_var(proc_ptr);
246 * if the current process is still runnable check the misc flags and let
247 * it run unless it becomes not runnable in the meantime
249 if (proc_is_runnable(p))
250 goto check_misc_flags;
252 * if a process becomes not runnable while handling the misc flags, we
253 * need to pick a new one here and start from scratch. Also if the
254 * current process wasn't runnable, we pick a new one here
256 not_runnable_pick_new:
257 if (proc_is_preempted(p)) {
258 p->p_rts_flags &= ~RTS_PREEMPTED;
259 if (proc_is_runnable(p)) {
260 if (p->p_cpu_time_left)
261 enqueue_head(p);
262 else
263 enqueue(p);
268 * if we have no process to run, set IDLE as the current process for
269 * time accounting and put the cpu in an idle state. After the next
270 * timer interrupt the execution resumes here and we can pick another
271 * process. If there is still nothing runnable we "schedule" IDLE again
273 while (!(p = pick_proc())) {
274 idle();
277 /* update the global variable */
278 get_cpulocal_var(proc_ptr) = p;
280 #ifdef CONFIG_SMP
281 if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
282 tlb_must_refresh = 1;
283 #endif
284 switch_address_space(p);
286 check_misc_flags:
288 assert(p);
289 assert(proc_is_runnable(p));
290 while (p->p_misc_flags &
291 (MF_KCALL_RESUME | MF_DELIVERMSG |
292 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
294 assert(proc_is_runnable(p));
295 if (p->p_misc_flags & MF_KCALL_RESUME) {
296 kernel_call_resume(p);
298 else if (p->p_misc_flags & MF_DELIVERMSG) {
299 TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
300 p->p_name, p->p_endpoint););
301 delivermsg(p);
303 else if (p->p_misc_flags & MF_SC_DEFER) {
304 /* Perform the system call that we deferred earlier. */
306 assert (!(p->p_misc_flags & MF_SC_ACTIVE));
308 arch_do_syscall(p);
310 /* If the process is stopped for signal delivery, and
311 * not blocked sending a message after the system call,
312 * inform PM.
314 if ((p->p_misc_flags & MF_SIG_DELAY) &&
315 !RTS_ISSET(p, RTS_SENDING))
316 sig_delay_done(p);
318 else if (p->p_misc_flags & MF_SC_TRACE) {
319 /* Trigger a system call leave event if this was a
320 * system call. We must do this after processing the
321 * other flags above, both for tracing correctness and
322 * to be able to use 'break'.
324 if (!(p->p_misc_flags & MF_SC_ACTIVE))
325 break;
327 p->p_misc_flags &=
328 ~(MF_SC_TRACE | MF_SC_ACTIVE);
330 /* Signal the "leave system call" event.
331 * Block the process.
333 cause_sig(proc_nr(p), SIGTRAP);
335 else if (p->p_misc_flags & MF_SC_ACTIVE) {
336 /* If MF_SC_ACTIVE was set, remove it now:
337 * we're leaving the system call.
339 p->p_misc_flags &= ~MF_SC_ACTIVE;
341 break;
345 * the selected process might not be runnable anymore. We have
346 * to checkit and schedule another one
348 if (!proc_is_runnable(p))
349 goto not_runnable_pick_new;
352 * check the quantum left before it runs again. We must do it only here
353 * as we are sure that a possible out-of-quantum message to the
354 * scheduler will not collide with the regular ipc
356 if (!p->p_cpu_time_left)
357 proc_no_time(p);
359 * After handling the misc flags the selected process might not be
360 * runnable anymore. We have to checkit and schedule another one
362 if (!proc_is_runnable(p))
363 goto not_runnable_pick_new;
365 TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
366 "pc 0x%08x\n",
367 cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
368 #if DEBUG_TRACE
369 p->p_schedules++;
370 #endif
372 p = arch_finish_switch_to_user();
373 assert(p->p_cpu_time_left);
375 context_stop(proc_addr(KERNEL));
377 /* If the process isn't the owner of FPU, enable the FPU exception */
378 if (get_cpulocal_var(fpu_owner) != p)
379 enable_fpu_exception();
380 else
381 disable_fpu_exception();
383 /* If MF_CONTEXT_SET is set, don't clobber process state within
384 * the kernel. The next kernel entry is OK again though.
386 p->p_misc_flags &= ~MF_CONTEXT_SET;
388 #if defined(__i386__)
389 assert(p->p_seg.p_cr3 != 0);
390 #elif defined(__arm__)
391 assert(p->p_seg.p_ttbr != 0);
392 #endif
393 #ifdef CONFIG_SMP
394 if (p->p_misc_flags & MF_FLUSH_TLB) {
395 if (tlb_must_refresh)
396 refresh_tlb();
397 p->p_misc_flags &= ~MF_FLUSH_TLB;
399 #endif
401 restart_local_timer();
404 * restore_user_context() carries out the actual mode switch from kernel
405 * to userspace. This function does not return
407 restore_user_context(p);
408 NOT_REACHABLE;
412 * handler for all synchronous IPC calls
414 static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
415 int call_nr, /* system call number and flags */
416 endpoint_t src_dst_e, /* src or dst of the call */
417 message *m_ptr) /* users pointer to a message */
419 int result; /* the system call's result */
420 int src_dst_p; /* Process slot number */
421 char *callname;
423 /* Check destination. RECEIVE is the only call that accepts ANY (in addition
424 * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
425 * endpoint to corresponds to a process. In addition, it is necessary to check
426 * whether a process is allowed to send to a given destination.
428 assert(call_nr != SENDA);
430 /* Only allow non-negative call_nr values less than 32 */
431 if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
432 || !(callname = ipc_call_names[call_nr])) {
433 #if DEBUG_ENABLE_IPC_WARNINGS
434 printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
435 call_nr, proc_nr(caller_ptr), src_dst_e);
436 #endif
437 return(ETRAPDENIED); /* trap denied by mask or kernel */
440 if (src_dst_e == ANY)
442 if (call_nr != RECEIVE)
444 #if 0
445 printf("sys_call: %s by %d with bad endpoint %d\n",
446 callname,
447 proc_nr(caller_ptr), src_dst_e);
448 #endif
449 return EINVAL;
451 src_dst_p = (int) src_dst_e;
453 else
455 /* Require a valid source and/or destination process. */
456 if(!isokendpt(src_dst_e, &src_dst_p)) {
457 #if 0
458 printf("sys_call: %s by %d with bad endpoint %d\n",
459 callname,
460 proc_nr(caller_ptr), src_dst_e);
461 #endif
462 return EDEADSRCDST;
465 /* If the call is to send to a process, i.e., for SEND, SENDNB,
466 * SENDREC or NOTIFY, verify that the caller is allowed to send to
467 * the given destination.
469 if (call_nr != RECEIVE)
471 if (!may_send_to(caller_ptr, src_dst_p)) {
472 #if DEBUG_ENABLE_IPC_WARNINGS
473 printf(
474 "sys_call: ipc mask denied %s from %d to %d\n",
475 callname,
476 caller_ptr->p_endpoint, src_dst_e);
477 #endif
478 return(ECALLDENIED); /* call denied by ipc mask */
483 /* Check if the process has privileges for the requested call. Calls to the
484 * kernel may only be SENDREC, because tasks always reply and may not block
485 * if the caller doesn't do receive().
487 if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
488 #if DEBUG_ENABLE_IPC_WARNINGS
489 printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
490 callname, proc_nr(caller_ptr), src_dst_p);
491 #endif
492 return(ETRAPDENIED); /* trap denied by mask or kernel */
495 if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
496 #if DEBUG_ENABLE_IPC_WARNINGS
497 printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
498 callname, proc_nr(caller_ptr), src_dst_e);
499 #endif
500 return(ETRAPDENIED); /* trap denied by mask or kernel */
503 switch(call_nr) {
504 case SENDREC:
505 /* A flag is set so that notifications cannot interrupt SENDREC. */
506 caller_ptr->p_misc_flags |= MF_REPLY_PEND;
507 /* fall through */
508 case SEND:
509 result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
510 if (call_nr == SEND || result != OK)
511 break; /* done, or SEND failed */
512 /* fall through for SENDREC */
513 case RECEIVE:
514 if (call_nr == RECEIVE) {
515 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
516 IPC_STATUS_CLEAR(caller_ptr); /* clear IPC status code */
518 result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
519 break;
520 case NOTIFY:
521 result = mini_notify(caller_ptr, src_dst_e);
522 break;
523 case SENDNB:
524 result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
525 break;
526 default:
527 result = EBADCALL; /* illegal system call */
530 /* Now, return the result of the system call to the caller. */
531 return(result);
534 int do_ipc(reg_t r1, reg_t r2, reg_t r3)
536 struct proc *const caller_ptr = get_cpulocal_var(proc_ptr); /* get pointer to caller */
537 int call_nr = (int) r1;
539 assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));
541 /* bill kernel time to this process. */
542 kbill_ipc = caller_ptr;
544 /* If this process is subject to system call tracing, handle that first. */
545 if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
546 /* Are we tracing this process, and is it the first sys_call entry? */
547 if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
548 MF_SC_TRACE) {
549 /* We must notify the tracer before processing the actual
550 * system call. If we don't, the tracer could not obtain the
551 * input message. Postpone the entire system call.
553 caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
554 assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
555 caller_ptr->p_misc_flags |= MF_SC_DEFER;
556 caller_ptr->p_defer.r1 = r1;
557 caller_ptr->p_defer.r2 = r2;
558 caller_ptr->p_defer.r3 = r3;
560 /* Signal the "enter system call" event. Block the process. */
561 cause_sig(proc_nr(caller_ptr), SIGTRAP);
563 /* Preserve the return register's value. */
564 return caller_ptr->p_reg.retreg;
567 /* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
568 caller_ptr->p_misc_flags &= ~MF_SC_DEFER;
570 assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));
572 /* Set a flag to allow reliable tracing of leaving the system call. */
573 caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
576 if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
577 panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
578 caller_ptr->p_name, caller_ptr->p_endpoint);
581 /* Now check if the call is known and try to perform the request. The only
582 * system calls that exist in MINIX are sending and receiving messages.
583 * - SENDREC: combines SEND and RECEIVE in a single system call
584 * - SEND: sender blocks until its message has been delivered
585 * - RECEIVE: receiver blocks until an acceptable message has arrived
586 * - NOTIFY: asynchronous call; deliver notification or mark pending
587 * - SENDA: list of asynchronous send requests
589 switch(call_nr) {
590 case SENDREC:
591 case SEND:
592 case RECEIVE:
593 case NOTIFY:
594 case SENDNB:
596 /* Process accounting for scheduling */
597 caller_ptr->p_accounting.ipc_sync++;
599 return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
600 (message *) r3);
602 case SENDA:
605 * Get and check the size of the argument in bytes as it is a
606 * table
608 size_t msg_size = (size_t) r2;
610 /* Process accounting for scheduling */
611 caller_ptr->p_accounting.ipc_async++;
613 /* Limit size to something reasonable. An arbitrary choice is 16
614 * times the number of process table entries.
616 if (msg_size > 16*(NR_TASKS + NR_PROCS))
617 return EDOM;
618 return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
620 case MINIX_KERNINFO:
622 /* It might not be initialized yet. */
623 if(!minix_kerninfo_user) {
624 return EBADCALL;
627 arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
628 return OK;
630 default:
631 return EBADCALL; /* illegal system call */
635 /*===========================================================================*
636 * deadlock *
637 *===========================================================================*/
638 static int deadlock(function, cp, src_dst_e)
639 int function; /* trap number */
640 register struct proc *cp; /* pointer to caller */
641 endpoint_t src_dst_e; /* src or dst process */
643 /* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
644 * a cyclic dependency of blocking send and receive calls. The only cyclic
645 * dependency that is not fatal is if the caller and target directly SEND(REC)
646 * and RECEIVE to each other. If a deadlock is found, the group size is
647 * returned. Otherwise zero is returned.
649 register struct proc *xp; /* process pointer */
650 int group_size = 1; /* start with only caller */
651 #if DEBUG_ENABLE_IPC_WARNINGS
652 static struct proc *processes[NR_PROCS + NR_TASKS];
653 processes[0] = cp;
654 #endif
656 while (src_dst_e != ANY) { /* check while process nr */
657 int src_dst_slot;
658 okendpt(src_dst_e, &src_dst_slot);
659 xp = proc_addr(src_dst_slot); /* follow chain of processes */
660 assert(proc_ptr_ok(xp));
661 assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
662 #if DEBUG_ENABLE_IPC_WARNINGS
663 processes[group_size] = xp;
664 #endif
665 group_size ++; /* extra process in group */
667 /* Check whether the last process in the chain has a dependency. If it
668 * has not, the cycle cannot be closed and we are done.
670 if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
671 return 0;
673 /* Now check if there is a cyclic dependency. For group sizes of two,
674 * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
675 * or other combinations indicate a deadlock.
677 if (src_dst_e == cp->p_endpoint) { /* possible deadlock */
678 if (group_size == 2) { /* caller and src_dst */
679 /* The function number is magically converted to flags. */
680 if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
681 return(0); /* not a deadlock */
684 #if DEBUG_ENABLE_IPC_WARNINGS
686 int i;
687 printf("deadlock between these processes:\n");
688 for(i = 0; i < group_size; i++) {
689 printf(" %10s ", processes[i]->p_name);
691 printf("\n\n");
692 for(i = 0; i < group_size; i++) {
693 print_proc(processes[i]);
694 proc_stacktrace(processes[i]);
697 #endif
698 return(group_size); /* deadlock found */
701 return(0); /* not a deadlock */
704 /*===========================================================================*
705 * has_pending *
706 *===========================================================================*/
707 static int has_pending(sys_map_t *map, int src_p, int asynm)
709 /* Check to see if there is a pending message from the desired source
710 * available.
713 int src_id;
714 sys_id_t id = NULL_PRIV_ID;
715 #ifdef CONFIG_SMP
716 struct proc * p;
717 #endif
719 /* Either check a specific bit in the mask map, or find the first bit set in
720 * it (if any), depending on whether the receive was called on a specific
721 * source endpoint.
723 if (src_p != ANY) {
724 src_id = nr_to_id(src_p);
725 if (get_sys_bit(*map, src_id)) {
726 #ifdef CONFIG_SMP
727 p = proc_addr(id_to_nr(src_id));
728 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
729 p->p_misc_flags |= MF_SENDA_VM_MISS;
730 else
731 #endif
732 id = src_id;
734 } else {
735 /* Find a source with a pending message */
736 for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
737 if (get_sys_bits(*map, src_id) != 0) {
738 #ifdef CONFIG_SMP
739 while (src_id < NR_SYS_PROCS) {
740 while (!get_sys_bit(*map, src_id)) {
741 if (src_id == NR_SYS_PROCS)
742 goto quit_search;
743 src_id++;
745 p = proc_addr(id_to_nr(src_id));
747 * We must not let kernel fiddle with pages of a
748 * process which are currently being changed by
749 * VM. It is dangerous! So do not report such a
750 * process as having pending async messages.
751 * Skip it.
753 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
754 p->p_misc_flags |= MF_SENDA_VM_MISS;
755 src_id++;
756 } else
757 goto quit_search;
759 #else
760 while (!get_sys_bit(*map, src_id)) src_id++;
761 goto quit_search;
762 #endif
766 quit_search:
767 if (src_id < NR_SYS_PROCS) /* Found one */
768 id = src_id;
771 return(id);
774 /*===========================================================================*
775 * has_pending_notify *
776 *===========================================================================*/
777 int has_pending_notify(struct proc * caller, int src_p)
779 sys_map_t * map = &priv(caller)->s_notify_pending;
780 return has_pending(map, src_p, 0);
783 /*===========================================================================*
784 * has_pending_asend *
785 *===========================================================================*/
786 int has_pending_asend(struct proc * caller, int src_p)
788 sys_map_t * map = &priv(caller)->s_asyn_pending;
789 return has_pending(map, src_p, 1);
792 /*===========================================================================*
793 * unset_notify_pending *
794 *===========================================================================*/
795 void unset_notify_pending(struct proc * caller, int src_p)
797 sys_map_t * map = &priv(caller)->s_notify_pending;
798 unset_sys_bit(*map, src_p);
801 /*===========================================================================*
802 * mini_send *
803 *===========================================================================*/
804 int mini_send(
805 register struct proc *caller_ptr, /* who is trying to send a message? */
806 endpoint_t dst_e, /* to whom is message being sent? */
807 message *m_ptr, /* pointer to message buffer */
808 const int flags
811 /* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
812 * for this message, copy the message to it and unblock 'dst'. If 'dst' is
813 * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
815 register struct proc *dst_ptr;
816 register struct proc **xpp;
817 int dst_p;
818 dst_p = _ENDPOINT_P(dst_e);
819 dst_ptr = proc_addr(dst_p);
821 if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
823 return EDEADSRCDST;
826 /* Check if 'dst' is blocked waiting for this message. The destination's
827 * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
829 if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
830 int call;
831 /* Destination is indeed waiting for this message. */
832 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
834 if (!(flags & FROM_KERNEL)) {
835 if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
836 return EFAULT;
837 } else {
838 dst_ptr->p_delivermsg = *m_ptr;
839 IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
842 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
843 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
845 call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
846 : (flags & NON_BLOCKING ? SENDNB : SEND));
847 IPC_STATUS_ADD_CALL(dst_ptr, call);
849 if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
850 dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
852 RTS_UNSET(dst_ptr, RTS_RECEIVING);
854 #if DEBUG_IPC_HOOK
855 hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
856 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
857 #endif
858 } else {
859 if(flags & NON_BLOCKING) {
860 return(ENOTREADY);
863 /* Check for a possible deadlock before actually blocking. */
864 if (deadlock(SEND, caller_ptr, dst_e)) {
865 return(ELOCKED);
868 /* Destination is not waiting. Block and dequeue caller. */
869 if (!(flags & FROM_KERNEL)) {
870 if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
871 return EFAULT;
872 } else {
873 caller_ptr->p_sendmsg = *m_ptr;
875 * we need to remember that this message is from kernel so we
876 * can set the delivery status flags when the message is
877 * actually delivered
879 caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
882 RTS_SET(caller_ptr, RTS_SENDING);
883 caller_ptr->p_sendto_e = dst_e;
885 /* Process is now blocked. Put in on the destination's queue. */
886 assert(caller_ptr->p_q_link == NULL);
887 xpp = &dst_ptr->p_caller_q; /* find end of list */
888 while (*xpp) xpp = &(*xpp)->p_q_link;
889 *xpp = caller_ptr; /* add caller to end */
891 #if DEBUG_IPC_HOOK
892 hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
893 #endif
895 return(OK);
898 /*===========================================================================*
899 * mini_receive *
900 *===========================================================================*/
901 static int mini_receive(struct proc * caller_ptr,
902 endpoint_t src_e, /* which message source is wanted */
903 message * m_buff_usr, /* pointer to message buffer */
904 const int flags)
906 /* A process or task wants to get a message. If a message is already queued,
907 * acquire it and deblock the sender. If no message from the desired source
908 * is available block the caller.
910 register struct proc **xpp;
911 int r, src_id, src_proc_nr, src_p;
913 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
915 /* This is where we want our message. */
916 caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
918 if(src_e == ANY) src_p = ANY;
919 else
921 okendpt(src_e, &src_p);
922 if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
924 return EDEADSRCDST;
929 /* Check to see if a message from desired source is already available. The
930 * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
931 * set, the process should be blocked.
933 if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
935 /* Check if there are pending notifications, except for SENDREC. */
936 if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
938 /* Check for pending notifications */
939 if ((src_id = has_pending_notify(caller_ptr, src_p)) != NULL_PRIV_ID) {
940 endpoint_t hisep;
942 src_proc_nr = id_to_nr(src_id); /* get source proc */
943 #if DEBUG_ENABLE_IPC_WARNINGS
944 if(src_proc_nr == NONE) {
945 printf("mini_receive: sending notify from NONE\n");
947 #endif
948 assert(src_proc_nr != NONE);
949 unset_notify_pending(caller_ptr, src_id); /* no longer pending */
951 /* Found a suitable source, deliver the notification message. */
952 hisep = proc_addr(src_proc_nr)->p_endpoint;
953 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
954 assert(src_e == ANY || hisep == src_e);
956 /* assemble message */
957 BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
958 caller_ptr->p_delivermsg.m_source = hisep;
959 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
961 IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
963 goto receive_done;
967 /* Check for pending asynchronous messages */
968 if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
969 if (src_p != ANY)
970 r = try_one(proc_addr(src_p), caller_ptr);
971 else
972 r = try_async(caller_ptr);
974 if (r == OK) {
975 IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
976 goto receive_done;
980 /* Check caller queue. Use pointer pointers to keep code simple. */
981 xpp = &caller_ptr->p_caller_q;
982 while (*xpp) {
983 struct proc * sender = *xpp;
985 if (src_e == ANY || src_p == proc_nr(sender)) {
986 int call;
987 assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
988 assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
990 /* Found acceptable message. Copy it and update status. */
991 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
992 caller_ptr->p_delivermsg = sender->p_sendmsg;
993 caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
994 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
995 RTS_UNSET(sender, RTS_SENDING);
997 call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
998 IPC_STATUS_ADD_CALL(caller_ptr, call);
1001 * if the message is originally from the kernel on behalf of this
1002 * process, we must send the status flags accordingly
1004 if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
1005 IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
1006 /* we can clean the flag now, not need anymore */
1007 sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
1009 if (sender->p_misc_flags & MF_SIG_DELAY)
1010 sig_delay_done(sender);
1012 #if DEBUG_IPC_HOOK
1013 hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
1014 #endif
1016 *xpp = sender->p_q_link; /* remove from queue */
1017 sender->p_q_link = NULL;
1018 goto receive_done;
1020 xpp = &sender->p_q_link; /* proceed to next */
1024 /* No suitable message is available or the caller couldn't send in SENDREC.
1025 * Block the process trying to receive, unless the flags tell otherwise.
1027 if ( ! (flags & NON_BLOCKING)) {
1028 /* Check for a possible deadlock before actually blocking. */
1029 if (deadlock(RECEIVE, caller_ptr, src_e)) {
1030 return(ELOCKED);
1033 caller_ptr->p_getfrom_e = src_e;
1034 RTS_SET(caller_ptr, RTS_RECEIVING);
1035 return(OK);
1036 } else {
1037 return(ENOTREADY);
1040 receive_done:
1041 if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
1042 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
1043 return OK;
1046 /*===========================================================================*
1047 * mini_notify *
1048 *===========================================================================*/
1049 int mini_notify(
1050 const struct proc *caller_ptr, /* sender of the notification */
1051 endpoint_t dst_e /* which process to notify */
1054 register struct proc *dst_ptr;
1055 int src_id; /* source id for late delivery */
1056 int dst_p;
1058 if (!isokendpt(dst_e, &dst_p)) {
1059 util_stacktrace();
1060 printf("mini_notify: bogus endpoint %d\n", dst_e);
1061 return EDEADSRCDST;
1064 dst_ptr = proc_addr(dst_p);
1066 /* Check to see if target is blocked waiting for this message. A process
1067 * can be both sending and receiving during a SENDREC system call.
1069 if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1070 ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
1071 /* Destination is indeed waiting for a message. Assemble a notification
1072 * message and deliver it. Copy from pseudo-source HARDWARE, since the
1073 * message is in the kernel's address space.
1075 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
1077 BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
1078 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1079 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1081 IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
1082 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1084 return(OK);
1087 /* Destination is not ready to receive the notification. Add it to the
1088 * bit map with pending notifications. Note the indirectness: the privilege id
1089 * instead of the process number is used in the pending bit map.
1091 src_id = priv(caller_ptr)->s_id;
1092 set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
1093 return(OK);
/* Helper macros for the asynchronous-send table code below.
 *
 * All A_* macros rely on names that must exist at the expansion site:
 *   caller_ptr  - process that owns the table,
 *   table_v     - virtual address of the table in that process,
 *   tabent      - kernel-side asynmsg_t scratch copy of one entry,
 *   r           - int that receives EFAULT when the copy fails,
 *   asyn_error  - label that is jumped to when the copy fails.
 * Each A_* macro expands to a single statement (do { } while (0)), so it is
 * safe to use as the body of an unbraced if/else.
 */

/* Log a failed table access for entry 'entry' of process 'caller'. */
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%d, tab 0x%lx)\n",__FILE__,__LINE__,	\
	(field), (caller)->p_name, (entry),	\
	priv(caller)->s_asynsize, priv(caller)->s_asyntab)

/* Copy one field of table entry 'entry' from the caller into tabent. */
#define A_RETR_FLD(entry, field) do {	\
  if (data_copy(caller_ptr->p_endpoint,	\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	KERNEL, (vir_bytes) &tabent.field,	\
	sizeof(tabent.field)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;	\
		goto asyn_error;	\
  }	\
} while(0)

/* Copy the whole table entry 'entry' from the caller into tabent. */
#define A_RETR(entry) do {	\
  if (data_copy(	\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	KERNEL, (vir_bytes) &tabent,	\
	sizeof(tabent)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;	\
		goto asyn_error;	\
  }	\
} while(0)

/* Copy one field of tabent back into table entry 'entry' of the caller. */
#define A_INSRT_FLD(entry, field) do {	\
  if (data_copy(KERNEL, (vir_bytes) &tabent.field,	\
	caller_ptr->p_endpoint,	\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	sizeof(tabent.field)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;	\
		goto asyn_error;	\
  }	\
} while(0)

/* Copy the whole of tabent back into table entry 'entry' of the caller. */
#define A_INSRT(entry) do {	\
  if (data_copy(KERNEL, (vir_bytes) &tabent,	\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	sizeof(tabent)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;	\
		goto asyn_error;	\
  }	\
} while(0)
1142 /*===========================================================================*
1143 * try_deliver_senda *
1144 *===========================================================================*/
1145 int try_deliver_senda(struct proc *caller_ptr,
1146 asynmsg_t *table,
1147 size_t size)
1149 int r, dst_p, done, do_notify;
1150 unsigned int i;
1151 unsigned flags;
1152 endpoint_t dst;
1153 struct proc *dst_ptr;
1154 struct priv *privp;
1155 asynmsg_t tabent;
1156 const vir_bytes table_v = (vir_bytes) table;
1158 privp = priv(caller_ptr);
1160 /* Clear table */
1161 privp->s_asyntab = -1;
1162 privp->s_asynsize = 0;
1164 if (size == 0) return(OK); /* Nothing to do, just return */
1166 /* Scan the table */
1167 do_notify = FALSE;
1168 done = TRUE;
1170 /* Limit size to something reasonable. An arbitrary choice is 16
1171 * times the number of process table entries.
1173 * (this check has been duplicated in sys_call but is left here
1174 * as a sanity check)
1176 if (size > 16*(NR_TASKS + NR_PROCS)) {
1177 r = EDOM;
1178 return r;
1181 for (i = 0; i < size; i++) {
1182 /* Process each entry in the table and store the result in the table.
1183 * If we're done handling a message, copy the result to the sender. */
1185 dst = NONE;
1186 /* Copy message to kernel */
1187 A_RETR(i);
1188 flags = tabent.flags;
1189 dst = tabent.dst;
1191 if (flags == 0) continue; /* Skip empty entries */
1193 /* 'flags' field must contain only valid bits */
1194 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
1195 r = EINVAL;
1196 goto asyn_error;
1198 if (!(flags & AMF_VALID)) { /* Must contain message */
1199 r = EINVAL;
1200 goto asyn_error;
1202 if (flags & AMF_DONE) continue; /* Already done processing */
1204 r = OK;
1205 if (!isokendpt(tabent.dst, &dst_p))
1206 r = EDEADSRCDST; /* Bad destination, report the error */
1207 else if (iskerneln(dst_p))
1208 r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
1209 else if (!may_send_to(caller_ptr, dst_p))
1210 r = ECALLDENIED; /* Send denied by IPC mask */
1211 else /* r == OK */
1212 dst_ptr = proc_addr(dst_p);
1214 /* XXX: RTS_NO_ENDPOINT should be removed */
1215 if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
1216 r = EDEADSRCDST;
1219 /* Check if 'dst' is blocked waiting for this message.
1220 * If AMF_NOREPLY is set, do not satisfy the receiving part of
1221 * a SENDREC.
1223 if (r == OK && WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
1224 (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
1225 /* Destination is indeed waiting for this message. */
1226 dst_ptr->p_delivermsg = tabent.msg;
1227 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1228 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1229 IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
1230 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1231 #if DEBUG_IPC_HOOK
1232 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
1233 #endif
1234 } else if (r == OK) {
1235 /* Inform receiver that something is pending */
1236 set_sys_bit(priv(dst_ptr)->s_asyn_pending,
1237 priv(caller_ptr)->s_id);
1238 done = FALSE;
1239 continue;
1242 /* Store results */
1243 tabent.result = r;
1244 tabent.flags = flags | AMF_DONE;
1245 if (flags & AMF_NOTIFY)
1246 do_notify = TRUE;
1247 else if (r != OK && (flags & AMF_NOTIFY_ERR))
1248 do_notify = TRUE;
1249 A_INSRT(i); /* Copy results to caller */
1250 continue;
1252 asyn_error:
1253 if (dst != NONE)
1254 printf("KERNEL senda error %d to %d\n", r, dst);
1255 else
1256 printf("KERNEL senda error %d\n", r);
1259 if (do_notify)
1260 mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
1262 if (!done) {
1263 privp->s_asyntab = (vir_bytes) table;
1264 privp->s_asynsize = size;
1267 return(OK);
1270 /*===========================================================================*
1271 * mini_senda *
1272 *===========================================================================*/
1273 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
1275 struct priv *privp;
1277 privp = priv(caller_ptr);
1278 if (!(privp->s_flags & SYS_PROC)) {
1279 printf( "mini_senda: warning caller has no privilege structure\n");
1280 return(EPERM);
1283 return try_deliver_senda(caller_ptr, table, size);
1287 /*===========================================================================*
1288 * try_async *
1289 *===========================================================================*/
1290 static int try_async(caller_ptr)
1291 struct proc *caller_ptr;
1293 int r;
1294 struct priv *privp;
1295 struct proc *src_ptr;
1296 sys_map_t *map;
1298 map = &priv(caller_ptr)->s_asyn_pending;
1300 /* Try all privilege structures */
1301 for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) {
1302 if (privp->s_proc_nr == NONE)
1303 continue;
1305 if (!get_sys_bit(*map, privp->s_id))
1306 continue;
1308 src_ptr = proc_addr(privp->s_proc_nr);
1310 #ifdef CONFIG_SMP
1312 * Do not copy from a process which does not have a stable address space
1313 * due to VM fiddling with it
1315 if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
1316 src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
1317 continue;
1319 #endif
1321 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1322 if ((r = try_one(src_ptr, caller_ptr)) == OK)
1323 return(r);
1326 return(ESRCH);
1330 /*===========================================================================*
1331 * try_one *
1332 *===========================================================================*/
1333 static int try_one(struct proc *src_ptr, struct proc *dst_ptr)
1335 /* Try to receive an asynchronous message from 'src_ptr' */
1336 int r = EAGAIN, done, do_notify;
1337 unsigned int flags, i;
1338 size_t size;
1339 endpoint_t dst;
1340 struct proc *caller_ptr;
1341 struct priv *privp;
1342 asynmsg_t tabent;
1343 vir_bytes table_v;
1345 privp = priv(src_ptr);
1346 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1347 size = privp->s_asynsize;
1348 table_v = privp->s_asyntab;
1350 /* Clear table pending message flag. We're done unless we're not. */
1351 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1353 if (size == 0) return(EAGAIN);
1354 if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1356 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1358 /* Scan the table */
1359 do_notify = FALSE;
1360 done = TRUE;
1362 for (i = 0; i < size; i++) {
1363 /* Process each entry in the table and store the result in the table.
1364 * If we're done handling a message, copy the result to the sender.
1365 * Some checks done in mini_senda are duplicated here, as the sender
1366 * could've altered the contents of the table in the meantime.
1369 /* Copy message to kernel */
1370 A_RETR(i);
1371 flags = tabent.flags;
1372 dst = tabent.dst;
1374 if (flags == 0) continue; /* Skip empty entries */
1376 /* 'flags' field must contain only valid bits */
1377 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1378 r = EINVAL;
1379 else if (!(flags & AMF_VALID)) /* Must contain message */
1380 r = EINVAL;
1381 else if (flags & AMF_DONE) continue; /* Already done processing */
1383 /* Clear done flag. The sender is done sending when all messages in the
1384 * table are marked done or empty. However, we will know that only
1385 * the next time we enter this function or when the sender decides to
1386 * send additional asynchronous messages and manages to deliver them
1387 * all.
1389 done = FALSE;
1391 if (r == EINVAL)
1392 goto store_result;
1394 /* Message must be directed at receiving end */
1395 if (dst != dst_ptr->p_endpoint) continue;
1397 /* If AMF_NOREPLY is set, then this message is not a reply to a
1398 * SENDREC and thus should not satisfy the receiving part of the
1399 * SENDREC. This message is to be delivered later.
1401 if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
1402 continue;
1404 /* Destination is ready to receive the message; deliver it */
1405 r = OK;
1406 dst_ptr->p_delivermsg = tabent.msg;
1407 dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
1408 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1409 #if DEBUG_IPC_HOOK
1410 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
1411 #endif
1413 store_result:
1414 /* Store results for sender */
1415 tabent.result = r;
1416 tabent.flags = flags | AMF_DONE;
1417 if (flags & AMF_NOTIFY) do_notify = TRUE;
1418 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1419 A_INSRT(i); /* Copy results to sender */
1421 break;
1424 if (do_notify)
1425 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1427 if (done) {
1428 privp->s_asyntab = -1;
1429 privp->s_asynsize = 0;
1430 } else {
1431 set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1434 asyn_error:
1435 return(r);
1438 /*===========================================================================*
1439 * cancel_async *
1440 *===========================================================================*/
1441 int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
1443 /* Cancel asynchronous messages from src to dst, because dst is not interested
1444 * in them (e.g., dst has been restarted) */
1445 int done, do_notify;
1446 unsigned int flags, i;
1447 size_t size;
1448 endpoint_t dst;
1449 struct proc *caller_ptr;
1450 struct priv *privp;
1451 asynmsg_t tabent;
1452 vir_bytes table_v;
1454 privp = priv(src_ptr);
1455 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1456 size = privp->s_asynsize;
1457 table_v = privp->s_asyntab;
1459 /* Clear table pending message flag. We're done unless we're not. */
1460 privp->s_asyntab = -1;
1461 privp->s_asynsize = 0;
1462 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1464 if (size == 0) return(EAGAIN);
1465 if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1467 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1469 /* Scan the table */
1470 do_notify = FALSE;
1471 done = TRUE;
1474 for (i = 0; i < size; i++) {
1475 /* Process each entry in the table and store the result in the table.
1476 * If we're done handling a message, copy the result to the sender.
1477 * Some checks done in mini_senda are duplicated here, as the sender
1478 * could've altered the contents of the table in the mean time.
1481 int r = EDEADSRCDST; /* Cancel delivery due to dead dst */
1483 /* Copy message to kernel */
1484 A_RETR(i);
1485 flags = tabent.flags;
1486 dst = tabent.dst;
1488 if (flags == 0) continue; /* Skip empty entries */
1490 /* 'flags' field must contain only valid bits */
1491 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1492 r = EINVAL;
1493 else if (!(flags & AMF_VALID)) /* Must contain message */
1494 r = EINVAL;
1495 else if (flags & AMF_DONE) continue; /* Already done processing */
1497 /* Message must be directed at receiving end */
1498 if (dst != dst_ptr->p_endpoint) {
1499 done = FALSE;
1500 continue;
1503 /* Store results for sender */
1504 tabent.result = r;
1505 tabent.flags = flags | AMF_DONE;
1506 if (flags & AMF_NOTIFY) do_notify = TRUE;
1507 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1508 A_INSRT(i); /* Copy results to sender */
1511 if (do_notify)
1512 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1514 if (!done) {
1515 privp->s_asyntab = table_v;
1516 privp->s_asynsize = size;
1519 asyn_error:
1520 return(OK);
1523 /*===========================================================================*
1524 * enqueue *
1525 *===========================================================================*/
1526 void enqueue(
1527 register struct proc *rp /* this process is now runnable */
1530 /* Add 'rp' to one of the queues of runnable processes. This function is
1531 * responsible for inserting a process into one of the scheduling queues.
1532 * The mechanism is implemented here. The actual scheduling policy is
1533 * defined in sched() and pick_proc().
1535 * This function can be used x-cpu as it always uses the queues of the cpu the
1536 * process is assigned to.
1538 int q = rp->p_priority; /* scheduling queue to use */
1539 struct proc **rdy_head, **rdy_tail;
1541 assert(proc_is_runnable(rp));
1543 assert(q >= 0);
1545 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1546 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1548 /* Now add the process to the queue. */
1549 if (!rdy_head[q]) { /* add to empty queue */
1550 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1551 rp->p_nextready = NULL; /* mark new end */
1553 else { /* add to tail of queue */
1554 rdy_tail[q]->p_nextready = rp; /* chain tail of queue */
1555 rdy_tail[q] = rp; /* set new queue tail */
1556 rp->p_nextready = NULL; /* mark new end */
1559 if (cpuid == rp->p_cpu) {
1561 * enqueueing a process with a higher priority than the current one,
1562 * it gets preempted. The current process must be preemptible. Testing
1563 * the priority also makes sure that a process does not preempt itself
1565 struct proc * p;
1566 p = get_cpulocal_var(proc_ptr);
1567 assert(p);
1568 if((p->p_priority > rp->p_priority) &&
1569 (priv(p)->s_flags & PREEMPTIBLE))
1570 RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
1572 #ifdef CONFIG_SMP
1574 * if the process was enqueued on a different cpu and the cpu is idle, i.e.
1575 * the time is off, we need to wake up that cpu and let it schedule this new
1576 * process
1578 else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
1579 smp_schedule(rp->p_cpu);
1581 #endif
1583 /* Make note of when this process was added to queue */
1584 read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue));
1587 #if DEBUG_SANITYCHECKS
1588 assert(runqueues_ok_local());
1589 #endif
1592 /*===========================================================================*
1593 * enqueue_head *
1594 *===========================================================================*/
1596 * put a process at the front of its run queue. It comes handy when a process is
1597 * preempted and removed from run queue to not to have a currently not-runnable
1598 * process on a run queue. We have to put this process back at the fron to be
1599 * fair
1601 static void enqueue_head(struct proc *rp)
1603 const int q = rp->p_priority; /* scheduling queue to use */
1605 struct proc **rdy_head, **rdy_tail;
1607 assert(proc_ptr_ok(rp));
1608 assert(proc_is_runnable(rp));
1611 * the process was runnable without its quantum expired when dequeued. A
1612 * process with no time left should have been handled else and differently
1614 assert(rp->p_cpu_time_left);
1616 assert(q >= 0);
1619 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1620 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1622 /* Now add the process to the queue. */
1623 if (!rdy_head[q]) { /* add to empty queue */
1624 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1625 rp->p_nextready = NULL; /* mark new end */
1626 } else { /* add to head of queue */
1627 rp->p_nextready = rdy_head[q]; /* chain head of queue */
1628 rdy_head[q] = rp; /* set new queue head */
1631 /* Make note of when this process was added to queue */
1632 read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue)));
1635 /* Process accounting for scheduling */
1636 rp->p_accounting.dequeues--;
1637 rp->p_accounting.preempted++;
1639 #if DEBUG_SANITYCHECKS
1640 assert(runqueues_ok_local());
1641 #endif
1644 /*===========================================================================*
1645 * dequeue *
1646 *===========================================================================*/
1647 void dequeue(struct proc *rp)
1648 /* this process is no longer runnable */
1650 /* A process must be removed from the scheduling queues, for example, because
1651 * it has blocked. If the currently active process is removed, a new process
1652 * is picked to run by calling pick_proc().
1654 * This function can operate x-cpu as it always removes the process from the
1655 * queue of the cpu the process is currently assigned to.
1657 int q = rp->p_priority; /* queue to use */
1658 struct proc **xpp; /* iterate over queue */
1659 struct proc *prev_xp;
1660 u64_t tsc, tsc_delta;
1662 struct proc **rdy_tail;
1664 assert(proc_ptr_ok(rp));
1665 assert(!proc_is_runnable(rp));
1667 /* Side-effect for kernel: check if the task's stack still is ok? */
1668 assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
1670 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1672 /* Now make sure that the process is not in its ready queue. Remove the
1673 * process if it is found. A process can be made unready even if it is not
1674 * running by being sent a signal that kills it.
1676 prev_xp = NULL;
1677 for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
1678 xpp = &(*xpp)->p_nextready) {
1679 if (*xpp == rp) { /* found process to remove */
1680 *xpp = (*xpp)->p_nextready; /* replace with next chain */
1681 if (rp == rdy_tail[q]) { /* queue tail removed */
1682 rdy_tail[q] = prev_xp; /* set new tail */
1685 break;
1687 prev_xp = *xpp; /* save previous in chain */
1691 /* Process accounting for scheduling */
1692 rp->p_accounting.dequeues++;
1694 /* this is not all that accurate on virtual machines, especially with
1695 IO bound processes that only spend a short amount of time in the queue
1696 at a time. */
1697 if (rp->p_accounting.enter_queue) {
1698 read_tsc_64(&tsc);
1699 tsc_delta = tsc - rp->p_accounting.enter_queue;
1700 rp->p_accounting.time_in_queue = rp->p_accounting.time_in_queue +
1701 tsc_delta;
1702 rp->p_accounting.enter_queue = 0;
1706 #if DEBUG_SANITYCHECKS
1707 assert(runqueues_ok_local());
1708 #endif
1711 /*===========================================================================*
1712 * pick_proc *
1713 *===========================================================================*/
1714 static struct proc * pick_proc(void)
1716 /* Decide who to run now. A new process is selected an returned.
1717 * When a billable process is selected, record it in 'bill_ptr', so that the
1718 * clock task can tell who to bill for system time.
1720 * This function always uses the run queues of the local cpu!
1722 register struct proc *rp; /* process to run */
1723 struct proc **rdy_head;
1724 int q; /* iterate over queues */
1726 /* Check each of the scheduling queues for ready processes. The number of
1727 * queues is defined in proc.h, and priorities are set in the task table.
1728 * If there are no processes ready to run, return NULL.
1730 rdy_head = get_cpulocal_var(run_q_head);
1731 for (q=0; q < NR_SCHED_QUEUES; q++) {
1732 if(!(rp = rdy_head[q])) {
1733 TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
1734 continue;
1736 assert(proc_is_runnable(rp));
1737 if (priv(rp)->s_flags & BILLABLE)
1738 get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
1739 return rp;
1741 return NULL;
1744 /*===========================================================================*
1745 * endpoint_lookup *
1746 *===========================================================================*/
1747 struct proc *endpoint_lookup(endpoint_t e)
1749 int n;
1751 if(!isokendpt(e, &n)) return NULL;
1753 return proc_addr(n);
1756 /*===========================================================================*
1757 * isokendpt_f *
1758 *===========================================================================*/
1759 #if DEBUG_ENABLE_IPC_WARNINGS
1760 int isokendpt_f(file, line, e, p, fatalflag)
1761 const char *file;
1762 int line;
1763 #else
1764 int isokendpt_f(e, p, fatalflag)
1765 #endif
1766 endpoint_t e;
1767 int *p;
1768 const int fatalflag;
1770 int ok = 0;
1771 /* Convert an endpoint number into a process number.
1772 * Return nonzero if the process is alive with the corresponding
1773 * generation number, zero otherwise.
1775 * This function is called with file and line number by the
1776 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
1777 * otherwise without. This allows us to print the where the
1778 * conversion was attempted, making the errors verbose without
1779 * adding code for that at every call.
1781 * If fatalflag is nonzero, we must panic if the conversion doesn't
1782 * succeed.
1784 *p = _ENDPOINT_P(e);
1785 ok = 0;
1786 if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
1787 ok = 1;
1788 if(!ok && fatalflag)
1789 panic("invalid endpoint: %d", e);
1790 return ok;
1793 static void notify_scheduler(struct proc *p)
1795 message m_no_quantum;
1796 int err;
1798 assert(!proc_kernel_scheduler(p));
1800 /* dequeue the process */
1801 RTS_SET(p, RTS_NO_QUANTUM);
1803 * Notify the process's scheduler that it has run out of
1804 * quantum. This is done by sending a message to the scheduler
1805 * on the process's behalf
1807 m_no_quantum.m_source = p->p_endpoint;
1808 m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
1809 m_no_quantum.m_krn_lsys_schedule.acnt_queue = cpu_time_2_ms(p->p_accounting.time_in_queue);
1810 m_no_quantum.m_krn_lsys_schedule.acnt_deqs = p->p_accounting.dequeues;
1811 m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync = p->p_accounting.ipc_sync;
1812 m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
1813 m_no_quantum.m_krn_lsys_schedule.acnt_preempt = p->p_accounting.preempted;
1814 m_no_quantum.m_krn_lsys_schedule.acnt_cpu = cpuid;
1815 m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load = cpu_load();
1817 /* Reset accounting */
1818 reset_proc_accounting(p);
1820 if ((err = mini_send(p, p->p_scheduler->p_endpoint,
1821 &m_no_quantum, FROM_KERNEL))) {
1822 panic("WARNING: Scheduling: mini_send returned %d\n", err);
1826 void proc_no_time(struct proc * p)
1828 if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
1829 /* this dequeues the process */
1830 notify_scheduler(p);
1832 else {
1834 * non-preemptible processes only need their quantum to
1835 * be renewed. In fact, they by pass scheduling
1837 p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
1838 #if DEBUG_RACE
1839 RTS_SET(p, RTS_PREEMPTED);
1840 RTS_UNSET(p, RTS_PREEMPTED);
1841 #endif
1845 void reset_proc_accounting(struct proc *p)
1847 p->p_accounting.preempted = 0;
1848 p->p_accounting.ipc_sync = 0;
1849 p->p_accounting.ipc_async = 0;
1850 p->p_accounting.dequeues = 0;
1851 p->p_accounting.time_in_queue = 0;
1852 p->p_accounting.enter_queue = 0;
1855 void copr_not_available_handler(void)
1857 struct proc * p;
1858 struct proc ** local_fpu_owner;
1860 * Disable the FPU exception (both for the kernel and for the process
1861 * once it's scheduled), and initialize or restore the FPU state.
1864 disable_fpu_exception();
1866 p = get_cpulocal_var(proc_ptr);
1868 /* if FPU is not owned by anyone, do not store anything */
1869 local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
1870 if (*local_fpu_owner != NULL) {
1871 assert(*local_fpu_owner != p);
1872 save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
1876 * restore the current process' state and let it run again, do not
1877 * schedule!
1879 if (restore_fpu(p) != OK) {
1880 /* Restoring FPU state failed. This is always the process's own
1881 * fault. Send a signal, and schedule another process instead.
1883 *local_fpu_owner = NULL; /* release FPU */
1884 cause_sig(proc_nr(p), SIGFPE);
1885 return;
1888 *local_fpu_owner = p;
1889 context_stop(proc_addr(KERNEL));
1890 restore_user_context(p);
1891 NOT_REACHABLE;
1894 void release_fpu(struct proc * p) {
1895 struct proc ** fpu_owner_ptr;
1897 fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
1899 if (*fpu_owner_ptr == p)
1900 *fpu_owner_ptr = NULL;
1903 void ser_dump_proc()
1905 struct proc *pp;
1907 for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
1909 if (isemptyp(pp))
1910 continue;
1911 print_proc_recursive(pp);
1915 void increase_proc_signals(struct proc *p)
1917 p->p_signal_received++;