Sync with cat.c from netbsd-8
[minix3.git] / minix / kernel / proc.c
blobda74c409ba777f935805aec3a8654e489a7184ac
/* This file contains essentially all of the process and message handling.
 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
 * There is one entry point from the outside:
 *
 *   sys_call:        a system call, i.e., the kernel is trapped with an INT
 *
 * Changes:
 *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
 *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
 *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
 *   May 24, 2005     new notification system call  (Jorrit N. Herder)
 *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
 *
 * The code here is critical to make everything work and is important for the
 * overall performance of the system. A large fraction of the code deals with
 * list manipulation. To make this both easy to understand and fast to execute,
 * pointers to pointers are used throughout the code. They remove the need for
 * special cases at the head or tail of a linked list.
 *
 *  node_t *queue, *new_node;	// assume these as global variables
 *  node_t **xpp = &queue;	// get pointer pointer to head of queue
 *  while (*xpp != NULL)	// find last pointer of the linked list
 *      xpp = &(*xpp)->next;	// get pointer to next pointer
 *  *xpp = new_node;		// now replace the end (the NULL pointer)
 *  new_node->next = NULL;	// and mark the new end of the list
 *
 * For example, when adding a new node to the end of the list, one normally
 * makes an exception for an empty list and looks up the end of the list for
 * nonempty lists. As shown above, this is not required with pointer pointers.
 */
32 #include <stddef.h>
33 #include <signal.h>
34 #include <assert.h>
35 #include <string.h>
37 #include "vm.h"
38 #include "clock.h"
39 #include "spinlock.h"
40 #include "arch_proto.h"
42 #include <minix/syslib.h>
44 /* Scheduling and message passing functions */
45 static void idle(void);
46 /**
47 * Made public for use in clock.c (for user-space scheduling)
48 static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
49 *m_ptr, int flags);
51 static int mini_receive(struct proc *caller_ptr, endpoint_t src,
52 message *m_buff_usr, int flags);
53 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
54 size);
55 static int deadlock(int function, register struct proc *caller,
56 endpoint_t src_dst_e);
57 static int try_async(struct proc *caller_ptr);
58 static int try_one(endpoint_t receive_e, struct proc *src_ptr,
59 struct proc *dst_ptr);
60 static struct proc * pick_proc(void);
61 static void enqueue_head(struct proc *rp);
63 /* all idles share the same idle_priv structure */
64 static struct priv idle_priv;
/*
 * Build the name of an idle process: the string "idle" followed by the
 * decimal value of n without leading zeros (e.g. "idle0", "idle12").
 *
 *   name  destination buffer; up to 8 bytes are written ("idle", at most
 *         three digits and the terminating NUL), so it must be at least
 *         that large
 *   n     number to append; values above 999 are clamped to 999 and
 *         negative values are clamped to 0
 */
static void set_idle_name(char * name, int n)
{
	int pos, divisor;
	int started = 0;	/* set once the first digit has been written */

	/* Keep n within 0..999 so that every digit below is in '0'..'9'. */
	if (n > 999)
		n = 999;
	else if (n < 0)
		n = 0;

	name[0] = 'i';
	name[1] = 'd';
	name[2] = 'l';
	name[3] = 'e';

	for (pos = 4, divisor = 100; divisor > 0; divisor /= 10) {
		int digit = n / divisor;

		n -= digit * divisor;

		/* Suppress leading zeros, but always emit the units digit. */
		if (started || digit != 0 || divisor == 1) {
			started = 1;
			name[pos++] = (char) ('0' + digit);
		}
	}

	name[pos] = '\0';
}
96 #define PICK_ANY 1
97 #define PICK_HIGHERONLY 2
99 #define BuildNotifyMessage(m_ptr, src, dst_ptr) \
100 memset((m_ptr), 0, sizeof(*(m_ptr))); \
101 (m_ptr)->m_type = NOTIFY_MESSAGE; \
102 (m_ptr)->m_notify.timestamp = get_monotonic(); \
103 switch (src) { \
104 case HARDWARE: \
105 (m_ptr)->m_notify.interrupts = \
106 priv(dst_ptr)->s_int_pending; \
107 priv(dst_ptr)->s_int_pending = 0; \
108 break; \
109 case SYSTEM: \
110 memcpy(&(m_ptr)->m_notify.sigset, \
111 &priv(dst_ptr)->s_sig_pending, \
112 sizeof(sigset_t)); \
113 sigemptyset(&priv(dst_ptr)->s_sig_pending); \
114 break; \
117 static message m_notify_buff = { 0, NOTIFY_MESSAGE };
119 void proc_init(void)
121 struct proc * rp;
122 struct priv *sp;
123 int i;
125 /* Clear the process table. Announce each slot as empty and set up
126 * mappings for proc_addr() and proc_nr() macros. Do the same for the
127 * table with privilege structures for the system processes.
129 for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
130 rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
131 rp->p_magic = PMAGIC;
132 rp->p_nr = i; /* proc number from ptr */
133 rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
134 rp->p_scheduler = NULL; /* no user space scheduler */
135 rp->p_priority = 0; /* no priority */
136 rp->p_quantum_size_ms = 0; /* no quantum size */
138 /* arch-specific initialization */
139 arch_proc_reset(rp);
141 for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
142 sp->s_proc_nr = NONE; /* initialize as free */
143 sp->s_id = (sys_id_t) i; /* priv structure index */
144 ppriv_addr[i] = sp; /* priv ptr from number */
145 sp->s_sig_mgr = NONE; /* clear signal managers */
146 sp->s_bak_sig_mgr = NONE;
149 idle_priv.s_flags = IDL_F;
150 /* initialize IDLE structures for every CPU */
151 for (i = 0; i < CONFIG_MAX_CPUS; i++) {
152 struct proc * ip = get_cpu_var_ptr(i, idle_proc);
153 ip->p_endpoint = IDLE;
154 ip->p_priv = &idle_priv;
155 /* must not let idle ever get scheduled */
156 ip->p_rts_flags |= RTS_PROC_STOP;
157 set_idle_name(ip->p_name, i);
/*
 * Select the address space to use while the CPU idles. On SMP builds this
 * switches to VM's address space; on other builds it does nothing.
 */
static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/*
	 * For now we bet that VM is always alive and its pages are available,
	 * so when the CPU wakes up the kernel is mapped and no surprises
	 * happen. This only matters when more than one CPU is available.
	 */
	switch_address_space(proc_addr(VM_PROC_NR));
#endif
}
173 /*===========================================================================*
174 * idle *
175 *===========================================================================*/
176 static void idle(void)
178 struct proc * p;
180 /* This function is called whenever there is no work to do.
181 * Halt the CPU, and measure how many timestamp counter ticks are
182 * spent not doing anything. This allows test setups to measure
183 * the CPU utilization of certain workloads with high precision.
186 p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
187 if (priv(p)->s_flags & BILLABLE)
188 get_cpulocal_var(bill_ptr) = p;
190 switch_address_space_idle();
192 #ifdef CONFIG_SMP
193 get_cpulocal_var(cpu_is_idle) = 1;
194 /* we don't need to keep time on APs as it is handled on the BSP */
195 if (cpuid != bsp_cpu_id)
196 stop_local_timer();
197 else
198 #endif
201 * If the timer has expired while in kernel we must
202 * rearm it before we go to sleep
204 restart_local_timer();
207 /* start accounting for the idle time */
208 context_stop(proc_addr(KERNEL));
209 #if !SPROFILE
210 halt_cpu();
211 #else
212 if (!sprofiling)
213 halt_cpu();
214 else {
215 volatile int * v;
217 v = get_cpulocal_var_ptr(idle_interrupted);
218 interrupts_enable();
219 while (!*v)
220 arch_pause();
221 interrupts_disable();
222 *v = 0;
224 #endif
226 * end of accounting for the idle task does not happen here, the kernel
227 * is handling stuff for quite a while before it gets back here!
231 /*===========================================================================*
232 * vm_suspend *
233 *===========================================================================*/
234 void vm_suspend(struct proc *caller, const struct proc *target,
235 const vir_bytes linaddr, const vir_bytes len, const int type,
236 const int writeflag)
238 /* This range is not OK for this process. Set parameters
239 * of the request and notify VM about the pending request.
241 assert(!RTS_ISSET(caller, RTS_VMREQUEST));
242 assert(!RTS_ISSET(target, RTS_VMREQUEST));
244 RTS_SET(caller, RTS_VMREQUEST);
246 caller->p_vmrequest.req_type = VMPTYPE_CHECK;
247 caller->p_vmrequest.target = target->p_endpoint;
248 caller->p_vmrequest.params.check.start = linaddr;
249 caller->p_vmrequest.params.check.length = len;
250 caller->p_vmrequest.params.check.writeflag = writeflag;
251 caller->p_vmrequest.type = type;
253 /* Connect caller on vmrequest wait queue. */
254 if(!(caller->p_vmrequest.nextrequestor = vmrequest))
255 if(OK != send_sig(VM_PROC_NR, SIGKMEM))
256 panic("send_sig failed");
257 vmrequest = caller;
260 /*===========================================================================*
261 * delivermsg *
262 *===========================================================================*/
263 static void delivermsg(struct proc *rp)
265 assert(!RTS_ISSET(rp, RTS_VMREQUEST));
266 assert(rp->p_misc_flags & MF_DELIVERMSG);
267 assert(rp->p_delivermsg.m_source != NONE);
269 if (copy_msg_to_user(&rp->p_delivermsg,
270 (message *) rp->p_delivermsg_vir)) {
271 if(rp->p_misc_flags & MF_MSGFAILED) {
272 /* 2nd consecutive failure means this won't succeed */
273 printf("WARNING wrong user pointer 0x%08lx from "
274 "process %s / %d\n",
275 rp->p_delivermsg_vir,
276 rp->p_name,
277 rp->p_endpoint);
278 cause_sig(rp->p_nr, SIGSEGV);
279 } else {
280 /* 1st failure means we have to ask VM to handle it */
281 vm_suspend(rp, rp, rp->p_delivermsg_vir,
282 sizeof(message), VMSTYPE_DELIVERMSG, 1);
283 rp->p_misc_flags |= MF_MSGFAILED;
285 } else {
286 /* Indicate message has been delivered; address is 'used'. */
287 rp->p_delivermsg.m_source = NONE;
288 rp->p_misc_flags &= ~(MF_DELIVERMSG|MF_MSGFAILED);
290 if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
291 rp->p_reg.retreg = OK;
296 /*===========================================================================*
297 * switch_to_user *
298 *===========================================================================*/
299 void switch_to_user(void)
301 /* This function is called an instant before proc_ptr is
302 * to be scheduled again.
304 struct proc * p;
305 #ifdef CONFIG_SMP
306 int tlb_must_refresh = 0;
307 #endif
309 p = get_cpulocal_var(proc_ptr);
311 * if the current process is still runnable check the misc flags and let
312 * it run unless it becomes not runnable in the meantime
314 if (proc_is_runnable(p))
315 goto check_misc_flags;
317 * if a process becomes not runnable while handling the misc flags, we
318 * need to pick a new one here and start from scratch. Also if the
319 * current process wasn't runnable, we pick a new one here
321 not_runnable_pick_new:
322 if (proc_is_preempted(p)) {
323 p->p_rts_flags &= ~RTS_PREEMPTED;
324 if (proc_is_runnable(p)) {
325 if (p->p_cpu_time_left)
326 enqueue_head(p);
327 else
328 enqueue(p);
333 * if we have no process to run, set IDLE as the current process for
334 * time accounting and put the cpu in an idle state. After the next
335 * timer interrupt the execution resumes here and we can pick another
336 * process. If there is still nothing runnable we "schedule" IDLE again
338 while (!(p = pick_proc())) {
339 idle();
342 /* update the global variable */
343 get_cpulocal_var(proc_ptr) = p;
345 #ifdef CONFIG_SMP
346 if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
347 tlb_must_refresh = 1;
348 #endif
349 switch_address_space(p);
351 check_misc_flags:
353 assert(p);
354 assert(proc_is_runnable(p));
355 while (p->p_misc_flags &
356 (MF_KCALL_RESUME | MF_DELIVERMSG |
357 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
359 assert(proc_is_runnable(p));
360 if (p->p_misc_flags & MF_KCALL_RESUME) {
361 kernel_call_resume(p);
363 else if (p->p_misc_flags & MF_DELIVERMSG) {
364 TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
365 p->p_name, p->p_endpoint););
366 delivermsg(p);
368 else if (p->p_misc_flags & MF_SC_DEFER) {
369 /* Perform the system call that we deferred earlier. */
371 assert (!(p->p_misc_flags & MF_SC_ACTIVE));
373 arch_do_syscall(p);
375 /* If the process is stopped for signal delivery, and
376 * not blocked sending a message after the system call,
377 * inform PM.
379 if ((p->p_misc_flags & MF_SIG_DELAY) &&
380 !RTS_ISSET(p, RTS_SENDING))
381 sig_delay_done(p);
383 else if (p->p_misc_flags & MF_SC_TRACE) {
384 /* Trigger a system call leave event if this was a
385 * system call. We must do this after processing the
386 * other flags above, both for tracing correctness and
387 * to be able to use 'break'.
389 if (!(p->p_misc_flags & MF_SC_ACTIVE))
390 break;
392 p->p_misc_flags &=
393 ~(MF_SC_TRACE | MF_SC_ACTIVE);
395 /* Signal the "leave system call" event.
396 * Block the process.
398 cause_sig(proc_nr(p), SIGTRAP);
400 else if (p->p_misc_flags & MF_SC_ACTIVE) {
401 /* If MF_SC_ACTIVE was set, remove it now:
402 * we're leaving the system call.
404 p->p_misc_flags &= ~MF_SC_ACTIVE;
406 break;
410 * the selected process might not be runnable anymore. We have
411 * to checkit and schedule another one
413 if (!proc_is_runnable(p))
414 goto not_runnable_pick_new;
417 * check the quantum left before it runs again. We must do it only here
418 * as we are sure that a possible out-of-quantum message to the
419 * scheduler will not collide with the regular ipc
421 if (!p->p_cpu_time_left)
422 proc_no_time(p);
424 * After handling the misc flags the selected process might not be
425 * runnable anymore. We have to checkit and schedule another one
427 if (!proc_is_runnable(p))
428 goto not_runnable_pick_new;
430 TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
431 "pc 0x%08x\n",
432 cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
433 #if DEBUG_TRACE
434 p->p_schedules++;
435 #endif
437 p = arch_finish_switch_to_user();
438 assert(p->p_cpu_time_left);
440 context_stop(proc_addr(KERNEL));
442 /* If the process isn't the owner of FPU, enable the FPU exception */
443 if (get_cpulocal_var(fpu_owner) != p)
444 enable_fpu_exception();
445 else
446 disable_fpu_exception();
448 /* If MF_CONTEXT_SET is set, don't clobber process state within
449 * the kernel. The next kernel entry is OK again though.
451 p->p_misc_flags &= ~MF_CONTEXT_SET;
453 #if defined(__i386__)
454 assert(p->p_seg.p_cr3 != 0);
455 #elif defined(__arm__)
456 assert(p->p_seg.p_ttbr != 0);
457 #endif
458 #ifdef CONFIG_SMP
459 if (p->p_misc_flags & MF_FLUSH_TLB) {
460 if (tlb_must_refresh)
461 refresh_tlb();
462 p->p_misc_flags &= ~MF_FLUSH_TLB;
464 #endif
466 restart_local_timer();
469 * restore_user_context() carries out the actual mode switch from kernel
470 * to userspace. This function does not return
472 restore_user_context(p);
473 NOT_REACHABLE;
477 * handler for all synchronous IPC calls
479 static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
480 int call_nr, /* system call number and flags */
481 endpoint_t src_dst_e, /* src or dst of the call */
482 message *m_ptr) /* users pointer to a message */
484 int result; /* the system call's result */
485 int src_dst_p; /* Process slot number */
486 char *callname;
488 /* Check destination. RECEIVE is the only call that accepts ANY (in addition
489 * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
490 * endpoint to corresponds to a process. In addition, it is necessary to check
491 * whether a process is allowed to send to a given destination.
493 assert(call_nr != SENDA);
495 /* Only allow non-negative call_nr values less than 32 */
496 if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
497 || !(callname = ipc_call_names[call_nr])) {
498 #if DEBUG_ENABLE_IPC_WARNINGS
499 printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
500 call_nr, proc_nr(caller_ptr), src_dst_e);
501 #endif
502 return(ETRAPDENIED); /* trap denied by mask or kernel */
505 if (src_dst_e == ANY)
507 if (call_nr != RECEIVE)
509 #if 0
510 printf("sys_call: %s by %d with bad endpoint %d\n",
511 callname,
512 proc_nr(caller_ptr), src_dst_e);
513 #endif
514 return EINVAL;
516 src_dst_p = (int) src_dst_e;
518 else
520 /* Require a valid source and/or destination process. */
521 if(!isokendpt(src_dst_e, &src_dst_p)) {
522 #if 0
523 printf("sys_call: %s by %d with bad endpoint %d\n",
524 callname,
525 proc_nr(caller_ptr), src_dst_e);
526 #endif
527 return EDEADSRCDST;
530 /* If the call is to send to a process, i.e., for SEND, SENDNB,
531 * SENDREC or NOTIFY, verify that the caller is allowed to send to
532 * the given destination.
534 if (call_nr != RECEIVE)
536 if (!may_send_to(caller_ptr, src_dst_p)) {
537 #if DEBUG_ENABLE_IPC_WARNINGS
538 printf(
539 "sys_call: ipc mask denied %s from %d to %d\n",
540 callname,
541 caller_ptr->p_endpoint, src_dst_e);
542 #endif
543 return(ECALLDENIED); /* call denied by ipc mask */
548 /* Check if the process has privileges for the requested call. Calls to the
549 * kernel may only be SENDREC, because tasks always reply and may not block
550 * if the caller doesn't do receive().
552 if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
553 #if DEBUG_ENABLE_IPC_WARNINGS
554 printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
555 callname, proc_nr(caller_ptr), src_dst_p);
556 #endif
557 return(ETRAPDENIED); /* trap denied by mask or kernel */
560 if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
561 #if DEBUG_ENABLE_IPC_WARNINGS
562 printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
563 callname, proc_nr(caller_ptr), src_dst_e);
564 #endif
565 return(ETRAPDENIED); /* trap denied by mask or kernel */
568 switch(call_nr) {
569 case SENDREC:
570 /* A flag is set so that notifications cannot interrupt SENDREC. */
571 caller_ptr->p_misc_flags |= MF_REPLY_PEND;
572 /* fall through */
573 case SEND:
574 result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
575 if (call_nr == SEND || result != OK)
576 break; /* done, or SEND failed */
577 /* fall through for SENDREC */
578 case RECEIVE:
579 if (call_nr == RECEIVE) {
580 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
581 IPC_STATUS_CLEAR(caller_ptr); /* clear IPC status code */
583 result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
584 break;
585 case NOTIFY:
586 result = mini_notify(caller_ptr, src_dst_e);
587 break;
588 case SENDNB:
589 result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
590 break;
591 default:
592 result = EBADCALL; /* illegal system call */
595 /* Now, return the result of the system call to the caller. */
596 return(result);
599 int do_ipc(reg_t r1, reg_t r2, reg_t r3)
601 struct proc *const caller_ptr = get_cpulocal_var(proc_ptr); /* get pointer to caller */
602 int call_nr = (int) r1;
604 assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));
606 /* bill kernel time to this process. */
607 kbill_ipc = caller_ptr;
609 /* If this process is subject to system call tracing, handle that first. */
610 if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
611 /* Are we tracing this process, and is it the first sys_call entry? */
612 if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
613 MF_SC_TRACE) {
614 /* We must notify the tracer before processing the actual
615 * system call. If we don't, the tracer could not obtain the
616 * input message. Postpone the entire system call.
618 caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
619 assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
620 caller_ptr->p_misc_flags |= MF_SC_DEFER;
621 caller_ptr->p_defer.r1 = r1;
622 caller_ptr->p_defer.r2 = r2;
623 caller_ptr->p_defer.r3 = r3;
625 /* Signal the "enter system call" event. Block the process. */
626 cause_sig(proc_nr(caller_ptr), SIGTRAP);
628 /* Preserve the return register's value. */
629 return caller_ptr->p_reg.retreg;
632 /* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
633 caller_ptr->p_misc_flags &= ~MF_SC_DEFER;
635 assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));
637 /* Set a flag to allow reliable tracing of leaving the system call. */
638 caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
641 if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
642 panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
643 caller_ptr->p_name, caller_ptr->p_endpoint);
646 /* Now check if the call is known and try to perform the request. The only
647 * system calls that exist in MINIX are sending and receiving messages.
648 * - SENDREC: combines SEND and RECEIVE in a single system call
649 * - SEND: sender blocks until its message has been delivered
650 * - RECEIVE: receiver blocks until an acceptable message has arrived
651 * - NOTIFY: asynchronous call; deliver notification or mark pending
652 * - SENDA: list of asynchronous send requests
654 switch(call_nr) {
655 case SENDREC:
656 case SEND:
657 case RECEIVE:
658 case NOTIFY:
659 case SENDNB:
661 /* Process accounting for scheduling */
662 caller_ptr->p_accounting.ipc_sync++;
664 return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
665 (message *) r3);
667 case SENDA:
670 * Get and check the size of the argument in bytes as it is a
671 * table
673 size_t msg_size = (size_t) r2;
675 /* Process accounting for scheduling */
676 caller_ptr->p_accounting.ipc_async++;
678 /* Limit size to something reasonable. An arbitrary choice is 16
679 * times the number of process table entries.
681 if (msg_size > 16*(NR_TASKS + NR_PROCS))
682 return EDOM;
683 return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
685 case MINIX_KERNINFO:
687 /* It might not be initialized yet. */
688 if(!minix_kerninfo_user) {
689 return EBADCALL;
692 arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
693 return OK;
695 default:
696 return EBADCALL; /* illegal system call */
700 /*===========================================================================*
701 * deadlock *
702 *===========================================================================*/
703 static int deadlock(
704 int function, /* trap number */
705 register struct proc *cp, /* pointer to caller */
706 endpoint_t src_dst_e /* src or dst process */
709 /* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
710 * a cyclic dependency of blocking send and receive calls. The only cyclic
711 * dependency that is not fatal is if the caller and target directly SEND(REC)
712 * and RECEIVE to each other. If a deadlock is found, the group size is
713 * returned. Otherwise zero is returned.
715 register struct proc *xp; /* process pointer */
716 int group_size = 1; /* start with only caller */
717 #if DEBUG_ENABLE_IPC_WARNINGS
718 static struct proc *processes[NR_PROCS + NR_TASKS];
719 processes[0] = cp;
720 #endif
722 while (src_dst_e != ANY) { /* check while process nr */
723 int src_dst_slot;
724 okendpt(src_dst_e, &src_dst_slot);
725 xp = proc_addr(src_dst_slot); /* follow chain of processes */
726 assert(proc_ptr_ok(xp));
727 assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
728 #if DEBUG_ENABLE_IPC_WARNINGS
729 processes[group_size] = xp;
730 #endif
731 group_size ++; /* extra process in group */
733 /* Check whether the last process in the chain has a dependency. If it
734 * has not, the cycle cannot be closed and we are done.
736 if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
737 return 0;
739 /* Now check if there is a cyclic dependency. For group sizes of two,
740 * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
741 * or other combinations indicate a deadlock.
743 if (src_dst_e == cp->p_endpoint) { /* possible deadlock */
744 if (group_size == 2) { /* caller and src_dst */
745 /* The function number is magically converted to flags. */
746 if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
747 return(0); /* not a deadlock */
750 #if DEBUG_ENABLE_IPC_WARNINGS
752 int i;
753 printf("deadlock between these processes:\n");
754 for(i = 0; i < group_size; i++) {
755 printf(" %10s ", processes[i]->p_name);
757 printf("\n\n");
758 for(i = 0; i < group_size; i++) {
759 print_proc(processes[i]);
760 proc_stacktrace(processes[i]);
763 #endif
764 return(group_size); /* deadlock found */
767 return(0); /* not a deadlock */
770 /*===========================================================================*
771 * has_pending *
772 *===========================================================================*/
773 static int has_pending(sys_map_t *map, int src_p, int asynm)
775 /* Check to see if there is a pending message from the desired source
776 * available.
779 int src_id;
780 sys_id_t id = NULL_PRIV_ID;
781 #ifdef CONFIG_SMP
782 struct proc * p;
783 #endif
785 /* Either check a specific bit in the mask map, or find the first bit set in
786 * it (if any), depending on whether the receive was called on a specific
787 * source endpoint.
789 if (src_p != ANY) {
790 src_id = nr_to_id(src_p);
791 if (get_sys_bit(*map, src_id)) {
792 #ifdef CONFIG_SMP
793 p = proc_addr(id_to_nr(src_id));
794 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
795 p->p_misc_flags |= MF_SENDA_VM_MISS;
796 else
797 #endif
798 id = src_id;
800 } else {
801 /* Find a source with a pending message */
802 for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
803 if (get_sys_bits(*map, src_id) != 0) {
804 #ifdef CONFIG_SMP
805 while (src_id < NR_SYS_PROCS) {
806 while (!get_sys_bit(*map, src_id)) {
807 if (src_id == NR_SYS_PROCS)
808 goto quit_search;
809 src_id++;
811 p = proc_addr(id_to_nr(src_id));
813 * We must not let kernel fiddle with pages of a
814 * process which are currently being changed by
815 * VM. It is dangerous! So do not report such a
816 * process as having pending async messages.
817 * Skip it.
819 if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
820 p->p_misc_flags |= MF_SENDA_VM_MISS;
821 src_id++;
822 } else
823 goto quit_search;
825 #else
826 while (!get_sys_bit(*map, src_id)) src_id++;
827 goto quit_search;
828 #endif
832 quit_search:
833 if (src_id < NR_SYS_PROCS) /* Found one */
834 id = src_id;
837 return(id);
840 /*===========================================================================*
841 * has_pending_notify *
842 *===========================================================================*/
843 int has_pending_notify(struct proc * caller, int src_p)
845 sys_map_t * map = &priv(caller)->s_notify_pending;
846 return has_pending(map, src_p, 0);
849 /*===========================================================================*
850 * has_pending_asend *
851 *===========================================================================*/
852 int has_pending_asend(struct proc * caller, int src_p)
854 sys_map_t * map = &priv(caller)->s_asyn_pending;
855 return has_pending(map, src_p, 1);
858 /*===========================================================================*
859 * unset_notify_pending *
860 *===========================================================================*/
861 void unset_notify_pending(struct proc * caller, int src_p)
863 sys_map_t * map = &priv(caller)->s_notify_pending;
864 unset_sys_bit(*map, src_p);
867 /*===========================================================================*
868 * mini_send *
869 *===========================================================================*/
870 int mini_send(
871 register struct proc *caller_ptr, /* who is trying to send a message? */
872 endpoint_t dst_e, /* to whom is message being sent? */
873 message *m_ptr, /* pointer to message buffer */
874 const int flags
877 /* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
878 * for this message, copy the message to it and unblock 'dst'. If 'dst' is
879 * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
881 register struct proc *dst_ptr;
882 register struct proc **xpp;
883 int dst_p;
884 dst_p = _ENDPOINT_P(dst_e);
885 dst_ptr = proc_addr(dst_p);
887 if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
889 return EDEADSRCDST;
892 /* Check if 'dst' is blocked waiting for this message. The destination's
893 * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
895 if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, (vir_bytes)m_ptr, NULL)) {
896 int call;
897 /* Destination is indeed waiting for this message. */
898 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
900 if (!(flags & FROM_KERNEL)) {
901 if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
902 return EFAULT;
903 } else {
904 dst_ptr->p_delivermsg = *m_ptr;
905 IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
908 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
909 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
911 call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
912 : (flags & NON_BLOCKING ? SENDNB : SEND));
913 IPC_STATUS_ADD_CALL(dst_ptr, call);
915 if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
916 dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
918 RTS_UNSET(dst_ptr, RTS_RECEIVING);
920 #if DEBUG_IPC_HOOK
921 hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
922 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
923 #endif
924 } else {
925 if(flags & NON_BLOCKING) {
926 return(ENOTREADY);
929 /* Check for a possible deadlock before actually blocking. */
930 if (deadlock(SEND, caller_ptr, dst_e)) {
931 return(ELOCKED);
934 /* Destination is not waiting. Block and dequeue caller. */
935 if (!(flags & FROM_KERNEL)) {
936 if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
937 return EFAULT;
938 } else {
939 caller_ptr->p_sendmsg = *m_ptr;
941 * we need to remember that this message is from kernel so we
942 * can set the delivery status flags when the message is
943 * actually delivered
945 caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
948 RTS_SET(caller_ptr, RTS_SENDING);
949 caller_ptr->p_sendto_e = dst_e;
951 /* Process is now blocked. Put in on the destination's queue. */
952 assert(caller_ptr->p_q_link == NULL);
953 xpp = &dst_ptr->p_caller_q; /* find end of list */
954 while (*xpp) xpp = &(*xpp)->p_q_link;
955 *xpp = caller_ptr; /* add caller to end */
957 #if DEBUG_IPC_HOOK
958 hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
959 #endif
961 return(OK);
964 /*===========================================================================*
965 * mini_receive *
966 *===========================================================================*/
967 static int mini_receive(struct proc * caller_ptr,
968 endpoint_t src_e, /* which message source is wanted */
969 message * m_buff_usr, /* pointer to message buffer */
970 const int flags)
972 /* A process or task wants to get a message. If a message is already queued,
973 * acquire it and deblock the sender. If no message from the desired source
974 * is available block the caller.
976 register struct proc **xpp;
977 int r, src_id, found, src_proc_nr, src_p;
978 endpoint_t sender_e;
980 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
982 /* This is where we want our message. */
983 caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
985 if(src_e == ANY) src_p = ANY;
986 else
988 okendpt(src_e, &src_p);
989 if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
991 return EDEADSRCDST;
996 /* Check to see if a message from desired source is already available. The
997 * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
998 * set, the process should be blocked.
1000 if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
1002 /* Check if there are pending notifications, except for SENDREC. */
1003 if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
1005 /* Check for pending notifications */
1006 src_id = has_pending_notify(caller_ptr, src_p);
1007 found = src_id != NULL_PRIV_ID;
1008 if(found) {
1009 src_proc_nr = id_to_nr(src_id); /* get source proc */
1010 sender_e = proc_addr(src_proc_nr)->p_endpoint;
1013 if (found && CANRECEIVE(src_e, sender_e, caller_ptr, 0,
1014 &m_notify_buff)) {
1016 #if DEBUG_ENABLE_IPC_WARNINGS
1017 if(src_proc_nr == NONE) {
1018 printf("mini_receive: sending notify from NONE\n");
1020 #endif
1021 assert(src_proc_nr != NONE);
1022 unset_notify_pending(caller_ptr, src_id); /* no longer pending */
1024 /* Found a suitable source, deliver the notification message. */
1025 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1026 assert(src_e == ANY || sender_e == src_e);
1028 /* assemble message */
1029 BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
1030 caller_ptr->p_delivermsg.m_source = sender_e;
1031 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
1033 IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
1035 goto receive_done;
1039 /* Check for pending asynchronous messages */
1040 if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
1041 if (src_p != ANY)
1042 r = try_one(src_e, proc_addr(src_p), caller_ptr);
1043 else
1044 r = try_async(caller_ptr);
1046 if (r == OK) {
1047 IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
1048 goto receive_done;
1052 /* Check caller queue. Use pointer pointers to keep code simple. */
1053 xpp = &caller_ptr->p_caller_q;
1054 while (*xpp) {
1055 struct proc * sender = *xpp;
1056 endpoint_t sender_e = sender->p_endpoint;
1058 if (CANRECEIVE(src_e, sender_e, caller_ptr, 0, &sender->p_sendmsg)) {
1059 int call;
1060 assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
1061 assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
1063 /* Found acceptable message. Copy it and update status. */
1064 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1065 caller_ptr->p_delivermsg = sender->p_sendmsg;
1066 caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
1067 caller_ptr->p_misc_flags |= MF_DELIVERMSG;
1068 RTS_UNSET(sender, RTS_SENDING);
1070 call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
1071 IPC_STATUS_ADD_CALL(caller_ptr, call);
1074 * if the message is originally from the kernel on behalf of this
1075 * process, we must send the status flags accordingly
1077 if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
1078 IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
1079 /* we can clean the flag now, not need anymore */
1080 sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
1082 if (sender->p_misc_flags & MF_SIG_DELAY)
1083 sig_delay_done(sender);
1085 #if DEBUG_IPC_HOOK
1086 hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
1087 #endif
1089 *xpp = sender->p_q_link; /* remove from queue */
1090 sender->p_q_link = NULL;
1091 goto receive_done;
1093 xpp = &sender->p_q_link; /* proceed to next */
1097 /* No suitable message is available or the caller couldn't send in SENDREC.
1098 * Block the process trying to receive, unless the flags tell otherwise.
1100 if ( ! (flags & NON_BLOCKING)) {
1101 /* Check for a possible deadlock before actually blocking. */
1102 if (deadlock(RECEIVE, caller_ptr, src_e)) {
1103 return(ELOCKED);
1106 caller_ptr->p_getfrom_e = src_e;
1107 RTS_SET(caller_ptr, RTS_RECEIVING);
1108 return(OK);
1109 } else {
1110 return(ENOTREADY);
1113 receive_done:
1114 if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
1115 caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
1116 return OK;
1119 /*===========================================================================*
1120 * mini_notify *
1121 *===========================================================================*/
1122 int mini_notify(
1123 const struct proc *caller_ptr, /* sender of the notification */
1124 endpoint_t dst_e /* which process to notify */
1127 register struct proc *dst_ptr;
1128 int src_id; /* source id for late delivery */
1129 int dst_p;
1131 if (!isokendpt(dst_e, &dst_p)) {
1132 util_stacktrace();
1133 printf("mini_notify: bogus endpoint %d\n", dst_e);
1134 return EDEADSRCDST;
1137 dst_ptr = proc_addr(dst_p);
1139 /* Check to see if target is blocked waiting for this message. A process
1140 * can be both sending and receiving during a SENDREC system call.
1142 if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, 0, &m_notify_buff) &&
1143 !(dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
1144 /* Destination is indeed waiting for a message. Assemble a notification
1145 * message and deliver it. Copy from pseudo-source HARDWARE, since the
1146 * message is in the kernel's address space.
1148 assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
1150 BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
1151 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1152 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1154 IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
1155 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1157 return(OK);
1160 /* Destination is not ready to receive the notification. Add it to the
1161 * bit map with pending notifications. Note the indirectness: the privilege id
1162 * instead of the process number is used in the pending bit map.
1164 src_id = priv(caller_ptr)->s_id;
1165 set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
1166 return(OK);
/* Log a failed access to an entry of the asynchronous message table of
 * 'caller'. 'entry' is the table index and 'field' a string naming what could
 * not be accessed. Arguments are parenthesized so that expressions can be
 * passed safely; 'entry' is converted to int to match the %d conversion.
 */
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%zu, tab 0x%lx)\n",__FILE__,__LINE__,	\
	(field), (caller)->p_name, (int)(entry),	\
	priv(caller)->s_asynsize, priv(caller)->s_asyntab)
/* Fetch table entry number 'entry' from the sender's asynchronous message
 * table into the local 'tabent'. The expanding scope must provide
 * 'caller_ptr' (owner of the table), 'table_v' (table address), 'tabent',
 * an int 'r' and a label 'asyn_error'. On a copy failure the macro
 * complains, sets r to EFAULT and jumps to asyn_error. A destination of
 * SELF is replaced by the table owner's own endpoint.
 */
#define A_RETR(entry) do {						\
  if (data_copy(							\
		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
		KERNEL, (vir_bytes) &tabent,				\
		sizeof(tabent)) != OK) {				\
	ASCOMPLAIN(caller_ptr, entry, "message entry");			\
	r = EFAULT;							\
	goto asyn_error;						\
  }									\
  else if(tabent.dst == SELF) {						\
	tabent.dst = caller_ptr->p_endpoint;				\
  }									\
  } while(0)
/* Write the local 'tabent' back to entry number 'entry' of the sender's
 * asynchronous message table. The expanding scope must provide 'caller_ptr'
 * (owner of the table), 'table_v' (table address) and 'tabent'. A copy
 * failure is only logged: the macro deliberately neither changes 'r' nor
 * jumps to an error label.
 */
#define A_INSRT(entry) do {						\
  if (data_copy(KERNEL, (vir_bytes) &tabent,				\
		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
		sizeof(tabent)) != OK) {				\
	ASCOMPLAIN(caller_ptr, entry, "message entry");			\
	/* Do NOT set r or goto asyn_error here! */			\
  }									\
  } while(0)
1197 /*===========================================================================*
1198 * try_deliver_senda *
1199 *===========================================================================*/
1200 int try_deliver_senda(struct proc *caller_ptr,
1201 asynmsg_t *table,
1202 size_t size)
1204 int r, dst_p, done, do_notify;
1205 unsigned int i;
1206 unsigned flags;
1207 endpoint_t dst;
1208 struct proc *dst_ptr;
1209 struct priv *privp;
1210 asynmsg_t tabent;
1211 const vir_bytes table_v = (vir_bytes) table;
1212 message *m_ptr = NULL;
1214 privp = priv(caller_ptr);
1216 /* Clear table */
1217 privp->s_asyntab = -1;
1218 privp->s_asynsize = 0;
1219 privp->s_asynendpoint = caller_ptr->p_endpoint;
1221 if (size == 0) return(OK); /* Nothing to do, just return */
1223 /* Scan the table */
1224 do_notify = FALSE;
1225 done = TRUE;
1227 /* Limit size to something reasonable. An arbitrary choice is 16
1228 * times the number of process table entries.
1230 * (this check has been duplicated in sys_call but is left here
1231 * as a sanity check)
1233 if (size > 16*(NR_TASKS + NR_PROCS)) {
1234 r = EDOM;
1235 return r;
1238 for (i = 0; i < size; i++) {
1239 /* Process each entry in the table and store the result in the table.
1240 * If we're done handling a message, copy the result to the sender. */
1242 dst = NONE;
1243 /* Copy message to kernel */
1244 A_RETR(i);
1245 flags = tabent.flags;
1246 dst = tabent.dst;
1248 if (flags == 0) continue; /* Skip empty entries */
1250 /* 'flags' field must contain only valid bits */
1251 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
1252 r = EINVAL;
1253 goto asyn_error;
1255 if (!(flags & AMF_VALID)) { /* Must contain message */
1256 r = EINVAL;
1257 goto asyn_error;
1259 if (flags & AMF_DONE) continue; /* Already done processing */
1261 r = OK;
1262 if (!isokendpt(tabent.dst, &dst_p))
1263 r = EDEADSRCDST; /* Bad destination, report the error */
1264 else if (iskerneln(dst_p))
1265 r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
1266 else if (!may_asynsend_to(caller_ptr, dst_p))
1267 r = ECALLDENIED; /* Send denied by IPC mask */
1268 else /* r == OK */
1269 dst_ptr = proc_addr(dst_p);
1271 /* XXX: RTS_NO_ENDPOINT should be removed */
1272 if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
1273 r = EDEADSRCDST;
1276 /* Check if 'dst' is blocked waiting for this message.
1277 * If AMF_NOREPLY is set, do not satisfy the receiving part of
1278 * a SENDREC.
1280 if (r == OK && WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr,
1281 (vir_bytes)&table[i].msg, NULL) &&
1282 (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
1283 /* Destination is indeed waiting for this message. */
1284 dst_ptr->p_delivermsg = tabent.msg;
1285 dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1286 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1287 IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
1288 RTS_UNSET(dst_ptr, RTS_RECEIVING);
1289 #if DEBUG_IPC_HOOK
1290 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
1291 #endif
1292 } else if (r == OK) {
1293 /* Inform receiver that something is pending */
1294 set_sys_bit(priv(dst_ptr)->s_asyn_pending,
1295 priv(caller_ptr)->s_id);
1296 done = FALSE;
1297 continue;
1300 /* Store results */
1301 tabent.result = r;
1302 tabent.flags = flags | AMF_DONE;
1303 if (flags & AMF_NOTIFY)
1304 do_notify = TRUE;
1305 else if (r != OK && (flags & AMF_NOTIFY_ERR))
1306 do_notify = TRUE;
1307 A_INSRT(i); /* Copy results to caller; ignore errors */
1308 continue;
1310 asyn_error:
1311 if (dst != NONE)
1312 printf("KERNEL senda error %d to %d\n", r, dst);
1313 else
1314 printf("KERNEL senda error %d\n", r);
1317 if (do_notify)
1318 mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
1320 if (!done) {
1321 privp->s_asyntab = (vir_bytes) table;
1322 privp->s_asynsize = size;
1325 return(OK);
1328 /*===========================================================================*
1329 * mini_senda *
1330 *===========================================================================*/
1331 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
1333 struct priv *privp;
1335 privp = priv(caller_ptr);
1336 if (!(privp->s_flags & SYS_PROC)) {
1337 printf( "mini_senda: warning caller has no privilege structure\n");
1338 return(EPERM);
1341 return try_deliver_senda(caller_ptr, table, size);
1345 /*===========================================================================*
1346 * try_async *
1347 *===========================================================================*/
1348 static int try_async(struct proc * caller_ptr)
1350 int r;
1351 struct priv *privp;
1352 struct proc *src_ptr;
1353 sys_map_t *map;
1355 map = &priv(caller_ptr)->s_asyn_pending;
1357 /* Try all privilege structures */
1358 for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) {
1359 if (privp->s_proc_nr == NONE)
1360 continue;
1362 if (!get_sys_bit(*map, privp->s_id))
1363 continue;
1365 src_ptr = proc_addr(privp->s_proc_nr);
1367 #ifdef CONFIG_SMP
1369 * Do not copy from a process which does not have a stable address space
1370 * due to VM fiddling with it
1372 if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
1373 src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
1374 continue;
1376 #endif
1378 assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1379 if ((r = try_one(ANY, src_ptr, caller_ptr)) == OK)
1380 return(r);
1383 return(ESRCH);
1387 /*===========================================================================*
1388 * try_one *
1389 *===========================================================================*/
1390 static int try_one(endpoint_t receive_e, struct proc *src_ptr,
1391 struct proc *dst_ptr)
1393 /* Try to receive an asynchronous message from 'src_ptr' */
1394 int r = EAGAIN, done, do_notify;
1395 unsigned int flags, i;
1396 size_t size;
1397 endpoint_t dst, src_e;
1398 struct proc *caller_ptr;
1399 struct priv *privp;
1400 asynmsg_t tabent;
1401 vir_bytes table_v;
1403 privp = priv(src_ptr);
1404 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1405 size = privp->s_asynsize;
1406 table_v = privp->s_asyntab;
1408 /* Clear table pending message flag. We're done unless we're not. */
1409 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1411 if (size == 0) return(EAGAIN);
1412 if (privp->s_asynendpoint != src_ptr->p_endpoint) return EAGAIN;
1413 if (!may_asynsend_to(src_ptr, proc_nr(dst_ptr))) return (ECALLDENIED);
1415 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1416 src_e = src_ptr->p_endpoint;
1418 /* Scan the table */
1419 do_notify = FALSE;
1420 done = TRUE;
1422 for (i = 0; i < size; i++) {
1423 /* Process each entry in the table and store the result in the table.
1424 * If we're done handling a message, copy the result to the sender.
1425 * Some checks done in mini_senda are duplicated here, as the sender
1426 * could've altered the contents of the table in the meantime.
1429 /* Copy message to kernel */
1430 A_RETR(i);
1431 flags = tabent.flags;
1432 dst = tabent.dst;
1434 if (flags == 0) continue; /* Skip empty entries */
1436 /* 'flags' field must contain only valid bits */
1437 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1438 r = EINVAL;
1439 else if (!(flags & AMF_VALID)) /* Must contain message */
1440 r = EINVAL;
1441 else if (flags & AMF_DONE) continue; /* Already done processing */
1443 /* Clear done flag. The sender is done sending when all messages in the
1444 * table are marked done or empty. However, we will know that only
1445 * the next time we enter this function or when the sender decides to
1446 * send additional asynchronous messages and manages to deliver them
1447 * all.
1449 done = FALSE;
1451 if (r == EINVAL)
1452 goto store_result;
1454 /* Message must be directed at receiving end */
1455 if (dst != dst_ptr->p_endpoint) continue;
1457 if (!CANRECEIVE(receive_e, src_e, dst_ptr,
1458 table_v + i*sizeof(asynmsg_t) + offsetof(struct asynmsg,msg),
1459 NULL)) {
1460 continue;
1463 /* If AMF_NOREPLY is set, then this message is not a reply to a
1464 * SENDREC and thus should not satisfy the receiving part of the
1465 * SENDREC. This message is to be delivered later.
1467 if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
1468 continue;
1470 /* Destination is ready to receive the message; deliver it */
1471 r = OK;
1472 dst_ptr->p_delivermsg = tabent.msg;
1473 dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
1474 dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1475 #if DEBUG_IPC_HOOK
1476 hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
1477 #endif
1479 store_result:
1480 /* Store results for sender. We may just have started delivering a
1481 * message, so we must not return an error to the caller in the case
1482 * that storing the results triggers an error!
1484 tabent.result = r;
1485 tabent.flags = flags | AMF_DONE;
1486 if (flags & AMF_NOTIFY) do_notify = TRUE;
1487 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1488 A_INSRT(i); /* Copy results to sender; ignore errors */
1490 break;
1493 if (do_notify)
1494 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1496 if (done) {
1497 privp->s_asyntab = -1;
1498 privp->s_asynsize = 0;
1499 } else {
1500 set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1503 asyn_error:
1504 return(r);
1507 /*===========================================================================*
1508 * cancel_async *
1509 *===========================================================================*/
1510 int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
1512 /* Cancel asynchronous messages from src to dst, because dst is not interested
1513 * in them (e.g., dst has been restarted) */
1514 int done, do_notify;
1515 unsigned int flags, i;
1516 size_t size;
1517 endpoint_t dst;
1518 struct proc *caller_ptr;
1519 struct priv *privp;
1520 asynmsg_t tabent;
1521 vir_bytes table_v;
1523 privp = priv(src_ptr);
1524 if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1525 size = privp->s_asynsize;
1526 table_v = privp->s_asyntab;
1528 /* Clear table pending message flag. We're done unless we're not. */
1529 privp->s_asyntab = -1;
1530 privp->s_asynsize = 0;
1531 unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1533 if (size == 0) return(EAGAIN);
1534 if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1536 caller_ptr = src_ptr; /* Needed for A_ macros later on */
1538 /* Scan the table */
1539 do_notify = FALSE;
1540 done = TRUE;
1543 for (i = 0; i < size; i++) {
1544 /* Process each entry in the table and store the result in the table.
1545 * If we're done handling a message, copy the result to the sender.
1546 * Some checks done in mini_senda are duplicated here, as the sender
1547 * could've altered the contents of the table in the mean time.
1550 int r = EDEADSRCDST; /* Cancel delivery due to dead dst */
1552 /* Copy message to kernel */
1553 A_RETR(i);
1554 flags = tabent.flags;
1555 dst = tabent.dst;
1557 if (flags == 0) continue; /* Skip empty entries */
1559 /* 'flags' field must contain only valid bits */
1560 if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1561 r = EINVAL;
1562 else if (!(flags & AMF_VALID)) /* Must contain message */
1563 r = EINVAL;
1564 else if (flags & AMF_DONE) continue; /* Already done processing */
1566 /* Message must be directed at receiving end */
1567 if (dst != dst_ptr->p_endpoint) {
1568 done = FALSE;
1569 continue;
1572 /* Store results for sender */
1573 tabent.result = r;
1574 tabent.flags = flags | AMF_DONE;
1575 if (flags & AMF_NOTIFY) do_notify = TRUE;
1576 else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1577 A_INSRT(i); /* Copy results to sender; ignore errors */
1580 if (do_notify)
1581 mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1583 if (!done) {
1584 privp->s_asyntab = table_v;
1585 privp->s_asynsize = size;
1588 asyn_error:
1589 return(OK);
1592 /*===========================================================================*
1593 * enqueue *
1594 *===========================================================================*/
1595 void enqueue(
1596 register struct proc *rp /* this process is now runnable */
1599 /* Add 'rp' to one of the queues of runnable processes. This function is
1600 * responsible for inserting a process into one of the scheduling queues.
1601 * The mechanism is implemented here. The actual scheduling policy is
1602 * defined in sched() and pick_proc().
1604 * This function can be used x-cpu as it always uses the queues of the cpu the
1605 * process is assigned to.
1607 int q = rp->p_priority; /* scheduling queue to use */
1608 struct proc **rdy_head, **rdy_tail;
1610 assert(proc_is_runnable(rp));
1612 assert(q >= 0);
1614 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1615 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1617 /* Now add the process to the queue. */
1618 if (!rdy_head[q]) { /* add to empty queue */
1619 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1620 rp->p_nextready = NULL; /* mark new end */
1622 else { /* add to tail of queue */
1623 rdy_tail[q]->p_nextready = rp; /* chain tail of queue */
1624 rdy_tail[q] = rp; /* set new queue tail */
1625 rp->p_nextready = NULL; /* mark new end */
1628 if (cpuid == rp->p_cpu) {
1630 * enqueueing a process with a higher priority than the current one,
1631 * it gets preempted. The current process must be preemptible. Testing
1632 * the priority also makes sure that a process does not preempt itself
1634 struct proc * p;
1635 p = get_cpulocal_var(proc_ptr);
1636 assert(p);
1637 if((p->p_priority > rp->p_priority) &&
1638 (priv(p)->s_flags & PREEMPTIBLE))
1639 RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
1641 #ifdef CONFIG_SMP
1643 * if the process was enqueued on a different cpu and the cpu is idle, i.e.
1644 * the time is off, we need to wake up that cpu and let it schedule this new
1645 * process
1647 else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
1648 smp_schedule(rp->p_cpu);
1650 #endif
1652 /* Make note of when this process was added to queue */
1653 read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue));
1656 #if DEBUG_SANITYCHECKS
1657 assert(runqueues_ok_local());
1658 #endif
1661 /*===========================================================================*
1662 * enqueue_head *
1663 *===========================================================================*/
1665 * put a process at the front of its run queue. It comes handy when a process is
1666 * preempted and removed from run queue to not to have a currently not-runnable
1667 * process on a run queue. We have to put this process back at the fron to be
1668 * fair
1670 static void enqueue_head(struct proc *rp)
1672 const int q = rp->p_priority; /* scheduling queue to use */
1674 struct proc **rdy_head, **rdy_tail;
1676 assert(proc_ptr_ok(rp));
1677 assert(proc_is_runnable(rp));
1680 * the process was runnable without its quantum expired when dequeued. A
1681 * process with no time left should have been handled else and differently
1683 assert(rp->p_cpu_time_left);
1685 assert(q >= 0);
1688 rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1689 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1691 /* Now add the process to the queue. */
1692 if (!rdy_head[q]) { /* add to empty queue */
1693 rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
1694 rp->p_nextready = NULL; /* mark new end */
1695 } else { /* add to head of queue */
1696 rp->p_nextready = rdy_head[q]; /* chain head of queue */
1697 rdy_head[q] = rp; /* set new queue head */
1700 /* Make note of when this process was added to queue */
1701 read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue)));
1704 /* Process accounting for scheduling */
1705 rp->p_accounting.dequeues--;
1706 rp->p_accounting.preempted++;
1708 #if DEBUG_SANITYCHECKS
1709 assert(runqueues_ok_local());
1710 #endif
1713 /*===========================================================================*
1714 * dequeue *
1715 *===========================================================================*/
1716 void dequeue(struct proc *rp)
1717 /* this process is no longer runnable */
1719 /* A process must be removed from the scheduling queues, for example, because
1720 * it has blocked. If the currently active process is removed, a new process
1721 * is picked to run by calling pick_proc().
1723 * This function can operate x-cpu as it always removes the process from the
1724 * queue of the cpu the process is currently assigned to.
1726 int q = rp->p_priority; /* queue to use */
1727 struct proc **xpp; /* iterate over queue */
1728 struct proc *prev_xp;
1729 u64_t tsc, tsc_delta;
1731 struct proc **rdy_tail;
1733 assert(proc_ptr_ok(rp));
1734 assert(!proc_is_runnable(rp));
1736 /* Side-effect for kernel: check if the task's stack still is ok? */
1737 assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
1739 rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1741 /* Now make sure that the process is not in its ready queue. Remove the
1742 * process if it is found. A process can be made unready even if it is not
1743 * running by being sent a signal that kills it.
1745 prev_xp = NULL;
1746 for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
1747 xpp = &(*xpp)->p_nextready) {
1748 if (*xpp == rp) { /* found process to remove */
1749 *xpp = (*xpp)->p_nextready; /* replace with next chain */
1750 if (rp == rdy_tail[q]) { /* queue tail removed */
1751 rdy_tail[q] = prev_xp; /* set new tail */
1754 break;
1756 prev_xp = *xpp; /* save previous in chain */
1760 /* Process accounting for scheduling */
1761 rp->p_accounting.dequeues++;
1763 /* this is not all that accurate on virtual machines, especially with
1764 IO bound processes that only spend a short amount of time in the queue
1765 at a time. */
1766 if (rp->p_accounting.enter_queue) {
1767 read_tsc_64(&tsc);
1768 tsc_delta = tsc - rp->p_accounting.enter_queue;
1769 rp->p_accounting.time_in_queue = rp->p_accounting.time_in_queue +
1770 tsc_delta;
1771 rp->p_accounting.enter_queue = 0;
1774 /* For ps(1), remember when the process was last dequeued. */
1775 rp->p_dequeued = get_monotonic();
1777 #if DEBUG_SANITYCHECKS
1778 assert(runqueues_ok_local());
1779 #endif
1782 /*===========================================================================*
1783 * pick_proc *
1784 *===========================================================================*/
1785 static struct proc * pick_proc(void)
1787 /* Decide who to run now. A new process is selected an returned.
1788 * When a billable process is selected, record it in 'bill_ptr', so that the
1789 * clock task can tell who to bill for system time.
1791 * This function always uses the run queues of the local cpu!
1793 register struct proc *rp; /* process to run */
1794 struct proc **rdy_head;
1795 int q; /* iterate over queues */
1797 /* Check each of the scheduling queues for ready processes. The number of
1798 * queues is defined in proc.h, and priorities are set in the task table.
1799 * If there are no processes ready to run, return NULL.
1801 rdy_head = get_cpulocal_var(run_q_head);
1802 for (q=0; q < NR_SCHED_QUEUES; q++) {
1803 if(!(rp = rdy_head[q])) {
1804 TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
1805 continue;
1807 assert(proc_is_runnable(rp));
1808 if (priv(rp)->s_flags & BILLABLE)
1809 get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
1810 return rp;
1812 return NULL;
1815 /*===========================================================================*
1816 * endpoint_lookup *
1817 *===========================================================================*/
1818 struct proc *endpoint_lookup(endpoint_t e)
1820 int n;
1822 if(!isokendpt(e, &n)) return NULL;
1824 return proc_addr(n);
1827 /*===========================================================================*
1828 * isokendpt_f *
1829 *===========================================================================*/
1830 #if DEBUG_ENABLE_IPC_WARNINGS
1831 int isokendpt_f(const char * file, int line, endpoint_t e, int * p,
1832 const int fatalflag)
1833 #else
1834 int isokendpt_f(endpoint_t e, int * p, const int fatalflag)
1835 #endif
1837 int ok = 0;
1838 /* Convert an endpoint number into a process number.
1839 * Return nonzero if the process is alive with the corresponding
1840 * generation number, zero otherwise.
1842 * This function is called with file and line number by the
1843 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
1844 * otherwise without. This allows us to print the where the
1845 * conversion was attempted, making the errors verbose without
1846 * adding code for that at every call.
1848 * If fatalflag is nonzero, we must panic if the conversion doesn't
1849 * succeed.
1851 *p = _ENDPOINT_P(e);
1852 ok = 0;
1853 if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
1854 ok = 1;
1855 if(!ok && fatalflag)
1856 panic("invalid endpoint: %d", e);
1857 return ok;
1860 static void notify_scheduler(struct proc *p)
1862 message m_no_quantum;
1863 int err;
1865 assert(!proc_kernel_scheduler(p));
1867 /* dequeue the process */
1868 RTS_SET(p, RTS_NO_QUANTUM);
1870 * Notify the process's scheduler that it has run out of
1871 * quantum. This is done by sending a message to the scheduler
1872 * on the process's behalf
1874 m_no_quantum.m_source = p->p_endpoint;
1875 m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
1876 m_no_quantum.m_krn_lsys_schedule.acnt_queue = cpu_time_2_ms(p->p_accounting.time_in_queue);
1877 m_no_quantum.m_krn_lsys_schedule.acnt_deqs = p->p_accounting.dequeues;
1878 m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync = p->p_accounting.ipc_sync;
1879 m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
1880 m_no_quantum.m_krn_lsys_schedule.acnt_preempt = p->p_accounting.preempted;
1881 m_no_quantum.m_krn_lsys_schedule.acnt_cpu = cpuid;
1882 m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load = cpu_load();
1884 /* Reset accounting */
1885 reset_proc_accounting(p);
1887 if ((err = mini_send(p, p->p_scheduler->p_endpoint,
1888 &m_no_quantum, FROM_KERNEL))) {
1889 panic("WARNING: Scheduling: mini_send returned %d\n", err);
1893 void proc_no_time(struct proc * p)
1895 if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
1896 /* this dequeues the process */
1897 notify_scheduler(p);
1899 else {
1901 * non-preemptible processes only need their quantum to
1902 * be renewed. In fact, they by pass scheduling
1904 p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
1905 #if DEBUG_RACE
1906 RTS_SET(p, RTS_PREEMPTED);
1907 RTS_UNSET(p, RTS_PREEMPTED);
1908 #endif
1912 void reset_proc_accounting(struct proc *p)
1914 p->p_accounting.preempted = 0;
1915 p->p_accounting.ipc_sync = 0;
1916 p->p_accounting.ipc_async = 0;
1917 p->p_accounting.dequeues = 0;
1918 p->p_accounting.time_in_queue = 0;
1919 p->p_accounting.enter_queue = 0;
1922 void copr_not_available_handler(void)
1924 struct proc * p;
1925 struct proc ** local_fpu_owner;
1927 * Disable the FPU exception (both for the kernel and for the process
1928 * once it's scheduled), and initialize or restore the FPU state.
1931 disable_fpu_exception();
1933 p = get_cpulocal_var(proc_ptr);
1935 /* if FPU is not owned by anyone, do not store anything */
1936 local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
1937 if (*local_fpu_owner != NULL) {
1938 assert(*local_fpu_owner != p);
1939 save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
1943 * restore the current process' state and let it run again, do not
1944 * schedule!
1946 if (restore_fpu(p) != OK) {
1947 /* Restoring FPU state failed. This is always the process's own
1948 * fault. Send a signal, and schedule another process instead.
1950 *local_fpu_owner = NULL; /* release FPU */
1951 cause_sig(proc_nr(p), SIGFPE);
1952 return;
1955 *local_fpu_owner = p;
1956 context_stop(proc_addr(KERNEL));
1957 restore_user_context(p);
1958 NOT_REACHABLE;
1961 void release_fpu(struct proc * p) {
1962 struct proc ** fpu_owner_ptr;
1964 fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
1966 if (*fpu_owner_ptr == p)
1967 *fpu_owner_ptr = NULL;
1970 void ser_dump_proc(void)
1972 struct proc *pp;
1974 for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
1976 if (isemptyp(pp))
1977 continue;
1978 print_proc_recursive(pp);