vm: restore stacktrace on SIGSEGV
[minix.git] / kernel / system.c
bloba28625c61960e3edd87dc19bb5d61371786486ed
1 /* This task handles the interface between the kernel and user-level servers.
2 * System services can be accessed by doing a system call. System calls are
3 * transformed into request messages, which are handled by this task. By
4 * convention, a sys_call() is transformed in a SYS_CALL request message that
5 * is handled in a function named do_call().
7 * A private call vector is used to map all system calls to the functions that
8 * handle them. The actual handler functions are contained in separate files
9 * to keep this file clean. The call vector is used in the system task's main
10 * loop to handle all incoming requests.
12 * In addition to the main sys_task() entry point, which starts the main loop,
13 * there are several other minor entry points:
14 * get_priv: assign privilege structure to user or system process
15 * set_sendto_bit: allow a process to send messages to a new target
16 * unset_sendto_bit: disallow a process from sending messages to a target
17 * fill_sendto_mask: fill the target mask of a given process
18 * send_sig: send a signal directly to a system process
19 * cause_sig: take action to cause a signal to occur via a signal mgr
20 * sig_delay_done: tell PM that a process is not sending
21 * get_randomness: accumulate randomness in a buffer
22 * clear_endpoint: remove a process' ability to send and receive messages
23 * sched_proc: schedule a process
25 * Changes:
26 * Nov 22, 2009 get_priv supports static priv ids (Cristiano Giuffrida)
27 * Aug 04, 2005 check if system call is allowed (Jorrit N. Herder)
28 * Jul 20, 2005 send signal to services with message (Jorrit N. Herder)
29 * Jan 15, 2005 new, generalized virtual copy function (Jorrit N. Herder)
30 * Oct 10, 2004 dispatch system calls from call vector (Jorrit N. Herder)
31 * Sep 30, 2004 source code documentation updated (Jorrit N. Herder)
34 #include "kernel.h"
35 #include "system.h"
36 #include "vm.h"
37 #include "kernel/clock.h"
38 #include <stdlib.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <unistd.h>
42 #include <minix/endpoint.h>
43 #include <minix/safecopies.h>
/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector is filled in by system_init() using the
 * map() macro, which asserts at run time that each system call number falls
 * inside the vector. Slots that are never mapped stay NULL;
 * kernel_call_dispatch() answers requests for such slots with EBADREQUEST.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);
/* map(call_nr, handler): install 'handler' in call_vec for kernel call
 * number 'call_nr'. The number is converted to a vector index by subtracting
 * KERNEL_CALL and the index is asserted to lie inside the vector.
 *
 * The body is wrapped in do { } while (0) so that "map(...);" is exactly one
 * statement (safe in an unbraced if/else), and 'call_nr' is parenthesized so
 * that any expression may be passed as the call number.
 */
#define map(call_nr, handler)						\
	do {								\
		int call_index = (call_nr) - KERNEL_CALL;		\
		assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
		call_vec[call_index] = (handler);			\
	} while (0)
58 static void kernel_call_finish(struct proc * caller, message *msg, int result)
60 if(result == VMSUSPEND) {
61 /* Special case: message has to be saved for handling
62 * until VM tells us it's allowed. VM has been notified
63 * and we must wait for its reply to restart the call.
65 assert(RTS_ISSET(caller, RTS_VMREQUEST));
66 assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
67 caller->p_vmrequest.saved.reqmsg = *msg;
68 caller->p_misc_flags |= MF_KCALL_RESUME;
69 } else {
71 * call is finished, we could have been suspended because of VM,
72 * remove the request message
74 caller->p_vmrequest.saved.reqmsg.m_source = NONE;
75 if (result != EDONTREPLY) {
76 /* copy the result as a message to the original user buffer */
77 msg->m_source = SYSTEM;
78 msg->m_type = result; /* report status of call */
79 #if DEBUG_IPC_HOOK
80 hook_ipc_msgkresult(msg, caller);
81 #endif
82 if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
83 printf("WARNING wrong user pointer 0x%08x from "
84 "process %s / %d\n",
85 caller->p_delivermsg_vir,
86 caller->p_name,
87 caller->p_endpoint);
88 cause_sig(proc_nr(caller), SIGSEGV);
94 static int kernel_call_dispatch(struct proc * caller, message *msg)
96 int result = OK;
97 int call_nr;
99 #if DEBUG_IPC_HOOK
100 hook_ipc_msgkcall(msg, caller);
101 #endif
102 call_nr = msg->m_type - KERNEL_CALL;
104 /* See if the caller made a valid request and try to handle it. */
105 if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */
106 printf("SYSTEM: illegal request %d from %d.\n",
107 call_nr,msg->m_source);
108 result = EBADREQUEST; /* illegal message type */
110 else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
111 printf("SYSTEM: denied request %d from %d.\n",
112 call_nr,msg->m_source);
113 result = ECALLDENIED; /* illegal message type */
114 } else {
115 /* handle the system call */
116 if (call_vec[call_nr])
117 result = (*call_vec[call_nr])(caller, msg);
118 else {
119 printf("Unused kernel call %d from %d\n",
120 call_nr, caller->p_endpoint);
121 result = EBADREQUEST;
125 return result;
128 /*===========================================================================*
129 * kernel_call *
130 *===========================================================================*/
132 * this function checks the basic syscall parameters and if accepted it
133 * dispatches its handling to the right handler
135 void kernel_call(message *m_user, struct proc * caller)
137 int result = OK;
138 message msg;
140 caller->p_delivermsg_vir = (vir_bytes) m_user;
142 * the ldt and cr3 of the caller process is loaded because it just've trapped
143 * into the kernel or was already set in switch_to_user() before we resume
144 * execution of an interrupted kernel call
146 if (copy_msg_from_user(m_user, &msg) == 0) {
147 msg.m_source = caller->p_endpoint;
148 result = kernel_call_dispatch(caller, &msg);
150 else {
151 printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
152 m_user, caller->p_name, caller->p_endpoint);
153 cause_sig(proc_nr(caller), SIGSEGV);
154 return;
158 /* remember who invoked the kcall so we can bill it its time */
159 kbill_kcall = caller;
161 kernel_call_finish(caller, &msg, result);
164 /*===========================================================================*
165 * initialize *
166 *===========================================================================*/
167 void system_init(void)
169 register struct priv *sp;
170 int i;
172 /* Initialize IRQ handler hooks. Mark all hooks available. */
173 for (i=0; i<NR_IRQ_HOOKS; i++) {
174 irq_hooks[i].proc_nr_e = NONE;
177 /* Initialize all alarm timers for all processes. */
178 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
179 tmr_inittimer(&(sp->s_alarm_timer));
182 /* Initialize the call vector to a safe default handler. Some system calls
183 * may be disabled or nonexistant. Then explicitely map known calls to their
184 * handler functions. This is done with a macro that gives a compile error
185 * if an illegal call number is used. The ordering is not important here.
187 for (i=0; i<NR_SYS_CALLS; i++) {
188 call_vec[i] = NULL;
191 /* Process management. */
192 map(SYS_FORK, do_fork); /* a process forked a new process */
193 map(SYS_EXEC, do_exec); /* update process after execute */
194 map(SYS_CLEAR, do_clear); /* clean up after process exit */
195 map(SYS_EXIT, do_exit); /* a system process wants to exit */
196 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
197 map(SYS_TRACE, do_trace); /* request a trace operation */
198 map(SYS_SETGRANT, do_setgrant); /* get/set own parameters */
199 map(SYS_RUNCTL, do_runctl); /* set/clear stop flag of a process */
200 map(SYS_UPDATE, do_update); /* update a process into another */
201 map(SYS_STATECTL, do_statectl); /* let a process control its state */
203 /* Signal handling. */
204 map(SYS_KILL, do_kill); /* cause a process to be signaled */
205 map(SYS_GETKSIG, do_getksig); /* signal manager checks for signals */
206 map(SYS_ENDKSIG, do_endksig); /* signal manager finished signal */
207 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
208 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
210 /* Device I/O. */
211 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
212 #if defined(__i386__)
213 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
214 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
215 #endif
217 /* Memory management. */
218 map(SYS_MEMSET, do_memset); /* write char to memory area */
219 map(SYS_VMCTL, do_vmctl); /* various VM process settings */
221 /* Copying. */
222 map(SYS_UMAP, do_umap); /* map virtual to physical address */
223 map(SYS_UMAP_REMOTE, do_umap_remote); /* do_umap for non-caller process */
224 map(SYS_VUMAP, do_vumap); /* vectored virtual to physical map */
225 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
226 map(SYS_PHYSCOPY, do_copy); /* use physical addressing */
227 map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
228 map(SYS_SAFECOPYTO, do_safecopy_to); /* copy with pre-granted permission */
229 map(SYS_VSAFECOPY, do_vsafecopy); /* vectored safecopy */
231 /* safe memset */
232 map(SYS_SAFEMEMSET, do_safememset); /* safememset */
234 /* Clock functionality. */
235 map(SYS_TIMES, do_times); /* get uptime and process times */
236 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
237 map(SYS_STIME, do_stime); /* set the boottime */
238 map(SYS_VTIMER, do_vtimer); /* set or retrieve a virtual timer */
240 /* System control. */
241 map(SYS_ABORT, do_abort); /* abort MINIX */
242 map(SYS_GETINFO, do_getinfo); /* request system information */
243 map(SYS_SYSCTL, do_sysctl); /* misc system manipulation */
245 /* Profiling. */
246 map(SYS_SPROF, do_sprofile); /* start/stop statistical profiling */
247 map(SYS_CPROF, do_cprofile); /* get/reset call profiling data */
248 map(SYS_PROFBUF, do_profbuf); /* announce locations to kernel */
250 /* i386-specific. */
251 #if defined(__i386__)
252 map(SYS_READBIOS, do_readbios); /* read from BIOS locations */
253 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
254 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
255 #endif
257 /* Machine state switching. */
258 map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
259 map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */
261 /* Scheduling */
262 map(SYS_SCHEDULE, do_schedule); /* reschedule a process */
263 map(SYS_SCHEDCTL, do_schedctl); /* change process scheduler */
266 /*===========================================================================*
267 * get_priv *
268 *===========================================================================*/
269 int get_priv(rc, priv_id)
270 register struct proc *rc; /* new (child) process pointer */
271 int priv_id; /* privilege id */
273 /* Allocate a new privilege structure for a system process. Privilege ids
274 * can be assigned either statically or dynamically.
276 register struct priv *sp; /* privilege structure */
278 if(priv_id == NULL_PRIV_ID) { /* allocate slot dynamically */
279 for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
280 if (sp->s_proc_nr == NONE) break;
281 if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
283 else { /* allocate slot from id */
284 if(!is_static_priv_id(priv_id)) {
285 return EINVAL; /* invalid static priv id */
287 if(priv[priv_id].s_proc_nr != NONE) {
288 return EBUSY; /* slot already in use */
290 sp = &priv[priv_id];
292 rc->p_priv = sp; /* assign new slot */
293 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
295 return(OK);
298 /*===========================================================================*
299 * set_sendto_bit *
300 *===========================================================================*/
301 void set_sendto_bit(const struct proc *rp, int id)
303 /* Allow a process to send messages to the process(es) associated with the
304 * system privilege structure with the given ID.
307 /* Disallow the process from sending to a process privilege structure with no
308 * associated process, and disallow the process from sending to itself.
310 if (id_to_nr(id) == NONE || priv_id(rp) == id) {
311 unset_sys_bit(priv(rp)->s_ipc_to, id);
312 return;
315 set_sys_bit(priv(rp)->s_ipc_to, id);
317 /* The process that this process can now send to, must be able to reply (or
318 * vice versa). Therefore, its send mask should be updated as well. Ignore
319 * receivers that don't support traps other than RECEIVE, they can't reply
320 * or send messages anyway.
322 if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
323 set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
326 /*===========================================================================*
327 * unset_sendto_bit *
328 *===========================================================================*/
329 void unset_sendto_bit(const struct proc *rp, int id)
331 /* Prevent a process from sending to another process. Retain the send mask
332 * symmetry by also unsetting the bit for the other direction.
335 unset_sys_bit(priv(rp)->s_ipc_to, id);
337 unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
340 /*===========================================================================*
341 * fill_sendto_mask *
342 *===========================================================================*/
343 void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
345 int i;
347 for (i=0; i < NR_SYS_PROCS; i++) {
348 if (get_sys_bit(*map, i))
349 set_sendto_bit(rp, i);
350 else
351 unset_sendto_bit(rp, i);
355 /*===========================================================================*
356 * send_sig *
357 *===========================================================================*/
358 int send_sig(endpoint_t ep, int sig_nr)
360 /* Notify a system process about a signal. This is straightforward. Simply
361 * set the signal that is to be delivered in the pending signals map and
362 * send a notification with source SYSTEM.
364 register struct proc *rp;
365 struct priv *priv;
366 int proc_nr;
368 if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
369 return EINVAL;
371 rp = proc_addr(proc_nr);
372 priv = priv(rp);
373 if(!priv) return ENOENT;
374 sigaddset(&priv->s_sig_pending, sig_nr);
375 mini_notify(proc_addr(SYSTEM), rp->p_endpoint);
377 return OK;
/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(proc_nr, sig_nr)
proc_nr_t proc_nr;		/* process to be signalled */
int sig_nr;			/* signal to be sent */
{
/* A system process wants to send a signal to a process.  Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * is blocked while the signal manager has not finished all signals for it.
 * Race conditions between calls to this function and the system calls that
 * process pending kernel signals cannot exist. Signal related functions are
 * only called when a user process causes a CPU exception and from the kernel
 * process level, which runs to completion.
 *
 * Two paths exist below:
 *  - the target is its own signal manager: the signal is recorded in the
 *    target's privilege structure and delivered with SIGKSIGSM; a lethal
 *    signal first tries to switch to the backup signal manager and panics
 *    if there is none;
 *  - otherwise the signal is recorded in p_pending and, unless the process
 *    is already marked RTS_SIGNALED, the signal manager is told via SIGKSIG.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;

  /* Lookup signal manager. SELF stands for the target's own endpoint. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the target is the signal manager of itself, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               /* Promote the backup to primary and clear the backup slot, so
                * the recursive call below takes the regular path through the
                * new manager.
                */
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
		rp->p_endpoint, sig_nr);
       }
       /* Non-lethal signal for a self-managed process: mark it pending in
        * the privilege structure and notify the process itself.
        */
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
	panic("send_sig failed");
       return;
  }

  /* Check if the signal is already pending. Process it otherwise. */
  if (! sigismember(&rp->p_pending, sig_nr)) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  /* First pending signal for this process: flag it and tell the
	   * signal manager there are kernel signals to collect.
	   */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
	      panic("send_sig failed");
      }
  }
}
444 /*===========================================================================*
445 * sig_delay_done *
446 *===========================================================================*/
447 void sig_delay_done(struct proc *rp)
449 /* A process is now known not to send any direct messages.
450 * Tell PM that the stop delay has ended, by sending a signal to the process.
451 * Used for actual signal delivery.
454 rp->p_misc_flags &= ~MF_SIG_DELAY;
456 cause_sig(proc_nr(rp), SIGSNDELAY);
459 /*===========================================================================*
460 * clear_ipc *
461 *===========================================================================*/
462 static void clear_ipc(
463 register struct proc *rc /* slot of process to clean up */
466 /* Clear IPC data for a given process slot. */
467 struct proc **xpp; /* iterate over caller queue */
469 if (RTS_ISSET(rc, RTS_SENDING)) {
470 int target_proc;
472 okendpt(rc->p_sendto_e, &target_proc);
473 xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
474 while (*xpp) { /* check entire queue */
475 if (*xpp == rc) { /* process is on the queue */
476 *xpp = (*xpp)->p_q_link; /* replace by next process */
477 #if DEBUG_ENABLE_IPC_WARNINGS
478 printf("endpoint %d / %s removed from queue at %d\n",
479 rc->p_endpoint, rc->p_name, rc->p_sendto_e);
480 #endif
481 break; /* can only be queued once */
483 xpp = &(*xpp)->p_q_link; /* proceed to next queued */
485 RTS_UNSET(rc, RTS_SENDING);
487 RTS_UNSET(rc, RTS_RECEIVING);
490 /*===========================================================================*
491 * clear_endpoint *
492 *===========================================================================*/
493 void clear_endpoint(rc)
494 register struct proc *rc; /* slot of process to clean up */
496 if(isemptyp(rc)) panic("clear_proc: empty process: %d", rc->p_endpoint);
499 #if DEBUG_IPC_HOOK
500 hook_ipc_clear(rc);
501 #endif
503 /* Make sure that the exiting process is no longer scheduled. */
504 RTS_SET(rc, RTS_NO_ENDPOINT);
505 if (priv(rc)->s_flags & SYS_PROC)
507 priv(rc)->s_asynsize= 0;
510 /* If the process happens to be queued trying to send a
511 * message, then it must be removed from the message queues.
513 clear_ipc(rc);
515 /* Likewise, if another process was sending or receive a message to or from
516 * the exiting process, it must be alerted that process no longer is alive.
517 * Check all processes.
519 clear_ipc_refs(rc, EDEADSRCDST);
523 /*===========================================================================*
524 * clear_ipc_refs *
525 *===========================================================================*/
526 void clear_ipc_refs(rc, caller_ret)
527 register struct proc *rc; /* slot of process to clean up */
528 int caller_ret; /* code to return on callers */
530 /* Clear IPC references for a given process slot. */
531 struct proc *rp; /* iterate over process table */
532 int src_id;
534 /* Tell processes that sent asynchronous messages to 'rc' they are not
535 * going to be delivered */
536 while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
537 cancel_async(proc_addr(id_to_nr(src_id)), rc);
539 for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
540 if(isemptyp(rp))
541 continue;
543 /* Unset pending notification bits. */
544 unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
546 /* Unset pending asynchronous messages */
547 unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);
549 /* Check if process depends on given process. */
550 if (P_BLOCKEDON(rp) == rc->p_endpoint) {
551 rp->p_reg.retreg = caller_ret; /* return requested code */
552 clear_ipc(rp);
557 /*===========================================================================*
558 * kernel_call_resume *
559 *===========================================================================*/
560 void kernel_call_resume(struct proc *caller)
562 int result;
564 assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
565 assert(!RTS_ISSET(caller, RTS_VMREQUEST));
567 assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);
570 printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
571 caller->p_name, caller->p_endpoint,
572 caller->p_rts_flags, caller->p_misc_flags);
575 /* re-execute the kernel call, with MF_KCALL_RESUME still set so
576 * the call knows this is a retry.
578 result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
580 * we are resuming the kernel call so we have to remove this flag so it
581 * can be set again
583 caller->p_misc_flags &= ~MF_KCALL_RESUME;
584 kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
587 /*===========================================================================*
588 * sched_proc *
589 *===========================================================================*/
590 int sched_proc(struct proc *p,
591 int priority,
592 int quantum,
593 int cpu)
595 /* Make sure the values given are within the allowed range.*/
596 if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
597 return(EINVAL);
599 if (quantum < 1 && quantum != -1)
600 return(EINVAL);
602 #ifdef CONFIG_SMP
603 if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
604 return(EINVAL);
605 if (cpu != -1 && !(cpu_is_ready(cpu)))
606 return EBADCPU;
607 #endif
609 /* In some cases, we might be rescheduling a runnable process. In such
610 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
611 * flag before the generic unset to dequeue/enqueue the process
614 /* FIXME this preempts the process, do we really want to do that ?*/
616 /* FIXME this is a problem for SMP if the processes currently runs on a
617 * different CPU */
618 if (proc_is_runnable(p)) {
619 #ifdef CONFIG_SMP
620 if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
621 smp_schedule_migrate_proc(p, cpu);
623 #endif
625 RTS_SET(p, RTS_NO_QUANTUM);
628 if (proc_is_runnable(p))
629 RTS_SET(p, RTS_NO_QUANTUM);
631 if (priority != -1)
632 p->p_priority = priority;
633 if (quantum != -1) {
634 p->p_quantum_size_ms = quantum;
635 p->p_cpu_time_left = ms_2_cpu_time(quantum);
637 #ifdef CONFIG_SMP
638 if (cpu != -1)
639 p->p_cpu = cpu;
640 #endif
642 /* Clear the scheduling bit and enqueue the process */
643 RTS_UNSET(p, RTS_NO_QUANTUM);
645 return OK;