New P_BLOCKEDON for kernel - a macro that encodes the "who is this
[minix.git] / kernel / system.c
blob6539066621e4e47a7253872edc9915f3d0ecdbf3
/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed in a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via PM
 *   sig_delay_done:	tell PM that a process is not sending
 *   umap_bios:		map virtual address in BIOS_SEG to physical
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */
33 #include "debug.h"
34 #include "kernel.h"
35 #include "system.h"
36 #include "proc.h"
37 #include "vm.h"
38 #include <stdlib.h>
39 #include <signal.h>
40 #include <unistd.h>
41 #include <sys/sigcontext.h>
42 #include <minix/endpoint.h>
43 #include <minix/safecopies.h>
45 /* Declaration of the call vector that defines the mapping of system calls
46 * to handler functions. The vector is initialized in sys_init() with map(),
47 * which makes sure the system call numbers are ok. No space is allocated,
48 * because the dummy is declared extern. If an illegal call is given, the
49 * array size will be negative and this won't compile.
51 PUBLIC int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);
52 char *callnames[NR_SYS_CALLS];
54 #define map(call_nr, handler) \
55 {extern int dummy[NR_SYS_CALLS>(unsigned)(call_nr-KERNEL_CALL) ? 1:-1];} \
56 callnames[(call_nr-KERNEL_CALL)] = #call_nr; \
57 call_vec[(call_nr-KERNEL_CALL)] = (handler)
59 PRIVATE void kernel_call_finish(struct proc * caller, message *msg, int result)
61 if(result == VMSUSPEND) {
62 /* Special case: message has to be saved for handling
63 * until VM tells us it's allowed. VM has been notified
64 * and we must wait for its reply to restart the call.
66 vmassert(RTS_ISSET(caller, RTS_VMREQUEST));
67 vmassert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
68 caller->p_vmrequest.saved.reqmsg = *msg;
69 caller->p_misc_flags |= MF_KCALL_RESUME;
70 } else {
72 * call is finished, we could have been suspended because of VM,
73 * remove the request message
75 caller->p_vmrequest.saved.reqmsg.m_source = NONE;
76 if (result != EDONTREPLY) {
77 /* copy the result as a message to the original user buffer */
78 msg->m_source = SYSTEM;
79 msg->m_type = result; /* report status of call */
80 if (copy_msg_to_user(caller, msg,
81 (message *)caller->p_delivermsg_vir)) {
82 kprintf("WARNING wrong user pointer 0x%08x from "
83 "process %s / %d\n",
84 caller->p_delivermsg_vir,
85 caller->p_name,
86 caller->p_endpoint);
87 result = EBADREQUEST;
93 PRIVATE int kernel_call_dispatch(struct proc * caller, message *msg)
95 int result = OK;
96 int call_nr;
98 call_nr = msg->m_type - KERNEL_CALL;
100 /* See if the caller made a valid request and try to handle it. */
101 if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */
102 kprintf("SYSTEM: illegal request %d from %d.\n",
103 call_nr,msg->m_source);
104 result = EBADREQUEST; /* illegal message type */
106 else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
107 result = ECALLDENIED; /* illegal message type */
108 } else {
109 /* handle the system call */
110 result = (*call_vec[call_nr])(caller, msg);
113 return result;
116 /*===========================================================================*
117 * kernel_call *
118 *===========================================================================*/
120 * this function checks the basic syscall parameters and if accepted it
121 * dispatches its handling to the right handler
123 PUBLIC void kernel_call(message *m_user, struct proc * caller)
125 int result = OK;
126 message msg;
128 caller->p_delivermsg_vir = (vir_bytes) m_user;
130 * the ldt and cr3 of the caller process is loaded because it just've trapped
131 * into the kernel or was already set in schedcheck() before we resume
132 * execution of an interrupted kernel call
134 if (copy_msg_from_user(caller, m_user, &msg) == 0) {
135 msg.m_source = caller->p_endpoint;
136 result = kernel_call_dispatch(caller, &msg);
138 else {
139 kprintf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
140 m_user, caller->p_name, caller->p_endpoint);
141 result = EBADREQUEST;
144 kernel_call_finish(caller, &msg, result);
147 /*===========================================================================*
148 * initialize *
149 *===========================================================================*/
150 PUBLIC void system_init(void)
152 register struct priv *sp;
153 int i;
155 /* Initialize IRQ handler hooks. Mark all hooks available. */
156 for (i=0; i<NR_IRQ_HOOKS; i++) {
157 irq_hooks[i].proc_nr_e = NONE;
160 /* Initialize all alarm timers for all processes. */
161 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
162 tmr_inittimer(&(sp->s_alarm_timer));
165 /* Initialize the call vector to a safe default handler. Some system calls
166 * may be disabled or nonexistant. Then explicitely map known calls to their
167 * handler functions. This is done with a macro that gives a compile error
168 * if an illegal call number is used. The ordering is not important here.
170 for (i=0; i<NR_SYS_CALLS; i++) {
171 call_vec[i] = do_unused;
172 callnames[i] = "unused";
175 /* Process management. */
176 map(SYS_FORK, do_fork); /* a process forked a new process */
177 map(SYS_EXEC, do_exec); /* update process after execute */
178 map(SYS_EXIT, do_exit); /* clean up after process exit */
179 map(SYS_NICE, do_nice); /* set scheduling priority */
180 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
181 map(SYS_TRACE, do_trace); /* request a trace operation */
182 map(SYS_SETGRANT, do_setgrant); /* get/set own parameters */
183 map(SYS_RUNCTL, do_runctl); /* set/clear stop flag of a process */
185 /* Signal handling. */
186 map(SYS_KILL, do_kill); /* cause a process to be signaled */
187 map(SYS_GETKSIG, do_getksig); /* PM checks for pending signals */
188 map(SYS_ENDKSIG, do_endksig); /* PM finished processing signal */
189 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
190 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
192 /* Device I/O. */
193 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
194 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
195 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
197 /* Memory management. */
198 map(SYS_NEWMAP, do_newmap); /* set up a process memory map */
199 map(SYS_SEGCTL, do_segctl); /* add segment and get selector */
200 map(SYS_MEMSET, do_memset); /* write char to memory area */
201 map(SYS_VMCTL, do_vmctl); /* various VM process settings */
203 /* Copying. */
204 map(SYS_UMAP, do_umap); /* map virtual to physical address */
205 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
206 map(SYS_PHYSCOPY, do_copy); /* use physical addressing */
207 map(SYS_SAFECOPYFROM, do_safecopy); /* copy with pre-granted permission */
208 map(SYS_SAFECOPYTO, do_safecopy); /* copy with pre-granted permission */
209 map(SYS_VSAFECOPY, do_vsafecopy); /* vectored safecopy */
211 /* Mapping. */
212 map(SYS_SAFEMAP, do_safemap); /* map pages from other process */
213 map(SYS_SAFEREVMAP, do_saferevmap); /* grantor revokes the map grant */
214 map(SYS_SAFEUNMAP, do_safeunmap); /* requestor unmaps the mapped pages */
216 /* Clock functionality. */
217 map(SYS_TIMES, do_times); /* get uptime and process times */
218 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
219 map(SYS_STIME, do_stime); /* set the boottime */
220 map(SYS_VTIMER, do_vtimer); /* set or retrieve a virtual timer */
222 /* System control. */
223 map(SYS_ABORT, do_abort); /* abort MINIX */
224 map(SYS_GETINFO, do_getinfo); /* request system information */
225 map(SYS_SYSCTL, do_sysctl); /* misc system manipulation */
227 /* Profiling. */
228 map(SYS_SPROF, do_sprofile); /* start/stop statistical profiling */
229 map(SYS_CPROF, do_cprofile); /* get/reset call profiling data */
230 map(SYS_PROFBUF, do_profbuf); /* announce locations to kernel */
232 /* i386-specific. */
233 #if _MINIX_CHIP == _CHIP_INTEL
234 map(SYS_INT86, do_int86); /* real-mode BIOS calls */
235 map(SYS_READBIOS, do_readbios); /* read from BIOS locations */
236 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
237 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
238 #endif
241 /*===========================================================================*
242 * get_priv *
243 *===========================================================================*/
244 PUBLIC int get_priv(rc, priv_id)
245 register struct proc *rc; /* new (child) process pointer */
246 int priv_id; /* privilege id */
248 /* Allocate a new privilege structure for a system process. Privilege ids
249 * can be assigned either statically or dynamically.
251 register struct priv *sp; /* privilege structure */
253 if(priv_id == NULL_PRIV_ID) { /* allocate slot dynamically */
254 for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
255 if (sp->s_proc_nr == NONE) break;
256 if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
258 else { /* allocate slot from id */
259 if(!is_static_priv_id(priv_id)) {
260 return EINVAL; /* invalid static priv id */
262 if(priv[priv_id].s_proc_nr != NONE) {
263 return EBUSY; /* slot already in use */
265 sp = &priv[priv_id];
267 rc->p_priv = sp; /* assign new slot */
268 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
270 return(OK);
273 /*===========================================================================*
274 * set_sendto_bit *
275 *===========================================================================*/
276 PUBLIC void set_sendto_bit(struct proc *rp, int id)
278 /* Allow a process to send messages to the process(es) associated with the
279 * system privilege structure with the given ID.
282 /* Disallow the process from sending to a process privilege structure with no
283 * associated process, and disallow the process from sending to itself.
285 if (id_to_nr(id) == NONE || priv_id(rp) == id) {
286 unset_sys_bit(priv(rp)->s_ipc_to, id);
287 return;
290 set_sys_bit(priv(rp)->s_ipc_to, id);
292 /* The process that this process can now send to, must be able to reply (or
293 * vice versa). Therefore, its send mask should be updated as well. Ignore
294 * receivers that don't support traps other than RECEIVE, they can't reply
295 * or send messages anyway.
297 if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
298 set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
301 /*===========================================================================*
302 * unset_sendto_bit *
303 *===========================================================================*/
304 PUBLIC void unset_sendto_bit(struct proc *rp, int id)
306 /* Prevent a process from sending to another process. Retain the send mask
307 * symmetry by also unsetting the bit for the other direction.
310 unset_sys_bit(priv(rp)->s_ipc_to, id);
312 unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
315 /*===========================================================================*
316 * send_sig *
317 *===========================================================================*/
318 PUBLIC void send_sig(int proc_nr, int sig_nr)
320 /* Notify a system process about a signal. This is straightforward. Simply
321 * set the signal that is to be delivered in the pending signals map and
322 * send a notification with source SYSTEM.
324 register struct proc *rp;
326 if(!isokprocn(proc_nr) || isemptyn(proc_nr))
327 minix_panic("send_sig to empty process", proc_nr);
329 rp = proc_addr(proc_nr);
330 sigaddset(&priv(rp)->s_sig_pending, sig_nr);
331 mini_notify(proc_addr(SYSTEM), rp->p_endpoint);
334 /*===========================================================================*
335 * cause_sig *
336 *===========================================================================*/
337 PUBLIC void cause_sig(proc_nr, sig_nr)
338 proc_nr_t proc_nr; /* process to be signalled */
339 int sig_nr; /* signal to be sent */
341 /* A system process wants to send a signal to a process. Examples are:
342 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
343 * - TTY wanting to cause SIGINT upon getting a DEL
344 * - FS wanting to cause SIGPIPE for a broken pipe
345 * Signals are handled by sending a message to PM. This function handles the
346 * signals and makes sure the PM gets them by sending a notification. The
347 * process being signaled is blocked while PM has not finished all signals
348 * for it.
349 * Race conditions between calls to this function and the system calls that
350 * process pending kernel signals cannot exist. Signal related functions are
351 * only called when a user process causes a CPU exception and from the kernel
352 * process level, which runs to completion.
354 register struct proc *rp;
356 if (proc_nr == PM_PROC_NR)
357 minix_panic("cause_sig: PM gets signal", NO_NUM);
359 /* Check if the signal is already pending. Process it otherwise. */
360 rp = proc_addr(proc_nr);
361 if (! sigismember(&rp->p_pending, sig_nr)) {
362 sigaddset(&rp->p_pending, sig_nr);
363 if (! (RTS_ISSET(rp, RTS_SIGNALED))) { /* other pending */
364 RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
365 send_sig(PM_PROC_NR, SIGKSIG);
370 /*===========================================================================*
371 * sig_delay_done *
372 *===========================================================================*/
373 PUBLIC void sig_delay_done(rp)
374 struct proc *rp;
376 /* A process is now known not to send any direct messages.
377 * Tell PM that the stop delay has ended, by sending a signal to the process.
378 * Used for actual signal delivery.
381 rp->p_misc_flags &= ~MF_SIG_DELAY;
383 cause_sig(proc_nr(rp), SIGNDELAY);
386 #if _MINIX_CHIP == _CHIP_INTEL
388 /*===========================================================================*
389 * umap_bios *
390 *===========================================================================*/
391 PUBLIC phys_bytes umap_bios(vir_addr, bytes)
392 vir_bytes vir_addr; /* virtual address in BIOS segment */
393 vir_bytes bytes; /* # of bytes to be copied */
395 /* Calculate the physical memory address at the BIOS. Note: currently, BIOS
396 * address zero (the first BIOS interrupt vector) is not considered as an
397 * error here, but since the physical address will be zero as well, the
398 * calling function will think an error occurred. This is not a problem,
399 * since no one uses the first BIOS interrupt vector.
402 /* Check all acceptable ranges. */
403 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
404 return (phys_bytes) vir_addr;
405 else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
406 return (phys_bytes) vir_addr;
408 kprintf("Warning, error in umap_bios, virtual address 0x%x\n", vir_addr);
409 return 0;
411 #endif
413 /*===========================================================================*
414 * umap_grant *
415 *===========================================================================*/
416 PUBLIC phys_bytes umap_grant(rp, grant, bytes)
417 struct proc *rp; /* pointer to proc table entry for process */
418 cp_grant_id_t grant; /* grant no. */
419 vir_bytes bytes; /* size */
421 int proc_nr;
422 vir_bytes offset, ret;
423 endpoint_t granter;
425 /* See if the grant in that process is sensible, and
426 * find out the virtual address and (optionally) new
427 * process for that address.
429 * Then convert that process to a slot number.
431 if(verify_grant(rp->p_endpoint, ANY, grant, bytes, 0, 0,
432 &offset, &granter) != OK) {
433 kprintf("SYSTEM: umap_grant: verify_grant failed\n");
434 return 0;
437 if(!isokendpt(granter, &proc_nr)) {
438 kprintf("SYSTEM: umap_grant: isokendpt failed\n");
439 return 0;
442 /* Do the mapping from virtual to physical. */
443 ret = umap_virtual(proc_addr(proc_nr), D, offset, bytes);
444 if(!ret) {
445 kprintf("SYSTEM:umap_grant:umap_virtual failed; grant %s:%d -> %s: vir 0x%lx\n",
446 rp->p_name, grant,
447 proc_addr(proc_nr)->p_name, offset);
449 return ret;
452 /*===========================================================================*
453 * clear_endpoint *
454 *===========================================================================*/
455 PUBLIC void clear_endpoint(rc)
456 register struct proc *rc; /* slot of process to clean up */
458 register struct proc *rp; /* iterate over process table */
459 register struct proc **xpp; /* iterate over caller queue */
461 if(isemptyp(rc)) minix_panic("clear_proc: empty process", rc->p_endpoint);
463 if(rc->p_endpoint == PM_PROC_NR || rc->p_endpoint == VFS_PROC_NR ||
464 rc->p_endpoint == VM_PROC_NR)
466 /* This test is great for debugging system processes dying,
467 * but as this happens normally on reboot, not good permanent code.
469 kprintf("died: ");
470 proc_stacktrace(rc);
471 minix_panic("system process died", rc->p_endpoint);
474 /* Make sure that the exiting process is no longer scheduled. */
475 RTS_SET(rc, RTS_NO_ENDPOINT);
476 if (priv(rc)->s_flags & SYS_PROC)
478 if (priv(rc)->s_asynsize) {
479 #if 0
480 kprintf("clear_endpoint: clearing s_asynsize of %s / %d\n",
481 rc->p_name, rc->p_endpoint);
482 proc_stacktrace(rc);
483 #endif
485 priv(rc)->s_asynsize= 0;
488 /* If the process happens to be queued trying to send a
489 * message, then it must be removed from the message queues.
491 if (RTS_ISSET(rc, RTS_SENDING)) {
492 int target_proc;
494 okendpt(rc->p_sendto_e, &target_proc);
495 xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
496 while (*xpp != NIL_PROC) { /* check entire queue */
497 if (*xpp == rc) { /* process is on the queue */
498 *xpp = (*xpp)->p_q_link; /* replace by next process */
499 #if DEBUG_ENABLE_IPC_WARNINGS
500 kprintf("endpoint %d / %s removed from queue at %d\n",
501 rc->p_endpoint, rc->p_name, rc->p_sendto_e);
502 #endif
503 break; /* can only be queued once */
505 xpp = &(*xpp)->p_q_link; /* proceed to next queued */
507 rc->p_rts_flags &= ~RTS_SENDING;
509 rc->p_rts_flags &= ~RTS_RECEIVING;
511 /* Likewise, if another process was sending or receive a message to or from
512 * the exiting process, it must be alerted that process no longer is alive.
513 * Check all processes.
515 for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
516 if(isemptyp(rp))
517 continue;
519 /* Unset pending notification bits. */
520 unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
522 /* Check if process is depends on exiting process. */
523 if (P_BLOCKEDON(rp) == rc->p_endpoint) {
524 rp->p_reg.retreg = EDEADSRCDST; /* report source died */
525 RTS_UNSET(rp, (RTS_RECEIVING|RTS_SENDING)); /* no longer blocking */
526 #if DEBUG_ENABLE_IPC_WARNINGS
527 kprintf("endpoint %d / %s blocked on dead src ep %d / %s\n",
528 rp->p_endpoint, rp->p_name, rc->p_endpoint, rc->p_name);
529 #endif
534 /*===========================================================================*
535 * kernel_call_resume *
536 *===========================================================================*/
537 PUBLIC void kernel_call_resume(struct proc *caller)
539 int result;
541 vmassert(!RTS_ISSET(p, RTS_SLOT_FREE));
542 vmassert(!RTS_ISSET(p, RTS_VMREQUEST));
544 vmassert(p->p_vmrequest.saved.reqmsg.m_source == p->p_endpoint);
547 printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
548 caller->p_name, caller->p_endpoint,
549 caller->p_rts_flags, caller->p_misc_flags);
553 * we are resuming the kernel call so we have to remove this flag so it
554 * can be set again
556 caller->p_misc_flags &= ~MF_KCALL_RESUME;
557 result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
558 kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);