/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is not sending
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated (Jorrit N. Herder)
 */
37 #include "kernel/clock.h"
42 #include <minix/endpoint.h>
43 #include <minix/safecopies.h>
/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector has NR_SYS_CALLS entries and is indexed
 * by (call number - KERNEL_CALL). It is filled in by system_init() through
 * the map() macro, which checks every call number with a run-time assert();
 * an out-of-range call number therefore trips that assertion rather than
 * failing at compile time.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);
/* Bind kernel call number 'call_nr' to 'handler' in call_vec.
 *
 * The call number is converted to a vector index by subtracting KERNEL_CALL
 * and is checked with assert() to lie in [0, NR_SYS_CALLS).
 *
 * The body is wrapped in do { } while (0) so that "map(a, b);" is a single
 * statement and stays correct inside an unbraced if/else. 'call_nr' is
 * parenthesized so that a low-precedence argument expression is converted
 * as a whole.
 */
#define map(call_nr, handler)						\
	do {								\
		int call_index = (call_nr) - KERNEL_CALL;		\
		assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
		call_vec[call_index] = (handler);			\
	} while (0)
58 static void kernel_call_finish(struct proc
* caller
, message
*msg
, int result
)
60 if(result
== VMSUSPEND
) {
61 /* Special case: message has to be saved for handling
62 * until VM tells us it's allowed. VM has been notified
63 * and we must wait for its reply to restart the call.
65 assert(RTS_ISSET(caller
, RTS_VMREQUEST
));
66 assert(caller
->p_vmrequest
.type
== VMSTYPE_KERNELCALL
);
67 caller
->p_vmrequest
.saved
.reqmsg
= *msg
;
68 caller
->p_misc_flags
|= MF_KCALL_RESUME
;
71 * call is finished, we could have been suspended because of VM,
72 * remove the request message
74 caller
->p_vmrequest
.saved
.reqmsg
.m_source
= NONE
;
75 if (result
!= EDONTREPLY
) {
76 /* copy the result as a message to the original user buffer */
77 msg
->m_source
= SYSTEM
;
78 msg
->m_type
= result
; /* report status of call */
80 hook_ipc_msgkresult(msg
, caller
);
82 if (copy_msg_to_user(msg
, (message
*)caller
->p_delivermsg_vir
)) {
83 printf("WARNING wrong user pointer 0x%08x from "
85 caller
->p_delivermsg_vir
,
88 cause_sig(proc_nr(caller
), SIGSEGV
);
94 static int kernel_call_dispatch(struct proc
* caller
, message
*msg
)
100 hook_ipc_msgkcall(msg
, caller
);
102 call_nr
= msg
->m_type
- KERNEL_CALL
;
104 /* See if the caller made a valid request and try to handle it. */
105 if (call_nr
< 0 || call_nr
>= NR_SYS_CALLS
) { /* check call number */
106 printf("SYSTEM: illegal request %d from %d.\n",
107 call_nr
,msg
->m_source
);
108 result
= EBADREQUEST
; /* illegal message type */
110 else if (!GET_BIT(priv(caller
)->s_k_call_mask
, call_nr
)) {
111 printf("SYSTEM: denied request %d from %d.\n",
112 call_nr
,msg
->m_source
);
113 result
= ECALLDENIED
; /* illegal message type */
115 /* handle the system call */
116 if (call_vec
[call_nr
])
117 result
= (*call_vec
[call_nr
])(caller
, msg
);
119 printf("Unused kernel call %d from %d\n",
120 call_nr
, caller
->p_endpoint
);
121 result
= EBADREQUEST
;
128 /*===========================================================================*
130 *===========================================================================*/
132 * this function checks the basic syscall parameters and if accepted it
133 * dispatches its handling to the right handler
135 void kernel_call(message
*m_user
, struct proc
* caller
)
140 caller
->p_delivermsg_vir
= (vir_bytes
) m_user
;
142 * the ldt and cr3 of the caller process is loaded because it just've trapped
143 * into the kernel or was already set in switch_to_user() before we resume
144 * execution of an interrupted kernel call
146 if (copy_msg_from_user(m_user
, &msg
) == 0) {
147 msg
.m_source
= caller
->p_endpoint
;
148 result
= kernel_call_dispatch(caller
, &msg
);
151 printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
152 m_user
, caller
->p_name
, caller
->p_endpoint
);
153 cause_sig(proc_nr(caller
), SIGSEGV
);
158 /* remember who invoked the kcall so we can bill it its time */
159 kbill_kcall
= caller
;
161 kernel_call_finish(caller
, &msg
, result
);
164 /*===========================================================================*
166 *===========================================================================*/
167 void system_init(void)
169 register struct priv
*sp
;
172 /* Initialize IRQ handler hooks. Mark all hooks available. */
173 for (i
=0; i
<NR_IRQ_HOOKS
; i
++) {
174 irq_hooks
[i
].proc_nr_e
= NONE
;
177 /* Initialize all alarm timers for all processes. */
178 for (sp
=BEG_PRIV_ADDR
; sp
< END_PRIV_ADDR
; sp
++) {
179 tmr_inittimer(&(sp
->s_alarm_timer
));
182 /* Initialize the call vector to a safe default handler. Some system calls
183 * may be disabled or nonexistant. Then explicitely map known calls to their
184 * handler functions. This is done with a macro that gives a compile error
185 * if an illegal call number is used. The ordering is not important here.
187 for (i
=0; i
<NR_SYS_CALLS
; i
++) {
191 /* Process management. */
192 map(SYS_FORK
, do_fork
); /* a process forked a new process */
193 map(SYS_EXEC
, do_exec
); /* update process after execute */
194 map(SYS_CLEAR
, do_clear
); /* clean up after process exit */
195 map(SYS_EXIT
, do_exit
); /* a system process wants to exit */
196 map(SYS_PRIVCTL
, do_privctl
); /* system privileges control */
197 map(SYS_TRACE
, do_trace
); /* request a trace operation */
198 map(SYS_SETGRANT
, do_setgrant
); /* get/set own parameters */
199 map(SYS_RUNCTL
, do_runctl
); /* set/clear stop flag of a process */
200 map(SYS_UPDATE
, do_update
); /* update a process into another */
201 map(SYS_STATECTL
, do_statectl
); /* let a process control its state */
203 /* Signal handling. */
204 map(SYS_KILL
, do_kill
); /* cause a process to be signaled */
205 map(SYS_GETKSIG
, do_getksig
); /* signal manager checks for signals */
206 map(SYS_ENDKSIG
, do_endksig
); /* signal manager finished signal */
207 map(SYS_SIGSEND
, do_sigsend
); /* start POSIX-style signal */
208 map(SYS_SIGRETURN
, do_sigreturn
); /* return from POSIX-style signal */
211 map(SYS_IRQCTL
, do_irqctl
); /* interrupt control operations */
212 #if defined(__i386__)
213 map(SYS_DEVIO
, do_devio
); /* inb, inw, inl, outb, outw, outl */
214 map(SYS_VDEVIO
, do_vdevio
); /* vector with devio requests */
217 /* Memory management. */
218 map(SYS_MEMSET
, do_memset
); /* write char to memory area */
219 map(SYS_VMCTL
, do_vmctl
); /* various VM process settings */
222 map(SYS_UMAP
, do_umap
); /* map virtual to physical address */
223 map(SYS_UMAP_REMOTE
, do_umap_remote
); /* do_umap for non-caller process */
224 map(SYS_VUMAP
, do_vumap
); /* vectored virtual to physical map */
225 map(SYS_VIRCOPY
, do_vircopy
); /* use pure virtual addressing */
226 map(SYS_PHYSCOPY
, do_copy
); /* use physical addressing */
227 map(SYS_SAFECOPYFROM
, do_safecopy_from
);/* copy with pre-granted permission */
228 map(SYS_SAFECOPYTO
, do_safecopy_to
); /* copy with pre-granted permission */
229 map(SYS_VSAFECOPY
, do_vsafecopy
); /* vectored safecopy */
232 map(SYS_SAFEMEMSET
, do_safememset
); /* safememset */
235 map(SYS_SAFEMAP
, do_safemap
); /* map pages from other process */
236 map(SYS_SAFEREVMAP
, do_saferevmap
); /* grantor revokes the map grant */
237 map(SYS_SAFEUNMAP
, do_safeunmap
); /* requestor unmaps the mapped pages */
239 /* Clock functionality. */
240 map(SYS_TIMES
, do_times
); /* get uptime and process times */
241 map(SYS_SETALARM
, do_setalarm
); /* schedule a synchronous alarm */
242 map(SYS_STIME
, do_stime
); /* set the boottime */
243 map(SYS_VTIMER
, do_vtimer
); /* set or retrieve a virtual timer */
245 /* System control. */
246 map(SYS_ABORT
, do_abort
); /* abort MINIX */
247 map(SYS_GETINFO
, do_getinfo
); /* request system information */
248 map(SYS_SYSCTL
, do_sysctl
); /* misc system manipulation */
251 map(SYS_SPROF
, do_sprofile
); /* start/stop statistical profiling */
252 map(SYS_CPROF
, do_cprofile
); /* get/reset call profiling data */
253 map(SYS_PROFBUF
, do_profbuf
); /* announce locations to kernel */
256 #if defined(__i386__)
257 map(SYS_READBIOS
, do_readbios
); /* read from BIOS locations */
258 map(SYS_IOPENABLE
, do_iopenable
); /* Enable I/O */
259 map(SYS_SDEVIO
, do_sdevio
); /* phys_insb, _insw, _outsb, _outsw */
262 /* Machine state switching. */
263 map(SYS_SETMCONTEXT
, do_setmcontext
); /* set machine context */
264 map(SYS_GETMCONTEXT
, do_getmcontext
); /* get machine context */
267 map(SYS_SCHEDULE
, do_schedule
); /* reschedule a process */
268 map(SYS_SCHEDCTL
, do_schedctl
); /* change process scheduler */
271 /*===========================================================================*
273 *===========================================================================*/
274 int get_priv(rc
, priv_id
)
275 register struct proc
*rc
; /* new (child) process pointer */
276 int priv_id
; /* privilege id */
278 /* Allocate a new privilege structure for a system process. Privilege ids
279 * can be assigned either statically or dynamically.
281 register struct priv
*sp
; /* privilege structure */
283 if(priv_id
== NULL_PRIV_ID
) { /* allocate slot dynamically */
284 for (sp
= BEG_DYN_PRIV_ADDR
; sp
< END_DYN_PRIV_ADDR
; ++sp
)
285 if (sp
->s_proc_nr
== NONE
) break;
286 if (sp
>= END_DYN_PRIV_ADDR
) return(ENOSPC
);
288 else { /* allocate slot from id */
289 if(!is_static_priv_id(priv_id
)) {
290 return EINVAL
; /* invalid static priv id */
292 if(priv
[priv_id
].s_proc_nr
!= NONE
) {
293 return EBUSY
; /* slot already in use */
297 rc
->p_priv
= sp
; /* assign new slot */
298 rc
->p_priv
->s_proc_nr
= proc_nr(rc
); /* set association */
303 /*===========================================================================*
305 *===========================================================================*/
306 void set_sendto_bit(const struct proc
*rp
, int id
)
308 /* Allow a process to send messages to the process(es) associated with the
309 * system privilege structure with the given ID.
312 /* Disallow the process from sending to a process privilege structure with no
313 * associated process, and disallow the process from sending to itself.
315 if (id_to_nr(id
) == NONE
|| priv_id(rp
) == id
) {
316 unset_sys_bit(priv(rp
)->s_ipc_to
, id
);
320 set_sys_bit(priv(rp
)->s_ipc_to
, id
);
322 /* The process that this process can now send to, must be able to reply (or
323 * vice versa). Therefore, its send mask should be updated as well. Ignore
324 * receivers that don't support traps other than RECEIVE, they can't reply
325 * or send messages anyway.
327 if (priv_addr(id
)->s_trap_mask
& ~((1 << RECEIVE
)))
328 set_sys_bit(priv_addr(id
)->s_ipc_to
, priv_id(rp
));
331 /*===========================================================================*
333 *===========================================================================*/
334 void unset_sendto_bit(const struct proc
*rp
, int id
)
336 /* Prevent a process from sending to another process. Retain the send mask
337 * symmetry by also unsetting the bit for the other direction.
340 unset_sys_bit(priv(rp
)->s_ipc_to
, id
);
342 unset_sys_bit(priv_addr(id
)->s_ipc_to
, priv_id(rp
));
345 /*===========================================================================*
347 *===========================================================================*/
348 void fill_sendto_mask(const struct proc
*rp
, sys_map_t
*map
)
352 for (i
=0; i
< NR_SYS_PROCS
; i
++) {
353 if (get_sys_bit(*map
, i
))
354 set_sendto_bit(rp
, i
);
356 unset_sendto_bit(rp
, i
);
360 /*===========================================================================*
362 *===========================================================================*/
363 int send_sig(endpoint_t ep
, int sig_nr
)
365 /* Notify a system process about a signal. This is straightforward. Simply
366 * set the signal that is to be delivered in the pending signals map and
367 * send a notification with source SYSTEM.
369 register struct proc
*rp
;
373 if(!isokendpt(ep
, &proc_nr
) || isemptyn(proc_nr
))
376 rp
= proc_addr(proc_nr
);
378 if(!priv
) return ENOENT
;
379 sigaddset(&priv
->s_sig_pending
, sig_nr
);
380 mini_notify(proc_addr(SYSTEM
), rp
->p_endpoint
);
385 /*===========================================================================*
387 *===========================================================================*/
388 void cause_sig(proc_nr
, sig_nr
)
389 proc_nr_t proc_nr
; /* process to be signalled */
390 int sig_nr
; /* signal to be sent */
392 /* A system process wants to send a signal to a process. Examples are:
393 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
394 * - TTY wanting to cause SIGINT upon getting a DEL
395 * - FS wanting to cause SIGPIPE for a broken pipe
396 * Signals are handled by sending a message to the signal manager assigned to
397 * the process. This function handles the signals and makes sure the signal
398 * manager gets them by sending a notification. The process being signaled
399 * is blocked while the signal manager has not finished all signals for it.
400 * Race conditions between calls to this function and the system calls that
401 * process pending kernel signals cannot exist. Signal related functions are
402 * only called when a user process causes a CPU exception and from the kernel
403 * process level, which runs to completion.
405 register struct proc
*rp
, *sig_mgr_rp
;
409 /* Lookup signal manager. */
410 rp
= proc_addr(proc_nr
);
411 sig_mgr
= priv(rp
)->s_sig_mgr
;
412 if(sig_mgr
== SELF
) sig_mgr
= rp
->p_endpoint
;
414 /* If the target is the signal manager of itself, send the signal directly. */
415 if(rp
->p_endpoint
== sig_mgr
) {
416 if(SIGS_IS_LETHAL(sig_nr
)) {
417 /* If the signal is lethal, see if a backup signal manager exists. */
418 sig_mgr
= priv(rp
)->s_bak_sig_mgr
;
419 if(sig_mgr
!= NONE
&& isokendpt(sig_mgr
, &sig_mgr_proc_nr
)) {
420 priv(rp
)->s_sig_mgr
= sig_mgr
;
421 priv(rp
)->s_bak_sig_mgr
= NONE
;
422 sig_mgr_rp
= proc_addr(sig_mgr_proc_nr
);
423 RTS_UNSET(sig_mgr_rp
, RTS_NO_PRIV
);
424 cause_sig(proc_nr
, sig_nr
); /* try again with the new sig mgr. */
427 /* We are out of luck. Time to panic. */
429 panic("cause_sig: sig manager %d gets lethal signal %d for itself",
430 rp
->p_endpoint
, sig_nr
);
432 sigaddset(&priv(rp
)->s_sig_pending
, sig_nr
);
433 if(OK
!= send_sig(rp
->p_endpoint
, SIGKSIGSM
))
434 panic("send_sig failed");
438 /* Check if the signal is already pending. Process it otherwise. */
439 if (! sigismember(&rp
->p_pending
, sig_nr
)) {
440 sigaddset(&rp
->p_pending
, sig_nr
);
441 if (! (RTS_ISSET(rp
, RTS_SIGNALED
))) { /* other pending */
442 RTS_SET(rp
, RTS_SIGNALED
| RTS_SIG_PENDING
);
443 if(OK
!= send_sig(sig_mgr
, SIGKSIG
))
444 panic("send_sig failed");
449 /*===========================================================================*
451 *===========================================================================*/
452 void sig_delay_done(struct proc
*rp
)
454 /* A process is now known not to send any direct messages.
455 * Tell PM that the stop delay has ended, by sending a signal to the process.
456 * Used for actual signal delivery.
459 rp
->p_misc_flags
&= ~MF_SIG_DELAY
;
461 cause_sig(proc_nr(rp
), SIGSNDELAY
);
464 /*===========================================================================*
466 *===========================================================================*/
467 static void clear_ipc(
468 register struct proc
*rc
/* slot of process to clean up */
471 /* Clear IPC data for a given process slot. */
472 struct proc
**xpp
; /* iterate over caller queue */
474 if (RTS_ISSET(rc
, RTS_SENDING
)) {
477 okendpt(rc
->p_sendto_e
, &target_proc
);
478 xpp
= &proc_addr(target_proc
)->p_caller_q
; /* destination's queue */
479 while (*xpp
) { /* check entire queue */
480 if (*xpp
== rc
) { /* process is on the queue */
481 *xpp
= (*xpp
)->p_q_link
; /* replace by next process */
482 #if DEBUG_ENABLE_IPC_WARNINGS
483 printf("endpoint %d / %s removed from queue at %d\n",
484 rc
->p_endpoint
, rc
->p_name
, rc
->p_sendto_e
);
486 break; /* can only be queued once */
488 xpp
= &(*xpp
)->p_q_link
; /* proceed to next queued */
490 RTS_UNSET(rc
, RTS_SENDING
);
492 RTS_UNSET(rc
, RTS_RECEIVING
);
495 /*===========================================================================*
497 *===========================================================================*/
498 void clear_endpoint(rc
)
499 register struct proc
*rc
; /* slot of process to clean up */
501 if(isemptyp(rc
)) panic("clear_proc: empty process: %d", rc
->p_endpoint
);
508 /* Make sure that the exiting process is no longer scheduled. */
509 RTS_SET(rc
, RTS_NO_ENDPOINT
);
510 if (priv(rc
)->s_flags
& SYS_PROC
)
512 priv(rc
)->s_asynsize
= 0;
515 /* If the process happens to be queued trying to send a
516 * message, then it must be removed from the message queues.
520 /* Likewise, if another process was sending or receive a message to or from
521 * the exiting process, it must be alerted that process no longer is alive.
522 * Check all processes.
524 clear_ipc_refs(rc
, EDEADSRCDST
);
528 /*===========================================================================*
530 *===========================================================================*/
531 void clear_ipc_refs(rc
, caller_ret
)
532 register struct proc
*rc
; /* slot of process to clean up */
533 int caller_ret
; /* code to return on callers */
535 /* Clear IPC references for a given process slot. */
536 struct proc
*rp
; /* iterate over process table */
539 /* Tell processes that sent asynchronous messages to 'rc' they are not
540 * going to be delivered */
541 while ((src_id
= has_pending_asend(rc
, ANY
)) != NULL_PRIV_ID
)
542 cancel_async(proc_addr(id_to_nr(src_id
)), rc
);
544 for (rp
= BEG_PROC_ADDR
; rp
< END_PROC_ADDR
; rp
++) {
548 /* Unset pending notification bits. */
549 unset_sys_bit(priv(rp
)->s_notify_pending
, priv(rc
)->s_id
);
551 /* Unset pending asynchronous messages */
552 unset_sys_bit(priv(rp
)->s_asyn_pending
, priv(rc
)->s_id
);
554 /* Check if process depends on given process. */
555 if (P_BLOCKEDON(rp
) == rc
->p_endpoint
) {
556 rp
->p_reg
.retreg
= caller_ret
; /* return requested code */
562 /*===========================================================================*
563 * kernel_call_resume *
564 *===========================================================================*/
565 void kernel_call_resume(struct proc
*caller
)
569 assert(!RTS_ISSET(caller
, RTS_SLOT_FREE
));
570 assert(!RTS_ISSET(caller
, RTS_VMREQUEST
));
572 assert(caller
->p_vmrequest
.saved
.reqmsg
.m_source
== caller
->p_endpoint
);
575 printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
576 caller->p_name, caller->p_endpoint,
577 caller->p_rts_flags, caller->p_misc_flags);
580 /* re-execute the kernel call, with MF_KCALL_RESUME still set so
581 * the call knows this is a retry.
583 result
= kernel_call_dispatch(caller
, &caller
->p_vmrequest
.saved
.reqmsg
);
585 * we are resuming the kernel call so we have to remove this flag so it
588 caller
->p_misc_flags
&= ~MF_KCALL_RESUME
;
589 kernel_call_finish(caller
, &caller
->p_vmrequest
.saved
.reqmsg
, result
);
592 /*===========================================================================*
594 *===========================================================================*/
595 int sched_proc(struct proc
*p
,
600 /* Make sure the values given are within the allowed range.*/
601 if ((priority
< TASK_Q
&& priority
!= -1) || priority
> NR_SCHED_QUEUES
)
604 if (quantum
< 1 && quantum
!= -1)
608 if ((cpu
< 0 && cpu
!= -1) || (cpu
> 0 && (unsigned) cpu
>= ncpus
))
610 if (cpu
!= -1 && !(cpu_is_ready(cpu
)))
614 /* In some cases, we might be rescheduling a runnable process. In such
615 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
616 * flag before the generic unset to dequeue/enqueue the process
619 /* FIXME this preempts the process, do we really want to do that ?*/
621 /* FIXME this is a problem for SMP if the processes currently runs on a
623 if (proc_is_runnable(p
)) {
625 if (p
->p_cpu
!= cpuid
&& cpu
!= -1 && cpu
!= p
->p_cpu
) {
626 smp_schedule_migrate_proc(p
, cpu
);
630 RTS_SET(p
, RTS_NO_QUANTUM
);
633 if (proc_is_runnable(p
))
634 RTS_SET(p
, RTS_NO_QUANTUM
);
637 p
->p_priority
= priority
;
639 p
->p_quantum_size_ms
= quantum
;
640 p
->p_cpu_time_left
= ms_2_cpu_time(quantum
);
647 /* Clear the scheduling bit and enqueue the process */
648 RTS_UNSET(p
, RTS_NO_QUANTUM
);