For /dev/mem, map in memory to be copied to memory's own address space
[minix3.git] / kernel / system.c
blobfc7f94f25bbd5eb01279f094c8851558ae9e1f49
/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via PM
 *   umap_bios:		map virtual address in BIOS_SEG to physical
 *   virtual_copy:	copy bytes from one virtual address to another
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *
 * Changes:
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */

30 #include "debug.h"
31 #include "kernel.h"
32 #include "system.h"
33 #include "proc.h"
34 #include <stdlib.h>
35 #include <signal.h>
36 #include <unistd.h>
37 #include <sys/sigcontext.h>
38 #include <minix/endpoint.h>
39 #include <minix/safecopies.h>
41 /* Declaration of the call vector that defines the mapping of system calls
42 * to handler functions. The vector is initialized in sys_init() with map(),
43 * which makes sure the system call numbers are ok. No space is allocated,
44 * because the dummy is declared extern. If an illegal call is given, the
45 * array size will be negative and this won't compile.
47 PUBLIC int (*call_vec[NR_SYS_CALLS])(message *m_ptr);
49 #define map(call_nr, handler) \
50 {extern int dummy[NR_SYS_CALLS>(unsigned)(call_nr-KERNEL_CALL) ? 1:-1];} \
51 call_vec[(call_nr-KERNEL_CALL)] = (handler)
53 FORWARD _PROTOTYPE( void initialize, (void));
55 /*===========================================================================*
56 * sys_task *
57 *===========================================================================*/
58 PUBLIC void sys_task()
60 /* Main entry point of sys_task. Get the message and dispatch on type. */
61 static message m;
62 register int result;
63 register struct proc *caller_ptr;
64 int s;
65 int call_nr;
67 /* Initialize the system task. */
68 initialize();
70 while (TRUE) {
71 int r;
72 /* Get work. Block and wait until a request message arrives. */
73 if((r=receive(ANY, &m)) != OK) panic("system: receive() failed", r);
74 sys_call_code = (unsigned) m.m_type;
75 call_nr = sys_call_code - KERNEL_CALL;
76 who_e = m.m_source;
77 okendpt(who_e, &who_p);
78 caller_ptr = proc_addr(who_p);
80 /* See if the caller made a valid request and try to handle it. */
81 if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */
82 #if DEBUG_ENABLE_IPC_WARNINGS
83 kprintf("SYSTEM: illegal request %d from %d.\n",
84 call_nr,m.m_source);
85 #endif
86 result = EBADREQUEST; /* illegal message type */
88 else if (!GET_BIT(priv(caller_ptr)->s_k_call_mask, call_nr)) {
89 #if DEBUG_ENABLE_IPC_WARNINGS
90 kprintf("SYSTEM: request %d from %d denied.\n",
91 call_nr,m.m_source);
92 #endif
93 result = ECALLDENIED; /* illegal message type */
95 else {
96 result = (*call_vec[call_nr])(&m); /* handle the system call */
99 /* Send a reply, unless inhibited by a handler function. Use the kernel
100 * function lock_send() to prevent a system call trap. The destination
101 * is known to be blocked waiting for a message.
103 if (result != EDONTREPLY) {
104 m.m_type = result; /* report status of call */
105 if (OK != (s=lock_send(m.m_source, &m))) {
106 kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s);
112 /*===========================================================================*
113 * initialize *
114 *===========================================================================*/
115 PRIVATE void initialize(void)
117 register struct priv *sp;
118 int i;
120 /* Initialize IRQ handler hooks. Mark all hooks available. */
121 for (i=0; i<NR_IRQ_HOOKS; i++) {
122 irq_hooks[i].proc_nr_e = NONE;
125 /* Initialize all alarm timers for all processes. */
126 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
127 tmr_inittimer(&(sp->s_alarm_timer));
130 /* Initialize the call vector to a safe default handler. Some system calls
131 * may be disabled or nonexistant. Then explicitely map known calls to their
132 * handler functions. This is done with a macro that gives a compile error
133 * if an illegal call number is used. The ordering is not important here.
135 for (i=0; i<NR_SYS_CALLS; i++) {
136 call_vec[i] = do_unused;
139 /* Process management. */
140 map(SYS_FORK, do_fork); /* a process forked a new process */
141 map(SYS_EXEC, do_exec); /* update process after execute */
142 map(SYS_EXIT, do_exit); /* clean up after process exit */
143 map(SYS_NICE, do_nice); /* set scheduling priority */
144 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
145 map(SYS_TRACE, do_trace); /* request a trace operation */
146 map(SYS_SETGRANT, do_setgrant); /* get/set own parameters */
148 /* Signal handling. */
149 map(SYS_KILL, do_kill); /* cause a process to be signaled */
150 map(SYS_GETKSIG, do_getksig); /* PM checks for pending signals */
151 map(SYS_ENDKSIG, do_endksig); /* PM finished processing signal */
152 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
153 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
155 /* Device I/O. */
156 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
157 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
158 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
160 /* Memory management. */
161 map(SYS_NEWMAP, do_newmap); /* set up a process memory map */
162 map(SYS_SEGCTL, do_segctl); /* add segment and get selector */
163 map(SYS_MEMSET, do_memset); /* write char to memory area */
164 map(SYS_VM_SETBUF, do_vm_setbuf); /* PM passes buffer for page tables */
165 map(SYS_VM_MAP, do_vm_map); /* Map/unmap physical (device) memory */
167 /* Copying. */
168 map(SYS_UMAP, do_umap); /* map virtual to physical address */
169 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
170 map(SYS_PHYSCOPY, do_physcopy); /* use physical addressing */
171 map(SYS_VIRVCOPY, do_virvcopy); /* vector with copy requests */
172 map(SYS_PHYSVCOPY, do_physvcopy); /* vector with copy requests */
173 map(SYS_SAFECOPYFROM, do_safecopy); /* copy with pre-granted permission */
174 map(SYS_SAFECOPYTO, do_safecopy); /* copy with pre-granted permission */
175 map(SYS_VSAFECOPY, do_vsafecopy); /* vectored safecopy */
177 /* Clock functionality. */
178 map(SYS_TIMES, do_times); /* get uptime and process times */
179 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
181 /* System control. */
182 map(SYS_ABORT, do_abort); /* abort MINIX */
183 map(SYS_GETINFO, do_getinfo); /* request system information */
185 /* Profiling. */
186 map(SYS_SPROF, do_sprofile); /* start/stop statistical profiling */
187 map(SYS_CPROF, do_cprofile); /* get/reset call profiling data */
188 map(SYS_PROFBUF, do_profbuf); /* announce locations to kernel */
190 /* i386-specific. */
191 #if _MINIX_CHIP == _CHIP_INTEL
192 map(SYS_INT86, do_int86); /* real-mode BIOS calls */
193 map(SYS_READBIOS, do_readbios); /* read from BIOS locations */
194 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
195 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
196 #endif
199 /*===========================================================================*
200 * get_priv *
201 *===========================================================================*/
202 PUBLIC int get_priv(rc, proc_type)
203 register struct proc *rc; /* new (child) process pointer */
204 int proc_type; /* system or user process flag */
206 /* Get a privilege structure. All user processes share the same privilege
207 * structure. System processes get their own privilege structure.
209 register struct priv *sp; /* privilege structure */
211 if (proc_type == SYS_PROC) { /* find a new slot */
212 for (sp = BEG_PRIV_ADDR; sp < END_PRIV_ADDR; ++sp)
213 if (sp->s_proc_nr == NONE && sp->s_id != USER_PRIV_ID) break;
214 if (sp->s_proc_nr != NONE) return(ENOSPC);
215 rc->p_priv = sp; /* assign new slot */
216 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
217 rc->p_priv->s_flags = SYS_PROC; /* mark as privileged */
218 } else {
219 rc->p_priv = &priv[USER_PRIV_ID]; /* use shared slot */
220 rc->p_priv->s_proc_nr = INIT_PROC_NR; /* set association */
222 /* s_flags of this shared structure are to be once at system startup. */
224 return(OK);
227 /*===========================================================================*
228 * get_randomness *
229 *===========================================================================*/
230 PUBLIC void get_randomness(source)
231 int source;
233 /* Use architecture-dependent high-resolution clock for
234 * raw entropy gathering.
236 int r_next;
237 unsigned long tsc_high, tsc_low;
239 source %= RANDOM_SOURCES;
240 r_next= krandom.bin[source].r_next;
241 read_tsc(&tsc_high, &tsc_low);
242 krandom.bin[source].r_buf[r_next] = tsc_low;
243 if (krandom.bin[source].r_size < RANDOM_ELEMENTS) {
244 krandom.bin[source].r_size ++;
246 krandom.bin[source].r_next = (r_next + 1 ) % RANDOM_ELEMENTS;
249 /*===========================================================================*
250 * send_sig *
251 *===========================================================================*/
252 PUBLIC void send_sig(int proc_nr, int sig_nr)
254 /* Notify a system process about a signal. This is straightforward. Simply
255 * set the signal that is to be delivered in the pending signals map and
256 * send a notification with source SYSTEM.
258 * Process number is verified to avoid writing in random places, but we
259 * don't kprintf() or panic() because that causes send_sig() invocations.
261 register struct proc *rp;
262 static int n;
264 if(!isokprocn(proc_nr) || isemptyn(proc_nr))
265 return;
267 rp = proc_addr(proc_nr);
268 sigaddset(&priv(rp)->s_sig_pending, sig_nr);
269 lock_notify(SYSTEM, rp->p_endpoint);
272 /*===========================================================================*
273 * cause_sig *
274 *===========================================================================*/
275 PUBLIC void cause_sig(proc_nr, sig_nr)
276 int proc_nr; /* process to be signalled */
277 int sig_nr; /* signal to be sent, 1 to _NSIG */
279 /* A system process wants to send a signal to a process. Examples are:
280 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
281 * - TTY wanting to cause SIGINT upon getting a DEL
282 * - FS wanting to cause SIGPIPE for a broken pipe
283 * Signals are handled by sending a message to PM. This function handles the
284 * signals and makes sure the PM gets them by sending a notification. The
285 * process being signaled is blocked while PM has not finished all signals
286 * for it.
287 * Race conditions between calls to this function and the system calls that
288 * process pending kernel signals cannot exist. Signal related functions are
289 * only called when a user process causes a CPU exception and from the kernel
290 * process level, which runs to completion.
292 register struct proc *rp;
294 /* Check if the signal is already pending. Process it otherwise. */
295 rp = proc_addr(proc_nr);
296 if (! sigismember(&rp->p_pending, sig_nr)) {
297 sigaddset(&rp->p_pending, sig_nr);
298 if (! (RTS_ISSET(rp, SIGNALED))) { /* other pending */
299 RTS_LOCK_SET(rp, SIGNALED | SIG_PENDING);
300 send_sig(PM_PROC_NR, SIGKSIG);
305 #if _MINIX_CHIP == _CHIP_INTEL
307 /*===========================================================================*
308 * umap_bios *
309 *===========================================================================*/
310 PUBLIC phys_bytes umap_bios(rp, vir_addr, bytes)
311 register struct proc *rp; /* pointer to proc table entry for process */
312 vir_bytes vir_addr; /* virtual address in BIOS segment */
313 vir_bytes bytes; /* # of bytes to be copied */
315 /* Calculate the physical memory address at the BIOS. Note: currently, BIOS
316 * address zero (the first BIOS interrupt vector) is not considered as an
317 * error here, but since the physical address will be zero as well, the
318 * calling function will think an error occurred. This is not a problem,
319 * since no one uses the first BIOS interrupt vector.
322 /* Check all acceptable ranges. */
323 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
324 return (phys_bytes) vir_addr;
325 else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
326 return (phys_bytes) vir_addr;
328 kprintf("Warning, error in umap_bios, virtual address 0x%x\n", vir_addr);
329 return 0;
331 #endif
333 /*===========================================================================*
334 * umap_verify_grant *
335 *===========================================================================*/
336 PUBLIC phys_bytes umap_verify_grant(rp, grantee, grant, offset, bytes, access)
337 struct proc *rp; /* pointer to proc table entry for process */
338 endpoint_t grantee; /* who wants to do this */
339 cp_grant_id_t grant; /* grant no. */
340 vir_bytes offset; /* offset into grant */
341 vir_bytes bytes; /* size */
342 int access; /* does grantee want to CPF_READ or _WRITE? */
344 int proc_nr;
345 vir_bytes v_offset;
346 endpoint_t granter;
348 /* See if the grant in that process is sensible, and
349 * find out the virtual address and (optionally) new
350 * process for that address.
352 * Then convert that process to a slot number.
354 if(verify_grant(rp->p_endpoint, grantee, grant, bytes, access, offset,
355 &v_offset, &granter) != OK
356 || !isokendpt(granter, &proc_nr)) {
357 return 0;
360 /* Do the mapping from virtual to physical. */
361 return umap_local(proc_addr(proc_nr), D, v_offset, bytes);
364 /*===========================================================================*
365 * umap_grant *
366 *===========================================================================*/
367 PUBLIC phys_bytes umap_grant(rp, grant, bytes)
368 struct proc *rp; /* pointer to proc table entry for process */
369 cp_grant_id_t grant; /* grant no. */
370 vir_bytes bytes; /* size */
372 int proc_nr;
373 vir_bytes offset;
374 endpoint_t granter;
376 /* See if the grant in that process is sensible, and
377 * find out the virtual address and (optionally) new
378 * process for that address.
380 * Then convert that process to a slot number.
382 if(verify_grant(rp->p_endpoint, ANY, grant, bytes, 0, 0,
383 &offset, &granter) != OK) {
384 return 0;
387 if(!isokendpt(granter, &proc_nr)) {
388 return 0;
391 /* Do the mapping from virtual to physical. */
392 return umap_local(proc_addr(proc_nr), D, offset, bytes);
395 /*===========================================================================*
396 * virtual_copy *
397 *===========================================================================*/
398 PUBLIC int virtual_copy(src_addr, dst_addr, bytes)
399 struct vir_addr *src_addr; /* source virtual address */
400 struct vir_addr *dst_addr; /* destination virtual address */
401 vir_bytes bytes; /* # of bytes to copy */
403 /* Copy bytes from virtual address src_addr to virtual address dst_addr.
404 * Virtual addresses can be in ABS, LOCAL_SEG, REMOTE_SEG, or BIOS_SEG.
406 struct vir_addr *vir_addr[2]; /* virtual source and destination address */
407 phys_bytes phys_addr[2]; /* absolute source and destination */
408 int seg_index;
409 int i;
411 /* Check copy count. */
412 if (bytes <= 0) return(EDOM);
414 /* Do some more checks and map virtual addresses to physical addresses. */
415 vir_addr[_SRC_] = src_addr;
416 vir_addr[_DST_] = dst_addr;
417 for (i=_SRC_; i<=_DST_; i++) {
418 int proc_nr, type;
419 struct proc *p;
421 type = vir_addr[i]->segment & SEGMENT_TYPE;
422 if(type != PHYS_SEG && isokendpt(vir_addr[i]->proc_nr_e, &proc_nr))
423 p = proc_addr(proc_nr);
424 else
425 p = NULL;
427 /* Get physical address. */
428 switch(type) {
429 case LOCAL_SEG:
430 if(!p) return EDEADSRCDST;
431 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
432 phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset, bytes);
433 break;
434 case REMOTE_SEG:
435 if(!p) return EDEADSRCDST;
436 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
437 phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes);
438 break;
439 #if _MINIX_CHIP == _CHIP_INTEL
440 case BIOS_SEG:
441 if(!p) return EDEADSRCDST;
442 phys_addr[i] = umap_bios(p, vir_addr[i]->offset, bytes );
443 break;
444 #endif
445 case PHYS_SEG:
446 phys_addr[i] = vir_addr[i]->offset;
447 break;
448 case GRANT_SEG:
449 phys_addr[i] = umap_grant(p, vir_addr[i]->offset, bytes);
450 break;
451 default:
452 return(EINVAL);
455 /* Check if mapping succeeded. */
456 if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG)
457 return(EFAULT);
460 /* Now copy bytes between physical addresseses. */
461 phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes);
462 return(OK);
466 /*===========================================================================*
467 * clear_endpoint *
468 *===========================================================================*/
469 PUBLIC void clear_endpoint(rc)
470 register struct proc *rc; /* slot of process to clean up */
472 register struct proc *rp; /* iterate over process table */
473 register struct proc **xpp; /* iterate over caller queue */
475 if(isemptyp(rc)) panic("clear_proc: empty process", proc_nr(rc));
477 /* Make sure that the exiting process is no longer scheduled. */
478 RTS_LOCK_SET(rc, NO_ENDPOINT);
480 /* If the process happens to be queued trying to send a
481 * message, then it must be removed from the message queues.
483 if (RTS_ISSET(rc, SENDING)) {
484 int target_proc;
486 okendpt(rc->p_sendto_e, &target_proc);
487 xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
488 while (*xpp != NIL_PROC) { /* check entire queue */
489 if (*xpp == rc) { /* process is on the queue */
490 *xpp = (*xpp)->p_q_link; /* replace by next process */
491 #if DEBUG_ENABLE_IPC_WARNINGS
492 kprintf("Proc %d removed from queue at %d\n",
493 proc_nr(rc), rc->p_sendto_e);
494 #endif
495 break; /* can only be queued once */
497 xpp = &(*xpp)->p_q_link; /* proceed to next queued */
499 rc->p_rts_flags &= ~SENDING;
501 rc->p_rts_flags &= ~RECEIVING;
503 /* Likewise, if another process was sending or receive a message to or from
504 * the exiting process, it must be alerted that process no longer is alive.
505 * Check all processes.
507 for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
508 if(isemptyp(rp))
509 continue;
511 /* Unset pending notification bits. */
512 unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
514 /* Check if process is receiving from exiting process. */
515 if (RTS_ISSET(rp, RECEIVING) && rp->p_getfrom_e == rc->p_endpoint) {
516 rp->p_reg.retreg = ESRCDIED; /* report source died */
517 RTS_LOCK_UNSET(rp, RECEIVING); /* no longer receiving */
518 #if DEBUG_ENABLE_IPC_WARNINGS
519 kprintf("Proc %d receive dead src %d\n", proc_nr(rp), proc_nr(rc));
520 #endif
522 if (RTS_ISSET(rp, SENDING) &&
523 rp->p_sendto_e == rc->p_endpoint) {
524 rp->p_reg.retreg = EDSTDIED; /* report destination died */
525 RTS_LOCK_UNSET(rp, SENDING);
526 #if DEBUG_ENABLE_IPC_WARNINGS
527 kprintf("Proc %d send dead dst %d\n", proc_nr(rp), proc_nr(rc));
528 #endif