Drop main() prototype. Syncs with NetBSD-8
[minix.git] / minix / kernel / system.c
blob01d716c7e4ef493bcc42a7c21704676cd3b5a61f
1 /* This task handles the interface between the kernel and user-level servers.
2 * System services can be accessed by doing a system call. System calls are
3 * transformed into request messages, which are handled by this task. By
4 * convention, a sys_call() is transformed in a SYS_CALL request message that
5 * is handled in a function named do_call().
7 * A private call vector is used to map all system calls to the functions that
8 * handle them. The actual handler functions are contained in separate files
9 * to keep this file clean. The call vector is used in the system task's main
10 * loop to handle all incoming requests.
12 * In addition to the main sys_task() entry point, which starts the main loop,
13 * there are several other minor entry points:
14 * get_priv: assign privilege structure to user or system process
15 * set_sendto_bit: allow a process to send messages to a new target
16 * unset_sendto_bit: disallow a process from sending messages to a target
17 * fill_sendto_mask: fill the target mask of a given process
18 * send_sig: send a signal directly to a system process
19 * cause_sig: take action to cause a signal to occur via a signal mgr
20 * sig_delay_done: tell PM that a process is not sending
21 * send_diag_sig: send a diagnostics signal to interested processes
22 * get_randomness: accumulate randomness in a buffer
23 * clear_endpoint: remove a process' ability to send and receive messages
24 * sched_proc: schedule a process
26 * Changes:
27 * Nov 22, 2009 get_priv supports static priv ids (Cristiano Giuffrida)
28 * Aug 04, 2005 check if system call is allowed (Jorrit N. Herder)
29 * Jul 20, 2005 send signal to services with message (Jorrit N. Herder)
30 * Jan 15, 2005 new, generalized virtual copy function (Jorrit N. Herder)
31 * Oct 10, 2004 dispatch system calls from call vector (Jorrit N. Herder)
32 * Sep 30, 2004 source code documentation updated (Jorrit N. Herder)
35 #include "kernel/system.h"
36 #include "kernel/vm.h"
37 #include "kernel/clock.h"
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <assert.h>
41 #include <signal.h>
42 #include <unistd.h>
43 #include <minix/endpoint.h>
44 #include <minix/safecopies.h>
46 /* Declaration of the call vector that defines the mapping of system calls
47 * to handler functions. The vector is initialized in sys_init() with map(),
48 * which makes sure the system call numbers are ok. No space is allocated,
49 * because the dummy is declared extern. If an illegal call is given, the
50 * array size will be negative and this won't compile.
52 static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);
54 #define map(call_nr, handler) \
55 { int call_index = call_nr-KERNEL_CALL; \
56 assert(call_index >= 0 && call_index < NR_SYS_CALLS); \
57 call_vec[call_index] = (handler) ; }
59 static void kernel_call_finish(struct proc * caller, message *msg, int result)
61 if(result == VMSUSPEND) {
62 /* Special case: message has to be saved for handling
63 * until VM tells us it's allowed. VM has been notified
64 * and we must wait for its reply to restart the call.
66 assert(RTS_ISSET(caller, RTS_VMREQUEST));
67 assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
68 caller->p_vmrequest.saved.reqmsg = *msg;
69 caller->p_misc_flags |= MF_KCALL_RESUME;
70 } else {
72 * call is finished, we could have been suspended because of VM,
73 * remove the request message
75 caller->p_vmrequest.saved.reqmsg.m_source = NONE;
76 if (result != EDONTREPLY) {
77 /* copy the result as a message to the original user buffer */
78 msg->m_source = SYSTEM;
79 msg->m_type = result; /* report status of call */
80 #if DEBUG_IPC_HOOK
81 hook_ipc_msgkresult(msg, caller);
82 #endif
83 if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
84 printf("WARNING wrong user pointer 0x%08x from "
85 "process %s / %d\n",
86 caller->p_delivermsg_vir,
87 caller->p_name,
88 caller->p_endpoint);
89 cause_sig(proc_nr(caller), SIGSEGV);
95 static int kernel_call_dispatch(struct proc * caller, message *msg)
97 int result = OK;
98 int call_nr;
100 #if DEBUG_IPC_HOOK
101 hook_ipc_msgkcall(msg, caller);
102 #endif
103 call_nr = msg->m_type - KERNEL_CALL;
105 /* See if the caller made a valid request and try to handle it. */
106 if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */
107 printf("SYSTEM: illegal request %d from %d.\n",
108 call_nr,msg->m_source);
109 result = EBADREQUEST; /* illegal message type */
111 else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
112 printf("SYSTEM: denied request %d from %d.\n",
113 call_nr,msg->m_source);
114 result = ECALLDENIED; /* illegal message type */
115 } else {
116 /* handle the system call */
117 if (call_vec[call_nr])
118 result = (*call_vec[call_nr])(caller, msg);
119 else {
120 printf("Unused kernel call %d from %d\n",
121 call_nr, caller->p_endpoint);
122 result = EBADREQUEST;
126 return result;
129 /*===========================================================================*
130 * kernel_call *
131 *===========================================================================*/
133 * this function checks the basic syscall parameters and if accepted it
134 * dispatches its handling to the right handler
136 void kernel_call(message *m_user, struct proc * caller)
138 int result = OK;
139 message msg;
141 caller->p_delivermsg_vir = (vir_bytes) m_user;
143 * the ldt and cr3 of the caller process is loaded because it just've trapped
144 * into the kernel or was already set in switch_to_user() before we resume
145 * execution of an interrupted kernel call
147 if (copy_msg_from_user(m_user, &msg) == 0) {
148 msg.m_source = caller->p_endpoint;
149 result = kernel_call_dispatch(caller, &msg);
151 else {
152 printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
153 m_user, caller->p_name, caller->p_endpoint);
154 cause_sig(proc_nr(caller), SIGSEGV);
155 return;
159 /* remember who invoked the kcall so we can bill it its time */
160 kbill_kcall = caller;
162 kernel_call_finish(caller, &msg, result);
165 /*===========================================================================*
166 * system_init *
167 *===========================================================================*/
168 void system_init(void)
170 register struct priv *sp;
171 int i;
173 /* Initialize IRQ handler hooks. Mark all hooks available. */
174 for (i=0; i<NR_IRQ_HOOKS; i++) {
175 irq_hooks[i].proc_nr_e = NONE;
178 /* Initialize all alarm timers for all processes. */
179 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
180 tmr_inittimer(&(sp->s_alarm_timer));
183 /* Initialize the call vector to a safe default handler. Some system calls
184 * may be disabled or nonexistant. Then explicitly map known calls to their
185 * handler functions. This is done with a macro that gives a compile error
186 * if an illegal call number is used. The ordering is not important here.
188 for (i=0; i<NR_SYS_CALLS; i++) {
189 call_vec[i] = NULL;
192 /* Process management. */
193 map(SYS_FORK, do_fork); /* a process forked a new process */
194 map(SYS_EXEC, do_exec); /* update process after execute */
195 map(SYS_CLEAR, do_clear); /* clean up after process exit */
196 map(SYS_EXIT, do_exit); /* a system process wants to exit */
197 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
198 map(SYS_TRACE, do_trace); /* request a trace operation */
199 map(SYS_SETGRANT, do_setgrant); /* get/set own parameters */
200 map(SYS_RUNCTL, do_runctl); /* set/clear stop flag of a process */
201 map(SYS_UPDATE, do_update); /* update a process into another */
202 map(SYS_STATECTL, do_statectl); /* let a process control its state */
204 /* Signal handling. */
205 map(SYS_KILL, do_kill); /* cause a process to be signaled */
206 map(SYS_GETKSIG, do_getksig); /* signal manager checks for signals */
207 map(SYS_ENDKSIG, do_endksig); /* signal manager finished signal */
208 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
209 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
211 /* Device I/O. */
212 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
213 #if defined(__i386__)
214 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
215 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
216 #endif
218 /* Memory management. */
219 map(SYS_MEMSET, do_memset); /* write char to memory area */
220 map(SYS_VMCTL, do_vmctl); /* various VM process settings */
222 /* Copying. */
223 map(SYS_UMAP, do_umap); /* map virtual to physical address */
224 map(SYS_UMAP_REMOTE, do_umap_remote); /* do_umap for non-caller process */
225 map(SYS_VUMAP, do_vumap); /* vectored virtual to physical map */
226 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
227 map(SYS_PHYSCOPY, do_copy); /* use physical addressing */
228 map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
229 map(SYS_SAFECOPYTO, do_safecopy_to); /* copy with pre-granted permission */
230 map(SYS_VSAFECOPY, do_vsafecopy); /* vectored safecopy */
232 /* safe memset */
233 map(SYS_SAFEMEMSET, do_safememset); /* safememset */
235 /* Clock functionality. */
236 map(SYS_TIMES, do_times); /* get uptime and process times */
237 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
238 map(SYS_STIME, do_stime); /* set the boottime */
239 map(SYS_SETTIME, do_settime); /* set the system time (realtime) */
240 map(SYS_VTIMER, do_vtimer); /* set or retrieve a virtual timer */
242 /* System control. */
243 map(SYS_ABORT, do_abort); /* abort MINIX */
244 map(SYS_GETINFO, do_getinfo); /* request system information */
245 map(SYS_DIAGCTL, do_diagctl); /* diagnostics-related functionality */
247 /* Profiling. */
248 map(SYS_SPROF, do_sprofile); /* start/stop statistical profiling */
250 /* arm-specific. */
251 #if defined(__arm__)
252 map(SYS_PADCONF, do_padconf); /* configure pinmux */
253 #endif
255 /* i386-specific. */
256 #if defined(__i386__)
257 map(SYS_READBIOS, do_readbios); /* read from BIOS locations */
258 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
259 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
260 #endif
262 /* Machine state switching. */
263 map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
264 map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */
266 /* Scheduling */
267 map(SYS_SCHEDULE, do_schedule); /* reschedule a process */
268 map(SYS_SCHEDCTL, do_schedctl); /* change process scheduler */
271 /*===========================================================================*
272 * get_priv *
273 *===========================================================================*/
274 int get_priv(
275 register struct proc *rc, /* new (child) process pointer */
276 int priv_id /* privilege id */
279 /* Allocate a new privilege structure for a system process. Privilege ids
280 * can be assigned either statically or dynamically.
282 register struct priv *sp; /* privilege structure */
284 if(priv_id == NULL_PRIV_ID) { /* allocate slot dynamically */
285 for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
286 if (sp->s_proc_nr == NONE) break;
287 if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
289 else { /* allocate slot from id */
290 if(!is_static_priv_id(priv_id)) {
291 return EINVAL; /* invalid static priv id */
293 if(priv[priv_id].s_proc_nr != NONE) {
294 return EBUSY; /* slot already in use */
296 sp = &priv[priv_id];
298 rc->p_priv = sp; /* assign new slot */
299 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
301 return(OK);
304 /*===========================================================================*
305 * set_sendto_bit *
306 *===========================================================================*/
307 void set_sendto_bit(const struct proc *rp, int id)
309 /* Allow a process to send messages to the process(es) associated with the
310 * system privilege structure with the given ID.
313 /* Disallow the process from sending to a process privilege structure with no
314 * associated process, and disallow the process from sending to itself.
316 if (id_to_nr(id) == NONE || priv_id(rp) == id) {
317 unset_sys_bit(priv(rp)->s_ipc_to, id);
318 return;
321 set_sys_bit(priv(rp)->s_ipc_to, id);
323 /* The process that this process can now send to, must be able to reply (or
324 * vice versa). Therefore, its send mask should be updated as well. Ignore
325 * receivers that don't support traps other than RECEIVE, they can't reply
326 * or send messages anyway.
328 if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
329 set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
332 /*===========================================================================*
333 * unset_sendto_bit *
334 *===========================================================================*/
335 void unset_sendto_bit(const struct proc *rp, int id)
337 /* Prevent a process from sending to another process. Retain the send mask
338 * symmetry by also unsetting the bit for the other direction.
341 unset_sys_bit(priv(rp)->s_ipc_to, id);
343 unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
346 /*===========================================================================*
347 * fill_sendto_mask *
348 *===========================================================================*/
349 void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
351 int i;
353 for (i=0; i < NR_SYS_PROCS; i++) {
354 if (get_sys_bit(*map, i))
355 set_sendto_bit(rp, i);
356 else
357 unset_sendto_bit(rp, i);
361 /*===========================================================================*
362 * send_sig *
363 *===========================================================================*/
364 int send_sig(endpoint_t ep, int sig_nr)
366 /* Notify a system process about a signal. This is straightforward. Simply
367 * set the signal that is to be delivered in the pending signals map and
368 * send a notification with source SYSTEM.
370 register struct proc *rp;
371 struct priv *priv;
372 int proc_nr;
374 if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
375 return EINVAL;
377 rp = proc_addr(proc_nr);
378 priv = priv(rp);
379 if(!priv) return ENOENT;
380 sigaddset(&priv->s_sig_pending, sig_nr);
381 mini_notify(proc_addr(SYSTEM), rp->p_endpoint);
383 return OK;
386 /*===========================================================================*
387 * cause_sig *
388 *===========================================================================*/
389 void cause_sig(proc_nr_t proc_nr, int sig_nr)
391 /* A system process wants to send signal 'sig_nr' to process 'proc_nr'.
392 * Examples are:
393 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
394 * - TTY wanting to cause SIGINT upon getting a DEL
395 * - FS wanting to cause SIGPIPE for a broken pipe
396 * Signals are handled by sending a message to the signal manager assigned to
397 * the process. This function handles the signals and makes sure the signal
398 * manager gets them by sending a notification. The process being signaled
399 * is blocked while the signal manager has not finished all signals for it.
400 * Race conditions between calls to this function and the system calls that
401 * process pending kernel signals cannot exist. Signal related functions are
402 * only called when a user process causes a CPU exception and from the kernel
403 * process level, which runs to completion.
405 register struct proc *rp, *sig_mgr_rp;
406 endpoint_t sig_mgr;
407 int sig_mgr_proc_nr;
408 int s;
410 /* Lookup signal manager. */
411 rp = proc_addr(proc_nr);
412 sig_mgr = priv(rp)->s_sig_mgr;
413 if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;
415 /* If the target is the signal manager of itself, send the signal directly. */
416 if(rp->p_endpoint == sig_mgr) {
417 if(SIGS_IS_LETHAL(sig_nr)) {
418 /* If the signal is lethal, see if a backup signal manager exists. */
419 sig_mgr = priv(rp)->s_bak_sig_mgr;
420 if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
421 priv(rp)->s_sig_mgr = sig_mgr;
422 priv(rp)->s_bak_sig_mgr = NONE;
423 sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
424 RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
425 cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
426 return;
428 /* We are out of luck. Time to panic. */
429 proc_stacktrace(rp);
430 panic("cause_sig: sig manager %d gets lethal signal %d for itself",
431 rp->p_endpoint, sig_nr);
433 sigaddset(&priv(rp)->s_sig_pending, sig_nr);
434 if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
435 panic("send_sig failed");
436 return;
439 s = sigismember(&rp->p_pending, sig_nr);
440 /* Check if the signal is already pending. Process it otherwise. */
441 if (!s) {
442 sigaddset(&rp->p_pending, sig_nr);
443 if (! (RTS_ISSET(rp, RTS_SIGNALED))) { /* other pending */
444 RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
445 if(OK != send_sig(sig_mgr, SIGKSIG))
446 panic("send_sig failed");
451 /*===========================================================================*
452 * sig_delay_done *
453 *===========================================================================*/
454 void sig_delay_done(struct proc *rp)
456 /* A process is now known not to send any direct messages.
457 * Tell PM that the stop delay has ended, by sending a signal to the process.
458 * Used for actual signal delivery.
461 rp->p_misc_flags &= ~MF_SIG_DELAY;
463 cause_sig(proc_nr(rp), SIGSNDELAY);
466 /*===========================================================================*
467 * send_diag_sig *
468 *===========================================================================*/
469 void send_diag_sig(void)
471 /* Send a SIGKMESS signal to all processes in receiving updates about new
472 * diagnostics messages.
474 struct priv *privp;
475 endpoint_t ep;
477 for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
478 if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
479 ep = proc_addr(privp->s_proc_nr)->p_endpoint;
480 send_sig(ep, SIGKMESS);
485 /*===========================================================================*
486 * clear_memreq *
487 *===========================================================================*/
488 static void clear_memreq(struct proc *rp)
490 struct proc **rpp;
492 if (!RTS_ISSET(rp, RTS_VMREQUEST))
493 return; /* nothing to do */
495 for (rpp = &vmrequest; *rpp != NULL;
496 rpp = &(*rpp)->p_vmrequest.nextrequestor) {
497 if (*rpp == rp) {
498 *rpp = rp->p_vmrequest.nextrequestor;
499 break;
503 RTS_UNSET(rp, RTS_VMREQUEST);
506 /*===========================================================================*
507 * clear_ipc *
508 *===========================================================================*/
509 static void clear_ipc(
510 register struct proc *rc /* slot of process to clean up */
513 /* Clear IPC data for a given process slot. */
514 struct proc **xpp; /* iterate over caller queue */
516 if (RTS_ISSET(rc, RTS_SENDING)) {
517 int target_proc;
519 okendpt(rc->p_sendto_e, &target_proc);
520 xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
521 while (*xpp) { /* check entire queue */
522 if (*xpp == rc) { /* process is on the queue */
523 *xpp = (*xpp)->p_q_link; /* replace by next process */
524 #if DEBUG_ENABLE_IPC_WARNINGS
525 printf("endpoint %d / %s removed from queue at %d\n",
526 rc->p_endpoint, rc->p_name, rc->p_sendto_e);
527 #endif
528 break; /* can only be queued once */
530 xpp = &(*xpp)->p_q_link; /* proceed to next queued */
532 RTS_UNSET(rc, RTS_SENDING);
534 RTS_UNSET(rc, RTS_RECEIVING);
537 /*===========================================================================*
538 * clear_endpoint *
539 *===========================================================================*/
540 void clear_endpoint(struct proc * rc)
542 /* Clean up the slot of the process given as 'rc'. */
543 if(isemptyp(rc)) panic("clear_proc: empty process: %d", rc->p_endpoint);
546 #if DEBUG_IPC_HOOK
547 hook_ipc_clear(rc);
548 #endif
550 /* Make sure that the exiting process is no longer scheduled. */
551 RTS_SET(rc, RTS_NO_ENDPOINT);
552 if (priv(rc)->s_flags & SYS_PROC)
554 priv(rc)->s_asynsize= 0;
557 /* If the process happens to be queued trying to send a
558 * message, then it must be removed from the message queues.
560 clear_ipc(rc);
562 /* Likewise, if another process was sending or receive a message to or from
563 * the exiting process, it must be alerted that process no longer is alive.
564 * Check all processes.
566 clear_ipc_refs(rc, EDEADSRCDST);
568 /* Finally, if the process was blocked on a VM request, remove it from the
569 * queue of processes waiting to be processed by VM.
571 clear_memreq(rc);
574 /*===========================================================================*
575 * clear_ipc_refs *
576 *===========================================================================*/
577 void clear_ipc_refs(
578 register struct proc *rc, /* slot of process to clean up */
579 int caller_ret /* code to return on callers */
582 /* Clear IPC references for a given process slot. */
583 struct proc *rp; /* iterate over process table */
584 int src_id;
586 /* Tell processes that sent asynchronous messages to 'rc' they are not
587 * going to be delivered */
588 while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
589 cancel_async(proc_addr(id_to_nr(src_id)), rc);
591 for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
592 if(isemptyp(rp))
593 continue;
595 /* Unset pending notification bits. */
596 unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
598 /* Unset pending asynchronous messages */
599 unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);
601 /* Check if process depends on given process. */
602 if (P_BLOCKEDON(rp) == rc->p_endpoint) {
603 rp->p_reg.retreg = caller_ret; /* return requested code */
604 clear_ipc(rp);
609 /*===========================================================================*
610 * kernel_call_resume *
611 *===========================================================================*/
612 void kernel_call_resume(struct proc *caller)
614 int result;
616 assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
617 assert(!RTS_ISSET(caller, RTS_VMREQUEST));
619 assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);
622 printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
623 caller->p_name, caller->p_endpoint,
624 caller->p_rts_flags, caller->p_misc_flags);
627 /* re-execute the kernel call, with MF_KCALL_RESUME still set so
628 * the call knows this is a retry.
630 result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
632 * we are resuming the kernel call so we have to remove this flag so it
633 * can be set again
635 caller->p_misc_flags &= ~MF_KCALL_RESUME;
636 kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
639 /*===========================================================================*
640 * sched_proc *
641 *===========================================================================*/
642 int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
644 /* Make sure the values given are within the allowed range.*/
645 if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
646 return(EINVAL);
648 if (quantum < 1 && quantum != -1)
649 return(EINVAL);
651 #ifdef CONFIG_SMP
652 if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
653 return(EINVAL);
654 if (cpu != -1 && !(cpu_is_ready(cpu)))
655 return EBADCPU;
656 #endif
658 /* In some cases, we might be rescheduling a runnable process. In such
659 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
660 * flag before the generic unset to dequeue/enqueue the process
663 /* FIXME this preempts the process, do we really want to do that ?*/
665 /* FIXME this is a problem for SMP if the processes currently runs on a
666 * different CPU */
667 if (proc_is_runnable(p)) {
668 #ifdef CONFIG_SMP
669 if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
670 smp_schedule_migrate_proc(p, cpu);
672 #endif
674 RTS_SET(p, RTS_NO_QUANTUM);
677 if (proc_is_runnable(p))
678 RTS_SET(p, RTS_NO_QUANTUM);
680 if (priority != -1)
681 p->p_priority = priority;
682 if (quantum != -1) {
683 p->p_quantum_size_ms = quantum;
684 p->p_cpu_time_left = ms_2_cpu_time(quantum);
686 #ifdef CONFIG_SMP
687 if (cpu != -1)
688 p->p_cpu = cpu;
689 #endif
691 if (niced)
692 p->p_misc_flags |= MF_NICED;
693 else
694 p->p_misc_flags &= ~MF_NICED;
696 /* Clear the scheduling bit and enqueue the process */
697 RTS_UNSET(p, RTS_NO_QUANTUM);
699 return OK;
702 /*===========================================================================*
703 * add_ipc_filter *
704 *===========================================================================*/
705 int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
706 size_t length)
708 int num_elements, r;
709 ipc_filter_t *ipcf, **ipcfp;
711 /* Validate arguments. */
712 if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
713 return EINVAL;
715 if (length % sizeof(ipc_filter_el_t) != 0)
716 return EINVAL;
718 num_elements = length / sizeof(ipc_filter_el_t);
719 if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
720 return E2BIG;
722 /* Allocate a new IPC filter slot. */
723 IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
724 if (ipcf == NULL)
725 return ENOMEM;
727 /* Fill details. */
728 ipcf->num_elements = num_elements;
729 ipcf->next = NULL;
730 r = data_copy(rp->p_endpoint, address,
731 KERNEL, (vir_bytes)ipcf->elements, length);
732 if (r == OK)
733 r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
734 if (r != OK) {
735 IPCF_POOL_FREE_SLOT(ipcf);
736 return r;
739 /* Add the new filter at the end of the IPC filter chain. */
740 for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
741 ipcfp = &(*ipcfp)->next)
743 *ipcfp = ipcf;
745 return OK;
748 /*===========================================================================*
749 * clear_ipc_filters *
750 *===========================================================================*/
751 void clear_ipc_filters(struct proc *rp)
753 ipc_filter_t *curr_ipcf, *ipcf;
755 ipcf = priv(rp)->s_ipcf;
756 while (ipcf != NULL) {
757 curr_ipcf = ipcf;
758 ipcf = ipcf->next;
759 IPCF_POOL_FREE_SLOT(curr_ipcf);
762 priv(rp)->s_ipcf = NULL;
764 /* VM is a special case here: since the cleared IPC filter may have
765 * blocked memory handling requests, we may now have to tell VM that
766 * there are "new" requests pending.
768 if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
769 if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
770 panic("send_sig failed");
773 /*===========================================================================*
774 * check_ipc_filter *
775 *===========================================================================*/
776 int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
778 ipc_filter_el_t *ipcf_el;
779 int i, num_elements, flags;
781 if (ipcf == NULL)
782 return OK;
784 num_elements = ipcf->num_elements;
785 flags = 0;
786 for (i = 0; i < num_elements; i++) {
787 ipcf_el = &ipcf->elements[i];
788 if (!IPCF_EL_CHECK(ipcf_el))
789 return EINVAL;
790 flags |= ipcf_el->flags;
793 if (fill_flags)
794 ipcf->flags = flags;
795 else if (ipcf->flags != flags)
796 return EINVAL;
797 return OK;
800 /*===========================================================================*
801 * allow_ipc_filtered_msg *
802 *===========================================================================*/
803 int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
804 vir_bytes m_src_v, message *m_src_p)
806 int i, r, num_elements, get_mtype, allow;
807 ipc_filter_t *ipcf;
808 ipc_filter_el_t *ipcf_el;
809 message m_buff;
811 ipcf = priv(rp)->s_ipcf;
812 if (ipcf == NULL)
813 return TRUE; /* no IPC filters, always allow */
815 if (m_src_p == NULL) {
816 assert(m_src_v != 0);
818 /* Should we copy in the message type? */
819 get_mtype = FALSE;
820 do {
821 #if DEBUG_DUMPIPCF
822 if (TRUE) {
823 #else
824 if (ipcf->flags & IPCF_MATCH_M_TYPE) {
825 #endif
826 get_mtype = TRUE;
827 break;
829 ipcf = ipcf->next;
830 } while (ipcf);
831 ipcf = priv(rp)->s_ipcf; /* reset to start */
833 /* If so, copy it in from the process. */
834 if (get_mtype) {
835 r = data_copy(src_e,
836 m_src_v + offsetof(message, m_type), KERNEL,
837 (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
838 if (r != OK) {
839 /* allow for now, this will fail later anyway */
840 #if DEBUG_DUMPIPCF
841 printf("KERNEL: allow_ipc_filtered_msg: data "
842 "copy error %d, allowing message...\n", r);
843 #endif
844 return TRUE;
847 m_src_p = &m_buff;
850 m_src_p->m_source = src_e;
852 /* See if the message is allowed. */
853 allow = (ipcf->type == IPCF_BLACKLIST);
854 do {
855 if (allow != (ipcf->type == IPCF_WHITELIST)) {
856 num_elements = ipcf->num_elements;
857 for (i = 0; i < num_elements; i++) {
858 ipcf_el = &ipcf->elements[i];
859 if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
860 allow = (ipcf->type == IPCF_WHITELIST);
861 break;
865 ipcf = ipcf->next;
866 } while (ipcf);
868 #if DEBUG_DUMPIPCF
869 printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
870 TRUE /*printparams*/);
871 #endif
873 return allow;
876 /*===========================================================================*
877 * allow_ipc_filtered_memreq *
878 *===========================================================================*/
879 int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
881 /* Determine whether VM should receive a request to handle memory
882 * that is the result of process 'src_rp' trying to access currently
883 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
884 * be given the request, FALSE otherwise.
887 struct proc *vmp;
888 message m_buf;
890 vmp = proc_addr(VM_PROC_NR);
892 /* If VM has no filter in place, all requests should go through. */
893 if (priv(vmp)->s_ipcf == NULL)
894 return TRUE;
896 /* VM obtains memory requests in response to a SIGKMEM signal, which
897 * is a notification sent from SYSTEM. Thus, if VM blocks such
898 * notifications, it also should not get any memory requests. Of
899 * course, VM should not be asking for requests in that case either,
900 * but the extra check doesn't hurt.
902 m_buf.m_type = NOTIFY_MESSAGE;
903 if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
904 return FALSE;
906 /* A more refined policy may be implemented here, for example to
907 * ensure that both the source and the destination (if different)
908 * are in the group of processes that VM wants to talk to. Since VM
909 * is basically not able to handle any memory requests during an
910 * update, we will not get here, and none of that is needed.
912 return TRUE;
915 /*===========================================================================*
916 * priv_add_irq *
917 *===========================================================================*/
918 int priv_add_irq(struct proc *rp, int irq)
920 struct priv *priv = priv(rp);
921 int i;
923 priv->s_flags |= CHECK_IRQ; /* Check IRQ */
925 /* When restarting a driver, check if it already has the permission */
926 for (i = 0; i < priv->s_nr_irq; i++) {
927 if (priv->s_irq_tab[i] == irq)
928 return OK;
931 i= priv->s_nr_irq;
932 if (i >= NR_IRQ) {
933 printf("do_privctl: %d already has %d irq's.\n",
934 rp->p_endpoint, i);
935 return ENOMEM;
937 priv->s_irq_tab[i]= irq;
938 priv->s_nr_irq++;
939 return OK;
942 /*===========================================================================*
943 * priv_add_io *
944 *===========================================================================*/
945 int priv_add_io(struct proc *rp, struct io_range *ior)
947 struct priv *priv = priv(rp);
948 int i;
950 priv->s_flags |= CHECK_IO_PORT; /* Check I/O accesses */
952 for (i = 0; i < priv->s_nr_io_range; i++) {
953 if (priv->s_io_tab[i].ior_base == ior->ior_base &&
954 priv->s_io_tab[i].ior_limit == ior->ior_limit)
955 return OK;
958 i= priv->s_nr_io_range;
959 if (i >= NR_IO_RANGE) {
960 printf("do_privctl: %d already has %d i/o ranges.\n",
961 rp->p_endpoint, i);
962 return ENOMEM;
965 priv->s_io_tab[i] = *ior;
966 priv->s_nr_io_range++;
967 return OK;
970 /*===========================================================================*
971 * priv_add_mem *
972 *===========================================================================*/
973 int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
975 struct priv *priv = priv(rp);
976 int i;
978 priv->s_flags |= CHECK_MEM; /* Check memory mappings */
980 /* When restarting a driver, check if it already has the permission */
981 for (i = 0; i < priv->s_nr_mem_range; i++) {
982 if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
983 priv->s_mem_tab[i].mr_limit == memr->mr_limit)
984 return OK;
987 i= priv->s_nr_mem_range;
988 if (i >= NR_MEM_RANGE) {
989 printf("do_privctl: %d already has %d mem ranges.\n",
990 rp->p_endpoint, i);
991 return ENOMEM;
993 priv->s_mem_tab[i]= *memr;
994 priv->s_nr_mem_range++;
995 return OK;