<sys/poll.h>
[minix3.git] / servers / rs / main.c
blob7ac1f4c49e4c83e210db4e111849758fefab16ff
/* Reincarnation Server.  This server starts new system services and detects
 * when they exit.  In case of errors, system services can be restarted.
 * The RS server periodically checks the status of all registered system
 * services to see whether they are still alive.  The system services are
 * expected to periodically send a heartbeat message.
 *
 * Changes:
 *   Nov 22, 2009: rewrite of boot process (Cristiano Giuffrida)
 *   Jul 22, 2005: Created (Jorrit N. Herder)
 */
11 #include "inc.h"
12 #include <fcntl.h>
13 #include "kernel/const.h"
14 #include "kernel/type.h"
15 #include "kernel/proc.h"
17 /* Declare some local functions. */
18 static void boot_image_info_lookup( endpoint_t endpoint, struct
19 boot_image *image, struct boot_image **ip, struct boot_image_priv **pp,
20 struct boot_image_sys **sp, struct boot_image_dev **dp);
21 static void catch_boot_init_ready(endpoint_t endpoint);
22 static void get_work(message *m_ptr, int *status_ptr);
24 /* SEF functions and variables. */
25 static void sef_local_startup(void);
26 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
27 static void sef_cb_signal_handler(int signo);
28 static int sef_cb_signal_manager(endpoint_t target, int signo);
31 /*===========================================================================*
32 * main *
33 *===========================================================================*/
34 int main(void)
36 /* This is the main routine of this service. The main loop consists of
37 * three major activities: getting new work, processing the work, and
38 * sending the reply. The loop never terminates, unless a panic occurs.
40 message m; /* request message */
41 int ipc_status; /* status code */
42 int call_nr, who_e,who_p; /* call number and caller */
43 int result; /* result to return */
44 int s;
46 /* SEF local startup. */
47 sef_local_startup();
49 if (OK != (s=sys_getmachine(&machine)))
50 panic("couldn't get machine info: %d", s);
52 if (OK != (s=sys_getkinfo(&kinfo)))
53 panic("couldn't get kernel kinfo: %d", s);
55 /* Main loop - get work and do it, forever. */
56 while (TRUE) {
58 /* Wait for request message. */
59 get_work(&m, &ipc_status);
60 who_e = m.m_source;
61 if(rs_isokendpt(who_e, &who_p) != OK) {
62 panic("message from bogus source: %d", who_e);
65 call_nr = m.m_type;
67 /* Now determine what to do. Four types of requests are expected:
68 * - Heartbeat messages (notifications from registered system services)
69 * - System notifications (synchronous alarm)
70 * - User requests (control messages to manage system services)
71 * - Ready messages (reply messages from registered services)
74 /* Notification messages are control messages and do not need a reply.
75 * These include heartbeat messages and system notifications.
77 if (is_ipc_notify(ipc_status)) {
78 switch (who_p) {
79 case CLOCK:
80 do_period(&m); /* check services status */
81 continue;
82 default: /* heartbeat notification */
83 if (rproc_ptr[who_p] != NULL) { /* mark heartbeat time */
84 rproc_ptr[who_p]->r_alive_tm = m.m_notify.timestamp;
85 } else {
86 printf("RS: warning: got unexpected notify message from %d\n",
87 m.m_source);
92 /* If we get this far, this is a normal request.
93 * Handle the request and send a reply to the caller.
95 else {
96 /* Handler functions are responsible for permission checking. */
97 switch(call_nr) {
98 /* User requests. */
99 case RS_UP: result = do_up(&m); break;
100 case RS_DOWN: result = do_down(&m); break;
101 case RS_REFRESH: result = do_refresh(&m); break;
102 case RS_RESTART: result = do_restart(&m); break;
103 case RS_SHUTDOWN: result = do_shutdown(&m); break;
104 case RS_UPDATE: result = do_update(&m); break;
105 case RS_CLONE: result = do_clone(&m); break;
106 case RS_EDIT: result = do_edit(&m); break;
107 case RS_GETSYSINFO: result = do_getsysinfo(&m); break;
108 case RS_LOOKUP: result = do_lookup(&m); break;
109 /* Ready messages. */
110 case RS_INIT: result = do_init_ready(&m); break;
111 case RS_LU_PREPARE: result = do_upd_ready(&m); break;
112 default:
113 printf("RS: warning: got unexpected request %d from %d\n",
114 m.m_type, m.m_source);
115 result = ENOSYS;
118 /* Finally send reply message, unless disabled. */
119 if (result != EDONTREPLY) {
120 m.m_type = result;
121 reply(who_e, NULL, &m);
127 /*===========================================================================*
128 * sef_local_startup *
129 *===========================================================================*/
130 static void sef_local_startup()
132 /* Register init callbacks. */
133 sef_setcb_init_response(do_init_ready);
134 sef_setcb_init_fresh(sef_cb_init_fresh);
135 sef_setcb_init_restart(sef_cb_init_fail);
137 /* Register live update callbacks. */
138 sef_setcb_lu_response(do_upd_ready);
140 /* Register signal callbacks. */
141 sef_setcb_signal_handler(sef_cb_signal_handler);
142 sef_setcb_signal_manager(sef_cb_signal_manager);
144 /* Let SEF perform startup. */
145 sef_startup();
148 /*===========================================================================*
149 * sef_cb_init_fresh *
150 *===========================================================================*/
151 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *UNUSED(info))
153 /* Initialize the reincarnation server. */
154 struct boot_image *ip;
155 int s,i;
156 int nr_image_srvs, nr_image_priv_srvs, nr_uncaught_init_srvs;
157 struct rproc *rp;
158 struct rprocpub *rpub;
159 struct boot_image image[NR_BOOT_PROCS];
160 struct boot_image_priv *boot_image_priv;
161 struct boot_image_sys *boot_image_sys;
162 struct boot_image_dev *boot_image_dev;
163 int ipc_to;
164 int *calls;
165 int all_c[] = { ALL_C, NULL_C };
166 int no_c[] = { NULL_C };
168 /* See if we run in verbose mode. */
169 env_parse("rs_verbose", "d", 0, &rs_verbose, 0, 1);
171 if ((s = sys_getinfo(GET_HZ, &system_hz, sizeof(system_hz), 0, 0)) != OK)
172 panic("Cannot get system timer frequency\n");
174 /* Initialize the global init descriptor. */
175 rinit.rproctab_gid = cpf_grant_direct(ANY, (vir_bytes) rprocpub,
176 sizeof(rprocpub), CPF_READ);
177 if(!GRANT_VALID(rinit.rproctab_gid)) {
178 panic("unable to create rprocpub table grant: %d", rinit.rproctab_gid);
181 /* Initialize some global variables. */
182 rupdate.flags = 0;
183 shutting_down = FALSE;
185 /* Get a copy of the boot image table. */
186 if ((s = sys_getimage(image)) != OK) {
187 panic("unable to get copy of boot image table: %d", s);
190 /* Determine the number of system services in the boot image table. */
191 nr_image_srvs = 0;
192 for(i=0;i<NR_BOOT_PROCS;i++) {
193 ip = &image[i];
195 /* System services only. */
196 if(iskerneln(_ENDPOINT_P(ip->endpoint))) {
197 continue;
199 nr_image_srvs++;
202 /* Determine the number of entries in the boot image priv table and make sure
203 * it matches the number of system services in the boot image table.
205 nr_image_priv_srvs = 0;
206 for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
207 boot_image_priv = &boot_image_priv_table[i];
209 /* System services only. */
210 if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
211 continue;
213 nr_image_priv_srvs++;
215 if(nr_image_srvs != nr_image_priv_srvs) {
216 panic("boot image table and boot image priv table mismatch");
219 /* Reset the system process table. */
220 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) {
221 rp->r_flags = 0;
222 rp->r_pub = &rprocpub[rp - rproc];
223 rp->r_pub->in_use = FALSE;
226 /* Initialize the system process table in 4 steps, each of them following
227 * the appearance of system services in the boot image priv table.
228 * - Step 1: set priviliges, sys properties, and dev properties (if any)
229 * for every system service.
231 for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
232 boot_image_priv = &boot_image_priv_table[i];
234 /* System services only. */
235 if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
236 continue;
239 /* Lookup the corresponding entries in other tables. */
240 boot_image_info_lookup(boot_image_priv->endpoint, image,
241 &ip, NULL, &boot_image_sys, &boot_image_dev);
242 rp = &rproc[boot_image_priv - boot_image_priv_table];
243 rpub = rp->r_pub;
246 * Set privileges.
248 /* Get label. */
249 strcpy(rpub->label, boot_image_priv->label);
251 /* Force a static priv id for system services in the boot image. */
252 rp->r_priv.s_id = static_priv_id(
253 _ENDPOINT_P(boot_image_priv->endpoint));
255 /* Initialize privilege bitmaps and signal manager. */
256 rp->r_priv.s_flags = boot_image_priv->flags; /* priv flags */
257 rp->r_priv.s_trap_mask= SRV_OR_USR(rp, SRV_T, USR_T); /* traps */
258 ipc_to = SRV_OR_USR(rp, SRV_M, USR_M); /* targets */
259 fill_send_mask(&rp->r_priv.s_ipc_to, ipc_to == ALL_M);
260 rp->r_priv.s_sig_mgr= SRV_OR_USR(rp, SRV_SM, USR_SM); /* sig mgr */
261 rp->r_priv.s_bak_sig_mgr = NONE; /* backup sig mgr */
263 /* Initialize kernel call mask bitmap. */
264 calls = SRV_OR_USR(rp, SRV_KC, USR_KC) == ALL_C ? all_c : no_c;
265 fill_call_mask(calls, NR_SYS_CALLS,
266 rp->r_priv.s_k_call_mask, KERNEL_CALL, TRUE);
268 /* Set the privilege structure. RS and VM are exceptions and are already
269 * running.
271 if(boot_image_priv->endpoint != RS_PROC_NR &&
272 boot_image_priv->endpoint != VM_PROC_NR) {
273 if ((s = sys_privctl(ip->endpoint, SYS_PRIV_SET_SYS, &(rp->r_priv)))
274 != OK) {
275 panic("unable to set privilege structure: %d", s);
279 /* Synch the privilege structure with the kernel. */
280 if ((s = sys_getpriv(&(rp->r_priv), ip->endpoint)) != OK) {
281 panic("unable to synch privilege structure: %d", s);
285 * Set sys properties.
287 rpub->sys_flags = boot_image_sys->flags; /* sys flags */
290 * Set dev properties.
292 rpub->dev_nr = boot_image_dev->dev_nr; /* major device number */
294 /* Build command settings. This will also set the process name. */
295 strlcpy(rp->r_cmd, ip->proc_name, sizeof(rp->r_cmd));
296 rp->r_script[0]= '\0';
297 build_cmd_dep(rp);
299 /* Initialize vm call mask bitmap. */
300 calls = SRV_OR_USR(rp, SRV_VC, USR_VC) == ALL_C ? all_c : no_c;
301 fill_call_mask(calls, NR_VM_CALLS, rpub->vm_call_mask, VM_RQ_BASE, TRUE);
303 /* Scheduling parameters. */
304 rp->r_scheduler = SRV_OR_USR(rp, SRV_SCH, USR_SCH);
305 rp->r_priority = SRV_OR_USR(rp, SRV_Q, USR_Q);
306 rp->r_quantum = SRV_OR_USR(rp, SRV_QT, USR_QT);
308 /* Get some settings from the boot image table. */
309 rpub->endpoint = ip->endpoint;
311 /* Set some defaults. */
312 rp->r_old_rp = NULL; /* no old version yet */
313 rp->r_new_rp = NULL; /* no new version yet */
314 rp->r_prev_rp = NULL; /* no prev replica yet */
315 rp->r_next_rp = NULL; /* no next replica yet */
316 rp->r_uid = 0; /* root */
317 rp->r_check_tm = 0; /* not checked yet */
318 getticks(&rp->r_alive_tm); /* currently alive */
319 rp->r_stop_tm = 0; /* not exiting yet */
320 rp->r_restarts = 0; /* no restarts so far */
321 rp->r_period = 0; /* no period yet */
322 rp->r_exec = NULL; /* no in-memory copy yet */
323 rp->r_exec_len = 0;
325 /* Mark as in use and active. */
326 rp->r_flags = RS_IN_USE | RS_ACTIVE;
327 rproc_ptr[_ENDPOINT_P(rpub->endpoint)]= rp;
328 rpub->in_use = TRUE;
331 /* - Step 2: allow every system service in the boot image to run. */
332 nr_uncaught_init_srvs = 0;
333 for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
334 boot_image_priv = &boot_image_priv_table[i];
336 /* System services only. */
337 if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
338 continue;
341 /* Lookup the corresponding slot in the system process table. */
342 rp = &rproc[boot_image_priv - boot_image_priv_table];
343 rpub = rp->r_pub;
345 /* RS/VM are already running as we speak. */
346 if(boot_image_priv->endpoint == RS_PROC_NR ||
347 boot_image_priv->endpoint == VM_PROC_NR) {
348 if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) {
349 panic("unable to initialize %d: %d", boot_image_priv->endpoint, s);
351 continue;
354 /* Allow the service to run. */
355 if ((s = sched_init_proc(rp)) != OK) {
356 panic("unable to initialize scheduling: %d", s);
358 if ((s = sys_privctl(rpub->endpoint, SYS_PRIV_ALLOW, NULL)) != OK) {
359 panic("unable to initialize privileges: %d", s);
362 /* Initialize service. We assume every service will always get
363 * back to us here at boot time.
365 if(boot_image_priv->flags & SYS_PROC) {
366 if ((s = init_service(rp, SEF_INIT_FRESH)) != OK) {
367 panic("unable to initialize service: %d", s);
369 if(rpub->sys_flags & SF_SYNCH_BOOT) {
370 /* Catch init ready message now to synchronize. */
371 catch_boot_init_ready(rpub->endpoint);
373 else {
374 /* Catch init ready message later. */
375 nr_uncaught_init_srvs++;
380 /* - Step 3: let every system service complete initialization by
381 * catching all the init ready messages left.
383 while(nr_uncaught_init_srvs) {
384 catch_boot_init_ready(ANY);
385 nr_uncaught_init_srvs--;
388 /* - Step 4: all the system services in the boot image are now running.
389 * Complete the initialization of the system process table in collaboration
390 * with other system services.
392 for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
393 boot_image_priv = &boot_image_priv_table[i];
395 /* System services only. */
396 if(iskerneln(_ENDPOINT_P(boot_image_priv->endpoint))) {
397 continue;
400 /* Lookup the corresponding slot in the system process table. */
401 rp = &rproc[boot_image_priv - boot_image_priv_table];
402 rpub = rp->r_pub;
404 /* Get pid from PM. */
405 rp->r_pid = getnpid(rpub->endpoint);
406 if(rp->r_pid < 0) {
407 panic("unable to get pid: %d", rp->r_pid);
411 /* Set alarm to periodically check service status. */
412 if (OK != (s=sys_setalarm(RS_DELTA_T, 0)))
413 panic("couldn't set alarm: %d", s);
415 #if USE_LIVEUPDATE
416 /* Now create a new RS instance and let the current
417 * instance live update into the replica. Clone RS' own slot first.
419 rp = rproc_ptr[_ENDPOINT_P(RS_PROC_NR)];
420 if((s = clone_slot(rp, &replica_rp)) != OK) {
421 panic("unable to clone current RS instance: %d", s);
424 /* Fork a new RS instance with root:operator. */
425 pid = srv_fork(0, 0);
426 if(pid < 0) {
427 panic("unable to fork a new RS instance: %d", pid);
429 replica_pid = pid ? pid : getpid();
430 if ((s = getprocnr(replica_pid, &replica_endpoint)) != 0)
431 panic("unable to get replica endpoint: %d", s);
432 replica_rp->r_pid = replica_pid;
433 replica_rp->r_pub->endpoint = replica_endpoint;
435 if(pid == 0) {
436 /* New RS instance running. */
438 /* Live update the old instance into the new one. */
439 s = update_service(&rp, &replica_rp, RS_SWAP);
440 if(s != OK) {
441 panic("unable to live update RS: %d", s);
443 cpf_reload();
445 /* Clean up the old RS instance, the new instance will take over. */
446 cleanup_service(rp);
448 /* Ask VM to pin memory for the new RS instance. */
449 if((s = vm_memctl(RS_PROC_NR, VM_RS_MEM_PIN)) != OK) {
450 panic("unable to pin memory for the new RS instance: %d", s);
453 else {
454 /* Old RS instance running. */
456 /* Set up privileges for the new instance and let it run. */
457 s = sys_privctl(replica_endpoint, SYS_PRIV_SET_SYS, &(replica_rp->r_priv));
458 if(s != OK) {
459 panic("unable to set privileges for the new RS instance: %d", s);
461 if ((s = sched_init_proc(replica_rp)) != OK) {
462 panic("unable to initialize RS replica scheduling: %d", s);
464 s = sys_privctl(replica_endpoint, SYS_PRIV_YIELD, NULL);
465 if(s != OK) {
466 panic("unable to yield control to the new RS instance: %d", s);
468 NOT_REACHABLE;
470 #endif /* USE_LIVEUPDATE */
472 return(OK);
/*===========================================================================*
 *		            sef_cb_signal_handler                            *
 *===========================================================================*/
static void sef_cb_signal_handler(int signo)
{
/* Signal callback registered with SEF in sef_local_startup(). Only two
 * signals are acted upon; every other signal number is silently ignored.
 */
  if (signo == SIGCHLD) {
      do_sigchld();
  }
  else if (signo == SIGTERM) {
      /* The shutdown handler is called without a request message. */
      do_shutdown(NULL);
  }
}
491 /*===========================================================================*
492 * sef_cb_signal_manager *
493 *===========================================================================*/
494 static int sef_cb_signal_manager(endpoint_t target, int signo)
496 /* Process system signal on behalf of the kernel. */
497 int target_p;
498 struct rproc *rp;
499 struct rprocpub *rpub;
500 message m;
502 /* Lookup slot. */
503 if(rs_isokendpt(target, &target_p) != OK || rproc_ptr[target_p] == NULL) {
504 if(rs_verbose)
505 printf("RS: ignoring spurious signal %d for process %d\n",
506 signo, target);
507 return OK; /* clear the signal */
509 rp = rproc_ptr[target_p];
510 rpub = rp->r_pub;
512 /* Don't bother if a termination signal has already been processed. */
513 if((rp->r_flags & RS_TERMINATED) && !(rp->r_flags & RS_EXITING)) {
514 return EDEADEPT; /* process is gone */
517 /* Ignore external signals for inactive service instances. */
518 if( !(rp->r_flags & RS_ACTIVE) && !(rp->r_flags & RS_EXITING)) {
519 if(rs_verbose)
520 printf("RS: ignoring signal %d for inactive %s\n",
521 signo, srv_to_string(rp));
522 return OK; /* clear the signal */
525 if(rs_verbose)
526 printf("RS: %s got %s signal %d\n", srv_to_string(rp),
527 SIGS_IS_TERMINATION(signo) ? "termination" : "non-termination",signo);
529 /* Print stacktrace if necessary. */
530 if(SIGS_IS_STACKTRACE(signo)) {
531 sys_diagctl_stacktrace(target);
534 /* In case of termination signal handle the event. */
535 if(SIGS_IS_TERMINATION(signo)) {
536 rp->r_flags |= RS_TERMINATED;
537 terminate_service(rp);
539 return EDEADEPT; /* process is now gone */
542 /* Translate every non-termination signal into a message. */
543 m.m_type = SIGS_SIGNAL_RECEIVED;
544 m.SIGS_SIG_NUM = signo;
545 asynsend3(rpub->endpoint, &m, AMF_NOREPLY);
547 return OK; /* signal has been delivered */
550 /*===========================================================================*
551 * boot_image_info_lookup *
552 *===========================================================================*/
553 static void boot_image_info_lookup(endpoint, image, ip, pp, sp, dp)
554 endpoint_t endpoint;
555 struct boot_image *image;
556 struct boot_image **ip;
557 struct boot_image_priv **pp;
558 struct boot_image_sys **sp;
559 struct boot_image_dev **dp;
561 /* Lookup entries in boot image tables. */
562 int i;
564 /* When requested, locate the corresponding entry in the boot image table
565 * or panic if not found.
567 if(ip) {
568 for (i=0; i < NR_BOOT_PROCS; i++) {
569 if(image[i].endpoint == endpoint) {
570 *ip = &image[i];
571 break;
574 if(i == NR_BOOT_PROCS) {
575 panic("boot image table lookup failed");
579 /* When requested, locate the corresponding entry in the boot image priv table
580 * or panic if not found.
582 if(pp) {
583 for (i=0; boot_image_priv_table[i].endpoint != NULL_BOOT_NR; i++) {
584 if(boot_image_priv_table[i].endpoint == endpoint) {
585 *pp = &boot_image_priv_table[i];
586 break;
589 if(i == NULL_BOOT_NR) {
590 panic("boot image priv table lookup failed");
594 /* When requested, locate the corresponding entry in the boot image sys table
595 * or resort to the default entry if not found.
597 if(sp) {
598 for (i=0; boot_image_sys_table[i].endpoint != DEFAULT_BOOT_NR; i++) {
599 if(boot_image_sys_table[i].endpoint == endpoint) {
600 *sp = &boot_image_sys_table[i];
601 break;
604 if(boot_image_sys_table[i].endpoint == DEFAULT_BOOT_NR) {
605 *sp = &boot_image_sys_table[i]; /* accept the default entry */
609 /* When requested, locate the corresponding entry in the boot image dev table
610 * or resort to the default entry if not found.
612 if(dp) {
613 for (i=0; boot_image_dev_table[i].endpoint != DEFAULT_BOOT_NR; i++) {
614 if(boot_image_dev_table[i].endpoint == endpoint) {
615 *dp = &boot_image_dev_table[i];
616 break;
619 if(boot_image_dev_table[i].endpoint == DEFAULT_BOOT_NR) {
620 *dp = &boot_image_dev_table[i]; /* accept the default entry */
625 /*===========================================================================*
626 * catch_boot_init_ready *
627 *===========================================================================*/
628 static void catch_boot_init_ready(endpoint)
629 endpoint_t endpoint;
631 /* Block and catch an init ready message from the given source. */
632 int r;
633 int ipc_status;
634 message m;
635 struct rproc *rp;
636 int result;
638 /* Receive init ready message. */
639 if ((r = sef_receive_status(endpoint, &m, &ipc_status)) != OK) {
640 panic("unable to receive init reply: %d", r);
642 if(m.m_type != RS_INIT) {
643 panic("unexpected reply from service: %d", m.m_source);
645 result = m.RS_INIT_RESULT;
646 rp = rproc_ptr[_ENDPOINT_P(m.m_source)];
648 /* Check result. */
649 if(result != OK) {
650 panic("unable to complete init for service: %d", m.m_source);
653 /* Send a reply to unblock the service. */
654 m.m_type = OK;
655 reply(m.m_source, rp, &m);
657 /* Mark the slot as no longer initializing. */
658 rp->r_flags &= ~RS_INITIALIZING;
659 rp->r_check_tm = 0;
660 getticks(&rp->r_alive_tm);
663 /*===========================================================================*
664 * get_work *
665 *===========================================================================*/
666 static void get_work(m_ptr, status_ptr)
667 message *m_ptr; /* pointer to message */
668 int *status_ptr; /* pointer to status */
670 int r;
671 if (OK != (r=sef_receive_status(ANY, m_ptr, status_ptr)))
672 panic("sef_receive_status failed: %d", r);