3 * Jan 22, 2010: Created (Cristiano Giuffrida)
8 /*===========================================================================*
10 *===========================================================================*/
11 PUBLIC
int do_up(m_ptr
)
12 message
*m_ptr
; /* request message pointer */
14 /* A request was made to start a new system service. */
16 struct rprocpub
*rpub
;
18 struct rs_start rs_start
;
21 /* Check if the call can be allowed. */
22 if((r
= check_call_permission(m_ptr
->m_source
, RS_UP
, NULL
)) != OK
)
25 /* Allocate a new system service slot. */
28 printf("RS: do_up: unable to allocate a new slot: %d\n", r
);
33 /* Copy the request structure. */
34 r
= copy_rs_start(m_ptr
->m_source
, m_ptr
->RS_CMD_ADDR
, &rs_start
);
38 noblock
= (rs_start
.rss_flags
& RSS_NOBLOCK
);
40 /* Initialize the slot as requested. */
41 r
= init_slot(rp
, &rs_start
, m_ptr
->m_source
);
43 printf("RS: do_up: unable to init the new slot: %d\n", r
);
47 /* Check for duplicates */
48 if(lookup_slot_by_label(rpub
->label
)) {
49 printf("RS: service with the same label '%s' already exists\n",
53 if(rpub
->dev_nr
>0 && lookup_slot_by_dev_nr(rpub
->dev_nr
)) {
54 printf("RS: service with the same device number %d already exists\n",
59 /* All information was gathered. Now try to start the system service. */
60 r
= start_service(rp
);
61 activate_service(rp
, NULL
);
66 /* Unblock the caller immediately if requested. */
71 /* Late reply - send a reply when service completes initialization. */
72 rp
->r_flags
|= RS_LATEREPLY
;
73 rp
->r_caller
= m_ptr
->m_source
;
74 rp
->r_caller_request
= RS_UP
;
79 /*===========================================================================*
81 *===========================================================================*/
82 PUBLIC
int do_down(message
*m_ptr
)
84 register struct rproc
*rp
;
85 register struct rprocpub
*rpub
;
87 char label
[RS_MAX_LABEL_LEN
];
90 s
= copy_label(m_ptr
->m_source
, m_ptr
->RS_CMD_ADDR
,
91 m_ptr
->RS_CMD_LEN
, label
, sizeof(label
));
96 /* Lookup slot by label. */
97 rp
= lookup_slot_by_label(label
);
100 printf("RS: do_down: service '%s' not found\n", label
);
105 /* Check if the call can be allowed. */
106 if((s
= check_call_permission(m_ptr
->m_source
, RS_DOWN
, rp
)) != OK
)
110 if (rp
->r_flags
& RS_TERMINATED
) {
111 /* A recovery script is requesting us to bring down the service.
112 * The service is already gone, simply perform cleanup.
115 printf("RS: recovery script performs service down...\n");
116 unpublish_service(rp
);
117 unpublish_process(rp
);
121 stop_service(rp
,RS_EXITING
);
123 /* Late reply - send a reply when service dies. */
124 rp
->r_flags
|= RS_LATEREPLY
;
125 rp
->r_caller
= m_ptr
->m_source
;
126 rp
->r_caller_request
= RS_DOWN
;
131 /*===========================================================================*
133 *===========================================================================*/
134 PUBLIC
int do_restart(message
*m_ptr
)
138 char label
[RS_MAX_LABEL_LEN
];
139 char script
[MAX_SCRIPT_LEN
];
142 s
= copy_label(m_ptr
->m_source
, m_ptr
->RS_CMD_ADDR
,
143 m_ptr
->RS_CMD_LEN
, label
, sizeof(label
));
148 /* Lookup slot by label. */
149 rp
= lookup_slot_by_label(label
);
152 printf("RS: do_restart: service '%s' not found\n", label
);
156 /* Check if the call can be allowed. */
157 if((r
= check_call_permission(m_ptr
->m_source
, RS_RESTART
, rp
)) != OK
)
160 /* We can only be asked to restart a service from a recovery script. */
161 if (! (rp
->r_flags
& RS_TERMINATED
) ) {
163 printf("RS: %s is still running\n", srv_to_string(rp
));
168 printf("RS: recovery script performs service restart...\n");
170 /* Restart the service, but make sure we don't call the script again. */
171 strcpy(script
, rp
->r_script
);
172 rp
->r_script
[0] = '\0';
174 strcpy(rp
->r_script
, script
);
180 /*===========================================================================*
182 *===========================================================================*/
183 PUBLIC
int do_refresh(message
*m_ptr
)
185 register struct rproc
*rp
;
186 register struct rprocpub
*rpub
;
188 char label
[RS_MAX_LABEL_LEN
];
191 s
= copy_label(m_ptr
->m_source
, m_ptr
->RS_CMD_ADDR
,
192 m_ptr
->RS_CMD_LEN
, label
, sizeof(label
));
197 /* Lookup slot by label. */
198 rp
= lookup_slot_by_label(label
);
201 printf("RS: do_refresh: service '%s' not found\n", label
);
206 /* Check if the call can be allowed. */
207 if((s
= check_call_permission(m_ptr
->m_source
, RS_REFRESH
, rp
)) != OK
)
210 /* Refresh service. */
212 printf("RS: %s refreshing\n", srv_to_string(rp
));
213 stop_service(rp
,RS_REFRESHING
);
218 /*===========================================================================*
220 *===========================================================================*/
221 PUBLIC
int do_shutdown(message
*m_ptr
)
227 /* Check if the call can be allowed. */
229 if((r
= check_call_permission(m_ptr
->m_source
, RS_SHUTDOWN
, NULL
)) != OK
)
234 printf("RS: shutting down...\n");
236 /* Set flag to tell RS we are shutting down. */
237 shutting_down
= TRUE
;
239 /* Don't restart dead services. */
240 for (slot_nr
= 0; slot_nr
< NR_SYS_PROCS
; slot_nr
++) {
241 rp
= &rproc
[slot_nr
];
242 if (rp
->r_flags
& RS_IN_USE
) {
243 rp
->r_flags
|= RS_EXITING
;
249 /*===========================================================================*
251 *===========================================================================*/
252 PUBLIC
int do_init_ready(message
*m_ptr
)
256 struct rprocpub
*rpub
;
259 who_p
= _ENDPOINT_P(m_ptr
->m_source
);
260 rp
= rproc_ptr
[who_p
];
262 result
= m_ptr
->RS_INIT_RESULT
;
264 /* Make sure the originating service was requested to initialize. */
265 if(! (rp
->r_flags
& RS_INITIALIZING
) ) {
267 printf("RS: do_init_ready: got unexpected init ready msg from %d\n",
272 /* Check if something went wrong and the service failed to init.
273 * In that case, kill the service.
277 printf("RS: %s initialization error: %s\n", srv_to_string(rp
),
278 init_strerror(result
));
279 crash_service(rp
); /* simulate crash */
283 /* Mark the slot as no longer initializing. */
284 rp
->r_flags
&= ~RS_INITIALIZING
;
286 getuptime(&rp
->r_alive_tm
);
288 /* See if a late reply has to be sent. */
292 printf("RS: %s initialized\n", srv_to_string(rp
));
294 /* If the service has completed initialization after a live
295 * update, end the update now.
297 if(rp
->r_flags
& RS_UPDATING
) {
298 printf("RS: update succeeded\n");
302 /* If the service has completed initialization after a crash
303 * make the new instance active and cleanup the old replica.
306 activate_service(rp
, rp
->r_prev_rp
);
307 cleanup_service(rp
->r_prev_rp
);
308 rp
->r_prev_rp
= NULL
;
311 printf("RS: %s completed restart\n", srv_to_string(rp
));
317 /*===========================================================================*
319 *===========================================================================*/
320 PUBLIC
int do_update(message
*m_ptr
)
323 struct rproc
*new_rp
;
324 struct rprocpub
*rpub
;
325 struct rs_start rs_start
;
328 char label
[RS_MAX_LABEL_LEN
];
332 /* Copy the request structure. */
333 s
= copy_rs_start(m_ptr
->m_source
, m_ptr
->RS_CMD_ADDR
, &rs_start
);
337 noblock
= (rs_start
.rss_flags
& RSS_NOBLOCK
);
340 s
= copy_label(m_ptr
->m_source
, rs_start
.rss_label
.l_addr
,
341 rs_start
.rss_label
.l_len
, label
, sizeof(label
));
346 /* Lookup slot by label. */
347 rp
= lookup_slot_by_label(label
);
350 printf("RS: do_update: service '%s' not found\n", label
);
355 /* Check if the call can be allowed. */
356 if((s
= check_call_permission(m_ptr
->m_source
, RS_UPDATE
, rp
)) != OK
)
359 /* Retrieve live update state. */
360 lu_state
= m_ptr
->RS_LU_STATE
;
361 if(lu_state
== SEF_LU_STATE_NULL
) {
365 /* Retrieve prepare max time. */
366 prepare_maxtime
= m_ptr
->RS_LU_PREPARE_MAXTIME
;
367 if(prepare_maxtime
) {
368 if(prepare_maxtime
< 0 || prepare_maxtime
> RS_MAX_PREPARE_MAXTIME
) {
373 prepare_maxtime
= RS_DEFAULT_PREPARE_MAXTIME
;
376 /* Make sure we are not already updating. */
377 if(rupdate
.flags
& RS_UPDATING
) {
379 printf("RS: do_update: an update is already in progress\n");
383 /* Allocate a system service slot for the new version. */
384 s
= alloc_slot(&new_rp
);
386 printf("RS: do_update: unable to allocate a new slot: %d\n", s
);
390 /* Initialize the slot as requested. */
391 s
= init_slot(new_rp
, &rs_start
, m_ptr
->m_source
);
393 printf("RS: do_update: unable to init the new slot: %d\n", s
);
397 /* Let the new version inherit defaults from the old one. */
398 inherit_service_defaults(rp
, new_rp
);
400 /* Create new version of the service but don't let it run. */
401 s
= create_service(new_rp
);
403 printf("RS: do_update: unable to create a new service: %d\n", s
);
407 /* Publish process-wide properties. */
408 s
= publish_process(new_rp
);
410 printf("RS: do_update: publish_process failed: %d\n", s
);
414 /* Link old version to new version and mark both as updating. */
415 rp
->r_new_rp
= new_rp
;
416 new_rp
->r_old_rp
= rp
;
417 rp
->r_flags
|= RS_UPDATING
;
418 rp
->r_new_rp
->r_flags
|= RS_UPDATING
;
419 rupdate
.flags
|= RS_UPDATING
;
420 getuptime(&rupdate
.prepare_tm
);
421 rupdate
.prepare_maxtime
= prepare_maxtime
;
425 printf("RS: %s updating\n", srv_to_string(rp
));
427 /* Request to update. */
428 m_ptr
->m_type
= RS_LU_PREPARE
;
429 asynsend3(rpub
->endpoint
, m_ptr
, AMF_NOREPLY
);
431 /* Unblock the caller immediately if requested. */
436 /* Late reply - send a reply when the new version completes initialization. */
437 rp
->r_flags
|= RS_LATEREPLY
;
438 rp
->r_caller
= m_ptr
->m_source
;
439 rp
->r_caller_request
= RS_UPDATE
;
444 /*===========================================================================*
446 *===========================================================================*/
447 PUBLIC
int do_upd_ready(message
*m_ptr
)
449 struct rproc
*rp
, *old_rp
, *new_rp
;
454 who_p
= _ENDPOINT_P(m_ptr
->m_source
);
455 rp
= rproc_ptr
[who_p
];
456 result
= m_ptr
->RS_LU_RESULT
;
458 /* Make sure the originating service was requested to prepare for update. */
459 if(rp
!= rupdate
.rp
) {
461 printf("RS: do_upd_ready: got unexpected update ready msg from %d\n",
466 /* Check if something went wrong and the service failed to prepare
467 * for the update. In that case, end the update process. The old version will
468 * be replied to and continue executing.
473 printf("RS: update failed: %s\n", lu_strerror(result
));
477 /* Perform the update. */
479 new_rp
= rp
->r_new_rp
;
480 r
= update_service(&old_rp
, &new_rp
);
483 printf("RS: update failed: error %d\n", r
);
487 /* Let the new version run. */
488 r
= run_service(new_rp
, SEF_INIT_LU
);
490 update_service(&new_rp
, &old_rp
); /* rollback, can't fail. */
492 printf("RS: update failed: error %d\n", r
);
499 /*===========================================================================*
501 *===========================================================================*/
502 PUBLIC
void do_period(m_ptr
)
505 register struct rproc
*rp
;
506 register struct rprocpub
*rpub
;
507 clock_t now
= m_ptr
->NOTIFY_TIMESTAMP
;
511 /* If an update is in progress, check its status. */
512 if(rupdate
.flags
& RS_UPDATING
) {
513 update_period(m_ptr
);
516 /* Search system services table. Only check slots that are in use and not
519 for (rp
=BEG_RPROC_ADDR
; rp
<END_RPROC_ADDR
; rp
++) {
521 if ((rp
->r_flags
& RS_IN_USE
) && !(rp
->r_flags
& RS_UPDATING
)) {
523 /* Compute period. */
524 period
= rpub
->period
;
525 if(rp
->r_flags
& RS_INITIALIZING
) {
529 /* If the service is to be revived (because it repeatedly exited,
530 * and was not directly restarted), the binary backoff field is
533 if (rp
->r_backoff
> 0) {
535 if (rp
->r_backoff
== 0) {
540 /* If the service was signaled with a SIGTERM and fails to respond,
541 * kill the system service with a SIGKILL signal.
543 else if (rp
->r_stop_tm
> 0 && now
- rp
->r_stop_tm
> 2*RS_DELTA_T
545 crash_service(rp
); /* simulate crash */
549 /* There seems to be no special conditions. If the service has a
550 * period assigned check its status.
552 else if (period
> 0) {
554 /* Check if an answer to a status request is still pending. If
555 * the service didn't respond within time, kill it to simulate
556 * a crash. The failure will be detected and the service will
557 * be restarted automatically.
559 if (rp
->r_alive_tm
< rp
->r_check_tm
) {
560 if (now
- rp
->r_alive_tm
> 2*period
&&
561 rp
->r_pid
> 0 && !(rp
->r_flags
& RS_NOPINGREPLY
)) {
563 printf("RS: %s reported late\n",
565 rp
->r_flags
|= RS_NOPINGREPLY
;
566 crash_service(rp
); /* simulate crash */
570 /* No answer pending. Check if a period expired since the last
571 * check and, if so request the system service's status.
573 else if (now
- rp
->r_check_tm
> rpub
->period
) {
574 notify(rpub
->endpoint
); /* request status */
575 rp
->r_check_tm
= now
; /* mark time */
581 /* Reschedule a synchronous alarm for the next period. */
582 if (OK
!= (s
=sys_setalarm(RS_DELTA_T
, 0)))
583 panic("couldn't set alarm: %d", s
);
586 /*===========================================================================*
588 *===========================================================================*/
589 PUBLIC
void do_sigchld()
591 /* PM informed us that there are dead children to cleanup. Go get them. */
596 struct rprocpub
*rpub
;
600 printf("RS: got SIGCHLD signal, cleaning up dead children\n");
602 while ( (pid
= waitpid(-1, &status
, WNOHANG
)) != 0 ) {
603 rp
= lookup_slot_by_pid(pid
);
608 printf("RS: %s exited via another signal manager\n",
611 /* The slot is still there. This means RS is not the signal
612 * manager assigned to the process. Ignore the event but
613 * free slots for all the service instances and send a late
614 * reply if necessary.
616 get_service_instances(rp
, &rps
, &nr_rps
);
617 for(i
=0;i
<nr_rps
;i
++) {
618 if(rupdate
.flags
& RS_UPDATING
) {
619 rupdate
.flags
&= ~RS_UPDATING
;
627 /*===========================================================================*
629 *===========================================================================*/
630 PUBLIC
int do_getsysinfo(m_ptr
)
633 vir_bytes src_addr
, dst_addr
;
638 /* Check if the call can be allowed. */
639 if((s
= check_call_permission(m_ptr
->m_source
, 0, NULL
)) != OK
)
642 switch(m_ptr
->m1_i1
) {
644 src_addr
= (vir_bytes
) rproc
;
645 len
= sizeof(struct rproc
) * NR_SYS_PROCS
;
648 src_addr
= (vir_bytes
) rprocpub
;
649 len
= sizeof(struct rprocpub
) * NR_SYS_PROCS
;
655 dst_proc
= m_ptr
->m_source
;
656 dst_addr
= (vir_bytes
) m_ptr
->m1_p1
;
657 if (OK
!= (s
=sys_datacopy(SELF
, src_addr
, dst_proc
, dst_addr
, len
)))
662 /*===========================================================================*
664 *===========================================================================*/
665 PUBLIC
int do_lookup(m_ptr
)
668 static char namebuf
[100];
671 struct rprocpub
*rrpub
;
673 len
= m_ptr
->RS_NAME_LEN
;
675 if(len
< 2 || len
>= sizeof(namebuf
)) {
676 printf("RS: len too weird (%d)\n", len
);
680 if((r
=sys_vircopy(m_ptr
->m_source
, D
, (vir_bytes
) m_ptr
->RS_NAME
,
681 SELF
, D
, (vir_bytes
) namebuf
, len
)) != OK
) {
682 printf("RS: name copy failed\n");
689 rrp
= lookup_slot_by_label(namebuf
);
694 m_ptr
->RS_ENDPOINT
= rrpub
->endpoint
;