7 dmaster - Manage continuously running processes
16 Start programs defined in B<daemontab> file.
17 Restart them when ended.
19 Re-read the B<daemontab> file on B<HUP> signal,
20 stops processes which are no longer in it (or I<ID> changed),
22 Those daemons whose I<COMMAND> I<ARGS> changed, are not restarted automatically.
24 Automatically respawn a daemon when it exits.
25 If a daemon exits too often, suppress it a little while.
26 Default parameters: at most 5 spawns in 2 sec, hold up for 10 sec.
32 =item F</etc/daemontab>
34 Format is I<ID> I<COMMAND> I<ARGS>, delimited by any amount of space/tab, one line per each.
35 I<ID> is an arbitrary word choosed by the user, to identify daemon commands across reloads.
36 Command arguments may enclosed in double quotes (C<">), but not part of the argument. See:
38 d1 my-daemon --param="foo bar"
40 is invalid (parsed as 2 parameters: C<--param="foo> and C<bar"> with dangling quotes).
42 d1 my-daemon "--param=foo bar"
46 It's not recommended to write complex scripts directly in daemontab.
47 Put them in F</etc/daemons> directory instead.
49 =item F</var/run/dmaster.sock>
51 dmaster(1) listens on a Unix Domain socket (see unix(7)).
52 When a client connects, internal state is dumped on the socket
53 in B<Single Value per Line with Hierarchical Keys> format.
55 Internal state includes all started daemon's B<PIDFD>,
56 which may be copied by an other program by pidfd_getfd(2),
57 in order to reliably send signals to it.
58 After the internal state dump is closed by a line containing a lone C<END>,
59 it reads one line on the management socket.
60 Currently you can not control dmaster(1) this way (use signals for this, see L</SIGNALS>),
61 but while it waits for that one line, does not do anything,
62 so it's time for the management client side to call pidfd_getfd(2)
63 to reliably copy the PIDFD representing a given daemon process.
65 B<Note>, this also means that daemon management is paused until
66 normal operation is resumed by sending a newline on the management socket.
67 Due to this potential DoS vector, management socket is chmod(2) to 0660.
69 =item F</var/run/shm/dmaster.state>
71 Dump internal state to it.
72 See USR1 signal below.
82 Write internal state into F</var/run/shm/dmaster.state> file.
86 Terminate managed daemons (always with B<TERM> signal),
87 wait for them to exit, then exit dmaster(1) itself.
88 If a daemon is a process group leader, sends signal to the whole process group.
92 Re-read B<daemontab>, stop daemons which are disappeared and start new ones.
101 =item XDG_CONFIG_HOME
103 Where to find C<daemontab>.
108 Where to create F<dmaster.sock>
109 Default is F</var/run>.
111 =item DAEMONS_RESPAWN_BURST_PERIOD
113 Measure this much time (seconds) to detect respawn-burst.
115 =item DAEMONS_RESPAWN_BURST_LIMIT
117 If a daemon respawns this many times within DAEMONS_RESPAWN_BURST_PERIOD,
118 consider it a respawn-burst, and hold it back a while.
120 =item DAEMONS_RESPAWN_BURST_HOLD
122 How much time (seconds) to wait before starting a daemon
123 after a respawn-burst.
127 =head1 EXAMPLE INSTALLATION
129 Put this in your inittab(5):
131 dm:2345:respawn:loggerexec daemon dmaster /sbin/dmaster
133 This one removes the socket file in case of it's left there:
135 dm:2345:respawn:@multicmd rm /var/run/dmaster.sock ; loggerexec daemon dmaster dmaster
139 supervisor(1), supervise(8), daemon(1), runit(1)
145 use POSIX
":sys_wait_h";
146 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
149 require 'syscall.ph';
150 @signal_names = split ' ', $Config{'sig_name'};
152 $pidfd_support = eval q{ use Linux::FD::Pid; 1; };
153 warn "deprecating PIDFD support: $@" if $@
;
158 $respawn_burst_period = $ENV{'DAEMONS_RESPAWN_BURST_PERIOD'} // 2;
159 $respawn_burst_limit = $ENV{'DAEMONS_RESPAWN_BURST_LIMIT'} // 5;
160 $respawn_burst_hold = $ENV{'DAEMONS_RESPAWN_BURST_HOLD'} // 10;
169 sub append_fixsizelist
171 my ($listref, $size, @items) = @_;
172 push @
$listref, @items;
173 while(scalar @
$listref > $size)
182 return syscall(SYS_getpgid
(), int $pid);
188 my $pid = $Daemon{$did}->{'pid'};
191 my $pgid = getpgid
($pid);
194 logger
"daemon $did is a process group leader, terminating PGID $pgid";
199 logger
"terminating $did PID $pid";
206 logger
"daemon $did has no PID";
213 my $cmd_args = $Daemon{$id}->{'cmd_args'};
214 logger
"starting $id: @$cmd_args";
219 close $management_socket;
220 close $_ for map {$Daemon->{$_}->{'pidfd'}} grep {exists $Daemon->{$_}->{'pidfd'}} keys %Daemon;
221 # leave STDIO there for convenience
226 elsif(not defined $pid)
232 logger
"started $id PID $pid";
233 $Daemon{$id}->{'pid'} = $pid;
234 $Daemon{$id}->{'pidfd'} = Linux
::FD
::Pid
->new($pid) if $pidfd_support;
235 append_fixsizelist \@
{$Daemon{$id}->{'starttimes'}}, $respawn_burst_limit, time;
243 while($cmd =~ /^("([^""]*)"|\S+)\s*/)
245 my $arg = defined $2 ?
$2 : $1;
246 push @cmd_args, $arg;
252 sub read_daemon_inittab
256 open my $fh, '<', $path or logger "open $path: $!";
262 if(my ($id, $cmd) = /^(\S+)\s+(.+)$/)
264 my @cmd_args = parse_cmd_string $cmd;
265 $new_daemon{$id}->{'cmd_args
'} = \@cmd_args;
269 logger "can not parse line $.";
278 my $daemontab = ($ENV{'XDG_CONFIG_HOME
'} || '/etc').'/daemontab
';
279 logger "reloading $daemontab";
280 my %new_daemon = read_daemon_inittab $daemontab;
282 for my $id (keys %Daemon)
284 if($id ~~ [keys %new_daemon])
287 $Daemon{$id}->{'cmd_args
'} = $new_daemon{$id}->{'cmd_args
'};
288 delete $new_daemon{$id};
292 logger "terminate $id as it's
no longer managed
";
293 delete $Daemon{$id}->{'cmd_args'};
298 for my $id (keys %new_daemon)
300 $Daemon{$id}->{'cmd_args'} = $new_daemon{$id}->{'cmd_args'};
302 for my $id (keys %new_daemon)
319 my $reaped_pid = waitpid(-1, WNOHANG); # check if any children terminated
320 last if $reaped_pid <= 0; # no more zombie children
321 my $status = ${^CHILD_ERROR_NATIVE};
322 my $exitcode = WIFEXITED($status) ? WEXITSTATUS($status) : undef;
323 my $exitsignal = WIFSIGNALED($status) ? WTERMSIG($status) : undef;
326 for my $id (keys %Daemon)
328 my $daemon_pid = $Daemon{$id}->{'pid'};
329 next if not defined $daemon_pid;
330 next if $daemon_pid != $reaped_pid;
332 logger sprintf "daemon
%s PID
%d deceased
, exit status
%s (%s) signal
%s (%s)",
333 $id, $daemon_pid, $exitcode // 'none', errstr($exitcode), $exitsignal // 'none', $signal_names[$exitsignal];
334 $Daemon{$id}->{'exitcode'} = $exitcode;
335 $Daemon{$id}->{'exitsignal'} = $exitsignal;
336 append_fixsizelist \@{$Daemon{$id}->{'exittimes'}}, $respawn_burst_limit, time;
337 delete $Daemon{$id}->{'pid'};
343 logger sprintf "unknown PID
%d deceased
, exit status
%s (%s) signal
%s (%s)",
344 $reaped_pid, $exitcode // 'none', errstr($exitcode), $exitsignal // 'none', $signal_names[$exitsignal];
355 my $type = ref $objref;
359 $cb_ref->("$prefix $objref\n");
361 elsif($type eq 'SCALAR')
363 $cb_ref->("$prefix $$objref\n");
365 elsif($type eq 'ARRAY')
368 for my $elem (@$objref)
370 svlhk_dump($elem, "$prefix.$idx", $cb_ref);
374 elsif($type eq 'HASH')
376 for my $key (keys %$objref)
378 svlhk_dump($objref->{$key}, "$prefix.$key", $cb_ref);
383 $cb_ref->("$prefix $type\n");
384 if($type eq 'Linux::FD::Pid')
386 my $fileno = $objref->fileno;
387 $cb_ref->("$prefix.fileno $fileno\n");
392 sub init_management_socket
394 #my $management_socket_path = chr(0).'dmaster-management'.$ENV{'DMASTER_SOCKET_NAME_SUFFIX'};
395 #my $management_socket_addr = pack('S', AF_UNIX) . $management_socket_path;
396 my $management_socket_path = ($ENV{'XDG_CACHE_HOME'} || '/var/run').'/dmaster.sock';
397 my $management_socket_addr = sockaddr_un($management_socket_path);
398 socket(my $management_socket, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!\n";
399 bind($management_socket, $management_socket_addr) or die "bind: $management_socket_path: $!\n";
400 chmod(0660, $management_socket_path) or die "chmod: $management_socket_path: $!\n";
401 listen($management_socket, SOMAXCONN) or die "listen: $!\n";
402 return $management_socket;
412 my $next_alarm_ts = $now + $sec;
413 if($NextAlarm_ts <= $now or $next_alarm_ts < $NextAlarm_ts)
415 $NextAlarm_ts = $next_alarm_ts;
421 $SIG{'HUP'} = sub { $do_reload = 1 unless $do_shutdown; };
422 $SIG{'CHLD'} = sub { $do_handle_children = 1; };
423 $SIG{'TERM'} = sub { $shutdown_signalname = $_[0]; $do_shutdown = 1; };
424 $SIG{'INT'} = $SIG{'TERM'};
425 $SIG{'ALRM'} = sub { 1; };
426 $SIG{'USR1'} = sub { $do_dump_state = 1; };
431 $management_socket = init_management_socket();
432 $shutdown_signalname = undef;
440 # go idle. but it will be interrupted by signals.
441 my $management_client_addr = accept(my $management_client_socket, $management_socket);
443 if(defined $management_client_addr)
445 print {$management_client_socket} "dmaster
.pid
$$\n";
446 svlhk_dump(\%Daemon, 'daemon', sub { print {$management_client_socket} $_[0]; });
447 print {$management_client_socket} "END\n";
448 $management_client_socket->flush;
449 logger "holding on management command
";
450 <$management_client_socket>;
451 logger "resume from management command
";
452 close $management_client_socket;
453 undef $management_client_socket;
456 if($do_handle_children)
458 $do_handle_children = 0;
464 if(not $prev_do_shutdown)
466 logger "got SIG
$shutdown_signalname, exiting
...";
467 $prev_do_shutdown = $do_shutdown;
470 my $subprocesses = 0;
471 for my $id (keys %Daemon)
473 if(exists $Daemon{$id}->{'pid'})
479 if($subprocesses == 0)
493 # check if daemons need respawn
494 for my $id (keys %Daemon)
496 my $dmon = $Daemon{$id};
497 if(not exists $dmon->{'pid'})
499 if(exists $dmon->{'cmd_args'})
501 if(scalar @{$dmon->{'exittimes'}} >= $respawn_burst_limit and $dmon->{'exittimes'}->[0] > time - $respawn_burst_period)
503 my $hold_sec = $dmon->{'exittimes'}->[0] + $respawn_burst_hold - time;
504 $hold_sec = 1 if $hold_sec < 1; # alarm(0) takes int but alarm(0) means cancel
505 logger "daemon
$id respawned too often
, hold up
for $hold_sec sec
";
506 wakeup_at_most $hold_sec;
515 # it's ended and command deleted, so
516 # we no longer manage this daemon
526 my $dump_path = '/var/run/shm/dmaster.state';
527 open my $fh, '>', $dump_path or warn "$dump_path: $!\n";
530 print {$fh} Dumper \%Daemon;