make getpeername() return the original socket address which before it was intercepted
[hband-tools.git] / admin-tools / dmaster
blobbd409b6708e3fe7a9c1afff7b61264a7893647b7
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 dmaster - Manage continuously running processes
9 =head1 SYNOPSIS
11 dmaster
13 =head1 DESCRIPTION
15 Daemon master.
16 Start programs defined in B<daemontab> file.
17 Restart them when ended.
19 Re-read the B<daemontab> file on B<HUP> signal,
20 stops processes which are no longer in it (or I<ID> changed),
21 and starts new ones.
22 Those daemons whose I<COMMAND> I<ARGS> changed, are not restarted automatically.
24 Automatically respawn a daemon when it exits.
25 If a daemon exits too often, suppress it a little while.
26 Default parameters: at most 5 spawns in 2 sec, hold up for 10 sec.
28 =head1 FILES
30 =over 4
32 =item F</etc/daemontab>
34 Format is I<ID> I<COMMAND> I<ARGS>, delimited by any amount of space/tab, one line per each.
35 I<ID> is an arbitrary word choosed by the user, to identify daemon commands across reloads.
36 Command arguments may enclosed in double quotes (C<">), but not part of the argument. See:
38 d1 my-daemon --param="foo bar"
40 is invalid (parsed as 2 parameters: C<--param="foo> and C<bar"> with dangling quotes).
42 d1 my-daemon "--param=foo bar"
44 is what you want.
46 It's not recommended to write complex scripts directly in daemontab.
47 Put them in F</etc/daemons> directory instead.
49 =item F</var/run/dmaster.sock>
51 dmaster(1) listens on a Unix Domain socket (see unix(7)).
52 When a client connects, internal state is dumped on the socket
53 in B<Single Value per Line with Hierarchical Keys> format.
55 Internal state includes all started daemon's B<PIDFD>,
56 which may be copied by an other program by pidfd_getfd(2),
57 in order to reliably send signals to it.
58 After the internal state dump is closed by a line containing a lone C<END>,
59 it reads one line on the management socket.
60 Currently you can not control dmaster(1) this way (use signals for this, see L</SIGNALS>),
61 but while it waits for that one line, does not do anything,
62 so it's time for the management client side to call pidfd_getfd(2)
63 to reliably copy the PIDFD representing a given daemon process.
65 B<Note>, this also means that daemon management is paused until
66 normal operation is resumed by sending a newline on the management socket.
67 Due to this potential DoS vector, management socket is chmod(2) to 0660.
69 =item F</var/run/shm/dmaster.state>
71 Dump internal state to it.
72 See USR1 signal below.
74 =back
76 =head1 SIGNALS
78 =over 4
80 =item USR1
82 Write internal state into F</var/run/shm/dmaster.state> file.
84 =item TERM, INT
86 Terminate managed daemons (always with B<TERM> signal),
87 wait for them to exit, then exit dmaster(1) itself.
88 If a daemon is a process group leader, sends signal to the whole process group.
90 =item HUP
92 Re-read B<daemontab>, stop daemons which are disappeared and start new ones.
93 Leave others running.
95 =back
97 =head1 ENVIRONMENT
99 =over 4
101 =item XDG_CONFIG_HOME
103 Where to find C<daemontab>.
104 Default is F</etc>.
106 =item XDG_CACHE_HOME
108 Where to create F<dmaster.sock>
109 Default is F</var/run>.
111 =item DAEMONS_RESPAWN_BURST_PERIOD
113 Measure this much time (seconds) to detect respawn-burst.
115 =item DAEMONS_RESPAWN_BURST_LIMIT
117 If a daemon respawns this many times within DAEMONS_RESPAWN_BURST_PERIOD,
118 consider it a respawn-burst, and hold it back a while.
120 =item DAEMONS_RESPAWN_BURST_HOLD
122 How much time (seconds) to wait before starting a daemon
123 after a respawn-burst.
125 =back
127 =head1 EXAMPLE INSTALLATION
129 Put this in your inittab(5):
131 dm:2345:respawn:loggerexec daemon dmaster /sbin/dmaster
133 This one removes the socket file in case of it's left there:
135 dm:2345:respawn:@multicmd rm /var/run/dmaster.sock ; loggerexec daemon dmaster dmaster
137 =head1 SEE ALSO
139 supervisor(1), supervise(8), daemon(1), runit(1)
141 =cut
144 use Data::Dumper;
145 use POSIX ":sys_wait_h";
146 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
147 use Socket;
148 use Config;
149 require 'syscall.ph';
150 @signal_names = split ' ', $Config{'sig_name'};
152 $pidfd_support = eval q{ use Linux::FD::Pid; 1; };
153 warn "deprecating PIDFD support: $@" if $@;
156 $0 = 'dmaster';
157 %Daemon = ();
158 $respawn_burst_period = $ENV{'DAEMONS_RESPAWN_BURST_PERIOD'} // 2;
159 $respawn_burst_limit = $ENV{'DAEMONS_RESPAWN_BURST_LIMIT'} // 5;
160 $respawn_burst_hold = $ENV{'DAEMONS_RESPAWN_BURST_HOLD'} // 10;
163 sub logger
165 my $s = shift;
166 warn "$0: $s\n";
169 sub append_fixsizelist
171 my ($listref, $size, @items) = @_;
172 push @$listref, @items;
173 while(scalar @$listref > $size)
175 shift @$listref;
179 sub getpgid
181 my $pid = shift;
182 return syscall(SYS_getpgid(), int $pid);
185 sub stop_daemon
187 my $did = shift;
188 my $pid = $Daemon{$did}->{'pid'};
189 if(defined $pid)
191 my $pgid = getpgid($pid);
192 if($pgid == $pid)
194 logger "daemon $did is a process group leader, terminating PGID $pgid";
195 $kid = -$pid;
197 else
199 logger "terminating $did PID $pid";
200 $kid = $pid;
202 kill 'TERM', $kid;
204 else
206 logger "daemon $did has no PID";
210 sub start_daemon
212 my $id = shift;
213 my $cmd_args = $Daemon{$id}->{'cmd_args'};
214 logger "starting $id: @$cmd_args";
216 my $pid = fork;
217 if($pid == 0)
219 close $management_socket;
220 close $_ for map {$Daemon->{$_}->{'pidfd'}} grep {exists $Daemon->{$_}->{'pidfd'}} keys %Daemon;
221 # leave STDIO there for convenience
223 exec @$cmd_args;
224 exit 127;
226 elsif(not defined $pid)
228 logger "fork: $!";
230 else
232 logger "started $id PID $pid";
233 $Daemon{$id}->{'pid'} = $pid;
234 $Daemon{$id}->{'pidfd'} = Linux::FD::Pid->new($pid) if $pidfd_support;
235 append_fixsizelist \@{$Daemon{$id}->{'starttimes'}}, $respawn_burst_limit, time;
239 sub parse_cmd_string
241 my $cmd = shift;
242 my @cmd_args = ();
243 while($cmd =~ /^("([^""]*)"|\S+)\s*/)
245 my $arg = defined $2 ? $2 : $1;
246 push @cmd_args, $arg;
247 $cmd = $';
249 return @cmd_args;
252 sub read_daemon_inittab
254 my $path = shift;
255 # TODO: lock
256 open my $fh, '<', $path or logger "open $path: $!";
257 return unless $fh;
258 my %new_daemon = ();
259 while(<$fh>)
261 next if /^\s*(#|$)/;
262 if(my ($id, $cmd) = /^(\S+)\s+(.+)$/)
264 my @cmd_args = parse_cmd_string $cmd;
265 $new_daemon{$id}->{'cmd_args'} = \@cmd_args;
267 else
269 logger "can not parse line $.";
272 close $fh;
273 return %new_daemon;
276 sub reload
278 my $daemontab = ($ENV{'XDG_CONFIG_HOME'} || '/etc').'/daemontab';
279 logger "reloading $daemontab";
280 my %new_daemon = read_daemon_inittab $daemontab;
282 for my $id (keys %Daemon)
284 if($id ~~ [keys %new_daemon])
286 logger "keep $id";
287 $Daemon{$id}->{'cmd_args'} = $new_daemon{$id}->{'cmd_args'};
288 delete $new_daemon{$id};
290 else
292 logger "terminate $id as it's no longer managed";
293 delete $Daemon{$id}->{'cmd_args'};
294 stop_daemon $id;
298 for my $id (keys %new_daemon)
300 $Daemon{$id}->{'cmd_args'} = $new_daemon{$id}->{'cmd_args'};
302 for my $id (keys %new_daemon)
304 start_daemon $id;
308 sub errstr
310 my $errno = shift;
311 $! = $errno;
312 return $^E;
315 sub handle_children
317 while(1)
319 my $reaped_pid = waitpid(-1, WNOHANG); # check if any children terminated
320 last if $reaped_pid <= 0; # no more zombie children
321 my $status = ${^CHILD_ERROR_NATIVE};
322 my $exitcode = WIFEXITED($status) ? WEXITSTATUS($status) : undef;
323 my $exitsignal = WIFSIGNALED($status) ? WTERMSIG($status) : undef;
324 my $reaped = 0;
326 for my $id (keys %Daemon)
328 my $daemon_pid = $Daemon{$id}->{'pid'};
329 next if not defined $daemon_pid;
330 next if $daemon_pid != $reaped_pid;
332 logger sprintf "daemon %s PID %d deceased, exit status %s (%s) signal %s (%s)",
333 $id, $daemon_pid, $exitcode // 'none', errstr($exitcode), $exitsignal // 'none', $signal_names[$exitsignal];
334 $Daemon{$id}->{'exitcode'} = $exitcode;
335 $Daemon{$id}->{'exitsignal'} = $exitsignal;
336 append_fixsizelist \@{$Daemon{$id}->{'exittimes'}}, $respawn_burst_limit, time;
337 delete $Daemon{$id}->{'pid'};
338 $reaped = 1;
339 last;
341 if(not $reaped)
343 logger sprintf "unknown PID %d deceased, exit status %s (%s) signal %s (%s)",
344 $reaped_pid, $exitcode // 'none', errstr($exitcode), $exitsignal // 'none', $signal_names[$exitsignal];
349 sub svlhk_dump
351 my $objref = shift;
352 my $prefix = shift;
353 my $cb_ref = shift;
355 my $type = ref $objref;
357 if($type eq '')
359 $cb_ref->("$prefix $objref\n");
361 elsif($type eq 'SCALAR')
363 $cb_ref->("$prefix $$objref\n");
365 elsif($type eq 'ARRAY')
367 my $idx = 0;
368 for my $elem (@$objref)
370 svlhk_dump($elem, "$prefix.$idx", $cb_ref);
371 $idx++;
374 elsif($type eq 'HASH')
376 for my $key (keys %$objref)
378 svlhk_dump($objref->{$key}, "$prefix.$key", $cb_ref);
381 else
383 $cb_ref->("$prefix $type\n");
384 if($type eq 'Linux::FD::Pid')
386 my $fileno = $objref->fileno;
387 $cb_ref->("$prefix.fileno $fileno\n");
392 sub init_management_socket
394 #my $management_socket_path = chr(0).'dmaster-management'.$ENV{'DMASTER_SOCKET_NAME_SUFFIX'};
395 #my $management_socket_addr = pack('S', AF_UNIX) . $management_socket_path;
396 my $management_socket_path = ($ENV{'XDG_CACHE_HOME'} || '/var/run').'/dmaster.sock';
397 my $management_socket_addr = sockaddr_un($management_socket_path);
398 socket(my $management_socket, AF_UNIX, SOCK_STREAM, 0) or die "socket: $!\n";
399 bind($management_socket, $management_socket_addr) or die "bind: $management_socket_path: $!\n";
400 chmod(0660, $management_socket_path) or die "chmod: $management_socket_path: $!\n";
401 listen($management_socket, SOMAXCONN) or die "listen: $!\n";
402 return $management_socket;
406 $NextAlarm_ts = 0;
408 sub wakeup_at_most
410 my $sec = shift;
411 my $now = time;
412 my $next_alarm_ts = $now + $sec;
413 if($NextAlarm_ts <= $now or $next_alarm_ts < $NextAlarm_ts)
415 $NextAlarm_ts = $next_alarm_ts;
416 alarm $sec;
421 $SIG{'HUP'} = sub { $do_reload = 1 unless $do_shutdown; };
422 $SIG{'CHLD'} = sub { $do_handle_children = 1; };
423 $SIG{'TERM'} = sub { $shutdown_signalname = $_[0]; $do_shutdown = 1; };
424 $SIG{'INT'} = $SIG{'TERM'};
425 $SIG{'ALRM'} = sub { 1; };
426 $SIG{'USR1'} = sub { $do_dump_state = 1; };
427 #$SIG{'PIPE'} = TODO
430 logger "init";
431 $management_socket = init_management_socket();
432 $shutdown_signalname = undef;
435 logger "start";
436 reload;
438 while(1)
440 # go idle. but it will be interrupted by signals.
441 my $management_client_addr = accept(my $management_client_socket, $management_socket);
443 if(defined $management_client_addr)
445 print {$management_client_socket} "dmaster.pid $$\n";
446 svlhk_dump(\%Daemon, 'daemon', sub { print {$management_client_socket} $_[0]; });
447 print {$management_client_socket} "END\n";
448 $management_client_socket->flush;
449 logger "holding on management command";
450 <$management_client_socket>;
451 logger "resume from management command";
452 close $management_client_socket;
453 undef $management_client_socket;
456 if($do_handle_children)
458 $do_handle_children = 0;
459 handle_children;
462 if($do_shutdown)
464 if(not $prev_do_shutdown)
466 logger "got SIG$shutdown_signalname, exiting...";
467 $prev_do_shutdown = $do_shutdown;
470 my $subprocesses = 0;
471 for my $id (keys %Daemon)
473 if(exists $Daemon{$id}->{'pid'})
475 stop_daemon $id;
476 $subprocesses++;
479 if($subprocesses == 0)
481 logger "exit";
482 exit;
485 else
487 if($do_reload)
489 $do_reload = 0;
490 reload;
493 # check if daemons need respawn
494 for my $id (keys %Daemon)
496 my $dmon = $Daemon{$id};
497 if(not exists $dmon->{'pid'})
499 if(exists $dmon->{'cmd_args'})
501 if(scalar @{$dmon->{'exittimes'}} >= $respawn_burst_limit and $dmon->{'exittimes'}->[0] > time - $respawn_burst_period)
503 my $hold_sec = $dmon->{'exittimes'}->[0] + $respawn_burst_hold - time;
504 $hold_sec = 1 if $hold_sec < 1; # alarm(0) takes int but alarm(0) means cancel
505 logger "daemon $id respawned too often, hold up for $hold_sec sec";
506 wakeup_at_most $hold_sec;
508 else
510 start_daemon $id;
513 else
515 # it's ended and command deleted, so
516 # we no longer manage this daemon
517 delete $Daemon{$id};
523 if($do_dump_state)
525 $do_dump_state = 0;
526 my $dump_path = '/var/run/shm/dmaster.state';
527 open my $fh, '>', $dump_path or warn "$dump_path: $!\n";
528 if($fh)
530 print {$fh} Dumper \%Daemon;
531 close $fh;