1:255.16-alt1
[systemd_ALT.git] / src / nspawn / nspawn-stub-pid1.c
blob47f7155b195c19a2e05c12d229c10c53a96d251f
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <sys/ioctl.h>
4 #include <sys/reboot.h>
5 #include <sys/wait.h>
6 #include <sys/prctl.h>
7 #include <unistd.h>
9 #include "argv-util.h"
10 #include "constants.h"
11 #include "exit-status.h"
12 #include "fd-util.h"
13 #include "log.h"
14 #include "nspawn-stub-pid1.h"
15 #include "process-util.h"
16 #include "signal-util.h"
17 #include "time-util.h"
/* Re-points the kernel's record of this process's environment area at the caller-supplied buffer, by
 * setting PR_SET_MM_ENV_START to the buffer's address and PR_SET_MM_ENV_END to the address just past
 * its last byte.
 *
 * new_environment: start of the replacement environment block; it is not copied, only its address is
 *                  handed to the kernel.
 * length:          size of that block in bytes.
 *
 * Returns 0 if both prctl() calls succeed, otherwise -errno of the first call that failed (the end
 * address is not touched if setting the start address fails). */
static int reset_environ(const char *new_environment, size_t length) {
        const unsigned long env_first = (unsigned long) new_environment;
        const unsigned long env_limit = env_first + length;
        int rc;

        rc = prctl(PR_SET_MM, PR_SET_MM_ENV_START, env_first, 0, 0);
        if (rc >= 0)
                rc = prctl(PR_SET_MM, PR_SET_MM_ENV_END, env_limit, 0, 0);

        /* errno still belongs to whichever prctl() failed, nothing else ran in between. */
        return rc < 0 ? -errno : 0;
}
34 int stub_pid1(sd_id128_t uuid) {
35 enum {
36 STATE_RUNNING,
37 STATE_REBOOT,
38 STATE_POWEROFF,
39 } state = STATE_RUNNING;
41 sigset_t fullmask, oldmask, waitmask;
42 usec_t quit_usec = USEC_INFINITY;
43 pid_t pid;
44 int r;
46 /* The new environment we set up, on the stack. */
47 char new_environment[] =
48 "container=systemd-nspawn\0"
49 "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
51 /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful
52 * for allowing arbitrary processes run in a container, and still have all zombies reaped. */
54 assert_se(sigfillset(&fullmask) >= 0);
55 assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0);
57 pid = fork();
58 if (pid < 0)
59 return log_error_errno(errno, "Failed to fork child pid: %m");
61 if (pid == 0) {
62 /* Return in the child */
63 assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0);
65 if (setsid() < 0)
66 return log_error_errno(errno, "Failed to become session leader in payload process: %m");
68 return 0;
71 reset_all_signal_handlers();
73 log_close();
74 (void) close_all_fds(NULL, 0);
75 log_open();
77 if (ioctl(STDIN_FILENO, TIOCNOTTY) < 0) {
78 if (errno != ENOTTY)
79 log_warning_errno(errno, "Unexpected error from TIOCNOTTY ioctl in init stub process, ignoring: %m");
80 } else
81 log_warning("Expected TIOCNOTTY to fail, but it succeeded in init stub process, ignoring.");
83 /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also,
84 * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ
85 * find them set. */
86 sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX);
87 reset_environ(new_environment, sizeof(new_environment));
89 (void) rename_process("(sd-stubinit)");
91 assert_se(sigemptyset(&waitmask) >= 0);
92 assert_se(sigset_add_many(&waitmask,
93 SIGCHLD, /* posix: process died */
94 SIGINT, /* sysv: ctrl-alt-del */
95 SIGRTMIN+3, /* systemd: halt */
96 SIGRTMIN+4, /* systemd: poweroff */
97 SIGRTMIN+5, /* systemd: reboot */
98 SIGRTMIN+6, /* systemd: kexec */
99 SIGRTMIN+13, /* systemd: halt */
100 SIGRTMIN+14, /* systemd: poweroff */
101 SIGRTMIN+15, /* systemd: reboot */
102 SIGRTMIN+16, /* systemd: kexec */
103 -1) >= 0);
105 /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't
106 * support reexec/reloading in this stub process. */
108 for (;;) {
109 siginfo_t si;
110 usec_t current_usec;
112 si.si_pid = 0;
113 r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG);
114 if (r < 0) {
115 r = log_error_errno(errno, "Failed to reap children: %m");
116 goto finish;
119 current_usec = now(CLOCK_MONOTONIC);
121 if (si.si_pid == pid || current_usec >= quit_usec) {
123 /* The child we started ourselves died or we reached a timeout. */
125 if (state == STATE_REBOOT) { /* dispatch a queued reboot */
126 (void) reboot(RB_AUTOBOOT);
127 r = log_error_errno(errno, "Failed to reboot: %m");
128 goto finish;
130 } else if (state == STATE_POWEROFF)
131 (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */
133 if (si.si_pid == pid && si.si_code == CLD_EXITED)
134 r = si.si_status; /* pass on exit code */
135 else
136 r = EXIT_EXCEPTION; /* signal, coredump, timeout, … */
138 goto finish;
140 if (si.si_pid != 0)
141 /* We reaped something. Retry until there's nothing more to reap. */
142 continue;
144 if (quit_usec == USEC_INFINITY)
145 r = sigwaitinfo(&waitmask, &si);
146 else
147 r = sigtimedwait(&waitmask, &si, TIMESPEC_STORE(quit_usec - current_usec));
148 if (r < 0) {
149 if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */
150 continue;
151 if (errno == EAGAIN) /* timeout reached */
152 continue;
154 r = log_error_errno(errno, "Failed to wait for signal: %m");
155 goto finish;
158 if (si.si_signo == SIGCHLD)
159 continue; /* Let's reap this */
161 if (state != STATE_RUNNING)
162 continue;
164 /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a
165 * constant… */
167 if (si.si_signo == SIGRTMIN+3 ||
168 si.si_signo == SIGRTMIN+4 ||
169 si.si_signo == SIGRTMIN+13 ||
170 si.si_signo == SIGRTMIN+14)
172 state = STATE_POWEROFF;
174 else if (si.si_signo == SIGINT ||
175 si.si_signo == SIGRTMIN+5 ||
176 si.si_signo == SIGRTMIN+6 ||
177 si.si_signo == SIGRTMIN+15 ||
178 si.si_signo == SIGRTMIN+16)
180 state = STATE_REBOOT;
181 else
182 assert_not_reached();
184 r = kill_and_sigcont(pid, SIGTERM);
186 /* Let's send a SIGHUP after the SIGTERM, as shells tend to ignore SIGTERM but do react to SIGHUP. We
187 * do it strictly in this order, so that the SIGTERM is dispatched first, and SIGHUP second for those
188 * processes which handle both. That's because services tend to bind configuration reload or something
189 * else to SIGHUP. */
191 if (r != -ESRCH)
192 (void) kill(pid, SIGHUP);
194 quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
197 finish:
198 _exit(r < 0 ? EXIT_FAILURE : r);