linux: implement filesystem-side clone ioctls
[zfs.git] / cmd / zed / zed_exec.c
blobe45acfb2c69f43c3dab241259e38d57102670b09
1 /*
2 * This file is part of the ZFS Event Daemon (ZED).
4 * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
5 * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
6 * Refer to the OpenZFS git commit log for authoritative copyright attribution.
8 * The contents of this file are subject to the terms of the
9 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
10 * You can obtain a copy of the license from the top-level file
11 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
12 * You may not use this file except in compliance with the license.
15 #include <assert.h>
16 #include <ctype.h>
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stddef.h>
22 #include <sys/avl.h>
23 #include <sys/resource.h>
24 #include <sys/stat.h>
25 #include <sys/wait.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <pthread.h>
29 #include <signal.h>
31 #include "zed_exec.h"
32 #include "zed_log.h"
33 #include "zed_strings.h"
/* Descriptor number on which each zedlet receives the zevent fd. */
#define	ZEVENT_FILENO	3
37 struct launched_process_node {
38 avl_node_t node;
39 pid_t pid;
40 uint64_t eid;
41 char *name;
44 static int
45 _launched_process_node_compare(const void *x1, const void *x2)
47 pid_t p1;
48 pid_t p2;
50 assert(x1 != NULL);
51 assert(x2 != NULL);
53 p1 = ((const struct launched_process_node *) x1)->pid;
54 p2 = ((const struct launched_process_node *) x2)->pid;
56 if (p1 < p2)
57 return (-1);
58 else if (p1 == p2)
59 return (0);
60 else
61 return (1);
64 static pthread_t _reap_children_tid = (pthread_t)-1;
65 static volatile boolean_t _reap_children_stop;
66 static avl_tree_t _launched_processes;
67 static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER;
68 static int16_t _launched_processes_limit;
71 * Create an environment string array for passing to execve() using the
72 * NAME=VALUE strings in container [zsp].
73 * Return a newly-allocated environment, or NULL on error.
75 static char **
76 _zed_exec_create_env(zed_strings_t *zsp)
78 int num_ptrs;
79 int buflen;
80 char *buf;
81 char **pp;
82 char *p;
83 const char *q;
84 int i;
85 int len;
87 num_ptrs = zed_strings_count(zsp) + 1;
88 buflen = num_ptrs * sizeof (char *);
89 for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp))
90 buflen += strlen(q) + 1;
92 buf = calloc(1, buflen);
93 if (!buf)
94 return (NULL);
96 pp = (char **)buf;
97 p = buf + (num_ptrs * sizeof (char *));
98 i = 0;
99 for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp)) {
100 pp[i] = p;
101 len = strlen(q) + 1;
102 memcpy(p, q, len);
103 p += len;
104 i++;
106 pp[i] = NULL;
107 assert(buf + buflen == p);
108 return ((char **)buf);
112 * Fork a child process to handle event [eid]. The program [prog]
113 * in directory [dir] is executed with the environment [env].
115 * The file descriptor [zfd] is the zevent_fd used to track the
116 * current cursor location within the zevent nvlist.
118 static void
119 _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
120 char *env[], int zfd, boolean_t in_foreground)
122 char path[PATH_MAX];
123 int n;
124 pid_t pid;
125 int fd;
126 struct launched_process_node *node;
127 sigset_t mask;
128 struct timespec launch_timeout =
129 { .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000, };
131 assert(dir != NULL);
132 assert(prog != NULL);
133 assert(env != NULL);
134 assert(zfd >= 0);
136 while (__atomic_load_n(&_launched_processes_limit,
137 __ATOMIC_SEQ_CST) <= 0)
138 (void) nanosleep(&launch_timeout, NULL);
140 n = snprintf(path, sizeof (path), "%s/%s", dir, prog);
141 if ((n < 0) || (n >= sizeof (path))) {
142 zed_log_msg(LOG_WARNING,
143 "Failed to fork \"%s\" for eid=%llu: %s",
144 prog, eid, strerror(ENAMETOOLONG));
145 return;
147 (void) pthread_mutex_lock(&_launched_processes_lock);
148 pid = fork();
149 if (pid < 0) {
150 (void) pthread_mutex_unlock(&_launched_processes_lock);
151 zed_log_msg(LOG_WARNING,
152 "Failed to fork \"%s\" for eid=%llu: %s",
153 prog, eid, strerror(errno));
154 return;
155 } else if (pid == 0) {
156 (void) sigemptyset(&mask);
157 (void) sigprocmask(SIG_SETMASK, &mask, NULL);
159 (void) umask(022);
160 if (in_foreground && /* we're already devnulled if daemonised */
161 (fd = open("/dev/null", O_RDWR | O_CLOEXEC)) != -1) {
162 (void) dup2(fd, STDIN_FILENO);
163 (void) dup2(fd, STDOUT_FILENO);
164 (void) dup2(fd, STDERR_FILENO);
166 (void) dup2(zfd, ZEVENT_FILENO);
167 execle(path, prog, NULL, env);
168 _exit(127);
171 /* parent process */
173 node = calloc(1, sizeof (*node));
174 if (node) {
175 node->pid = pid;
176 node->eid = eid;
177 node->name = strdup(prog);
178 if (node->name == NULL) {
179 perror("strdup");
180 exit(EXIT_FAILURE);
183 avl_add(&_launched_processes, node);
185 (void) pthread_mutex_unlock(&_launched_processes_lock);
187 __atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
188 zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
189 prog, eid, pid);
/*
 * Do-nothing signal handler.  _reap_children() installs it for SIGCHLD so
 * that the signal is caught rather than acted on by default.
 */
static void
_nop(int sig)
{
	(void) sig;	/* unused */
}
198 static void *
199 _reap_children(void *arg)
201 (void) arg;
202 struct launched_process_node node, *pnode;
203 pid_t pid;
204 int status;
205 struct rusage usage;
206 struct sigaction sa = {};
208 (void) sigfillset(&sa.sa_mask);
209 (void) sigdelset(&sa.sa_mask, SIGCHLD);
210 (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
212 (void) sigemptyset(&sa.sa_mask);
213 sa.sa_handler = _nop;
214 sa.sa_flags = SA_NOCLDSTOP;
215 (void) sigaction(SIGCHLD, &sa, NULL);
217 for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
218 (void) pthread_mutex_lock(&_launched_processes_lock);
219 pid = wait4(0, &status, WNOHANG, &usage);
221 if (pid == 0 || pid == (pid_t)-1) {
222 (void) pthread_mutex_unlock(&_launched_processes_lock);
223 if (pid == 0 || errno == ECHILD)
224 pause();
225 else if (errno != EINTR)
226 zed_log_msg(LOG_WARNING,
227 "Failed to wait for children: %s",
228 strerror(errno));
229 } else {
230 memset(&node, 0, sizeof (node));
231 node.pid = pid;
232 pnode = avl_find(&_launched_processes, &node, NULL);
233 if (pnode) {
234 memcpy(&node, pnode, sizeof (node));
236 avl_remove(&_launched_processes, pnode);
237 free(pnode);
239 (void) pthread_mutex_unlock(&_launched_processes_lock);
240 __atomic_add_fetch(&_launched_processes_limit, 1,
241 __ATOMIC_SEQ_CST);
243 usage.ru_utime.tv_sec += usage.ru_stime.tv_sec;
244 usage.ru_utime.tv_usec += usage.ru_stime.tv_usec;
245 usage.ru_utime.tv_sec +=
246 usage.ru_utime.tv_usec / (1000 * 1000);
247 usage.ru_utime.tv_usec %= 1000 * 1000;
249 if (WIFEXITED(status)) {
250 zed_log_msg(LOG_INFO,
251 "Finished \"%s\" eid=%llu pid=%d "
252 "time=%llu.%06us exit=%d",
253 node.name, node.eid, pid,
254 (unsigned long long) usage.ru_utime.tv_sec,
255 (unsigned int) usage.ru_utime.tv_usec,
256 WEXITSTATUS(status));
257 } else if (WIFSIGNALED(status)) {
258 zed_log_msg(LOG_INFO,
259 "Finished \"%s\" eid=%llu pid=%d "
260 "time=%llu.%06us sig=%d/%s",
261 node.name, node.eid, pid,
262 (unsigned long long) usage.ru_utime.tv_sec,
263 (unsigned int) usage.ru_utime.tv_usec,
264 WTERMSIG(status),
265 strsignal(WTERMSIG(status)));
266 } else {
267 zed_log_msg(LOG_INFO,
268 "Finished \"%s\" eid=%llu pid=%d "
269 "time=%llu.%06us status=0x%X",
270 node.name, node.eid, pid,
271 (unsigned long long) usage.ru_utime.tv_sec,
272 (unsigned int) usage.ru_utime.tv_usec,
273 (unsigned int) status);
276 free(node.name);
280 return (NULL);
283 void
284 zed_exec_fini(void)
286 struct launched_process_node *node;
287 void *ck = NULL;
289 if (_reap_children_tid == (pthread_t)-1)
290 return;
292 _reap_children_stop = B_TRUE;
293 (void) pthread_kill(_reap_children_tid, SIGCHLD);
294 (void) pthread_join(_reap_children_tid, NULL);
296 while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) {
297 free(node->name);
298 free(node);
300 avl_destroy(&_launched_processes);
302 (void) pthread_mutex_destroy(&_launched_processes_lock);
303 (void) pthread_mutex_init(&_launched_processes_lock, NULL);
305 _reap_children_tid = (pthread_t)-1;
309 * Process the event [eid] by synchronously invoking all zedlets with a
310 * matching class prefix.
312 * Each executable in [zcp->zedlets] from the directory [zcp->zedlet_dir]
313 * is matched against the event's [class], [subclass], and the "all" class
314 * (which matches all events).
315 * Every zedlet with a matching class prefix is invoked.
316 * The NAME=VALUE strings in [envs] will be passed to the zedlet as
317 * environment variables.
319 * The file descriptor [zcp->zevent_fd] is the zevent_fd used to track the
320 * current cursor location within the zevent nvlist.
322 * Return 0 on success, -1 on error.
325 zed_exec_process(uint64_t eid, const char *class, const char *subclass,
326 struct zed_conf *zcp, zed_strings_t *envs)
328 const char *class_strings[4];
329 const char *allclass = "all";
330 const char **csp;
331 const char *z;
332 char **e;
333 int n;
335 if (!zcp->zedlet_dir || !zcp->zedlets || !envs || zcp->zevent_fd < 0)
336 return (-1);
338 if (_reap_children_tid == (pthread_t)-1) {
339 _launched_processes_limit = zcp->max_jobs;
341 if (pthread_create(&_reap_children_tid, NULL,
342 _reap_children, NULL) != 0)
343 return (-1);
344 pthread_setname_np(_reap_children_tid, "reap ZEDLETs");
346 avl_create(&_launched_processes, _launched_process_node_compare,
347 sizeof (struct launched_process_node),
348 offsetof(struct launched_process_node, node));
351 csp = class_strings;
353 if (class)
354 *csp++ = class;
356 if (subclass)
357 *csp++ = subclass;
359 if (allclass)
360 *csp++ = allclass;
362 *csp = NULL;
364 e = _zed_exec_create_env(envs);
366 for (z = zed_strings_first(zcp->zedlets); z;
367 z = zed_strings_next(zcp->zedlets)) {
368 for (csp = class_strings; *csp; csp++) {
369 n = strlen(*csp);
370 if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
371 _zed_exec_fork_child(eid, zcp->zedlet_dir,
372 z, e, zcp->zevent_fd, zcp->do_foreground);
375 free(e);
376 return (0);