Index gzip-compressed SVG files
[xapian.git] / xapian-applications / omega / runfilter.cc
blob61595279ee9dfbb7a9dc0e7dbd990b701d573517
/** @file
 * @brief Run an external filter and capture its output in a std::string.
 */
/* Copyright (C) 2003,2006,2007,2009,2010,2011,2013,2015,2017,2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
21 #include <config.h>
23 #include "runfilter.h"
25 #include <iostream>
26 #include <string>
27 #include <vector>
29 #include <sys/types.h>
30 #include "safefcntl.h"
31 #include <cerrno>
32 #include <cstdio>
33 #include <cstring>
34 #ifdef HAVE_SYS_TIME_H
35 # include <sys/time.h>
36 #endif
37 #ifdef HAVE_SYS_RESOURCE_H
38 # include <sys/resource.h>
39 #endif
40 #include "safesysselect.h"
41 #ifdef HAVE_SYS_SOCKET_H
42 # include <sys/socket.h>
43 #endif
44 #include "safesyswait.h"
45 #include "safeunistd.h"
47 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
48 # include <signal.h>
49 #endif
51 #include "freemem.h"
52 #include "setenv.h"
53 #include "stringutils.h"
55 #ifdef _MSC_VER
56 # define popen _popen
57 # define pclose _pclose
58 #endif
60 using namespace std;
/// File descriptor open on /dev/null (set up by runfilter_init(); -1 until
/// then).  Used as the target for ">/dev/null" style redirections.
static int devnull = -1;
64 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
/** Report whether a command line has to be handed to the shell.
 *
 *  @param p	The nul-terminated command string to inspect.
 *
 *  @return	true if @a p contains any character from a fixed set of shell
 *		metacharacters, false otherwise (including for an empty
 *		string).
 */
bool
command_needs_shell(const char * p)
{
    // Probably overly conservative, but suitable for real-world cases.
    static const char shell_metachars[] = "!\"#$&()*;<>?[\\]^`{|}~";
    return strpbrk(p, shell_metachars) != NULL;
}
/** Split off one shell-style word from a command string, in place.
 *
 *  Starting at offset @a j, the word is scanned up to the next unquoted
 *  space, tab or newline.  Single-quoted sections are unquoted by erasing
 *  the quote characters from @a s, and the four character sequence '\''
 *  inside quotes is replaced by a single '.  The delimiter following the
 *  word (if any) is overwritten with '\0' so the word can be used as a C
 *  string.
 *
 *  If a single quote is never closed, the process terminates via _exit(2).
 *
 *  @param s	The command string (modified in place).
 *  @param j	On entry, where to start scanning.  On exit, the offset just
 *		after the '\0' written at the end of the word, or npos if
 *		the word ran to the end of @a s.
 *
 *  @return	true if any part of the word was single-quoted.
 */
static bool
unquote(std::string & s, size_t & j)
{
    bool quoted = false;
    if (s[j] == '\'') {
single_quoted:
	quoted = true;
	// Drop the opening quote, so j now indexes the first quoted character.
	s.erase(j, 1);
	// The first search must include position j itself, otherwise an empty
	// quoted string ('') would have its closing quote skipped.
	size_t from = j;
	while (true) {
	    j = s.find('\'', from);
	    if (j == std::string::npos) {
		// Unmatched ' in command string.
		// dash exits 2 in this case, bash exits 1.
		_exit(2);
	    }
	    // Replace four character sequence '\'' with ' - this is
	    // how a single quote inside single quotes gets escaped.
	    if (s[j + 1] != '\\' ||
		s[j + 2] != '\'' ||
		s[j + 3] != '\'') {
		break;
	    }
	    s.erase(j + 1, 3);
	    // s[j] is now a literal quote, so resume searching after it.
	    from = j + 1;
	}
	if (j + 1 != s.size()) {
	    char ch = s[j + 1];
	    if (ch != ' ' && ch != '\t' && ch != '\n') {
		// Handle the expansion of e.g.: --input=%f,html
		s.erase(j, 1);
		// Another quoted section may start immediately (e.g. 'a''b').
		if (s[j] == '\'') goto single_quoted;
		goto out_of_quotes;
	    }
	}
    } else {
out_of_quotes:
	j = s.find_first_of(" \t\n'", j + 1);
	// Handle the expansion of e.g.: --input=%f
	if (j != std::string::npos && s[j] == '\'') goto single_quoted;
    }
    if (j != std::string::npos) {
	// Terminate the word and step past the terminator.
	s[j++] = '\0';
    }
    return quoted;
}
/// Target passed to kill() by handle_signal(): run_filter() sets this to the
/// filter child's PID (negated when the child has its own process group) and
/// resets it to 0 once the child has been reaped.  0 means nothing to kill.
static pid_t pid_to_kill_on_signal;
124 #ifdef HAVE_SIGACTION
// Signal dispositions which were in force before
// runfilter_init_signal_handlers_() installed handle_signal(); handle_signal()
// reinstates the matching one before re-raising the signal.
static struct sigaction old_hup_handler;
static struct sigaction old_int_handler;
static struct sigaction old_quit_handler;
static struct sigaction old_term_handler;
130 extern "C" {
132 static void
133 handle_signal(int signum)
135 if (pid_to_kill_on_signal) {
136 kill(pid_to_kill_on_signal, SIGKILL);
137 pid_to_kill_on_signal = 0;
139 switch (signum) {
140 case SIGHUP:
141 sigaction(signum, &old_hup_handler, NULL);
142 break;
143 case SIGINT:
144 sigaction(signum, &old_int_handler, NULL);
145 break;
146 case SIGQUIT:
147 sigaction(signum, &old_quit_handler, NULL);
148 break;
149 case SIGTERM:
150 sigaction(signum, &old_term_handler, NULL);
151 break;
152 default:
153 return;
155 raise(signum);
160 static inline void
161 runfilter_init_signal_handlers_()
163 struct sigaction sa;
164 sa.sa_handler = handle_signal;
165 sigemptyset(&sa.sa_mask);
166 sa.sa_flags = 0;
168 sigaction(SIGHUP, &sa, &old_hup_handler);
169 sigaction(SIGINT, &sa, &old_int_handler);
170 sigaction(SIGQUIT, &sa, &old_quit_handler);
171 sigaction(SIGTERM, &sa, &old_term_handler);
173 #else
// Handlers which were in force before runfilter_init_signal_handlers_()
// installed handle_signal(); handle_signal() reinstates the matching one
// before re-raising the signal.
static sighandler_t old_hup_handler;
static sighandler_t old_int_handler;
static sighandler_t old_quit_handler;
static sighandler_t old_term_handler;
179 extern "C" {
181 static void
182 handle_signal(int signum)
184 if (pid_to_kill_on_signal) {
185 kill(pid_to_kill_on_signal, SIGKILL);
186 pid_to_kill_on_signal = 0;
188 switch (signum) {
189 case SIGHUP:
190 signal(signum, old_hup_handler);
191 break;
192 case SIGINT:
193 signal(signum, old_int_handler);
194 break;
195 case SIGQUIT:
196 signal(signum, old_quit_handler);
197 break;
198 case SIGTERM:
199 signal(signum, old_term_handler);
200 break;
201 default:
202 return;
204 raise(signum);
209 static inline void
210 runfilter_init_signal_handlers_()
212 old_hup_handler = signal(SIGHUP, handle_signal);
213 old_int_handler = signal(SIGINT, handle_signal);
214 old_quit_handler = signal(SIGQUIT, handle_signal);
215 old_term_handler = signal(SIGTERM, handle_signal);
217 #endif
218 #else
/** Report whether a command line has to be handed to the shell.
 *
 *  This build doesn't try to run commands without the shell, so there's no
 *  point spending time analysing the command: the answer is always true.
 */
bool
command_needs_shell(const char *)
{
    return true;
}
/// No signal handlers are installed in this build, so this is a no-op.
static inline void
runfilter_init_signal_handlers_()
{
}
231 #endif
233 void
234 runfilter_init()
236 runfilter_init_signal_handlers_();
237 devnull = open("/dev/null", O_WRONLY);
238 if (devnull < 0) {
239 cerr << "Failed to open /dev/null: " << strerror(errno) << endl;
240 exit(1);
242 // Ensure that devnull isn't fd 0, 1 or 2 (stdin, stdout or stderr) and
243 // that we have open fds for stdin, stdout and stderr. This simplifies the
244 // code after fork() because it doesn't need to worry about such corner
245 // cases.
246 while (devnull <= 2) {
247 devnull = dup(devnull);
251 void
252 run_filter(int fd_in, const string& cmd, bool use_shell, string* out,
253 int alt_status)
255 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
256 // We want to be able to get the exit status of the child process.
257 signal(SIGCHLD, SIG_DFL);
259 int fds[2];
260 if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) < 0)
261 throw ReadError("socketpair failed");
263 pid_t child = fork();
264 if (child == 0) {
265 // We're the child process.
267 #ifdef HAVE_SETPGID
268 // Put the child process into its own process group, so that we can
269 // easily kill it and any children it in turn forks if we need to.
270 setpgid(0, 0);
271 #endif
273 // Close the parent's side of the socket pair.
274 close(fds[0]);
276 if (fd_in != -1) {
277 // Connect piped input to stdin.
278 dup2(fd_in, 0);
279 close(fd_in);
282 // Connect stdout to our side of the socket pair.
283 dup2(fds[1], 1);
285 #ifdef HAVE_SETRLIMIT
286 // Impose some pretty generous resource limits to prevent run-away
287 // filter programs from causing problems.
289 // Limit CPU time to 300 seconds (5 minutes).
290 struct rlimit cpu_limit = { 300, RLIM_INFINITY };
291 setrlimit(RLIMIT_CPU, &cpu_limit);
293 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
294 // Limit process data to free physical memory.
295 long mem = get_free_physical_memory();
296 if (mem > 0) {
297 struct rlimit ram_limit = {
298 static_cast<rlim_t>(mem),
299 RLIM_INFINITY
301 #ifdef RLIMIT_AS
302 setrlimit(RLIMIT_AS, &ram_limit);
303 #elif defined RLIMIT_VMEM
304 setrlimit(RLIMIT_VMEM, &ram_limit);
305 #else
306 // Only limits the data segment rather than the total address
307 // space, but that's better than nothing.
308 setrlimit(RLIMIT_DATA, &ram_limit);
309 #endif
311 #endif
312 #endif
314 if (use_shell) {
315 execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL);
316 _exit(-1);
319 string s(cmd);
320 // Handle any environment variable assignments.
321 // Name must start with alpha or '_', contain only alphanumerics and
322 // '_', and there must be no quoting of either the name or the '='.
323 size_t j = 0;
324 while (true) {
325 j = s.find_first_not_of(" \t\n", j);
326 if (!(C_isalnum(s[j]) || s[j] == '_')) break;
327 size_t i = j;
328 do ++j; while (C_isalnum(s[j]) || s[j] == '_');
329 if (s[j] != '=') {
330 j = i;
331 break;
334 size_t eq = j;
335 unquote(s, j);
336 s[eq] = '\0';
337 setenv(&s[i], &s[eq + 1], 1);
338 j = s.find_first_not_of(" \t\n", j);
341 vector<const char *> argv;
342 while (true) {
343 size_t i = s.find_first_not_of(" \t\n", j);
344 if (i == string::npos) break;
345 bool quoted = unquote(s, j);
346 const char * word = s.c_str() + i;
347 if (!quoted) {
348 // Handle simple cases of redirection.
349 if (strcmp(word, ">/dev/null") == 0) {
350 dup2(devnull, 1);
351 continue;
353 if (strcmp(word, "2>/dev/null") == 0) {
354 dup2(devnull, 2);
355 continue;
357 if (strcmp(word, "2>&1") == 0) {
358 dup2(1, 2);
359 continue;
361 if (strcmp(word, "1>&2") == 0) {
362 dup2(2, 1);
363 continue;
366 argv.push_back(word);
368 if (argv.empty()) _exit(0);
369 argv.push_back(NULL);
371 execvp(argv[0], const_cast<char **>(&argv[0]));
372 // Emulate shell behaviour and exit with status 127 if the command
373 // isn't found, and status 126 for other problems. In particular, we
374 // rely on 127 below to throw NoSuchFilter.
375 _exit(errno == ENOENT ? 127 : 126);
378 // We're the parent process.
379 #ifdef HAVE_SETPGID
380 pid_to_kill_on_signal = -child;
381 #else
382 pid_to_kill_on_signal = child;
383 #endif
385 // Close the child's side of the socket pair.
386 close(fds[1]);
387 if (child == -1) {
388 // fork() failed.
389 close(fds[0]);
390 throw ReadError("fork failed");
393 int fd = fds[0];
395 fd_set readfds;
396 FD_ZERO(&readfds);
397 while (true) {
398 // If we wait 300 seconds (5 minutes) without getting data from the
399 // filter, then give up to avoid waiting forever for a filter which
400 // has ended up blocked waiting for something which will never happen.
401 struct timeval tv;
402 tv.tv_sec = 300;
403 tv.tv_usec = 0;
404 FD_SET(fd, &readfds);
405 int r = select(fd + 1, &readfds, NULL, NULL, &tv);
406 if (r <= 0) {
407 if (r < 0) {
408 if (errno == EINTR || errno == EAGAIN) {
409 // select() interrupted by a signal, so retry.
410 continue;
412 cerr << "Reading from filter failed (" << strerror(errno) << ")"
413 << endl;
414 } else {
415 cerr << "Filter inactive for too long" << endl;
417 #ifdef HAVE_SETPGID
418 kill(-child, SIGKILL);
419 #else
420 kill(child, SIGKILL);
421 #endif
422 close(fd);
423 int status = 0;
424 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
425 pid_to_kill_on_signal = 0;
426 throw ReadError(status);
429 char buf[4096];
430 ssize_t res = read(fd, buf, sizeof(buf));
431 if (res == 0) break;
432 if (res == -1) {
433 if (errno == EINTR) {
434 // read() interrupted by a signal, so retry.
435 continue;
437 close(fd);
438 #ifdef HAVE_SETPGID
439 kill(-child, SIGKILL);
440 #endif
441 int status = 0;
442 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
443 pid_to_kill_on_signal = 0;
444 throw ReadError(status);
446 if (out) out->append(buf, res);
449 close(fd);
450 #ifdef HAVE_SETPGID
451 kill(-child, SIGKILL);
452 #endif
453 int status = 0;
454 while (waitpid(child, &status, 0) < 0) {
455 if (errno != EINTR)
456 throw ReadError("wait pid failed");
458 pid_to_kill_on_signal = 0;
459 #else
460 (void)use_shell;
461 FILE * fh = popen(cmd.c_str(), "r");
462 if (fh == NULL) throw ReadError("popen failed");
463 while (!feof(fh)) {
464 char buf[4096];
465 size_t len = fread(buf, 1, 4096, fh);
466 if (ferror(fh)) {
467 (void)pclose(fh);
468 throw ReadError("fread failed");
470 if (out) out->append(buf, len);
472 int status = pclose(fh);
473 #endif
475 if (WIFEXITED(status)) {
476 int exit_status = WEXITSTATUS(status);
477 if (exit_status == 0 || exit_status == alt_status)
478 return;
479 if (exit_status == 127)
480 throw NoSuchFilter();
482 #ifdef SIGXCPU
483 if (WIFSIGNALED(status) && WTERMSIG(status) == SIGXCPU) {
484 cerr << "Filter process consumed too much CPU time" << endl;
486 #endif
487 throw ReadError(status);