Fix bug in PL2+ implementation
[xapian.git] / xapian-applications / omega / runfilter.cc
blob50cecf5783fda09ac1587f528bda0c8887f8911b
1 /** @file
2 * @brief Run an external filter and capture its output in a std::string.
3 */
4 /* Copyright (C) 2003-2024 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "runfilter.h"
25 #include <iostream>
26 #include <string>
27 #include <vector>
29 #include <sys/types.h>
30 #include "safefcntl.h"
31 #include <cerrno>
32 #include <cinttypes>
33 #include <cstdio>
34 #include <cstring>
35 #ifdef HAVE_SYS_TIME_H
36 # include <sys/time.h>
37 #endif
38 #ifdef HAVE_SYS_RESOURCE_H
39 # include <sys/resource.h>
40 #endif
41 #include "safesysselect.h"
42 #include "safesyssocket.h"
43 #include "safesyswait.h"
44 #include "safeunistd.h"
46 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
47 # include <signal.h>
48 #endif
50 #include "closefrom.h"
51 #include "freemem.h"
52 #include "setenv.h"
53 #include "stringutils.h"
55 using namespace std;
57 #ifndef __WIN32__
58 static int devnull = -1;
59 #endif
61 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
/** Decide whether a filter command has to be run via the shell.
 *
 *  @param p	NUL-terminated command string to inspect.
 *
 *  @return	true if @a p contains any character from a fixed set of shell
 *		metacharacters, false otherwise (including for an empty
 *		string).
 */
bool
command_needs_shell(const char * p)
{
    // Probably overly conservative, but suitable for real-world cases.
    static const char shell_metachars[] = "!\"#$&()*;<>?[\\]^`{|}~";
    return strpbrk(p, shell_metachars) != NULL;
}
/** Split the next shell-style word off a command string, in place.
 *
 *  Single-quoted sections may appear anywhere in the word (e.g. the expansion
 *  of --input=%f or --input=%f,html).  The quote characters are removed from
 *  @a s, the four character sequence '\'' is reduced to a single ', and the
 *  whitespace (or closing quote) which ends the word is overwritten with a NUL
 *  so that the word can be used as a C string pointing into @a s.
 *
 *  @param s	Command string; modified in place.
 *  @param j	On entry, the offset of the first character of the word.  On
 *		return, the offset just after the NUL which now terminates the
 *		word, or string::npos if the word ran to the end of @a s.
 *
 *  @return	true if any part of the word was single-quoted.
 *
 *  If a single quote is unmatched the process is terminated with _exit(2).
 */
static bool
unquote(string & s, size_t & j)
{
    bool quoted = false;
    if (s[j] == '\'') {
single_quoted:
        quoted = true;
        // Drop the opening quote, so s[j] is now the first quoted character.
        s.erase(j, 1);
        // Start looking for the closing quote at j itself (not j + 1) so that
        // an empty quoted string ('') is accepted instead of being reported
        // as an unmatched quote.
        size_t from = j;
        while (true) {
            j = s.find('\'', from);
            if (j == s.npos) {
                // Unmatched ' in command string.
                // dash exits 2 in this case, bash exits 1.
                _exit(2);
            }
            // Replace four character sequence '\'' with ' - this is
            // how a single quote inside single quotes gets escaped.
            // (Reading s[s.size()] is OK - it yields '\0' - and the tests
            // short-circuit so we never index beyond that.)
            if (s[j + 1] != '\\' ||
                s[j + 2] != '\'' ||
                s[j + 3] != '\'') {
                break;
            }
            s.erase(j + 1, 3);
            // s[j] is now a literal quote, so continue searching after it.
            from = j + 1;
        }
        if (j + 1 != s.size()) {
            char ch = s[j + 1];
            if (ch != ' ' && ch != '\t' && ch != '\n') {
                // Handle the expansion of e.g.: --input=%f,html
                //
                // The word carries on after the closing quote, so remove the
                // quote and keep scanning as unquoted text.
                s.erase(j, 1);
                goto out_of_quotes;
            }
        }
    } else {
out_of_quotes:
        j = s.find_first_of(" \t\n'", j + 1);
        // Handle the expansion of e.g.: --input=%f
        if (j != s.npos && s[j] == '\'') goto single_quoted;
    }
    if (j != s.npos) {
        // Terminate the word and step past the terminator.
        s[j++] = '\0';
    }
    return quoted;
}
119 static pid_t pid_to_kill_on_signal;
121 #ifdef HAVE_SIGACTION
122 static struct sigaction old_hup_handler;
123 static struct sigaction old_int_handler;
124 static struct sigaction old_quit_handler;
125 static struct sigaction old_term_handler;
127 extern "C" {
129 static void
130 handle_signal(int signum)
132 if (pid_to_kill_on_signal) {
133 kill(pid_to_kill_on_signal, SIGKILL);
134 pid_to_kill_on_signal = 0;
136 switch (signum) {
137 case SIGHUP:
138 sigaction(signum, &old_hup_handler, NULL);
139 break;
140 case SIGINT:
141 sigaction(signum, &old_int_handler, NULL);
142 break;
143 case SIGQUIT:
144 sigaction(signum, &old_quit_handler, NULL);
145 break;
146 case SIGTERM:
147 sigaction(signum, &old_term_handler, NULL);
148 break;
149 default:
150 return;
152 raise(signum);
157 static inline void
158 runfilter_init_signal_handlers_()
160 struct sigaction sa;
161 sa.sa_handler = handle_signal;
162 sigemptyset(&sa.sa_mask);
163 sa.sa_flags = 0;
165 sigaction(SIGHUP, &sa, &old_hup_handler);
166 sigaction(SIGINT, &sa, &old_int_handler);
167 sigaction(SIGQUIT, &sa, &old_quit_handler);
168 sigaction(SIGTERM, &sa, &old_term_handler);
170 #else
171 static sighandler_t old_hup_handler;
172 static sighandler_t old_int_handler;
173 static sighandler_t old_quit_handler;
174 static sighandler_t old_term_handler;
176 extern "C" {
178 static void
179 handle_signal(int signum)
181 if (pid_to_kill_on_signal) {
182 kill(pid_to_kill_on_signal, SIGKILL);
183 pid_to_kill_on_signal = 0;
185 switch (signum) {
186 case SIGHUP:
187 signal(signum, old_hup_handler);
188 break;
189 case SIGINT:
190 signal(signum, old_int_handler);
191 break;
192 case SIGQUIT:
193 signal(signum, old_quit_handler);
194 break;
195 case SIGTERM:
196 signal(signum, old_term_handler);
197 break;
198 default:
199 return;
201 raise(signum);
206 static inline void
207 runfilter_init_signal_handlers_()
209 old_hup_handler = signal(SIGHUP, handle_signal);
210 old_int_handler = signal(SIGINT, handle_signal);
211 old_quit_handler = signal(SIGQUIT, handle_signal);
212 old_term_handler = signal(SIGTERM, handle_signal);
214 #endif
215 #else
/** Decide whether a filter command has to be run via the shell.
 *
 *  This variant is used when fork() and socketpair() aren't both available.
 *  The command string is not examined.
 *
 *  @return	Always true.
 */
bool
command_needs_shell(const char *)
{
    // Running commands without the shell isn't attempted on this platform,
    // so there's no point spending time analysing the command.
    return true;
}
/** Signal handler setup for builds without fork() and socketpair().
 *
 *  Does nothing: no signal handlers are installed in this configuration.
 */
static inline void
runfilter_init_signal_handlers_()
{
    // Intentionally empty.
}
228 #endif
230 void
231 runfilter_init()
233 runfilter_init_signal_handlers_();
234 #ifndef __WIN32__
235 devnull = open("/dev/null", O_WRONLY);
236 if (devnull < 0) {
237 cerr << "Failed to open /dev/null: " << strerror(errno) << endl;
238 exit(1);
240 // Ensure that devnull isn't fd 0, 1 or 2 (stdin, stdout or stderr) and
241 // that we have open fds for stdin, stdout and stderr. This simplifies the
242 // code after fork() because it doesn't need to worry about such corner
243 // cases.
244 while (devnull <= 2) {
245 devnull = dup(devnull);
247 #endif
250 void
251 run_filter(int fd_in, const string& cmd, bool use_shell, string* out,
252 int alt_status)
254 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
255 // We want to be able to get the exit status of the child process.
256 signal(SIGCHLD, SIG_DFL);
258 int fds[2];
259 if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) < 0)
260 throw ReadError("socketpair failed");
261 // Ensure fds[1] != 0 to simplify handling in child process.
262 if (rare(fds[1] == 0)) swap(fds[0], fds[1]);
264 pid_t child = fork();
265 if (child == 0) {
266 // We're the child process.
268 #ifdef HAVE_SETPGID
269 // Put the child process into its own process group, so that we can
270 // easily kill it and any children it in turn forks if we need to.
271 setpgid(0, 0);
272 #endif
274 // Close the parent's side of the socket pair.
275 close(fds[0]);
277 if (fd_in > -1) {
278 // Connect piped input to stdin if it's not already fd 0.
279 if (fd_in != 0) {
280 dup2(fd_in, 0);
281 close(fd_in);
285 // Connect stdout to our side of the socket pair.
286 dup2(fds[1], 1);
288 // Close extraneous file descriptors (but leave stderr alone).
289 closefrom(3);
291 #ifdef HAVE_SETRLIMIT
292 // Impose some pretty generous resource limits to prevent run-away
293 // filter programs from causing problems.
295 // Limit CPU time to 300 seconds (5 minutes).
296 struct rlimit cpu_limit = { 300, RLIM_INFINITY };
297 setrlimit(RLIMIT_CPU, &cpu_limit);
299 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
300 // Limit process data to free physical memory.
301 long mem = get_free_physical_memory();
302 if (mem > 0) {
303 struct rlimit ram_limit = {
304 static_cast<rlim_t>(mem),
305 RLIM_INFINITY
307 #ifdef RLIMIT_AS
308 setrlimit(RLIMIT_AS, &ram_limit);
309 #elif defined RLIMIT_VMEM
310 setrlimit(RLIMIT_VMEM, &ram_limit);
311 #else
312 // Only limits the data segment rather than the total address
313 // space, but that's better than nothing.
314 setrlimit(RLIMIT_DATA, &ram_limit);
315 #endif
317 #endif
318 #endif
320 if (use_shell) {
321 execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL);
322 _exit(-1);
325 string s(cmd);
326 // Handle any environment variable assignments.
327 // Name must start with alpha or '_', contain only alphanumerics and
328 // '_', and there must be no quoting of either the name or the '='.
329 size_t j = 0;
330 while (true) {
331 j = s.find_first_not_of(" \t\n", j);
332 if (!(C_isalpha(s[j]) || s[j] == '_')) break;
333 size_t i = j;
334 do ++j; while (C_isalnum(s[j]) || s[j] == '_');
335 if (s[j] != '=') {
336 j = i;
337 break;
340 size_t eq = j;
341 unquote(s, j);
342 s[eq] = '\0';
343 setenv(&s[i], &s[eq + 1], 1);
344 j = s.find_first_not_of(" \t\n", j);
347 vector<const char *> argv;
348 while (true) {
349 size_t i = s.find_first_not_of(" \t\n", j);
350 if (i == string::npos) break;
351 bool quoted = unquote(s, j);
352 const char * word = s.c_str() + i;
353 if (!quoted) {
354 // Handle simple cases of redirection.
355 if (strcmp(word, ">/dev/null") == 0) {
356 dup2(devnull, 1);
357 continue;
359 if (strcmp(word, "2>/dev/null") == 0) {
360 dup2(devnull, 2);
361 continue;
363 if (strcmp(word, "2>&1") == 0) {
364 dup2(1, 2);
365 continue;
367 if (strcmp(word, "1>&2") == 0) {
368 dup2(2, 1);
369 continue;
372 argv.push_back(word);
374 if (argv.empty()) _exit(0);
375 argv.push_back(NULL);
377 execvp(argv[0], const_cast<char **>(&argv[0]));
378 // Emulate shell behaviour and exit with status 127 if the command
379 // isn't found, and status 126 for other problems. In particular, we
380 // rely on 127 below to throw NoSuchFilter.
381 _exit(errno == ENOENT ? 127 : 126);
384 // We're the parent process.
385 #ifdef HAVE_SETPGID
386 pid_to_kill_on_signal = -child;
387 #else
388 pid_to_kill_on_signal = child;
389 #endif
391 // Close the child's side of the socket pair.
392 close(fds[1]);
393 if (child == -1) {
394 // fork() failed.
395 close(fds[0]);
396 throw ReadError("fork failed");
399 int fd = fds[0];
401 fd_set readfds;
402 FD_ZERO(&readfds);
403 while (true) {
404 // If we wait 300 seconds (5 minutes) without getting data from the
405 // filter, then give up to avoid waiting forever for a filter which
406 // has ended up blocked waiting for something which will never happen.
407 struct timeval tv;
408 tv.tv_sec = 300;
409 tv.tv_usec = 0;
410 FD_SET(fd, &readfds);
411 int r = select(fd + 1, &readfds, NULL, NULL, &tv);
412 if (r <= 0) {
413 if (r < 0) {
414 if (errno == EINTR || errno == EAGAIN) {
415 // select() interrupted by a signal, so retry.
416 continue;
418 cerr << "Reading from filter failed (" << strerror(errno) << ")"
419 << endl;
420 } else {
421 cerr << "Filter inactive for too long" << endl;
423 #ifdef HAVE_SETPGID
424 kill(-child, SIGKILL);
425 #else
426 kill(child, SIGKILL);
427 #endif
428 close(fd);
429 int status = 0;
430 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
431 pid_to_kill_on_signal = 0;
432 throw ReadError(status);
435 char buf[4096];
436 ssize_t res = read(fd, buf, sizeof(buf));
437 if (res == 0) break;
438 if (res == -1) {
439 if (errno == EINTR) {
440 // read() interrupted by a signal, so retry.
441 continue;
443 close(fd);
444 #ifdef HAVE_SETPGID
445 kill(-child, SIGKILL);
446 #endif
447 int status = 0;
448 while (waitpid(child, &status, 0) < 0 && errno == EINTR) { }
449 pid_to_kill_on_signal = 0;
450 throw ReadError(status);
452 if (out) out->append(buf, res);
455 close(fd);
456 #ifdef HAVE_SETPGID
457 kill(-child, SIGKILL);
458 #endif
459 int status = 0;
460 while (waitpid(child, &status, 0) < 0) {
461 if (errno != EINTR)
462 throw ReadError("wait pid failed");
464 pid_to_kill_on_signal = 0;
466 if (WIFEXITED(status)) {
467 int exit_status = WEXITSTATUS(status);
468 if (exit_status == 0 || exit_status == alt_status)
469 return;
470 if (exit_status == 127)
471 throw NoSuchFilter();
473 # ifdef SIGXCPU
474 if (WIFSIGNALED(status) && WTERMSIG(status) == SIGXCPU) {
475 cerr << "Filter process consumed too much CPU time" << endl;
477 # endif
478 #else
479 (void)use_shell;
480 LARGE_INTEGER counter;
481 // QueryPerformanceCounter() will always succeed on XP and later
482 // and gives us a counter which increments each CPU clock cycle
483 // on modern hardware (Pentium or newer).
484 QueryPerformanceCounter(&counter);
485 char pipename[256];
486 snprintf(pipename, sizeof(pipename),
487 "\\\\.\\pipe\\xapian-omega-filter-%lx-%lx_%" PRIx64,
488 static_cast<unsigned long>(GetCurrentProcessId()),
489 static_cast<unsigned long>(GetCurrentThreadId()),
490 static_cast<unsigned long long>(counter.QuadPart));
491 pipename[sizeof(pipename) - 1] = '\0';
492 // Create a pipe so we can read stdout from the child process.
493 HANDLE hPipe = CreateNamedPipe(pipename,
494 PIPE_ACCESS_DUPLEX|FILE_FLAG_OVERLAPPED,
496 1, 4096, 4096, NMPWAIT_USE_DEFAULT_WAIT,
497 NULL);
499 if (hPipe == INVALID_HANDLE_VALUE) {
500 throw ReadError("CreateNamedPipe failed");
503 HANDLE hClient = CreateFile(pipename,
504 GENERIC_READ|GENERIC_WRITE, 0, NULL,
505 OPEN_EXISTING,
506 FILE_FLAG_OVERLAPPED, NULL);
508 if (hClient == INVALID_HANDLE_VALUE) {
509 throw ReadError("CreateFile failed");
512 if (!ConnectNamedPipe(hPipe, NULL) &&
513 GetLastError() != ERROR_PIPE_CONNECTED) {
514 throw ReadError("ConnectNamedPipe failed");
517 // Set the appropriate handles to be inherited by the child process.
518 SetHandleInformation(hClient, HANDLE_FLAG_INHERIT, 1);
520 // Create the child process.
521 PROCESS_INFORMATION procinfo;
522 memset(&procinfo, 0, sizeof(PROCESS_INFORMATION));
524 STARTUPINFO startupinfo;
525 memset(&startupinfo, 0, sizeof(STARTUPINFO));
526 startupinfo.cb = sizeof(STARTUPINFO);
527 startupinfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
528 startupinfo.hStdOutput = hClient;
529 // FIXME: Is NULL the way to say "/dev/null"?
530 // It's what GetStdHandle() is documented to return if "an application does
531 // not have associated standard handles"...
532 startupinfo.hStdInput = fd_in >= 0 ? (HANDLE) _get_osfhandle(fd_in) : NULL;
533 startupinfo.dwFlags |= STARTF_USESTDHANDLES;
535 string cmdline{cmd};
536 // For some reason Windows wants a modifiable command line so we
537 // pass `&cmdline[0]` rather than `cmdline.c_str()`.
538 if (!CreateProcess(NULL, &cmdline[0],
539 0, 0, TRUE, 0, 0, 0,
540 &startupinfo, &procinfo)) {
541 if (GetLastError() == ERROR_FILE_NOT_FOUND)
542 throw NoSuchFilter();
543 throw ReadError("CreateProcess failed");
546 CloseHandle(hClient);
547 CloseHandle(procinfo.hThread);
548 HANDLE child = procinfo.hProcess;
550 while (true) {
551 char buf[4096];
552 DWORD received;
553 if (!ReadFile(hPipe, buf, sizeof(buf), &received, NULL)) {
554 throw ReadError("ReadFile failed");
556 if (received == 0) break;
558 if (out) out->append(buf, received);
560 CloseHandle(hPipe);
562 WaitForSingleObject(child, INFINITE);
563 DWORD rc;
564 while (GetExitCodeProcess(child, &rc) && rc == STILL_ACTIVE) {
565 Sleep(100);
567 CloseHandle(child);
568 int status = int(rc);
569 if (status == 0 || status == alt_status)
570 return;
572 #endif
573 throw ReadError(status);