2 * @brief Run an external filter and capture its output in a std::string.
4 /* Copyright (C) 2003-2024 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "runfilter.h"
29 #include <sys/types.h>
30 #include "safefcntl.h"
35 #ifdef HAVE_SYS_TIME_H
36 # include <sys/time.h>
38 #ifdef HAVE_SYS_RESOURCE_H
39 # include <sys/resource.h>
41 #include "safesysselect.h"
42 #include "safesyssocket.h"
43 #include "safesyswait.h"
44 #include "safeunistd.h"
46 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
50 #include "closefrom.h"
53 #include "stringutils.h"
// File descriptor for "/dev/null".  It starts out as -1; the start-up code
// in this file opens it write-only and re-dup()s it until it is above fd 2,
// so it can never be mistaken for stdin, stdout or stderr.
58 static int devnull
= -1;
61 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
63 command_needs_shell(const char * p
)
66 // Probably overly conservative, but suitable for
68 if (strchr("!\"#$&()*;<>?[\\]^`{|}~", *p
) != NULL
) {
76 unquote(string
& s
, size_t & j
)
84 j
= s
.find('\'', j
+ 1);
86 // Unmatched ' in command string.
87 // dash exits 2 in this case, bash exits 1.
90 // Replace four character sequence '\'' with ' - this is
91 // how a single quote inside single quotes gets escaped.
92 if (s
[j
+ 1] != '\\' ||
99 if (j
+ 1 != s
.size()) {
101 if (ch
!= ' ' && ch
!= '\t' && ch
!= '\n') {
102 // Handle the expansion of e.g.: --input=%f,html
109 j
= s
.find_first_of(" \t\n'", j
+ 1);
110 // Handle the expansion of e.g.: --input=%f
111 if (j
!= s
.npos
&& s
[j
] == '\'') goto single_quoted
;
// Target which handle_signal() sends SIGKILL to.  Zero means there is
// nothing to kill.  The parent side of run_filter() stores either the child's
// pid or its negation here (a negative value passed to kill() addresses a
// whole process group), and resets it to zero once the child has been reaped.
119 static pid_t pid_to_kill_on_signal
;
121 #ifdef HAVE_SIGACTION
// Signal actions which were in effect for SIGHUP, SIGINT, SIGQUIT and SIGTERM
// before handle_signal() was installed.  They are filled in by the sigaction()
// calls which install handle_signal(), and handle_signal() hands them back to
// sigaction() to put the earlier action in place again.
122 static struct sigaction old_hup_handler
;
123 static struct sigaction old_int_handler
;
124 static struct sigaction old_quit_handler
;
125 static struct sigaction old_term_handler
;
130 handle_signal(int signum
)
132 if (pid_to_kill_on_signal
) {
133 kill(pid_to_kill_on_signal
, SIGKILL
);
134 pid_to_kill_on_signal
= 0;
138 sigaction(signum
, &old_hup_handler
, NULL
);
141 sigaction(signum
, &old_int_handler
, NULL
);
144 sigaction(signum
, &old_quit_handler
, NULL
);
147 sigaction(signum
, &old_term_handler
, NULL
);
158 runfilter_init_signal_handlers_()
161 sa
.sa_handler
= handle_signal
;
162 sigemptyset(&sa
.sa_mask
);
165 sigaction(SIGHUP
, &sa
, &old_hup_handler
);
166 sigaction(SIGINT
, &sa
, &old_int_handler
);
167 sigaction(SIGQUIT
, &sa
, &old_quit_handler
);
168 sigaction(SIGTERM
, &sa
, &old_term_handler
);
// Handlers which were in effect for SIGHUP, SIGINT, SIGQUIT and SIGTERM
// before handle_signal() was installed.  Each one is the value returned by
// the signal() call which installs handle_signal(), and handle_signal() hands
// them back to signal() to put the earlier handler in place again.
171 static sighandler_t old_hup_handler
;
172 static sighandler_t old_int_handler
;
173 static sighandler_t old_quit_handler
;
174 static sighandler_t old_term_handler
;
179 handle_signal(int signum
)
181 if (pid_to_kill_on_signal
) {
182 kill(pid_to_kill_on_signal
, SIGKILL
);
183 pid_to_kill_on_signal
= 0;
187 signal(signum
, old_hup_handler
);
190 signal(signum
, old_int_handler
);
193 signal(signum
, old_quit_handler
);
196 signal(signum
, old_term_handler
);
207 runfilter_init_signal_handlers_()
209 old_hup_handler
= signal(SIGHUP
, handle_signal
);
210 old_int_handler
= signal(SIGINT
, handle_signal
);
211 old_quit_handler
= signal(SIGQUIT
, handle_signal
);
212 old_term_handler
= signal(SIGTERM
, handle_signal
);
217 command_needs_shell(const char *)
219 // We don't try to avoid the shell on this platform, so don't waste time
220 // analysing commands to see if they could.
225 runfilter_init_signal_handlers_()
233 runfilter_init_signal_handlers_();
235 devnull
= open("/dev/null", O_WRONLY
);
237 cerr
<< "Failed to open /dev/null: " << strerror(errno
) << endl
;
240 // Ensure that devnull isn't fd 0, 1 or 2 (stdin, stdout or stderr) and
241 // that we have open fds for stdin, stdout and stderr. This simplifies the
242 // code after fork() because it doesn't need to worry about such corner cases.
244 while (devnull
<= 2) {
245 devnull
= dup(devnull
);
251 run_filter(int fd_in
, const string
& cmd
, bool use_shell
, string
* out
,
254 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
255 // We want to be able to get the exit status of the child process.
256 signal(SIGCHLD
, SIG_DFL
);
259 if (socketpair(AF_UNIX
, SOCK_STREAM
, PF_UNSPEC
, fds
) < 0)
260 throw ReadError("socketpair failed");
261 // Ensure fds[1] != 0 to simplify handling in child process.
262 if (rare(fds
[1] == 0)) swap(fds
[0], fds
[1]);
264 pid_t child
= fork();
266 // We're the child process.
269 // Put the child process into its own process group, so that we can
270 // easily kill it and any children it in turn forks if we need to.
274 // Close the parent's side of the socket pair.
278 // Connect piped input to stdin if it's not already fd 0.
285 // Connect stdout to our side of the socket pair.
288 // Close extraneous file descriptors (but leave stderr alone).
291 #ifdef HAVE_SETRLIMIT
292 // Impose some pretty generous resource limits to prevent run-away
293 // filter programs from causing problems.
295 // Limit CPU time to 300 seconds (5 minutes).
296 struct rlimit cpu_limit
= { 300, RLIM_INFINITY
};
297 setrlimit(RLIMIT_CPU
, &cpu_limit
);
299 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
300 // Limit process data to free physical memory.
301 long mem
= get_free_physical_memory();
303 struct rlimit ram_limit
= {
304 static_cast<rlim_t
>(mem
),
308 setrlimit(RLIMIT_AS
, &ram_limit
);
309 #elif defined RLIMIT_VMEM
310 setrlimit(RLIMIT_VMEM
, &ram_limit
);
312 // Only limits the data segment rather than the total address
313 // space, but that's better than nothing.
314 setrlimit(RLIMIT_DATA
, &ram_limit
);
321 execl("/bin/sh", "/bin/sh", "-c", cmd
.c_str(), (void*)NULL
);
326 // Handle any environment variable assignments.
327 // Name must start with alpha or '_', contain only alphanumerics and
328 // '_', and there must be no quoting of either the name or the '='.
331 j
= s
.find_first_not_of(" \t\n", j
);
332 if (!(C_isalpha(s
[j
]) || s
[j
] == '_')) break;
334 do ++j
; while (C_isalnum(s
[j
]) || s
[j
] == '_');
343 setenv(&s
[i
], &s
[eq
+ 1], 1);
344 j
= s
.find_first_not_of(" \t\n", j
);
347 vector
<const char *> argv
;
349 size_t i
= s
.find_first_not_of(" \t\n", j
);
350 if (i
== string::npos
) break;
351 bool quoted
= unquote(s
, j
);
352 const char * word
= s
.c_str() + i
;
354 // Handle simple cases of redirection.
355 if (strcmp(word
, ">/dev/null") == 0) {
359 if (strcmp(word
, "2>/dev/null") == 0) {
363 if (strcmp(word
, "2>&1") == 0) {
367 if (strcmp(word
, "1>&2") == 0) {
372 argv
.push_back(word
);
374 if (argv
.empty()) _exit(0);
375 argv
.push_back(NULL
);
377 execvp(argv
[0], const_cast<char **>(&argv
[0]));
378 // Emulate shell behaviour and exit with status 127 if the command
379 // isn't found, and status 126 for other problems. In particular, we
380 // rely on 127 below to throw NoSuchFilter.
381 _exit(errno
== ENOENT
? 127 : 126);
384 // We're the parent process.
386 pid_to_kill_on_signal
= -child
;
388 pid_to_kill_on_signal
= child
;
391 // Close the child's side of the socket pair.
396 throw ReadError("fork failed");
404 // If we wait 300 seconds (5 minutes) without getting data from the
405 // filter, then give up to avoid waiting forever for a filter which
406 // has ended up blocked waiting for something which will never happen.
410 FD_SET(fd
, &readfds
);
411 int r
= select(fd
+ 1, &readfds
, NULL
, NULL
, &tv
);
414 if (errno
== EINTR
|| errno
== EAGAIN
) {
415 // select() interrupted by a signal, so retry.
418 cerr
<< "Reading from filter failed (" << strerror(errno
) << ")"
421 cerr
<< "Filter inactive for too long" << endl
;
424 kill(-child
, SIGKILL
);
426 kill(child
, SIGKILL
);
430 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
431 pid_to_kill_on_signal
= 0;
432 throw ReadError(status
);
436 ssize_t res
= read(fd
, buf
, sizeof(buf
));
439 if (errno
== EINTR
) {
440 // read() interrupted by a signal, so retry.
445 kill(-child
, SIGKILL
);
448 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
449 pid_to_kill_on_signal
= 0;
450 throw ReadError(status
);
452 if (out
) out
->append(buf
, res
);
457 kill(-child
, SIGKILL
);
460 while (waitpid(child
, &status
, 0) < 0) {
462 throw ReadError("wait pid failed");
464 pid_to_kill_on_signal
= 0;
466 if (WIFEXITED(status
)) {
467 int exit_status
= WEXITSTATUS(status
);
468 if (exit_status
== 0 || exit_status
== alt_status
)
470 if (exit_status
== 127)
471 throw NoSuchFilter();
474 if (WIFSIGNALED(status
) && WTERMSIG(status
) == SIGXCPU
) {
475 cerr
<< "Filter process consumed too much CPU time" << endl
;
480 LARGE_INTEGER counter
;
481 // QueryPerformanceCounter() will always succeed on XP and later
482 // and gives us a counter which increments each CPU clock cycle
483 // on modern hardware (Pentium or newer).
484 QueryPerformanceCounter(&counter
);
486 snprintf(pipename
, sizeof(pipename
),
487 "\\\\.\\pipe\\xapian-omega-filter-%lx-%lx_%" PRIx64
,
488 static_cast<unsigned long>(GetCurrentProcessId()),
489 static_cast<unsigned long>(GetCurrentThreadId()),
490 static_cast<unsigned long long>(counter
.QuadPart
));
491 pipename
[sizeof(pipename
) - 1] = '\0';
492 // Create a pipe so we can read stdout from the child process.
493 HANDLE hPipe
= CreateNamedPipe(pipename
,
494 PIPE_ACCESS_DUPLEX
|FILE_FLAG_OVERLAPPED
,
496 1, 4096, 4096, NMPWAIT_USE_DEFAULT_WAIT
,
499 if (hPipe
== INVALID_HANDLE_VALUE
) {
500 throw ReadError("CreateNamedPipe failed");
503 HANDLE hClient
= CreateFile(pipename
,
504 GENERIC_READ
|GENERIC_WRITE
, 0, NULL
,
506 FILE_FLAG_OVERLAPPED
, NULL
);
508 if (hClient
== INVALID_HANDLE_VALUE
) {
509 throw ReadError("CreateFile failed");
512 if (!ConnectNamedPipe(hPipe
, NULL
) &&
513 GetLastError() != ERROR_PIPE_CONNECTED
) {
514 throw ReadError("ConnectNamedPipe failed");
517 // Set the appropriate handles to be inherited by the child process.
518 SetHandleInformation(hClient
, HANDLE_FLAG_INHERIT
, 1);
520 // Create the child process.
521 PROCESS_INFORMATION procinfo
;
522 memset(&procinfo
, 0, sizeof(PROCESS_INFORMATION
));
524 STARTUPINFO startupinfo
;
525 memset(&startupinfo
, 0, sizeof(STARTUPINFO
));
526 startupinfo
.cb
= sizeof(STARTUPINFO
);
527 startupinfo
.hStdError
= GetStdHandle(STD_ERROR_HANDLE
);
528 startupinfo
.hStdOutput
= hClient
;
529 // FIXME: Is NULL the way to say "/dev/null"?
530 // It's what GetStdHandle() is documented to return if "an application does
531 // not have associated standard handles"...
532 startupinfo
.hStdInput
= fd_in
>= 0 ? (HANDLE
) _get_osfhandle(fd_in
) : NULL
;
533 startupinfo
.dwFlags
|= STARTF_USESTDHANDLES
;
536 // For some reason Windows wants a modifiable command line so we
537 // pass `&cmdline[0]` rather than `cmdline.c_str()`.
538 if (!CreateProcess(NULL
, &cmdline
[0],
540 &startupinfo
, &procinfo
)) {
541 if (GetLastError() == ERROR_FILE_NOT_FOUND
)
542 throw NoSuchFilter();
543 throw ReadError("CreateProcess failed");
546 CloseHandle(hClient
);
547 CloseHandle(procinfo
.hThread
);
548 HANDLE child
= procinfo
.hProcess
;
553 if (!ReadFile(hPipe
, buf
, sizeof(buf
), &received
, NULL
)) {
554 throw ReadError("ReadFile failed");
556 if (received
== 0) break;
558 if (out
) out
->append(buf
, received
);
562 WaitForSingleObject(child
, INFINITE
);
564 while (GetExitCodeProcess(child
, &rc
) && rc
== STILL_ACTIVE
) {
568 int status
= int(rc
);
569 if (status
== 0 || status
== alt_status
)
573 throw ReadError(status
);