2 * @brief Run an external filter and capture its output in a std::string.
4 /* Copyright (C) 2003-2024 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "runfilter.h"
29 #include <sys/types.h>
30 #include "safefcntl.h"
35 #ifdef HAVE_SYS_TIME_H
36 # include <sys/time.h>
38 #ifdef HAVE_SYS_RESOURCE_H
39 # include <sys/resource.h>
41 #include "safesysselect.h"
42 #include "safesyssocket.h"
43 #include "safesyswait.h"
44 #include "safeunistd.h"
46 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
50 #include "closefrom.h"
53 #include "stringutils.h"
56 # include "append_filename_arg.h"
62 static int devnull
= -1;
65 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
67 command_needs_shell(const char * p
)
70 // Probably overly conservative, but suitable for
72 if (strchr("!\"#$&()*;<>?[\\]^`{|}~", *p
) != NULL
) {
80 unquote(string
& s
, size_t & j
)
88 j
= s
.find('\'', j
+ 1);
90 // Unmatched ' in command string.
91 // dash exits 2 in this case, bash exits 1.
92 throw ReadError(2 << 8);
94 // Replace four character sequence '\'' with ' - this is
95 // how a single quote inside single quotes gets escaped.
96 if (s
[j
+ 1] != '\\' ||
103 if (j
+ 1 != s
.size()) {
105 if (ch
!= ' ' && ch
!= '\t' && ch
!= '\n') {
106 // Handle the expansion of e.g.: --input=%f,html
113 j
= s
.find_first_of(" \t\n'", j
+ 1);
114 // Handle the expansion of e.g.: --input=%f
115 if (j
!= s
.npos
&& s
[j
] == '\'') goto single_quoted
;
123 static pid_t pid_to_kill_on_signal
;
125 #ifdef HAVE_SIGACTION
126 static struct sigaction old_hup_handler
;
127 static struct sigaction old_int_handler
;
128 static struct sigaction old_quit_handler
;
129 static struct sigaction old_term_handler
;
134 handle_signal(int signum
)
136 if (pid_to_kill_on_signal
) {
137 kill(pid_to_kill_on_signal
, SIGKILL
);
138 pid_to_kill_on_signal
= 0;
142 sigaction(signum
, &old_hup_handler
, NULL
);
145 sigaction(signum
, &old_int_handler
, NULL
);
148 sigaction(signum
, &old_quit_handler
, NULL
);
151 sigaction(signum
, &old_term_handler
, NULL
);
162 runfilter_init_signal_handlers_()
165 sa
.sa_handler
= handle_signal
;
166 sigemptyset(&sa
.sa_mask
);
169 sigaction(SIGHUP
, &sa
, &old_hup_handler
);
170 sigaction(SIGINT
, &sa
, &old_int_handler
);
171 sigaction(SIGQUIT
, &sa
, &old_quit_handler
);
172 sigaction(SIGTERM
, &sa
, &old_term_handler
);
175 static sighandler_t old_hup_handler
;
176 static sighandler_t old_int_handler
;
177 static sighandler_t old_quit_handler
;
178 static sighandler_t old_term_handler
;
183 handle_signal(int signum
)
185 if (pid_to_kill_on_signal
) {
186 kill(pid_to_kill_on_signal
, SIGKILL
);
187 pid_to_kill_on_signal
= 0;
191 signal(signum
, old_hup_handler
);
194 signal(signum
, old_int_handler
);
197 signal(signum
, old_quit_handler
);
200 signal(signum
, old_term_handler
);
211 runfilter_init_signal_handlers_()
213 old_hup_handler
= signal(SIGHUP
, handle_signal
);
214 old_int_handler
= signal(SIGINT
, handle_signal
);
215 old_quit_handler
= signal(SIGQUIT
, handle_signal
);
216 old_term_handler
= signal(SIGTERM
, handle_signal
);
221 command_needs_shell(const char *)
223 // We don't try to avoid the shell on this platform, so don't waste time
224 // analysing commands to see if they could.
229 runfilter_init_signal_handlers_()
237 runfilter_init_signal_handlers_();
239 devnull
= open("/dev/null", O_WRONLY
);
241 cerr
<< "Failed to open /dev/null: " << strerror(errno
) << endl
;
244 // Ensure that devnull isn't fd 0, 1 or 2 (stdin, stdout or stderr) and
245 // that we have open fds for stdin, stdout and stderr. This simplifies the
246 // code after fork() because it doesn't need to worry about such corner
248 while (devnull
<= 2) {
249 devnull
= dup(devnull
);
255 run_filter(int fd_in
, const char* const cmd
[], string
* out
, int alt_status
)
257 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
258 // We want to be able to get the exit status of the child process.
259 signal(SIGCHLD
, SIG_DFL
);
262 if (socketpair(AF_UNIX
, SOCK_STREAM
, PF_UNSPEC
, fds
) < 0)
263 throw ReadError("socketpair failed");
264 // Ensure fds[1] != 0 to simplify handling in child process.
265 if (rare(fds
[1] == 0)) swap(fds
[0], fds
[1]);
267 pid_t child
= fork();
269 // We're the child process.
272 // Put the child process into its own process group, so that we can
273 // easily kill it and any children it in turn forks if we need to.
277 // Close the parent's side of the socket pair.
281 // Connect piped input to stdin if it's not already fd 0.
288 // Connect stdout to our side of the socket pair.
291 // Close extraneous file descriptors (but leave stderr alone).
294 #ifdef HAVE_SETRLIMIT
295 // Impose some pretty generous resource limits to prevent run-away
296 // filter programs from causing problems.
298 // Limit CPU time to 300 seconds (5 minutes).
299 struct rlimit cpu_limit
= { 300, RLIM_INFINITY
};
300 setrlimit(RLIMIT_CPU
, &cpu_limit
);
302 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
303 // Limit process data to free physical memory.
304 long mem
= get_free_physical_memory();
306 struct rlimit ram_limit
= {
307 static_cast<rlim_t
>(mem
),
310 // FIXME: setrlimit() is not listed in signal-safety(7) as safe to
311 // call between fork() and exec...
313 setrlimit(RLIMIT_AS
, &ram_limit
);
314 #elif defined RLIMIT_VMEM
315 setrlimit(RLIMIT_VMEM
, &ram_limit
);
317 // Only limits the data segment rather than the total address
318 // space, but that's better than nothing.
319 setrlimit(RLIMIT_DATA
, &ram_limit
);
325 execvp(cmd
[0], const_cast<char **>(cmd
));
326 // Emulate shell behaviour and exit with status 127 if the command
327 // isn't found, and status 126 for other problems. In particular, we
328 // rely on 127 below to throw NoSuchFilter.
329 _exit(errno
== ENOENT
? 127 : 126);
332 // We're the parent process.
334 pid_to_kill_on_signal
= -child
;
336 pid_to_kill_on_signal
= child
;
339 // Close the child's side of the socket pair.
344 throw ReadError("fork failed");
352 // If we wait 300 seconds (5 minutes) without getting data from the
353 // filter, then give up to avoid waiting forever for a filter which
354 // has ended up blocked waiting for something which will never happen.
358 FD_SET(fd
, &readfds
);
359 int r
= select(fd
+ 1, &readfds
, NULL
, NULL
, &tv
);
362 if (errno
== EINTR
|| errno
== EAGAIN
) {
363 // select() interrupted by a signal, so retry.
366 cerr
<< "Reading from filter failed (" << strerror(errno
) << ")"
369 cerr
<< "Filter inactive for too long" << endl
;
372 kill(-child
, SIGKILL
);
374 kill(child
, SIGKILL
);
378 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
379 pid_to_kill_on_signal
= 0;
380 throw ReadError(status
);
384 ssize_t res
= read(fd
, buf
, sizeof(buf
));
387 if (errno
== EINTR
) {
388 // read() interrupted by a signal, so retry.
393 kill(-child
, SIGKILL
);
396 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
397 pid_to_kill_on_signal
= 0;
398 throw ReadError(status
);
400 if (out
) out
->append(buf
, res
);
405 kill(-child
, SIGKILL
);
408 while (waitpid(child
, &status
, 0) < 0) {
410 throw ReadError("wait pid failed");
412 pid_to_kill_on_signal
= 0;
414 if (WIFEXITED(status
)) {
415 int exit_status
= WEXITSTATUS(status
);
416 if (exit_status
== 0 || exit_status
== alt_status
)
418 if (exit_status
== 127)
419 throw NoSuchFilter();
422 if (WIFSIGNALED(status
) && WTERMSIG(status
) == SIGXCPU
) {
423 cerr
<< "Filter process consumed too much CPU time" << endl
;
427 LARGE_INTEGER counter
;
428 // QueryPerformanceCounter() will always succeed on XP and later
429 // and gives us a counter which increments each CPU clock cycle
430 // on modern hardware (Pentium or newer).
431 QueryPerformanceCounter(&counter
);
433 snprintf(pipename
, sizeof(pipename
),
434 "\\\\.\\pipe\\xapian-omega-filter-%lx-%lx_%" PRIx64
,
435 static_cast<unsigned long>(GetCurrentProcessId()),
436 static_cast<unsigned long>(GetCurrentThreadId()),
437 static_cast<unsigned long long>(counter
.QuadPart
));
438 pipename
[sizeof(pipename
) - 1] = '\0';
439 // Create a pipe so we can read stdout from the child process.
440 HANDLE hPipe
= CreateNamedPipe(pipename
,
441 PIPE_ACCESS_DUPLEX
|FILE_FLAG_OVERLAPPED
,
443 1, 4096, 4096, NMPWAIT_USE_DEFAULT_WAIT
,
446 if (hPipe
== INVALID_HANDLE_VALUE
) {
447 throw ReadError("CreateNamedPipe failed");
450 HANDLE hClient
= CreateFile(pipename
,
451 GENERIC_READ
|GENERIC_WRITE
, 0, NULL
,
453 FILE_FLAG_OVERLAPPED
, NULL
);
455 if (hClient
== INVALID_HANDLE_VALUE
) {
456 throw ReadError("CreateFile failed");
459 if (!ConnectNamedPipe(hPipe
, NULL
) &&
460 GetLastError() != ERROR_PIPE_CONNECTED
) {
461 throw ReadError("ConnectNamedPipe failed");
464 // Set the appropriate handles to be inherited by the child process.
465 SetHandleInformation(hClient
, HANDLE_FLAG_INHERIT
, 1);
467 // Create the child process.
468 PROCESS_INFORMATION procinfo
;
469 memset(&procinfo
, 0, sizeof(PROCESS_INFORMATION
));
471 STARTUPINFO startupinfo
;
472 memset(&startupinfo
, 0, sizeof(STARTUPINFO
));
473 startupinfo
.cb
= sizeof(STARTUPINFO
);
474 startupinfo
.hStdError
= GetStdHandle(STD_ERROR_HANDLE
);
475 startupinfo
.hStdOutput
= hClient
;
476 // FIXME: Is NULL the way to say "/dev/null"?
477 // It's what GetStdHandle() is documented to return if "an application does
478 // not have associated standard handles"...
479 startupinfo
.hStdInput
= fd_in
>= 0 ? (HANDLE
) _get_osfhandle(fd_in
) : NULL
;
480 startupinfo
.dwFlags
|= STARTF_USESTDHANDLES
;
483 for (auto i
= cmd
; *i
; ++i
) {
484 append_filename_argument(cmdline
, *i
, (i
!= cmd
));
486 // For some reason Windows wants a modifiable command line so we
487 // pass `&cmdline[0]` rather than `cmdline.c_str()`.
488 if (!CreateProcess(NULL
, &cmdline
[0],
490 &startupinfo
, &procinfo
)) {
491 if (GetLastError() == ERROR_FILE_NOT_FOUND
)
492 throw NoSuchFilter();
493 throw ReadError("CreateProcess failed");
496 CloseHandle(hClient
);
497 CloseHandle(procinfo
.hThread
);
498 HANDLE child
= procinfo
.hProcess
;
503 if (!ReadFile(hPipe
, buf
, sizeof(buf
), &received
, NULL
)) {
504 throw ReadError("ReadFile failed");
506 if (received
== 0) break;
508 if (out
) out
->append(buf
, received
);
512 WaitForSingleObject(child
, INFINITE
);
514 while (GetExitCodeProcess(child
, &rc
) && rc
== STILL_ACTIVE
) {
518 int status
= int(rc
);
519 if (status
== 0 || status
== alt_status
)
523 throw ReadError(status
);
527 run_filter(int fd_in
, const string
& cmd
, bool use_shell
, string
* out
,
530 #if defined HAVE_FORK && defined HAVE_SOCKETPAIR
531 // We want to be able to get the exit status of the child process.
532 signal(SIGCHLD
, SIG_DFL
);
535 if (socketpair(AF_UNIX
, SOCK_STREAM
, PF_UNSPEC
, fds
) < 0)
536 throw ReadError("socketpair failed");
537 // Ensure fds[1] != 0 to simplify handling in child process.
538 if (rare(fds
[1] == 0)) swap(fds
[0], fds
[1]);
541 vector
<const char *> argv
;
542 vector
<pair
<const char *, const char*>> env
;
543 vector
<pair
<int, int>> dups
;
545 // Parse the command line before we fork(), because it's not safe to call
546 // malloc() between fork() and exec, and std::string and std::vector
547 // creation is likely to need to allocate memory.
549 // FIXME: Maybe we should do this once per command and cache the
552 // Handle any environment variable assignments.
553 // Name must start with alpha or '_', contain only alphanumerics and
554 // '_', and there must be no quoting of either the name or the '='.
557 j
= s
.find_first_not_of(" \t\n", j
);
558 if (!(C_isalpha(s
[j
]) || s
[j
] == '_')) break;
560 do ++j
; while (C_isalnum(s
[j
]) || s
[j
] == '_');
569 env
.emplace_back(&s
[i
], &s
[eq
+ 1]);
570 j
= s
.find_first_not_of(" \t\n", j
);
574 size_t i
= s
.find_first_not_of(" \t\n", j
);
575 if (i
== string::npos
) break;
576 bool quoted
= unquote(s
, j
);
577 const char * word
= s
.c_str() + i
;
579 // Handle simple cases of redirection.
580 if (strcmp(word
, ">/dev/null") == 0) {
581 dups
.emplace_back(devnull
, 1);
584 if (strcmp(word
, "2>/dev/null") == 0) {
585 dups
.emplace_back(devnull
, 2);
588 if (strcmp(word
, "2>&1") == 0) {
589 dups
.emplace_back(1, 2);
592 if (strcmp(word
, "1>&2") == 0) {
593 dups
.emplace_back(2, 1);
597 argv
.push_back(word
);
599 if (argv
.empty()) return; // Empty command!
600 argv
.push_back(NULL
);
603 pid_t child
= fork();
605 // We're the child process.
608 // Put the child process into its own process group, so that we can
609 // easily kill it and any children it in turn forks if we need to.
613 // Close the parent's side of the socket pair.
617 // Connect piped input to stdin if it's not already fd 0.
624 // Connect stdout to our side of the socket pair.
627 // Close extraneous file descriptors (but leave stderr alone).
630 #ifdef HAVE_SETRLIMIT
631 // Impose some pretty generous resource limits to prevent run-away
632 // filter programs from causing problems.
634 // Limit CPU time to 300 seconds (5 minutes).
635 struct rlimit cpu_limit
= { 300, RLIM_INFINITY
};
636 setrlimit(RLIMIT_CPU
, &cpu_limit
);
638 #if defined RLIMIT_AS || defined RLIMIT_VMEM || defined RLIMIT_DATA
639 // Limit process data to free physical memory.
640 long mem
= get_free_physical_memory();
642 struct rlimit ram_limit
= {
643 static_cast<rlim_t
>(mem
),
646 // FIXME: setrlimit() is not listed in signal-safety(7) as safe to
647 // call between fork() and exec...
649 setrlimit(RLIMIT_AS
, &ram_limit
);
650 #elif defined RLIMIT_VMEM
651 setrlimit(RLIMIT_VMEM
, &ram_limit
);
653 // Only limits the data segment rather than the total address
654 // space, but that's better than nothing.
655 setrlimit(RLIMIT_DATA
, &ram_limit
);
662 execl("/bin/sh", "/bin/sh", "-c", cmd
.c_str(), (void*)NULL
);
666 // Process any environment variable assignments.
667 for (auto& e
: env
) {
668 setenv(e
.first
, e
.second
, 1);
671 // Process any redirections.
672 for (auto& d
: dups
) {
673 dup2(d
.first
, d
.second
);
676 execvp(argv
[0], const_cast<char **>(&argv
[0]));
677 // Emulate shell behaviour and exit with status 127 if the command
678 // isn't found, and status 126 for other problems. In particular, we
679 // rely on 127 below to throw NoSuchFilter.
680 _exit(errno
== ENOENT
? 127 : 126);
683 // We're the parent process.
685 pid_to_kill_on_signal
= -child
;
687 pid_to_kill_on_signal
= child
;
690 // Close the child's side of the socket pair.
695 throw ReadError("fork failed");
703 // If we wait 300 seconds (5 minutes) without getting data from the
704 // filter, then give up to avoid waiting forever for a filter which
705 // has ended up blocked waiting for something which will never happen.
709 FD_SET(fd
, &readfds
);
710 int r
= select(fd
+ 1, &readfds
, NULL
, NULL
, &tv
);
713 if (errno
== EINTR
|| errno
== EAGAIN
) {
714 // select() interrupted by a signal, so retry.
717 cerr
<< "Reading from filter failed (" << strerror(errno
) << ")"
720 cerr
<< "Filter inactive for too long" << endl
;
723 kill(-child
, SIGKILL
);
725 kill(child
, SIGKILL
);
729 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
730 pid_to_kill_on_signal
= 0;
731 throw ReadError(status
);
735 ssize_t res
= read(fd
, buf
, sizeof(buf
));
738 if (errno
== EINTR
) {
739 // read() interrupted by a signal, so retry.
744 kill(-child
, SIGKILL
);
747 while (waitpid(child
, &status
, 0) < 0 && errno
== EINTR
) { }
748 pid_to_kill_on_signal
= 0;
749 throw ReadError(status
);
751 if (out
) out
->append(buf
, res
);
756 kill(-child
, SIGKILL
);
759 while (waitpid(child
, &status
, 0) < 0) {
761 throw ReadError("wait pid failed");
763 pid_to_kill_on_signal
= 0;
765 if (WIFEXITED(status
)) {
766 int exit_status
= WEXITSTATUS(status
);
767 if (exit_status
== 0 || exit_status
== alt_status
)
769 if (exit_status
== 127)
770 throw NoSuchFilter();
773 if (WIFSIGNALED(status
) && WTERMSIG(status
) == SIGXCPU
) {
774 cerr
<< "Filter process consumed too much CPU time" << endl
;
779 LARGE_INTEGER counter
;
780 // QueryPerformanceCounter() will always succeed on XP and later
781 // and gives us a counter which increments each CPU clock cycle
782 // on modern hardware (Pentium or newer).
783 QueryPerformanceCounter(&counter
);
785 snprintf(pipename
, sizeof(pipename
),
786 "\\\\.\\pipe\\xapian-omega-filter-%lx-%lx_%" PRIx64
,
787 static_cast<unsigned long>(GetCurrentProcessId()),
788 static_cast<unsigned long>(GetCurrentThreadId()),
789 static_cast<unsigned long long>(counter
.QuadPart
));
790 pipename
[sizeof(pipename
) - 1] = '\0';
791 // Create a pipe so we can read stdout from the child process.
792 HANDLE hPipe
= CreateNamedPipe(pipename
,
793 PIPE_ACCESS_DUPLEX
|FILE_FLAG_OVERLAPPED
,
795 1, 4096, 4096, NMPWAIT_USE_DEFAULT_WAIT
,
798 if (hPipe
== INVALID_HANDLE_VALUE
) {
799 throw ReadError("CreateNamedPipe failed");
802 HANDLE hClient
= CreateFile(pipename
,
803 GENERIC_READ
|GENERIC_WRITE
, 0, NULL
,
805 FILE_FLAG_OVERLAPPED
, NULL
);
807 if (hClient
== INVALID_HANDLE_VALUE
) {
808 throw ReadError("CreateFile failed");
811 if (!ConnectNamedPipe(hPipe
, NULL
) &&
812 GetLastError() != ERROR_PIPE_CONNECTED
) {
813 throw ReadError("ConnectNamedPipe failed");
816 // Set the appropriate handles to be inherited by the child process.
817 SetHandleInformation(hClient
, HANDLE_FLAG_INHERIT
, 1);
819 // Create the child process.
820 PROCESS_INFORMATION procinfo
;
821 memset(&procinfo
, 0, sizeof(PROCESS_INFORMATION
));
823 STARTUPINFO startupinfo
;
824 memset(&startupinfo
, 0, sizeof(STARTUPINFO
));
825 startupinfo
.cb
= sizeof(STARTUPINFO
);
826 startupinfo
.hStdError
= GetStdHandle(STD_ERROR_HANDLE
);
827 startupinfo
.hStdOutput
= hClient
;
828 // FIXME: Is NULL the way to say "/dev/null"?
829 // It's what GetStdHandle() is documented to return if "an application does
830 // not have associated standard handles"...
831 startupinfo
.hStdInput
= fd_in
>= 0 ? (HANDLE
) _get_osfhandle(fd_in
) : NULL
;
832 startupinfo
.dwFlags
|= STARTF_USESTDHANDLES
;
835 // For some reason Windows wants a modifiable command line so we
836 // pass `&cmdline[0]` rather than `cmdline.c_str()`.
837 if (!CreateProcess(NULL
, &cmdline
[0],
839 &startupinfo
, &procinfo
)) {
840 if (GetLastError() == ERROR_FILE_NOT_FOUND
)
841 throw NoSuchFilter();
842 throw ReadError("CreateProcess failed");
845 CloseHandle(hClient
);
846 CloseHandle(procinfo
.hThread
);
847 HANDLE child
= procinfo
.hProcess
;
852 if (!ReadFile(hPipe
, buf
, sizeof(buf
), &received
, NULL
)) {
853 throw ReadError("ReadFile failed");
855 if (received
== 0) break;
857 if (out
) out
->append(buf
, received
);
861 WaitForSingleObject(child
, INFINITE
);
863 while (GetExitCodeProcess(child
, &rc
) && rc
== STILL_ACTIVE
) {
867 int status
= int(rc
);
868 if (status
== 0 || status
== alt_status
)
872 throw ReadError(status
);