Fix tg_termpos1 for 64-bit termpos
[xapian.git] / xapian-applications / omega / worker.h
blobae935723bb864b15d5a0b91baa452806d6503261
1 /** @file
2 * @brief Class representing worker process.
3 */
4 /* Copyright (C) 2011,2019,2022,2023 Olly Betts
5 * Copyright (C) 2019 Bruno Baruffaldi
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <cstdio>
24 #include <string>
25 #include <sys/types.h>
27 #ifdef __WIN32__
28 # include "safewindows.h"
29 #endif
31 /** An object to communicate with the assistant process
33 * An external library may contain bugs which could cause omindex to
34 * terminate or could block indexing. For that reason, a subprocess (the
35 * 'assistant') uses the external library and communicates the results. This
36 * way, library bugs are isolated and they cannot damage omindex.
38 * Each worker is associated with a particular assistant.
40 class Worker {
41 /// Workers ignore SIGPIPE.
42 static bool ignoring_sigpipe;
44 /// PID of the assistant process.
45 #ifndef __WIN32__
46 pid_t child;
47 #else
48 HANDLE child;
49 #endif
51 /** Socket for supporting communication between the worker
52 * and its assistant.
54 std::FILE* sockt = NULL;
56 /** Pathname of the assistant program.
58 * Set to empty on hard failure so we can hard fail right away if retried
59 * via a different mimemap entry.
61 std::string filter_module;
63 /** Prefix to add to error messages.
65 * This is the leafname of the assistant program followed by ": ".
67 std::string error_prefix;
69 /** This method creates the assistant subprocess.
71 * Return a negative or 0 or positive integer with the same semantics as
72 * the extract() method's return value.
74 int start_worker_subprocess();
76 /// In case of failure, an error message will be written to this.
77 std::string error;
79 public:
80 /** Construct a Worker.
82 * @param path Path to the assistant process.
84 * The assistant will not be started until it is necessary.
86 Worker(const std::string& path)
87 : filter_module(path) { } // Only records the path: sockt keeps its NULL default and child is not initialized here.
89 /** Extract information from a file through the assistant process.
91 * This method checks whether its assistant process is alive and starts it
92 * if necessary.
94 * @param filename Path to the file.
95 * @param mimetype Mimetype of the file.
96 * @param[out] dump Any body text.
97 * @param[out] title The title of the document.
98 * @param[out] keywords Any keywords.
99 * @param[out] author The author(s).
100 * @param[out] to Direct recipients (To: in email).
101 * @param[out] cc Additional recipients (Cc: in email).
102 * @param[out] bcc Hidden recipients (Bcc: in email).
103 * @param[out] message_id Message identifier (Message-Id: in email).
104 * @param[out] pages The number of pages (-1 if unknown).
105 * @param[out] created Created timestamp as time_t (-1 if unknown).
107 * @return 0 on success.
109 * Negative integer for a hard error (e.g. we fail to find the
110 * worker binary to run) - there's no point trying the same filter
111 * again in this run.
113 * Positive integer for a failure which is likely specific to the
114 * specified input file.
116 * Note: If it is not possible to get some information, the corresponding
117 * variable will hold an empty string. This situation is not considered
118 * to be an error.
120 int extract(const std::string& filename,
121 const std::string& mimetype,
122 std::string& dump,
123 std::string& title,
124 std::string& keywords,
125 std::string& author,
126 std::string& to,
127 std::string& cc,
128 std::string& bcc,
129 std::string& message_id,
130 int& pages,
131 time_t& created);
133 /** Returns an error message if the extraction fails, or an empty string
134 * if everything is okay.
136 std::string get_error() const {
137 return error;