/** @file
 * @brief Handle indexing a document from a file
 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2005 James Aylett
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2017 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
25 #ifndef OMEGA_INCLUDED_INDEX_FILE_H
26 #define OMEGA_INCLUDED_INDEX_FILE_H
28 #include <sys/types.h>
/// Forward declaration only: this header refers to DirectoryIterator purely
/// by reference (see index_mimetype()), so the full definition isn't needed.
class DirectoryIterator;
/** Bit flags which can be OR-ed together.
 *
 *  Each enumerator occupies a distinct bit.  NOTE(review): presumably these
 *  are the values accepted by the @a flags parameter of skip() - confirm.
 */
enum skip_flags {
    SKIP_VERBOSE_ONLY = 0x01,
    SKIP_SHOW_FILENAME = 0x02
};
// Policy selector passed to index_init() as its empty_body_ argument.
// Going by the enumerator names the choices are to warn, to index anyway, or
// to skip when a document's body is empty - TODO confirm against the indexer.
// NOTE(review): no closing "};" for this enum is visible in this listing.
37 enum empty_body_type
{
38 EMPTY_BODY_WARN
, EMPTY_BODY_INDEX
, EMPTY_BODY_SKIP
// Policy selector passed to index_init() as its dup_action_ argument.
// Presumably controls how an already-indexed document is treated (skip it, or
// check it lazily) - TODO confirm against the indexer.
// NOTE(review): no closing "};" is visible in this listing and the enumerator
// list may be truncated - verify against the complete header before relying
// on the set of values shown here.
41 enum dup_action_type
{
42 DUP_SKIP
, DUP_CHECK_LAZILY
// Commands which take a filename as the last argument, and output UTF-8
// text or some other MIME type, are common, so we handle these with a std::map.
/// Output type set by the constructors taking @a output_type_; left empty by
/// the others.  NOTE(review): presumably a MIME type - confirm.
std::string output_type;

/// Output character set; only the constructor taking @a output_charset_
/// sets this, otherwise it stays empty.
std::string output_charset;
52 Filter() : cmd(), output_type(), no_shell(false) { }
53 explicit Filter(const std::string
& cmd_
, bool use_shell_
= true)
54 : cmd(cmd_
), output_type(), no_shell(!use_shell_
) { }
55 Filter(const std::string
& cmd_
, const std::string
& output_type_
,
56 bool use_shell_
= true)
57 : cmd(cmd_
), output_type(output_type_
), no_shell(!use_shell_
) { }
58 Filter(const std::string
& cmd_
, const std::string
& output_type_
,
59 const std::string
& output_charset_
,
60 bool use_shell_
= true)
61 : cmd(cmd_
), output_type(output_type_
),
62 output_charset(output_charset_
), no_shell(!use_shell_
) { }
63 bool use_shell() const { return !no_shell
; }
66 extern std::map
<std::string
, Filter
> commands
;
// Register a filter for a type: stores `filter` in the global `commands` map
// under the key `type`, replacing any entry already stored under that key.
// NOTE(review): the return-type line and the braces around the body are not
// visible in this listing - confirm against the complete header.
69 index_command(const std::string
& type
, const Filter
& filter
)
71 commands
[type
] = filter
;
// Overload taking the type as a C string: `type` is converted to the map's
// std::string key by the commands[type] lookup, then `filter` is stored
// there, replacing any entry already stored under that key.
// NOTE(review): the return-type line and the braces around the body are not
// visible in this listing - confirm against the complete header.
75 index_command(const char * type
, const Filter
& filter
)
77 commands
[type
] = filter
;
// Declaration of skip(); the definition is not in this header.
// Parameters visible here: urlterm, context and msg (strings), size (off_t),
// last_mod (time_t) and flags (unsigned, defaults to 0).
// NOTE(review): flags presumably takes skip_flags bits, and the return-type
// line is not visible in this listing - confirm both.
81 skip(const std::string
& urlterm
, const std::string
& context
,
82 const std::string
& msg
,
83 off_t size
, time_t last_mod
, unsigned flags
= 0);
85 /// Call index_command() to set up the default command filters.
// NOTE(review): takes no arguments; the return-type line is not visible in
// this listing - confirm against the complete header.
87 index_add_default_filters();
// Declaration of index_init(); the definition is not in this header.
// Visible parameters: the database path, a Xapian::Stem, root/site-term/
// host-term strings, the empty-body and duplicate policies, sample and title
// sizes, and a series of boolean switches.
// NOTE(review): what each switch does is not shown here.  The return-type
// line is not visible, and the parameter list may be missing a line between
// title_size_ and overwrite - confirm against the complete header.
91 index_init(const std::string
& dbpath
, const Xapian::Stem
& stemmer
,
92 const std::string
& root_
,
93 const std::string
& site_term_
, const std::string
& host_term_
,
94 empty_body_type empty_body_
, dup_action_type dup_action_
,
95 size_t sample_size_
, size_t title_size_
,
97 bool overwrite
, bool retry_failed_
,
98 bool delete_removed_documents
, bool verbose_
, bool use_ctime_
,
99 bool spelling
, bool ignore_exclusions_
, bool description_as_sample
);
// Declaration of index_remove_failed_entry(); takes the URL term as a string.
// NOTE(review): presumably clears a recorded indexing failure for that term;
// the behaviour and the return type are not visible here - confirm.
102 index_remove_failed_entry(const std::string
& urlterm
);
// Declaration of index_add_document(); the definition is not in this header.
// Parameters visible here: urlterm (string), last_altered (time_t), did
// (Xapian::docid) and doc (const Xapian::Document reference).
// NOTE(review): how did is interpreted (e.g. whether a particular value means
// "add as new") and the return type are not visible here - confirm.
105 index_add_document(const std::string
& urlterm
, time_t last_altered
,
106 Xapian::docid did
, const Xapian::Document
& doc
);
108 /// Index a file into the database.
// Declaration of index_mimetype(); the definition is not in this header.
// Parameters visible here: file, urlterm, url, ext and mimetype (strings), a
// DirectoryIterator reference d and a non-const Xapian::Document reference
// doc.
// NOTE(review): the return-type line is not visible and the parameter list
// is cut off after doc - consult the complete header for the rest.
110 index_mimetype(const std::string
& file
, const std::string
& urlterm
,
111 const std::string
& url
,
112 const std::string
& ext
,
113 const std::string
&mimetype
, DirectoryIterator
&d
,
114 Xapian::Document
&doc
,
/// Delete any previously indexed documents we haven't seen.
void index_handle_deletion();
/// Commit any pending changes.
/// Clean up and release any resources, etc.
126 #endif // OMEGA_INCLUDED_INDEX_FILE_H