Fix integer type used by ESet
[xapian.git] / xapian-core / include / xapian / stem.h
blob47ed9edfee15c554b2ac024ace0d5718b9fd8903
1 /** @file
2 * @brief stemming algorithms
3 */
4 /* Copyright (C) 2005,2007,2010,2011,2013,2014,2015,2018,2019 Olly Betts
5 * Copyright (C) 2010 Evgeny Sizikov
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_STEM_H
23 #define XAPIAN_INCLUDED_STEM_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/stem.h> directly; include <xapian.h> instead.
27 #endif
29 #include <xapian/constinfo.h>
30 #include <xapian/intrusive_ptr.h>
31 #include <xapian/visibility.h>
33 #include <string>
35 namespace Xapian {
37 /// Class representing a stemming algorithm implementation.
38 class XAPIAN_VISIBILITY_DEFAULT StemImplementation
39 : public Xapian::Internal::intrusive_base
41 /// Don't allow assignment.
42 void operator=(const StemImplementation &);
44 /// Don't allow copying.
45 StemImplementation(const StemImplementation &);
47 public:
48 /// Default constructor.
49 StemImplementation() { }
51 /// Virtual destructor.
52 virtual ~StemImplementation();
54 /// Stem the specified word.
55 virtual std::string operator()(const std::string & word) = 0;
57 /// Return a string describing this object.
58 virtual std::string get_description() const = 0;
61 /// Class representing a stemming algorithm.
62 class XAPIAN_VISIBILITY_DEFAULT Stem {
63 public:
64 /// @private @internal Reference counted internals.
65 Xapian::Internal::intrusive_ptr<StemImplementation> internal;
67 /// Copy constructor.
68 Stem(const Stem & o);
70 /// Assignment.
71 Stem & operator=(const Stem & o);
73 #ifdef XAPIAN_MOVE_SEMANTICS
74 /// Move constructor.
75 Stem(Stem && o);
77 /// Move assignment operator.
78 Stem & operator=(Stem && o);
79 #endif
81 /** Construct a Xapian::Stem object which doesn't change terms.
83 * Equivalent to Stem("none").
85 Stem();
87 /** Construct a Xapian::Stem object for a particular language.
89 * @param language Either the English name for the language
90 * or the two letter ISO639 code.
92 * The following language names are understood (aliases follow the
93 * name):
95 * - none - don't stem terms
96 * - arabic (ar) - Since Xapian 1.3.5
97 * - armenian (hy) - Since Xapian 1.3.0
98 * - basque (eu) - Since Xapian 1.3.0
99 * - catalan (ca) - Since Xapian 1.3.0
100 * - danish (da)
101 * - dutch (nl)
102 * - english (en) - Martin Porter's 2002 revision of his stemmer
103 * - earlyenglish - Early English (e.g. Shakespeare, Dickens) stemmer
104 * (since Xapian 1.3.2)
105 * - english_lovins (lovins) - Lovin's stemmer
106 * - english_porter (porter) - Porter's stemmer as described in
107 * his 1980 paper
108 * - finnish (fi)
109 * - french (fr)
110 * - german (de)
111 * - german2 - Normalises umlauts and &szlig;
112 * - hungarian (hu)
113 * - indonesian (id) - Since Xapian 1.4.6
114 * - irish (ga) - Since Xapian 1.4.7
115 * - italian (it)
116 * - kraaij_pohlmann - A different Dutch stemmer
117 * - lithuanian (lt) - Since Xapian 1.4.7
118 * - nepali (ne) - Since Xapian 1.4.7
119 * - norwegian (nb, nn, no)
120 * - portuguese (pt)
121 * - romanian (ro)
122 * - russian (ru)
123 * - spanish (es)
124 * - swedish (sv)
125 * - tamil (ta) - Since Xapian 1.4.7
126 * - turkish (tr)
128 * @param fallback If true then treat unknown @a language as "none",
129 * otherwise an exception is thrown (default: false).
130 * Parameter added in Xapian 1.4.14 - older versions
131 * always threw an exception.
133 * @exception Xapian::InvalidArgumentError is thrown if
134 * @a language isn't recognised and @a fallback is false.
136 * @{
138 explicit Stem(const std::string& language);
139 Stem(const std::string& language, bool fallback);
140 /** @} */
142 /** Construct a Xapian::Stem object with a user-provided stemming algorithm.
144 * You can subclass Xapian::StemImplementation to implement your own
145 * stemming algorithm (or to wrap a third-party algorithm) and then wrap
146 * your implementation in a Xapian::Stem object to pass to the Xapian API.
148 * @param p The user-subclassed StemImplementation object. This
149 * is reference counted, and so will be automatically
150 * deleted by the Xapian::Stem wrapper when no longer
151 * required.
153 explicit Stem(StemImplementation * p);
155 /// Destructor.
156 ~Stem();
158 /** Stem a word.
160 * @param word a word to stem.
161 * @return the stem
163 std::string operator()(const std::string &word) const;
165 /// Return true if this is a no-op stemmer.
166 bool is_none() const { return !internal.get(); }
168 /// Return a string describing this object.
169 std::string get_description() const;
171 /** Return a list of available languages.
173 * Each stemmer is only included once in the list (not once for
174 * each alias). The name included is the English name of the
175 * language.
177 * The list is returned as a string, with language names separated by
178 * spaces. This is a static method, so a Xapian::Stem object is not
179 * required for this operation.
181 static std::string get_available_languages() {
182 const struct Xapian::Internal::constinfo * info =
183 Xapian::Internal::get_constinfo_();
184 return std::string(info->stemmer_data, info->stemmer_name_len);
190 #endif // XAPIAN_INCLUDED_STEM_H