/** @file
 * @brief Base class for implementations of stemming algorithms
 */
/* Copyright (C) 2007,2009,2010,2016 Olly Betts
 * Copyright (C) 2010 Evgeny Sizikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
23 #define XAPIAN_INCLUDED_STEMINTERNAL_H
#include <xapian/stem.h>

#include "alignment_cast.h"

#include <cstdlib>
#include <string>
/// Element type of a Snowball string: one unsigned byte.
typedef unsigned char symbol;

/** Size in bytes of the bookkeeping prefix stored in front of a Snowball
 *  string.
 *
 *  The prefix is two ints wide; lose_s() steps a string pointer back by this
 *  amount before handing it to std::free().
 */
#define HEAD (2 * sizeof(int))
36 typedef int (*among_function
)(Xapian::StemImplementation
*);
/** One entry of the lookup table passed to find_among() / find_among_b().
 *
 *  NOTE(review): the struct name is taken from the `const struct among *`
 *  parameters of find_among(); confirm the member list is complete.
 */
struct among {
    int s_size;       /* length of search string (in symbols) */
    unsigned s;       /* offset in pool to search string */
    int substring_i;  /* index to longest matching substring */
    int result;       /* result of the lookup */
};
45 inline void lose_s(symbol
* p
) {
46 if (p
) std::free(reinterpret_cast<char *>(p
) - HEAD
);
51 class SnowballStemImplementation
: public StemImplementation
{
56 int c
, l
, lb
, bra
, ket
;
61 return alignment_cast
<const int *>(p
)[-1];
65 SET_SIZE(symbol
* p
, int n
)
67 alignment_cast
<int *>(p
)[-1] = n
;
71 CAPACITY(const symbol
* p
)
73 return alignment_cast
<const int *>(p
)[-2];
77 SET_CAPACITY(symbol
* p
, int n
)
79 alignment_cast
<int *>(p
)[-2] = n
;
82 static int skip_utf8(const symbol
* p
, int c
, int lb
, int l
, int n
);
84 static symbol
* increase_size(symbol
* p
, int n
);
86 static symbol
* create_s();
88 int get_utf8(int * slot
);
89 int get_b_utf8(int * slot
);
91 int in_grouping_U(const unsigned char * s
, int min
, int max
, int repeat
);
92 int in_grouping_b_U(const unsigned char * s
, int min
, int max
, int repeat
);
93 int out_grouping_U(const unsigned char * s
, int min
, int max
, int repeat
);
94 int out_grouping_b_U(const unsigned char * s
, int min
, int max
, int repeat
);
96 int eq_s(int s_size
, const symbol
* s
);
97 int eq_s_b(int s_size
, const symbol
* s
);
98 int eq_v(const symbol
* v
) { return eq_s(SIZE(v
), v
); }
99 int eq_v_b(const symbol
* v
) { return eq_s_b(SIZE(v
), v
); }
101 int find_among(const symbol
*pool
, const struct among
* v
, int v_size
,
102 const unsigned char * fnum
, const among_function
* f
);
103 int find_among_b(const symbol
*pool
, const struct among
* v
, int v_size
,
104 const unsigned char * fnum
, const among_function
* f
);
106 int replace_s(int c_bra
, int c_ket
, int s_size
, const symbol
* s
);
107 int slice_from_s(int s_size
, const symbol
* s
);
108 int slice_from_v(const symbol
* v
) { return slice_from_s(SIZE(v
), v
); }
110 int slice_del() { return slice_from_s(0, 0); }
112 void insert_s(int c_bra
, int c_ket
, int s_size
, const symbol
* s
);
113 void insert_v(int c_bra
, int c_ket
, const symbol
* v
) {
114 insert_s(c_bra
, c_ket
, SIZE(v
), v
);
117 symbol
* slice_to(symbol
* v
);
118 symbol
* assign_to(symbol
* v
);
120 int len_utf8(const symbol
* v
);
123 void debug(int number
, int line_count
);
127 /// Perform initialisation common to all Snowball stemmers.
128 SnowballStemImplementation()
129 : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
131 /// Perform cleanup common to all Snowball stemmers.
132 virtual ~SnowballStemImplementation();
134 /// Stem the specified word.
135 virtual std::string
operator()(const std::string
& word
);
137 /// Virtual method implemented by the subclass to actually do the work.
138 virtual int stem() = 0;
143 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H