Check-in hunspell source code.
[chromium-blink-merge.git] / third_party / hunspell_new / src / hunspell / affixmgr.hxx
blob79345dfe199c75221a7b3165087e8326703eec13
1 #ifndef _AFFIXMGR_HXX_
2 #define _AFFIXMGR_HXX_
4 #include "hunvisapi.h"
6 #include <stdio.h>
8 #include "atypes.hxx"
9 #include "baseaffix.hxx"
10 #include "hashmgr.hxx"
11 #include "phonet.hxx"
12 #include "replist.hxx"
// Bit flags used to check for flag duplication. The two values occupy
// distinct bits, so both can be recorded in the same byte.
// NOTE(review): presumably dupSFX marks a flag already seen in a suffix
// definition and dupPFX one seen in a prefix definition -- confirm against
// the affix parser.
#define dupSFX (1 << 0)  // bit 0
#define dupPFX (1 << 1)  // bit 1
18 class PfxEntry;
19 class SfxEntry;
21 #ifdef HUNSPELL_CHROME_CLIENT
23 #include <vector>
// This class provides an implementation of the contclasses array in AffixMgr
// that is normally a large static array. We should almost never need more than
// 256 elements, so this class only allocates that much to start off with. If
// elements higher than that are actually used, we'll automatically expand.
class ContClasses {
 public:
  // Pre-allocates 256 zeroed entries so that typically, we'll never have to
  // resize.
  ContClasses() {
    EnsureSizeIs(256);
  }

  // Returns a writable reference to entry |index|. If |index| lies past the
  // current end, the storage is first grown (zero-filled) to cover it. The
  // returned reference is invalidated by any later call that grows the
  // storage.
  char& operator[](size_t index) {
    EnsureSizeIs(index + 1);
    return data[index];
  }

  // Grows the storage to hold at least |new_size| entries. Never shrinks.
  void EnsureSizeIs(size_t new_size) {
    if (data.size() >= new_size)
      return;  // Nothing to do.

    // std::vector::resize() value-initializes the appended chars to 0, so no
    // explicit memset (and no <string.h> dependency) is required.
    data.resize(new_size);
  }

  std::vector<char> data;
};
53 #endif // HUNSPELL_CHROME_CLIENT
55 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
58 PfxEntry * pStart[SETSIZE];
59 SfxEntry * sStart[SETSIZE];
60 PfxEntry * pFlag[SETSIZE];
61 SfxEntry * sFlag[SETSIZE];
62 HashMgr * pHMgr;
63 HashMgr ** alldic;
64 int * maxdic;
65 char * keystring;
66 char * trystring;
67 char * encoding;
68 struct cs_info * csconv;
69 int utf8;
70 int complexprefixes;
71 FLAG compoundflag;
72 FLAG compoundbegin;
73 FLAG compoundmiddle;
74 FLAG compoundend;
75 FLAG compoundroot;
76 FLAG compoundforbidflag;
77 FLAG compoundpermitflag;
78 int compoundmoresuffixes;
79 int checkcompounddup;
80 int checkcompoundrep;
81 int checkcompoundcase;
82 int checkcompoundtriple;
83 int simplifiedtriple;
84 FLAG forbiddenword;
85 FLAG nosuggest;
86 FLAG nongramsuggest;
87 FLAG needaffix;
88 int cpdmin;
89 int numrep;
90 replentry * reptable;
91 RepList * iconvtable;
92 RepList * oconvtable;
93 int nummap;
94 mapentry * maptable;
95 int numbreak;
96 char ** breaktable;
97 int numcheckcpd;
98 patentry * checkcpdtable;
99 int simplifiedcpd;
100 int numdefcpd;
101 flagentry * defcpdtable;
102 phonetable * phone;
103 int maxngramsugs;
104 int maxcpdsugs;
105 int maxdiff;
106 int onlymaxdiff;
107 int nosplitsugs;
108 int sugswithdots;
109 int cpdwordmax;
110 int cpdmaxsyllable;
111 char * cpdvowels;
112 w_char * cpdvowels_utf16;
113 int cpdvowels_utf16_len;
114 char * cpdsyllablenum;
115 const char * pfxappnd; // BUG: not stateless
116 const char * sfxappnd; // BUG: not stateless
117 FLAG sfxflag; // BUG: not stateless
118 char * derived; // BUG: not stateless
119 SfxEntry * sfx; // BUG: not stateless
120 PfxEntry * pfx; // BUG: not stateless
121 int checknum;
122 char * wordchars;
123 unsigned short * wordchars_utf16;
124 int wordchars_utf16_len;
125 char * ignorechars;
126 unsigned short * ignorechars_utf16;
127 int ignorechars_utf16_len;
128 char * version;
129 char * lang;
130 int langnum;
131 FLAG lemma_present;
132 FLAG circumfix;
133 FLAG onlyincompound;
134 FLAG keepcase;
135 FLAG forceucase;
136 FLAG warn;
137 int forbidwarn;
138 FLAG substandard;
139 int checksharps;
140 int fullstrip;
142 int havecontclass; // boolean variable
143 #ifdef HUNSPELL_CHROME_CLIENT
144 ContClasses contclasses;
145 #else
146 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
147 #endif
149 public:
151 #ifdef HUNSPELL_CHROME_CLIENT
152 AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md);
153 #else
154 AffixMgr(const char * affpath, HashMgr** ptr, int * md,
155 const char * key = NULL);
156 #endif
157 ~AffixMgr();
158 struct hentry * affix_check(const char * word, int len,
159 const unsigned short needflag = (unsigned short) 0,
160 char in_compound = IN_CPD_NOT);
161 struct hentry * prefix_check(const char * word, int len,
162 char in_compound, const FLAG needflag = FLAG_NULL);
163 inline int isSubset(const char * s1, const char * s2);
164 struct hentry * prefix_check_twosfx(const char * word, int len,
165 char in_compound, const FLAG needflag = FLAG_NULL);
166 inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
167 struct hentry * suffix_check(const char * word, int len, int sfxopts,
168 PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
169 const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
170 char in_compound = IN_CPD_NOT);
171 struct hentry * suffix_check_twosfx(const char * word, int len,
172 int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
174 char * affix_check_morph(const char * word, int len,
175 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
176 char * prefix_check_morph(const char * word, int len,
177 char in_compound, const FLAG needflag = FLAG_NULL);
178 char * suffix_check_morph (const char * word, int len, int sfxopts,
179 PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
180 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
182 char * prefix_check_twosfx_morph(const char * word, int len,
183 char in_compound, const FLAG needflag = FLAG_NULL);
184 char * suffix_check_twosfx_morph(const char * word, int len,
185 int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
187 char * morphgen(char * ts, int wl, const unsigned short * ap,
188 unsigned short al, char * morph, char * targetmorph, int level);
190 int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
191 int wl, const unsigned short * ap, unsigned short al, char * bad,
192 int, char *);
194 short get_syllable (const char * word, int wlen);
195 int cpdrep_check(const char * word, int len);
196 int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
197 const char affixed);
198 int defcpd_check(hentry *** words, short wnum, hentry * rv,
199 hentry ** rwords, char all);
200 int cpdcase_check(const char * word, int len);
201 inline int candidate_check(const char * word, int len);
202 void setcminmax(int * cmin, int * cmax, const char * word, int len);
203 struct hentry * compound_check(const char * word, int len, short wordnum,
204 short numsyllable, short maxwordnum, short wnum, hentry ** words,
205 char hu_mov_rule, char is_sug, int * info);
207 int compound_check_morph(const char * word, int len, short wordnum,
208 short numsyllable, short maxwordnum, short wnum, hentry ** words,
209 char hu_mov_rule, char ** result, char * partresult);
211 struct hentry * lookup(const char * word);
212 int get_numrep() const;
213 struct replentry * get_reptable() const;
214 RepList * get_iconvtable() const;
215 RepList * get_oconvtable() const;
216 struct phonetable * get_phonetable() const;
217 int get_nummap() const;
218 struct mapentry * get_maptable() const;
219 int get_numbreak() const;
220 char ** get_breaktable() const;
221 char * get_encoding();
222 int get_langnum() const;
223 char * get_key_string();
224 char * get_try_string() const;
225 const char * get_wordchars() const;
226 unsigned short * get_wordchars_utf16(int * len) const;
227 char * get_ignore() const;
228 unsigned short * get_ignore_utf16(int * len) const;
229 int get_compound() const;
230 FLAG get_compoundflag() const;
231 FLAG get_compoundbegin() const;
232 FLAG get_forbiddenword() const;
233 FLAG get_nosuggest() const;
234 FLAG get_nongramsuggest() const;
235 FLAG get_needaffix() const;
236 FLAG get_onlyincompound() const;
237 FLAG get_compoundroot() const;
238 FLAG get_lemma_present() const;
239 int get_checknum() const;
240 const char * get_prefix() const;
241 const char * get_suffix() const;
242 const char * get_derived() const;
243 const char * get_version() const;
244 int have_contclass() const;
245 int get_utf8() const;
246 int get_complexprefixes() const;
247 char * get_suffixed(char ) const;
248 int get_maxngramsugs() const;
249 int get_maxcpdsugs() const;
250 int get_maxdiff() const;
251 int get_onlymaxdiff() const;
252 int get_nosplitsugs() const;
253 int get_sugswithdots(void) const;
254 FLAG get_keepcase(void) const;
255 FLAG get_forceucase(void) const;
256 FLAG get_warn(void) const;
257 int get_forbidwarn(void) const;
258 int get_checksharps(void) const;
259 char * encode_flag(unsigned short aflag) const;
260 int get_fullstrip() const;
262 private:
263 #ifdef HUNSPELL_CHROME_CLIENT
264 // Not owned by us, owned by the Hunspell object.
265 hunspell::BDictReader* bdict_reader;
266 #endif
267 int parse_file(const char * affpath, const char * key);
268 int parse_flag(char * line, unsigned short * out, FileMgr * af);
269 int parse_num(char * line, int * out, FileMgr * af);
270 int parse_cpdsyllable(char * line, FileMgr * af);
271 int parse_reptable(char * line, FileMgr * af);
272 int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
273 int parse_phonetable(char * line, FileMgr * af);
274 int parse_maptable(char * line, FileMgr * af);
275 int parse_breaktable(char * line, FileMgr * af);
276 int parse_checkcpdtable(char * line, FileMgr * af);
277 int parse_defcpdtable(char * line, FileMgr * af);
278 int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
280 void reverse_condition(char *);
281 void debugflag(char * result, unsigned short flag);
282 int condlen(char *);
283 int encodeit(affentry &entry, char * cs);
284 int build_pfxtree(PfxEntry* pfxptr);
285 int build_sfxtree(SfxEntry* sfxptr);
286 int process_pfx_order();
287 int process_sfx_order();
288 PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
289 SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
290 int process_pfx_tree_to_list();
291 int process_sfx_tree_to_list();
292 int redundant_condition(char, char * strip, int stripl,
293 const char * cond, int);
294 void finishFileMgr(FileMgr *afflst);
297 #endif