1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 * and László Németh (Hunspell). Portions created by the Initial Developers
16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 * David Einstein (deinst@world.std.com)
20 * László Németh (nemethl@gyorsposta.hu)
43 * Alternatively, the contents of this file may be used under the terms of
44 * either the GNU General Public License Version 2 or later (the "GPL"), or
45 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
46 * in which case the provisions of the GPL or the LGPL are applicable instead
47 * of those above. If you wish to allow use of your version of this file only
48 * under the terms of either the GPL or the LGPL, and not to allow others to
49 * use your version of this file under the terms of the MPL, indicate your
50 * decision by deleting the provisions above and replace them with the notice
51 * and other provisions required by the GPL or the LGPL. If you do not delete
52 * the provisions above, a recipient may use your version of this file under
53 * the terms of any one of the MPL, the GPL or the LGPL.
55 ******* END LICENSE BLOCK *******/
60 // First some base level utility routines
// Defaults for the character encoding name and the keyboard string.
// SPELL_KEYSTRING lists the three letter rows of a QWERTY keyboard,
// separated by '|'.
#define SPELL_ENCODING   "ISO8859-1"
#define SPELL_KEYSTRING  "qwertyuiop|asdfghjkl|zxcvbnm"
// Default morphological field tags.
// Every tag is exactly three characters: a two-character name plus ':'.
#define MORPH_STEM       "st:"
#define MORPH_ALLOMORPH  "al:"
#define MORPH_POS        "po:"
#define MORPH_DERI_PFX   "dp:"
#define MORPH_INFL_PFX   "ip:"
#define MORPH_TERM_PFX   "tp:"
#define MORPH_DERI_SFX   "ds:"
#define MORPH_INFL_SFX   "is:"
#define MORPH_TERM_SFX   "ts:"
#define MORPH_SURF_PFX   "sp:"
#define MORPH_FREQ       "fr:"
#define MORPH_PHON       "ph:"
#define MORPH_HYPH       "hy:"
#define MORPH_PART       "pa:"
#define MORPH_FLAG       "fl:"
#define MORPH_HENTRY     "_H:"
// Length of a field tag, not counting the terminating NUL.
// Taken with sizeof on the string literal so the value is a compile-time
// constant of type size_t (same type strlen() returns) and does not require
// <string.h> or a function call at every use.
#define MORPH_TAG_LEN    (sizeof(MORPH_STEM) - 1)
// Reserved flag values close to the top of the unsigned 16-bit range.
// Note that DEFAULTFLAGS and FORBIDDENWORD are defined to the same value.
#define DEFAULTFLAGS    65510
#define FORBIDDENWORD   65510
#define ONLYUPCASEFLAG  65511
// Pointer to the data stored right after an entry's word, or NULL if the
// entry has none:
//   h->var == 0            : NULL
//   h->var & H_OPT_ALIASM  : get_stored_pointer() applied to the address
//                            &h->word + h->blen + 1
//   otherwise              : the address &h->word + h->blen + 1 itself
// The parameter is parenthesized so that any pointer expression (e.g. `p + 1`
// or `&entry`) can be passed. It is still evaluated more than once, so do not
// pass an expression with side effects.
#define HENTRY_DATA(h) ((h)->var ? (((h)->var & H_OPT_ALIASM) ? \
    get_stored_pointer(&((h)->word) + (h)->blen + 1) : &((h)->word) + (h)->blen + 1) : NULL)
// NULL-free version for warning-free OOo build: yields "" instead of NULL
// when h->var is zero.
#define HENTRY_DATA2(h) ((h)->var ? (((h)->var & H_OPT_ALIASM) ? \
    get_stored_pointer(&((h)->word) + (h)->blen + 1) : &((h)->word) + (h)->blen + 1) : "")
// First occurrence of the string p inside the entry's data (via strstr), or
// NULL when the entry has no data. HENTRY_DATA(h) is expanded twice.
#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), (p)) : NULL)
// True when both the .l and the .h members of a and b are equal.
// Each argument may be evaluated twice; avoid side effects in a and b.
#define w_char_eq(a, b) ((a).l == (b).l && (a).h == (b).h)
113 // convert UTF-16 characters to UTF-8
114 char * u16_u8(char * dest
, int size
, const w_char
* src
, int srclen
);
116 // convert UTF-8 characters to UTF-16
117 int u8_u16(w_char
* dest
, int size
, const char * src
);
// Sort a vector of 2-byte (unsigned short) flags.
// NOTE(review): begin/end look like the index range to sort; whether end is
// inclusive is not visible here.
void flag_qsort(unsigned short flags[], int begin, int end);

// Binary search for flag in a vector of 2-byte (unsigned short) flags.
// NOTE(review): presumably expects flags to be sorted, with right as the
// upper search bound -- confirm against the definition.
int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
// Remove the end-of-line character(s) from s.
void mychomp(char * s);

// NOTE(review): undocumented here; by its name this returns a duplicate of
// s -- confirm who owns the returned memory.
char * mystrdup(const char * s);

// strcat for a destination string of limited length.
// NOTE(review): whether max counts the terminating NUL is not visible here.
char * mystrcat(char * dest, const char * st, int max);

// Duplicate s with its characters in reverse order.
char * myrevstrdup(const char * s);

// Parse into tokens using the single-character delimiter delim.
char * mystrsep(char ** sptr, const char delim);

// Parse into tokens using the single-character delimiter delim.
// NOTE(review): how this differs from mystrsep is not visible here.
char * mystrsep2(char ** sptr, const char delim);

// NOTE(review): the comment previously attached here repeated the tokenizer
// description; the name suggests an in-string pattern replacement -- confirm
// the roles of the three arguments against the definition.
char * mystrrep(char *, const char *, const char *);
// Append s to the end of every line in lines.
void strlinecat(char * lines, const char * s);

// Tokenize text into lines.
// NOTE(review): presumably splits at breakchar, stores the resulting array
// in *lines and returns the number of lines -- confirm against the definition.
int line_tok(const char * text, char *** lines, char breakchar);

// Tokenize text into lines and remove duplicate lines in place.
char * line_uniq(char * text, char breakchar);
// NOTE(review): variant of line_uniq taking char **; the difference in
// behaviour is not visible from this declaration.
char * line_uniq_app(char ** text, char breakchar);

// Replace every oldc with newc in text, in place.
char * tr(char * text, char oldc, char newc);
// NOTE(review): undocumented here; by name, reverses the given word --
// meaning of the int result to be confirmed.
int reverseword(char *);

// NOTE(review): undocumented here; by name, the UTF-aware counterpart of
// reverseword -- confirm against the definition.
int reverseword_utf(char *);

// NOTE(review): undocumented here; by name, removes duplicates from the
// n-element string list and returns an int (probably the new count) -- confirm.
int uniqlist(char ** list, int n);

// Free a character array list.
// NOTE(review): n is presumably the number of entries in *list -- confirm.
void freelist(char *** list, int n);
170 // character encoding information
173 unsigned char clower
;
174 unsigned char cupper
;
177 // Unicode character encoding information
178 struct unicode_info
{
180 unsigned short cupper
;
181 unsigned short clower
;
184 struct unicode_info2
{
186 unsigned short cupper
;
187 unsigned short clower
;
// NOTE(review): undocumented here; by name, sets up the Unicode case table
// used by the functions below -- meaning of the int result to be confirmed.
int initialize_utf_tbl();

// Upper-/lowercase mapping of a 16-bit character.
// NOTE(review): langnum presumably selects language-specific casing rules
// (see get_lang_num) -- confirm against the definitions.
unsigned short unicodetoupper(unsigned short c, int langnum);
unsigned short unicodetolower(unsigned short c, int langnum);

// NOTE(review): by name, non-zero when c is an alphabetic character -- confirm.
int unicodeisalpha(unsigned short c);
197 const char * enc_name
;
198 struct cs_info
* cs_table
;
201 // language to encoding default map
205 const char * def_enc
;
// NOTE(review): undocumented here; presumably returns the cs_info table for
// the encoding named by es -- confirm against the definition.
struct cs_info * get_current_cs(const char * es);

// NOTE(review): undocumented here; presumably returns the default encoding
// name for the language code lang -- confirm against the definition.
const char * get_default_enc(const char * lang);

// Get the language identifier (number) for a language code.
int get_lang_num(const char * lang);

// Get the characters of the given 8-bit encoding that have lower- and
// uppercase forms.
char * get_casechars(const char * enc);
// Encoding-name based case conversions of a NUL-terminated string.
// NOTE(review): p is const and d is not, so the result is presumably written
// to d -- confirm against the definitions.

// Convert to all capitals using the given encoding.
void enmkallcap(char * d, const char * p, const char * encoding);

// Convert to all lowercase using the given encoding.
void enmkallsmall(char * d, const char * p, const char * encoding);

// Convert to an initial capital using the given encoding.
void enmkinitcap(char * d, const char * p, const char * encoding);

// Case conversions of the NUL-terminated string p using the cs_info table
// csconv.

// Convert to all capitals.
void mkallcap(char * p, const struct cs_info * csconv);

// Convert to all lowercase.
void mkallsmall(char * p, const struct cs_info * csconv);

// Convert to an initial capital.
void mkinitcap(char * p, const struct cs_info * csconv);
237 // convert first nc characters of UTF-8 string to little
238 void mkallsmall_utf(w_char
* u
, int nc
, int langnum
);
240 // convert first nc characters of UTF-8 string to capital
241 void mkallcap_utf(w_char
* u
, int nc
, int langnum
);
243 // get type of capitalization
244 int get_captype(char * q
, int nl
, cs_info
*);
246 // get type of capitalization (UTF-8)
247 int get_captype_utf8(w_char
* q
, int nl
, int langnum
);
// Strip all ignored characters from word; the ignored set is given as an
// array of 16-bit values with ignored_len entries.
void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);

// Strip all ignored characters from word; the ignored set is given as a
// char string.
void remove_ignored_chars(char * word, char * ignored_chars);
// NOTE(review): the declarations below carry no documentation in this header;
// the remarks are inferred from names only and must be confirmed against the
// definitions.

// Presumably parses a string value out of line into *out; ln looks like a
// line number used for diagnostics.
int parse_string(char * line, char ** out, int ln);

// Presumably parses an array value out of line into *out, and into
// *out_utf16 / *out_utf16_len when utf8 is set.
int parse_array(char * line, char ** out, unsigned short ** out_utf16,
                int * out_utf16_len, int utf8, int ln);

// Presumably the length of the field starting at r.
int fieldlen(const char * r);
// Presumably copies the field tagged var from morph into dest.
char * copy_field(char * dest, const char * morph, const char * var);

// Presumably compares two morphological descriptions.
int morphcmp(const char * s, const char * t);

// Presumably counts the suffix fields in morph.
int get_sfxcount(const char * morph);
// Conversion functions for protected memory.
// NOTE(review): presumably store_pointer writes the pointer value source into
// the bytes at dest and get_stored_pointer reads it back from s -- confirm
// against the definitions.
void store_pointer(char * dest, char * source);

char * get_stored_pointer(char * s);