Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / extensions / spellcheck / hunspell / src / csutil.hxx
blobfef60c2803090da7fbb203f502d85573aeaaa266
1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 * and László Németh (Hunspell). Portions created by the Initial Developers
16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 * David Einstein (deinst@world.std.com)
20 * László Németh (nemethl@gyorsposta.hu)
21 * Davide Prina
22 * Giuseppe Modugno
23 * Gianluca Turconi
24 * Simon Brouwer
25 * Noll Janos
26 * Biro Arpad
27 * Goldman Eleonora
28 * Sarlos Tamas
29 * Bencsath Boldizsar
30 * Halacsy Peter
31 * Dvornik Laszlo
32 * Gefferth Andras
33 * Nagy Viktor
34 * Varga Daniel
35 * Chris Halls
36 * Rene Engelhard
37 * Bram Moolenaar
38 * Dafydd Jones
39 * Harri Pitkanen
40 * Andras Timar
41 * Tor Lillqvist
43 * Alternatively, the contents of this file may be used under the terms of
44 * either the GNU General Public License Version 2 or later (the "GPL"), or
45 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
46 * in which case the provisions of the GPL or the LGPL are applicable instead
47 * of those above. If you wish to allow use of your version of this file only
48 * under the terms of either the GPL or the LGPL, and not to allow others to
49 * use your version of this file under the terms of the MPL, indicate your
50 * decision by deleting the provisions above and replace them with the notice
51 * and other provisions required by the GPL or the LGPL. If you do not delete
52 * the provisions above, a recipient may use your version of this file under
53 * the terms of any one of the MPL, the GPL or the LGPL.
55 ******* END LICENSE BLOCK *******/
57 #ifndef __CSUTILHXX__
58 #define __CSUTILHXX__
60 // First some base level utility routines
62 #include "w_char.hxx"
64 // casing
65 #define NOCAP 0
66 #define INITCAP 1
67 #define ALLCAP 2
68 #define HUHCAP 3
69 #define HUHINITCAP 4
71 // default encoding and keystring
72 #define SPELL_ENCODING "ISO8859-1"
73 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
75 // default morphological fields
76 #define MORPH_STEM "st:"
77 #define MORPH_ALLOMORPH "al:"
78 #define MORPH_POS "po:"
79 #define MORPH_DERI_PFX "dp:"
80 #define MORPH_INFL_PFX "ip:"
81 #define MORPH_TERM_PFX "tp:"
82 #define MORPH_DERI_SFX "ds:"
83 #define MORPH_INFL_SFX "is:"
84 #define MORPH_TERM_SFX "ts:"
85 #define MORPH_SURF_PFX "sp:"
86 #define MORPH_FREQ "fr:"
87 #define MORPH_PHON "ph:"
88 #define MORPH_HYPH "hy:"
89 #define MORPH_PART "pa:"
90 #define MORPH_FLAG "fl:"
91 #define MORPH_HENTRY "_H:"
92 #define MORPH_TAG_LEN strlen(MORPH_STEM)
94 #define MSEP_FLD ' '
95 #define MSEP_REC '\n'
96 #define MSEP_ALT '\v'
98 // default flags
99 #define DEFAULTFLAGS 65510
100 #define FORBIDDENWORD 65510
101 #define ONLYUPCASEFLAG 65511
103 // hash entry macros
104 #define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
105 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL)
106 // NULL-free version for warning-free OOo build
107 #define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
108 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "")
109 #define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
111 #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
113 // convert UTF-16 characters to UTF-8
114 char * u16_u8(char * dest, int size, const w_char * src, int srclen);
116 // convert UTF-8 characters to UTF-16
117 int u8_u16(w_char * dest, int size, const char * src);
119 // sort 2-byte vector
120 void flag_qsort(unsigned short flags[], int begin, int end);
122 // binary search in 2-byte vector
123 int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
125 // remove end of line char(s)
126 void mychomp(char * s);
128 // duplicate string
129 char * mystrdup(const char * s);
131 // strcat for limited length destination string
132 char * mystrcat(char * dest, const char * st, int max);
134 // duplicate reverse of string
135 char * myrevstrdup(const char * s);
137 // parse into tokens with char delimiter
138 char * mystrsep(char ** sptr, const char delim);
139 // parse into tokens with char delimiter
140 char * mystrsep2(char ** sptr, const char delim);
142 // replace the pattern (2nd argument) with the replacement (3rd argument) in the string (1st argument)
143 char * mystrrep(char *, const char *, const char *);
145 // append s to the end of every line in text
146 void strlinecat(char * lines, const char * s);
148 // tokenize into lines with new line
149 int line_tok(const char * text, char *** lines, char breakchar);
151 // tokenize into lines with new line and uniq in place
152 char * line_uniq(char * text, char breakchar);
153 char * line_uniq_app(char ** text, char breakchar);
155 // change oldchar to newchar in place
156 char * tr(char * text, char oldc, char newc);
158 // reverse word
159 int reverseword(char *);
161 // reverse word (UTF-8 variant)
162 int reverseword_utf(char *);
164 // remove duplicates
165 int uniqlist(char ** list, int n);
167 // free character array list
168 void freelist(char *** list, int n);
// character encoding information
//
// One record of an 8-bit character set table (see enc_entry::cs_table and
// get_current_cs()).
// NOTE(review): field meanings below are inferred from the member names and
// from the mkallcap/mkallsmall helpers that take a cs_info table — confirm
// against the implementation file.
struct cs_info {
  unsigned char ccase;   // presumably a case flag for the character
  unsigned char clower;  // presumably the lowercase form
  unsigned char cupper;  // presumably the uppercase form
};
// Unicode character encoding information
//
// Holds a 16-bit character value together with two further 16-bit values.
// NOTE(review): cupper/clower are presumably the upper- and lowercase
// counterparts of c — confirm against the table that instantiates this.
struct unicode_info {
  unsigned short c;
  unsigned short cupper;
  unsigned short clower;
};
// Per-character record with a char-sized `cletter` field and two 16-bit
// values.
// NOTE(review): presumably an entry of the table built by
// initialize_utf_tbl(), with cletter marking alphabetic characters and
// cupper/clower the case counterparts — confirm against the implementation.
struct unicode_info2 {
  char cletter;
  unsigned short cupper;
  unsigned short clower;
};
190 int initialize_utf_tbl();
191 void free_utf_tbl();
192 unsigned short unicodetoupper(unsigned short c, int langnum);
193 unsigned short unicodetolower(unsigned short c, int langnum);
194 int unicodeisalpha(unsigned short c);
// Pairs an encoding name with a pointer to a cs_info table.
// NOTE(review): presumably one row of the lookup used by get_current_cs() —
// confirm against the implementation file.
struct enc_entry {
  const char * enc_name;
  struct cs_info * cs_table;
};
// language to encoding default map
//
// One row: a language string, a default encoding name, and an integer.
// NOTE(review): `num` is presumably the language identifier returned by
// get_lang_num(), and `def_enc` the value returned by get_default_enc() —
// confirm against the implementation file.
struct lang_map {
  const char * lang;
  const char * def_enc;
  int num;
};
209 struct cs_info * get_current_cs(const char * es);
211 const char * get_default_enc(const char * lang);
213 // get language identifiers of language codes
214 int get_lang_num(const char * lang);
216 // get characters of the given 8bit encoding with lower- and uppercase forms
217 char * get_casechars(const char * enc);
219 // convert null terminated string to all caps using encoding
220 void enmkallcap(char * d, const char * p, const char * encoding);
222 // convert null terminated string to all lowercase using encoding
223 void enmkallsmall(char * d, const char * p, const char * encoding);
225 // convert null terminated string to have initial capital using encoding
226 void enmkinitcap(char * d, const char * p, const char * encoding);
228 // convert null terminated string to all caps
229 void mkallcap(char * p, const struct cs_info * csconv);
231 // convert null terminated string to all lowercase
232 void mkallsmall(char * p, const struct cs_info * csconv);
234 // convert null terminated string to have initial capital
235 void mkinitcap(char * p, const struct cs_info * csconv);
237 // convert first nc characters of UTF-16 (w_char) string to lowercase
238 void mkallsmall_utf(w_char * u, int nc, int langnum);
240 // convert first nc characters of UTF-16 (w_char) string to uppercase
241 void mkallcap_utf(w_char * u, int nc, int langnum);
243 // get type of capitalization
244 int get_captype(char * q, int nl, cs_info *);
246 // get type of capitalization (UTF-8)
247 int get_captype_utf8(w_char * q, int nl, int langnum);
249 // strip all ignored characters in the string
250 void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
252 // strip all ignored characters in the string
253 void remove_ignored_chars(char * word, char * ignored_chars);
255 int parse_string(char * line, char ** out, int ln);
257 int parse_array(char * line, char ** out, unsigned short ** out_utf16,
258 int * out_utf16_len, int utf8, int ln);
260 int fieldlen(const char * r);
261 char * copy_field(char * dest, const char * morph, const char * var);
263 int morphcmp(const char * s, const char * t);
265 int get_sfxcount(const char * morph);
267 // conversion function for protected memory
268 void store_pointer(char * dest, char * source);
270 // conversion function for protected memory
271 char * get_stored_pointer(char * s);
273 #endif