1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
14 * The Initial Developer of the Original Code is Björn Jacke. Portions created
15 * by the Initial Developers are Copyright (C) 2000-2007 the Initial
16 * Developers. All Rights Reserved.
18 * Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
19 * László Németh (nemethl@gyorsposta.hu)
21 * Alternatively, the contents of this file may be used under the terms of
22 * either the GNU General Public License Version 2 or later (the "GPL"), or
23 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
24 * in which case the provisions of the GPL or the LGPL are applicable instead
25 * of those above. If you wish to allow use of your version of this file only
26 * under the terms of either the GPL or the LGPL, and not to allow others to
27 * use your version of this file under the terms of the MPL, indicate your
28 * decision by deleting the provisions above and replace them with the notice
29 * and other provisions required by the GPL or the LGPL. If you do not delete
30 * the provisions above, a recipient may use your version of this file under
31 * the terms of any one of the MPL, the GPL or the LGPL.
34 * 2000-01-05 Björn Jacke <bjoern.jacke AT gmx.de>
35 * Initial Release inspired by the article about phonetic
36 * transformations out of c't 25/1999
38 * 2007-07-26 Björn Jacke <bjoern.jacke AT gmx.de>
39 * Released under MPL/GPL/LGPL tri-license for Hunspell
41 * 2007-08-23 László Németh <nemeth at OOo>
42 * Porting from Aspell to Hunspell using C-like structs
44 ******* END LICENSE BLOCK *******/
46 #ifndef MOZILLA_CLIENT
61 void init_phonet_hash(phonetable
& parms
)
65 for (i
= 0; i
< HASHSIZE
; i
++) {
69 for (i
= 0; parms
.rules
[i
][0] != '\0'; i
+= 2) {
70 /** set hash value **/
71 k
= (unsigned char) parms
.rules
[i
][0];
73 if (parms
.hash
[k
] < 0) {
79 // like strcpy but safe if the strings overlap
80 // but only if dest < src
81 static inline void strmove(char * dest
, char * src
) {
87 int myisalpha(char ch
) {
88 if ((unsigned char) ch
< 128) return isalpha(ch
);
92 /* phonetic transcription algorithm */
93 /* see: http://aspell.net/man-html/Phonetic-Code.html */
94 /* convert string to uppercase before this call */
95 int phonet (const char * inword
, char * target
,
99 /** Do phonetic transformation. **/
100 /** "len" = length of "inword" incl. '\0'; pass -1 to have it computed with strlen(inword) (which does not count the '\0'). **/
102 /** result: >= 0: length of "target" **/
103 /** otherwise: error **/
106 int k0
,n0
,p0
=-333,z0
;
109 typedef unsigned char uchar
;
110 char word
[MAXPHONETUTF8LEN
+ 1];
111 if (len
== -1) len
= strlen(inword
);
112 if (len
> MAXPHONETUTF8LEN
) return 0;
113 strcpy(word
, inword
);
117 while ((c
= word
[i
]) != '\0') {
118 n
= parms
.hash
[(uchar
) c
];
122 /** check all rules for the same letter **/
123 while (parms
.rules
[n
][0] == c
) {
125 /** check whole string **/
126 k
= 1; /** number of found letters **/
127 p
= 5; /** default priority **/
129 s
++; /** important for (see below) "*(s-1)" **/
131 while (*s
!= '\0' && word
[i
+k
] == *s
132 && !isdigit ((unsigned char) *s
) && strchr ("(-<^$", *s
) == NULL
) {
137 /** check letters in "(..)" **/
138 if (myisalpha(word
[i
+k
]) // ...could be implied?
139 && strchr(s
+1, word
[i
+k
]) != NULL
) {
148 while (*s
== '-' && k
> 1) {
154 if (isdigit ((unsigned char) *s
)) {
155 /** determine priority **/
159 if (*s
== '^' && *(s
+1) == '^')
164 && (i
== 0 || ! myisalpha(word
[i
-1]))
166 || (! myisalpha(word
[i
+k0
]) )))
167 || (*s
== '$' && i
> 0
168 && myisalpha(word
[i
-1])
169 && (! myisalpha(word
[i
+k0
]) )))
171 /** search for followup rules, if: **/
172 /** parms.followup and k > 1 and NO '-' in searchstring **/
174 n0
= parms
.hash
[(uchar
) c0
];
176 // if (parms.followup && k > 1 && n0 >= 0
178 && p0
!= (int) '-' && word
[i
+k
] != '\0') {
179 /** test follow-up rule for "word[i+k]" **/
180 while (parms
.rules
[n0
][0] == c0
) {
182 /** check whole string **/
187 while (*s
!= '\0' && word
[i
+k0
] == *s
188 && ! isdigit((unsigned char) *s
) && strchr("(-<^$",*s
) == NULL
) {
193 /** check letters **/
194 if (myisalpha(word
[i
+k0
])
195 && strchr (s
+1, word
[i
+k0
]) != NULL
) {
197 while (*s
!= ')' && *s
!= '\0')
204 /** "k0" gets NOT reduced **/
205 /** because "if (k0 == k)" **/
210 if (isdigit ((unsigned char) *s
)) {
216 /** *s == '^' cuts **/
217 || (*s
== '$' && ! myisalpha(word
[i
+k0
])))
220 /** this is just a piece of the string **/
226 /** priority too low **/
230 /** rule fits; stop search **/
234 } /** End of "while (parms.rules[n0][0] == c0)" **/
236 if (p0
>= p
&& parms
.rules
[n0
][0] == c0
) {
240 } /** end of follow-up stuff **/
242 /** replace string **/
243 s
= parms
.rules
[n
+1];
244 p0
= (parms
.rules
[n
][0] != '\0'
245 && strchr (parms
.rules
[n
]+1,'<') != NULL
) ? 1:0;
246 if (p0
== 1 && z
== 0) {
247 /** rule with '<' is used **/
248 if (j
> 0 && *s
!= '\0'
249 && (target
[j
-1] == c
|| target
[j
-1] == *s
)) {
255 while (*s
!= '\0' && word
[i
+k0
] != '\0') {
261 strmove (&word
[0]+i
+k0
, &word
[0]+i
+k
);
263 /** new "actual letter" **/
266 else { /** no '<' rule used **/
270 && *(s
+1) != '\0' && j
< len
) {
271 if (j
== 0 || target
[j
-1] != *s
) {
277 /** new "actual letter" **/
279 if (parms
.rules
[n
][0] != '\0'
280 && strstr (parms
.rules
[n
]+1, "^^") != NULL
) {
285 strmove (&word
[0], &word
[0]+i
+1);
291 } /** end of follow-up stuff **/
293 } /** end of while (parms.rules[n][0] == c) **/
294 } /** end of if (n >= 0) **/
296 // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'
297 // && (!parms.collapse_result || j == 0 || target[j-1] != c)){
298 if (k
&& !p0
&& j
< len
&& c
!= '\0'
299 && (1 || j
== 0 || target
[j
-1] != c
)){
300 /** condense only double letters **/
302 ///printf("\n setting \n");
310 } /** end of while ((c = word[i]) != '\0') **/
315 } /** end of function "phonet" **/