1 #include "license.hunspell"
2 #include "license.myspell"
9 #include "suggestmgr.hxx"
13 const w_char W_VLINE
= { '\0', '|' };
15 #ifdef HUNSPELL_CHROME_CLIENT
17 // A simple class which creates temporary hentry objects which are available
18 // only in a scope. To conceal memory operations from SuggestMgr functions,
19 // this object automatically deletes all hentry objects created through
20 // CreateScopedHashEntry() calls in its destructor. So, the following snippet
21 // raises a memory error.
23 // hentry* bad_copy = NULL;
25 // ScopedHashEntryFactory factory;
26 // hentry* scoped_copy = factory.CreateScopedHashEntry(0, source);
28 // bad_copy = scoped_copy;
30 // if (bad_copy->word[0]) // memory for scoped_copy has been deleted!
32 // As listed in the above snippet, it is simple to use this class.
33 // 1. Declare an instance of this ScopedHashEntryFactory, and;
34 // 2. Call its CreateHashEntry() member instead of using 'new hentry' or
37 class ScopedHashEntryFactory
{
39 ScopedHashEntryFactory();
40 ~ScopedHashEntryFactory();
42 // Creates a temporary copy of the given hentry struct.
43 // The returned copy is available only while this object is available.
44 // NOTE: this function just calls memcpy() in creating a copy of the given
45 // hentry struct, i.e. it does NOT copy objects referred by pointers of the
46 // given hentry struct.
47 hentry
* CreateScopedHashEntry(int index
, const hentry
* source
);
50 // A struct which encapsulates the new hentry struct introduced in hunspell
51 // 1.2.8. For a pointer to an hentry struct 'h', hunspell 1.2.8 stores a word
52 // (including a NUL character) into 'h->word[0]',...,'h->word[h->blen]' even
53 // though arraysize(h->word[]) is 1. Also, it changed 'astr' to a pointer so
54 // it can store affix flags into 'h->astr[0]',...,'h->astr[alen-1]'. To handle
55 // this new hentry struct, we define a struct which combines three values: an
56 // hentry struct 'hentry'; a char array 'word[kMaxWordLen]', and; an unsigned
57 // short array 'astr' so a hentry struct 'h' returned from
58 // CreateScopedHashEntry() satisfies the following equations:
59 // hentry* h = factory.CreateScopedHashEntry(0, source);
60 // h->word[0] == ((HashEntryItem*)h)->entry.word[0].
61 // h->word[1] == ((HashEntryItem*)h)->word[0].
63 // h->word[h->blen] == ((HashEntryItem*)h)->word[h->blen-1].
64 // h->astr[0] == ((HashEntryItem*)h)->astr[0].
65 // h->astr[1] == ((HashEntryItem*)h)->astr[1].
67 // h->astr[h->alen-1] == ((HashEntryItem*)h)->astr[h->alen-1].
72 struct HashEntryItem
{
74 char word
[kMaxWordLen
];
75 unsigned short astr
[kMaxAffixLen
];
78 HashEntryItem hash_items_
[MAX_ROOTS
];
81 ScopedHashEntryFactory::ScopedHashEntryFactory() {
82 memset(&hash_items_
[0], 0, sizeof(hash_items_
));
85 ScopedHashEntryFactory::~ScopedHashEntryFactory() {
88 hentry
* ScopedHashEntryFactory::CreateScopedHashEntry(int index
,
89 const hentry
* source
) {
90 if (index
>= MAX_ROOTS
|| source
->blen
>= kMaxWordLen
)
93 // Retrieve a HashEntryItem struct from our spool, initialize it, and
94 // returns the address of its 'hentry' member.
95 size_t source_size
= sizeof(hentry
) + source
->blen
+ 1;
96 HashEntryItem
* hash_item
= &hash_items_
[index
];
97 memcpy(&hash_item
->entry
, source
, source_size
);
99 hash_item
->entry
.alen
= source
->alen
;
100 if (hash_item
->entry
.alen
> kMaxAffixLen
)
101 hash_item
->entry
.alen
= kMaxAffixLen
;
102 memcpy(hash_item
->astr
, source
->astr
, hash_item
->entry
.alen
* sizeof(hash_item
->astr
[0]));
103 hash_item
->entry
.astr
= &hash_item
->astr
[0];
105 return &hash_item
->entry
;
112 #ifdef HUNSPELL_CHROME_CLIENT
113 SuggestMgr::SuggestMgr(hunspell::BDictReader
* reader
,
114 const char * tryme
, int maxn
,
117 bdict_reader
= reader
;
119 SuggestMgr::SuggestMgr(const char * tryme
, int maxn
,
124 // register affix manager and check in string of chars to
125 // try when building candidate suggestions
144 maxngramsugs
= MAXNGRAMSUGS
;
145 maxcpdsugs
= MAXCOMPOUNDSUGS
;
148 langnum
= pAMgr
->get_langnum();
149 ckey
= pAMgr
->get_key_string();
150 nosplitsugs
= pAMgr
->get_nosplitsugs();
151 if (pAMgr
->get_maxngramsugs() >= 0)
152 maxngramsugs
= pAMgr
->get_maxngramsugs();
153 utf8
= pAMgr
->get_utf8();
154 if (pAMgr
->get_maxcpdsugs() >= 0)
155 maxcpdsugs
= pAMgr
->get_maxcpdsugs();
158 char * enc
= pAMgr
->get_encoding();
159 csconv
= get_current_cs(enc
);
162 complexprefixes
= pAMgr
->get_complexprefixes();
168 ckeyl
= u8_u16(t
, MAXSWL
, ckey
);
169 ckey_utf
= (w_char
*) malloc(ckeyl
* sizeof(w_char
));
170 if (ckey_utf
) memcpy(ckey_utf
, t
, ckeyl
* sizeof(w_char
));
173 ckeyl
= strlen(ckey
);
178 ctry
= mystrdup(tryme
);
179 if (ctry
) ctryl
= strlen(ctry
);
182 ctryl
= u8_u16(t
, MAXSWL
, tryme
);
183 ctry_utf
= (w_char
*) malloc(ctryl
* sizeof(w_char
));
184 if (ctry_utf
) memcpy(ctry_utf
, t
, ctryl
* sizeof(w_char
));
191 SuggestMgr::~SuggestMgr()
194 if (ckey
) free(ckey
);
196 if (ckey_utf
) free(ckey_utf
);
199 if (ctry
) free(ctry
);
201 if (ctry_utf
) free(ctry_utf
);
205 #ifdef MOZILLA_CLIENT
210 int SuggestMgr::testsug(char** wlst
, const char * candidate
, int wl
, int ns
, int cpdsuggest
,
211 int * timer
, clock_t * timelimit
) {
213 if (ns
== maxSug
) return maxSug
;
214 for (int k
=0; k
< ns
; k
++) {
215 if (strcmp(candidate
,wlst
[k
]) == 0) cwrd
= 0;
217 if ((cwrd
) && checkword(candidate
, wl
, cpdsuggest
, timer
, timelimit
)) {
218 wlst
[ns
] = mystrdup(candidate
);
219 if (wlst
[ns
] == NULL
) {
220 for (int j
=0; j
<ns
; j
++) free(wlst
[j
]);
228 // generate suggestions for a misspelled word
229 // pass in address of array of char * pointers
230 // onlycompoundsug: probably bad suggestions (need for ngram sugs, too)
232 int SuggestMgr::suggest(char*** slst
, const char * w
, int nsug
,
233 int * onlycompoundsug
)
235 int nocompoundtwowords
= 0;
237 w_char word_utf
[MAXSWL
];
240 char w2
[MAXWORDUTF8LEN
];
241 const char * word
= w
;
244 // word reversing wrapper for complex prefixes
245 if (complexprefixes
) {
247 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
254 wlst
= (char **) malloc(maxSug
* sizeof(char *));
255 if (wlst
== NULL
) return -1;
256 for (int i
= 0; i
< maxSug
; i
++) {
262 wl
= u8_u16(word_utf
, MAXSWL
, word
);
269 for (int cpdsuggest
=0; (cpdsuggest
<2) && (nocompoundtwowords
==0); cpdsuggest
++) {
271 // limit compound suggestion
272 if (cpdsuggest
> 0) oldSug
= nsug
;
274 // suggestions for an uppercase word (html -> HTML)
275 if ((nsug
< maxSug
) && (nsug
> -1)) {
276 nsug
= (utf8
) ? capchars_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
277 capchars(wlst
, word
, nsug
, cpdsuggest
);
280 // perhaps we made a typical fault of spelling
281 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
282 nsug
= replchars(wlst
, word
, nsug
, cpdsuggest
);
285 // perhaps we made chose the wrong char from a related set
286 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
287 nsug
= mapchars(wlst
, word
, nsug
, cpdsuggest
);
290 // only suggest compound words when no other suggestion
291 if ((cpdsuggest
== 0) && (nsug
> nsugorig
)) nocompoundtwowords
=1;
293 // did we swap the order of chars by mistake
294 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
295 nsug
= (utf8
) ? swapchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
296 swapchar(wlst
, word
, nsug
, cpdsuggest
);
299 // did we swap the order of non adjacent chars by mistake
300 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
301 nsug
= (utf8
) ? longswapchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
302 longswapchar(wlst
, word
, nsug
, cpdsuggest
);
305 // did we just hit the wrong key in place of a good char (case and keyboard)
306 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
307 nsug
= (utf8
) ? badcharkey_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
308 badcharkey(wlst
, word
, nsug
, cpdsuggest
);
311 // did we add a char that should not be there
312 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
313 nsug
= (utf8
) ? extrachar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
314 extrachar(wlst
, word
, nsug
, cpdsuggest
);
318 // did we forgot a char
319 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
320 nsug
= (utf8
) ? forgotchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
321 forgotchar(wlst
, word
, nsug
, cpdsuggest
);
324 // did we move a char
325 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
326 nsug
= (utf8
) ? movechar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
327 movechar(wlst
, word
, nsug
, cpdsuggest
);
330 // did we just hit the wrong key in place of a good char
331 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
332 nsug
= (utf8
) ? badchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
333 badchar(wlst
, word
, nsug
, cpdsuggest
);
336 // did we double two characters
337 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
338 nsug
= (utf8
) ? doubletwochars_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
339 doubletwochars(wlst
, word
, nsug
, cpdsuggest
);
342 // perhaps we forgot to hit space and two words ran together
343 if (!nosplitsugs
&& (nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
))) {
344 nsug
= twowords(wlst
, word
, nsug
, cpdsuggest
);
347 } // repeating ``for'' statement compounding support
350 // we ran out of memory - we should free up as much as possible
351 for (int i
= 0; i
< maxSug
; i
++)
352 if (wlst
[i
] != NULL
) free(wlst
[i
]);
357 if (!nocompoundtwowords
&& (nsug
> 0) && onlycompoundsug
) *onlycompoundsug
= 1;
363 // generate suggestions for a word with typical mistake
364 // pass in address of array of char * pointers
365 #ifdef HUNSPELL_EXPERIMENTAL
366 int SuggestMgr::suggest_auto(char*** slst
, const char * w
, int nsug
)
368 int nocompoundtwowords
= 0;
372 char w2
[MAXWORDUTF8LEN
];
373 const char * word
= w
;
375 // word reversing wrapper for complex prefixes
376 if (complexprefixes
) {
378 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
385 wlst
= (char **) malloc(maxSug
* sizeof(char *));
386 if (wlst
== NULL
) return -1;
389 for (int cpdsuggest
=0; (cpdsuggest
<2) && (nocompoundtwowords
==0); cpdsuggest
++) {
391 // limit compound suggestion
392 if (cpdsuggest
> 0) oldSug
= nsug
;
394 // perhaps we made a typical fault of spelling
395 if ((nsug
< maxSug
) && (nsug
> -1))
396 nsug
= replchars(wlst
, word
, nsug
, cpdsuggest
);
398 // perhaps we made chose the wrong char from a related set
399 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
)))
400 nsug
= mapchars(wlst
, word
, nsug
, cpdsuggest
);
402 if ((cpdsuggest
==0) && (nsug
>0)) nocompoundtwowords
=1;
404 // perhaps we forgot to hit space and two words ran together
406 if ((nsug
< maxSug
) && (nsug
> -1) && (!cpdsuggest
|| (nsug
< oldSug
+ maxcpdsugs
)) && check_forbidden(word
, strlen(word
))) {
407 nsug
= twowords(wlst
, word
, nsug
, cpdsuggest
);
410 } // repeating ``for'' statement compounding support
413 for (int i
=0;i
<maxSug
; i
++)
414 if (wlst
[i
] != NULL
) free(wlst
[i
]);
422 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
424 // suggestions for an uppercase word (html -> HTML)
425 int SuggestMgr::capchars_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
427 char candidate
[MAXSWUTF8L
];
428 w_char candidate_utf
[MAXSWL
];
429 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
430 mkallcap_utf(candidate_utf
, wl
, langnum
);
431 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
432 return testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
435 // suggestions for an uppercase word (html -> HTML)
436 int SuggestMgr::capchars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
438 char candidate
[MAXSWUTF8L
];
439 strcpy(candidate
, word
);
440 mkallcap(candidate
, csconv
);
441 return testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
444 // suggestions for when chose the wrong char out of a related set
445 int SuggestMgr::mapchars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
447 char candidate
[MAXSWUTF8L
];
452 int wl
= strlen(word
);
453 if (wl
< 2 || ! pAMgr
) return ns
;
455 int nummap
= pAMgr
->get_nummap();
456 struct mapentry
* maptable
= pAMgr
->get_maptable();
457 if (maptable
==NULL
) return ns
;
461 return map_related(word
, (char *) &candidate
, 0, 0, wlst
, cpdsuggest
, ns
, maptable
, nummap
, &timer
, &timelimit
);
464 int SuggestMgr::map_related(const char * word
, char * candidate
, int wn
, int cn
,
465 char** wlst
, int cpdsuggest
, int ns
,
466 const mapentry
* maptable
, int nummap
, int * timer
, clock_t * timelimit
)
468 if (*(word
+ wn
) == '\0') {
470 *(candidate
+ cn
) = '\0';
471 int wl
= strlen(candidate
);
472 for (int m
=0; m
< ns
; m
++)
473 if (strcmp(candidate
, wlst
[m
]) == 0) cwrd
= 0;
474 if ((cwrd
) && checkword(candidate
, wl
, cpdsuggest
, timer
, timelimit
)) {
476 wlst
[ns
] = mystrdup(candidate
);
477 if (wlst
[ns
] == NULL
) return -1;
484 for (int j
= 0; j
< nummap
; j
++) {
485 for (int k
= 0; k
< maptable
[j
].len
; k
++) {
486 int len
= strlen(maptable
[j
].set
[k
]);
487 if (strncmp(maptable
[j
].set
[k
], word
+ wn
, len
) == 0) {
489 for (int l
= 0; l
< maptable
[j
].len
; l
++) {
490 strcpy(candidate
+ cn
, maptable
[j
].set
[l
]);
491 ns
= map_related(word
, candidate
, wn
+ len
, strlen(candidate
), wlst
,
492 cpdsuggest
, ns
, maptable
, nummap
, timer
, timelimit
);
493 if (!(*timer
)) return ns
;
499 *(candidate
+ cn
) = *(word
+ wn
);
500 ns
= map_related(word
, candidate
, wn
+ 1, cn
+ 1, wlst
, cpdsuggest
,
501 ns
, maptable
, nummap
, timer
, timelimit
);
506 // suggestions for a typical fault of spelling, that
507 // differs with more, than 1 letter from the right form.
508 int SuggestMgr::replchars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
510 char candidate
[MAXSWUTF8L
];
513 int wl
= strlen(word
);
514 if (wl
< 2 || ! pAMgr
) return ns
;
516 #ifdef HUNSPELL_CHROME_CLIENT
517 const char *pattern
, *pattern2
;
518 hunspell::ReplacementIterator iterator
= bdict_reader
->GetReplacementIterator();
519 while (iterator
.GetNext(&pattern
, &pattern2
)) {
521 lenr
= strlen(pattern2
);
522 lenp
= strlen(pattern
);
524 // search every occurence of the pattern in the word
525 while ((r
=strstr(r
, pattern
)) != NULL
) {
526 strcpy(candidate
, word
);
527 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXLNLEN
) break;
528 strcpy(candidate
+(r
-word
), pattern2
);
529 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
530 ns
= testsug(wlst
, candidate
, wl
-lenp
+lenr
, ns
, cpdsuggest
, NULL
, NULL
);
531 if (ns
== -1) return -1;
532 // check REP suggestions with space
533 char * sp
= strchr(candidate
, ' ');
535 char * prev
= candidate
;
538 if (checkword(prev
, strlen(prev
), 0, NULL
, NULL
)) {
541 ns
= testsug(wlst
, sp
+ 1, strlen(sp
+ 1), ns
, cpdsuggest
, NULL
, NULL
);
542 if (ns
== -1) return -1;
545 wlst
[ns
- 1] = mystrdup(candidate
);
546 if (!wlst
[ns
- 1]) return -1;
551 sp
= strchr(prev
, ' ');
554 r
++; // search for the next letter
558 int numrep
= pAMgr
->get_numrep();
559 struct replentry
* reptable
= pAMgr
->get_reptable();
560 if (reptable
==NULL
) return ns
;
561 for (int i
=0; i
< numrep
; i
++ ) {
563 lenr
= strlen(reptable
[i
].pattern2
);
564 lenp
= strlen(reptable
[i
].pattern
);
565 // search every occurence of the pattern in the word
566 while ((r
=strstr(r
, reptable
[i
].pattern
)) != NULL
&& (!reptable
[i
].end
|| strlen(r
) == strlen(reptable
[i
].pattern
)) &&
567 (!reptable
[i
].start
|| r
== word
)) {
568 strcpy(candidate
, word
);
569 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXSWUTF8L
) break;
570 strcpy(candidate
+(r
-word
),reptable
[i
].pattern2
);
571 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
572 ns
= testsug(wlst
, candidate
, wl
-lenp
+lenr
, ns
, cpdsuggest
, NULL
, NULL
);
573 if (ns
== -1) return -1;
574 // check REP suggestions with space
575 char * sp
= strchr(candidate
, ' ');
577 char * prev
= candidate
;
580 if (checkword(prev
, strlen(prev
), 0, NULL
, NULL
)) {
583 ns
= testsug(wlst
, sp
+ 1, strlen(sp
+ 1), ns
, cpdsuggest
, NULL
, NULL
);
584 if (ns
== -1) return -1;
587 wlst
[ns
- 1] = mystrdup(candidate
);
588 if (!wlst
[ns
- 1]) return -1;
593 sp
= strchr(prev
, ' ');
596 r
++; // search for the next letter
603 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
604 int SuggestMgr::doubletwochars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
606 char candidate
[MAXSWUTF8L
];
608 int wl
= strlen(word
);
609 if (wl
< 5 || ! pAMgr
) return ns
;
610 for (int i
=2; i
< wl
; i
++ ) {
611 if (word
[i
]==word
[i
-2]) {
614 strcpy(candidate
,word
);
615 strcpy(candidate
+i
-1,word
+i
+1);
616 ns
= testsug(wlst
, candidate
, wl
-2, ns
, cpdsuggest
, NULL
, NULL
);
617 if (ns
== -1) return -1;
627 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
628 int SuggestMgr::doubletwochars_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
630 w_char candidate_utf
[MAXSWL
];
631 char candidate
[MAXSWUTF8L
];
633 if (wl
< 5 || ! pAMgr
) return ns
;
634 for (int i
=2; i
< wl
; i
++) {
635 if (w_char_eq(word
[i
], word
[i
-2])) {
638 memcpy(candidate_utf
, word
, (i
- 1) * sizeof(w_char
));
639 memcpy(candidate_utf
+i
-1, word
+i
+1, (wl
-i
-1) * sizeof(w_char
));
640 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
-2);
641 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
642 if (ns
== -1) return -1;
652 // error is wrong char in place of correct one (case and keyboard related version)
653 int SuggestMgr::badcharkey(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
656 char candidate
[MAXSWUTF8L
];
657 int wl
= strlen(word
);
658 strcpy(candidate
, word
);
659 // swap out each char one by one and try uppercase and neighbor
660 // keyboard chars in its place to see if that makes a good word
662 for (int i
=0; i
< wl
; i
++) {
664 // check with uppercase letters
665 candidate
[i
] = csconv
[((unsigned char)tmpc
)].cupper
;
666 if (tmpc
!= candidate
[i
]) {
667 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
668 if (ns
== -1) return -1;
671 // check neighbor characters in keyboard string
673 char * loc
= strchr(ckey
, tmpc
);
675 if ((loc
> ckey
) && (*(loc
- 1) != '|')) {
676 candidate
[i
] = *(loc
- 1);
677 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
678 if (ns
== -1) return -1;
680 if ((*(loc
+ 1) != '|') && (*(loc
+ 1) != '\0')) {
681 candidate
[i
] = *(loc
+ 1);
682 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
683 if (ns
== -1) return -1;
685 loc
= strchr(loc
+ 1, tmpc
);
692 // error is wrong char in place of correct one (case and keyboard related version)
693 int SuggestMgr::badcharkey_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
696 w_char candidate_utf
[MAXSWL
];
697 char candidate
[MAXSWUTF8L
];
698 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
699 // swap out each char one by one and try all the tryme
700 // chars in its place to see if that makes a good word
701 for (int i
=0; i
< wl
; i
++) {
702 tmpc
= candidate_utf
[i
];
703 // check with uppercase letters
704 mkallcap_utf(candidate_utf
+ i
, 1, langnum
);
705 if (!w_char_eq(tmpc
, candidate_utf
[i
])) {
706 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
707 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
708 if (ns
== -1) return -1;
709 candidate_utf
[i
] = tmpc
;
711 // check neighbor characters in keyboard string
713 w_char
* loc
= ckey_utf
;
714 while ((loc
< (ckey_utf
+ ckeyl
)) && !w_char_eq(*loc
, tmpc
)) loc
++;
715 while (loc
< (ckey_utf
+ ckeyl
)) {
716 if ((loc
> ckey_utf
) && !w_char_eq(*(loc
- 1), W_VLINE
)) {
717 candidate_utf
[i
] = *(loc
- 1);
718 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
719 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
720 if (ns
== -1) return -1;
722 if (((loc
+ 1) < (ckey_utf
+ ckeyl
)) && !w_char_eq(*(loc
+ 1), W_VLINE
)) {
723 candidate_utf
[i
] = *(loc
+ 1);
724 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
725 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
726 if (ns
== -1) return -1;
728 do { loc
++; } while ((loc
< (ckey_utf
+ ckeyl
)) && !w_char_eq(*loc
, tmpc
));
730 candidate_utf
[i
] = tmpc
;
735 // error is wrong char in place of correct one
736 int SuggestMgr::badchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
739 char candidate
[MAXSWUTF8L
];
740 clock_t timelimit
= clock();
741 int timer
= MINTIMER
;
742 int wl
= strlen(word
);
743 strcpy(candidate
, word
);
744 // swap out each char one by one and try all the tryme
745 // chars in its place to see if that makes a good word
746 for (int j
=0; j
< ctryl
; j
++) {
747 for (int i
=wl
-1; i
>= 0; i
--) {
749 if (ctry
[j
] == tmpc
) continue;
750 candidate
[i
] = ctry
[j
];
751 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, &timer
, &timelimit
);
752 if (ns
== -1) return -1;
753 if (!timer
) return ns
;
760 // error is wrong char in place of correct one
761 int SuggestMgr::badchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
764 w_char candidate_utf
[MAXSWL
];
765 char candidate
[MAXSWUTF8L
];
766 clock_t timelimit
= clock();
767 int timer
= MINTIMER
;
768 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
769 // swap out each char one by one and try all the tryme
770 // chars in its place to see if that makes a good word
771 for (int j
=0; j
< ctryl
; j
++) {
772 for (int i
=wl
-1; i
>= 0; i
--) {
773 tmpc
= candidate_utf
[i
];
774 if (w_char_eq(tmpc
, ctry_utf
[j
])) continue;
775 candidate_utf
[i
] = ctry_utf
[j
];
776 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
777 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, &timer
, &timelimit
);
778 if (ns
== -1) return -1;
779 if (!timer
) return ns
;
780 candidate_utf
[i
] = tmpc
;
786 // error is word has an extra letter it does not need
787 int SuggestMgr::extrachar_utf(char** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
789 char candidate
[MAXSWUTF8L
];
790 w_char candidate_utf
[MAXSWL
];
792 w_char tmpc
= W_VLINE
; // not used value, only for VCC warning message
793 if (wl
< 2) return ns
;
794 // try omitting one char of word at a time
795 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
796 for (p
= candidate_utf
+ wl
- 1; p
>= candidate_utf
; p
--) {
798 if (p
< candidate_utf
+ wl
- 1) *p
= tmpc
;
799 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
- 1);
800 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
801 if (ns
== -1) return -1;
807 // error is word has an extra letter it does not need
808 int SuggestMgr::extrachar(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
811 char candidate
[MAXSWUTF8L
];
813 int wl
= strlen(word
);
814 if (wl
< 2) return ns
;
815 // try omitting one char of word at a time
816 strcpy (candidate
, word
);
817 for (p
= candidate
+ wl
- 1; p
>=candidate
; p
--) {
820 ns
= testsug(wlst
, candidate
, wl
-1, ns
, cpdsuggest
, NULL
, NULL
);
821 if (ns
== -1) return -1;
827 // error is missing a letter it needs
828 int SuggestMgr::forgotchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
830 // TODO(rouslan): Remove the interim change below when this patch lands:
831 // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
832 char candidate
[MAXSWUTF8L
+ 4];
834 clock_t timelimit
= clock();
835 int timer
= MINTIMER
;
836 int wl
= strlen(word
);
837 // try inserting a tryme character before every letter (and the null terminator)
838 for (int i
= 0; i
< ctryl
; i
++) {
839 strcpy(candidate
, word
);
840 for (p
= candidate
+ wl
; p
>= candidate
; p
--) {
843 ns
= testsug(wlst
, candidate
, wl
+1, ns
, cpdsuggest
, &timer
, &timelimit
);
844 if (ns
== -1) return -1;
845 if (!timer
) return ns
;
851 // error is missing a letter it needs
852 int SuggestMgr::forgotchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
854 // TODO(rouslan): Remove the interim change below when this patch lands:
855 // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
856 w_char candidate_utf
[MAXSWL
+ 1];
857 char candidate
[MAXSWUTF8L
+ 4];
859 clock_t timelimit
= clock();
860 int timer
= MINTIMER
;
861 // try inserting a tryme character at the end of the word and before every letter
862 for (int i
= 0; i
< ctryl
; i
++) {
863 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
864 for (p
= candidate_utf
+ wl
; p
>= candidate_utf
; p
--) {
867 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
+ 1);
868 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, &timer
, &timelimit
);
869 if (ns
== -1) return -1;
870 if (!timer
) return ns
;
877 /* error is should have been two words */
878 int SuggestMgr::twowords(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
880 char candidate
[MAXSWUTF8L
];
887 if (wl
< 3) return ns
;
889 if (langnum
== LANG_hu
) forbidden
= check_forbidden(word
, wl
);
891 strcpy(candidate
+ 1, word
);
892 // split the string into two pieces after every char
893 // if both pieces are good words make them a suggestion
894 for (p
= candidate
+ 1; p
[1] != '\0'; p
++) {
896 // go to end of the UTF-8 character
897 while (utf8
&& ((p
[1] & 0xc0) == 0x80)) {
901 if (utf8
&& p
[1] == '\0') break; // last UTF-8 character
903 c1
= checkword(candidate
,strlen(candidate
), cpdsuggest
, NULL
, NULL
);
905 c2
= checkword((p
+1),strlen(p
+1), cpdsuggest
, NULL
, NULL
);
909 // spec. Hungarian code (need a better compound word support)
910 if ((langnum
== LANG_hu
) && !forbidden
&&
911 // if 3 repeating letter, use - instead of space
912 (((p
[-1] == p
[1]) && (((p
>candidate
+1) && (p
[-1] == p
[-2])) || (p
[-1] == p
[2]))) ||
913 // or multiple compounding, with more, than 6 syllables
914 ((c1
== 3) && (c2
>= 2)))) *p
= '-';
917 for (int k
=0; k
< ns
; k
++)
918 if (strcmp(candidate
,wlst
[k
]) == 0) cwrd
= 0;
921 wlst
[ns
] = mystrdup(candidate
);
922 if (wlst
[ns
] == NULL
) return -1;
926 // add two word suggestion with dash, if TRY string contains
928 // NOTE: cwrd doesn't modified for REP twoword sugg.
929 if (ctry
&& (strchr(ctry
, 'a') || strchr(ctry
, '-')) &&
930 mystrlen(p
+ 1) > 1 &&
931 mystrlen(candidate
) - mystrlen(p
) > 1) {
933 for (int k
=0; k
< ns
; k
++)
934 if (strcmp(candidate
,wlst
[k
]) == 0) cwrd
= 0;
937 wlst
[ns
] = mystrdup(candidate
);
938 if (wlst
[ns
] == NULL
) return -1;
950 // error is adjacent letter were swapped
951 int SuggestMgr::swapchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
953 char candidate
[MAXSWUTF8L
];
957 // try swapping adjacent chars one by one
958 strcpy(candidate
, word
);
959 for (p
= candidate
; p
[1] != 0; p
++) {
963 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
964 if (ns
== -1) return -1;
968 // try double swaps for short words
969 // ahev -> have, owudl -> would
970 if (wl
== 4 || wl
== 5) {
971 candidate
[0] = word
[1];
972 candidate
[1] = word
[0];
973 candidate
[2] = word
[2];
974 candidate
[wl
- 2] = word
[wl
- 1];
975 candidate
[wl
- 1] = word
[wl
- 2];
976 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
977 if (ns
== -1) return -1;
979 candidate
[0] = word
[0];
980 candidate
[1] = word
[2];
981 candidate
[2] = word
[1];
982 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
983 if (ns
== -1) return -1;
989 // error is adjacent letter were swapped
990 int SuggestMgr::swapchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
992 w_char candidate_utf
[MAXSWL
];
993 char candidate
[MAXSWUTF8L
];
997 // try swapping adjacent chars one by one
998 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
999 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
- 1); p
++) {
1003 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1004 if (len
== 0) len
= strlen(candidate
);
1005 ns
= testsug(wlst
, candidate
, len
, ns
, cpdsuggest
, NULL
, NULL
);
1006 if (ns
== -1) return -1;
1010 // try double swaps for short words
1011 // ahev -> have, owudl -> would, suodn -> sound
1012 if (wl
== 4 || wl
== 5) {
1013 candidate_utf
[0] = word
[1];
1014 candidate_utf
[1] = word
[0];
1015 candidate_utf
[2] = word
[2];
1016 candidate_utf
[wl
- 2] = word
[wl
- 1];
1017 candidate_utf
[wl
- 1] = word
[wl
- 2];
1018 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1019 ns
= testsug(wlst
, candidate
, len
, ns
, cpdsuggest
, NULL
, NULL
);
1020 if (ns
== -1) return -1;
1022 candidate_utf
[0] = word
[0];
1023 candidate_utf
[1] = word
[2];
1024 candidate_utf
[2] = word
[1];
1025 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1026 ns
= testsug(wlst
, candidate
, len
, ns
, cpdsuggest
, NULL
, NULL
);
1027 if (ns
== -1) return -1;
1033 // error is not adjacent letter were swapped
1034 int SuggestMgr::longswapchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
1036 char candidate
[MAXSWUTF8L
];
1040 int wl
=strlen(word
);
1041 // try swapping not adjacent chars one by one
1042 strcpy(candidate
, word
);
1043 for (p
= candidate
; *p
!= 0; p
++) {
1044 for (q
= candidate
; *q
!= 0; q
++) {
1045 if (abs((int)(p
-q
)) > 1) {
1049 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
1050 if (ns
== -1) return -1;
1060 // error is adjacent letter were swapped
1061 int SuggestMgr::longswapchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
1063 w_char candidate_utf
[MAXSWL
];
1064 char candidate
[MAXSWUTF8L
];
1068 // try swapping not adjacent chars
1069 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
1070 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
); p
++) {
1071 for (q
= candidate_utf
; q
< (candidate_utf
+ wl
); q
++) {
1072 if (abs((int)(p
-q
)) > 1) {
1076 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1077 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
1078 if (ns
== -1) return -1;
1087 // error is a letter was moved
1088 int SuggestMgr::movechar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
1090 char candidate
[MAXSWUTF8L
];
1095 int wl
=strlen(word
);
1096 // try moving a char
1097 strcpy(candidate
, word
);
1098 for (p
= candidate
; *p
!= 0; p
++) {
1099 for (q
= p
+ 1; (*q
!= 0) && ((q
- p
) < 10); q
++) {
1103 if ((q
-p
) < 2) continue; // omit swap char
1104 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
1105 if (ns
== -1) return -1;
1107 strcpy(candidate
, word
);
1109 for (p
= candidate
+ wl
- 1; p
> candidate
; p
--) {
1110 for (q
= p
- 1; (q
>= candidate
) && ((p
- q
) < 10); q
--) {
1114 if ((p
-q
) < 2) continue; // omit swap char
1115 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
1116 if (ns
== -1) return -1;
1118 strcpy(candidate
, word
);
1123 // error is a letter was moved
1124 int SuggestMgr::movechar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
1126 w_char candidate_utf
[MAXSWL
];
1127 char candidate
[MAXSWUTF8L
];
1131 // try moving a char
1132 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
1133 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
); p
++) {
1134 for (q
= p
+ 1; (q
< (candidate_utf
+ wl
)) && ((q
- p
) < 10); q
++) {
1138 if ((q
-p
) < 2) continue; // omit swap char
1139 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1140 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
1141 if (ns
== -1) return -1;
1143 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
1145 for (p
= candidate_utf
+ wl
- 1; p
> candidate_utf
; p
--) {
1146 for (q
= p
- 1; (q
>= candidate_utf
) && ((p
- q
) < 10); q
--) {
1150 if ((p
-q
) < 2) continue; // omit swap char
1151 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
1152 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
1153 if (ns
== -1) return -1;
1155 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
1160 // generate a set of suggestions for very poorly spelled words
1161 int SuggestMgr::ngsuggest(char** wlst
, char * w
, int ns
, HashMgr
** pHMgr
, int md
)
1170 // exhaustively search through all root words
1171 // keeping track of the MAX_ROOTS most similar root words
1172 struct hentry
* roots
[MAX_ROOTS
];
1173 char * rootsphon
[MAX_ROOTS
];
1174 int scores
[MAX_ROOTS
];
1175 int scoresphon
[MAX_ROOTS
];
1176 for (i
= 0; i
< MAX_ROOTS
; i
++) {
1178 scores
[i
] = -100 * i
;
1179 rootsphon
[i
] = NULL
;
1180 scoresphon
[i
] = -100 * i
;
1183 lpphon
= MAX_ROOTS
- 1;
1185 int low
= NGRAM_LOWERING
;
1187 char w2
[MAXWORDUTF8LEN
];
1191 // word reversing wrapper for complex prefixes
1192 if (complexprefixes
) {
1194 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
1198 char mw
[MAXSWUTF8L
];
1200 int nc
= strlen(word
);
1201 int n
= (utf8
) ? u8_u16(u8
, MAXSWL
, word
) : nc
;
1203 // set character based ngram suggestion for words with non-BMP Unicode characters
1205 utf8
= 0; // XXX not state-free
1211 struct hentry
* hp
= NULL
;
1213 #ifdef HUNSPELL_CHROME_CLIENT
1214 ScopedHashEntryFactory hash_entry_factory
;
1216 phonetable
* ph
= (pAMgr
) ? pAMgr
->get_phonetable() : NULL
;
1217 char target
[MAXSWUTF8L
];
1218 char candidate
[MAXSWUTF8L
];
1222 int _wl
= u8_u16(_w
, MAXSWL
, word
);
1223 mkallcap_utf(_w
, _wl
, langnum
);
1224 u16_u8(candidate
, MAXSWUTF8L
, _w
, _wl
);
1226 strcpy(candidate
, word
);
1227 if (!nonbmp
) mkallcap(candidate
, csconv
);
1229 phonet(candidate
, target
, nc
, *ph
); // XXX phonet() is 8-bit (nc, not n)
1232 FLAG forbiddenword
= pAMgr
? pAMgr
->get_forbiddenword() : FLAG_NULL
;
1233 FLAG nosuggest
= pAMgr
? pAMgr
->get_nosuggest() : FLAG_NULL
;
1234 FLAG nongramsuggest
= pAMgr
? pAMgr
->get_nongramsuggest() : FLAG_NULL
;
1235 FLAG onlyincompound
= pAMgr
? pAMgr
->get_onlyincompound() : FLAG_NULL
;
1237 for (i
= 0; i
< md
; i
++) {
1238 while (0 != (hp
= (pHMgr
[i
])->walk_hashtable(col
, hp
))) {
1239 if ((hp
->astr
) && (pAMgr
) &&
1240 (TESTAFF(hp
->astr
, forbiddenword
, hp
->alen
) ||
1241 TESTAFF(hp
->astr
, ONLYUPCASEFLAG
, hp
->alen
) ||
1242 TESTAFF(hp
->astr
, nosuggest
, hp
->alen
) ||
1243 TESTAFF(hp
->astr
, nongramsuggest
, hp
->alen
) ||
1244 TESTAFF(hp
->astr
, onlyincompound
, hp
->alen
))) continue;
1246 sc
= ngram(3, word
, HENTRY_WORD(hp
), NGRAM_LONGER_WORSE
+ low
) +
1247 leftcommonsubstring(word
, HENTRY_WORD(hp
));
1249 // check special pronounciation
1250 if ((hp
->var
& H_OPT_PHON
) && copy_field(f
, HENTRY_DATA(hp
), MORPH_PHON
)) {
1251 int sc2
= ngram(3, word
, f
, NGRAM_LONGER_WORSE
+ low
) +
1252 + leftcommonsubstring(word
, f
);
1253 if (sc2
> sc
) sc
= sc2
;
1257 if (ph
&& (sc
> 2) && (abs(n
- (int) hp
->clen
) <= 3)) {
1258 char target2
[MAXSWUTF8L
];
1261 int _wl
= u8_u16(_w
, MAXSWL
, HENTRY_WORD(hp
));
1262 mkallcap_utf(_w
, _wl
, langnum
);
1263 u16_u8(candidate
, MAXSWUTF8L
, _w
, _wl
);
1265 strcpy(candidate
, HENTRY_WORD(hp
));
1266 mkallcap(candidate
, csconv
);
1268 phonet(candidate
, target2
, -1, *ph
);
1269 scphon
= 2 * ngram(3, target
, target2
, NGRAM_LONGER_WORSE
);
1272 if (sc
> scores
[lp
]) {
1274 #ifdef HUNSPELL_CHROME_CLIENT
1275 roots
[lp
] = hash_entry_factory
.CreateScopedHashEntry(lp
, hp
);
1280 for (j
=0; j
< MAX_ROOTS
; j
++)
1281 if (scores
[j
] < lval
) {
1288 if (scphon
> scoresphon
[lpphon
]) {
1289 scoresphon
[lpphon
] = scphon
;
1290 rootsphon
[lpphon
] = HENTRY_WORD(hp
);
1292 for (j
=0; j
< MAX_ROOTS
; j
++)
1293 if (scoresphon
[j
] < lval
) {
1295 lval
= scoresphon
[j
];
1300 // find minimum threshold for a passable suggestion
1301 // mangle original word three differnt ways
1302 // and score them to generate a minimum acceptable score
1304 for (int sp
= 1; sp
< 4; sp
++) {
1306 for (int k
=sp
; k
< n
; k
+=4) *((unsigned short *) u8
+ k
) = '*';
1307 u16_u8(mw
, MAXSWUTF8L
, u8
, n
);
1308 thresh
= thresh
+ ngram(n
, word
, mw
, NGRAM_ANY_MISMATCH
+ low
);
1311 for (int k
=sp
; k
< n
; k
+=4) *(mw
+ k
) = '*';
1312 thresh
= thresh
+ ngram(n
, word
, mw
, NGRAM_ANY_MISMATCH
+ low
);
1315 thresh
= thresh
/ 3;
1318 // now expand affixes on each of these root words and
1319 // and use length adjusted ngram scores to select
1320 // possible suggestions
1321 char * guess
[MAX_GUESS
];
1322 char * guessorig
[MAX_GUESS
];
1323 int gscore
[MAX_GUESS
];
1324 for(i
=0;i
<MAX_GUESS
;i
++) {
1326 guessorig
[i
] = NULL
;
1327 gscore
[i
] = -100 * i
;
1332 struct guessword
* glst
;
1333 glst
= (struct guessword
*) calloc(MAX_WORDS
,sizeof(struct guessword
));
1335 if (nonbmp
) utf8
= 1;
1339 for (i
= 0; i
< MAX_ROOTS
; i
++) {
1341 struct hentry
* rp
= roots
[i
];
1342 int nw
= pAMgr
->expand_rootword(glst
, MAX_WORDS
, HENTRY_WORD(rp
), rp
->blen
,
1343 rp
->astr
, rp
->alen
, word
, nc
,
1344 ((rp
->var
& H_OPT_PHON
) ? copy_field(f
, HENTRY_DATA(rp
), MORPH_PHON
) : NULL
));
1346 for (int k
= 0; k
< nw
; k
++) {
1347 sc
= ngram(n
, word
, glst
[k
].word
, NGRAM_ANY_MISMATCH
+ low
) +
1348 leftcommonsubstring(word
, glst
[k
].word
);
1351 if (sc
> gscore
[lp
]) {
1354 if (guessorig
[lp
]) {
1355 free(guessorig
[lp
]);
1356 guessorig
[lp
] = NULL
;
1360 guess
[lp
] = glst
[k
].word
;
1361 guessorig
[lp
] = glst
[k
].orig
;
1363 for (j
=0; j
< MAX_GUESS
; j
++)
1364 if (gscore
[j
] < lval
) {
1370 if (glst
[k
].orig
) free(glst
[k
].orig
);
1374 if (glst
[k
].orig
) free(glst
[k
].orig
);
1381 // now we are done generating guesses
1382 // sort in order of decreasing score
1385 bubblesort(&guess
[0], &guessorig
[0], &gscore
[0], MAX_GUESS
);
1386 if (ph
) bubblesort(&rootsphon
[0], NULL
, &scoresphon
[0], MAX_ROOTS
);
1388 // weight suggestions with a similarity index, based on
1389 // the longest common subsequent algorithm and resort
1395 int maxd
= pAMgr
->get_maxdiff();
1396 if (maxd
>= 0) fact
= (10.0 - maxd
)/5.0;
1399 for (i
=0; i
< MAX_GUESS
; i
++) {
1401 // lowering guess[i]
1402 char gl
[MAXSWUTF8L
];
1406 len
= u8_u16(_w
, MAXSWL
, guess
[i
]);
1407 mkallsmall_utf(_w
, len
, langnum
);
1408 u16_u8(gl
, MAXSWUTF8L
, _w
, len
);
1410 strcpy(gl
, guess
[i
]);
1411 if (!nonbmp
) mkallsmall(gl
, csconv
);
1412 len
= strlen(guess
[i
]);
1415 int _lcs
= lcslen(word
, gl
);
1417 // same characters with different casing
1418 if ((n
== len
) && (n
== _lcs
)) {
1422 // using 2-gram instead of 3, and other weightening
1424 re
= ngram(2, word
, gl
, NGRAM_ANY_MISMATCH
+ low
+ NGRAM_WEIGHTED
) +
1425 ngram(2, gl
, word
, NGRAM_ANY_MISMATCH
+ low
+ NGRAM_WEIGHTED
);
1428 // length of longest common subsequent minus length difference
1429 2 * _lcs
- abs((int) (n
- len
)) +
1430 // weight length of the left common substring
1431 leftcommonsubstring(word
, gl
) +
1432 // weight equal character positions
1433 (!nonbmp
&& commoncharacterpositions(word
, gl
, &is_swap
) ? 1: 0) +
1434 // swap character (not neighboring)
1435 ((is_swap
) ? 10 : 0) +
1437 ngram(4, word
, gl
, NGRAM_ANY_MISMATCH
+ low
) +
1440 // different limit for dictionaries with PHONE rules
1441 (ph
? (re
< len
* fact
? -1000 : 0) : (re
< (n
+ len
)*fact
? -1000 : 0));
1445 bubblesort(&guess
[0], &guessorig
[0], &gscore
[0], MAX_GUESS
);
1448 if (ph
) for (i
=0; i
< MAX_ROOTS
; i
++) {
1450 // lowering rootphon[i]
1451 char gl
[MAXSWUTF8L
];
1455 len
= u8_u16(_w
, MAXSWL
, rootsphon
[i
]);
1456 mkallsmall_utf(_w
, len
, langnum
);
1457 u16_u8(gl
, MAXSWUTF8L
, _w
, len
);
1459 strcpy(gl
, rootsphon
[i
]);
1460 if (!nonbmp
) mkallsmall(gl
, csconv
);
1461 len
= strlen(rootsphon
[i
]);
1464 // heuristic weigthing of ngram scores
1465 scoresphon
[i
] += 2 * lcslen(word
, gl
) - abs((int) (n
- len
)) +
1466 // weight length of the left common substring
1467 leftcommonsubstring(word
, gl
);
1471 if (ph
) bubblesort(&rootsphon
[0], NULL
, &scoresphon
[0], MAX_ROOTS
);
1477 for (i
=0; i
< MAX_GUESS
; i
++) {
1479 if ((ns
< oldns
+ maxngramsugs
) && (ns
< maxSug
) && (!same
|| (gscore
[i
] > 1000))) {
1481 // leave only excellent suggestions, if exists
1482 if (gscore
[i
] > 1000) same
= 1; else if (gscore
[i
] < -100) {
1484 // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
1485 if (ns
> oldns
|| (pAMgr
&& pAMgr
->get_onlymaxdiff())) {
1487 if (guessorig
[i
]) free(guessorig
[i
]);
1491 for (j
= 0; j
< ns
; j
++) {
1492 // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
1493 if ((!guessorig
[i
] && strstr(guess
[i
], wlst
[j
])) ||
1494 (guessorig
[i
] && strstr(guessorig
[i
], wlst
[j
])) ||
1495 // check forbidden words
1496 !checkword(guess
[i
], strlen(guess
[i
]), 0, NULL
, NULL
)) unique
= 0;
1499 wlst
[ns
++] = guess
[i
];
1502 wlst
[ns
-1] = guessorig
[i
];
1506 if (guessorig
[i
]) free(guessorig
[i
]);
1510 if (guessorig
[i
]) free(guessorig
[i
]);
1516 if (ph
) for (i
=0; i
< MAX_ROOTS
; i
++) {
1518 if ((ns
< oldns
+ MAXPHONSUGS
) && (ns
< maxSug
)) {
1520 for (j
= 0; j
< ns
; j
++) {
1521 // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
1522 if (strstr(rootsphon
[i
], wlst
[j
]) ||
1523 // check forbidden words
1524 !checkword(rootsphon
[i
], strlen(rootsphon
[i
]), 0, NULL
, NULL
)) unique
= 0;
1527 wlst
[ns
++] = mystrdup(rootsphon
[i
]);
1528 if (!wlst
[ns
- 1]) return ns
- 1;
1534 if (nonbmp
) utf8
= 1;
1539 // see if a candidate suggestion is spelled correctly
1540 // needs to check both root words and words with affixes
1542 // obsolote MySpell-HU modifications:
1543 // return value 2 and 3 marks compounding with hyphen (-)
1544 // `3' marks roots without suffix
1545 int SuggestMgr::checkword(const char * word
, int len
, int cpdsuggest
, int * timer
, clock_t * timelimit
)
1547 struct hentry
* rv
=NULL
;
1548 struct hentry
* rv2
=NULL
;
1554 if (!(*timer
) && timelimit
) {
1555 if ((clock() - *timelimit
) > TIMELIMIT
) return 0;
1556 *timer
= MAXPLUSTIMER
;
1561 if (cpdsuggest
==1) {
1562 if (pAMgr
->get_compound()) {
1563 rv
= pAMgr
->compound_check(word
, len
, 0, 0, 100, 0, NULL
, 0, 1, 0); //EXT
1564 if (rv
&& (!(rv2
= pAMgr
->lookup(word
)) || !rv2
->astr
||
1565 !(TESTAFF(rv2
->astr
,pAMgr
->get_forbiddenword(),rv2
->alen
) ||
1566 TESTAFF(rv2
->astr
,pAMgr
->get_nosuggest(),rv2
->alen
)))) return 3; // XXX obsolote categorisation + only ICONV needs affix flag check?
1571 rv
= pAMgr
->lookup(word
);
1574 if ((rv
->astr
) && (TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
)
1575 || TESTAFF(rv
->astr
,pAMgr
->get_nosuggest(),rv
->alen
))) return 0;
1577 if (rv
->astr
&& (TESTAFF(rv
->astr
,pAMgr
->get_needaffix(),rv
->alen
) ||
1578 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1579 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) {
1580 rv
= rv
->next_homonym
;
1583 } else rv
= pAMgr
->prefix_check(word
, len
, 0); // only prefix, and prefix + suffix XXX
1588 rv
= pAMgr
->suffix_check(word
, len
, 0, NULL
, NULL
, 0, NULL
); // only suffix
1591 if (!rv
&& pAMgr
->have_contclass()) {
1592 rv
= pAMgr
->suffix_check_twosfx(word
, len
, 0, NULL
, FLAG_NULL
);
1593 if (!rv
) rv
= pAMgr
->prefix_check_twosfx(word
, len
, 1, FLAG_NULL
);
1596 // check forbidden words
1597 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
) ||
1598 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1599 TESTAFF(rv
->astr
,pAMgr
->get_nosuggest(),rv
->alen
) ||
1600 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) return 0;
1602 if (rv
) { // XXX obsolote
1603 if ((pAMgr
->get_compoundflag()) &&
1604 TESTAFF(rv
->astr
, pAMgr
->get_compoundflag(), rv
->alen
)) return 2 + nosuffix
;
1611 int SuggestMgr::check_forbidden(const char * word
, int len
)
1613 struct hentry
* rv
= NULL
;
1616 rv
= pAMgr
->lookup(word
);
1617 if (rv
&& rv
->astr
&& (TESTAFF(rv
->astr
,pAMgr
->get_needaffix(),rv
->alen
) ||
1618 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) rv
= NULL
;
1619 if (!(pAMgr
->prefix_check(word
,len
,1)))
1620 rv
= pAMgr
->suffix_check(word
,len
, 0, NULL
, NULL
, 0, NULL
); // prefix+suffix, suffix
1621 // check forbidden words
1622 if ((rv
) && (rv
->astr
) && TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
)) return 1;
1627 #ifdef HUNSPELL_EXPERIMENTAL
1628 // suggest possible stems
1629 int SuggestMgr::suggest_pos_stems(char*** slst
, const char * w
, int nsug
)
1633 struct hentry
* rv
= NULL
;
1635 char w2
[MAXSWUTF8L
];
1636 const char * word
= w
;
1638 // word reversing wrapper for complex prefixes
1639 if (complexprefixes
) {
1641 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
1645 int wl
= strlen(word
);
1651 wlst
= (char **) calloc(maxSug
, sizeof(char *));
1652 if (wlst
== NULL
) return -1;
1655 rv
= pAMgr
->suffix_check(word
, wl
, 0, NULL
, wlst
, maxSug
, &nsug
);
1657 // delete dash from end of word
1659 for (int j
=0; j
< nsug
; j
++) {
1660 if (wlst
[j
][strlen(wlst
[j
]) - 1] == '-') wlst
[j
][strlen(wlst
[j
]) - 1] = '\0';
1667 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1670 char * SuggestMgr::suggest_morph(const char * w
)
1672 char result
[MAXLNLEN
];
1673 char * r
= (char *) result
;
1676 struct hentry
* rv
= NULL
;
1680 if (! pAMgr
) return NULL
;
1682 char w2
[MAXSWUTF8L
];
1683 const char * word
= w
;
1685 // word reversing wrapper for complex prefixes
1686 if (complexprefixes
) {
1688 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
1692 rv
= pAMgr
->lookup(word
);
1695 if ((!rv
->astr
) || !(TESTAFF(rv
->astr
, pAMgr
->get_forbiddenword(), rv
->alen
) ||
1696 TESTAFF(rv
->astr
, pAMgr
->get_needaffix(), rv
->alen
) ||
1697 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) {
1698 if (!HENTRY_FIND(rv
, MORPH_STEM
)) {
1699 mystrcat(result
, " ", MAXLNLEN
);
1700 mystrcat(result
, MORPH_STEM
, MAXLNLEN
);
1701 mystrcat(result
, word
, MAXLNLEN
);
1703 if (HENTRY_DATA(rv
)) {
1704 mystrcat(result
, " ", MAXLNLEN
);
1705 mystrcat(result
, HENTRY_DATA2(rv
), MAXLNLEN
);
1707 mystrcat(result
, "\n", MAXLNLEN
);
1709 rv
= rv
->next_homonym
;
1712 st
= pAMgr
->affix_check_morph(word
,strlen(word
));
1714 mystrcat(result
, st
, MAXLNLEN
);
1718 if (pAMgr
->get_compound() && (*result
== '\0'))
1719 pAMgr
->compound_check_morph(word
, strlen(word
),
1720 0, 0, 100, 0,NULL
, 0, &r
, NULL
);
1722 return (*result
) ? mystrdup(line_uniq(result
, MSEP_REC
)) : NULL
;
1725 #ifdef HUNSPELL_EXPERIMENTAL
1726 char * SuggestMgr::suggest_morph_for_spelling_error(const char * word
)
1729 char ** wlst
= (char **) calloc(maxSug
, sizeof(char *));
1730 if (!**wlst
) return NULL
;
1731 // we will use only the first suggestion
1732 for (int i
= 0; i
< maxSug
- 1; i
++) wlst
[i
] = "";
1733 int ns
= suggest(&wlst
, word
, maxSug
- 1, NULL
);
1735 p
= suggest_morph(wlst
[maxSug
- 1]);
1736 free(wlst
[maxSug
- 1]);
1738 if (wlst
) free(wlst
);
1741 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1744 char * SuggestMgr::suggest_hentry_gen(hentry
* rv
, char * pattern
)
1746 char result
[MAXLNLEN
];
1748 int sfxcount
= get_sfxcount(pattern
);
1750 if (get_sfxcount(HENTRY_DATA(rv
)) > sfxcount
) return NULL
;
1752 if (HENTRY_DATA(rv
)) {
1753 char * aff
= pAMgr
->morphgen(HENTRY_WORD(rv
), rv
->blen
, rv
->astr
, rv
->alen
,
1754 HENTRY_DATA(rv
), pattern
, 0);
1756 mystrcat(result
, aff
, MAXLNLEN
);
1757 mystrcat(result
, "\n", MAXLNLEN
);
1762 // check all allomorphs
1763 char allomorph
[MAXLNLEN
];
1765 if (HENTRY_DATA(rv
)) p
= (char *) strstr(HENTRY_DATA2(rv
), MORPH_ALLOMORPH
);
1767 struct hentry
* rv2
= NULL
;
1769 int plen
= fieldlen(p
);
1770 strncpy(allomorph
, p
, plen
);
1771 allomorph
[plen
] = '\0';
1772 rv2
= pAMgr
->lookup(allomorph
);
1774 // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= sfxcount) {
1775 if (HENTRY_DATA(rv2
)) {
1776 char * st
= (char *) strstr(HENTRY_DATA2(rv2
), MORPH_STEM
);
1777 if (st
&& (strncmp(st
+ MORPH_TAG_LEN
,
1778 HENTRY_WORD(rv
), fieldlen(st
+ MORPH_TAG_LEN
)) == 0)) {
1779 char * aff
= pAMgr
->morphgen(HENTRY_WORD(rv2
), rv2
->blen
, rv2
->astr
, rv2
->alen
,
1780 HENTRY_DATA(rv2
), pattern
, 0);
1782 mystrcat(result
, aff
, MAXLNLEN
);
1783 mystrcat(result
, "\n", MAXLNLEN
);
1788 rv2
= rv2
->next_homonym
;
1790 p
= strstr(p
+ plen
, MORPH_ALLOMORPH
);
1793 return (*result
) ? mystrdup(result
) : NULL
;
1796 char * SuggestMgr::suggest_gen(char ** desc
, int n
, char * pattern
) {
1797 char result
[MAXLNLEN
];
1798 char result2
[MAXLNLEN
];
1799 char newpattern
[MAXLNLEN
];
1801 if (n
== 0) return 0;
1803 struct hentry
* rv
= NULL
;
1804 if (!pAMgr
) return NULL
;
1806 // search affixed forms with and without derivational suffixes
1809 for (int k
= 0; k
< n
; k
++) {
1811 // add compound word parts (except the last one)
1812 char * s
= (char *) desc
[k
];
1813 char * part
= strstr(s
, MORPH_PART
);
1815 char * nextpart
= strstr(part
+ 1, MORPH_PART
);
1817 copy_field(result
+ strlen(result
), part
, MORPH_PART
);
1819 nextpart
= strstr(part
+ 1, MORPH_PART
);
1827 char * alt
= strstr(tok
, " | ");
1830 alt
= strstr(alt
, " | ");
1832 int pln
= line_tok(tok
, &pl
, MSEP_ALT
);
1833 for (int i
= 0; i
< pln
; i
++) {
1834 // remove inflectional and terminal suffixes
1835 char * is
= strstr(pl
[i
], MORPH_INFL_SFX
);
1837 char * ts
= strstr(pl
[i
], MORPH_TERM_SFX
);
1840 ts
= strstr(pl
[i
], MORPH_TERM_SFX
);
1842 char * st
= strstr(s
, MORPH_STEM
);
1844 copy_field(tok
, st
, MORPH_STEM
);
1845 rv
= pAMgr
->lookup(tok
);
1847 char newpat
[MAXLNLEN
];
1848 strcpy(newpat
, pl
[i
]);
1849 strcat(newpat
, pattern
);
1850 char * sg
= suggest_hentry_gen(rv
, newpat
);
1851 if (!sg
) sg
= suggest_hentry_gen(rv
, pattern
);
1854 int genl
= line_tok(sg
, &gen
, MSEP_REC
);
1857 for (int j
= 0; j
< genl
; j
++) {
1858 if (strstr(pl
[i
], MORPH_SURF_PFX
)) {
1859 int r2l
= strlen(result2
);
1860 result2
[r2l
] = MSEP_REC
;
1861 strcpy(result2
+ r2l
+ 1, result
);
1862 copy_field(result2
+ strlen(result2
), pl
[i
], MORPH_SURF_PFX
);
1863 mystrcat(result2
, gen
[j
], MAXLNLEN
);
1865 sprintf(result2
+ strlen(result2
), "%c%s%s",
1866 MSEP_REC
, result
, gen
[j
]);
1869 freelist(&gen
, genl
);
1871 rv
= rv
->next_homonym
;
1878 if (*result2
|| !strstr(pattern
, MORPH_DERI_SFX
)) break;
1879 strcpy(newpattern
, pattern
);
1880 pattern
= newpattern
;
1881 char * ds
= strstr(pattern
, MORPH_DERI_SFX
);
1883 strncpy(ds
, MORPH_TERM_SFX
, MORPH_TAG_LEN
);
1884 ds
= strstr(pattern
, MORPH_DERI_SFX
);
1887 return (*result2
? mystrdup(result2
) : NULL
);
1891 // generate an n-gram score comparing s1 and s2
1892 int SuggestMgr::ngram(int n
, char * s1
, const char * s2
, int opt
)
1903 l1
= u8_u16(su1
, MAXSWL
, s1
);
1904 l2
= u8_u16(su2
, MAXSWL
, s2
);
1905 if ((l2
<= 0) || (l1
== -1)) return 0;
1906 // lowering dictionary word
1907 if (opt
& NGRAM_LOWERING
) mkallsmall_utf(su2
, l2
, langnum
);
1908 for (int j
= 1; j
<= n
; j
++) {
1910 for (int i
= 0; i
<= (l1
-j
); i
++) {
1912 for (int l
= 0; l
<= (l2
-j
); l
++) {
1913 for (k
= 0; k
< j
; k
++) {
1914 w_char
* c1
= su1
+ i
+ k
;
1915 w_char
* c2
= su2
+ l
+ k
;
1916 if ((c1
->l
!= c2
->l
) || (c1
->h
!= c2
->h
)) break;
1923 if (k
!= j
&& opt
& NGRAM_WEIGHTED
) {
1926 if (i
== 0 || i
== l1
-j
) ns
--; // side weight
1929 nscore
= nscore
+ ns
;
1930 if (ns
< 2 && !(opt
& NGRAM_WEIGHTED
)) break;
1934 if (l2
== 0) return 0;
1936 char *t
= mystrdup(s2
);
1937 if (opt
& NGRAM_LOWERING
) mkallsmall(t
, csconv
);
1938 for (int j
= 1; j
<= n
; j
++) {
1940 for (int i
= 0; i
<= (l1
-j
); i
++) {
1941 char c
= *(s1
+ i
+ j
);
1942 *(s1
+ i
+ j
) = '\0';
1943 if (strstr(t
,(s1
+i
))) {
1945 } else if (opt
& NGRAM_WEIGHTED
) {
1948 if (i
== 0 || i
== l1
-j
) ns
--; // side weight
1952 nscore
= nscore
+ ns
;
1953 if (ns
< 2 && !(opt
& NGRAM_WEIGHTED
)) break;
1959 if (opt
& NGRAM_LONGER_WORSE
) ns
= (l2
-l1
)-2;
1960 if (opt
& NGRAM_ANY_MISMATCH
) ns
= abs(l2
-l1
)-2;
1961 ns
= (nscore
- ((ns
> 0) ? ns
: 0));
1965 // length of the left common substring of s1 and (decapitalised) s2
1966 int SuggestMgr::leftcommonsubstring(char * s1
, const char * s2
) {
1970 su1
[0].l
= su2
[0].l
= su1
[0].h
= su2
[0].h
= 0;
1971 // decapitalize dictionary word
1972 if (complexprefixes
) {
1973 int l1
= u8_u16(su1
, MAXSWL
, s1
);
1974 int l2
= u8_u16(su2
, MAXSWL
, s2
);
1975 if (*((short *)su1
+l1
-1) == *((short *)su2
+l2
-1)) return 1;
1980 unsigned short idx
= (su2
->h
<< 8) + su2
->l
;
1981 unsigned short otheridx
= (su1
->h
<< 8) + su1
->l
;
1982 if (otheridx
!= idx
&&
1983 (otheridx
!= unicodetolower(idx
, langnum
))) return 0;
1984 int l1
= u8_u16(su1
, MAXSWL
, s1
);
1985 int l2
= u8_u16(su2
, MAXSWL
, s2
);
1986 for(i
= 1; (i
< l1
) && (i
< l2
) &&
1987 (su1
[i
].l
== su2
[i
].l
) && (su1
[i
].h
== su2
[i
].h
); i
++);
1991 if (complexprefixes
) {
1992 int l1
= strlen(s1
);
1993 int l2
= strlen(s2
);
1994 if (*(s2
+l1
-1) == *(s2
+l2
-1)) return 1;
1997 // decapitalise dictionary word
1998 if ((*s1
!= *s2
) && (*s1
!= csconv
[((unsigned char)*s2
)].clower
)) return 0;
2001 } while ((*s1
== *s2
) && (*s1
!= '\0'));
2002 return (int)(s1
- olds
);
2008 int SuggestMgr::commoncharacterpositions(char * s1
, const char * s2
, int * is_swap
) {
2016 int l1
= u8_u16(su1
, MAXSWL
, s1
);
2017 int l2
= u8_u16(su2
, MAXSWL
, s2
);
2018 // decapitalize dictionary word
2019 if (complexprefixes
) {
2020 mkallsmall_utf(su2
+l2
-1, 1, langnum
);
2022 mkallsmall_utf(su2
, 1, langnum
);
2024 for (int i
= 0; (i
< l1
) && (i
< l2
); i
++) {
2025 if (((short *) su1
)[i
] == ((short *) su2
)[i
]) {
2028 if (diff
< 2) diffpos
[diff
] = i
;
2032 if ((diff
== 2) && (l1
== l2
) &&
2033 (((short *) su1
)[diffpos
[0]] == ((short *) su2
)[diffpos
[1]]) &&
2034 (((short *) su1
)[diffpos
[1]] == ((short *) su2
)[diffpos
[0]])) *is_swap
= 1;
2039 // decapitalize dictionary word
2040 if (complexprefixes
) {
2042 *(t
+l2
-1) = csconv
[((unsigned char)*(t
+l2
-1))].clower
;
2044 mkallsmall(t
, csconv
);
2046 for (i
= 0; (*(s1
+i
) != 0) && (*(t
+i
) != 0); i
++) {
2047 if (*(s1
+i
) == *(t
+i
)) {
2050 if (diff
< 2) diffpos
[diff
] = i
;
2054 if ((diff
== 2) && (*(s1
+i
) == 0) && (*(t
+i
) == 0) &&
2055 (*(s1
+diffpos
[0]) == *(t
+diffpos
[1])) &&
2056 (*(s1
+diffpos
[1]) == *(t
+diffpos
[0]))) *is_swap
= 1;
2061 int SuggestMgr::mystrlen(const char * word
) {
2064 return u8_u16(w
, MAXSWL
, word
);
2065 } else return strlen(word
);
2068 // sort in decreasing order of score
2069 void SuggestMgr::bubblesort(char** rword
, char** rword2
, int* rsc
, int n
)
2075 if (rsc
[j
-1] < rsc
[j
]) {
2076 int sctmp
= rsc
[j
-1];
2077 char * wdtmp
= rword
[j
-1];
2079 rword
[j
-1] = rword
[j
];
2083 wdtmp
= rword2
[j
-1];
2084 rword2
[j
-1] = rword2
[j
];
2095 // longest common subsequence
2096 void SuggestMgr::lcs(const char * s
, const char * s2
, int * l1
, int * l2
, char ** result
) {
2105 m
= u8_u16(su
, MAXSWL
, s
);
2106 n
= u8_u16(su2
, MAXSWL
, s2
);
2111 c
= (char *) calloc(m
+ 1, n
+ 1);
2112 b
= (char *) calloc(m
+ 1, n
+ 1);
2119 for (i
= 1; i
<= m
; i
++) {
2120 for (j
= 1; j
<= n
; j
++) {
2121 if ( ((utf8
) && (*((short *) su
+i
-1) == *((short *)su2
+j
-1)))
2122 || ((!utf8
) && ((*(s
+i
-1)) == (*(s2
+j
-1))))) {
2123 c
[i
*(n
+1) + j
] = c
[(i
-1)*(n
+1) + j
-1]+1;
2124 b
[i
*(n
+1) + j
] = LCS_UPLEFT
;
2125 } else if (c
[(i
-1)*(n
+1) + j
] >= c
[i
*(n
+1) + j
-1]) {
2126 c
[i
*(n
+1) + j
] = c
[(i
-1)*(n
+1) + j
];
2127 b
[i
*(n
+1) + j
] = LCS_UP
;
2129 c
[i
*(n
+1) + j
] = c
[i
*(n
+1) + j
-1];
2130 b
[i
*(n
+1) + j
] = LCS_LEFT
;
2140 int SuggestMgr::lcslen(const char * s
, const char* s2
) {
2147 lcs(s
, s2
, &m
, &n
, &result
);
2148 if (!result
) return 0;
2151 while ((i
!= 0) && (j
!= 0)) {
2152 if (result
[i
*(n
+1) + j
] == LCS_UPLEFT
) {
2156 } else if (result
[i
*(n
+1) + j
] == LCS_UP
) {