third_party/hunspell_new/src/hunspell/suggestmgr.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #include <stdlib.h>
   5 #include <string.h>
   6 #include <stdio.h>
   7 #include <ctype.h>
   8
   9 #include "suggestmgr.hxx"
  10 #include "htypes.hxx"
  11 #include "csutil.hxx"
  12
  13 const w_char W_VLINE = { '\0', '|' };
  14
  15 #ifdef HUNSPELL_CHROME_CLIENT
  16 namespace {
  17 // A simple class which creates temporary hentry objects which are available
  18 // only in a scope. To conceal memory operations from SuggestMgr functions,
  19 // this object automatically deletes all hentry objects created through
  20 // CreateScopedHashEntry() calls in its destructor. So, the following snippet
  21 // raises a memory error.
  22 //
  23 //   hentry* bad_copy = NULL;
  24 //   {
  25 //     ScopedHashEntryFactory factory;
  26 //     hentry* scoped_copy = factory.CreateScopedHashEntry(0, source);
  27 //     ...
  28 //     bad_copy = scoped_copy;
  29 //   }
  30 //   if (bad_copy->word[0])  // memory for scoped_copy has been deleted!
  31 //
  32 // As listed in the above snippet, it is simple to use this class.
  33 // 1. Declare an instance of this ScopedHashEntryFactory, and;
  34 // 2. Call its CreateHashEntry() member instead of using 'new hentry' or
  35 //    'operator='.
  36 //
  37 class ScopedHashEntryFactory {
  38  public:
  39   ScopedHashEntryFactory();
  40   ~ScopedHashEntryFactory();
  41
  42   // Creates a temporary copy of the given hentry struct.
  43   // The returned copy is available only while this object is available.
  44   // NOTE: this function just calls memcpy() in creating a copy of the given
  45   // hentry struct, i.e. it does NOT copy objects referred by pointers of the
  46   // given hentry struct.
  47   hentry* CreateScopedHashEntry(int index, const hentry* source);
  48
  49  private:
  50   // A struct which encapsulates the new hentry struct introduced in hunspell
  51   // 1.2.8. For a pointer to an hentry struct 'h', hunspell 1.2.8 stores a word
  52   // (including a NUL character) into 'h->word[0]',...,'h->word[h->blen]' even
  53   // though arraysize(h->word[]) is 1. Also, it changed 'astr' to a pointer so
  54   // it can store affix flags into 'h->astr[0]',...,'h->astr[alen-1]'. To handle
  55   // this new hentry struct, we define a struct which combines three values: an
  56   // hentry struct 'hentry'; a char array 'word[kMaxWordLen]', and; an unsigned
  57   // short array 'astr' so a hentry struct 'h' returned from
  58   // CreateScopedHashEntry() satisfies the following equations:
  59   //   hentry* h = factory.CreateScopedHashEntry(0, source);
  60   //   h->word[0] == ((HashEntryItem*)h)->entry.word[0].
  61   //   h->word[1] == ((HashEntryItem*)h)->word[0].
  62   //   ...
  63   //   h->word[h->blen] == ((HashEntryItem*)h)->word[h->blen-1].
  64   //   h->astr[0] == ((HashEntryItem*)h)->astr[0].
  65   //   h->astr[1] == ((HashEntryItem*)h)->astr[1].
  66   //   ...
  67   //   h->astr[h->alen-1] == ((HashEntryItem*)h)->astr[h->alen-1].
  68   enum {
  69     kMaxWordLen = 128,
  70     kMaxAffixLen = 8,
  71   };
  72   struct HashEntryItem {
  73     hentry entry;
  74     char word[kMaxWordLen];
  75     unsigned short astr[kMaxAffixLen];
  76   };
  77
  78   HashEntryItem hash_items_[MAX_ROOTS];
  79 };
  80
  81 ScopedHashEntryFactory::ScopedHashEntryFactory() {
  82   memset(&hash_items_[0], 0, sizeof(hash_items_));
  83 }
  84
  85 ScopedHashEntryFactory::~ScopedHashEntryFactory() {
  86 }
  87
  88 hentry* ScopedHashEntryFactory::CreateScopedHashEntry(int index,
  89                                                       const hentry* source) {
  90   if (index >= MAX_ROOTS || source->blen >= kMaxWordLen)
  91     return NULL;
  92
  93   // Retrieve a HashEntryItem struct from our spool, initialize it, and
  94   // returns the address of its 'hentry' member.
  95   size_t source_size = sizeof(hentry) + source->blen + 1;
  96   HashEntryItem* hash_item = &hash_items_[index];
  97   memcpy(&hash_item->entry, source, source_size);
  98   if (source->astr) {
  99     hash_item->entry.alen = source->alen;
 100     if (hash_item->entry.alen > kMaxAffixLen)
 101       hash_item->entry.alen = kMaxAffixLen;
 102     memcpy(hash_item->astr, source->astr, hash_item->entry.alen * sizeof(hash_item->astr[0]));
 103     hash_item->entry.astr = &hash_item->astr[0];
 104   }
 105   return &hash_item->entry;
 106 }
 107
 108 }  // namespace
 109 #endif
 110
 111
 112 #ifdef HUNSPELL_CHROME_CLIENT
 113 SuggestMgr::SuggestMgr(hunspell::BDictReader* reader,
 114                        const char * tryme, int maxn,
 115                        AffixMgr * aptr)
 116 {
 117   bdict_reader = reader;
 118 #else
 119 SuggestMgr::SuggestMgr(const char * tryme, int maxn,
 120                        AffixMgr * aptr)
 121 {
 122 #endif
 123
 124   // register affix manager and check in string of chars to
 125   // try when building candidate suggestions
 126   pAMgr = aptr;
 127
 128   csconv = NULL;
 129
 130   ckeyl = 0;
 131   ckey = NULL;
 132   ckey_utf = NULL;
 133
 134   ctryl = 0;
 135   ctry = NULL;
 136   ctry_utf = NULL;
 137
 138   utf8 = 0;
 139   langnum = 0;
 140   complexprefixes = 0;
 141
 142   maxSug = maxn;
 143   nosplitsugs = 0;
 144   maxngramsugs = MAXNGRAMSUGS;
 145   maxcpdsugs = MAXCOMPOUNDSUGS;
 146
 147   if (pAMgr) {
 148         langnum = pAMgr->get_langnum();
 149         ckey = pAMgr->get_key_string();
 150         nosplitsugs = pAMgr->get_nosplitsugs();
 151         if (pAMgr->get_maxngramsugs() >= 0)
 152             maxngramsugs = pAMgr->get_maxngramsugs();
 153         utf8 = pAMgr->get_utf8();
 154         if (pAMgr->get_maxcpdsugs() >= 0)
 155             maxcpdsugs = pAMgr->get_maxcpdsugs();
 156         if (!utf8)
 157         {
 158             char * enc = pAMgr->get_encoding();
 159             csconv = get_current_cs(enc);
 160             free(enc);
 161         }
 162         complexprefixes = pAMgr->get_complexprefixes();
 163   }
 164
 165   if (ckey) {
 166     if (utf8) {
 167         w_char t[MAXSWL];
 168         ckeyl = u8_u16(t, MAXSWL, ckey);
 169         ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char));
 170         if (ckey_utf) memcpy(ckey_utf, t, ckeyl * sizeof(w_char));
 171         else ckeyl = 0;
 172     } else {
 173         ckeyl = strlen(ckey);
 174     }
 175   }
 176
 177   if (tryme) {
 178     ctry = mystrdup(tryme);
 179     if (ctry) ctryl = strlen(ctry);
 180     if (ctry && utf8) {
 181         w_char t[MAXSWL];
 182         ctryl = u8_u16(t, MAXSWL, tryme);
 183         ctry_utf = (w_char *) malloc(ctryl * sizeof(w_char));
 184         if (ctry_utf) memcpy(ctry_utf, t, ctryl * sizeof(w_char));
 185         else ctryl = 0;
 186     }
 187   }
 188 }
 189
 190
 191 SuggestMgr::~SuggestMgr()
 192 {
 193   pAMgr = NULL;
 194   if (ckey) free(ckey);
 195   ckey = NULL;
 196   if (ckey_utf) free(ckey_utf);
 197   ckey_utf = NULL;
 198   ckeyl = 0;
 199   if (ctry) free(ctry);
 200   ctry = NULL;
 201   if (ctry_utf) free(ctry_utf);
 202   ctry_utf = NULL;
 203   ctryl = 0;
 204   maxSug = 0;
 205 #ifdef MOZILLA_CLIENT
 206   delete [] csconv;
 207 #endif
 208 }
 209
 210 int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
 211    int * timer, clock_t * timelimit) {
 212       int cwrd = 1;
 213       if (ns == maxSug) return maxSug;
 214       for (int k=0; k < ns; k++) {
 215         if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
 216       }
 217       if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
 218         wlst[ns] = mystrdup(candidate);
 219         if (wlst[ns] == NULL) {
 220             for (int j=0; j<ns; j++) free(wlst[j]);
 221             return -1;
 222         }
 223         ns++;
 224       }
 225       return ns;
 226 }
 227
 228 // generate suggestions for a misspelled word
 229 //    pass in address of array of char * pointers
 230 // onlycompoundsug: probably bad suggestions (need for ngram sugs, too)
 231
 232 int SuggestMgr::suggest(char*** slst, const char * w, int nsug,
 233     int * onlycompoundsug)
 234 {
 235   int nocompoundtwowords = 0;
 236   char ** wlst;
 237   w_char word_utf[MAXSWL];
 238   int wl = 0;
 239   int nsugorig = nsug;
 240   char w2[MAXWORDUTF8LEN];
 241   const char * word = w;
 242   int oldSug = 0;
 243
 244   // word reversing wrapper for complex prefixes
 245   if (complexprefixes) {
 246     strcpy(w2, w);
 247     if (utf8) reverseword_utf(w2); else reverseword(w2);
 248     word = w2;
 249   }
 250
 251     if (*slst) {
 252         wlst = *slst;
 253     } else {
 254         wlst = (char **) malloc(maxSug * sizeof(char *));
 255         if (wlst == NULL) return -1;
 256         for (int i = 0; i < maxSug; i++) {
 257             wlst[i] = NULL;
 258         }
 259     }
 260
 261     if (utf8) {
 262         wl = u8_u16(word_utf, MAXSWL, word);
 263         if (wl == -1) {
 264                 *slst = wlst;
 265                  return nsug;
 266         }
 267     }
 268
 269     for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
 270
 271     // limit compound suggestion
 272     if (cpdsuggest > 0) oldSug = nsug;
 273
 274     // suggestions for an uppercase word (html -> HTML)
 275     if ((nsug < maxSug) && (nsug > -1)) {
 276         nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 277                     capchars(wlst, word, nsug, cpdsuggest);
 278     }
 279
 280     // perhaps we made a typical fault of spelling
 281     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 282       nsug = replchars(wlst, word, nsug, cpdsuggest);
 283     }
 284
 285     // perhaps we made chose the wrong char from a related set
 286     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 287       nsug = mapchars(wlst, word, nsug, cpdsuggest);
 288     }
 289
 290     // only suggest compound words when no other suggestion
 291     if ((cpdsuggest == 0) && (nsug > nsugorig)) nocompoundtwowords=1;
 292
 293     // did we swap the order of chars by mistake
 294     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 295         nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 296                     swapchar(wlst, word, nsug, cpdsuggest);
 297     }
 298
 299     // did we swap the order of non adjacent chars by mistake
 300     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 301         nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 302                     longswapchar(wlst, word, nsug, cpdsuggest);
 303     }
 304
 305     // did we just hit the wrong key in place of a good char (case and keyboard)
 306     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 307         nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 308                     badcharkey(wlst, word, nsug, cpdsuggest);
 309     }
 310
 311     // did we add a char that should not be there
 312     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 313         nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 314                     extrachar(wlst, word, nsug, cpdsuggest);
 315     }
 316
 317
 318     // did we forgot a char
 319     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 320         nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 321                     forgotchar(wlst, word, nsug, cpdsuggest);
 322     }
 323
 324     // did we move a char
 325     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 326         nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 327                     movechar(wlst, word, nsug, cpdsuggest);
 328     }
 329
 330     // did we just hit the wrong key in place of a good char
 331     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 332         nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 333                     badchar(wlst, word, nsug, cpdsuggest);
 334     }
 335
 336     // did we double two characters
 337     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 338         nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
 339                     doubletwochars(wlst, word, nsug, cpdsuggest);
 340     }
 341
 342     // perhaps we forgot to hit space and two words ran together
 343     if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
 344         nsug = twowords(wlst, word, nsug, cpdsuggest);
 345     }
 346
 347     } // repeating ``for'' statement compounding support
 348
 349     if (nsug < 0) {
 350      // we ran out of memory - we should free up as much as possible
 351        for (int i = 0; i < maxSug; i++)
 352          if (wlst[i] != NULL) free(wlst[i]);
 353        free(wlst);
 354        wlst = NULL;
 355     }
 356
 357     if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) *onlycompoundsug = 1;
 358
 359     *slst = wlst;
 360     return nsug;
 361 }
 362
 363 // generate suggestions for a word with typical mistake
 364 //    pass in address of array of char * pointers
 365 #ifdef HUNSPELL_EXPERIMENTAL
 366 int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug)
 367 {
 368     int nocompoundtwowords = 0;
 369     char ** wlst;
 370     int oldSug;
 371
 372   char w2[MAXWORDUTF8LEN];
 373   const char * word = w;
 374
 375   // word reversing wrapper for complex prefixes
 376   if (complexprefixes) {
 377     strcpy(w2, w);
 378     if (utf8) reverseword_utf(w2); else reverseword(w2);
 379     word = w2;
 380   }
 381
 382     if (*slst) {
 383         wlst = *slst;
 384     } else {
 385         wlst = (char **) malloc(maxSug * sizeof(char *));
 386         if (wlst == NULL) return -1;
 387     }
 388
 389     for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
 390
 391     // limit compound suggestion
 392     if (cpdsuggest > 0) oldSug = nsug;
 393
 394     // perhaps we made a typical fault of spelling
 395     if ((nsug < maxSug) && (nsug > -1))
 396     nsug = replchars(wlst, word, nsug, cpdsuggest);
 397
 398     // perhaps we made chose the wrong char from a related set
 399     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs)))
 400       nsug = mapchars(wlst, word, nsug, cpdsuggest);
 401
 402     if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
 403
 404     // perhaps we forgot to hit space and two words ran together
 405
 406     if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && check_forbidden(word, strlen(word))) {
 407                 nsug = twowords(wlst, word, nsug, cpdsuggest);
 408         }
 409
 410     } // repeating ``for'' statement compounding support
 411
 412     if (nsug < 0) {
 413        for (int i=0;i<maxSug; i++)
 414          if (wlst[i] != NULL) free(wlst[i]);
 415        free(wlst);
 416        return -1;
 417     }
 418
 419     *slst = wlst;
 420     return nsug;
 421 }
 422 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
 423
 424 // suggestions for an uppercase word (html -> HTML)
 425 int SuggestMgr::capchars_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 426 {
 427   char candidate[MAXSWUTF8L];
 428   w_char candidate_utf[MAXSWL];
 429   memcpy(candidate_utf, word, wl * sizeof(w_char));
 430   mkallcap_utf(candidate_utf, wl, langnum);
 431   u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
 432   return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 433 }
 434
 435 // suggestions for an uppercase word (html -> HTML)
 436 int SuggestMgr::capchars(char** wlst, const char * word, int ns, int cpdsuggest)
 437 {
 438   char candidate[MAXSWUTF8L];
 439   strcpy(candidate, word);
 440   mkallcap(candidate, csconv);
 441   return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 442 }
 443
 444 // suggestions for when chose the wrong char out of a related set
 445 int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
 446 {
 447   char candidate[MAXSWUTF8L];
 448   clock_t timelimit;
 449   int timer;
 450   candidate[0] = '\0';
 451
 452   int wl = strlen(word);
 453   if (wl < 2 || ! pAMgr) return ns;
 454
 455   int nummap = pAMgr->get_nummap();
 456   struct mapentry* maptable = pAMgr->get_maptable();
 457   if (maptable==NULL) return ns;
 458
 459   timelimit = clock();
 460   timer = MINTIMER;
 461   return map_related(word, (char *) &candidate, 0, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit);
 462 }
 463
 464 int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn,
 465     char** wlst, int cpdsuggest,  int ns,
 466     const mapentry* maptable, int nummap, int * timer, clock_t * timelimit)
 467 {
 468   if (*(word + wn) == '\0') {
 469       int cwrd = 1;
 470       *(candidate + cn) = '\0';
 471       int wl = strlen(candidate);
 472       for (int m=0; m < ns; m++)
 473           if (strcmp(candidate, wlst[m]) == 0) cwrd = 0;
 474       if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
 475           if (ns < maxSug) {
 476               wlst[ns] = mystrdup(candidate);
 477               if (wlst[ns] == NULL) return -1;
 478               ns++;
 479           }
 480       }
 481       return ns;
 482   }
 483   int in_map = 0;
 484   for (int j = 0; j < nummap; j++) {
 485     for (int k = 0; k < maptable[j].len; k++) {
 486       int len = strlen(maptable[j].set[k]);
 487       if (strncmp(maptable[j].set[k], word + wn, len) == 0) {
 488         in_map = 1;
 489         for (int l = 0; l < maptable[j].len; l++) {
 490           strcpy(candidate + cn, maptable[j].set[l]);
 491           ns = map_related(word, candidate, wn + len, strlen(candidate), wlst,
 492                 cpdsuggest, ns, maptable, nummap, timer, timelimit);
 493           if (!(*timer)) return ns;
 494         }
 495       }
 496     }
 497   }
 498   if (!in_map) {
 499      *(candidate + cn) = *(word + wn);
 500      ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest,
 501         ns, maptable, nummap, timer, timelimit);
 502   }
 503   return ns;
 504 }
 505
 506 // suggestions for a typical fault of spelling, that
 507 // differs with more, than 1 letter from the right form.
 508 int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest)
 509 {
 510   char candidate[MAXSWUTF8L];
 511   const char * r;
 512   int lenr, lenp;
 513   int wl = strlen(word);
 514   if (wl < 2 || ! pAMgr) return ns;
 515
 516 #ifdef HUNSPELL_CHROME_CLIENT
 517   const char *pattern, *pattern2;
 518   hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();
 519   while (iterator.GetNext(&pattern, &pattern2)) {
 520       r = word;
 521       lenr = strlen(pattern2);
 522       lenp = strlen(pattern);
 523
 524       // search every occurence of the pattern in the word
 525       while ((r=strstr(r, pattern)) != NULL) {
 526           strcpy(candidate, word);
 527           if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
 528           strcpy(candidate+(r-word), pattern2);
 529           strcpy(candidate+(r-word)+lenr, r+lenp);
 530           ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL);
 531           if (ns == -1) return -1;
 532           // check REP suggestions with space
 533           char * sp = strchr(candidate, ' ');
 534           if (sp) {
 535             char * prev = candidate;
 536             while (sp) {
 537               *sp = '\0';
 538               if (checkword(prev, strlen(prev), 0, NULL, NULL)) {
 539                 int oldns = ns;
 540                 *sp = ' ';
 541                 ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL);
 542                 if (ns == -1) return -1;
 543                 if (oldns < ns) {
 544                   free(wlst[ns - 1]);
 545                   wlst[ns - 1] = mystrdup(candidate);
 546                   if (!wlst[ns - 1]) return -1;
 547                 }
 548               }
 549               *sp = ' ';
 550               prev = sp + 1;
 551               sp = strchr(prev, ' ');
 552             }
 553           }
 554           r++; // search for the next letter
 555     }
 556   }
 557 #else
 558   int numrep = pAMgr->get_numrep();
 559   struct replentry* reptable = pAMgr->get_reptable();
 560   if (reptable==NULL) return ns;
 561   for (int i=0; i < numrep; i++ ) {
 562       r = word;
 563       lenr = strlen(reptable[i].pattern2);
 564       lenp = strlen(reptable[i].pattern);
 565       // search every occurence of the pattern in the word
 566       while ((r=strstr(r, reptable[i].pattern)) != NULL && (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) &&
 567         (!reptable[i].start || r == word)) {
 568           strcpy(candidate, word);
 569           if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break;
 570           strcpy(candidate+(r-word),reptable[i].pattern2);
 571           strcpy(candidate+(r-word)+lenr, r+lenp);
 572           ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL);
 573           if (ns == -1) return -1;
 574           // check REP suggestions with space
 575           char * sp = strchr(candidate, ' ');
 576           if (sp) {
 577             char * prev = candidate;
 578             while (sp) {
 579               *sp = '\0';
 580               if (checkword(prev, strlen(prev), 0, NULL, NULL)) {
 581                 int oldns = ns;
 582                 *sp = ' ';
 583                 ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL);
 584                 if (ns == -1) return -1;
 585                 if (oldns < ns) {
 586                   free(wlst[ns - 1]);
 587                   wlst[ns - 1] = mystrdup(candidate);
 588                   if (!wlst[ns - 1]) return -1;
 589                 }
 590               }
 591               *sp = ' ';
 592               prev = sp + 1;
 593               sp = strchr(prev, ' ');
 594             }
 595           }
 596           r++; // search for the next letter
 597       }
 598    }
 599 #endif
 600    return ns;
 601 }
 602
 603 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
 604 int SuggestMgr::doubletwochars(char** wlst, const char * word, int ns, int cpdsuggest)
 605 {
 606   char candidate[MAXSWUTF8L];
 607   int state=0;
 608   int wl = strlen(word);
 609   if (wl < 5 || ! pAMgr) return ns;
 610   for (int i=2; i < wl; i++ ) {
 611       if (word[i]==word[i-2]) {
 612           state++;
 613           if (state==3) {
 614             strcpy(candidate,word);
 615             strcpy(candidate+i-1,word+i+1);
 616             ns = testsug(wlst, candidate, wl-2, ns, cpdsuggest, NULL, NULL);
 617             if (ns == -1) return -1;
 618             state=0;
 619           }
 620       } else {
 621             state=0;
 622       }
 623   }
 624   return ns;
 625 }
 626
 627 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
 628 int SuggestMgr::doubletwochars_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 629 {
 630   w_char        candidate_utf[MAXSWL];
 631   char          candidate[MAXSWUTF8L];
 632   int state=0;
 633   if (wl < 5 || ! pAMgr) return ns;
 634   for (int i=2; i < wl; i++) {
 635       if (w_char_eq(word[i], word[i-2]))  {
 636           state++;
 637           if (state==3) {
 638             memcpy(candidate_utf, word, (i - 1) * sizeof(w_char));
 639             memcpy(candidate_utf+i-1, word+i+1, (wl-i-1) * sizeof(w_char));
 640             u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl-2);
 641             ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 642             if (ns == -1) return -1;
 643             state=0;
 644           }
 645       } else {
 646             state=0;
 647       }
 648   }
 649   return ns;
 650 }
 651
 652 // error is wrong char in place of correct one (case and keyboard related version)
 653 int SuggestMgr::badcharkey(char ** wlst, const char * word, int ns, int cpdsuggest)
 654 {
 655   char  tmpc;
 656   char  candidate[MAXSWUTF8L];
 657   int wl = strlen(word);
 658   strcpy(candidate, word);
 659   // swap out each char one by one and try uppercase and neighbor
 660   // keyboard chars in its place to see if that makes a good word
 661
 662   for (int i=0; i < wl; i++) {
 663     tmpc = candidate[i];
 664     // check with uppercase letters
 665     candidate[i] = csconv[((unsigned char)tmpc)].cupper;
 666     if (tmpc != candidate[i]) {
 667        ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 668        if (ns == -1) return -1;
 669        candidate[i] = tmpc;
 670     }
 671     // check neighbor characters in keyboard string
 672     if (!ckey) continue;
 673     char * loc = strchr(ckey, tmpc);
 674     while (loc) {
 675        if ((loc > ckey) && (*(loc - 1) != '|')) {
 676           candidate[i] = *(loc - 1);
 677           ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 678           if (ns == -1) return -1;
 679        }
 680        if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) {
 681           candidate[i] = *(loc + 1);
 682           ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 683           if (ns == -1) return -1;
 684        }
 685        loc = strchr(loc + 1, tmpc);
 686     }
 687     candidate[i] = tmpc;
 688   }
 689   return ns;
 690 }
 691
 692 // error is wrong char in place of correct one (case and keyboard related version)
 693 int SuggestMgr::badcharkey_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 694 {
 695   w_char        tmpc;
 696   w_char        candidate_utf[MAXSWL];
 697   char          candidate[MAXSWUTF8L];
 698   memcpy(candidate_utf, word, wl * sizeof(w_char));
 699   // swap out each char one by one and try all the tryme
 700   // chars in its place to see if that makes a good word
 701   for (int i=0; i < wl; i++) {
 702     tmpc = candidate_utf[i];
 703     // check with uppercase letters
 704     mkallcap_utf(candidate_utf + i, 1, langnum);
 705     if (!w_char_eq(tmpc, candidate_utf[i])) {
 706        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
 707        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 708        if (ns == -1) return -1;
 709        candidate_utf[i] = tmpc;
 710     }
 711     // check neighbor characters in keyboard string
 712     if (!ckey) continue;
 713     w_char * loc = ckey_utf;
 714     while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) loc++;
 715     while (loc < (ckey_utf + ckeyl)) {
 716        if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) {
 717           candidate_utf[i] = *(loc - 1);
 718           u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
 719           ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 720           if (ns == -1) return -1;
 721        }
 722        if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE)) {
 723           candidate_utf[i] = *(loc + 1);
 724           u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
 725           ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 726           if (ns == -1) return -1;
 727        }
 728        do { loc++; } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc));
 729     }
 730     candidate_utf[i] = tmpc;
 731   }
 732   return ns;
 733 }
 734
 735 // error is wrong char in place of correct one
 736 int SuggestMgr::badchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 737 {
 738   char  tmpc;
 739   char  candidate[MAXSWUTF8L];
 740   clock_t timelimit = clock();
 741   int timer = MINTIMER;
 742   int wl = strlen(word);
 743   strcpy(candidate, word);
 744   // swap out each char one by one and try all the tryme
 745   // chars in its place to see if that makes a good word
 746   for (int j=0; j < ctryl; j++) {
 747     for (int i=wl-1; i >= 0; i--) {
 748        tmpc = candidate[i];
 749        if (ctry[j] == tmpc) continue;
 750        candidate[i] = ctry[j];
 751        ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit);
 752        if (ns == -1) return -1;
 753        if (!timer) return ns;
 754        candidate[i] = tmpc;
 755     }
 756   }
 757   return ns;
 758 }
 759
 760 // error is wrong char in place of correct one
 761 int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 762 {
 763   w_char        tmpc;
 764   w_char        candidate_utf[MAXSWL];
 765   char          candidate[MAXSWUTF8L];
 766   clock_t timelimit = clock();
 767   int timer = MINTIMER;
 768   memcpy(candidate_utf, word, wl * sizeof(w_char));
 769   // swap out each char one by one and try all the tryme
 770   // chars in its place to see if that makes a good word
 771   for (int j=0; j < ctryl; j++) {
 772     for (int i=wl-1; i >= 0; i--) {
 773        tmpc = candidate_utf[i];
 774        if (w_char_eq(tmpc, ctry_utf[j])) continue;
 775        candidate_utf[i] = ctry_utf[j];
 776        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
 777        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, &timelimit);
 778        if (ns == -1) return -1;
 779        if (!timer) return ns;
 780        candidate_utf[i] = tmpc;
 781     }
 782   }
 783   return ns;
 784 }
 785
 786 // error is word has an extra letter it does not need
 787 int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 788 {
 789    char   candidate[MAXSWUTF8L];
 790    w_char candidate_utf[MAXSWL];
 791    w_char * p;
 792    w_char tmpc = W_VLINE; // not used value, only for VCC warning message
 793    if (wl < 2) return ns;
 794    // try omitting one char of word at a time
 795    memcpy(candidate_utf, word, wl * sizeof(w_char));
 796    for (p = candidate_utf + wl - 1;  p >= candidate_utf; p--) {
 797        w_char tmpc2 = *p;
 798        if (p < candidate_utf + wl - 1) *p = tmpc;
 799        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1);
 800        ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
 801        if (ns == -1) return -1;
 802        tmpc = tmpc2;
 803    }
 804    return ns;
 805 }
 806
 807 // error is word has an extra letter it does not need
 808 int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest)
 809 {
 810    char    tmpc = '\0';
 811    char    candidate[MAXSWUTF8L];
 812    char *  p;
 813    int wl = strlen(word);
 814    if (wl < 2) return ns;
 815    // try omitting one char of word at a time
 816    strcpy (candidate, word);
 817    for (p = candidate + wl - 1; p >=candidate; p--) {
 818       char tmpc2 = *p;
 819       *p = tmpc;
 820       ns = testsug(wlst, candidate, wl-1, ns, cpdsuggest, NULL, NULL);
 821       if (ns == -1) return -1;
 822       tmpc = tmpc2;
 823    }
 824    return ns;
 825 }
 826
 827 // error is missing a letter it needs
 828 int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 829 {
 830    // TODO(rouslan): Remove the interim change below when this patch lands:
 831    // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
 832    char candidate[MAXSWUTF8L + 4];
 833    char * p;
 834    clock_t timelimit = clock();
 835    int timer = MINTIMER;
 836    int wl = strlen(word);
 837    // try inserting a tryme character before every letter (and the null terminator)
 838    for (int i = 0;  i < ctryl;  i++) {
 839       strcpy(candidate, word);
 840       for (p = candidate + wl;  p >= candidate; p--)  {
 841          *(p+1) = *p;
 842          *p = ctry[i];
 843          ns = testsug(wlst, candidate, wl+1, ns, cpdsuggest, &timer, &timelimit);
 844          if (ns == -1) return -1;
 845          if (!timer) return ns;
 846       }
 847    }
 848    return ns;
 849 }
 850
 851 // error is missing a letter it needs
 852 int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 853 {
 854    // TODO(rouslan): Remove the interim change below when this patch lands:
 855    // http://sf.net/tracker/?func=detail&aid=3595024&group_id=143754&atid=756395
 856    w_char  candidate_utf[MAXSWL + 1];
 857    char    candidate[MAXSWUTF8L + 4];
 858    w_char * p;
 859    clock_t timelimit = clock();
 860    int timer = MINTIMER;
 861    // try inserting a tryme character at the end of the word and before every letter
 862    for (int i = 0;  i < ctryl;  i++) {
 863       memcpy (candidate_utf, word, wl * sizeof(w_char));
 864       for (p = candidate_utf + wl;  p >= candidate_utf; p--)  {
 865          *(p + 1) = *p;
 866          *p = ctry_utf[i];
 867          u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1);
 868          ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, &timelimit);
 869          if (ns == -1) return -1;
 870          if (!timer) return ns;
 871       }
 872    }
 873    return ns;
 874 }
 875
 876
 877 /* error is should have been two words */
 878 int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest)
 879 {
 880     char candidate[MAXSWUTF8L];
 881     char * p;
 882     int c1, c2;
 883     int forbidden = 0;
 884     int cwrd;
 885
 886     int wl=strlen(word);
 887     if (wl < 3) return ns;
 888
 889     if (langnum == LANG_hu) forbidden = check_forbidden(word, wl);
 890
 891     strcpy(candidate + 1, word);
 892     // split the string into two pieces after every char
 893     // if both pieces are good words make them a suggestion
 894     for (p = candidate + 1;  p[1] != '\0';  p++) {
 895        p[-1] = *p;
 896        // go to end of the UTF-8 character
 897        while (utf8 && ((p[1] & 0xc0) == 0x80)) {
 898          *p = p[1];
 899          p++;
 900        }
 901        if (utf8 && p[1] == '\0') break; // last UTF-8 character
 902        *p = '\0';
 903        c1 = checkword(candidate,strlen(candidate), cpdsuggest, NULL, NULL);
 904        if (c1) {
 905          c2 = checkword((p+1),strlen(p+1), cpdsuggest, NULL, NULL);
 906          if (c2) {
 907             *p = ' ';
 908
 909             // spec. Hungarian code (need a better compound word support)
 910             if ((langnum == LANG_hu) && !forbidden &&
 911                 // if 3 repeating letter, use - instead of space
 912                 (((p[-1] == p[1]) && (((p>candidate+1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) ||
 913                 // or multiple compounding, with more, than 6 syllables
 914                 ((c1 == 3) && (c2 >= 2)))) *p = '-';
 915
 916             cwrd = 1;
 917             for (int k=0; k < ns; k++)
 918                 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
 919             if (ns < maxSug) {
 920                 if (cwrd) {
 921                     wlst[ns] = mystrdup(candidate);
 922                     if (wlst[ns] == NULL) return -1;
 923                     ns++;
 924                 }
 925             } else return ns;
 926             // add two word suggestion with dash, if TRY string contains
 927             // "a" or "-"
 928             // NOTE: cwrd doesn't modified for REP twoword sugg.
 929             if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) &&
 930                 mystrlen(p + 1) > 1 &&
 931                 mystrlen(candidate) - mystrlen(p) > 1) {
 932                 *p = '-';
 933                 for (int k=0; k < ns; k++)
 934                     if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
 935                 if (ns < maxSug) {
 936                     if (cwrd) {
 937                         wlst[ns] = mystrdup(candidate);
 938                         if (wlst[ns] == NULL) return -1;
 939                         ns++;
 940                     }
 941                 } else return ns;
 942             }
 943          }
 944        }
 945     }
 946     return ns;
 947 }
 948
 949
 950 // error is adjacent letter were swapped
 951 int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 952 {
 953    char candidate[MAXSWUTF8L];
 954    char * p;
 955    char tmpc;
 956    int wl=strlen(word);
 957    // try swapping adjacent chars one by one
 958    strcpy(candidate, word);
 959    for (p = candidate;  p[1] != 0;  p++) {
 960       tmpc = *p;
 961       *p = p[1];
 962       p[1] = tmpc;
 963       ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 964       if (ns == -1) return -1;
 965       p[1] = *p;
 966       *p = tmpc;
 967    }
 968    // try double swaps for short words
 969    // ahev -> have, owudl -> would
 970    if (wl == 4 || wl == 5) {
 971      candidate[0] = word[1];
 972      candidate[1] = word[0];
 973      candidate[2] = word[2];
 974      candidate[wl - 2] = word[wl - 1];
 975      candidate[wl - 1] = word[wl - 2];
 976      ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 977      if (ns == -1) return -1;
 978      if (wl == 5) {
 979         candidate[0] = word[0];
 980         candidate[1] = word[2];
 981         candidate[2] = word[1];
 982         ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
 983         if (ns == -1) return -1;
 984      }
 985    }
 986    return ns;
 987 }
 988
 989 // error is adjacent letter were swapped
 990 int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
 991 {
 992    w_char candidate_utf[MAXSWL];
 993    char   candidate[MAXSWUTF8L];
 994    w_char * p;
 995    w_char tmpc;
 996    int len = 0;
 997    // try swapping adjacent chars one by one
 998    memcpy (candidate_utf, word, wl * sizeof(w_char));
 999    for (p = candidate_utf;  p < (candidate_utf + wl - 1);  p++) {
1000       tmpc = *p;
1001       *p = p[1];
1002       p[1] = tmpc;
1003       u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1004       if (len == 0) len = strlen(candidate);
1005       ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
1006       if (ns == -1) return -1;
1007       p[1] = *p;
1008       *p = tmpc;
1009    }
1010    // try double swaps for short words
1011    // ahev -> have, owudl -> would, suodn -> sound
1012    if (wl == 4 || wl == 5) {
1013      candidate_utf[0] = word[1];
1014      candidate_utf[1] = word[0];
1015      candidate_utf[2] = word[2];
1016      candidate_utf[wl - 2] = word[wl - 1];
1017      candidate_utf[wl - 1] = word[wl - 2];
1018      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1019      ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
1020      if (ns == -1) return -1;
1021      if (wl == 5) {
1022         candidate_utf[0] = word[0];
1023         candidate_utf[1] = word[2];
1024         candidate_utf[2] = word[1];
1025         u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1026         ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
1027         if (ns == -1) return -1;
1028      }
1029    }
1030    return ns;
1031 }
1032
1033 // error is not adjacent letter were swapped
1034 int SuggestMgr::longswapchar(char ** wlst, const char * word, int ns, int cpdsuggest)
1035 {
1036    char candidate[MAXSWUTF8L];
1037    char * p;
1038    char * q;
1039    char tmpc;
1040    int wl=strlen(word);
1041    // try swapping not adjacent chars one by one
1042    strcpy(candidate, word);
1043    for (p = candidate;  *p != 0;  p++) {
1044     for (q = candidate;  *q != 0;  q++) {
1045      if (abs((int)(p-q)) > 1) {
1046       tmpc = *p;
1047       *p = *q;
1048       *q = tmpc;
1049       ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
1050       if (ns == -1) return -1;
1051       *q = *p;
1052       *p = tmpc;
1053      }
1054     }
1055    }
1056    return ns;
1057 }
1058
1059
1060 // error is adjacent letter were swapped
1061 int SuggestMgr::longswapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
1062 {
1063    w_char candidate_utf[MAXSWL];
1064    char   candidate[MAXSWUTF8L];
1065    w_char * p;
1066    w_char * q;
1067    w_char tmpc;
1068    // try swapping not adjacent chars
1069    memcpy (candidate_utf, word, wl * sizeof(w_char));
1070    for (p = candidate_utf;  p < (candidate_utf + wl);  p++) {
1071      for (q = candidate_utf;  q < (candidate_utf + wl);  q++) {
1072        if (abs((int)(p-q)) > 1) {
1073          tmpc = *p;
1074          *p = *q;
1075          *q = tmpc;
1076          u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1077          ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
1078          if (ns == -1) return -1;
1079          *q = *p;
1080          *p = tmpc;
1081        }
1082      }
1083    }
1084    return ns;
1085 }
1086
1087 // error is a letter was moved
1088 int SuggestMgr::movechar(char ** wlst, const char * word, int ns, int cpdsuggest)
1089 {
1090    char candidate[MAXSWUTF8L];
1091    char * p;
1092    char * q;
1093    char tmpc;
1094
1095    int wl=strlen(word);
1096    // try moving a char
1097    strcpy(candidate, word);
1098    for (p = candidate;  *p != 0;  p++) {
1099      for (q = p + 1;  (*q != 0) && ((q - p) < 10);  q++) {
1100       tmpc = *(q-1);
1101       *(q-1) = *q;
1102       *q = tmpc;
1103       if ((q-p) < 2) continue; // omit swap char
1104       ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
1105       if (ns == -1) return -1;
1106     }
1107     strcpy(candidate, word);
1108    }
1109    for (p = candidate + wl - 1;  p > candidate;  p--) {
1110      for (q = p - 1;  (q >= candidate) && ((p - q) < 10);  q--) {
1111       tmpc = *(q+1);
1112       *(q+1) = *q;
1113       *q = tmpc;
1114       if ((p-q) < 2) continue; // omit swap char
1115       ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
1116       if (ns == -1) return -1;
1117     }
1118     strcpy(candidate, word);
1119    }
1120    return ns;
1121 }
1122
1123 // error is a letter was moved
1124 int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
1125 {
1126    w_char candidate_utf[MAXSWL];
1127    char   candidate[MAXSWUTF8L];
1128    w_char * p;
1129    w_char * q;
1130    w_char tmpc;
1131    // try moving a char
1132    memcpy (candidate_utf, word, wl * sizeof(w_char));
1133    for (p = candidate_utf;  p < (candidate_utf + wl);  p++) {
1134      for (q = p + 1;  (q < (candidate_utf + wl)) && ((q - p) < 10);  q++) {
1135          tmpc = *(q-1);
1136          *(q-1) = *q;
1137          *q = tmpc;
1138          if ((q-p) < 2) continue; // omit swap char
1139          u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1140          ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
1141          if (ns == -1) return -1;
1142      }
1143      memcpy (candidate_utf, word, wl * sizeof(w_char));
1144    }
1145    for (p = candidate_utf + wl - 1;  p > candidate_utf;  p--) {
1146      for (q = p - 1;  (q >= candidate_utf) && ((p - q) < 10);  q--) {
1147          tmpc = *(q+1);
1148          *(q+1) = *q;
1149          *q = tmpc;
1150          if ((p-q) < 2) continue; // omit swap char
1151          u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
1152          ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
1153          if (ns == -1) return -1;
1154      }
1155      memcpy (candidate_utf, word, wl * sizeof(w_char));
1156    }
1157    return ns;
1158 }
1159
1160 // generate a set of suggestions for very poorly spelled words
1161 int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md)
1162 {
1163
1164   int i, j;
1165   int lval;
1166   int sc, scphon;
1167   int lp, lpphon;
1168   int nonbmp = 0;
1169
1170   // exhaustively search through all root words
1171   // keeping track of the MAX_ROOTS most similar root words
1172   struct hentry * roots[MAX_ROOTS];
1173   char * rootsphon[MAX_ROOTS];
1174   int scores[MAX_ROOTS];
1175   int scoresphon[MAX_ROOTS];
1176   for (i = 0; i < MAX_ROOTS; i++) {
1177     roots[i] = NULL;
1178     scores[i] = -100 * i;
1179     rootsphon[i] = NULL;
1180     scoresphon[i] = -100 * i;
1181   }
1182   lp = MAX_ROOTS - 1;
1183   lpphon = MAX_ROOTS - 1;
1184   scphon = -20000;
1185   int low = NGRAM_LOWERING;
1186
1187   char w2[MAXWORDUTF8LEN];
1188   char f[MAXSWUTF8L];
1189   char * word = w;
1190
1191   // word reversing wrapper for complex prefixes
1192   if (complexprefixes) {
1193     strcpy(w2, w);
1194     if (utf8) reverseword_utf(w2); else reverseword(w2);
1195     word = w2;
1196   }
1197
1198   char mw[MAXSWUTF8L];
1199   w_char u8[MAXSWL];
1200   int nc = strlen(word);
1201   int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;
1202
1203   // set character based ngram suggestion for words with non-BMP Unicode characters
1204   if (n == -1) {
1205     utf8 = 0; // XXX not state-free
1206     n = nc;
1207     nonbmp = 1;
1208     low = 0;
1209   }
1210
1211   struct hentry* hp = NULL;
1212   int col = -1;
1213 #ifdef HUNSPELL_CHROME_CLIENT
1214   ScopedHashEntryFactory hash_entry_factory;
1215 #endif
1216   phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
1217   char target[MAXSWUTF8L];
1218   char candidate[MAXSWUTF8L];
1219   if (ph) {
1220     if (utf8) {
1221       w_char _w[MAXSWL];
1222       int _wl = u8_u16(_w, MAXSWL, word);
1223       mkallcap_utf(_w, _wl, langnum);
1224       u16_u8(candidate, MAXSWUTF8L, _w, _wl);
1225     } else {
1226       strcpy(candidate, word);
1227       if (!nonbmp) mkallcap(candidate, csconv);
1228     }
1229     phonet(candidate, target, nc, *ph); // XXX phonet() is 8-bit (nc, not n)
1230   }
1231
1232   FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL;
1233   FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
1234   FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
1235   FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
1236
1237   for (i = 0; i < md; i++) {
1238   while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) {
1239     if ((hp->astr) && (pAMgr) &&
1240        (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
1241           TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
1242           TESTAFF(hp->astr, nosuggest, hp->alen) ||
1243           TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
1244           TESTAFF(hp->astr, onlyincompound, hp->alen))) continue;
1245
1246     sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
1247         leftcommonsubstring(word, HENTRY_WORD(hp));
1248
1249     // check special pronounciation
1250     if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
1251         int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
1252                 + leftcommonsubstring(word, f);
1253         if (sc2 > sc) sc = sc2;
1254     }
1255
1256     scphon = -20000;
1257     if (ph && (sc > 2) && (abs(n - (int) hp->clen) <= 3)) {
1258       char target2[MAXSWUTF8L];
1259       if (utf8) {
1260         w_char _w[MAXSWL];
1261         int _wl = u8_u16(_w, MAXSWL, HENTRY_WORD(hp));
1262         mkallcap_utf(_w, _wl, langnum);
1263         u16_u8(candidate, MAXSWUTF8L, _w, _wl);
1264       } else {
1265         strcpy(candidate, HENTRY_WORD(hp));
1266         mkallcap(candidate, csconv);
1267       }
1268       phonet(candidate, target2, -1, *ph);
1269       scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
1270     }
1271
1272     if (sc > scores[lp]) {
1273       scores[lp] = sc;
1274 #ifdef HUNSPELL_CHROME_CLIENT
1275       roots[lp] = hash_entry_factory.CreateScopedHashEntry(lp, hp);
1276 #else
1277       roots[lp] = hp;
1278 #endif
1279       lval = sc;
1280       for (j=0; j < MAX_ROOTS; j++)
1281         if (scores[j] < lval) {
1282           lp = j;
1283           lval = scores[j];
1284         }
1285     }
1286
1287
1288     if (scphon > scoresphon[lpphon]) {
1289       scoresphon[lpphon] = scphon;
1290       rootsphon[lpphon] = HENTRY_WORD(hp);
1291       lval = scphon;
1292       for (j=0; j < MAX_ROOTS; j++)
1293         if (scoresphon[j] < lval) {
1294           lpphon = j;
1295           lval = scoresphon[j];
1296         }
1297     }
1298   }}
1299
1300   // find minimum threshold for a passable suggestion
1301   // mangle original word three differnt ways
1302   // and score them to generate a minimum acceptable score
1303   int thresh = 0;
1304   for (int sp = 1; sp < 4; sp++) {
1305      if (utf8) {
1306        for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
1307        u16_u8(mw, MAXSWUTF8L, u8, n);
1308        thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
1309      } else {
1310        strcpy(mw, word);
1311        for (int k=sp; k < n; k+=4) *(mw + k) = '*';
1312        thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
1313      }
1314   }
1315   thresh = thresh / 3;
1316   thresh--;
1317
1318  // now expand affixes on each of these root words and
1319   // and use length adjusted ngram scores to select
1320   // possible suggestions
1321   char * guess[MAX_GUESS];
1322   char * guessorig[MAX_GUESS];
1323   int gscore[MAX_GUESS];
1324   for(i=0;i<MAX_GUESS;i++) {
1325      guess[i] = NULL;
1326      guessorig[i] = NULL;
1327      gscore[i] = -100 * i;
1328   }
1329
1330   lp = MAX_GUESS - 1;
1331
1332   struct guessword * glst;
1333   glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
1334   if (! glst) {
1335     if (nonbmp) utf8 = 1;
1336     return ns;
1337   }
1338
1339   for (i = 0; i < MAX_ROOTS; i++) {
1340       if (roots[i]) {
1341         struct hentry * rp = roots[i];
1342         int nw = pAMgr->expand_rootword(glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen,
1343                     rp->astr, rp->alen, word, nc,
1344                     ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MORPH_PHON) : NULL));
1345
1346         for (int k = 0; k < nw ; k++) {
1347            sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) +
1348                leftcommonsubstring(word, glst[k].word);
1349
1350            if (sc > thresh) {
1351               if (sc > gscore[lp]) {
1352                  if (guess[lp]) {
1353                     free (guess[lp]);
1354                     if (guessorig[lp]) {
1355                         free(guessorig[lp]);
1356                         guessorig[lp] = NULL;
1357                     }
1358                  }
1359                  gscore[lp] = sc;
1360                  guess[lp] = glst[k].word;
1361                  guessorig[lp] = glst[k].orig;
1362                  lval = sc;
1363                  for (j=0; j < MAX_GUESS; j++)
1364                     if (gscore[j] < lval) {
1365                        lp = j;
1366                        lval = gscore[j];
1367                     }
1368               } else {
1369                 free(glst[k].word);
1370                 if (glst[k].orig) free(glst[k].orig);
1371               }
1372            } else {
1373                 free(glst[k].word);
1374                 if (glst[k].orig) free(glst[k].orig);
1375            }
1376         }
1377       }
1378   }
1379   free(glst);
1380
1381   // now we are done generating guesses
1382   // sort in order of decreasing score
1383
1384
1385   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
1386   if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
1387
1388   // weight suggestions with a similarity index, based on
1389   // the longest common subsequent algorithm and resort
1390
1391   int is_swap = 0;
1392   int re = 0;
1393   double fact = 1.0;
1394   if (pAMgr) {
1395         int maxd = pAMgr->get_maxdiff();
1396         if (maxd >= 0) fact = (10.0 - maxd)/5.0;
1397   }
1398
1399   for (i=0; i < MAX_GUESS; i++) {
1400       if (guess[i]) {
1401         // lowering guess[i]
1402         char gl[MAXSWUTF8L];
1403         int len;
1404         if (utf8) {
1405           w_char _w[MAXSWL];
1406           len = u8_u16(_w, MAXSWL, guess[i]);
1407           mkallsmall_utf(_w, len, langnum);
1408           u16_u8(gl, MAXSWUTF8L, _w, len);
1409         } else {
1410           strcpy(gl, guess[i]);
1411           if (!nonbmp) mkallsmall(gl, csconv);
1412           len = strlen(guess[i]);
1413         }
1414
1415         int _lcs = lcslen(word, gl);
1416
1417         // same characters with different casing
1418         if ((n == len) && (n == _lcs)) {
1419             gscore[i] += 2000;
1420             break;
1421         }
1422         // using 2-gram instead of 3, and other weightening
1423
1424         re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
1425              ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
1426
1427         gscore[i] =
1428           // length of longest common subsequent minus length difference
1429           2 * _lcs - abs((int) (n - len)) +
1430           // weight length of the left common substring
1431           leftcommonsubstring(word, gl) +
1432           // weight equal character positions
1433           (!nonbmp && commoncharacterpositions(word, gl, &is_swap) ? 1: 0) +
1434           // swap character (not neighboring)
1435           ((is_swap) ? 10 : 0) +
1436           // ngram
1437           ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) +
1438           // weighted ngrams
1439           re +
1440          // different limit for dictionaries with PHONE rules
1441           (ph ? (re < len * fact ? -1000 : 0) : (re < (n + len)*fact? -1000 : 0));
1442       }
1443   }
1444
1445   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
1446
1447 // phonetic version
1448   if (ph) for (i=0; i < MAX_ROOTS; i++) {
1449       if (rootsphon[i]) {
1450         // lowering rootphon[i]
1451         char gl[MAXSWUTF8L];
1452         int len;
1453         if (utf8) {
1454           w_char _w[MAXSWL];
1455           len = u8_u16(_w, MAXSWL, rootsphon[i]);
1456           mkallsmall_utf(_w, len, langnum);
1457           u16_u8(gl, MAXSWUTF8L, _w, len);
1458         } else {
1459           strcpy(gl, rootsphon[i]);
1460           if (!nonbmp) mkallsmall(gl, csconv);
1461           len = strlen(rootsphon[i]);
1462         }
1463
1464         // heuristic weigthing of ngram scores
1465         scoresphon[i] += 2 * lcslen(word, gl) - abs((int) (n - len)) +
1466           // weight length of the left common substring
1467           leftcommonsubstring(word, gl);
1468       }
1469   }
1470
1471   if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
1472
1473   // copy over
1474   int oldns = ns;
1475
1476   int same = 0;
1477   for (i=0; i < MAX_GUESS; i++) {
1478     if (guess[i]) {
1479       if ((ns < oldns + maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] > 1000))) {
1480         int unique = 1;
1481         // leave only excellent suggestions, if exists
1482         if (gscore[i] > 1000) same = 1; else if (gscore[i] < -100) {
1483             same = 1;
1484             // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
1485             if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
1486                 free(guess[i]);
1487                 if (guessorig[i]) free(guessorig[i]);
1488                 continue;
1489             }
1490         }
1491         for (j = 0; j < ns; j++) {
1492           // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
1493           if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
1494              (guessorig[i] && strstr(guessorig[i], wlst[j])) ||
1495             // check forbidden words
1496             !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0;
1497         }
1498         if (unique) {
1499             wlst[ns++] = guess[i];
1500             if (guessorig[i]) {
1501                 free(guess[i]);
1502                 wlst[ns-1] = guessorig[i];
1503             }
1504         } else {
1505             free(guess[i]);
1506             if (guessorig[i]) free(guessorig[i]);
1507         }
1508       } else {
1509         free(guess[i]);
1510         if (guessorig[i]) free(guessorig[i]);
1511       }
1512     }
1513   }
1514
1515   oldns = ns;
1516   if (ph) for (i=0; i < MAX_ROOTS; i++) {
1517     if (rootsphon[i]) {
1518       if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) {
1519         int unique = 1;
1520         for (j = 0; j < ns; j++) {
1521           // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
1522           if (strstr(rootsphon[i], wlst[j]) ||
1523             // check forbidden words
1524             !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0;
1525         }
1526         if (unique) {
1527             wlst[ns++] = mystrdup(rootsphon[i]);
1528             if (!wlst[ns - 1]) return ns - 1;
1529         }
1530       }
1531     }
1532   }
1533
1534   if (nonbmp) utf8 = 1;
1535   return ns;
1536 }
1537
1538
1539 // see if a candidate suggestion is spelled correctly
1540 // needs to check both root words and words with affixes
1541
1542 // obsolote MySpell-HU modifications:
1543 // return value 2 and 3 marks compounding with hyphen (-)
1544 // `3' marks roots without suffix
1545 int SuggestMgr::checkword(const char * word, int len, int cpdsuggest, int * timer, clock_t * timelimit)
1546 {
1547   struct hentry * rv=NULL;
1548   struct hentry * rv2=NULL;
1549   int nosuffix = 0;
1550
1551   // check time limit
1552   if (timer) {
1553     (*timer)--;
1554     if (!(*timer) && timelimit) {
1555       if ((clock() - *timelimit) > TIMELIMIT) return 0;
1556       *timer = MAXPLUSTIMER;
1557     }
1558   }
1559
1560   if (pAMgr) {
1561     if (cpdsuggest==1) {
1562       if (pAMgr->get_compound()) {
1563         rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, 0); //EXT
1564         if (rv && (!(rv2 = pAMgr->lookup(word)) || !rv2->astr ||
1565             !(TESTAFF(rv2->astr,pAMgr->get_forbiddenword(),rv2->alen) ||
1566             TESTAFF(rv2->astr,pAMgr->get_nosuggest(),rv2->alen)))) return 3; // XXX obsolote categorisation + only ICONV needs affix flag check?
1567         }
1568         return 0;
1569     }
1570
1571     rv = pAMgr->lookup(word);
1572
1573     if (rv) {
1574         if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
1575                || TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen))) return 0;
1576         while (rv) {
1577             if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) ||
1578                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1579             TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
1580                 rv = rv->next_homonym;
1581             } else break;
1582         }
1583     } else rv = pAMgr->prefix_check(word, len, 0); // only prefix, and prefix + suffix XXX
1584
1585     if (rv) {
1586         nosuffix=1;
1587     } else {
1588         rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, NULL); // only suffix
1589     }
1590
1591     if (!rv && pAMgr->have_contclass()) {
1592         rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL);
1593         if (!rv) rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL);
1594     }
1595
1596     // check forbidden words
1597     if ((rv) && (rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen) ||
1598       TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1599       TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen) ||
1600       TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) return 0;
1601
1602     if (rv) { // XXX obsolote
1603       if ((pAMgr->get_compoundflag()) &&
1604           TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) return 2 + nosuffix;
1605       return 1;
1606     }
1607   }
1608   return 0;
1609 }
1610
1611 int SuggestMgr::check_forbidden(const char * word, int len)
1612 {
1613   struct hentry * rv = NULL;
1614
1615   if (pAMgr) {
1616     rv = pAMgr->lookup(word);
1617     if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) ||
1618         TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL;
1619     if (!(pAMgr->prefix_check(word,len,1)))
1620         rv = pAMgr->suffix_check(word,len, 0, NULL, NULL, 0, NULL); // prefix+suffix, suffix
1621     // check forbidden words
1622     if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)) return 1;
1623    }
1624     return 0;
1625 }
1626
1627 #ifdef HUNSPELL_EXPERIMENTAL
1628 // suggest possible stems
1629 int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
1630 {
1631     char ** wlst;
1632
1633     struct hentry * rv = NULL;
1634
1635   char w2[MAXSWUTF8L];
1636   const char * word = w;
1637
1638   // word reversing wrapper for complex prefixes
1639   if (complexprefixes) {
1640     strcpy(w2, w);
1641     if (utf8) reverseword_utf(w2); else reverseword(w2);
1642     word = w2;
1643   }
1644
1645     int wl = strlen(word);
1646
1647
1648     if (*slst) {
1649         wlst = *slst;
1650     } else {
1651         wlst = (char **) calloc(maxSug, sizeof(char *));
1652         if (wlst == NULL) return -1;
1653     }
1654
1655     rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug);
1656
1657     // delete dash from end of word
1658     if (nsug > 0) {
1659         for (int j=0; j < nsug; j++) {
1660             if (wlst[j][strlen(wlst[j]) - 1] == '-') wlst[j][strlen(wlst[j]) - 1] = '\0';
1661         }
1662     }
1663
1664     *slst = wlst;
1665     return nsug;
1666 }
1667 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1668
1669
1670 char * SuggestMgr::suggest_morph(const char * w)
1671 {
1672     char result[MAXLNLEN];
1673     char * r = (char *) result;
1674     char * st;
1675
1676     struct hentry * rv = NULL;
1677
1678     *result = '\0';
1679
1680     if (! pAMgr) return NULL;
1681
1682   char w2[MAXSWUTF8L];
1683   const char * word = w;
1684
1685   // word reversing wrapper for complex prefixes
1686   if (complexprefixes) {
1687     strcpy(w2, w);
1688     if (utf8) reverseword_utf(w2); else reverseword(w2);
1689     word = w2;
1690   }
1691
1692     rv = pAMgr->lookup(word);
1693
1694     while (rv) {
1695         if ((!rv->astr) || !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
1696             TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
1697             TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
1698                 if (!HENTRY_FIND(rv, MORPH_STEM)) {
1699                     mystrcat(result, " ", MAXLNLEN);
1700                     mystrcat(result, MORPH_STEM, MAXLNLEN);
1701                     mystrcat(result, word, MAXLNLEN);
1702                 }
1703                 if (HENTRY_DATA(rv)) {
1704                     mystrcat(result, " ", MAXLNLEN);
1705                     mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
1706                 }
1707                 mystrcat(result, "\n", MAXLNLEN);
1708         }
1709         rv = rv->next_homonym;
1710     }
1711
1712     st = pAMgr->affix_check_morph(word,strlen(word));
1713     if (st) {
1714         mystrcat(result, st, MAXLNLEN);
1715         free(st);
1716     }
1717
1718     if (pAMgr->get_compound() && (*result == '\0'))
1719         pAMgr->compound_check_morph(word, strlen(word),
1720                      0, 0, 100, 0,NULL, 0, &r, NULL);
1721
1722     return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL;
1723 }
1724
1725 #ifdef HUNSPELL_EXPERIMENTAL
1726 char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
1727 {
1728     char * p = NULL;
1729     char ** wlst = (char **) calloc(maxSug, sizeof(char *));
1730     if (!**wlst) return NULL;
1731     // we will use only the first suggestion
1732     for (int i = 0; i < maxSug - 1; i++) wlst[i] = "";
1733     int ns = suggest(&wlst, word, maxSug - 1, NULL);
1734     if (ns == maxSug) {
1735         p = suggest_morph(wlst[maxSug - 1]);
1736         free(wlst[maxSug - 1]);
1737     }
1738     if (wlst) free(wlst);
1739     return p;
1740 }
1741 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1742
1743 /* affixation */
1744 char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern)
1745 {
1746     char result[MAXLNLEN];
1747     *result = '\0';
1748     int sfxcount = get_sfxcount(pattern);
1749
1750     if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL;
1751
1752     if (HENTRY_DATA(rv)) {
1753         char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
1754             HENTRY_DATA(rv), pattern, 0);
1755         if (aff) {
1756             mystrcat(result, aff, MAXLNLEN);
1757             mystrcat(result, "\n", MAXLNLEN);
1758             free(aff);
1759         }
1760     }
1761
1762     // check all allomorphs
1763     char allomorph[MAXLNLEN];
1764     char * p = NULL;
1765     if (HENTRY_DATA(rv)) p = (char *) strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
1766     while (p) {
1767         struct hentry * rv2 = NULL;
1768         p += MORPH_TAG_LEN;
1769         int plen = fieldlen(p);
1770         strncpy(allomorph, p, plen);
1771         allomorph[plen] = '\0';
1772         rv2 = pAMgr->lookup(allomorph);
1773         while (rv2) {
1774 //            if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= sfxcount) {
1775             if (HENTRY_DATA(rv2)) {
1776                 char * st = (char *) strstr(HENTRY_DATA2(rv2), MORPH_STEM);
1777                 if (st && (strncmp(st + MORPH_TAG_LEN,
1778                    HENTRY_WORD(rv), fieldlen(st + MORPH_TAG_LEN)) == 0)) {
1779                     char * aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr, rv2->alen,
1780                         HENTRY_DATA(rv2), pattern, 0);
1781                     if (aff) {
1782                         mystrcat(result, aff, MAXLNLEN);
1783                         mystrcat(result, "\n", MAXLNLEN);
1784                         free(aff);
1785                     }
1786                 }
1787             }
1788             rv2 = rv2->next_homonym;
1789         }
1790         p = strstr(p + plen, MORPH_ALLOMORPH);
1791     }
1792
1793     return (*result) ? mystrdup(result) : NULL;
1794 }
1795
1796 char * SuggestMgr::suggest_gen(char ** desc, int n, char * pattern) {
1797   char result[MAXLNLEN];
1798   char result2[MAXLNLEN];
1799   char newpattern[MAXLNLEN];
1800   *newpattern = '\0';
1801   if (n == 0) return 0;
1802   *result2 = '\0';
1803   struct hentry * rv = NULL;
1804   if (!pAMgr) return NULL;
1805
1806 // search affixed forms with and without derivational suffixes
1807   while(1) {
1808
1809   for (int k = 0; k < n; k++) {
1810     *result = '\0';
1811     // add compound word parts (except the last one)
1812     char * s = (char *) desc[k];
1813     char * part = strstr(s, MORPH_PART);
1814     if (part) {
1815         char * nextpart = strstr(part + 1, MORPH_PART);
1816         while (nextpart) {
1817             copy_field(result + strlen(result), part, MORPH_PART);
1818             part = nextpart;
1819             nextpart = strstr(part + 1, MORPH_PART);
1820         }
1821         s = part;
1822     }
1823
1824     char **pl;
1825     char tok[MAXLNLEN];
1826     strcpy(tok, s);
1827     char * alt = strstr(tok, " | ");
1828     while (alt) {
1829         alt[1] = MSEP_ALT;
1830         alt = strstr(alt, " | ");
1831     }
1832     int pln = line_tok(tok, &pl, MSEP_ALT);
1833     for (int i = 0; i < pln; i++) {
1834             // remove inflectional and terminal suffixes
1835             char * is = strstr(pl[i], MORPH_INFL_SFX);
1836             if (is) *is = '\0';
1837             char * ts = strstr(pl[i], MORPH_TERM_SFX);
1838             while (ts) {
1839                 *ts = '_';
1840                 ts = strstr(pl[i], MORPH_TERM_SFX);
1841             }
1842             char * st = strstr(s, MORPH_STEM);
1843             if (st) {
1844                 copy_field(tok, st, MORPH_STEM);
1845                 rv = pAMgr->lookup(tok);
1846                 while (rv) {
1847                     char newpat[MAXLNLEN];
1848                     strcpy(newpat, pl[i]);
1849                     strcat(newpat, pattern);
1850                     char * sg = suggest_hentry_gen(rv, newpat);
1851                     if (!sg) sg = suggest_hentry_gen(rv, pattern);
1852                     if (sg) {
1853                         char ** gen;
1854                         int genl = line_tok(sg, &gen, MSEP_REC);
1855                         free(sg);
1856                         sg = NULL;
1857                         for (int j = 0; j < genl; j++) {
1858                             if (strstr(pl[i], MORPH_SURF_PFX)) {
1859                                 int r2l = strlen(result2);
1860                                 result2[r2l] = MSEP_REC;
1861                                 strcpy(result2 + r2l + 1, result);
1862                                 copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
1863                                 mystrcat(result2, gen[j], MAXLNLEN);
1864                             } else {
1865                                 sprintf(result2 + strlen(result2), "%c%s%s",
1866                                     MSEP_REC, result, gen[j]);
1867                             }
1868                         }
1869                         freelist(&gen, genl);
1870                     }
1871                     rv = rv->next_homonym;
1872                 }
1873             }
1874     }
1875     freelist(&pl, pln);
1876   }
1877
1878   if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) break;
1879   strcpy(newpattern, pattern);
1880   pattern = newpattern;
1881   char * ds = strstr(pattern, MORPH_DERI_SFX);
1882   while (ds) {
1883     strncpy(ds, MORPH_TERM_SFX, MORPH_TAG_LEN);
1884     ds = strstr(pattern, MORPH_DERI_SFX);
1885   }
1886  }
1887   return (*result2 ? mystrdup(result2) : NULL);
1888 }
1889
1890
1891 // generate an n-gram score comparing s1 and s2
1892 int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt)
1893 {
1894   int nscore = 0;
1895   int ns;
1896   int l1;
1897   int l2;
1898   int test = 0;
1899
1900   if (utf8) {
1901     w_char su1[MAXSWL];
1902     w_char su2[MAXSWL];
1903     l1 = u8_u16(su1, MAXSWL, s1);
1904     l2 = u8_u16(su2, MAXSWL, s2);
1905     if ((l2 <= 0) || (l1 == -1)) return 0;
1906     // lowering dictionary word
1907     if (opt & NGRAM_LOWERING) mkallsmall_utf(su2, l2, langnum);
1908     for (int j = 1; j <= n; j++) {
1909       ns = 0;
1910       for (int i = 0; i <= (l1-j); i++) {
1911         int k = 0;
1912         for (int l = 0; l <= (l2-j); l++) {
1913             for (k = 0; k < j; k++) {
1914               w_char * c1 = su1 + i + k;
1915               w_char * c2 = su2 + l + k;
1916               if ((c1->l != c2->l) || (c1->h != c2->h)) break;
1917             }
1918             if (k == j) {
1919                 ns++;
1920                 break;
1921             }
1922         }
1923         if (k != j && opt & NGRAM_WEIGHTED) {
1924           ns--;
1925           test++;
1926           if (i == 0 || i == l1-j) ns--; // side weight
1927         }
1928       }
1929       nscore = nscore + ns;
1930       if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break;
1931     }
1932   } else {
1933     l2 = strlen(s2);
1934     if (l2 == 0) return 0;
1935     l1 = strlen(s1);
1936     char *t = mystrdup(s2);
1937     if (opt & NGRAM_LOWERING) mkallsmall(t, csconv);
1938     for (int j = 1; j <= n; j++) {
1939       ns = 0;
1940       for (int i = 0; i <= (l1-j); i++) {
1941         char c = *(s1 + i + j);
1942         *(s1 + i + j) = '\0';
1943         if (strstr(t,(s1+i))) {
1944           ns++;
1945         } else if (opt & NGRAM_WEIGHTED) {
1946           ns--;
1947 test++;
1948           if (i == 0 || i == l1-j) ns--; // side weight
1949         }
1950         *(s1 + i + j ) = c;
1951       }
1952       nscore = nscore + ns;
1953       if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break;
1954     }
1955     free(t);
1956   }
1957
1958   ns = 0;
1959   if (opt & NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
1960   if (opt & NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
1961   ns = (nscore - ((ns > 0) ? ns : 0));
1962   return ns;
1963 }
1964
1965 // length of the left common substring of s1 and (decapitalised) s2
1966 int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) {
1967   if (utf8) {
1968     w_char su1[MAXSWL];
1969     w_char su2[MAXSWL];
1970     su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0;
1971     // decapitalize dictionary word
1972     if (complexprefixes) {
1973       int l1 = u8_u16(su1, MAXSWL, s1);
1974       int l2 = u8_u16(su2, MAXSWL, s2);
1975       if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1;
1976     } else {
1977       int i;
1978       u8_u16(su1, 1, s1);
1979       u8_u16(su2, 1, s2);
1980       unsigned short idx = (su2->h << 8) + su2->l;
1981       unsigned short otheridx = (su1->h << 8) + su1->l;
1982       if (otheridx != idx &&
1983          (otheridx != unicodetolower(idx, langnum))) return 0;
1984       int l1 = u8_u16(su1, MAXSWL, s1);
1985       int l2 = u8_u16(su2, MAXSWL, s2);
1986       for(i = 1; (i < l1) && (i < l2) &&
1987          (su1[i].l == su2[i].l) && (su1[i].h == su2[i].h); i++);
1988       return i;
1989     }
1990   } else {
1991     if (complexprefixes) {
1992       int l1 = strlen(s1);
1993       int l2 = strlen(s2);
1994       if (*(s2+l1-1) == *(s2+l2-1)) return 1;
1995     } else {
1996       char * olds = s1;
1997       // decapitalise dictionary word
1998       if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) return 0;
1999       do {
2000         s1++; s2++;
2001       } while ((*s1 == *s2) && (*s1 != '\0'));
2002       return (int)(s1 - olds);
2003     }
2004   }
2005   return 0;
2006 }
2007
2008 int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_swap) {
2009   int num = 0;
2010   int diff = 0;
2011   int diffpos[2];
2012   *is_swap = 0;
2013   if (utf8) {
2014     w_char su1[MAXSWL];
2015     w_char su2[MAXSWL];
2016     int l1 = u8_u16(su1, MAXSWL, s1);
2017     int l2 = u8_u16(su2, MAXSWL, s2);
2018     // decapitalize dictionary word
2019     if (complexprefixes) {
2020       mkallsmall_utf(su2+l2-1, 1, langnum);
2021     } else {
2022       mkallsmall_utf(su2, 1, langnum);
2023     }
2024     for (int i = 0; (i < l1) && (i < l2); i++) {
2025       if (((short *) su1)[i] == ((short *) su2)[i]) {
2026         num++;
2027       } else {
2028         if (diff < 2) diffpos[diff] = i;
2029         diff++;
2030       }
2031     }
2032     if ((diff == 2) && (l1 == l2) &&
2033         (((short *) su1)[diffpos[0]] == ((short *) su2)[diffpos[1]]) &&
2034         (((short *) su1)[diffpos[1]] == ((short *) su2)[diffpos[0]])) *is_swap = 1;
2035   } else {
2036     int i;
2037     char t[MAXSWUTF8L];
2038     strcpy(t, s2);
2039     // decapitalize dictionary word
2040     if (complexprefixes) {
2041       int l2 = strlen(t);
2042       *(t+l2-1) = csconv[((unsigned char)*(t+l2-1))].clower;
2043     } else {
2044       mkallsmall(t, csconv);
2045     }
2046     for (i = 0; (*(s1+i) != 0) && (*(t+i) != 0); i++) {
2047       if (*(s1+i) == *(t+i)) {
2048         num++;
2049       } else {
2050         if (diff < 2) diffpos[diff] = i;
2051         diff++;
2052       }
2053     }
2054     if ((diff == 2) && (*(s1+i) == 0) && (*(t+i) == 0) &&
2055       (*(s1+diffpos[0]) == *(t+diffpos[1])) &&
2056       (*(s1+diffpos[1]) == *(t+diffpos[0]))) *is_swap = 1;
2057   }
2058   return num;
2059 }
2060
2061 int SuggestMgr::mystrlen(const char * word) {
2062   if (utf8) {
2063     w_char w[MAXSWL];
2064     return u8_u16(w, MAXSWL, word);
2065   } else return strlen(word);
2066 }
2067
2068 // sort in decreasing order of score
2069 void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n )
2070 {
2071       int m = 1;
2072       while (m < n) {
2073           int j = m;
2074           while (j > 0) {
2075             if (rsc[j-1] < rsc[j]) {
2076                 int sctmp = rsc[j-1];
2077                 char * wdtmp = rword[j-1];
2078                 rsc[j-1] = rsc[j];
2079                 rword[j-1] = rword[j];
2080                 rsc[j] = sctmp;
2081                 rword[j] = wdtmp;
2082                 if (rword2) {
2083                     wdtmp = rword2[j-1];
2084                     rword2[j-1] = rword2[j];
2085                     rword2[j] = wdtmp;
2086                 }
2087                 j--;
2088             } else break;
2089           }
2090           m++;
2091       }
2092       return;
2093 }
2094
2095 // longest common subsequence
2096 void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char ** result) {
2097   int n, m;
2098   w_char su[MAXSWL];
2099   w_char su2[MAXSWL];
2100   char * b;
2101   char * c;
2102   int i;
2103   int j;
2104   if (utf8) {
2105     m = u8_u16(su, MAXSWL, s);
2106     n = u8_u16(su2, MAXSWL, s2);
2107   } else {
2108     m = strlen(s);
2109     n = strlen(s2);
2110   }
2111   c = (char *) calloc(m + 1, n + 1);
2112   b = (char *) calloc(m + 1, n + 1);
2113   if (!c || !b) {
2114     if (c) free(c);
2115     if (b) free(b);
2116     *result = NULL;
2117     return;
2118   }
2119   for (i = 1; i <= m; i++) {
2120     for (j = 1; j <= n; j++) {
2121       if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1)))
2122           || ((!utf8) && ((*(s+i-1)) == (*(s2+j-1))))) {
2123         c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1;
2124         b[i*(n+1) + j] = LCS_UPLEFT;
2125       } else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) {
2126         c[i*(n+1) + j] = c[(i-1)*(n+1) + j];
2127         b[i*(n+1) + j] = LCS_UP;
2128       } else {
2129         c[i*(n+1) + j] = c[i*(n+1) + j-1];
2130         b[i*(n+1) + j] = LCS_LEFT;
2131       }
2132     }
2133   }
2134   *result = b;
2135   free(c);
2136   *l1 = m;
2137   *l2 = n;
2138 }
2139
2140 int SuggestMgr::lcslen(const char * s, const char* s2) {
2141   int m;
2142   int n;
2143   int i;
2144   int j;
2145   char * result;
2146   int len = 0;
2147   lcs(s, s2, &m, &n, &result);
2148   if (!result) return 0;
2149   i = m;
2150   j = n;
2151   while ((i != 0) && (j != 0)) {
2152     if (result[i*(n+1) + j] == LCS_UPLEFT) {
2153       len++;
2154       i--;
2155       j--;
2156     } else if (result[i*(n+1) + j] == LCS_UP) {
2157       i--;
2158     } else j--;
2159   }
2160   free(result);
2161   return len;
2162 }