extensions/spellcheck/hunspell/src/hunspell.cpp

   1 /******* BEGIN LICENSE BLOCK *******
   2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   3  *
   4  * The contents of this file are subject to the Mozilla Public License Version
   5  * 1.1 (the "License"); you may not use this file except in compliance with
   6  * the License. You may obtain a copy of the License at
   7  * http://www.mozilla.org/MPL/
   8  *
   9  * Software distributed under the License is distributed on an "AS IS" basis,
  10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11  * for the specific language governing rights and limitations under the
  12  * License.
  13  *
  14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
  15  * and László Németh (Hunspell). Portions created by the Initial Developers
  16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
  17  *
  18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
  19  *                 David Einstein (deinst@world.std.com)
  20  *                 László Németh (nemethl@gyorsposta.hu)
  21  *                 Davide Prina
  22  *                 Giuseppe Modugno
  23  *                 Gianluca Turconi
  24  *                 Simon Brouwer
  25  *                 Noll Janos
  26  *                 Biro Arpad
  27  *                 Goldman Eleonora
  28  *                 Sarlos Tamas
  29  *                 Bencsath Boldizsar
  30  *                 Halacsy Peter
  31  *                 Dvornik Laszlo
  32  *                 Gefferth Andras
  33  *                 Nagy Viktor
  34  *                 Varga Daniel
  35  *                 Chris Halls
  36  *                 Rene Engelhard
  37  *                 Bram Moolenaar
  38  *                 Dafydd Jones
  39  *                 Harri Pitkanen
  40  *                 Andras Timar
  41  *                 Tor Lillqvist
  42  *
  43  * Alternatively, the contents of this file may be used under the terms of
  44  * either the GNU General Public License Version 2 or later (the "GPL"), or
  45  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  46  * in which case the provisions of the GPL or the LGPL are applicable instead
  47  * of those above. If you wish to allow use of your version of this file only
  48  * under the terms of either the GPL or the LGPL, and not to allow others to
  49  * use your version of this file under the terms of the MPL, indicate your
  50  * decision by deleting the provisions above and replace them with the notice
  51  * and other provisions required by the GPL or the LGPL. If you do not delete
  52  * the provisions above, a recipient may use your version of this file under
  53  * the terms of any one of the MPL, the GPL or the LGPL.
  54  *
  55  ******* END LICENSE BLOCK *******/
  56
  57 #ifndef MOZILLA_CLIENT
  58 #include <cstdlib>
  59 #include <cstring>
  60 #include <cstdio>
  61 #else
  62 #include <stdlib.h>
  63 #include <string.h>
  64 #include <stdio.h>
  65 #endif
  66
  67 #include "csutil.hxx"
  68 #include "hunspell.h"
  69 #include "hunspell.hxx"
  70
  71 #ifndef MOZILLA_CLIENT
  72 #ifndef W32
  73 using namespace std;
  74 #endif
  75 #endif
  76
  77 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
  78 {
  79     encoding = NULL;
  80     csconv = NULL;
  81     utf8 = 0;
  82     complexprefixes = 0;
  83     affixpath = mystrdup(affpath);
  84     maxdic = 0;
  85
  86     /* first set up the hash manager */
  87     pHMgr[0] = new HashMgr(dpath, affpath, key);
  88     if (pHMgr[0]) maxdic = 1;
  89
  90     /* next set up the affix manager */
  91     /* it needs access to the hash manager lookup methods */
  92     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
  93
  94     /* get the preferred try string and the dictionary */
  95     /* encoding from the Affix Manager for that dictionary */
  96     char * try_string = pAMgr->get_try_string();
  97     encoding = pAMgr->get_encoding();
  98     csconv = get_current_cs(encoding);
  99     langnum = pAMgr->get_langnum();
 100     utf8 = pAMgr->get_utf8();
 101     complexprefixes = pAMgr->get_complexprefixes();
 102     wordbreak = pAMgr->get_breaktable();
 103
 104     /* and finally set up the suggestion manager */
 105     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
 106     if (try_string) free(try_string);
 107 }
 108
 109 Hunspell::~Hunspell()
 110 {
 111     if (pSMgr) delete pSMgr;
 112     if (pAMgr) delete pAMgr;
 113     for (int i = 0; i < maxdic; i++) delete pHMgr[i];
 114     maxdic = 0;
 115     pSMgr = NULL;
 116     pAMgr = NULL;
 117 #ifdef MOZILLA_CLIENT
 118     free(csconv);
 119 #endif
 120     csconv= NULL;
 121     if (encoding) free(encoding);
 122     encoding = NULL;
 123     if (affixpath) free(affixpath);
 124     affixpath = NULL;
 125 }
 126
 127 // load extra dictionaries
 128 int Hunspell::add_dic(const char * dpath, const char * key) {
 129     if (maxdic == MAXDIC || !affixpath) return 1;
 130     pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
 131     if (pHMgr[maxdic]) maxdic++; else return 1;
 132     return 0;
 133 }
 134
 135 // make a copy of src at destination while removing all leading
 136 // blanks and removing any trailing periods after recording
 137 // their presence with the abbreviation flag
 138 // also since already going through character by character,
 139 // set the capitalization type
 140 // return the length of the "cleaned" (and UTF-8 encoded) word
 141
 142 int Hunspell::cleanword2(char * dest, const char * src,
 143     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
 144 {
 145    unsigned char * p = (unsigned char *) dest;
 146    const unsigned char * q = (const unsigned char * ) src;
 147
 148    // first skip over any leading blanks
 149    while ((*q != '\0') && (*q == ' ')) q++;
 150
 151    // now strip off any trailing periods (recording their presence)
 152    *pabbrev = 0;
 153    int nl = strlen((const char *)q);
 154    while ((nl > 0) && (*(q+nl-1)=='.')) {
 155        nl--;
 156        (*pabbrev)++;
 157    }
 158
 159    // if no characters are left it can't be capitalized
 160    if (nl <= 0) {
 161        *pcaptype = NOCAP;
 162        *p = '\0';
 163        return 0;
 164    }
 165
 166    strncpy(dest, (char *) q, nl);
 167    *(dest + nl) = '\0';
 168    nl = strlen(dest);
 169    if (utf8) {
 170       *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
 171       // don't check too long words
 172       if (*nc >= MAXWORDLEN) return 0;
 173       if (*nc == -1) { // big Unicode character (non BMP area)
 174          *pcaptype = NOCAP;
 175          return nl;
 176       }
 177      *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
 178    } else {
 179      *pcaptype = get_captype(dest, nl, csconv);
 180      *nc = nl;
 181    }
 182    return nl;
 183 }
 184
 185 int Hunspell::cleanword(char * dest, const char * src,
 186     int * pcaptype, int * pabbrev)
 187 {
 188    unsigned char * p = (unsigned char *) dest;
 189    const unsigned char * q = (const unsigned char * ) src;
 190    int firstcap = 0;
 191
 192    // first skip over any leading blanks
 193    while ((*q != '\0') && (*q == ' ')) q++;
 194
 195    // now strip off any trailing periods (recording their presence)
 196    *pabbrev = 0;
 197    int nl = strlen((const char *)q);
 198    while ((nl > 0) && (*(q+nl-1)=='.')) {
 199        nl--;
 200        (*pabbrev)++;
 201    }
 202
 203    // if no characters are left it can't be capitalized
 204    if (nl <= 0) {
 205        *pcaptype = NOCAP;
 206        *p = '\0';
 207        return 0;
 208    }
 209
 210    // now determine the capitalization type of the first nl letters
 211    int ncap = 0;
 212    int nneutral = 0;
 213    int nc = 0;
 214
 215    if (!utf8) {
 216       while (nl > 0) {
 217          nc++;
 218          if (csconv[(*q)].ccase) ncap++;
 219          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
 220          *p++ = *q++;
 221          nl--;
 222       }
 223       // remember to terminate the destination string
 224       *p = '\0';
 225       firstcap = csconv[(unsigned char)(*dest)].ccase;
 226    } else {
 227       unsigned short idx;
 228       w_char t[MAXWORDLEN];
 229       nc = u8_u16(t, MAXWORDLEN, src);
 230       for (int i = 0; i < nc; i++) {
 231          idx = (t[i].h << 8) + t[i].l;
 232          unsigned short low = unicodetolower(idx, langnum);
 233          if (idx != low) ncap++;
 234          if (unicodetoupper(idx, langnum) == low) nneutral++;
 235       }
 236       u16_u8(dest, MAXWORDUTF8LEN, t, nc);
 237       if (ncap) {
 238          idx = (t[0].h << 8) + t[0].l;
 239          firstcap = (idx != unicodetolower(idx, langnum));
 240       }
 241    }
 242
 243    // now finally set the captype
 244    if (ncap == 0) {
 245         *pcaptype = NOCAP;
 246    } else if ((ncap == 1) && firstcap) {
 247         *pcaptype = INITCAP;
 248    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
 249         *pcaptype = ALLCAP;
 250    } else if ((ncap > 1) && firstcap) {
 251         *pcaptype = HUHINITCAP;
 252    } else {
 253         *pcaptype = HUHCAP;
 254    }
 255    return strlen(dest);
 256 }
 257
 258 void Hunspell::mkallcap(char * p)
 259 {
 260   if (utf8) {
 261       w_char u[MAXWORDLEN];
 262       int nc = u8_u16(u, MAXWORDLEN, p);
 263       unsigned short idx;
 264       for (int i = 0; i < nc; i++) {
 265          idx = (u[i].h << 8) + u[i].l;
 266          if (idx != unicodetoupper(idx, langnum)) {
 267             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
 268             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
 269          }
 270       }
 271       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 272   } else {
 273     while (*p != '\0') {
 274         *p = csconv[((unsigned char) *p)].cupper;
 275         p++;
 276     }
 277   }
 278 }
 279
 280 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
 281 {
 282   if (utf8) {
 283       unsigned short idx;
 284       for (int i = 0; i < nc; i++) {
 285          idx = (u[i].h << 8) + u[i].l;
 286          unsigned short up = unicodetoupper(idx, langnum);
 287          if (idx != up) {
 288             u[i].h = (unsigned char) (up >> 8);
 289             u[i].l = (unsigned char) (up & 0x00FF);
 290          }
 291       }
 292       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 293       return strlen(p);
 294   } else {
 295     while (*p != '\0') {
 296         *p = csconv[((unsigned char) *p)].cupper;
 297         p++;
 298     }
 299   }
 300   return nc;
 301 }
 302
 303
 304 void Hunspell::mkallsmall(char * p)
 305 {
 306     while (*p != '\0') {
 307         *p = csconv[((unsigned char) *p)].clower;
 308         p++;
 309     }
 310 }
 311
 312 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
 313 {
 314   if (utf8) {
 315       unsigned short idx;
 316       for (int i = 0; i < nc; i++) {
 317          idx = (u[i].h << 8) + u[i].l;
 318          unsigned short low = unicodetolower(idx, langnum);
 319          if (idx != low) {
 320             u[i].h = (unsigned char) (low >> 8);
 321             u[i].l = (unsigned char) (low & 0x00FF);
 322          }
 323       }
 324       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 325       return strlen(p);
 326   } else {
 327     while (*p != '\0') {
 328         *p = csconv[((unsigned char) *p)].clower;
 329         p++;
 330     }
 331   }
 332   return nc;
 333 }
 334
 335 // convert UTF-8 sharp S codes to latin 1
 336 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
 337     char * p = dest;
 338     *p = *source;
 339     for (p++, source++; *(source - 1); p++, source++) {
 340         *p = *source;
 341         if (*source == '\x9F') *--p = '\xDF';
 342     }
 343     return dest;
 344 }
 345
 346 // recursive search for right ss - sharp s permutations
 347 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
 348         int repnum, char * tmp, int * info, char **root) {
 349     pos = strstr(pos, "ss");
 350     if (pos && (n < MAXSHARPS)) {
 351         *pos = '\xC3';
 352         *(pos + 1) = '\x9F';
 353         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
 354         if (h) return h;
 355         *pos = 's';
 356         *(pos + 1) = 's';
 357         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
 358         if (h) return h;
 359     } else if (repnum > 0) {
 360         if (utf8) return checkword(base, info, root);
 361         return checkword(sharps_u8_l1(tmp, base), info, root);
 362     }
 363     return NULL;
 364 }
 365
 366 int Hunspell::is_keepcase(const hentry * rv) {
 367     return pAMgr && rv->astr && pAMgr->get_keepcase() &&
 368         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
 369 }
 370
 371 /* insert a word to the beginning of the suggestion array and return ns */
 372 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
 373     char * dup = mystrdup(word);
 374     if (!dup) return ns;
 375     if (ns == MAXSUGGESTION) {
 376         ns--;
 377         free((*slst)[ns]);
 378     }
 379     for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 380     (*slst)[0] = dup;
 381     return ns + 1;
 382 }
 383
 384 int Hunspell::spell(const char * word, int * info, char ** root)
 385 {
 386   struct hentry * rv=NULL;
 387   // need larger vector. For example, Turkish capital letter I converted a
 388   // 2-byte UTF-8 character (dotless i) by mkallsmall.
 389   char cw[MAXWORDUTF8LEN];
 390   char wspace[MAXWORDUTF8LEN];
 391   w_char unicw[MAXWORDLEN];
 392   // Hunspell supports XML input of the simplified API (see manual)
 393   if (strcmp(word, SPELL_XML) == 0) return 1;
 394   int nc = strlen(word);
 395   int wl2 = 0;
 396   if (utf8) {
 397     if (nc >= MAXWORDUTF8LEN) return 0;
 398   } else {
 399     if (nc >= MAXWORDLEN) return 0;
 400   }
 401   int captype = 0;
 402   int abbv = 0;
 403   int wl = 0;
 404
 405   // input conversion
 406   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 407   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 408   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 409
 410   int info2 = 0;
 411   if (wl == 0 || maxdic == 0) return 1;
 412   if (root) *root = NULL;
 413
 414   // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
 415   enum { NBEGIN, NNUM, NSEP };
 416   int nstate = NBEGIN;
 417   int i;
 418
 419   for (i = 0; (i < wl); i++) {
 420     if ((cw[i] <= '9') && (cw[i] >= '0')) {
 421         nstate = NNUM;
 422     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
 423         if ((nstate == NSEP) || (i == 0)) break;
 424         nstate = NSEP;
 425     } else break;
 426   }
 427   if ((i == wl) && (nstate == NNUM)) return 1;
 428   if (!info) info = &info2; else *info = 0;
 429
 430   switch(captype) {
 431      case HUHCAP:
 432      case HUHINITCAP:
 433      case NOCAP: {
 434             rv = checkword(cw, info, root);
 435             if ((abbv) && !(rv)) {
 436                 memcpy(wspace,cw,wl);
 437                 *(wspace+wl) = '.';
 438                 *(wspace+wl+1) = '\0';
 439                 rv = checkword(wspace, info, root);
 440             }
 441             break;
 442          }
 443      case ALLCAP: {
 444             rv = checkword(cw, info, root);
 445             if (rv) break;
 446             if (abbv) {
 447                 memcpy(wspace,cw,wl);
 448                 *(wspace+wl) = '.';
 449                 *(wspace+wl+1) = '\0';
 450                 rv = checkword(wspace, info, root);
 451                 if (rv) break;
 452             }
 453             // Spec. prefix handling for Catalan, French, Italian:
 454             // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
 455             if (pAMgr && strchr(cw, '\'')) {
 456                 wl = mkallsmall2(cw, unicw, nc);
 457                 char * apostrophe = strchr(cw, '\'');
 458                 if (utf8) {
 459                     w_char tmpword[MAXWORDLEN];
 460                     *apostrophe = '\0';
 461                     wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
 462                     *apostrophe = '\'';
 463                     if (wl2 < nc) {
 464                         mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
 465                         rv = checkword(cw, info, root);
 466                         if (rv) break;
 467                     }
 468                 } else {
 469                     mkinitcap2(apostrophe + 1, unicw, nc);
 470                     rv = checkword(cw, info, root);
 471                     if (rv) break;
 472                 }
 473                 mkinitcap2(cw, unicw, nc);
 474                 rv = checkword(cw, info, root);
 475                 if (rv) break;
 476             }
 477             if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
 478                 char tmpword[MAXWORDUTF8LEN];
 479                 wl = mkallsmall2(cw, unicw, nc);
 480                 memcpy(wspace,cw,(wl+1));
 481                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 482                 if (!rv) {
 483                     wl2 = mkinitcap2(cw, unicw, nc);
 484                     rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
 485                 }
 486                 if ((abbv) && !(rv)) {
 487                     *(wspace+wl) = '.';
 488                     *(wspace+wl+1) = '\0';
 489                     rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 490                     if (!rv) {
 491                         memcpy(wspace, cw, wl2);
 492                         *(wspace+wl2) = '.';
 493                         *(wspace+wl2+1) = '\0';
 494                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 495                     }
 496                 }
 497                 if (rv) break;
 498             }
 499         }
 500      case INITCAP: {
 501              wl = mkallsmall2(cw, unicw, nc);
 502              memcpy(wspace,cw,(wl+1));
 503              wl2 = mkinitcap2(cw, unicw, nc);
 504              if (captype == INITCAP) *info += SPELL_INITCAP;
 505              rv = checkword(cw, info, root);
 506              if (captype == INITCAP) *info -= SPELL_INITCAP;
 507              // forbid bad capitalization
 508              // (for example, ijs -> Ijs instead of IJs in Dutch)
 509              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
 510              if (*info & SPELL_FORBIDDEN) {
 511                 rv = NULL;
 512                 break;
 513              }
 514              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
 515              if (rv) break;
 516
 517              rv = checkword(wspace, info, root);
 518              if (abbv && !rv) {
 519
 520                  *(wspace+wl) = '.';
 521                  *(wspace+wl+1) = '\0';
 522                  rv = checkword(wspace, info, root);
 523                  if (!rv) {
 524                     memcpy(wspace, cw, wl2);
 525                     *(wspace+wl2) = '.';
 526                     *(wspace+wl2+1) = '\0';
 527                     if (captype == INITCAP) *info += SPELL_INITCAP;
 528                     rv = checkword(wspace, info, root);
 529                     if (captype == INITCAP) *info -= SPELL_INITCAP;
 530                     if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
 531                     break;
 532                  }
 533              }
 534              if (rv && is_keepcase(rv) &&
 535                 ((captype == ALLCAP) ||
 536                    // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
 537                    // in INITCAP form, too.
 538                    !(pAMgr->get_checksharps() &&
 539                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||
 540                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
 541              break;
 542            }
 543   }
 544
 545   if (rv) return 1;
 546
 547   // recursive breaking at break points
 548   if (wordbreak) {
 549     char * s;
 550     char r;
 551     int corr = 0;
 552     wl = strlen(cw);
 553     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
 554     // check boundary patterns (^begin and end$)
 555     for (int j = 0; j < numbreak; j++) {
 556       int plen = strlen(wordbreak[j]);
 557       if (plen == 1 || plen > wl) continue;
 558       if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
 559         && spell(cw + plen - 1)) return 1;
 560       if (wordbreak[j][plen - 1] == '$' &&
 561         strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
 562             r = cw[wl - plen + 1];
 563             cw[wl - plen + 1] = '\0';
 564             if (spell(cw)) return 1;
 565             cw[wl - plen + 1] = r;
 566         }
 567     }
 568     // other patterns
 569     for (int j = 0; j < numbreak; j++) {
 570       int result = 0;
 571       int plen = strlen(wordbreak[j]);
 572       s=(char *) strstr(cw, wordbreak[j]);
 573       if (s && (s > cw) && (s < cw + wl - plen)) {
 574         if (!spell(s + plen)) continue;
 575         r = *s;
 576         *s = '\0';
 577         // examine 2 sides of the break point
 578         if (spell(cw)) return 1;
 579         *s = r;
 580
 581         // LANG_hu: spec. dash rule
 582         if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
 583           r = s[1];
 584           s[1] = '\0';
 585           if (spell(cw)) return 1; // check the first part with dash
 586           s[1] = r;
 587         }
 588         // end of LANG speficic region
 589
 590       }
 591     }
 592   }
 593
 594   return 0;
 595 }
 596
 597 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
 598 {
 599   struct hentry * he = NULL;
 600   int len, i;
 601   char w2[MAXWORDUTF8LEN];
 602   const char * word;
 603
 604   char * ignoredchars = pAMgr->get_ignore();
 605   if (ignoredchars != NULL) {
 606      strcpy(w2, w);
 607      if (utf8) {
 608         int ignoredchars_utf16_len;
 609         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
 610         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
 611      } else {
 612         remove_ignored_chars(w2,ignoredchars);
 613      }
 614      word = w2;
 615   } else word = w;
 616
 617   // word reversing wrapper for complex prefixes
 618   if (complexprefixes) {
 619     if (word != w2) {
 620       strcpy(w2, word);
 621       word = w2;
 622     }
 623     if (utf8) reverseword_utf(w2); else reverseword(w2);
 624   }
 625
 626   // look word in hash table
 627   for (i = 0; (i < maxdic) && !he; i ++) {
 628   he = (pHMgr[i])->lookup(word);
 629
 630   // check forbidden and onlyincompound words
 631   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 632     if (info) *info += SPELL_FORBIDDEN;
 633     // LANG_hu section: set dash information for suggestions
 634     if (langnum == LANG_hu) {
 635         if (pAMgr->get_compoundflag() &&
 636             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
 637                 if (info) *info += SPELL_COMPOUND;
 638         }
 639     }
 640     return NULL;
 641   }
 642
 643   // he = next not needaffix, onlyincompound homonym or onlyupcase word
 644   while (he && (he->astr) &&
 645     ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
 646        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
 647        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
 648     )) he = he->next_homonym;
 649   }
 650
 651   // check with affixes
 652   if (!he && pAMgr) {
 653      // try stripping off affixes */
 654      len = strlen(word);
 655      he = pAMgr->affix_check(word, len, 0);
 656
 657      // check compound restriction and onlyupcase
 658      if (he && he->astr && (
 659         (pAMgr->get_onlyincompound() &&
 660             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
 661         (info && (*info & SPELL_INITCAP) &&
 662             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
 663             he = NULL;
 664      }
 665
 666      if (he) {
 667         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 668             if (info) *info += SPELL_FORBIDDEN;
 669             return NULL;
 670         }
 671         if (root) {
 672             *root = mystrdup(&(he->word));
 673             if (*root && complexprefixes) {
 674                 if (utf8) reverseword_utf(*root); else reverseword(*root);
 675             }
 676         }
 677      // try check compound word
 678      } else if (pAMgr->get_compound()) {
 679           he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0);
 680           // LANG_hu section: `moving rule' with last dash
 681           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
 682              char * dup = mystrdup(word);
 683              if (!dup) return NULL;
 684              dup[len-1] = '\0';
 685              he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0);
 686              free(dup);
 687           }
 688           // end of LANG speficic region
 689           if (he) {
 690                 if (root) {
 691                     *root = mystrdup(&(he->word));
 692                     if (*root && complexprefixes) {
 693                         if (utf8) reverseword_utf(*root); else reverseword(*root);
 694                     }
 695                 }
 696                 if (info) *info += SPELL_COMPOUND;
 697           }
 698      }
 699
 700   }
 701
 702   return he;
 703 }
 704
 705 int Hunspell::suggest(char*** slst, const char * word)
 706 {
 707   int onlycmpdsug = 0;
 708   char cw[MAXWORDUTF8LEN];
 709   char wspace[MAXWORDUTF8LEN];
 710   if (!pSMgr || maxdic == 0) return 0;
 711   w_char unicw[MAXWORDLEN];
 712   *slst = NULL;
 713   // process XML input of the simplified API (see manual)
 714   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
 715      return spellml(slst, word);
 716   }
 717   int nc = strlen(word);
 718   if (utf8) {
 719     if (nc >= MAXWORDUTF8LEN) return 0;
 720   } else {
 721     if (nc >= MAXWORDLEN) return 0;
 722   }
 723   int captype = 0;
 724   int abbv = 0;
 725   int wl = 0;
 726
 727   // input conversion
 728   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 729   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 730   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 731
 732   if (wl == 0) return 0;
 733   int ns = 0;
 734   int capwords = 0;
 735
 736   switch(captype) {
 737      case NOCAP:   {
 738                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 739                      break;
 740                    }
 741
 742      case INITCAP: {
 743                      capwords = 1;
 744                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 745                      if (ns == -1) break;
 746                      memcpy(wspace,cw,(wl+1));
 747                      mkallsmall2(wspace, unicw, nc);
 748                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 749                      break;
 750                    }
 751      case HUHINITCAP:
 752                     capwords = 1;
 753      case HUHCAP: {
 754                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 755                      if (ns != -1) {
 756                         int prevns;
 757                         // something.The -> something. The
 758                         char * dot = strchr(cw, '.');
 759                         if (dot && (dot > cw)) {
 760                             int captype_;
 761                             if (utf8) {
 762                                w_char w_[MAXWORDLEN];
 763                                int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
 764                                captype_ = get_captype_utf8(w_, wl_, langnum);
 765                             } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
 766                             if (captype_ == INITCAP) {
 767                                 char * st = mystrdup(cw);
 768                                 if (st) st = (char *) realloc(st, wl + 2);
 769                                 if (st) {
 770                                         st[(dot - cw) + 1] = ' ';
 771                                         strcpy(st + (dot - cw) + 2, dot + 1);
 772                                         ns = insert_sug(slst, st, ns);
 773                                         free(st);
 774                                 }
 775                             }
 776                         }
 777                         if (captype == HUHINITCAP) {
 778                             // TheOpenOffice.org -> The OpenOffice.org
 779                             memcpy(wspace,cw,(wl+1));
 780                             mkinitsmall2(wspace, unicw, nc);
 781                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 782                         }
 783                         memcpy(wspace,cw,(wl+1));
 784                         mkallsmall2(wspace, unicw, nc);
 785                         if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
 786                         prevns = ns;
 787                         ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 788                         if (captype == HUHINITCAP) {
 789                             mkinitcap2(wspace, unicw, nc);
 790                             if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
 791                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 792                         }
 793                         // aNew -> "a New" (instead of "a new")
 794                         for (int j = prevns; j < ns; j++) {
 795                            char * space = strchr((*slst)[j],' ');
 796                            if (space) {
 797                                 int slen = strlen(space + 1);
 798                                 // different case after space (need capitalisation)
 799                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
 800                                     w_char w[MAXWORDLEN];
 801                                     int wc = 0;
 802                                     char * r = (*slst)[j];
 803                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
 804                                     mkinitcap2(space + 1, w, wc);
 805                                     // set as first suggestion
 806                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 807                                     (*slst)[0] = r;
 808                                 }
 809                            }
 810                         }
 811                      }
 812                      break;
 813                    }
 814
 815      case ALLCAP: {
 816                      memcpy(wspace, cw, (wl+1));
 817                      mkallsmall2(wspace, unicw, nc);
 818                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 819                      if (ns == -1) break;
 820                      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
 821                         ns = insert_sug(slst, wspace, ns);
 822                      mkinitcap2(wspace, unicw, nc);
 823                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 824                      for (int j=0; j < ns; j++) {
 825                         mkallcap((*slst)[j]);
 826                         if (pAMgr && pAMgr->get_checksharps()) {
 827                             char * pos;
 828                             if (utf8) {
 829                                 pos = strstr((*slst)[j], "\xC3\x9F");
 830                                 while (pos) {
 831                                     *pos = 'S';
 832                                     *(pos+1) = 'S';
 833                                     pos = strstr(pos+2, "\xC3\x9F");
 834                                 }
 835                             } else {
 836                                 pos = strchr((*slst)[j], '\xDF');
 837                                 while (pos) {
 838                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
 839                                     mystrrep((*slst)[j], "\xDF", "SS");
 840                                     pos = strchr((*slst)[j], '\xDF');
 841                                 }
 842                             }
 843                         }
 844                      }
 845                      break;
 846                    }
 847   }
 848
 849   // LANG_hu section: replace '-' with ' ' in Hungarian
 850   if (langnum == LANG_hu) {
 851       for (int j=0; j < ns; j++) {
 852           char * pos = strchr((*slst)[j],'-');
 853           if (pos) {
 854               int info;
 855               char w[MAXWORDUTF8LEN];
 856               *pos = '\0';
 857               strcpy(w, (*slst)[j]);
 858               strcat(w, pos + 1);
 859               spell(w, &info, NULL);
 860               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
 861                   *pos = ' ';
 862               } else *pos = '-';
 863           }
 864       }
 865   }
 866   // END OF LANG_hu section
 867
 868   // try ngram approach since found nothing
 869   if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
 870       switch(captype) {
 871           case NOCAP: {
 872               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
 873               break;
 874           }
 875           case HUHINITCAP:
 876               capwords = 1;
 877           case HUHCAP: {
 878               memcpy(wspace,cw,(wl+1));
 879               mkallsmall2(wspace, unicw, nc);
 880               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 881               break;
 882           }
 883          case INITCAP: {
 884               capwords = 1;
 885               memcpy(wspace,cw,(wl+1));
 886               mkallsmall2(wspace, unicw, nc);
 887               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 888               break;
 889           }
 890           case ALLCAP: {
 891               memcpy(wspace,cw,(wl+1));
 892               mkallsmall2(wspace, unicw, nc);
 893               int oldns = ns;
 894               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 895               for (int j = oldns; j < ns; j++)
 896                   mkallcap((*slst)[j]);
 897               break;
 898          }
 899       }
 900   }
 901
 902   // try dash suggestion (Afo-American -> Afro-American)
 903   if (strchr(cw, '-')) {
 904      char * pos = strchr(cw, '-');
 905      char * ppos = cw;
 906      int nodashsug = 1;
 907      char ** nlst = NULL;
 908      int nn = 0;
 909      int last = 0;
 910      for (int j = 0; j < ns && nodashsug == 1; j++) {
 911         if (strchr((*slst)[j], '-')) nodashsug = 0;
 912      }
 913      while (nodashsug && !last) {
 914         if (*pos == '\0') last = 1; else *pos = '\0';
 915         if (!spell(ppos)) {
 916           nn = suggest(&nlst, ppos);
 917           for (int j = nn - 1; j >= 0; j--) {
 918             strncpy(wspace, cw, ppos - cw);
 919             strcpy(wspace + (ppos - cw), nlst[j]);
 920             if (!last) {
 921                 strcat(wspace, "-");
 922                 strcat(wspace, pos + 1);
 923             }
 924             ns = insert_sug(slst, wspace, ns);
 925             free(nlst[j]);
 926           }
 927           if (nlst != NULL) free(nlst);
 928           nodashsug = 0;
 929         }
 930         if (!last) {
 931           *pos = '-';
 932           ppos = pos + 1;
 933           pos = strchr(ppos, '-');
 934         }
 935         if (!pos) pos = cw + strlen(cw);
 936      }
 937   }
 938
 939   // word reversing wrapper for complex prefixes
 940   if (complexprefixes) {
 941     for (int j = 0; j < ns; j++) {
 942       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
 943     }
 944   }
 945
 946   // capitalize
 947   if (capwords) for (int j=0; j < ns; j++) {
 948       mkinitcap((*slst)[j]);
 949   }
 950
 951   // expand suggestions with dot(s)
 952   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
 953     for (int j = 0; j < ns; j++) {
 954       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
 955       strcat((*slst)[j], word + strlen(word) - abbv);
 956     }
 957   }
 958
 959   // remove bad capitalized and forbidden forms
 960   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
 961   switch (captype) {
 962     case INITCAP:
 963     case ALLCAP: {
 964       int l = 0;
 965       for (int j=0; j < ns; j++) {
 966         if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
 967           char s[MAXSWUTF8L];
 968           w_char w[MAXSWL];
 969           int len;
 970           if (utf8) {
 971             len = u8_u16(w, MAXSWL, (*slst)[j]);
 972           } else {
 973             strcpy(s, (*slst)[j]);
 974             len = strlen(s);
 975           }
 976           mkallsmall2(s, w, len);
 977           free((*slst)[j]);
 978           if (spell(s)) {
 979             (*slst)[l] = mystrdup(s);
 980             if ((*slst)[l]) l++;
 981           } else {
 982             mkinitcap2(s, w, len);
 983             if (spell(s)) {
 984               (*slst)[l] = mystrdup(s);
 985               if ((*slst)[l]) l++;
 986             }
 987           }
 988         } else {
 989           (*slst)[l] = (*slst)[j];
 990           l++;
 991         }
 992       }
 993       ns = l;
 994     }
 995   }
 996   }
 997
 998   // remove duplications
 999   int l = 0;
1000   for (int j = 0; j < ns; j++) {
1001     (*slst)[l] = (*slst)[j];
1002     for (int k = 0; k < l; k++) {
1003       if (strcmp((*slst)[k], (*slst)[j]) == 0) {
1004         free((*slst)[j]);
1005         l--;
1006       }
1007     }
1008     l++;
1009   }
1010
1011   // output conversion
1012   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1013   for (int j = 0; rl && j < ns; j++) {
1014     if (rl->conv((*slst)[j], wspace)) {
1015       free((*slst)[j]);
1016       (*slst)[j] = mystrdup(wspace);
1017     }
1018   }
1019
1020   // if suggestions removed by nosuggest, onlyincompound parameters
1021   if (l == 0 && *slst) {
1022     free(*slst);
1023     *slst = NULL;
1024   }
1025   return l;
1026 }
1027
1028 void Hunspell::free_list(char *** slst, int n) {
1029         freelist(slst, n);
1030 }
1031
1032 char * Hunspell::get_dic_encoding()
1033 {
1034   return encoding;
1035 }
1036
1037 #ifdef HUNSPELL_EXPERIMENTAL
1038 // XXX need UTF-8 support
1039 int Hunspell::suggest_auto(char*** slst, const char * word)
1040 {
1041   char cw[MAXWORDUTF8LEN];
1042   char wspace[MAXWORDUTF8LEN];
1043   if (!pSMgr || maxdic == 0) return 0;
1044   int wl = strlen(word);
1045   if (utf8) {
1046     if (wl >= MAXWORDUTF8LEN) return 0;
1047   } else {
1048     if (wl >= MAXWORDLEN) return 0;
1049   }
1050   int captype = 0;
1051   int abbv = 0;
1052   wl = cleanword(cw, word, &captype, &abbv);
1053   if (wl == 0) return 0;
1054   int ns = 0;
1055   *slst = NULL; // HU, nsug in pSMgr->suggest
1056
1057   switch(captype) {
1058      case NOCAP:   {
1059                      ns = pSMgr->suggest_auto(slst, cw, ns);
1060                      if (ns>0) break;
1061                      break;
1062                    }
1063
1064      case INITCAP: {
1065                      memcpy(wspace,cw,(wl+1));
1066                      mkallsmall(wspace);
1067                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1068                      for (int j=0; j < ns; j++)
1069                        mkinitcap((*slst)[j]);
1070                      ns = pSMgr->suggest_auto(slst, cw, ns);
1071                      break;
1072
1073                    }
1074
1075      case HUHINITCAP:
1076      case HUHCAP: {
1077                      ns = pSMgr->suggest_auto(slst, cw, ns);
1078                      if (ns == 0) {
1079                         memcpy(wspace,cw,(wl+1));
1080                         mkallsmall(wspace);
1081                         ns = pSMgr->suggest_auto(slst, wspace, ns);
1082                      }
1083                      break;
1084                    }
1085
1086      case ALLCAP: {
1087                      memcpy(wspace,cw,(wl+1));
1088                      mkallsmall(wspace);
1089                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1090
1091                      mkinitcap(wspace);
1092                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1093
1094                      for (int j=0; j < ns; j++)
1095                        mkallcap((*slst)[j]);
1096                      break;
1097                    }
1098   }
1099
1100   // word reversing wrapper for complex prefixes
1101   if (complexprefixes) {
1102     for (int j = 0; j < ns; j++) {
1103       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
1104     }
1105   }
1106
1107   // expand suggestions with dot(s)
1108   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1109     for (int j = 0; j < ns; j++) {
1110       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
1111       strcat((*slst)[j], word + strlen(word) - abbv);
1112     }
1113   }
1114
1115   // LANG_hu section: replace '-' with ' ' in Hungarian
1116   if (langnum == LANG_hu) {
1117       for (int j=0; j < ns; j++) {
1118           char * pos = strchr((*slst)[j],'-');
1119           if (pos) {
1120               int info;
1121               char w[MAXWORDUTF8LEN];
1122               *pos = '\0';
1123               strcpy(w, (*slst)[j]);
1124               strcat(w, pos + 1);
1125               spell(w, &info, NULL);
1126               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1127                   *pos = ' ';
1128               } else *pos = '-';
1129           }
1130       }
1131   }
1132   // END OF LANG_hu section
1133   return ns;
1134 }
1135 #endif
1136
1137 int Hunspell::stem(char*** slst, char ** desc, int n)
1138 {
1139   char result[MAXLNLEN];
1140   char result2[MAXLNLEN];
1141   *slst = NULL;
1142   if (n == 0) return 0;
1143   *result2 = '\0';
1144   for (int i = 0; i < n; i++) {
1145     *result = '\0';
1146     // add compound word parts (except the last one)
1147     char * s = (char *) desc[i];
1148     char * part = strstr(s, MORPH_PART);
1149     if (part) {
1150         char * nextpart = strstr(part + 1, MORPH_PART);
1151         while (nextpart) {
1152             copy_field(result + strlen(result), part, MORPH_PART);
1153             part = nextpart;
1154             nextpart = strstr(part + 1, MORPH_PART);
1155         }
1156         s = part;
1157     }
1158
1159     char **pl;
1160     char tok[MAXLNLEN];
1161     strcpy(tok, s);
1162     char * alt = strstr(tok, " | ");
1163     while (alt) {
1164         alt[1] = MSEP_ALT;
1165         alt = strstr(alt, " | ");
1166     }
1167     int pln = line_tok(tok, &pl, MSEP_ALT);
1168     for (int k = 0; k < pln; k++) {
1169         // add derivational suffixes
1170         if (strstr(pl[k], MORPH_DERI_SFX)) {
1171             // remove inflectional suffixes
1172             char * is = strstr(pl[k], MORPH_INFL_SFX);
1173             if (is) *is = '\0';
1174             char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1175             if (sg) {
1176                 char ** gen;
1177                 int genl = line_tok(sg, &gen, MSEP_REC);
1178                 free(sg);
1179                 for (int j = 0; j < genl; j++) {
1180                     sprintf(result2 + strlen(result2), "%c%s%s",
1181                             MSEP_REC, result, gen[j]);
1182                 }
1183                 freelist(&gen, genl);
1184             }
1185         } else {
1186             sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1187             if (strstr(pl[k], MORPH_SURF_PFX)) {
1188                 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1189             }
1190             copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1191         }
1192     }
1193     freelist(&pl, pln);
1194   }
1195   int sln = line_tok(result2, slst, MSEP_REC);
1196   return uniqlist(*slst, sln);
1197
1198 }
1199
1200 int Hunspell::stem(char*** slst, const char * word)
1201 {
1202   char ** pl;
1203   int pln = analyze(&pl, word);
1204   int pln2 = stem(slst, pl, pln);
1205   freelist(&pl, pln);
1206   return pln2;
1207 }
1208
1209 #ifdef HUNSPELL_EXPERIMENTAL
1210 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1211 {
1212   char cw[MAXWORDUTF8LEN];
1213   char wspace[MAXWORDUTF8LEN];
1214   if (! pSMgr || maxdic == 0) return 0;
1215   int wl = strlen(word);
1216   if (utf8) {
1217     if (wl >= MAXWORDUTF8LEN) return 0;
1218   } else {
1219     if (wl >= MAXWORDLEN) return 0;
1220   }
1221   int captype = 0;
1222   int abbv = 0;
1223   wl = cleanword(cw, word, &captype, &abbv);
1224   if (wl == 0) return 0;
1225
1226   int ns = 0; // ns=0 = normalized input
1227
1228   *slst = NULL; // HU, nsug in pSMgr->suggest
1229
1230   switch(captype) {
1231      case HUHCAP:
1232      case NOCAP:   {
1233                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1234
1235                      if ((abbv) && (ns == 0)) {
1236                          memcpy(wspace,cw,wl);
1237                          *(wspace+wl) = '.';
1238                          *(wspace+wl+1) = '\0';
1239                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1240                      }
1241
1242                      break;
1243                    }
1244
1245      case INITCAP: {
1246
1247                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1248
1249                      if (ns == 0 || ((*slst)[0][0] == '#')) {
1250                         memcpy(wspace,cw,(wl+1));
1251                         mkallsmall(wspace);
1252                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1253                      }
1254
1255                      break;
1256
1257                    }
1258
1259      case ALLCAP: {
1260                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1261                      if (ns != 0) break;
1262
1263                      memcpy(wspace,cw,(wl+1));
1264                      mkallsmall(wspace);
1265                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1266
1267                      if (ns == 0) {
1268                          mkinitcap(wspace);
1269                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1270                      }
1271                      break;
1272                    }
1273   }
1274
1275   return ns;
1276 }
1277 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1278
1279 const char * Hunspell::get_wordchars()
1280 {
1281   return pAMgr->get_wordchars();
1282 }
1283
1284 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1285 {
1286   return pAMgr->get_wordchars_utf16(len);
1287 }
1288
1289 void Hunspell::mkinitcap(char * p)
1290 {
1291   if (!utf8) {
1292     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1293   } else {
1294       int len;
1295       w_char u[MAXWORDLEN];
1296       len = u8_u16(u, MAXWORDLEN, p);
1297       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1298       u[0].h = (unsigned char) (i >> 8);
1299       u[0].l = (unsigned char) (i & 0x00FF);
1300       u16_u8(p, MAXWORDUTF8LEN, u, len);
1301   }
1302 }
1303
1304 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1305 {
1306   if (!utf8) {
1307     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1308   } else if (nc > 0) {
1309       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1310       u[0].h = (unsigned char) (i >> 8);
1311       u[0].l = (unsigned char) (i & 0x00FF);
1312       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1313       return strlen(p);
1314   }
1315   return nc;
1316 }
1317
1318 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1319 {
1320   if (!utf8) {
1321     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1322   } else if (nc > 0) {
1323       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1324       u[0].h = (unsigned char) (i >> 8);
1325       u[0].l = (unsigned char) (i & 0x00FF);
1326       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1327       return strlen(p);
1328   }
1329   return nc;
1330 }
1331
1332 int Hunspell::add(const char * word)
1333 {
1334     if (pHMgr[0]) return (pHMgr[0])->add(word);
1335     return 0;
1336 }
1337
1338 int Hunspell::add_with_affix(const char * word, const char * example)
1339 {
1340     if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1341     return 0;
1342 }
1343
1344 int Hunspell::remove(const char * word)
1345 {
1346     if (pHMgr[0]) return (pHMgr[0])->remove(word);
1347     return 0;
1348 }
1349
1350 const char * Hunspell::get_version()
1351 {
1352   return pAMgr->get_version();
1353 }
1354
1355 struct cs_info * Hunspell::get_csconv()
1356 {
1357   return csconv;
1358 }
1359
1360 void Hunspell::cat_result(char * result, char * st)
1361 {
1362     if (st) {
1363         if (*result) mystrcat(result, "\n", MAXLNLEN);
1364         mystrcat(result, st, MAXLNLEN);
1365         free(st);
1366     }
1367 }
1368
1369 int Hunspell::analyze(char*** slst, const char * word)
1370 {
1371   char cw[MAXWORDUTF8LEN];
1372   char wspace[MAXWORDUTF8LEN];
1373   w_char unicw[MAXWORDLEN];
1374   int wl2 = 0;
1375   *slst = NULL;
1376   if (! pSMgr || maxdic == 0) return 0;
1377   int nc = strlen(word);
1378   if (utf8) {
1379     if (nc >= MAXWORDUTF8LEN) return 0;
1380   } else {
1381     if (nc >= MAXWORDLEN) return 0;
1382   }
1383   int captype = 0;
1384   int abbv = 0;
1385   int wl = 0;
1386
1387   // input conversion
1388   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1389   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
1390   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1391
1392   if (wl == 0) {
1393       if (abbv) {
1394           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1395           cw[wl] = '\0';
1396           abbv = 0;
1397       } else return 0;
1398   }
1399
1400   char result[MAXLNLEN];
1401   char * st = NULL;
1402
1403   *result = '\0';
1404
1405   int n = 0;
1406   int n2 = 0;
1407   int n3 = 0;
1408
1409   // test numbers
1410   // LANG_hu section: set dash information for suggestions
1411   if (langnum == LANG_hu) {
1412   while ((n < wl) &&
1413         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1414         n++;
1415         if ((cw[n] == '.') || (cw[n] == ',')) {
1416                 if (((n2 == 0) && (n > 3)) ||
1417                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1418                 n2++;
1419                 n3 = n;
1420         }
1421   }
1422
1423   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1424   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
1425         mystrcat(result, cw, MAXLNLEN);
1426         result[n - 1] = '\0';
1427         if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1428         else {
1429                 char sign = cw[n];
1430                 cw[n] = '\0';
1431                 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1432                 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1433                 cw[n] = sign;
1434                 cat_result(result, pSMgr->suggest_morph(cw + n));
1435         }
1436         return line_tok(result, slst, MSEP_REC);
1437   }
1438   }
1439   // END OF LANG_hu section
1440
1441   switch(captype) {
1442      case HUHCAP:
1443      case HUHINITCAP:
1444      case NOCAP:  {
1445                     cat_result(result, pSMgr->suggest_morph(cw));
1446                     if (abbv) {
1447                         memcpy(wspace,cw,wl);
1448                         *(wspace+wl) = '.';
1449                         *(wspace+wl+1) = '\0';
1450                         cat_result(result, pSMgr->suggest_morph(wspace));
1451                     }
1452                     break;
1453                 }
1454      case INITCAP: {
1455                      wl = mkallsmall2(cw, unicw, nc);
1456                      memcpy(wspace,cw,(wl+1));
1457                      wl2 = mkinitcap2(cw, unicw, nc);
1458                      cat_result(result, pSMgr->suggest_morph(wspace));
1459                      cat_result(result, pSMgr->suggest_morph(cw));
1460                      if (abbv) {
1461                          *(wspace+wl) = '.';
1462                          *(wspace+wl+1) = '\0';
1463                          cat_result(result, pSMgr->suggest_morph(wspace));
1464
1465                          memcpy(wspace, cw, wl2);
1466                          *(wspace+wl2) = '.';
1467                          *(wspace+wl2+1) = '\0';
1468
1469                          cat_result(result, pSMgr->suggest_morph(wspace));
1470                      }
1471                      break;
1472                    }
1473      case ALLCAP: {
1474                      cat_result(result, pSMgr->suggest_morph(cw));
1475                      if (abbv) {
1476                          memcpy(wspace,cw,wl);
1477                          *(wspace+wl) = '.';
1478                          *(wspace+wl+1) = '\0';
1479                          cat_result(result, pSMgr->suggest_morph(cw));
1480                      }
1481                      wl = mkallsmall2(cw, unicw, nc);
1482                      memcpy(wspace,cw,(wl+1));
1483                      wl2 = mkinitcap2(cw, unicw, nc);
1484
1485                      cat_result(result, pSMgr->suggest_morph(wspace));
1486                      cat_result(result, pSMgr->suggest_morph(cw));
1487                      if (abbv) {
1488                          *(wspace+wl) = '.';
1489                          *(wspace+wl+1) = '\0';
1490                          cat_result(result, pSMgr->suggest_morph(wspace));
1491
1492                          memcpy(wspace, cw, wl2);
1493                          *(wspace+wl2) = '.';
1494                          *(wspace+wl2+1) = '\0';
1495
1496                          cat_result(result, pSMgr->suggest_morph(wspace));
1497                      }
1498                      break;
1499                    }
1500   }
1501
1502   if (*result) {
1503     // word reversing wrapper for complex prefixes
1504     if (complexprefixes) {
1505       if (utf8) reverseword_utf(result); else reverseword(result);
1506     }
1507     return line_tok(result, slst, MSEP_REC);
1508
1509   }
1510
1511   // compound word with dash (HU) I18n
1512   char * dash = NULL;
1513   int nresult = 0;
1514   // LANG_hu section: set dash information for suggestions
1515   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1516   if ((langnum == LANG_hu) && dash) {
1517       *dash='\0';
1518       // examine 2 sides of the dash
1519       if (dash[1] == '\0') { // base word ending with dash
1520         if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
1521       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1522         if (spell(cw) && (spell("-e"))) {
1523                         st = pSMgr->suggest_morph(cw);
1524                         if (st) {
1525                                 mystrcat(result, st, MAXLNLEN);
1526                                 free(st);
1527                         }
1528                         mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1529                         st = pSMgr->suggest_morph("-e");
1530                         if (st) {
1531                                 mystrcat(result, st, MAXLNLEN);
1532                                 free(st);
1533                         }
1534                         return line_tok(result, slst, MSEP_REC);
1535                 }
1536       } else {
1537       // first word ending with dash: word- XXX ???
1538         char r2 = *(dash + 1);
1539         dash[0]='-';
1540         dash[1]='\0';
1541         nresult = spell(cw);
1542         dash[1] = r2;
1543         dash[0]='\0';
1544         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1545                 ((dash[1] > '0') && (dash[1] < '9')))) {
1546                             st = pSMgr->suggest_morph(cw);
1547                             if (st) {
1548                                 mystrcat(result, st, MAXLNLEN);
1549                                     free(st);
1550                                 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1551                             }
1552                             st = pSMgr->suggest_morph(dash+1);
1553                             if (st) {
1554                                     mystrcat(result, st, MAXLNLEN);
1555                                     free(st);
1556                             }
1557                             return line_tok(result, slst, MSEP_REC);
1558                         }
1559       }
1560       // affixed number in correct word
1561      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1562                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1563          *dash='-';
1564          n = 1;
1565          if (*(dash - n) == '.') n++;
1566          // search first not a number character to left from dash
1567          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1568             n++;
1569          }
1570          if ((dash - n) < cw) n--;
1571          // numbers: valami1000000-hoz
1572          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1573          // 56-hoz, 6-hoz
1574          for(; n >= 1; n--) {
1575             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1576                     mystrcat(result, cw, MAXLNLEN);
1577                     result[dash - cw - n] = '\0';
1578                         st = pSMgr->suggest_morph(dash - n);
1579                         if (st) {
1580                         mystrcat(result, st, MAXLNLEN);
1581                                 free(st);
1582                         }
1583                         return line_tok(result, slst, MSEP_REC);
1584             }
1585          }
1586      }
1587   }
1588   return 0;
1589 }
1590
1591 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1592 {
1593   *slst = NULL;
1594   if (!pSMgr || !pln) return 0;
1595   char **pl2;
1596   int pl2n = analyze(&pl2, word);
1597   int captype = 0;
1598   int abbv = 0;
1599   char cw[MAXWORDUTF8LEN];
1600   cleanword(cw, word, &captype, &abbv);
1601   char result[MAXLNLEN];
1602   *result = '\0';
1603
1604   for (int i = 0; i < pln; i++) {
1605     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1606   }
1607   freelist(&pl2, pl2n);
1608
1609   if (*result) {
1610     // allcap
1611     if (captype == ALLCAP) mkallcap(result);
1612
1613     // line split
1614     int linenum = line_tok(result, slst, MSEP_REC);
1615
1616     // capitalize
1617     if (captype == INITCAP || captype == HUHINITCAP) {
1618         for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1619     }
1620
1621     // temporary filtering of prefix related errors (eg.
1622     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1623
1624     int r = 0;
1625     for (int j=0; j < linenum; j++) {
1626         if (!spell((*slst)[j])) {
1627             free((*slst)[j]);
1628             (*slst)[j] = NULL;
1629         } else {
1630             if (r < j) (*slst)[r] = (*slst)[j];
1631             r++;
1632         }
1633     }
1634     if (r > 0) return r;
1635     free(*slst);
1636     *slst = NULL;
1637   }
1638   return 0;
1639 }
1640
1641 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1642 {
1643   char **pl;
1644   int pln = analyze(&pl, pattern);
1645   int n = generate(slst, word, pl, pln);
1646   freelist(&pl, pln);
1647   return uniqlist(*slst, n);
1648 }
1649
1650 // minimal XML parser functions
1651 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1652 {
1653    char * d = dest;
1654    if (!par) return 0;
1655    char end = *par;
1656    char * dmax = dest + max;
1657    if (end == '>') end = '<';
1658    else if (end != '\'' && end != '"') return 0; // bad XML
1659    for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1660    *d = '\0';
1661    mystrrep(dest, "&lt;", "<");
1662    mystrrep(dest, "&amp;", "&");
1663    return d - dest;
1664 }
1665
1666 // return the beginning of the element (attr == NULL) or the attribute
1667 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1668 {
1669   const char * end = strchr(s, '>');
1670   const char * p = s;
1671   if (attr == NULL) return end;
1672   do {
1673     p = strstr(p, attr);
1674     if (!p || p >= end) return 0;
1675   } while (*(p-1) != ' ' &&  *(p-1) != '\n');
1676   return p + strlen(attr);
1677 }
1678
1679 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
1680   char cw[MAXWORDUTF8LEN];
1681   if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1682     strcmp(cw, value) == 0) return 1;
1683   return 0;
1684 }
1685
1686 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1687     int n = 0;
1688     char * p;
1689     if (!list) return 0;
1690     for (p = list; (p = strstr(p, tag)); p++) n++;
1691     if (n == 0) return 0;
1692     *slst = (char **) malloc(sizeof(char *) * n);
1693     if (!*slst) return 0;
1694     for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
1695         int l = strlen(p);
1696         (*slst)[n] = (char *) malloc(l);
1697         if (!(*slst)[n]) return (n > 0 ? n - 1 : 0);
1698         get_xml_par((*slst)[n], p + strlen(tag) - 1, l);
1699     }
1700     return n;
1701 }
1702
1703 int Hunspell::spellml(char*** slst, const char * word)
1704 {
1705   char *q, *q2;
1706   char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1707   q = (char *) strstr(word, "<query");
1708   if (!q) return 0; // bad XML input
1709   q2 = strchr(q, '>');
1710   if (!q2) return 0; // bad XML input
1711   q2 = strstr(q2, "<word");
1712   if (!q2) return 0; // bad XML input
1713   if (check_xml_par(q, "type=", "analyze")) {
1714       int n = 0, s = 0;
1715       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) n = analyze(slst, cw);
1716       if (n == 0) return 0;
1717       // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1718       for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
1719       char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
1720       if (!r) return 0;
1721       strcpy(r, "<code>");
1722       for (int i = 0; i < n; i++) {
1723         int l = strlen(r);
1724         strcpy(r + l, "<a>");
1725         strcpy(r + l + 3, (*slst)[i]);
1726         mystrrep(r + l + 3, "\t", " ");
1727         mystrrep(r + l + 3, "<", "&lt;");
1728         mystrrep(r + l + 3, "&", "&amp;");
1729         strcat(r, "</a>");
1730         free((*slst)[i]);
1731       }
1732       strcat(r, "</code>");
1733       (*slst)[0] = r;
1734       return 1;
1735   } else if (check_xml_par(q, "type=", "stem")) {
1736       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) return stem(slst, cw);
1737   } else if (check_xml_par(q, "type=", "generate")) {
1738       int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN);
1739       if (n == 0) return 0;
1740       char * q3 = strstr(q2 + 1, "<word");
1741       if (q3) {
1742         if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN)) {
1743             return generate(slst, cw, cw2);
1744         }
1745       } else {
1746         char ** slst2;
1747         if ((q2 = strstr(q2 + 1, "<code")) &&
1748           (n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
1749              int n2 = generate(slst, cw, slst2, n);
1750              freelist(&slst2, n);
1751              return uniqlist(*slst, n2);
1752         }
1753       }
1754   }
1755   return 0;
1756 }
1757
1758
1759 #ifdef HUNSPELL_EXPERIMENTAL
1760 // XXX need UTF-8 support
1761 char * Hunspell::morph_with_correction(const char * word)
1762 {
1763   char cw[MAXWORDUTF8LEN];
1764   char wspace[MAXWORDUTF8LEN];
1765   if (! pSMgr || maxdic == 0) return NULL;
1766   int wl = strlen(word);
1767   if (utf8) {
1768     if (wl >= MAXWORDUTF8LEN) return NULL;
1769   } else {
1770     if (wl >= MAXWORDLEN) return NULL;
1771   }
1772   int captype = 0;
1773   int abbv = 0;
1774   wl = cleanword(cw, word, &captype, &abbv);
1775   if (wl == 0) return NULL;
1776
1777   char result[MAXLNLEN];
1778   char * st = NULL;
1779
1780   *result = '\0';
1781
1782
1783   switch(captype) {
1784      case NOCAP:   {
1785                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1786                      if (st) {
1787                         mystrcat(result, st, MAXLNLEN);
1788                         free(st);
1789                      }
1790                      if (abbv) {
1791                          memcpy(wspace,cw,wl);
1792                          *(wspace+wl) = '.';
1793                          *(wspace+wl+1) = '\0';
1794                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1795                          if (st) {
1796                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1797                             mystrcat(result, st, MAXLNLEN);
1798                             free(st);
1799                                                  }
1800                      }
1801                                          break;
1802                    }
1803      case INITCAP: {
1804                      memcpy(wspace,cw,(wl+1));
1805                      mkallsmall(wspace);
1806                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1807                      if (st) {
1808                         mystrcat(result, st, MAXLNLEN);
1809                         free(st);
1810                      }
1811                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1812                      if (st) {
1813                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1814                         mystrcat(result, st, MAXLNLEN);
1815                         free(st);
1816                      }
1817                      if (abbv) {
1818                          memcpy(wspace,cw,wl);
1819                          *(wspace+wl) = '.';
1820                          *(wspace+wl+1) = '\0';
1821                          mkallsmall(wspace);
1822                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1823                          if (st) {
1824                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1825                             mystrcat(result, st, MAXLNLEN);
1826                             free(st);
1827                          }
1828                          mkinitcap(wspace);
1829                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1830                          if (st) {
1831                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1832                             mystrcat(result, st, MAXLNLEN);
1833                             free(st);
1834                          }
1835                      }
1836                      break;
1837                    }
1838      case HUHCAP: {
1839                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1840                      if (st) {
1841                         mystrcat(result, st, MAXLNLEN);
1842                         free(st);
1843                      }
1844                      memcpy(wspace,cw,(wl+1));
1845                      mkallsmall(wspace);
1846                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1847                      if (st) {
1848                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1849                         mystrcat(result, st, MAXLNLEN);
1850                         free(st);
1851                      }
1852                      break;
1853                  }
1854      case ALLCAP: {
1855                      memcpy(wspace,cw,(wl+1));
1856                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1857                      if (st) {
1858                         mystrcat(result, st, MAXLNLEN);
1859                         free(st);
1860                      }
1861                      mkallsmall(wspace);
1862                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1863                      if (st) {
1864                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1865                         mystrcat(result, st, MAXLNLEN);
1866                         free(st);
1867                      }
1868                      mkinitcap(wspace);
1869                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1870                      if (st) {
1871                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1872                         mystrcat(result, st, MAXLNLEN);
1873                         free(st);
1874                      }
1875                      if (abbv) {
1876                         memcpy(wspace,cw,(wl+1));
1877                         *(wspace+wl) = '.';
1878                         *(wspace+wl+1) = '\0';
1879                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1880                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1881                         if (st) {
1882                             mystrcat(result, st, MAXLNLEN);
1883                             free(st);
1884                         }
1885                         mkallsmall(wspace);
1886                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1887                         if (st) {
1888                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1889                           mystrcat(result, st, MAXLNLEN);
1890                           free(st);
1891                         }
1892                         mkinitcap(wspace);
1893                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1894                         if (st) {
1895                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1896                           mystrcat(result, st, MAXLNLEN);
1897                           free(st);
1898                         }
1899                      }
1900                      break;
1901                    }
1902   }
1903
1904   if (*result) return mystrdup(result);
1905   return NULL;
1906 }
1907
1908 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1909
1910 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1911 {
1912         return (Hunhandle*)(new Hunspell(affpath, dpath));
1913 }
1914
1915 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1916     const char * key)
1917 {
1918         return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1919 }
1920
1921 void Hunspell_destroy(Hunhandle *pHunspell)
1922 {
1923         delete (Hunspell*)(pHunspell);
1924 }
1925
1926 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1927 {
1928         return ((Hunspell*)pHunspell)->spell(word);
1929 }
1930
1931 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1932 {
1933         return ((Hunspell*)pHunspell)->get_dic_encoding();
1934 }
1935
1936 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1937 {
1938         return ((Hunspell*)pHunspell)->suggest(slst, word);
1939 }
1940
1941 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
1942 {
1943         return ((Hunspell*)pHunspell)->analyze(slst, word);
1944 }
1945
1946 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
1947 {
1948         return ((Hunspell*)pHunspell)->stem(slst, word);
1949 }
1950
1951 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, char** desc, int n)
1952 {
1953         return ((Hunspell*)pHunspell)->stem(slst, desc, n);
1954 }
1955
1956 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
1957     const char * word2)
1958 {
1959         return ((Hunspell*)pHunspell)->generate(slst, word, word2);
1960 }
1961
1962 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
1963     char** desc, int n)
1964 {
1965         return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
1966 }
1967
1968   /* functions for run-time modification of the dictionary */
1969
1970   /* add word to the run-time dictionary */
1971
1972 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
1973         return ((Hunspell*)pHunspell)->add(word);
1974 }
1975
1976   /* add word to the run-time dictionary with affix flags of
1977    * the example (a dictionary word): Hunspell will recognize
1978    * affixed forms of the new word, too.
1979    */
1980
1981 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
1982         const char * example) {
1983         return ((Hunspell*)pHunspell)->add_with_affix(word, example);
1984 }
1985
1986   /* remove word from the run-time dictionary */
1987
1988 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
1989         return ((Hunspell*)pHunspell)->remove(word);
1990 }
1991
1992 void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n) {
1993         freelist(slst, n);
1994 }