third_party/hunspell_new/src/hunspell/affixmgr.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #include <stdlib.h>
   5 #include <string.h>
   6 #include <stdio.h>
   7 #include <ctype.h>
   8
   9 #include <vector>
  10
  11 #include "affixmgr.hxx"
  12 #include "affentry.hxx"
  13 #include "langnum.hxx"
  14
  15 #include "csutil.hxx"
  16
  17 #ifdef HUNSPELL_CHROME_CLIENT
  18 AffixMgr::AffixMgr(hunspell::BDictReader* reader, HashMgr** ptr, int * md)
  19 {
  20   bdict_reader = reader;
  21 #else
  22 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
  23 {
  24 #endif
  25   // register hash manager and load affix data from aff file
  26   pHMgr = ptr[0];
  27   alldic = ptr;
  28   maxdic = md;
  29   keystring = NULL;
  30   trystring = NULL;
  31   encoding=NULL;
  32   csconv=NULL;
  33   utf8 = 0;
  34   complexprefixes = 0;
  35   maptable = NULL;
  36   nummap = 0;
  37   breaktable = NULL;
  38   numbreak = -1;
  39   reptable = NULL;
  40   numrep = 0;
  41   iconvtable = NULL;
  42   oconvtable = NULL;
  43   checkcpdtable = NULL;
  44   // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
  45   simplifiedcpd = 0;
  46   numcheckcpd = 0;
  47   defcpdtable = NULL;
  48   numdefcpd = 0;
  49   phone = NULL;
  50   compoundflag = FLAG_NULL; // permits word in compound forms
  51   compoundbegin = FLAG_NULL; // may be first word in compound forms
  52   compoundmiddle = FLAG_NULL; // may be middle word in compound forms
  53   compoundend = FLAG_NULL; // may be last word in compound forms
  54   compoundroot = FLAG_NULL; // compound word signing flag
  55   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
  56   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
  57   compoundmoresuffixes = 0; // allow more suffixes within compound words
  58   checkcompounddup = 0; // forbid double words in compounds
  59   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
  60   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
  61   checkcompoundtriple = 0; // forbid compounds with triple letters
  62   simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
  63   forbiddenword = FORBIDDENWORD; // forbidden word signing flag
  64   nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
  65   nongramsuggest = FLAG_NULL;
  66   lang = NULL; // language
  67   langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
  68   needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
  69   cpdwordmax = -1; // default: unlimited wordcount in compound words
  70   cpdmin = -1;  // undefined
  71   cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
  72   cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
  73   cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
  74   cpdvowels_utf16_len=0; // vowels
  75   pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
  76   sfxappnd=NULL; // previous suffix for counting a special syllables BUG
  77   cpdsyllablenum=NULL; // syllable count incrementing flag
  78   checknum=0; // checking numbers, and word with numbers
  79   wordchars=NULL; // letters + spec. word characters
  80   wordchars_utf16=NULL; // letters + spec. word characters
  81   wordchars_utf16_len=0; // letters + spec. word characters
  82   ignorechars=NULL; // letters + spec. word characters
  83   ignorechars_utf16=NULL; // letters + spec. word characters
  84   ignorechars_utf16_len=0; // letters + spec. word characters
  85   version=NULL; // affix and dictionary file version string
  86   havecontclass=0; // flags of possible continuing classes (double affix)
  87   // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
  88   // in morhological description in dictionary file. It's often combined with PSEUDOROOT.
  89   lemma_present = FLAG_NULL;
  90   circumfix = FLAG_NULL;
  91   onlyincompound = FLAG_NULL;
  92   maxngramsugs = -1; // undefined
  93   maxdiff = -1; // undefined
  94   onlymaxdiff = 0;
  95   maxcpdsugs = -1; // undefined
  96   nosplitsugs = 0;
  97   sugswithdots = 0;
  98   keepcase = 0;
  99   forceucase = 0;
 100   warn = 0;
 101   forbidwarn = 0;
 102   checksharps = 0;
 103   substandard = FLAG_NULL;
 104   fullstrip = 0;
 105
 106   sfx = NULL;
 107   pfx = NULL;
 108
 109   for (int i=0; i < SETSIZE; i++) {
 110      pStart[i] = NULL;
 111      sStart[i] = NULL;
 112      pFlag[i] = NULL;
 113      sFlag[i] = NULL;
 114   }
 115
 116 #ifdef HUNSPELL_CHROME_CLIENT
 117   // Define dummy parameters for parse_file() to avoid changing the parameters
 118   // of parse_file(). This may make it easier to merge the changes of the
 119   // original hunspell.
 120   const char* affpath = NULL;
 121   const char* key = NULL;
 122 #else
 123   for (int j=0; j < CONTSIZE; j++) {
 124     contclasses[j] = 0;
 125   }
 126 #endif
 127
 128   if (parse_file(affpath, key)) {
 129      HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
 130   }
 131
 132   if (cpdmin == -1) cpdmin = MINCPDLEN;
 133
 134 }
 135
 136
 137 AffixMgr::~AffixMgr()
 138 {
 139   // pass through linked prefix entries and clean up
 140   for (int i=0; i < SETSIZE ;i++) {
 141        pFlag[i] = NULL;
 142        PfxEntry * ptr = pStart[i];
 143        PfxEntry * nptr = NULL;
 144        while (ptr) {
 145             nptr = ptr->getNext();
 146             delete(ptr);
 147             ptr = nptr;
 148             nptr = NULL;
 149        }
 150   }
 151
 152   // pass through linked suffix entries and clean up
 153   for (int j=0; j < SETSIZE ; j++) {
 154        sFlag[j] = NULL;
 155        SfxEntry * ptr = sStart[j];
 156        SfxEntry * nptr = NULL;
 157        while (ptr) {
 158             nptr = ptr->getNext();
 159             delete(ptr);
 160             ptr = nptr;
 161             nptr = NULL;
 162        }
 163        sStart[j] = NULL;
 164   }
 165
 166   if (keystring) free(keystring);
 167   keystring=NULL;
 168   if (trystring) free(trystring);
 169   trystring=NULL;
 170   if (encoding) free(encoding);
 171   encoding=NULL;
 172   if (maptable) {
 173      for (int j=0; j < nummap; j++) {
 174         for (int k=0; k < maptable[j].len; k++) {
 175            if (maptable[j].set[k]) free(maptable[j].set[k]);
 176         }
 177         free(maptable[j].set);
 178         maptable[j].set = NULL;
 179         maptable[j].len = 0;
 180      }
 181      free(maptable);
 182      maptable = NULL;
 183   }
 184   nummap = 0;
 185   if (breaktable) {
 186      for (int j=0; j < numbreak; j++) {
 187         if (breaktable[j]) free(breaktable[j]);
 188         breaktable[j] = NULL;
 189      }
 190      free(breaktable);
 191      breaktable = NULL;
 192   }
 193   numbreak = 0;
 194   if (reptable) {
 195      for (int j=0; j < numrep; j++) {
 196         free(reptable[j].pattern);
 197         free(reptable[j].pattern2);
 198      }
 199      free(reptable);
 200      reptable = NULL;
 201   }
 202   if (iconvtable) delete iconvtable;
 203   if (oconvtable) delete oconvtable;
 204   if (phone && phone->rules) {
 205      for (int j=0; j < phone->num + 1; j++) {
 206         free(phone->rules[j * 2]);
 207         free(phone->rules[j * 2 + 1]);
 208      }
 209      free(phone->rules);
 210      free(phone);
 211      phone = NULL;
 212   }
 213
 214   if (defcpdtable) {
 215      for (int j=0; j < numdefcpd; j++) {
 216         free(defcpdtable[j].def);
 217         defcpdtable[j].def = NULL;
 218      }
 219      free(defcpdtable);
 220      defcpdtable = NULL;
 221   }
 222   numrep = 0;
 223   if (checkcpdtable) {
 224      for (int j=0; j < numcheckcpd; j++) {
 225         free(checkcpdtable[j].pattern);
 226         free(checkcpdtable[j].pattern2);
 227         free(checkcpdtable[j].pattern3);
 228         checkcpdtable[j].pattern = NULL;
 229         checkcpdtable[j].pattern2 = NULL;
 230         checkcpdtable[j].pattern3 = NULL;
 231      }
 232      free(checkcpdtable);
 233      checkcpdtable = NULL;
 234   }
 235   numcheckcpd = 0;
 236   FREE_FLAG(compoundflag);
 237   FREE_FLAG(compoundbegin);
 238   FREE_FLAG(compoundmiddle);
 239   FREE_FLAG(compoundend);
 240   FREE_FLAG(compoundpermitflag);
 241   FREE_FLAG(compoundforbidflag);
 242   FREE_FLAG(compoundroot);
 243   FREE_FLAG(forbiddenword);
 244   FREE_FLAG(nosuggest);
 245   FREE_FLAG(nongramsuggest);
 246   FREE_FLAG(needaffix);
 247   FREE_FLAG(lemma_present);
 248   FREE_FLAG(circumfix);
 249   FREE_FLAG(onlyincompound);
 250
 251   cpdwordmax = 0;
 252   pHMgr = NULL;
 253   cpdmin = 0;
 254   cpdmaxsyllable = 0;
 255   if (cpdvowels) free(cpdvowels);
 256   if (cpdvowels_utf16) free(cpdvowels_utf16);
 257   if (cpdsyllablenum) free(cpdsyllablenum);
 258   free_utf_tbl();
 259   if (lang) free(lang);
 260   if (wordchars) free(wordchars);
 261   if (wordchars_utf16) free(wordchars_utf16);
 262   if (ignorechars) free(ignorechars);
 263   if (ignorechars_utf16) free(ignorechars_utf16);
 264   if (version) free(version);
 265   checknum=0;
 266 #ifdef MOZILLA_CLIENT
 267   delete [] csconv;
 268 #endif
 269 }
 270
 271 void AffixMgr::finishFileMgr(FileMgr *afflst)
 272 {
 273     delete afflst;
 274
 275     // convert affix trees to sorted list
 276     process_pfx_tree_to_list();
 277     process_sfx_tree_to_list();
 278 }
 279
 280 // read in aff file and build up prefix and suffix entry objects
 281 int  AffixMgr::parse_file(const char * affpath, const char * key)
 282 {
 283   char * line; // io buffers
 284   char ft;     // affix type
 285
 286 #ifdef HUNSPELL_CHROME_CLIENT
 287   // open the affix file
 288   // We're always UTF-8
 289   utf8 = 1;
 290
 291   // A BDICT file stores PFX and SFX lines in a special section and it provides
 292   // a special line iterator for reading PFX and SFX lines.
 293   // We create a FileMgr object from this iterator and parse PFX and SFX lines
 294   // before parsing other lines.
 295   hunspell::LineIterator affix_iterator = bdict_reader->GetAffixLineIterator();
 296   FileMgr* iterator = new FileMgr(&affix_iterator);
 297   if (!iterator) {
 298     HUNSPELL_WARNING(stderr,
 299         "error: could not create a FileMgr from an affix line iterator.\n");
 300     return 1;
 301   }
 302
 303   while ((line = iterator->getline())) {
 304     ft = ' ';
 305     if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
 306     if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
 307     if (ft != ' ')
 308       parse_affix(line, ft, iterator, NULL);
 309   }
 310   delete iterator;
 311
 312   // Create a FileMgr object for reading lines except PFX and SFX lines.
 313   // We don't need to change the loop below since our FileMgr emulates the
 314   // original one.
 315   hunspell::LineIterator other_iterator = bdict_reader->GetOtherLineIterator();
 316   FileMgr * afflst = new FileMgr(&other_iterator);
 317   if (!afflst) {
 318     HUNSPELL_WARNING(stderr,
 319         "error: could not create a FileMgr from an other line iterator.\n");
 320     return 1;
 321   }
 322 #else
 323   // checking flag duplication
 324   char dupflags[CONTSIZE];
 325   char dupflags_ini = 1;
 326
 327   // first line indicator for removing byte order mark
 328   int firstline = 1;
 329
 330   // open the affix file
 331   FileMgr * afflst = new FileMgr(affpath, key);
 332   if (!afflst) {
 333     HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
 334     return 1;
 335   }
 336 #endif
 337
 338   // step one is to parse the affix file building up the internal
 339   // affix data structures
 340
 341     // read in each line ignoring any that do not
 342     // start with a known line type indicator
 343     while ((line = afflst->getline()) != NULL) {
 344        mychomp(line);
 345
 346 #ifndef HUNSPELL_CHROME_CLIENT
 347        /* remove byte order mark */
 348        if (firstline) {
 349          firstline = 0;
 350          // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
 351          if (strncmp(line,"\xEF\xBB\xBF",3) == 0) {
 352             memmove(line, line+3, strlen(line+3)+1);
 353          }
 354        }
 355 #endif
 356
 357        /* parse in the keyboard string */
 358        if (strncmp(line,"KEY",3) == 0) {
 359           if (parse_string(line, &keystring, afflst->getlinenum())) {
 360              finishFileMgr(afflst);
 361              return 1;
 362           }
 363        }
 364
 365        /* parse in the try string */
 366        if (strncmp(line,"TRY",3) == 0) {
 367           if (parse_string(line, &trystring, afflst->getlinenum())) {
 368              finishFileMgr(afflst);
 369              return 1;
 370           }
 371        }
 372
 373        /* parse in the name of the character set used by the .dict and .aff */
 374        if (strncmp(line,"SET",3) == 0) {
 375           if (parse_string(line, &encoding, afflst->getlinenum())) {
 376              finishFileMgr(afflst);
 377              return 1;
 378           }
 379           if (strcmp(encoding, "UTF-8") == 0) {
 380              utf8 = 1;
 381 #ifndef OPENOFFICEORG
 382 #ifndef MOZILLA_CLIENT
 383              if (initialize_utf_tbl()) return 1;
 384 #endif
 385 #endif
 386           }
 387        }
 388
 389        /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
 390        if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
 391                    complexprefixes = 1;
 392
 393        /* parse in the flag used by the controlled compound words */
 394        if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
 395           if (parse_flag(line, &compoundflag, afflst)) {
 396              finishFileMgr(afflst);
 397              return 1;
 398           }
 399        }
 400
 401        /* parse in the flag used by compound words */
 402        if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
 403           if (complexprefixes) {
 404             if (parse_flag(line, &compoundend, afflst)) {
 405               finishFileMgr(afflst);
 406               return 1;
 407             }
 408           } else {
 409             if (parse_flag(line, &compoundbegin, afflst)) {
 410               finishFileMgr(afflst);
 411               return 1;
 412             }
 413           }
 414        }
 415
 416        /* parse in the flag used by compound words */
 417        if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
 418           if (parse_flag(line, &compoundmiddle, afflst)) {
 419              finishFileMgr(afflst);
 420              return 1;
 421           }
 422        }
 423        /* parse in the flag used by compound words */
 424        if (strncmp(line,"COMPOUNDEND",11) == 0) {
 425           if (complexprefixes) {
 426             if (parse_flag(line, &compoundbegin, afflst)) {
 427               finishFileMgr(afflst);
 428               return 1;
 429             }
 430           } else {
 431             if (parse_flag(line, &compoundend, afflst)) {
 432               finishFileMgr(afflst);
 433               return 1;
 434             }
 435           }
 436        }
 437
 438        /* parse in the data used by compound_check() method */
 439        if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
 440           if (parse_num(line, &cpdwordmax, afflst)) {
 441              finishFileMgr(afflst);
 442              return 1;
 443           }
 444        }
 445
 446        /* parse in the flag sign compounds in dictionary */
 447        if (strncmp(line,"COMPOUNDROOT",12) == 0) {
 448           if (parse_flag(line, &compoundroot, afflst)) {
 449              finishFileMgr(afflst);
 450              return 1;
 451           }
 452        }
 453
 454        /* parse in the flag used by compound_check() method */
 455        if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
 456           if (parse_flag(line, &compoundpermitflag, afflst)) {
 457              finishFileMgr(afflst);
 458              return 1;
 459           }
 460        }
 461
 462        /* parse in the flag used by compound_check() method */
 463        if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
 464           if (parse_flag(line, &compoundforbidflag, afflst)) {
 465              finishFileMgr(afflst);
 466              return 1;
 467           }
 468        }
 469
 470        if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
 471                    compoundmoresuffixes = 1;
 472        }
 473
 474        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
 475                    checkcompounddup = 1;
 476        }
 477
 478        if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0) {
 479                    checkcompoundrep = 1;
 480        }
 481
 482        if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0) {
 483                    checkcompoundtriple = 1;
 484        }
 485
 486        if (strncmp(line,"SIMPLIFIEDTRIPLE",16) == 0) {
 487                    simplifiedtriple = 1;
 488        }
 489
 490        if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0) {
 491                    checkcompoundcase = 1;
 492        }
 493
 494        if (strncmp(line,"NOSUGGEST",9) == 0) {
 495           if (parse_flag(line, &nosuggest, afflst)) {
 496              finishFileMgr(afflst);
 497              return 1;
 498           }
 499        }
 500
 501        if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
 502           if (parse_flag(line, &nongramsuggest, afflst)) {
 503              finishFileMgr(afflst);
 504              return 1;
 505           }
 506        }
 507
 508        /* parse in the flag used by forbidden words */
 509        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
 510           if (parse_flag(line, &forbiddenword, afflst)) {
 511              finishFileMgr(afflst);
 512              return 1;
 513           }
 514        }
 515
 516        /* parse in the flag used by forbidden words */
 517        if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
 518           if (parse_flag(line, &lemma_present, afflst)) {
 519              finishFileMgr(afflst);
 520              return 1;
 521           }
 522        }
 523
 524        /* parse in the flag used by circumfixes */
 525        if (strncmp(line,"CIRCUMFIX",9) == 0) {
 526           if (parse_flag(line, &circumfix, afflst)) {
 527              finishFileMgr(afflst);
 528              return 1;
 529           }
 530        }
 531
 532        /* parse in the flag used by fogemorphemes */
 533        if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
 534           if (parse_flag(line, &onlyincompound, afflst)) {
 535              finishFileMgr(afflst);
 536              return 1;
 537           }
 538        }
 539
 540        /* parse in the flag used by `needaffixs' */
 541        if (strncmp(line,"PSEUDOROOT",10) == 0) {
 542           if (parse_flag(line, &needaffix, afflst)) {
 543              finishFileMgr(afflst);
 544              return 1;
 545           }
 546        }
 547
 548        /* parse in the flag used by `needaffixs' */
 549        if (strncmp(line,"NEEDAFFIX",9) == 0) {
 550           if (parse_flag(line, &needaffix, afflst)) {
 551              finishFileMgr(afflst);
 552              return 1;
 553           }
 554        }
 555
 556        /* parse in the minimal length for words in compounds */
 557        if (strncmp(line,"COMPOUNDMIN",11) == 0) {
 558           if (parse_num(line, &cpdmin, afflst)) {
 559              finishFileMgr(afflst);
 560              return 1;
 561           }
 562           if (cpdmin < 1) cpdmin = 1;
 563        }
 564
 565        /* parse in the max. words and syllables in compounds */
 566        if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
 567           if (parse_cpdsyllable(line, afflst)) {
 568              finishFileMgr(afflst);
 569              return 1;
 570           }
 571        }
 572
 573        /* parse in the flag used by compound_check() method */
 574        if (strncmp(line,"SYLLABLENUM",11) == 0) {
 575           if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
 576              finishFileMgr(afflst);
 577              return 1;
 578           }
 579        }
 580
 581        /* parse in the flag used by the controlled compound words */
 582        if (strncmp(line,"CHECKNUM",8) == 0) {
 583            checknum=1;
 584        }
 585
 586        /* parse in the extra word characters */
 587        if (strncmp(line,"WORDCHARS",9) == 0) {
 588           if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
 589              finishFileMgr(afflst);
 590              return 1;
 591           }
 592        }
 593
 594        /* parse in the ignored characters (for example, Arabic optional diacretics charachters */
 595        if (strncmp(line,"IGNORE",6) == 0) {
 596           if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
 597              finishFileMgr(afflst);
 598              return 1;
 599           }
 600        }
 601
 602 #ifndef HUNSPELL_CHROME_CLIENT
 603        /* parse in the typical fault correcting table */
 604        if (strncmp(line,"REP",3) == 0) {
 605           if (parse_reptable(line, afflst)) {
 606              finishFileMgr(afflst);
 607              return 1;
 608           }
 609        }
 610 #endif
 611
 612        /* parse in the input conversion table */
 613        if (strncmp(line,"ICONV",5) == 0) {
 614           if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
 615              finishFileMgr(afflst);
 616              return 1;
 617           }
 618        }
 619
 620        /* parse in the input conversion table */
 621        if (strncmp(line,"OCONV",5) == 0) {
 622           if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
 623              finishFileMgr(afflst);
 624              return 1;
 625           }
 626        }
 627
 628        /* parse in the phonetic translation table */
 629        if (strncmp(line,"PHONE",5) == 0) {
 630           if (parse_phonetable(line, afflst)) {
 631              finishFileMgr(afflst);
 632              return 1;
 633           }
 634        }
 635
 636        /* parse in the checkcompoundpattern table */
 637        if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
 638           if (parse_checkcpdtable(line, afflst)) {
 639              finishFileMgr(afflst);
 640              return 1;
 641           }
 642        }
 643
 644        /* parse in the defcompound table */
 645        if (strncmp(line,"COMPOUNDRULE",12) == 0) {
 646           if (parse_defcpdtable(line, afflst)) {
 647              finishFileMgr(afflst);
 648              return 1;
 649           }
 650        }
 651
 652        /* parse in the related character map table */
 653        if (strncmp(line,"MAP",3) == 0) {
 654           if (parse_maptable(line, afflst)) {
 655              finishFileMgr(afflst);
 656              return 1;
 657           }
 658        }
 659
 660        /* parse in the word breakpoints table */
 661        if (strncmp(line,"BREAK",5) == 0) {
 662           if (parse_breaktable(line, afflst)) {
 663              finishFileMgr(afflst);
 664              return 1;
 665           }
 666        }
 667
 668        /* parse in the language for language specific codes */
 669        if (strncmp(line,"LANG",4) == 0) {
 670           if (parse_string(line, &lang, afflst->getlinenum())) {
 671              finishFileMgr(afflst);
 672              return 1;
 673           }
 674           langnum = get_lang_num(lang);
 675        }
 676
 677        if (strncmp(line,"VERSION",7) == 0) {
 678           for(line = line + 7; *line == ' ' || *line == '\t'; line++);
 679           version = mystrdup(line);
 680        }
 681
 682        if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
 683           if (parse_num(line, &maxngramsugs, afflst)) {
 684              finishFileMgr(afflst);
 685              return 1;
 686           }
 687        }
 688
 689        if (strncmp(line,"ONLYMAXDIFF", 11) == 0)
 690                    onlymaxdiff = 1;
 691
 692        if (strncmp(line,"MAXDIFF",7) == 0) {
 693           if (parse_num(line, &maxdiff, afflst)) {
 694              finishFileMgr(afflst);
 695              return 1;
 696           }
 697        }
 698
 699        if (strncmp(line,"MAXCPDSUGS",10) == 0) {
 700           if (parse_num(line, &maxcpdsugs, afflst)) {
 701              finishFileMgr(afflst);
 702              return 1;
 703           }
 704        }
 705
 706        if (strncmp(line,"NOSPLITSUGS",11) == 0) {
 707                    nosplitsugs=1;
 708        }
 709
 710        if (strncmp(line,"FULLSTRIP",9) == 0) {
 711                    fullstrip=1;
 712        }
 713
 714        if (strncmp(line,"SUGSWITHDOTS",12) == 0) {
 715                    sugswithdots=1;
 716        }
 717
 718        /* parse in the flag used by forbidden words */
 719        if (strncmp(line,"KEEPCASE",8) == 0) {
 720           if (parse_flag(line, &keepcase, afflst)) {
 721              finishFileMgr(afflst);
 722              return 1;
 723           }
 724        }
 725
 726        /* parse in the flag used by `forceucase' */
 727        if (strncmp(line,"FORCEUCASE",10) == 0) {
 728           if (parse_flag(line, &forceucase, afflst)) {
 729              finishFileMgr(afflst);
 730              return 1;
 731           }
 732        }
 733
 734        /* parse in the flag used by `warn' */
 735        if (strncmp(line,"WARN",4) == 0) {
 736           if (parse_flag(line, &warn, afflst)) {
 737              finishFileMgr(afflst);
 738              return 1;
 739           }
 740        }
 741
 742        if (strncmp(line,"FORBIDWARN",10) == 0) {
 743                    forbidwarn=1;
 744        }
 745
 746        /* parse in the flag used by the affix generator */
 747        if (strncmp(line,"SUBSTANDARD",11) == 0) {
 748           if (parse_flag(line, &substandard, afflst)) {
 749              finishFileMgr(afflst);
 750              return 1;
 751           }
 752        }
 753
 754        if (strncmp(line,"CHECKSHARPS",11) == 0) {
 755                    checksharps=1;
 756        }
 757
 758 #ifndef HUNSPELL_CHROME_CLIENT
 759        /* parse this affix: P - prefix, S - suffix */
 760        ft = ' ';
 761        if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
 762        if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
 763        if (ft != ' ') {
 764           if (dupflags_ini) {
 765             memset(dupflags, 0, sizeof(dupflags));
 766             dupflags_ini = 0;
 767           }
 768           if (parse_affix(line, ft, afflst, dupflags)) {
 769              finishFileMgr(afflst);
 770              return 1;
 771           }
 772        }
 773 #endif
 774     }
 775
 776     finishFileMgr(afflst);
 777     // affix trees are sorted now
 778
 779     // now we can speed up performance greatly taking advantage of the
 780     // relationship between the affixes and the idea of "subsets".
 781
 782     // View each prefix as a potential leading subset of another and view
 783     // each suffix (reversed) as a potential trailing subset of another.
 784
 785     // To illustrate this relationship if we know the prefix "ab" is found in the
 786     // word to examine, only prefixes that "ab" is a leading subset of need be examined.
 787     // Furthermore is "ab" is not present then none of the prefixes that "ab" is
 788     // is a subset need be examined.
 789     // The same argument goes for suffix string that are reversed.
 790
 791     // Then to top this off why not examine the first char of the word to quickly
 792     // limit the set of prefixes to examine (i.e. the prefixes to examine must
 793     // be leading supersets of the first character of the word (if they exist)
 794
 795     // To take advantage of this "subset" relationship, we need to add two links
 796     // from entry.  One to take next if the current prefix is found (call it nexteq)
 797     // and one to take next if the current prefix is not found (call it nextne).
 798
 799     // Since we have built ordered lists, all that remains is to properly initialize
 800     // the nextne and nexteq pointers that relate them
 801
 802     process_pfx_order();
 803     process_sfx_order();
 804
 805     /* get encoding for CHECKCOMPOUNDCASE */
 806     if (!utf8) {
 807     char * enc = get_encoding();
 808     csconv = get_current_cs(enc);
 809     free(enc);
 810     enc = NULL;
 811
 812     char expw[MAXLNLEN];
 813     if (wordchars) {
 814         strcpy(expw, wordchars);
 815         free(wordchars);
 816     } else *expw = '\0';
 817
 818     for (int i = 0; i <= 255; i++) {
 819         if ( (csconv[i].cupper != csconv[i].clower) &&
 820             (! strchr(expw, (char) i))) {
 821                 *(expw + strlen(expw) + 1) = '\0';
 822                 *(expw + strlen(expw)) = (char) i;
 823         }
 824     }
 825
 826     wordchars = mystrdup(expw);
 827     }
 828
 829     // default BREAK definition
 830     if (numbreak == -1) {
 831         breaktable = (char **) malloc(sizeof(char *) * 3);
 832         if (!breaktable) return 1;
 833         breaktable[0] = mystrdup("-");
 834         breaktable[1] = mystrdup("^-");
 835         breaktable[2] = mystrdup("-$");
 836         if (breaktable[0] && breaktable[1] && breaktable[2]) numbreak = 3;
 837     }
 838     return 0;
 839 }
 840
 841
 842 // we want to be able to quickly access prefix information
 843 // both by prefix flag, and sorted by prefix string itself
 844 // so we need to set up two indexes
 845
 846 int AffixMgr::build_pfxtree(PfxEntry* pfxptr)
 847 {
 848   PfxEntry * ptr;
 849   PfxEntry * pptr;
 850   PfxEntry * ep = pfxptr;
 851
 852   // get the right starting points
 853   const char * key = ep->getKey();
 854   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 855
 856   // first index by flag which must exist
 857   ptr = pFlag[flg];
 858   ep->setFlgNxt(ptr);
 859   pFlag[flg] = ep;
 860
 861
 862   // handle the special case of null affix string
 863   if (strlen(key) == 0) {
 864     // always inset them at head of list at element 0
 865      ptr = pStart[0];
 866      ep->setNext(ptr);
 867      pStart[0] = ep;
 868      return 0;
 869   }
 870
 871   // now handle the normal case
 872   ep->setNextEQ(NULL);
 873   ep->setNextNE(NULL);
 874
 875   unsigned char sp = *((const unsigned char *)key);
 876   ptr = pStart[sp];
 877
 878   // handle the first insert
 879   if (!ptr) {
 880      pStart[sp] = ep;
 881      return 0;
 882   }
 883
 884
 885   // otherwise use binary tree insertion so that a sorted
 886   // list can easily be generated later
 887   pptr = NULL;
 888   for (;;) {
 889     pptr = ptr;
 890     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 891        ptr = ptr->getNextEQ();
 892        if (!ptr) {
 893           pptr->setNextEQ(ep);
 894           break;
 895        }
 896     } else {
 897        ptr = ptr->getNextNE();
 898        if (!ptr) {
 899           pptr->setNextNE(ep);
 900           break;
 901        }
 902     }
 903   }
 904   return 0;
 905 }
 906
 907 // we want to be able to quickly access suffix information
 908 // both by suffix flag, and sorted by the reverse of the
 909 // suffix string itself; so we need to set up two indexes
 910 int AffixMgr::build_sfxtree(SfxEntry* sfxptr)
 911 {
 912   SfxEntry * ptr;
 913   SfxEntry * pptr;
 914   SfxEntry * ep = sfxptr;
 915
 916   /* get the right starting point */
 917   const char * key = ep->getKey();
 918   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 919
 920   // first index by flag which must exist
 921   ptr = sFlag[flg];
 922   ep->setFlgNxt(ptr);
 923   sFlag[flg] = ep;
 924
 925   // next index by affix string
 926
 927   // handle the special case of null affix string
 928   if (strlen(key) == 0) {
 929     // always inset them at head of list at element 0
 930      ptr = sStart[0];
 931      ep->setNext(ptr);
 932      sStart[0] = ep;
 933      return 0;
 934   }
 935
 936   // now handle the normal case
 937   ep->setNextEQ(NULL);
 938   ep->setNextNE(NULL);
 939
 940   unsigned char sp = *((const unsigned char *)key);
 941   ptr = sStart[sp];
 942
 943   // handle the first insert
 944   if (!ptr) {
 945      sStart[sp] = ep;
 946      return 0;
 947   }
 948
 949   // otherwise use binary tree insertion so that a sorted
 950   // list can easily be generated later
 951   pptr = NULL;
 952   for (;;) {
 953     pptr = ptr;
 954     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 955        ptr = ptr->getNextEQ();
 956        if (!ptr) {
 957           pptr->setNextEQ(ep);
 958           break;
 959        }
 960     } else {
 961        ptr = ptr->getNextNE();
 962        if (!ptr) {
 963           pptr->setNextNE(ep);
 964           break;
 965        }
 966     }
 967   }
 968   return 0;
 969 }
 970
 971 // convert from binary tree to sorted list
 972 int AffixMgr::process_pfx_tree_to_list()
 973 {
 974   for (int i=1; i< SETSIZE; i++) {
 975     pStart[i] = process_pfx_in_order(pStart[i],NULL);
 976   }
 977   return 0;
 978 }
 979
 980
 981 PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr)
 982 {
 983   if (ptr) {
 984     nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
 985     ptr->setNext(nptr);
 986     nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
 987   }
 988   return nptr;
 989 }
 990
 991
 992 // convert from binary tree to sorted list
 993 int AffixMgr:: process_sfx_tree_to_list()
 994 {
 995   for (int i=1; i< SETSIZE; i++) {
 996     sStart[i] = process_sfx_in_order(sStart[i],NULL);
 997   }
 998   return 0;
 999 }
1000
1001 SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr)
1002 {
1003   if (ptr) {
1004     nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
1005     ptr->setNext(nptr);
1006     nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
1007   }
1008   return nptr;
1009 }
1010
1011
1012 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
1013 // using the idea of leading subsets this time
1014 int AffixMgr::process_pfx_order()
1015 {
1016     PfxEntry* ptr;
1017
1018     // loop through each prefix list starting point
1019     for (int i=1; i < SETSIZE; i++) {
1020
1021          ptr = pStart[i];
1022
1023          // look through the remainder of the list
1024          //  and find next entry with affix that
1025          // the current one is not a subset of
1026          // mark that as destination for NextNE
1027          // use next in list that you are a subset
1028          // of as NextEQ
1029
1030          for (; ptr != NULL; ptr = ptr->getNext()) {
1031
1032              PfxEntry * nptr = ptr->getNext();
1033              for (; nptr != NULL; nptr = nptr->getNext()) {
1034                  if (! isSubset( ptr->getKey() , nptr->getKey() )) break;
1035              }
1036              ptr->setNextNE(nptr);
1037              ptr->setNextEQ(NULL);
1038              if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
1039                  ptr->setNextEQ(ptr->getNext());
1040          }
1041
1042          // now clean up by adding smart search termination strings:
1043          // if you are already a superset of the previous prefix
1044          // but not a subset of the next, search can end here
1045          // so set NextNE properly
1046
1047          ptr = pStart[i];
1048          for (; ptr != NULL; ptr = ptr->getNext()) {
1049              PfxEntry * nptr = ptr->getNext();
1050              PfxEntry * mptr = NULL;
1051              for (; nptr != NULL; nptr = nptr->getNext()) {
1052                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
1053                  mptr = nptr;
1054              }
1055              if (mptr) mptr->setNextNE(NULL);
1056          }
1057     }
1058     return 0;
1059 }
1060
1061 // initialize the SfxEntry links NextEQ and NextNE to speed searching
1062 // using the idea of leading subsets this time
1063 int AffixMgr::process_sfx_order()
1064 {
1065     SfxEntry* ptr;
1066
1067     // loop through each prefix list starting point
1068     for (int i=1; i < SETSIZE; i++) {
1069
1070          ptr = sStart[i];
1071
1072          // look through the remainder of the list
1073          //  and find next entry with affix that
1074          // the current one is not a subset of
1075          // mark that as destination for NextNE
1076          // use next in list that you are a subset
1077          // of as NextEQ
1078
1079          for (; ptr != NULL; ptr = ptr->getNext()) {
1080              SfxEntry * nptr = ptr->getNext();
1081              for (; nptr != NULL; nptr = nptr->getNext()) {
1082                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
1083              }
1084              ptr->setNextNE(nptr);
1085              ptr->setNextEQ(NULL);
1086              if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
1087                  ptr->setNextEQ(ptr->getNext());
1088          }
1089
1090
1091          // now clean up by adding smart search termination strings:
1092          // if you are already a superset of the previous suffix
1093          // but not a subset of the next, search can end here
1094          // so set NextNE properly
1095
1096          ptr = sStart[i];
1097          for (; ptr != NULL; ptr = ptr->getNext()) {
1098              SfxEntry * nptr = ptr->getNext();
1099              SfxEntry * mptr = NULL;
1100              for (; nptr != NULL; nptr = nptr->getNext()) {
1101                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
1102                  mptr = nptr;
1103              }
1104              if (mptr) mptr->setNextNE(NULL);
1105          }
1106     }
1107     return 0;
1108 }
1109
1110 // add flags to the result for dictionary debugging
1111 void AffixMgr::debugflag(char * result, unsigned short flag) {
1112     char * st = encode_flag(flag);
1113     mystrcat(result, " ", MAXLNLEN);
1114     mystrcat(result, MORPH_FLAG, MAXLNLEN);
1115     if (st) {
1116         mystrcat(result, st, MAXLNLEN);
1117         free(st);
1118     }
1119 }
1120
1121 // calculate the character length of the condition
1122 int AffixMgr::condlen(char * st)
1123 {
1124   int l = 0;
1125   bool group = false;
1126   for(; *st; st++) {
1127     if (*st == '[') {
1128         group = true;
1129         l++;
1130     } else if (*st == ']') group = false;
1131     else if (!group && (!utf8 ||
1132         (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) l++;
1133   }
1134   return l;
1135 }
1136
1137 int AffixMgr::encodeit(affentry &entry, char * cs)
1138 {
1139   if (strcmp(cs,".") != 0) {
1140     entry.numconds = (char) condlen(cs);
1141     strncpy(entry.c.conds, cs, MAXCONDLEN);
1142     // long condition (end of conds padded by strncpy)
1143     if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
1144       entry.opts += aeLONGCOND;
1145       entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
1146       if (!entry.c.l.conds2) return 1;
1147     }
1148   } else {
1149     entry.numconds = 0;
1150     entry.c.conds[0] = '\0';
1151   }
1152   return 0;
1153 }
1154
1155 // return 1 if s1 is a leading subset of s2 (dots are for infixes)
1156 inline int AffixMgr::isSubset(const char * s1, const char * s2)
1157  {
1158     while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
1159         s1++;
1160         s2++;
1161     }
1162     return (*s1 == '\0');
1163  }
1164
1165
1166 // check word for prefixes
1167 struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
1168     const FLAG needflag)
1169 {
1170     struct hentry * rv= NULL;
1171
1172     pfx = NULL;
1173     pfxappnd = NULL;
1174     sfxappnd = NULL;
1175
1176     // first handle the special case of 0 length prefixes
1177     PfxEntry * pe = pStart[0];
1178     while (pe) {
1179         if (
1180             // fogemorpheme
1181               ((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
1182                   (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
1183             // permit prefixes in compounds
1184               ((in_compound != IN_CPD_END) || (pe->getCont() &&
1185                   (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
1186               ) {
1187                     // check prefix
1188                     rv = pe->checkword(word, len, in_compound, needflag);
1189                     if (rv) {
1190                         pfx=pe; // BUG: pfx not stateless
1191                         return rv;
1192                     }
1193              }
1194        pe = pe->getNext();
1195     }
1196
1197     // now handle the general case
1198     unsigned char sp = *((const unsigned char *)word);
1199     PfxEntry * pptr = pStart[sp];
1200
1201     while (pptr) {
1202         if (isSubset(pptr->getKey(),word)) {
1203              if (
1204             // fogemorpheme
1205               ((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
1206                   (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
1207             // permit prefixes in compounds
1208               ((in_compound != IN_CPD_END) || (pptr->getCont() &&
1209                   (TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()))))
1210               ) {
1211             // check prefix
1212                   rv = pptr->checkword(word, len, in_compound, needflag);
1213                   if (rv) {
1214                     pfx=pptr; // BUG: pfx not stateless
1215                     return rv;
1216                   }
1217              }
1218              pptr = pptr->getNextEQ();
1219         } else {
1220              pptr = pptr->getNextNE();
1221         }
1222     }
1223
1224     return NULL;
1225 }
1226
1227 // check word for prefixes
1228 struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
1229     char in_compound, const FLAG needflag)
1230 {
1231     struct hentry * rv= NULL;
1232
1233     pfx = NULL;
1234     sfxappnd = NULL;
1235
1236     // first handle the special case of 0 length prefixes
1237     PfxEntry * pe = pStart[0];
1238
1239     while (pe) {
1240         rv = pe->check_twosfx(word, len, in_compound, needflag);
1241         if (rv) return rv;
1242         pe = pe->getNext();
1243     }
1244
1245     // now handle the general case
1246     unsigned char sp = *((const unsigned char *)word);
1247     PfxEntry * pptr = pStart[sp];
1248
1249     while (pptr) {
1250         if (isSubset(pptr->getKey(),word)) {
1251             rv = pptr->check_twosfx(word, len, in_compound, needflag);
1252             if (rv) {
1253                 pfx = pptr;
1254                 return rv;
1255             }
1256             pptr = pptr->getNextEQ();
1257         } else {
1258              pptr = pptr->getNextNE();
1259         }
1260     }
1261
1262     return NULL;
1263 }
1264
1265 // check word for prefixes
1266 char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
1267     const FLAG needflag)
1268 {
1269     char * st;
1270
1271     char result[MAXLNLEN];
1272     result[0] = '\0';
1273
1274     pfx = NULL;
1275     sfxappnd = NULL;
1276
1277     // first handle the special case of 0 length prefixes
1278     PfxEntry * pe = pStart[0];
1279     while (pe) {
1280        st = pe->check_morph(word,len,in_compound, needflag);
1281        if (st) {
1282             mystrcat(result, st, MAXLNLEN);
1283             free(st);
1284        }
1285        // if (rv) return rv;
1286        pe = pe->getNext();
1287     }
1288
1289     // now handle the general case
1290     unsigned char sp = *((const unsigned char *)word);
1291     PfxEntry * pptr = pStart[sp];
1292
1293     while (pptr) {
1294         if (isSubset(pptr->getKey(),word)) {
1295             st = pptr->check_morph(word,len,in_compound, needflag);
1296             if (st) {
1297               // fogemorpheme
1298               if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() &&
1299                         (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
1300                     mystrcat(result, st, MAXLNLEN);
1301                     pfx = pptr;
1302                 }
1303                 free(st);
1304             }
1305             pptr = pptr->getNextEQ();
1306         } else {
1307             pptr = pptr->getNextNE();
1308         }
1309     }
1310
1311     if (*result) return mystrdup(result);
1312     return NULL;
1313 }
1314
1315
1316 // check word for prefixes
1317 char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
1318     char in_compound, const FLAG needflag)
1319 {
1320     char * st;
1321
1322     char result[MAXLNLEN];
1323     result[0] = '\0';
1324
1325     pfx = NULL;
1326     sfxappnd = NULL;
1327
1328     // first handle the special case of 0 length prefixes
1329     PfxEntry * pe = pStart[0];
1330     while (pe) {
1331         st = pe->check_twosfx_morph(word,len,in_compound, needflag);
1332         if (st) {
1333             mystrcat(result, st, MAXLNLEN);
1334             free(st);
1335         }
1336         pe = pe->getNext();
1337     }
1338
1339     // now handle the general case
1340     unsigned char sp = *((const unsigned char *)word);
1341     PfxEntry * pptr = pStart[sp];
1342
1343     while (pptr) {
1344         if (isSubset(pptr->getKey(),word)) {
1345             st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
1346             if (st) {
1347                 mystrcat(result, st, MAXLNLEN);
1348                 free(st);
1349                 pfx = pptr;
1350             }
1351             pptr = pptr->getNextEQ();
1352         } else {
1353             pptr = pptr->getNextNE();
1354         }
1355     }
1356
1357     if (*result) return mystrdup(result);
1358     return NULL;
1359 }
1360
1361 // Is word a non compound with a REP substitution (see checkcompoundrep)?
1362 int AffixMgr::cpdrep_check(const char * word, int wl)
1363 {
1364   char candidate[MAXLNLEN];
1365   const char * r;
1366   int lenr, lenp;
1367
1368 #ifdef HUNSPELL_CHROME_CLIENT
1369   const char *pattern, *pattern2;
1370   hunspell::ReplacementIterator iterator = bdict_reader->GetReplacementIterator();
1371   while (iterator.GetNext(&pattern, &pattern2)) {
1372     r = word;
1373     lenr = strlen(pattern2);
1374     lenp = strlen(pattern);
1375
1376     // search every occurence of the pattern in the word
1377     while ((r=strstr(r, pattern)) != NULL) {
1378       strcpy(candidate, word);
1379       if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
1380       strcpy(candidate+(r-word), pattern2);
1381       strcpy(candidate+(r-word)+lenr, r+lenp);
1382       if (candidate_check(candidate,strlen(candidate))) return 1;
1383       r++; // search for the next letter
1384     }
1385   }
1386
1387 #else
1388   if ((wl < 2) || !numrep) return 0;
1389
1390   for (int i=0; i < numrep; i++ ) {
1391       r = word;
1392       lenr = strlen(reptable[i].pattern2);
1393       lenp = strlen(reptable[i].pattern);
1394       // search every occurence of the pattern in the word
1395       while ((r=strstr(r, reptable[i].pattern)) != NULL) {
1396           strcpy(candidate, word);
1397           if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
1398           strcpy(candidate+(r-word),reptable[i].pattern2);
1399           strcpy(candidate+(r-word)+lenr, r+lenp);
1400           if (candidate_check(candidate,strlen(candidate))) return 1;
1401           r++; // search for the next letter
1402       }
1403    }
1404 #endif
1405    return 0;
1406 }
1407
1408 // forbid compoundings when there are special patterns at word bound
1409 int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char /*affixed*/)
1410 {
1411   int len;
1412   for (int i = 0; i < numcheckcpd; i++) {
1413       if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
1414         (!r1 || !checkcpdtable[i].cond ||
1415           (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) &&
1416         (!r2 || !checkcpdtable[i].cond2 ||
1417           (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) &&
1418         // zero length pattern => only TESTAFF
1419         // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
1420         (!*(checkcpdtable[i].pattern) || (
1421             (*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
1422             (*(checkcpdtable[i].pattern)!='0' && ((len = strlen(checkcpdtable[i].pattern)) != 0) &&
1423                 strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
1424             return 1;
1425         }
1426   }
1427   return 0;
1428 }
1429
1430 // forbid compounding with neighbouring upper and lower case characters at word bounds
1431 int AffixMgr::cpdcase_check(const char * word, int pos)
1432 {
1433   if (utf8) {
1434       w_char u, w;
1435       const char * p;
1436       u8_u16(&u, 1, word + pos);
1437       for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--);
1438       u8_u16(&w, 1, p);
1439       unsigned short a = (u.h << 8) + u.l;
1440       unsigned short b = (w.h << 8) + w.l;
1441       if (((unicodetoupper(a, langnum) == a) || (unicodetoupper(b, langnum) == b)) &&
1442           (a != '-') && (b != '-')) return 1;
1443   } else {
1444       unsigned char a = *(word + pos - 1);
1445       unsigned char b = *(word + pos);
1446       if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) return 1;
1447   }
1448   return 0;
1449 }
1450
1451 // check compound patterns
1452 int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
1453 {
1454   signed short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
1455   signed short btwp[MAXWORDLEN]; // word positions for metacharacters
1456   int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
1457   short bt = 0;
1458   int i, j;
1459   int ok;
1460   int w = 0;
1461
1462   if (!*words) {
1463     w = 1;
1464     *words = def;
1465   }
1466
1467   if (!*words) {
1468     return 0;
1469   }
1470
1471   (*words)[wnum] = rv;
1472
1473   // has the last word COMPOUNDRULE flag?
1474   if (rv->alen == 0) {
1475     (*words)[wnum] = NULL;
1476     if (w) *words = NULL;
1477     return 0;
1478   }
1479   ok = 0;
1480   for (i = 0; i < numdefcpd; i++) {
1481     for (j = 0; j < defcpdtable[i].len; j++) {
1482        if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
1483           TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
1484     }
1485   }
1486   if (ok == 0) {
1487     (*words)[wnum] = NULL;
1488     if (w) *words = NULL;
1489     return 0;
1490   }
1491
1492   for (i = 0; i < numdefcpd; i++) {
1493     signed short pp = 0; // pattern position
1494     signed short wp = 0; // "words" position
1495     int ok2;
1496     ok = 1;
1497     ok2 = 1;
1498     do {
1499       while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
1500         if (((pp+1) < defcpdtable[i].len) &&
1501           ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
1502             int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum;
1503             ok2 = 1;
1504             pp+=2;
1505             btpp[bt] = pp;
1506             btwp[bt] = wp;
1507             while (wp <= wend) {
1508                 if (!(*words)[wp]->alen ||
1509                   !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
1510                     ok2 = 0;
1511                     break;
1512                 }
1513                 wp++;
1514             }
1515             if (wp <= wnum) ok2 = 0;
1516             btnum[bt] = wp - btwp[bt];
1517             if (btnum[bt] > 0) bt++;
1518             if (ok2) break;
1519         } else {
1520             ok2 = 1;
1521             if (!(*words)[wp] || !(*words)[wp]->alen ||
1522               !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
1523                 ok = 0;
1524                 break;
1525             }
1526             pp++;
1527             wp++;
1528             if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0;
1529         }
1530       }
1531     if (ok && ok2) {
1532         int r = pp;
1533         while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
1534             ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2;
1535         if (defcpdtable[i].len <= r) return 1;
1536     }
1537     // backtrack
1538     if (bt) do {
1539         ok = 1;
1540         btnum[bt - 1]--;
1541         pp = btpp[bt - 1];
1542         wp = btwp[bt - 1] + (signed short) btnum[bt - 1];
1543     } while ((btnum[bt - 1] < 0) && --bt);
1544   } while (bt);
1545
1546   if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1;
1547
1548   // check zero ending
1549   while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
1550     ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
1551   if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
1552   }
1553   (*words)[wnum] = NULL;
1554   if (w) *words = NULL;
1555   return 0;
1556 }
1557
1558 inline int AffixMgr::candidate_check(const char * word, int len)
1559 {
1560   struct hentry * rv=NULL;
1561
1562   rv = lookup(word);
1563   if (rv) return 1;
1564
1565 //  rv = prefix_check(word,len,1);
1566 //  if (rv) return 1;
1567
1568   rv = affix_check(word,len);
1569   if (rv) return 1;
1570   return 0;
1571 }
1572
1573 // calculate number of syllable for compound-checking
1574 short AffixMgr::get_syllable(const char * word, int wlen)
1575 {
1576     if (cpdmaxsyllable==0) return 0;
1577
1578     short num=0;
1579
1580     if (!utf8) {
1581         for (int i=0; i<wlen; i++) {
1582             if (strchr(cpdvowels, word[i])) num++;
1583         }
1584     } else if (cpdvowels_utf16) {
1585         w_char w[MAXWORDUTF8LEN];
1586         int i = u8_u16(w, MAXWORDUTF8LEN, word);
1587         for (; i > 0; i--) {
1588             if (flag_bsearch((unsigned short *) cpdvowels_utf16,
1589                 ((unsigned short *) w)[i - 1], cpdvowels_utf16_len)) num++;
1590         }
1591     }
1592     return num;
1593 }
1594
1595 void AffixMgr::setcminmax(int * cmin, int * cmax, const char * word, int len) {
1596     if (utf8) {
1597         int i;
1598         for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) {
1599           for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++);
1600         }
1601         for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) {
1602           for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--);
1603         }
1604     } else {
1605         *cmin = cpdmin;
1606         *cmax = len - cpdmin + 1;
1607     }
1608 }
1609
1610
1611 // check if compound word is correctly spelled
1612 // hu_mov_rule = spec. Hungarian rule (XXX)
1613 struct hentry * AffixMgr::compound_check(const char * word, int len,
1614     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
1615     char hu_mov_rule = 0, char is_sug = 0, int * info = NULL)
1616 {
1617     int i;
1618     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
1619     struct hentry * rv = NULL;
1620     struct hentry * rv_first;
1621     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
1622     char st [MAXWORDUTF8LEN + 4];
1623     char ch = '\0';
1624     int cmin;
1625     int cmax;
1626     int striple = 0;
1627     int scpd = 0;
1628     int soldi = 0;
1629     int oldcmin = 0;
1630     int oldcmax = 0;
1631     int oldlen = 0;
1632     int checkedstriple = 0;
1633     int onlycpdrule;
1634     char affixed = 0;
1635     hentry ** oldwords = words;
1636
1637     int checked_prefix;
1638
1639     setcminmax(&cmin, &cmax, word, len);
1640
1641     strcpy(st, word);
1642
1643     for (i = cmin; i < cmax; i++) {
1644         // go to end of the UTF-8 character
1645         if (utf8) {
1646             for (; (st[i] & 0xc0) == 0x80; i++);
1647             if (i >= cmax) return NULL;
1648         }
1649
1650         words = oldwords;
1651         onlycpdrule = (words) ? 1 : 0;
1652
1653         do { // onlycpdrule loop
1654
1655         oldnumsyllable = numsyllable;
1656         oldwordnum = wordnum;
1657         checked_prefix = 0;
1658
1659
1660         do { // simplified checkcompoundpattern loop
1661
1662         if (scpd > 0) {
1663           for (; scpd <= numcheckcpd && (!checkcpdtable[scpd-1].pattern3 ||
1664             strncmp(word + i, checkcpdtable[scpd-1].pattern3, strlen(checkcpdtable[scpd-1].pattern3)) != 0); scpd++);
1665
1666           if (scpd > numcheckcpd) break; // break simplified checkcompoundpattern loop
1667           strcpy(st + i, checkcpdtable[scpd-1].pattern);
1668           soldi = i;
1669           i += strlen(checkcpdtable[scpd-1].pattern);
1670           strcpy(st + i, checkcpdtable[scpd-1].pattern2);
1671           strcpy(st + i + strlen(checkcpdtable[scpd-1].pattern2), word + soldi + strlen(checkcpdtable[scpd-1].pattern3));
1672
1673           oldlen = len;
1674           len += strlen(checkcpdtable[scpd-1].pattern) + strlen(checkcpdtable[scpd-1].pattern2) - strlen(checkcpdtable[scpd-1].pattern3);
1675           oldcmin = cmin;
1676           oldcmax = cmax;
1677           setcminmax(&cmin, &cmax, st, len);
1678
1679           cmax = len - cpdmin + 1;
1680         }
1681
1682         ch = st[i];
1683         st[i] = '\0';
1684
1685         sfx = NULL;
1686         pfx = NULL;
1687
1688         // FIRST WORD
1689
1690         affixed = 1;
1691         rv = lookup(st); // perhaps without prefix
1692
1693         // search homonym with compound flag
1694         while ((rv) && !hu_mov_rule &&
1695             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
1696                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1697                   (compoundbegin && !wordnum && !onlycpdrule &&
1698                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1699                   (compoundmiddle && wordnum && !words && !onlycpdrule &&
1700                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
1701                   (numdefcpd && onlycpdrule &&
1702                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
1703                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))) ||
1704                   (scpd != 0 && checkcpdtable[scpd-1].cond != FLAG_NULL &&
1705                     !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)))
1706                   ) {
1707             rv = rv->next_homonym;
1708         }
1709
1710         if (rv) affixed = 0;
1711
1712         if (!rv) {
1713             if (onlycpdrule) break;
1714             if (compoundflag &&
1715              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
1716                 if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
1717                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1718                         (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
1719                     sfx->getCont() &&
1720                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
1721                             sfx->getContLen())) || (compoundend &&
1722                         TESTAFF(sfx->getCont(), compoundend,
1723                             sfx->getContLen())))) {
1724                         rv = NULL;
1725                 }
1726             }
1727
1728             if (rv ||
1729               (((wordnum == 0) && compoundbegin &&
1730                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1731                 (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound
1732                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
1733               ((wordnum > 0) && compoundmiddle &&
1734                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1735                 (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound
1736                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
1737               ) checked_prefix = 1;
1738         // else check forbiddenwords and needaffix
1739         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1740             TESTAFF(rv->astr, needaffix, rv->alen) ||
1741             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1742             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
1743              )) {
1744                 st[i] = ch;
1745                 //continue;
1746                 break;
1747         }
1748
1749             // check non_compound flag in suffix and prefix
1750             if ((rv) && !hu_mov_rule &&
1751                 ((pfx && pfx->getCont() &&
1752                     TESTAFF(pfx->getCont(), compoundforbidflag,
1753                         pfx->getContLen())) ||
1754                 (sfx && sfx->getCont() &&
1755                     TESTAFF(sfx->getCont(), compoundforbidflag,
1756                         sfx->getContLen())))) {
1757                     rv = NULL;
1758             }
1759
1760             // check compoundend flag in suffix and prefix
1761             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
1762                 ((pfx && pfx->getCont() &&
1763                     TESTAFF(pfx->getCont(), compoundend,
1764                         pfx->getContLen())) ||
1765                 (sfx && sfx->getCont() &&
1766                     TESTAFF(sfx->getCont(), compoundend,
1767                         sfx->getContLen())))) {
1768                     rv = NULL;
1769             }
1770
1771             // check compoundmiddle flag in suffix and prefix
1772             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
1773                 ((pfx && pfx->getCont() &&
1774                     TESTAFF(pfx->getCont(), compoundmiddle,
1775                         pfx->getContLen())) ||
1776                 (sfx && sfx->getCont() &&
1777                     TESTAFF(sfx->getCont(), compoundmiddle,
1778                         sfx->getContLen())))) {
1779                     rv = NULL;
1780             }
1781
1782         // check forbiddenwords
1783         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1784             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1785             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
1786                 return NULL;
1787             }
1788
1789         // increment word number, if the second root has a compoundroot flag
1790         if ((rv) && compoundroot &&
1791             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1792                 wordnum++;
1793         }
1794
1795         // first word is acceptable in compound words?
1796         if (((rv) &&
1797           ( checked_prefix || (words && words[wnum]) ||
1798             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1799             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1800             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
1801 //            (numdefcpd && )
1802
1803 // LANG_hu section: spec. Hungarian rule
1804             || ((langnum == LANG_hu) && hu_mov_rule && (
1805                     TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
1806                     TESTAFF(rv->astr, 'G', rv->alen) ||
1807                     TESTAFF(rv->astr, 'H', rv->alen)
1808                 )
1809               )
1810 // END of LANG_hu section
1811           ) &&
1812           (
1813              // test CHECKCOMPOUNDPATTERN conditions
1814              scpd == 0 || checkcpdtable[scpd-1].cond == FLAG_NULL ||
1815                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)
1816           )
1817           && ! (( checkcompoundtriple && scpd == 0 && !words && // test triple letters
1818                    (word[i-1]==word[i]) && (
1819                       ((i>1) && (word[i-1]==word[i-2])) ||
1820                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
1821                    )
1822                ) ||
1823                (
1824                  checkcompoundcase && scpd == 0 && !words && cpdcase_check(word, i)
1825                ))
1826          )
1827 // LANG_hu section: spec. Hungarian rule
1828          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
1829               (sfx && sfx->getCont() && ( // XXX hardwired Hungarian dic. codes
1830                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
1831                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
1832                     )
1833                )
1834              )
1835          ) { // first word is ok condition
1836
1837 // LANG_hu section: spec. Hungarian rule
1838             if (langnum == LANG_hu) {
1839                 // calculate syllable number of the word
1840                 numsyllable += get_syllable(st, i);
1841                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1842                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
1843             }
1844 // END of LANG_hu section
1845
1846             // NEXT WORD(S)
1847             rv_first = rv;
1848             st[i] = ch;
1849
1850         do { // striple loop
1851
1852             // check simplifiedtriple
1853             if (simplifiedtriple) {
1854               if (striple) {
1855                 checkedstriple = 1;
1856                 i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
1857               } else if (i > 2 && *(word+i - 1) == *(word + i - 2)) striple = 1;
1858             }
1859
1860             rv = lookup((st+i)); // perhaps without prefix
1861
1862         // search homonym with compound flag
1863         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
1864                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1865                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
1866                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))) ||
1867                              (scpd != 0 && checkcpdtable[scpd-1].cond2 != FLAG_NULL &&
1868                                 !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
1869                            )) {
1870             rv = rv->next_homonym;
1871         }
1872
1873             // check FORCEUCASE
1874             if (rv && forceucase && (rv) &&
1875                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
1876
1877             if (rv && words && words[wnum + 1]) return rv_first;
1878
1879             oldnumsyllable2 = numsyllable;
1880             oldwordnum2 = wordnum;
1881
1882
1883 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
1884             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
1885                 numsyllable--;
1886             }
1887 // END of LANG_hu section
1888
1889             // increment word number, if the second root has a compoundroot flag
1890             if ((rv) && (compoundroot) &&
1891                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1892                     wordnum++;
1893             }
1894
1895             // check forbiddenwords
1896             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1897                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1898                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1899
1900             // second word is acceptable, as a root?
1901             // hungarian conventions: compounding is acceptable,
1902             // when compound forms consist of 2 words, or if more,
1903             // then the syllable number of root words must be 6, or lesser.
1904
1905             if ((rv) && (
1906                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1907                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
1908                     )
1909                 && (
1910                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
1911                       ((cpdmaxsyllable!=0) &&
1912                           (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
1913                     ) &&
1914                (
1915                  // test CHECKCOMPOUNDPATTERN
1916                  !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv, 0)
1917                ) &&
1918                 (
1919                      (!checkcompounddup || (rv != rv_first))
1920                    )
1921             // test CHECKCOMPOUNDPATTERN conditions
1922                 && (scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
1923                       TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
1924                 )
1925                  {
1926                       // forbid compound word, if it is a non compound word with typical fault
1927                       if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
1928                       return rv_first;
1929             }
1930
1931             numsyllable = oldnumsyllable2;
1932             wordnum = oldwordnum2;
1933
1934             // perhaps second word has prefix or/and suffix
1935             sfx = NULL;
1936             sfxflag = FLAG_NULL;
1937             rv = (compoundflag && !onlycpdrule) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
1938             if (!rv && compoundend && !onlycpdrule) {
1939                 sfx = NULL;
1940                 pfx = NULL;
1941                 rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
1942             }
1943
1944             if (!rv && numdefcpd && words) {
1945                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
1946                 if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv_first;
1947                 rv = NULL;
1948             }
1949
1950             // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
1951             if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
1952                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
1953
1954             // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
1955             if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) rv = NULL;
1956
1957             // check non_compound flag in suffix and prefix
1958             if ((rv) &&
1959                 ((pfx && pfx->getCont() &&
1960                     TESTAFF(pfx->getCont(), compoundforbidflag,
1961                         pfx->getContLen())) ||
1962                 (sfx && sfx->getCont() &&
1963                     TESTAFF(sfx->getCont(), compoundforbidflag,
1964                         sfx->getContLen())))) {
1965                     rv = NULL;
1966             }
1967
1968             // check FORCEUCASE
1969             if (rv && forceucase && (rv) &&
1970                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
1971
1972             // check forbiddenwords
1973             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1974                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1975                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1976
1977             // pfxappnd = prefix of word+i, or NULL
1978             // calculate syllable number of prefix.
1979             // hungarian convention: when syllable number of prefix is more,
1980             // than 1, the prefix+word counts as two words.
1981
1982             if (langnum == LANG_hu) {
1983                 // calculate syllable number of the word
1984                 numsyllable += get_syllable(word + i, strlen(word + i));
1985
1986                 // - affix syllable num.
1987                 // XXX only second suffix (inflections, not derivations)
1988                 if (sfxappnd) {
1989                     char * tmp = myrevstrdup(sfxappnd);
1990                     numsyllable -= get_syllable(tmp, strlen(tmp));
1991                     free(tmp);
1992                 }
1993
1994                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1995                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
1996
1997                 // increment syllable num, if last word has a SYLLABLENUM flag
1998                 // and the suffix is beginning `s'
1999
2000                 if (cpdsyllablenum) {
2001                     switch (sfxflag) {
2002                         case 'c': { numsyllable+=2; break; }
2003                         case 'J': { numsyllable += 1; break; }
2004                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
2005                     }
2006                 }
2007             }
2008
2009             // increment word number, if the second word has a compoundroot flag
2010             if ((rv) && (compoundroot) &&
2011                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2012                     wordnum++;
2013             }
2014
2015             // second word is acceptable, as a word with prefix or/and suffix?
2016             // hungarian conventions: compounding is acceptable,
2017             // when compound forms consist 2 word, otherwise
2018             // the syllable number of root words is 6, or lesser.
2019             if ((rv) &&
2020                     (
2021                       ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
2022                       ((cpdmaxsyllable != 0) &&
2023                           (numsyllable <= cpdmaxsyllable))
2024                     )
2025                 && (
2026                    (!checkcompounddup || (rv != rv_first))
2027                    )) {
2028                     // forbid compound word, if it is a non compound word with typical fault
2029                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
2030                     return rv_first;
2031             }
2032
2033             numsyllable = oldnumsyllable2;
2034             wordnum = oldwordnum2;
2035
2036             // perhaps second word is a compound word (recursive call)
2037             if (wordnum < maxwordnum) {
2038                 rv = compound_check((st+i),strlen(st+i), wordnum+1,
2039                      numsyllable, maxwordnum, wnum + 1, words, 0, is_sug, info);
2040
2041                 if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) ||
2042                    (scpd != 0 && !cpdpat_check(word, i, rv_first, rv, affixed)))) rv = NULL;
2043             } else {
2044                 rv=NULL;
2045             }
2046             if (rv) {
2047                 // forbid compound word, if it is a non compound word with typical fault
2048                 if (checkcompoundrep || forbiddenword) {
2049                     struct hentry * rv2 = NULL;
2050
2051                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
2052
2053                     // check first part
2054                     if (strncmp(rv->word, word + i, rv->blen) == 0) {
2055                         char r = *(st + i + rv->blen);
2056                         *(st + i + rv->blen) = '\0';
2057
2058                         if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) {
2059                             *(st + i + rv->blen) = r;
2060                             continue;
2061                         }
2062
2063                         if (forbiddenword) {
2064                             rv2 = lookup(word);
2065                             if (!rv2) rv2 = affix_check(word, len);
2066                             if (rv2 && rv2->astr && TESTAFF(rv2->astr, forbiddenword, rv2->alen) &&
2067                                 (strncmp(rv2->word, st, i + rv->blen) == 0)) {
2068                                     return NULL;
2069                             }
2070                         }
2071                         *(st + i + rv->blen) = r;
2072                     }
2073                 }
2074                 return rv_first;
2075             }
2076           } while (striple && !checkedstriple); // end of striple loop
2077
2078           if (checkedstriple) {
2079             i++;
2080             checkedstriple = 0;
2081             striple = 0;
2082           }
2083
2084         } // first word is ok condition
2085
2086         if (soldi != 0) {
2087           i = soldi;
2088           soldi = 0;
2089           len = oldlen;
2090           cmin = oldcmin;
2091           cmax = oldcmax;
2092         }
2093         scpd++;
2094
2095
2096         } while (!onlycpdrule && simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
2097
2098         scpd = 0;
2099         wordnum = oldwordnum;
2100         numsyllable = oldnumsyllable;
2101
2102         if (soldi != 0) {
2103           i = soldi;
2104           strcpy(st, word); // XXX add more optim.
2105           soldi = 0;
2106         } else st[i] = ch;
2107
2108         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
2109
2110     }
2111
2112     return NULL;
2113 }
2114
2115 // morphological analysis of a compound word: appends the part/stem fields of each accepted split to *result
2116 // hu_mov_rule = spec. Hungarian rule (XXX)
2117 int AffixMgr::compound_check_morph(const char * word, int len,
2118     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
2119     char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
2120 {
2121     int i;
2122     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
2123     int ok = 0;
2124
2125     struct hentry * rv = NULL;
2126     struct hentry * rv_first;
2127     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
2128     char st [MAXWORDUTF8LEN + 4];
2129     char ch;
2130
2131     int checked_prefix;
2132     char presult[MAXLNLEN];
2133
2134     int cmin;
2135     int cmax;
2136
2137     int onlycpdrule;
2138     char affixed = 0;
2139     hentry ** oldwords = words;
2140
2141     setcminmax(&cmin, &cmax, word, len);
2142
2143     strcpy(st, word);
2144
2145     for (i = cmin; i < cmax; i++) {
2146         oldnumsyllable = numsyllable;
2147         oldwordnum = wordnum;
2148         checked_prefix = 0;
2149
2150         // go to end of the UTF-8 character
2151         if (utf8) {
2152             for (; (st[i] & 0xc0) == 0x80; i++);
2153             if (i >= cmax) return 0;
2154         }
2155
2156         words = oldwords;
2157         onlycpdrule = (words) ? 1 : 0;
2158
2159         do { // onlycpdrule loop
2160
2161         oldnumsyllable = numsyllable;
2162         oldwordnum = wordnum;
2163         checked_prefix = 0;
2164
2165         ch = st[i];
2166         st[i] = '\0';
2167         sfx = NULL;
2168
2169         // FIRST WORD
2170
2171         affixed = 1;
2172
2173         *presult = '\0';
2174         if (partresult) mystrcat(presult, partresult, MAXLNLEN);
2175
2176         rv = lookup(st); // perhaps without prefix
2177
2178         // search homonym with compound flag
2179         while ((rv) && !hu_mov_rule &&
2180             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
2181                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2182                 (compoundbegin && !wordnum && !onlycpdrule &&
2183                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
2184                 (compoundmiddle && wordnum && !words && !onlycpdrule &&
2185                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
2186                   (numdefcpd && onlycpdrule &&
2187                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
2188                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
2189                   ))) {
2190             rv = rv->next_homonym;
2191         }
2192
2193         if (rv) affixed = 0;
2194
2195         if (rv)  {
2196             sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
2197             if (!HENTRY_FIND(rv, MORPH_STEM)) {
2198                 sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
2199             }
2200             // store the pointer of the hash entry
2201 //            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
2202             if (HENTRY_DATA(rv)) {
2203                 sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
2204             }
2205         }
2206
2207         if (!rv) {
2208             if (onlycpdrule) break;
2209             if (compoundflag &&
2210              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
2211                 if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
2212                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
2213                         (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
2214                     sfx->getCont() &&
2215                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
2216                             sfx->getContLen())) || (compoundend &&
2217                         TESTAFF(sfx->getCont(), compoundend,
2218                             sfx->getContLen())))) {
2219                         rv = NULL;
2220                 }
2221             }
2222
2223             if (rv ||
2224               (((wordnum == 0) && compoundbegin &&
2225                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
2226                 (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) ||  // twofold suffix+compound
2227                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
2228               ((wordnum > 0) && compoundmiddle &&
2229                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
2230                 (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) ||  // twofold suffix+compound
2231                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
2232               ) {
2233                 // char * p = prefix_check_morph(st, i, 0, compound);
2234                 char * p = NULL;
2235                 if (compoundflag) p = affix_check_morph(st, i, compoundflag);
2236                 if (!p || (*p == '\0')) {
2237                    if (p) free(p);
2238                    p = NULL;
2239                    if ((wordnum == 0) && compoundbegin) {
2240                      p = affix_check_morph(st, i, compoundbegin);
2241                    } else if ((wordnum > 0) && compoundmiddle) {
2242                      p = affix_check_morph(st, i, compoundmiddle);
2243                    }
2244                 }
2245                 if (p && (*p != '\0')) {
2246                     sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
2247                         MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
2248                 }
2249                 if (p) free(p);
2250                 checked_prefix = 1;
2251             }
2252         // else check forbiddenwords
2253         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
2254             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
2255             TESTAFF(rv->astr, needaffix, rv->alen))) {
2256                 st[i] = ch;
2257                 continue;
2258         }
2259
2260             // check non_compound flag in suffix and prefix
2261             if ((rv) && !hu_mov_rule &&
2262                 ((pfx && pfx->getCont() &&
2263                     TESTAFF(pfx->getCont(), compoundforbidflag,
2264                         pfx->getContLen())) ||
2265                 (sfx && sfx->getCont() &&
2266                     TESTAFF(sfx->getCont(), compoundforbidflag,
2267                         sfx->getContLen())))) {
2268                     continue;
2269             }
2270
2271             // check compoundend flag in suffix and prefix
2272             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
2273                 ((pfx && pfx->getCont() &&
2274                     TESTAFF(pfx->getCont(), compoundend,
2275                         pfx->getContLen())) ||
2276                 (sfx && sfx->getCont() &&
2277                     TESTAFF(sfx->getCont(), compoundend,
2278                         sfx->getContLen())))) {
2279                     continue;
2280             }
2281
2282             // check compoundmiddle flag in suffix and prefix
2283             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
2284                 ((pfx && pfx->getCont() &&
2285                     TESTAFF(pfx->getCont(), compoundmiddle,
2286                         pfx->getContLen())) ||
2287                 (sfx && sfx->getCont() &&
2288                     TESTAFF(sfx->getCont(), compoundmiddle,
2289                         sfx->getContLen())))) {
2290                     rv = NULL;
2291             }
2292
2293         // check forbiddenwords
2294         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)
2295             || TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) continue;
2296
2297         // increment word number, if the second root has a compoundroot flag
2298         if ((rv) && (compoundroot) &&
2299             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2300                 wordnum++;
2301         }
2302
2303         // first word is acceptable in compound words?
2304         if (((rv) &&
2305           ( checked_prefix || (words && words[wnum]) ||
2306             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2307             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
2308             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))
2309 // LANG_hu section: spec. Hungarian rule
2310             || ((langnum == LANG_hu) && // hu_mov_rule
2311                 hu_mov_rule && (
2312                     TESTAFF(rv->astr, 'F', rv->alen) ||
2313                     TESTAFF(rv->astr, 'G', rv->alen) ||
2314                     TESTAFF(rv->astr, 'H', rv->alen)
2315                 )
2316               )
2317 // END of LANG_hu section
2318           )
2319           && ! (( checkcompoundtriple && !words && // test triple letters
2320                    (word[i-1]==word[i]) && (
2321                       ((i>1) && (word[i-1]==word[i-2])) ||
2322                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
2323                    )
2324                ) ||
2325                (
2326                    // test CHECKCOMPOUNDPATTERN
2327                    numcheckcpd && !words && cpdpat_check(word, i, rv, NULL, affixed)
2328                ) ||
2329                (
2330                  checkcompoundcase && !words && cpdcase_check(word, i)
2331                ))
2332          )
2333 // LANG_hu section: spec. Hungarian rule
2334          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
2335               (sfx && sfx->getCont() && (
2336                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
2337                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
2338                     )
2339                )
2340              )
2341 // END of LANG_hu section
2342          ) {
2343
2344 // LANG_hu section: spec. Hungarian rule
2345             if (langnum == LANG_hu) {
2346                 // calculate syllable number of the word
2347                 numsyllable += get_syllable(st, i);
2348
2349                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2350                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
2351             }
2352 // END of LANG_hu section
2353
2354             // NEXT WORD(S)
2355             rv_first = rv;
2356             rv = lookup((word+i)); // perhaps without prefix
2357
2358         // search homonym with compound flag
2359         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
2360                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2361                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
2362                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
2363             rv = rv->next_homonym;
2364         }
2365
2366             if (rv && words && words[wnum + 1]) {
2367                   mystrcat(*result, presult, MAXLNLEN);
2368                   mystrcat(*result, " ", MAXLNLEN);
2369                   mystrcat(*result, MORPH_PART, MAXLNLEN);
2370                   mystrcat(*result, word+i, MAXLNLEN);
2371                   if (complexprefixes && HENTRY_DATA(rv)) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2372                   if (!HENTRY_FIND(rv, MORPH_STEM)) {
2373                     mystrcat(*result, " ", MAXLNLEN);
2374                     mystrcat(*result, MORPH_STEM, MAXLNLEN);
2375                     mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
2376                   }
2377                   // store the pointer of the hash entry
2378 //                  sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2379                   if (!complexprefixes && HENTRY_DATA(rv)) {
2380                     mystrcat(*result, " ", MAXLNLEN);
2381                     mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2382                   }
2383                   mystrcat(*result, "\n", MAXLNLEN);
2384                   ok = 1;
2385                   return 0;
2386             }
2387
2388             oldnumsyllable2 = numsyllable;
2389             oldwordnum2 = wordnum;
2390
2391 // LANG_hu section: spec. Hungarian rule
2392             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
2393                 numsyllable--;
2394             }
2395 // END of LANG_hu section
2396             // increment word number, if the second root has a compoundroot flag
2397             if ((rv) && (compoundroot) &&
2398                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2399                     wordnum++;
2400             }
2401
2402             // check forbiddenwords
2403             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
2404                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
2405                 st[i] = ch;
2406                 continue;
2407             }
2408
2409             // second word is acceptable, as a root?
2410             // hungarian conventions: compounding is acceptable,
2411             // when compound forms consist of 2 words, or if more,
2412             // then the syllable number of root words must be 6, or lesser.
2413             if ((rv) && (
2414                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2415                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
2416                     )
2417                 && (
2418                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2419                       ((cpdmaxsyllable!=0) &&
2420                           (numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
2421                     )
2422                 && (
2423                      (!checkcompounddup || (rv != rv_first))
2424                    )
2425                 )
2426                  {
2427                       // bad compound word
2428                       mystrcat(*result, presult, MAXLNLEN);
2429                       mystrcat(*result, " ", MAXLNLEN);
2430                       mystrcat(*result, MORPH_PART, MAXLNLEN);
2431                       mystrcat(*result, word+i, MAXLNLEN);
2432
2433                       if (HENTRY_DATA(rv)) {
2434                         if (complexprefixes) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2435                         if (! HENTRY_FIND(rv, MORPH_STEM)) {
2436                            mystrcat(*result, " ", MAXLNLEN);
2437                            mystrcat(*result, MORPH_STEM, MAXLNLEN);
2438                            mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
2439                         }
2440                         // store the pointer of the hash entry
2441 //                        sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2442                         if (!complexprefixes) {
2443                             mystrcat(*result, " ", MAXLNLEN);
2444                             mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2445                         }
2446                       }
2447                       mystrcat(*result, "\n", MAXLNLEN);
2448                               ok = 1;
2449             }
2450
2451             numsyllable = oldnumsyllable2 ;
2452             wordnum = oldwordnum2;
2453
2454             // perhaps second word has prefix or/and suffix
2455             sfx = NULL;
2456             sfxflag = FLAG_NULL;
2457
2458             if (compoundflag && !onlycpdrule) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
2459
2460             if (!rv && compoundend && !onlycpdrule) {
2461                 sfx = NULL;
2462                 pfx = NULL;
2463                 rv = affix_check((word+i),strlen(word+i), compoundend);
2464             }
2465
2466             if (!rv && numdefcpd && words) {
2467                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
2468                 if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
2469                       char * m = NULL;
2470                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2471                       if ((!m || *m == '\0') && compoundend) {
2472                             if (m) free(m);
2473                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
2474                       }
2475                       mystrcat(*result, presult, MAXLNLEN);
2476                       if (m || (*m != '\0')) {
2477                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
2478                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
2479                       }
2480                       if (m) free(m);
2481                       mystrcat(*result, "\n", MAXLNLEN);
2482                       ok = 1;
2483                 }
2484             }
2485
2486             // check non_compound flag in suffix and prefix
2487             if ((rv) &&
2488                 ((pfx && pfx->getCont() &&
2489                     TESTAFF(pfx->getCont(), compoundforbidflag,
2490                         pfx->getContLen())) ||
2491                 (sfx && sfx->getCont() &&
2492                     TESTAFF(sfx->getCont(), compoundforbidflag,
2493                         sfx->getContLen())))) {
2494                     rv = NULL;
2495             }
2496
2497             // check forbiddenwords
2498             if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen) ||
2499                     TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))
2500                     && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
2501                         st[i] = ch;
2502                         continue;
2503                     }
2504
2505             if (langnum == LANG_hu) {
2506                 // calculate syllable number of the word
2507                 numsyllable += get_syllable(word + i, strlen(word + i));
2508
2509                 // - affix syllable num.
2510                 // XXX only second suffix (inflections, not derivations)
2511                 if (sfxappnd) {
2512                     char * tmp = myrevstrdup(sfxappnd);
2513                     numsyllable -= get_syllable(tmp, strlen(tmp));
2514                     free(tmp);
2515                 }
2516
2517                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2518                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
2519
2520                 // increment syllable num, if last word has a SYLLABLENUM flag
2521                 // and the suffix is beginning `s'
2522
2523                 if (cpdsyllablenum) {
2524                     switch (sfxflag) {
2525                         case 'c': { numsyllable+=2; break; }
2526                         case 'J': { numsyllable += 1; break; }
2527                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
2528                     }
2529                 }
2530             }
2531
2532             // increment word number, if the second word has a compoundroot flag
2533             if ((rv) && (compoundroot) &&
2534                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2535                     wordnum++;
2536             }
2537             // second word is acceptable, as a word with prefix or/and suffix?
2538             // hungarian conventions: compounding is acceptable,
2539             // when compound forms consist 2 word, otherwise
2540             // the syllable number of root words is 6, or lesser.
2541             if ((rv) &&
2542                     (
2543                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2544                       ((cpdmaxsyllable!=0) &&
2545                           (numsyllable <= cpdmaxsyllable))
2546                     )
2547                 && (
2548                    (!checkcompounddup || (rv != rv_first))
2549                    )) {
2550                       char * m = NULL;
2551                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2552                       if ((!m || *m == '\0') && compoundend) {
2553                             if (m) free(m);
2554                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
2555                       }
2556                       mystrcat(*result, presult, MAXLNLEN);
2557                       if (m && (*m != '\0')) {
2558                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
2559                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
2560                       }
2561                       if (m) free(m);
2562                       sprintf(*result + strlen(*result), "%c", MSEP_REC);
2563                       ok = 1;
2564             }
2565
2566             numsyllable = oldnumsyllable2;
2567             wordnum = oldwordnum2;
2568
2569             // perhaps second word is a compound word (recursive call)
2570             if ((wordnum < maxwordnum) && (ok == 0)) {
2571                         compound_check_morph((word+i),strlen(word+i), wordnum+1,
2572                              numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
2573             } else {
2574                 rv=NULL;
2575             }
2576         }
2577         st[i] = ch;
2578         wordnum = oldwordnum;
2579         numsyllable = oldnumsyllable;
2580
2581         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
2582
2583     }
2584     return 0;
2585 }
2586
2587  // return 1 if s1 (reversed) is a leading subset of end of s2
2588 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2589  {
2590     while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
2591         s1++;
2592         end_of_s2--;
2593         len--;
2594     }
2595     return (*s1 == '\0');
2596  }
2597  */
2598
2599 inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2600  {
2601     while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) {
2602         s1++;
2603         end_of_s2--;
2604         len--;
2605     }
2606     return (*s1 == '\0');
2607  }
2608
2609 // check word for suffixes
2610
2611 struct hentry * AffixMgr::suffix_check (const char * word, int len,
2612        int sfxopts, PfxEntry * ppfx, char ** wlst, int maxSug, int * ns,
2613        const FLAG cclass, const FLAG needflag, char in_compound)
2614 {
2615     struct hentry * rv = NULL;
2616     PfxEntry* ep = ppfx;
2617
2618     // first handle the special case of 0 length suffixes
2619     SfxEntry * se = sStart[0];
2620
2621     while (se) {
2622         if (!cclass || se->getCont()) {
2623             // suffixes are not allowed in beginning of compounds
2624             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2625              // except when signed with compoundpermitflag flag
2626              (se->getCont() && compoundpermitflag &&
2627                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2628               // no circumfix flag in prefix and suffix
2629               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2630                    circumfix, ep->getContLen())) &&
2631                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2632               // circumfix flag in prefix AND suffix
2633               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2634                    circumfix, ep->getContLen())) &&
2635                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2636             // fogemorpheme
2637               (in_compound ||
2638                  !(se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
2639             // needaffix on prefix or first suffix
2640               (cclass ||
2641                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
2642                    (ppfx && !((ep->getCont()) &&
2643                      TESTAFF(ep->getCont(), needaffix,
2644                        ep->getContLen())))
2645               )) {
2646                 rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass,
2647                     needflag, (in_compound ? 0 : onlyincompound));
2648                 if (rv) {
2649                     sfx=se; // BUG: sfx not stateless
2650                     return rv;
2651                 }
2652             }
2653         }
2654        se = se->getNext();
2655     }
2656
2657     // now handle the general case
2658     if (len == 0) return NULL; // FULLSTRIP
2659     unsigned char sp= *((const unsigned char *)(word + len - 1));
2660     SfxEntry * sptr = sStart[sp];
2661
2662     while (sptr) {
2663         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2664         ) {
2665             // suffixes are not allowed in beginning of compounds
2666             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2667              // except when signed with compoundpermitflag flag
2668              (sptr->getCont() && compoundpermitflag &&
2669                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2670               // no circumfix flag in prefix and suffix
2671               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2672                    circumfix, ep->getContLen())) &&
2673                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2674               // circumfix flag in prefix AND suffix
2675               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2676                    circumfix, ep->getContLen())) &&
2677                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2678             // fogemorpheme
2679               (in_compound ||
2680                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2681             // needaffix on prefix or first suffix
2682               (cclass ||
2683                   !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
2684                   (ppfx && !((ep->getCont()) &&
2685                      TESTAFF(ep->getCont(), needaffix,
2686                        ep->getContLen())))
2687               )
2688             ) if (in_compound != IN_CPD_END || ppfx || !(sptr->getCont() && TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
2689                 rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
2690                     maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
2691                 if (rv) {
2692                     sfx=sptr; // BUG: sfx not stateless
2693                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2694                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2695                     return rv;
2696                 }
2697              }
2698              sptr = sptr->getNextEQ();
2699         } else {
2700              sptr = sptr->getNextNE();
2701         }
2702     }
2703
2704     return NULL;
2705 }
2706
2707 // check word for two-level suffixes
2708
2709 struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
2710        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
2711 {
2712     struct hentry * rv = NULL;
2713
2714     // first handle the special case of 0 length suffixes
2715     SfxEntry * se = sStart[0];
2716     while (se) {
2717         if (contclasses[se->getFlag()])
2718         {
2719             rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
2720             if (rv) return rv;
2721         }
2722         se = se->getNext();
2723     }
2724
2725     // now handle the general case
2726     if (len == 0) return NULL; // FULLSTRIP
2727     unsigned char sp = *((const unsigned char *)(word + len - 1));
2728     SfxEntry * sptr = sStart[sp];
2729
2730     while (sptr) {
2731         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2732             if (contclasses[sptr->getFlag()])
2733             {
2734                 rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
2735                 if (rv) {
2736                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2737                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2738                     return rv;
2739                 }
2740             }
2741             sptr = sptr->getNextEQ();
2742         } else {
2743              sptr = sptr->getNextNE();
2744         }
2745     }
2746
2747     return NULL;
2748 }
2749
2750 char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
2751        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
2752 {
2753     char result[MAXLNLEN];
2754     char result2[MAXLNLEN];
2755     char result3[MAXLNLEN];
2756
2757     char * st;
2758
2759     result[0] = '\0';
2760     result2[0] = '\0';
2761     result3[0] = '\0';
2762
2763     // first handle the special case of 0 length suffixes
2764     SfxEntry * se = sStart[0];
2765     while (se) {
2766         if (contclasses[se->getFlag()])
2767         {
2768             st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2769             if (st) {
2770                 if (ppfx) {
2771                     if (ppfx->getMorph()) {
2772                         mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2773                         mystrcat(result, " ", MAXLNLEN);
2774                     } else debugflag(result, ppfx->getFlag());
2775                 }
2776                 mystrcat(result, st, MAXLNLEN);
2777                 free(st);
2778                 if (se->getMorph()) {
2779                     mystrcat(result, " ", MAXLNLEN);
2780                     mystrcat(result, se->getMorph(), MAXLNLEN);
2781                 } else debugflag(result, se->getFlag());
2782                 mystrcat(result, "\n", MAXLNLEN);
2783             }
2784         }
2785         se = se->getNext();
2786     }
2787
2788     // now handle the general case
2789     if (len == 0) return NULL; // FULLSTRIP
2790     unsigned char sp = *((const unsigned char *)(word + len - 1));
2791     SfxEntry * sptr = sStart[sp];
2792
2793     while (sptr) {
2794         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2795             if (contclasses[sptr->getFlag()])
2796             {
2797                 st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2798                 if (st) {
2799                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2800                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2801                     strcpy(result2, st);
2802                     free(st);
2803
2804                 result3[0] = '\0';
2805
2806                 if (sptr->getMorph()) {
2807                     mystrcat(result3, " ", MAXLNLEN);
2808                     mystrcat(result3, sptr->getMorph(), MAXLNLEN);
2809                 } else debugflag(result3, sptr->getFlag());
2810                 strlinecat(result2, result3);
2811                 mystrcat(result2, "\n", MAXLNLEN);
2812                 mystrcat(result,  result2, MAXLNLEN);
2813                 }
2814             }
2815             sptr = sptr->getNextEQ();
2816         } else {
2817              sptr = sptr->getNextNE();
2818         }
2819     }
2820     if (*result) return mystrdup(result);
2821     return NULL;
2822 }
2823
2824 char * AffixMgr::suffix_check_morph(const char * word, int len,
2825        int sfxopts, PfxEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
2826 {
2827     char result[MAXLNLEN];
2828
2829     struct hentry * rv = NULL;
2830
2831     result[0] = '\0';
2832
2833     PfxEntry* ep = ppfx;
2834
2835     // first handle the special case of 0 length suffixes
2836     SfxEntry * se = sStart[0];
2837     while (se) {
2838         if (!cclass || se->getCont()) {
2839             // suffixes are not allowed in beginning of compounds
2840             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2841              // except when signed with compoundpermitflag flag
2842              (se->getCont() && compoundpermitflag &&
2843                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2844               // no circumfix flag in prefix and suffix
2845               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2846                    circumfix, ep->getContLen())) &&
2847                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2848               // circumfix flag in prefix AND suffix
2849               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2850                    circumfix, ep->getContLen())) &&
2851                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2852             // fogemorpheme
2853               (in_compound ||
2854                  !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
2855             // needaffix on prefix or first suffix
2856               (cclass ||
2857                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
2858                    (ppfx && !((ep->getCont()) &&
2859                      TESTAFF(ep->getCont(), needaffix,
2860                        ep->getContLen())))
2861               )
2862             ))
2863             rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2864          while (rv) {
2865            if (ppfx) {
2866                 if (ppfx->getMorph()) {
2867                     mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2868                     mystrcat(result, " ", MAXLNLEN);
2869                 } else debugflag(result, ppfx->getFlag());
2870             }
2871             if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2872             if (! HENTRY_FIND(rv, MORPH_STEM)) {
2873                 mystrcat(result, " ", MAXLNLEN);
2874                 mystrcat(result, MORPH_STEM, MAXLNLEN);
2875                 mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
2876             }
2877             // store the pointer of the hash entry
2878 //            sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2879
2880             if (!complexprefixes && HENTRY_DATA(rv)) {
2881                     mystrcat(result, " ", MAXLNLEN);
2882                     mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2883             }
2884             if (se->getMorph()) {
2885                 mystrcat(result, " ", MAXLNLEN);
2886                 mystrcat(result, se->getMorph(), MAXLNLEN);
2887             } else debugflag(result, se->getFlag());
2888             mystrcat(result, "\n", MAXLNLEN);
2889             rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2890          }
2891        }
2892        se = se->getNext();
2893     }
2894
2895     // now handle the general case
2896     if (len == 0) return NULL; // FULLSTRIP
2897     unsigned char sp = *((const unsigned char *)(word + len - 1));
2898     SfxEntry * sptr = sStart[sp];
2899
2900     while (sptr) {
2901         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2902         ) {
2903             // suffixes are not allowed in beginning of compounds
2904             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2905              // except when signed with compoundpermitflag flag
2906              (sptr->getCont() && compoundpermitflag &&
2907                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2908               // no circumfix flag in prefix and suffix
2909               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2910                    circumfix, ep->getContLen())) &&
2911                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2912               // circumfix flag in prefix AND suffix
2913               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2914                    circumfix, ep->getContLen())) &&
2915                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2916             // fogemorpheme
2917               (in_compound ||
2918                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2919             // needaffix on first suffix
2920               (cclass || !(sptr->getCont() &&
2921                    TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
2922             )) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2923             while (rv) {
2924                     if (ppfx) {
2925                         if (ppfx->getMorph()) {
2926                             mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2927                             mystrcat(result, " ", MAXLNLEN);
2928                         } else debugflag(result, ppfx->getFlag());
2929                     }
2930                     if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2931                     if (! HENTRY_FIND(rv, MORPH_STEM)) {
2932                             mystrcat(result, " ", MAXLNLEN);
2933                             mystrcat(result, MORPH_STEM, MAXLNLEN);
2934                             mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
2935                     }
2936                     // store the pointer of the hash entry
2937 //                    sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2938
2939                     if (!complexprefixes && HENTRY_DATA(rv)) {
2940                         mystrcat(result, " ", MAXLNLEN);
2941                         mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2942                     }
2943
2944                 if (sptr->getMorph()) {
2945                     mystrcat(result, " ", MAXLNLEN);
2946                     mystrcat(result, sptr->getMorph(), MAXLNLEN);
2947                 } else debugflag(result, sptr->getFlag());
2948                 mystrcat(result, "\n", MAXLNLEN);
2949                 rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2950             }
2951              sptr = sptr->getNextEQ();
2952         } else {
2953              sptr = sptr->getNextNE();
2954         }
2955     }
2956
2957     if (*result) return mystrdup(result);
2958     return NULL;
2959 }
2960
2961 // check if word with affixes is correctly spelled
2962 struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
2963 {
2964     struct hentry * rv= NULL;
2965
2966     // check all prefixes (also crossed with suffixes if allowed)
2967     rv = prefix_check(word, len, in_compound, needflag);
2968     if (rv) return rv;
2969
2970     // if still not found check all suffixes
2971     rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
2972
2973     if (havecontclass) {
2974         sfx = NULL;
2975         pfx = NULL;
2976
2977         if (rv) return rv;
2978         // if still not found check all two-level suffixes
2979         rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
2980
2981         if (rv) return rv;
2982         // if still not found check all two-level suffixes
2983         rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
2984     }
2985
2986     return rv;
2987 }
2988
2989 // check if word with affixes is correctly spelled
2990 char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
2991 {
2992     char result[MAXLNLEN];
2993     char * st = NULL;
2994
2995     *result = '\0';
2996
2997     // check all prefixes (also crossed with suffixes if allowed)
2998     st = prefix_check_morph(word, len, in_compound);
2999     if (st) {
3000         mystrcat(result, st, MAXLNLEN);
3001         free(st);
3002     }
3003
3004     // if still not found check all suffixes
3005     st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
3006     if (st) {
3007         mystrcat(result, st, MAXLNLEN);
3008         free(st);
3009     }
3010
3011     if (havecontclass) {
3012         sfx = NULL;
3013         pfx = NULL;
3014         // if still not found check all two-level suffixes
3015         st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
3016         if (st) {
3017             mystrcat(result, st, MAXLNLEN);
3018             free(st);
3019         }
3020
3021         // if still not found check all two-level suffixes
3022         st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
3023         if (st) {
3024             mystrcat(result, st, MAXLNLEN);
3025             free(st);
3026         }
3027     }
3028
3029     return mystrdup(result);
3030 }
3031
3032 char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
3033     unsigned short al, char * morph, char * targetmorph, int level)
3034 {
3035     // handle suffixes
3036     char * stemmorph;
3037     char * stemmorphcatpos;
3038     char mymorph[MAXLNLEN];
3039
3040     if (!morph) return NULL;
3041
3042     // check substandard flag
3043     if (TESTAFF(ap, substandard, al)) return NULL;
3044
3045     if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
3046
3047 //    int targetcount = get_sfxcount(targetmorph);
3048
3049     // use input suffix fields, if exist
3050     if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
3051         stemmorph = mymorph;
3052         strcpy(stemmorph, morph);
3053         mystrcat(stemmorph, " ", MAXLNLEN);
3054         stemmorphcatpos = stemmorph + strlen(stemmorph);
3055     } else {
3056         stemmorph = morph;
3057         stemmorphcatpos = NULL;
3058     }
3059
3060     for (int i = 0; i < al; i++) {
3061         const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
3062         SfxEntry * sptr = sFlag[c];
3063         while (sptr) {
3064             if (sptr->getFlag() == ap[i] && sptr->getMorph() && ((sptr->getContLen() == 0) ||
3065                 // don't generate forms with substandard affixes
3066                 !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
3067
3068                 if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
3069                 else stemmorph = (char *) sptr->getMorph();
3070
3071                 int cmp = morphcmp(stemmorph, targetmorph);
3072
3073                 if (cmp == 0) {
3074                     char * newword = sptr->add(ts, wl);
3075                     if (newword) {
3076                         hentry * check = pHMgr->lookup(newword); // XXX extra dic
3077                         if (!check || !check->astr ||
3078                             !(TESTAFF(check->astr, forbiddenword, check->alen) ||
3079                               TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
3080                                 return newword;
3081                         }
3082                         free(newword);
3083                     }
3084                 }
3085
3086                 // recursive call for secondary suffixes
3087                 if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
3088 //                    (get_sfxcount(stemmorph) < targetcount) &&
3089                     !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
3090                     char * newword = sptr->add(ts, wl);
3091                     if (newword) {
3092                         char * newword2 = morphgen(newword, strlen(newword), sptr->getCont(),
3093                             sptr->getContLen(), stemmorph, targetmorph, 1);
3094
3095                         if (newword2) {
3096                             free(newword);
3097                             return newword2;
3098                         }
3099                         free(newword);
3100                         newword = NULL;
3101                     }
3102                 }
3103             }
3104             sptr = sptr->getFlgNxt();
3105         }
3106     }
3107    return NULL;
3108 }
3109
3110
3111 int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
3112     int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
3113     char * phon)
3114 {
3115     int nh=0;
3116     // first add root word to list
3117     if ((nh < maxn) && !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
3118          (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
3119        wlst[nh].word = mystrdup(ts);
3120        if (!wlst[nh].word) return 0;
3121        wlst[nh].allow = (1 == 0);
3122        wlst[nh].orig = NULL;
3123        nh++;
3124        // add special phonetic version
3125        if (phon && (nh < maxn)) {
3126             wlst[nh].word = mystrdup(phon);
3127             if (!wlst[nh].word) return nh - 1;
3128             wlst[nh].allow = (1 == 0);
3129             wlst[nh].orig = mystrdup(ts);
3130             if (!wlst[nh].orig) return nh - 1;
3131             nh++;
3132        }
3133     }
3134
3135     // handle suffixes
3136     for (int i = 0; i < al; i++) {
3137        const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
3138        SfxEntry * sptr = sFlag[c];
3139        while (sptr) {
3140          if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
3141                 (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
3142                 // check needaffix flag
3143                 !(sptr->getCont() && ((needaffix &&
3144                       TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
3145                   (circumfix &&
3146                       TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
3147                   (onlyincompound &&
3148                       TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
3149                 ) {
3150             char * newword = sptr->add(ts, wl);
3151             if (newword) {
3152                 if (nh < maxn) {
3153                     wlst[nh].word = newword;
3154                     wlst[nh].allow = sptr->allowCross();
3155                     wlst[nh].orig = NULL;
3156                     nh++;
3157                     // add special phonetic version
3158                     if (phon && (nh < maxn)) {
3159                         char st[MAXWORDUTF8LEN];
3160                         strcpy(st, phon);
3161                         strcat(st, sptr->getKey());
3162                         reverseword(st + strlen(phon));
3163                         wlst[nh].word = mystrdup(st);
3164                         if (!wlst[nh].word) return nh - 1;
3165                         wlst[nh].allow = (1 == 0);
3166                         wlst[nh].orig = mystrdup(newword);
3167                         if (!wlst[nh].orig) return nh - 1;
3168                         nh++;
3169                     }
3170                 } else {
3171                     free(newword);
3172                 }
3173             }
3174          }
3175          sptr = sptr->getFlgNxt();
3176        }
3177     }
3178
3179     int n = nh;
3180
3181     // handle cross products of prefixes and suffixes
3182     for (int j=1;j<n ;j++)
3183        if (wlst[j].allow) {
3184           for (int k = 0; k < al; k++) {
3185              const unsigned char c = (unsigned char) (ap[k] & 0x00FF);
3186              PfxEntry * cptr = pFlag[c];
3187              while (cptr) {
3188                 if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
3189                         (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
3190                     int l1 = strlen(wlst[j].word);
3191                     char * newword = cptr->add(wlst[j].word, l1);
3192                     if (newword) {
3193                        if (nh < maxn) {
3194                           wlst[nh].word = newword;
3195                           wlst[nh].allow = cptr->allowCross();
3196                           wlst[nh].orig = NULL;
3197                           nh++;
3198                        } else {
3199                           free(newword);
3200                        }
3201                     }
3202                 }
3203                 cptr = cptr->getFlgNxt();
3204              }
3205           }
3206        }
3207
3208
3209     // now handle pure prefixes
3210     for (int m = 0; m < al; m ++) {
3211        const unsigned char c = (unsigned char) (ap[m] & 0x00FF);
3212        PfxEntry * ptr = pFlag[c];
3213        while (ptr) {
3214          if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
3215                 (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
3216                 // check needaffix flag
3217                 !(ptr->getCont() && ((needaffix &&
3218                       TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
3219                      (circumfix &&
3220                       TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
3221                   (onlyincompound &&
3222                       TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
3223                 ) {
3224             char * newword = ptr->add(ts, wl);
3225             if (newword) {
3226                 if (nh < maxn) {
3227                     wlst[nh].word = newword;
3228                     wlst[nh].allow = ptr->allowCross();
3229                     wlst[nh].orig = NULL;
3230                     nh++;
3231                 } else {
3232                     free(newword);
3233                 }
3234             }
3235          }
3236          ptr = ptr->getFlgNxt();
3237        }
3238     }
3239
3240     return nh;
3241 }
3242
3243 // return length of replacing table
3244 int AffixMgr::get_numrep() const
3245 {
3246   return numrep;
3247 }
3248
3249 // return replacing table
3250 struct replentry * AffixMgr::get_reptable() const
3251 {
3252   if (! reptable ) return NULL;
3253   return reptable;
3254 }
3255
3256 // return iconv table
3257 RepList * AffixMgr::get_iconvtable() const
3258 {
3259   if (! iconvtable ) return NULL;
3260   return iconvtable;
3261 }
3262
3263 // return oconv table
3264 RepList * AffixMgr::get_oconvtable() const
3265 {
3266   if (! oconvtable ) return NULL;
3267   return oconvtable;
3268 }
3269
3270 // return replacing table
3271 struct phonetable * AffixMgr::get_phonetable() const
3272 {
3273   if (! phone ) return NULL;
3274   return phone;
3275 }
3276
3277 // return length of character map table
3278 int AffixMgr::get_nummap() const
3279 {
3280   return nummap;
3281 }
3282
3283 // return character map table
3284 struct mapentry * AffixMgr::get_maptable() const
3285 {
3286   if (! maptable ) return NULL;
3287   return maptable;
3288 }
3289
3290 // return length of word break table
3291 int AffixMgr::get_numbreak() const
3292 {
3293   return numbreak;
3294 }
3295
3296 // return character map table
3297 char ** AffixMgr::get_breaktable() const
3298 {
3299   if (! breaktable ) return NULL;
3300   return breaktable;
3301 }
3302
3303 // return text encoding of dictionary
3304 char * AffixMgr::get_encoding()
3305 {
3306   if (! encoding ) encoding = mystrdup(SPELL_ENCODING);
3307   return mystrdup(encoding);
3308 }
3309
3310 // return text encoding of dictionary
3311 int AffixMgr::get_langnum() const
3312 {
3313   return langnum;
3314 }
3315
3316 // return double prefix option
3317 int AffixMgr::get_complexprefixes() const
3318 {
3319   return complexprefixes;
3320 }
3321
3322 // return FULLSTRIP option
3323 int AffixMgr::get_fullstrip() const
3324 {
3325   return fullstrip;
3326 }
3327
3328 FLAG AffixMgr::get_keepcase() const
3329 {
3330   return keepcase;
3331 }
3332
3333 FLAG AffixMgr::get_forceucase() const
3334 {
3335   return forceucase;
3336 }
3337
3338 FLAG AffixMgr::get_warn() const
3339 {
3340   return warn;
3341 }
3342
3343 int AffixMgr::get_forbidwarn() const
3344 {
3345   return forbidwarn;
3346 }
3347
3348 int AffixMgr::get_checksharps() const
3349 {
3350   return checksharps;
3351 }
3352
3353 char * AffixMgr::encode_flag(unsigned short aflag) const
3354 {
3355   return pHMgr->encode_flag(aflag);
3356 }
3357
3358
3359 // return the preferred ignore string for suggestions
3360 char * AffixMgr::get_ignore() const
3361 {
3362   if (!ignorechars) return NULL;
3363   return ignorechars;
3364 }
3365
3366 // return the preferred ignore string for suggestions
3367 unsigned short * AffixMgr::get_ignore_utf16(int * len) const
3368 {
3369   *len = ignorechars_utf16_len;
3370   return ignorechars_utf16;
3371 }
3372
3373 // return the keyboard string for suggestions
3374 char * AffixMgr::get_key_string()
3375 {
3376   if (! keystring ) keystring = mystrdup(SPELL_KEYSTRING);
3377   return mystrdup(keystring);
3378 }
3379
3380 // return the preferred try string for suggestions
3381 char * AffixMgr::get_try_string() const
3382 {
3383   if (! trystring ) return NULL;
3384   return mystrdup(trystring);
3385 }
3386
3387 // return the preferred try string for suggestions
3388 const char * AffixMgr::get_wordchars() const
3389 {
3390   return wordchars;
3391 }
3392
3393 unsigned short * AffixMgr::get_wordchars_utf16(int * len) const
3394 {
3395   *len = wordchars_utf16_len;
3396   return wordchars_utf16;
3397 }
3398
3399 // is there compounding?
3400 int AffixMgr::get_compound() const
3401 {
3402   return compoundflag || compoundbegin || numdefcpd;
3403 }
3404
3405 // return the compound words control flag
3406 FLAG AffixMgr::get_compoundflag() const
3407 {
3408   return compoundflag;
3409 }
3410
3411 // return the forbidden words control flag
3412 FLAG AffixMgr::get_forbiddenword() const
3413 {
3414   return forbiddenword;
3415 }
3416
3417 // return the forbidden words control flag
3418 FLAG AffixMgr::get_nosuggest() const
3419 {
3420   return nosuggest;
3421 }
3422
3423 // return the forbidden words control flag
3424 FLAG AffixMgr::get_nongramsuggest() const
3425 {
3426   return nongramsuggest;
3427 }
3428
3429 // return the forbidden words flag modify flag
3430 FLAG AffixMgr::get_needaffix() const
3431 {
3432   return needaffix;
3433 }
3434
3435 // return the onlyincompound flag
3436 FLAG AffixMgr::get_onlyincompound() const
3437 {
3438   return onlyincompound;
3439 }
3440
3441 // return the compound word signal flag
3442 FLAG AffixMgr::get_compoundroot() const
3443 {
3444   return compoundroot;
3445 }
3446
3447 // return the compound begin signal flag
3448 FLAG AffixMgr::get_compoundbegin() const
3449 {
3450   return compoundbegin;
3451 }
3452
3453 // return the value of checknum
3454 int AffixMgr::get_checknum() const
3455 {
3456   return checknum;
3457 }
3458
3459 // return the value of prefix
3460 const char * AffixMgr::get_prefix() const
3461 {
3462   if (pfx) return pfx->getKey();
3463   return NULL;
3464 }
3465
3466 // return the value of suffix
3467 const char * AffixMgr::get_suffix() const
3468 {
3469   return sfxappnd;
3470 }
3471
3472 // return the value of suffix
3473 const char * AffixMgr::get_version() const
3474 {
3475   return version;
3476 }
3477
3478 // return lemma_present flag
3479 FLAG AffixMgr::get_lemma_present() const
3480 {
3481   return lemma_present;
3482 }
3483
3484 // utility method to look up root words in hash table
3485 struct hentry * AffixMgr::lookup(const char * word)
3486 {
3487   int i;
3488   struct hentry * he = NULL;
3489   for (i = 0; i < *maxdic && !he; i++) {
3490     he = (alldic[i])->lookup(word);
3491   }
3492   return he;
3493 }
3494
3495 // return the value of suffix
3496 int AffixMgr::have_contclass() const
3497 {
3498   return havecontclass;
3499 }
3500
3501 // return utf8
3502 int AffixMgr::get_utf8() const
3503 {
3504   return utf8;
3505 }
3506
3507 int AffixMgr::get_maxngramsugs(void) const
3508 {
3509   return maxngramsugs;
3510 }
3511
3512 int AffixMgr::get_maxcpdsugs(void) const
3513 {
3514   return maxcpdsugs;
3515 }
3516
3517 int AffixMgr::get_maxdiff(void) const
3518 {
3519   return maxdiff;
3520 }
3521
3522 int AffixMgr::get_onlymaxdiff(void) const
3523 {
3524   return onlymaxdiff;
3525 }
3526
3527 // return nosplitsugs
3528 int AffixMgr::get_nosplitsugs(void) const
3529 {
3530   return nosplitsugs;
3531 }
3532
3533 // return sugswithdots
3534 int AffixMgr::get_sugswithdots(void) const
3535 {
3536   return sugswithdots;
3537 }
3538
3539 /* parse flag */
3540 int AffixMgr::parse_flag(char * line, unsigned short * out, FileMgr * af) {
3541    char * s = NULL;
3542    if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
3543       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
3544       return 1;
3545    }
3546    if (parse_string(line, &s, af->getlinenum())) return 1;
3547    *out = pHMgr->decode_flag(s);
3548    free(s);
3549    return 0;
3550 }
3551
3552 /* parse num */
3553 int AffixMgr::parse_num(char * line, int * out, FileMgr * af) {
3554    char * s = NULL;
3555    if (*out != -1) {
3556       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
3557       return 1;
3558    }
3559    if (parse_string(line, &s, af->getlinenum())) return 1;
3560    *out = atoi(s);
3561    free(s);
3562    return 0;
3563 }
3564
3565 /* parse in the max syllablecount of compound words and  */
3566 int  AffixMgr::parse_cpdsyllable(char * line, FileMgr * af)
3567 {
3568    char * tp = line;
3569    char * piece;
3570    int i = 0;
3571    int np = 0;
3572    w_char w[MAXWORDLEN];
3573    piece = mystrsep(&tp, 0);
3574    while (piece) {
3575       if (*piece != '\0') {
3576           switch(i) {
3577              case 0: { np++; break; }
3578              case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
3579              case 2: {
3580                 if (!utf8) {
3581                     cpdvowels = mystrdup(piece);
3582                 } else {
3583                     int n = u8_u16(w, MAXWORDLEN, piece);
3584                     if (n > 0) {
3585                         flag_qsort((unsigned short *) w, 0, n);
3586                         cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
3587                         if (!cpdvowels_utf16) return 1;
3588                         memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
3589                     }
3590                     cpdvowels_utf16_len = n;
3591                 }
3592                 np++;
3593                 break;
3594              }
3595              default: break;
3596           }
3597           i++;
3598       }
3599       piece = mystrsep(&tp, 0);
3600    }
3601    if (np < 2) {
3602       HUNSPELL_WARNING(stderr, "error: line %d: missing compoundsyllable information\n", af->getlinenum());
3603       return 1;
3604    }
3605    if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
3606    return 0;
3607 }
3608
3609 /* parse in the typical fault correcting table */
3610 int  AffixMgr::parse_reptable(char * line, FileMgr * af)
3611 {
3612    if (numrep != 0) {
3613       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3614       return 1;
3615    }
3616    char * tp = line;
3617    char * piece;
3618    int i = 0;
3619    int np = 0;
3620    piece = mystrsep(&tp, 0);
3621    while (piece) {
3622        if (*piece != '\0') {
3623           switch(i) {
3624              case 0: { np++; break; }
3625              case 1: {
3626                        numrep = atoi(piece);
3627                        if (numrep < 1) {
3628                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
3629                           return 1;
3630                        }
3631                        reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
3632                        if (!reptable) return 1;
3633                        np++;
3634                        break;
3635                      }
3636              default: break;
3637           }
3638           i++;
3639        }
3640        piece = mystrsep(&tp, 0);
3641    }
3642    if (np != 2) {
3643       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3644       return 1;
3645    }
3646
3647    /* now parse the numrep lines to read in the remainder of the table */
3648    char * nl;
3649    for (int j=0; j < numrep; j++) {
3650         if ((nl = af->getline()) == NULL) return 1;
3651         mychomp(nl);
3652         tp = nl;
3653         i = 0;
3654         reptable[j].pattern = NULL;
3655         reptable[j].pattern2 = NULL;
3656         piece = mystrsep(&tp, 0);
3657         while (piece) {
3658            if (*piece != '\0') {
3659                switch(i) {
3660                   case 0: {
3661                              if (strncmp(piece,"REP",3) != 0) {
3662                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3663                                  numrep = 0;
3664                                  return 1;
3665                              }
3666                              break;
3667                           }
3668                   case 1: {
3669                             if (*piece == '^') reptable[j].start = true; else reptable[j].start = false;
3670                             reptable[j].pattern = mystrrep(mystrdup(piece + int(reptable[j].start)),"_"," ");
3671                             int lr = strlen(reptable[j].pattern) - 1;
3672                             if (reptable[j].pattern[lr] == '$') {
3673                                 reptable[j].end = true;
3674                                 reptable[j].pattern[lr] = '\0';
3675                             } else reptable[j].end = false;
3676                             break;
3677                           }
3678                   case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
3679                   default: break;
3680                }
3681                i++;
3682            }
3683            piece = mystrsep(&tp, 0);
3684         }
3685         if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
3686              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3687              numrep = 0;
3688              return 1;
3689         }
3690    }
3691    return 0;
3692 }
3693
3694 /* parse in the typical fault correcting table */
3695 int  AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
3696 {
3697    if (*rl) {
3698       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3699       return 1;
3700    }
3701    char * tp = line;
3702    char * piece;
3703    int i = 0;
3704    int np = 0;
3705    int numrl = 0;
3706    piece = mystrsep(&tp, 0);
3707    while (piece) {
3708        if (*piece != '\0') {
3709           switch(i) {
3710              case 0: { np++; break; }
3711              case 1: {
3712                        numrl = atoi(piece);
3713                        if (numrl < 1) {
3714                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
3715                           return 1;
3716                        }
3717                        *rl = new RepList(numrl);
3718                        if (!*rl) return 1;
3719                        np++;
3720                        break;
3721                      }
3722              default: break;
3723           }
3724           i++;
3725        }
3726        piece = mystrsep(&tp, 0);
3727    }
3728    if (np != 2) {
3729       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3730       return 1;
3731    }
3732
3733    /* now parse the num lines to read in the remainder of the table */
3734    char * nl;
3735    for (int j=0; j < numrl; j++) {
3736         if (!(nl = af->getline())) return 1;
3737         mychomp(nl);
3738         tp = nl;
3739         i = 0;
3740         char * pattern = NULL;
3741         char * pattern2 = NULL;
3742         piece = mystrsep(&tp, 0);
3743         while (piece) {
3744            if (*piece != '\0') {
3745                switch(i) {
3746                   case 0: {
3747                              if (strncmp(piece, keyword, strlen(keyword)) != 0) {
3748                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3749                                  delete *rl;
3750                                  *rl = NULL;
3751                                  return 1;
3752                              }
3753                              break;
3754                           }
3755                   case 1: { pattern = mystrrep(mystrdup(piece),"_"," "); break; }
3756                   case 2: {
3757                     pattern2 = mystrrep(mystrdup(piece),"_"," ");
3758                     break;
3759                   }
3760                   default: break;
3761                }
3762                i++;
3763            }
3764            piece = mystrsep(&tp, 0);
3765         }
3766         if (!pattern || !pattern2) {
3767             if (pattern)
3768                 free(pattern);
3769             if (pattern2)
3770                 free(pattern2);
3771             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3772             return 1;
3773         }
3774         (*rl)->add(pattern, pattern2);
3775    }
3776    return 0;
3777 }
3778
3779
3780 /* parse in the typical fault correcting table */
3781 int  AffixMgr::parse_phonetable(char * line, FileMgr * af)
3782 {
3783    if (phone) {
3784       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3785       return 1;
3786    }
3787    char * tp = line;
3788    char * piece;
3789    int i = 0;
3790    int np = 0;
3791    piece = mystrsep(&tp, 0);
3792    while (piece) {
3793        if (*piece != '\0') {
3794           switch(i) {
3795              case 0: { np++; break; }
3796              case 1: {
3797                        phone = (phonetable *) malloc(sizeof(struct phonetable));
3798                        if (!phone) return 1;
3799                        phone->num = atoi(piece);
3800                        phone->rules = NULL;
3801                        phone->utf8 = (char) utf8;
3802                        if (phone->num < 1) {
3803                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3804                           return 1;
3805                        }
3806                        phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
3807                        if (!phone->rules) {
3808                           free(phone);
3809                           phone = NULL;
3810                           return 1;
3811                        }
3812                        np++;
3813                        break;
3814                      }
3815              default: break;
3816           }
3817           i++;
3818        }
3819        piece = mystrsep(&tp, 0);
3820    }
3821    if (np != 2) {
3822       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3823       return 1;
3824    }
3825
3826    /* now parse the phone->num lines to read in the remainder of the table */
3827    char * nl;
3828    for (int j=0; j < phone->num; j++) {
3829         if (!(nl = af->getline())) return 1;
3830         mychomp(nl);
3831         tp = nl;
3832         i = 0;
3833         phone->rules[j * 2] = NULL;
3834         phone->rules[j * 2 + 1] = NULL;
3835         piece = mystrsep(&tp, 0);
3836         while (piece) {
3837            if (*piece != '\0') {
3838                switch(i) {
3839                   case 0: {
3840                              if (strncmp(piece,"PHONE",5) != 0) {
3841                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3842                                  phone->num = 0;
3843                                  return 1;
3844                              }
3845                              break;
3846                           }
3847                   case 1: { phone->rules[j * 2] = mystrrep(mystrdup(piece),"_",""); break; }
3848                   case 2: { phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece),"_",""); break; }
3849                   default: break;
3850                }
3851                i++;
3852            }
3853            piece = mystrsep(&tp, 0);
3854         }
3855         if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
3856              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3857              phone->num = 0;
3858              return 1;
3859         }
3860    }
3861    phone->rules[phone->num * 2] = mystrdup("");
3862    phone->rules[phone->num * 2 + 1] = mystrdup("");
3863    init_phonet_hash(*phone);
3864    return 0;
3865 }
3866
3867 /* parse in the checkcompoundpattern table */
3868 int  AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
3869 {
3870    if (numcheckcpd != 0) {
3871       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3872       return 1;
3873    }
3874    char * tp = line;
3875    char * piece;
3876    int i = 0;
3877    int np = 0;
3878    piece = mystrsep(&tp, 0);
3879    while (piece) {
3880        if (*piece != '\0') {
3881           switch(i) {
3882              case 0: { np++; break; }
3883              case 1: {
3884                        numcheckcpd = atoi(piece);
3885                        if (numcheckcpd < 1) {
3886                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3887                           return 1;
3888                        }
3889                        checkcpdtable = (patentry *) malloc(numcheckcpd * sizeof(struct patentry));
3890                        if (!checkcpdtable) return 1;
3891                        np++;
3892                        break;
3893                      }
3894              default: break;
3895           }
3896           i++;
3897        }
3898        piece = mystrsep(&tp, 0);
3899    }
3900    if (np != 2) {
3901       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",  af->getlinenum());
3902       return 1;
3903    }
3904
3905    /* now parse the numcheckcpd lines to read in the remainder of the table */
3906    char * nl;
3907    for (int j=0; j < numcheckcpd; j++) {
3908         if (!(nl = af->getline())) return 1;
3909         mychomp(nl);
3910         tp = nl;
3911         i = 0;
3912         checkcpdtable[j].pattern = NULL;
3913         checkcpdtable[j].pattern2 = NULL;
3914         checkcpdtable[j].pattern3 = NULL;
3915         checkcpdtable[j].cond = FLAG_NULL;
3916         checkcpdtable[j].cond2 = FLAG_NULL;
3917         piece = mystrsep(&tp, 0);
3918         while (piece) {
3919            if (*piece != '\0') {
3920                switch(i) {
3921                   case 0: {
3922                              if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
3923                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3924                                  numcheckcpd = 0;
3925                                  return 1;
3926                              }
3927                              break;
3928                           }
3929                   case 1: {
3930                     checkcpdtable[j].pattern = mystrdup(piece);
3931                     char * p = strchr(checkcpdtable[j].pattern, '/');
3932                     if (p) {
3933                       *p = '\0';
3934                     checkcpdtable[j].cond = pHMgr->decode_flag(p + 1);
3935                     }
3936                     break; }
3937                   case 2: {
3938                     checkcpdtable[j].pattern2 = mystrdup(piece);
3939                     char * p = strchr(checkcpdtable[j].pattern2, '/');
3940                     if (p) {
3941                       *p = '\0';
3942                       checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1);
3943                     }
3944                     break;
3945                     }
3946                   case 3: { checkcpdtable[j].pattern3 = mystrdup(piece); simplifiedcpd = 1; break; }
3947                   default: break;
3948                }
3949                i++;
3950            }
3951            piece = mystrsep(&tp, 0);
3952         }
3953         if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
3954              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3955              numcheckcpd = 0;
3956              return 1;
3957         }
3958    }
3959    return 0;
3960 }
3961
3962 /* parse in the compound rule table */
3963 int  AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
3964 {
3965    if (numdefcpd != 0) {
3966       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3967       return 1;
3968    }
3969    char * tp = line;
3970    char * piece;
3971    int i = 0;
3972    int np = 0;
3973    piece = mystrsep(&tp, 0);
3974    while (piece) {
3975        if (*piece != '\0') {
3976           switch(i) {
3977              case 0: { np++; break; }
3978              case 1: {
3979                        numdefcpd = atoi(piece);
3980                        if (numdefcpd < 1) {
3981                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3982                           return 1;
3983                        }
3984                        defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
3985                        if (!defcpdtable) return 1;
3986                        np++;
3987                        break;
3988                      }
3989              default: break;
3990           }
3991           i++;
3992        }
3993        piece = mystrsep(&tp, 0);
3994    }
3995    if (np != 2) {
3996       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3997       return 1;
3998    }
3999
4000    /* now parse the numdefcpd lines to read in the remainder of the table */
4001    char * nl;
4002    for (int j=0; j < numdefcpd; j++) {
4003         if (!(nl = af->getline())) return 1;
4004         mychomp(nl);
4005         tp = nl;
4006         i = 0;
4007         defcpdtable[j].def = NULL;
4008         piece = mystrsep(&tp, 0);
4009         while (piece) {
4010            if (*piece != '\0') {
4011                switch(i) {
4012                   case 0: {
4013                              if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
4014                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4015                                  numdefcpd = 0;
4016                                  return 1;
4017                              }
4018                              break;
4019                           }
4020                   case 1: { // handle parenthesized flags
4021                             if (strchr(piece, '(')) {
4022                                 defcpdtable[j].def = (FLAG *) malloc(strlen(piece) * sizeof(FLAG));
4023                                 defcpdtable[j].len = 0;
4024                                 int end = 0;
4025                                 FLAG * conv;
4026                                 while (!end) {
4027                                     char * par = piece + 1;
4028                                     while (*par != '(' && *par != ')' && *par != '\0') par++;
4029                                     if (*par == '\0') end = 1; else *par = '\0';
4030                                     if (*piece == '(') piece++;
4031                                     if (*piece == '*' || *piece == '?') {
4032                                         defcpdtable[j].def[defcpdtable[j].len++] = (FLAG) *piece;
4033                                     } else if (*piece != '\0') {
4034                                         int l = pHMgr->decode_flags(&conv, piece, af);
4035                                         for (int k = 0; k < l; k++) defcpdtable[j].def[defcpdtable[j].len++] = conv[k];
4036                                         free(conv);
4037                                     }
4038                                     piece = par + 1;
4039                                 }
4040                             } else {
4041                                 defcpdtable[j].len = pHMgr->decode_flags(&(defcpdtable[j].def), piece, af);
4042                             }
4043                             break;
4044                            }
4045                   default: break;
4046                }
4047                i++;
4048            }
4049            piece = mystrsep(&tp, 0);
4050         }
4051         if (!defcpdtable[j].len) {
4052              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4053              numdefcpd = 0;
4054              return 1;
4055         }
4056    }
4057    return 0;
4058 }
4059
4060
4061 /* parse in the character map table */
4062 int  AffixMgr::parse_maptable(char * line, FileMgr * af)
4063 {
4064    if (nummap != 0) {
4065       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
4066       return 1;
4067    }
4068    char * tp = line;
4069    char * piece;
4070    int i = 0;
4071    int np = 0;
4072    piece = mystrsep(&tp, 0);
4073    while (piece) {
4074        if (*piece != '\0') {
4075           switch(i) {
4076              case 0: { np++; break; }
4077              case 1: {
4078                        nummap = atoi(piece);
4079                        if (nummap < 1) {
4080                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
4081                           return 1;
4082                        }
4083                        maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
4084                        if (!maptable) return 1;
4085                        np++;
4086                        break;
4087                      }
4088              default: break;
4089           }
4090           i++;
4091        }
4092        piece = mystrsep(&tp, 0);
4093    }
4094    if (np != 2) {
4095       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4096       return 1;
4097    }
4098
4099    /* now parse the nummap lines to read in the remainder of the table */
4100    char * nl;
4101    for (int j=0; j < nummap; j++) {
4102         if (!(nl = af->getline())) return 1;
4103         mychomp(nl);
4104         tp = nl;
4105         i = 0;
4106         maptable[j].set = NULL;
4107         maptable[j].len = 0;
4108         piece = mystrsep(&tp, 0);
4109         while (piece) {
4110            if (*piece != '\0') {
4111                switch(i) {
4112                   case 0: {
4113                              if (strncmp(piece,"MAP",3) != 0) {
4114                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4115                                  nummap = 0;
4116                                  return 1;
4117                              }
4118                              break;
4119                           }
4120                   case 1: {
4121                             int setn = 0;
4122                             maptable[j].len = strlen(piece);
4123                             maptable[j].set = (char **) malloc(maptable[j].len * sizeof(char*));
4124                             if (!maptable[j].set) return 1;
4125                             for (int k = 0; k < maptable[j].len; k++) {
4126                                 int chl = 1;
4127                                 int chb = k;
4128                                 if (piece[k] == '(') {
4129                                     char * parpos = strchr(piece + k, ')');
4130                                     if (parpos != NULL) {
4131                                         chb = k + 1;
4132                                         chl = (int)(parpos - piece) - k - 1;
4133                                         k = k + chl + 1;
4134                                     }
4135                                 } else {
4136                                     if (utf8 && (piece[k] & 0xc0) == 0xc0) {
4137                                         for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++);
4138                                         chl = k - chb;
4139                                         k--;
4140                                     }
4141                                 }
4142                                 maptable[j].set[setn] = (char *) malloc(chl + 1);
4143                                 if (!maptable[j].set[setn]) return 1;
4144                                 strncpy(maptable[j].set[setn], piece + chb, chl);
4145                                 maptable[j].set[setn][chl] = '\0';
4146                                 setn++;
4147                             }
4148                             maptable[j].len = setn;
4149                             break; }
4150                   default: break;
4151                }
4152                i++;
4153            }
4154            piece = mystrsep(&tp, 0);
4155         }
4156         if (!maptable[j].set || !maptable[j].len) {
4157              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4158              nummap = 0;
4159              return 1;
4160         }
4161    }
4162    return 0;
4163 }
4164
4165 /* parse in the word breakpoint table */
4166 int  AffixMgr::parse_breaktable(char * line, FileMgr * af)
4167 {
4168    if (numbreak > -1) {
4169       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
4170       return 1;
4171    }
4172    char * tp = line;
4173    char * piece;
4174    int i = 0;
4175    int np = 0;
4176    piece = mystrsep(&tp, 0);
4177    while (piece) {
4178        if (*piece != '\0') {
4179           switch(i) {
4180              case 0: { np++; break; }
4181              case 1: {
4182                        numbreak = atoi(piece);
4183                        if (numbreak < 0) {
4184                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
4185                           return 1;
4186                        }
4187                        if (numbreak == 0) return 0;
4188                        breaktable = (char **) malloc(numbreak * sizeof(char *));
4189                        if (!breaktable) return 1;
4190                        np++;
4191                        break;
4192                      }
4193              default: break;
4194           }
4195           i++;
4196        }
4197        piece = mystrsep(&tp, 0);
4198    }
4199    if (np != 2) {
4200       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4201       return 1;
4202    }
4203
4204    /* now parse the numbreak lines to read in the remainder of the table */
4205    char * nl;
4206    for (int j=0; j < numbreak; j++) {
4207         if (!(nl = af->getline())) return 1;
4208         mychomp(nl);
4209         tp = nl;
4210         i = 0;
4211         piece = mystrsep(&tp, 0);
4212         while (piece) {
4213            if (*piece != '\0') {
4214                switch(i) {
4215                   case 0: {
4216                              if (strncmp(piece,"BREAK",5) != 0) {
4217                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4218                                  numbreak = 0;
4219                                  return 1;
4220                              }
4221                              break;
4222                           }
4223                   case 1: {
4224                             breaktable[j] = mystrdup(piece);
4225                             break;
4226                           }
4227                   default: break;
4228                }
4229                i++;
4230            }
4231            piece = mystrsep(&tp, 0);
4232         }
4233         if (!breaktable) {
4234              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4235              numbreak = 0;
4236              return 1;
4237         }
4238    }
4239    return 0;
4240 }
4241
4242 void AffixMgr::reverse_condition(char * piece) {
4243     int neg = 0;
4244     for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
4245         switch(*k) {
4246           case '[': {
4247                 if (neg) *(k+1) = '['; else *k = ']';
4248                     break;
4249             }
4250           case ']': {
4251                 *k = '[';
4252                 if (neg) *(k+1) = '^';
4253                 neg = 0;
4254                 break;
4255             }
4256           case '^': {
4257                if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
4258                break;
4259                 }
4260           default: {
4261             if (neg) *(k+1) = *k;
4262           }
4263        }
4264     }
4265 }
4266
4267 int  AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
4268 {
4269    int numents = 0;      // number of affentry structures to parse
4270
4271    unsigned short aflag = 0;      // affix char identifier
4272
4273    char ff=0;
4274    std::vector<affentry> affentries;
4275
4276    char * tp = line;
4277    char * nl = line;
4278    char * piece;
4279    int i = 0;
4280
4281    // checking lines with bad syntax
4282 #ifdef DEBUG
4283    int basefieldnum = 0;
4284 #endif
4285
4286    // split affix header line into pieces
4287
4288    int np = 0;
4289
4290    piece = mystrsep(&tp, 0);
4291    while (piece) {
4292       if (*piece != '\0') {
4293           switch(i) {
4294              // piece 1 - is type of affix
4295              case 0: { np++; break; }
4296
4297              // piece 2 - is affix char
4298              case 1: {
4299                     np++;
4300                     aflag = pHMgr->decode_flag(piece);
4301 #ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.
4302                     if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
4303                         ((at == 'P') && (dupflags[aflag] & dupPFX))) {
4304                         HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
4305                             af->getlinenum());
4306                         // return 1; XXX permissive mode for bad dictionaries
4307                     }
4308                     dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
4309 #endif
4310                     break;
4311                     }
4312              // piece 3 - is cross product indicator
4313              case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
4314
4315              // piece 4 - is number of affentries
4316              case 3: {
4317                        np++;
4318                        numents = atoi(piece);
4319                        if (numents == 0) {
4320                            char * err = pHMgr->encode_flag(aflag);
4321                            if (err) {
4322                                 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
4323                                    af->getlinenum());
4324                                 free(err);
4325                            }
4326                            return 1;
4327                        }
4328                        affentries.resize(numents);
4329                        affentries[0].opts = ff;
4330                        if (utf8) affentries[0].opts += aeUTF8;
4331                        if (pHMgr->is_aliasf()) affentries[0].opts += aeALIASF;
4332                        if (pHMgr->is_aliasm()) affentries[0].opts += aeALIASM;
4333                        affentries[0].aflag = aflag;
4334                      }
4335
4336              default: break;
4337           }
4338           i++;
4339       }
4340       piece = mystrsep(&tp, 0);
4341    }
4342    // check to make sure we parsed enough pieces
4343    if (np != 4) {
4344        char * err = pHMgr->encode_flag(aflag);
4345        if (err) {
4346             HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4347             free(err);
4348        }
4349        return 1;
4350    }
4351
4352    // now parse numents affentries for this affix
4353    std::vector<affentry>::iterator start = affentries.begin();
4354    std::vector<affentry>::iterator end = affentries.end();
4355    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
4356       if ((nl = af->getline()) == NULL) return 1;
4357       mychomp(nl);
4358       tp = nl;
4359       i = 0;
4360       np = 0;
4361
4362       // split line into pieces
4363       piece = mystrsep(&tp, 0);
4364       while (piece) {
4365          if (*piece != '\0') {
4366              switch(i) {
4367                 // piece 1 - is type
4368                 case 0: {
4369                           np++;
4370                           if (entry != start) entry->opts = start->opts &
4371                              (char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
4372                           break;
4373                         }
4374
4375                 // piece 2 - is affix char
4376                 case 1: {
4377                           np++;
4378                           if (pHMgr->decode_flag(piece) != aflag) {
4379                               char * err = pHMgr->encode_flag(aflag);
4380                               if (err) {
4381                                 HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
4382                                     af->getlinenum(), err);
4383                                 free(err);
4384                               }
4385                               return 1;
4386                           }
4387
4388                           if (entry != start) entry->aflag = start->aflag;
4389                           break;
4390                         }
4391
4392                 // piece 3 - is string to strip or 0 for null
4393                 case 2: {
4394                           np++;
4395                           if (complexprefixes) {
4396                             if (utf8) reverseword_utf(piece); else reverseword(piece);
4397                           }
4398                           entry->strip = mystrdup(piece);
4399                           entry->stripl = (unsigned char) strlen(entry->strip);
4400                           if (strcmp(entry->strip,"0") == 0) {
4401                               free(entry->strip);
4402                               entry->strip=mystrdup("");
4403                               entry->stripl = 0;
4404                           }
4405                           break;
4406                         }
4407
4408                 // piece 4 - is affix string or 0 for null
4409                 case 3: {
4410                           char * dash;
4411                           entry->morphcode = NULL;
4412                           entry->contclass = NULL;
4413                           entry->contclasslen = 0;
4414                           np++;
4415                           dash = strchr(piece, '/');
4416                           if (dash) {
4417                             *dash = '\0';
4418
4419                             if (ignorechars) {
4420                               if (utf8) {
4421                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
4422                               } else {
4423                                 remove_ignored_chars(piece,ignorechars);
4424                               }
4425                             }
4426
4427                             if (complexprefixes) {
4428                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4429                             }
4430                             entry->appnd = mystrdup(piece);
4431
4432                             if (pHMgr->is_aliasf()) {
4433                                 int index = atoi(dash + 1);
4434                                 entry->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(entry->contclass), af);
4435                                 if (!entry->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
4436                             } else {
4437                                 entry->contclasslen = (unsigned short) pHMgr->decode_flags(&(entry->contclass), dash + 1, af);
4438                                 flag_qsort(entry->contclass, 0, entry->contclasslen);
4439                             }
4440                             *dash = '/';
4441
4442                             havecontclass = 1;
4443                             for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
4444                               contclasses[(entry->contclass)[_i]] = 1;
4445                             }
4446                           } else {
4447                             if (ignorechars) {
4448                               if (utf8) {
4449                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
4450                               } else {
4451                                 remove_ignored_chars(piece,ignorechars);
4452                               }
4453                             }
4454
4455                             if (complexprefixes) {
4456                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4457                             }
4458                             entry->appnd = mystrdup(piece);
4459                           }
4460
4461                           entry->appndl = (unsigned char) strlen(entry->appnd);
4462                           if (strcmp(entry->appnd,"0") == 0) {
4463                               free(entry->appnd);
4464                               entry->appnd=mystrdup("");
4465                               entry->appndl = 0;
4466                           }
4467                           break;
4468                         }
4469
4470                 // piece 5 - is the conditions descriptions
4471                 case 4: {
4472                           np++;
4473                           if (complexprefixes) {
4474                             if (utf8) reverseword_utf(piece); else reverseword(piece);
4475                             reverse_condition(piece);
4476                           }
4477                           if (entry->stripl && (strcmp(piece, ".") != 0) &&
4478                             redundant_condition(at, entry->strip, entry->stripl, piece, af->getlinenum()))
4479                                 strcpy(piece, ".");
4480                           if (at == 'S') {
4481                             reverseword(piece);
4482                             reverse_condition(piece);
4483                           }
4484                           if (encodeit(*entry, piece)) return 1;
4485                          break;
4486                 }
4487
4488                 case 5: {
4489                           np++;
4490                           if (pHMgr->is_aliasm()) {
4491                             int index = atoi(piece);
4492                             entry->morphcode = pHMgr->get_aliasm(index);
4493                           } else {
4494                             if (complexprefixes) { // XXX - fix me for morph. gen.
4495                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4496                             }
4497                             // add the remaining of the line
4498                             if (*tp) {
4499                                 *(tp - 1) = ' ';
4500                                 tp = tp + strlen(tp);
4501                             }
4502                             entry->morphcode = mystrdup(piece);
4503                             if (!entry->morphcode) return 1;
4504                           }
4505                           break;
4506                 }
4507                 default: break;
4508              }
4509              i++;
4510          }
4511          piece = mystrsep(&tp, 0);
4512       }
4513       // check to make sure we parsed enough pieces
4514       if (np < 4) {
4515           char * err = pHMgr->encode_flag(aflag);
4516           if (err) {
4517             HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
4518                 af->getlinenum(), err);
4519             free(err);
4520           }
4521           return 1;
4522       }
4523
4524 #ifdef DEBUG
4525       // detect unnecessary fields, excepting comments
4526       if (basefieldnum) {
4527         int fieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
4528           if (fieldnum != basefieldnum)
4529             HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", af->getlinenum());
4530       } else {
4531         basefieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
4532       }
4533 #endif
4534    }
4535
4536    // now create SfxEntry or PfxEntry objects and use links to
4537    // build an ordered (sorted by affix string) list
4538    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
4539       if (at == 'P') {
4540           PfxEntry * pfxptr = new PfxEntry(this,&(*entry));
4541           build_pfxtree(pfxptr);
4542       } else {
4543           SfxEntry * sfxptr = new SfxEntry(this,&(*entry));
4544           build_sfxtree(sfxptr);
4545       }
4546    }
4547    return 0;
4548 }
4549
4550 int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, int linenum) {
4551   int condl = strlen(cond);
4552   int i;
4553   int j;
4554   int neg;
4555   int in;
4556   if (ft == 'P') { // prefix
4557     if (strncmp(strip, cond, condl) == 0) return 1;
4558     if (utf8) {
4559     } else {
4560       for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
4561         if (cond[j] != '[') {
4562           if (cond[j] != strip[i]) {
4563             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4564             return 0;
4565           }
4566         } else {
4567           neg = (cond[j+1] == '^') ? 1 : 0;
4568           in = 0;
4569           do {
4570             j++;
4571             if (strip[i] == cond[j]) in = 1;
4572           } while ((j < (condl - 1)) && (cond[j] != ']'));
4573           if (j == (condl - 1) && (cond[j] != ']')) {
4574             HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum, cond);
4575             return 0;
4576           }
4577           if ((!neg && !in) || (neg && in)) {
4578             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4579             return 0;
4580           }
4581         }
4582       }
4583       if (j >= condl) return 1;
4584     }
4585   } else { // suffix
4586     if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
4587     if (utf8) {
4588     } else {
4589       for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
4590         if (cond[j] != ']') {
4591           if (cond[j] != strip[i]) {
4592             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4593             return 0;
4594           }
4595         } else {
4596           in = 0;
4597           do {
4598             j--;
4599             if (strip[i] == cond[j]) in = 1;
4600           } while ((j > 0) && (cond[j] != '['));
4601           if ((j == 0) && (cond[j] != '[')) {
4602             HUNSPELL_WARNING(stderr, "error: line: %d: missing ] in condition:\n%s\n", linenum, cond);
4603             return 0;
4604           }
4605           neg = (cond[j+1] == '^') ? 1 : 0;
4606           if ((!neg && !in) || (neg && in)) {
4607             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4608             return 0;
4609           }
4610         }
4611       }
4612       if (j < 0) return 1;
4613     }
4614   }
4615   return 0;
4616 }