1 #include "license.hunspell"
2 #include "license.myspell"
11 #include "affixmgr.hxx"
12 #include "affentry.hxx"
13 #include "langnum.hxx"
17 #ifdef HUNSPELL_CHROME_CLIENT
18 AffixMgr::AffixMgr(hunspell::BDictReader
* reader
, HashMgr
** ptr
, int * md
)
20 bdict_reader
= reader
;
22 AffixMgr::AffixMgr(const char * affpath
, HashMgr
** ptr
, int * md
, const char * key
)
25 // register hash manager and load affix data from aff file
44 // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
50 compoundflag
= FLAG_NULL
; // permits word in compound forms
51 compoundbegin
= FLAG_NULL
; // may be first word in compound forms
52 compoundmiddle
= FLAG_NULL
; // may be middle word in compound forms
53 compoundend
= FLAG_NULL
; // may be last word in compound forms
54 compoundroot
= FLAG_NULL
; // compound word signing flag
55 compoundpermitflag
= FLAG_NULL
; // compound permitting flag for suffixed word
56 compoundforbidflag
= FLAG_NULL
; // compound forbidden flag for suffixed word
57 compoundmoresuffixes
= 0; // allow more suffixes within compound words
58 checkcompounddup
= 0; // forbid double words in compounds
59 checkcompoundrep
= 0; // forbid bad compounds (may be non compound word with a REP substitution)
60 checkcompoundcase
= 0; // forbid upper and lowercase combinations at word bounds
61 checkcompoundtriple
= 0; // forbid compounds with triple letters
62 simplifiedtriple
= 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
63 forbiddenword
= FORBIDDENWORD
; // forbidden word signing flag
64 nosuggest
= FLAG_NULL
; // don't suggest words signed with NOSUGGEST flag
65 nongramsuggest
= FLAG_NULL
;
66 lang
= NULL
; // language
67 langnum
= 0; // language code (see http://l10n.openoffice.org/languages.html)
68 needaffix
= FLAG_NULL
; // forbidden root, allowed only with suffixes
69 cpdwordmax
= -1; // default: unlimited wordcount in compound words
70 cpdmin
= -1; // undefined
71 cpdmaxsyllable
= 0; // default: unlimited syllablecount in compound words
72 cpdvowels
=NULL
; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
73 cpdvowels_utf16
=NULL
; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
74 cpdvowels_utf16_len
=0; // vowels
75 pfxappnd
=NULL
; // previous prefix for counting the syllables of prefix BUG
76 sfxappnd
=NULL
; // previous suffix for counting a special syllables BUG
77 cpdsyllablenum
=NULL
; // syllable count incrementing flag
78 checknum
=0; // checking numbers, and word with numbers
79 wordchars
=NULL
; // letters + spec. word characters
80 wordchars_utf16
=NULL
; // letters + spec. word characters
81 wordchars_utf16_len
=0; // letters + spec. word characters
82 ignorechars
=NULL
; // ignored characters (set by the IGNORE option)
83 ignorechars_utf16
=NULL
; // ignored characters (set by the IGNORE option), UTF-16 form
84 ignorechars_utf16_len
=0; // letters + spec. word characters
85 version
=NULL
; // affix and dictionary file version string
86 havecontclass
=0; // flags of possible continuing classes (double affix)
87 // LEMMA_PRESENT: do not put the root into the morphological output. The lemma is present
88 // in the morphological description in the dictionary file. It's often combined with PSEUDOROOT.
89 lemma_present
= FLAG_NULL
;
90 circumfix
= FLAG_NULL
;
91 onlyincompound
= FLAG_NULL
;
92 maxngramsugs
= -1; // undefined
93 maxdiff
= -1; // undefined
95 maxcpdsugs
= -1; // undefined
103 substandard
= FLAG_NULL
;
109 for (int i
=0; i
< SETSIZE
; i
++) {
116 #ifdef HUNSPELL_CHROME_CLIENT
117 // Define dummy parameters for parse_file() to avoid changing the parameters
118 // of parse_file(). This may make it easier to merge the changes of the
119 // original hunspell.
120 const char* affpath
= NULL
;
121 const char* key
= NULL
;
123 for (int j
=0; j
< CONTSIZE
; j
++) {
128 if (parse_file(affpath
, key
)) {
129 HUNSPELL_WARNING(stderr
, "Failure loading aff file %s\n",affpath
);
132 if (cpdmin
== -1) cpdmin
= MINCPDLEN
;
137 AffixMgr::~AffixMgr()
139 // pass through linked prefix entries and clean up
140 for (int i
=0; i
< SETSIZE
;i
++) {
142 PfxEntry
* ptr
= pStart
[i
];
143 PfxEntry
* nptr
= NULL
;
145 nptr
= ptr
->getNext();
152 // pass through linked suffix entries and clean up
153 for (int j
=0; j
< SETSIZE
; j
++) {
155 SfxEntry
* ptr
= sStart
[j
];
156 SfxEntry
* nptr
= NULL
;
158 nptr
= ptr
->getNext();
166 if (keystring
) free(keystring
);
168 if (trystring
) free(trystring
);
170 if (encoding
) free(encoding
);
173 for (int j
=0; j
< nummap
; j
++) {
174 for (int k
=0; k
< maptable
[j
].len
; k
++) {
175 if (maptable
[j
].set
[k
]) free(maptable
[j
].set
[k
]);
177 free(maptable
[j
].set
);
178 maptable
[j
].set
= NULL
;
186 for (int j
=0; j
< numbreak
; j
++) {
187 if (breaktable
[j
]) free(breaktable
[j
]);
188 breaktable
[j
] = NULL
;
195 for (int j
=0; j
< numrep
; j
++) {
196 free(reptable
[j
].pattern
);
197 free(reptable
[j
].pattern2
);
202 if (iconvtable
) delete iconvtable
;
203 if (oconvtable
) delete oconvtable
;
204 if (phone
&& phone
->rules
) {
205 for (int j
=0; j
< phone
->num
+ 1; j
++) {
206 free(phone
->rules
[j
* 2]);
207 free(phone
->rules
[j
* 2 + 1]);
215 for (int j
=0; j
< numdefcpd
; j
++) {
216 free(defcpdtable
[j
].def
);
217 defcpdtable
[j
].def
= NULL
;
224 for (int j
=0; j
< numcheckcpd
; j
++) {
225 free(checkcpdtable
[j
].pattern
);
226 free(checkcpdtable
[j
].pattern2
);
227 free(checkcpdtable
[j
].pattern3
);
228 checkcpdtable
[j
].pattern
= NULL
;
229 checkcpdtable
[j
].pattern2
= NULL
;
230 checkcpdtable
[j
].pattern3
= NULL
;
233 checkcpdtable
= NULL
;
236 FREE_FLAG(compoundflag
);
237 FREE_FLAG(compoundbegin
);
238 FREE_FLAG(compoundmiddle
);
239 FREE_FLAG(compoundend
);
240 FREE_FLAG(compoundpermitflag
);
241 FREE_FLAG(compoundforbidflag
);
242 FREE_FLAG(compoundroot
);
243 FREE_FLAG(forbiddenword
);
244 FREE_FLAG(nosuggest
);
245 FREE_FLAG(nongramsuggest
);
246 FREE_FLAG(needaffix
);
247 FREE_FLAG(lemma_present
);
248 FREE_FLAG(circumfix
);
249 FREE_FLAG(onlyincompound
);
255 if (cpdvowels
) free(cpdvowels
);
256 if (cpdvowels_utf16
) free(cpdvowels_utf16
);
257 if (cpdsyllablenum
) free(cpdsyllablenum
);
259 if (lang
) free(lang
);
260 if (wordchars
) free(wordchars
);
261 if (wordchars_utf16
) free(wordchars_utf16
);
262 if (ignorechars
) free(ignorechars
);
263 if (ignorechars_utf16
) free(ignorechars_utf16
);
264 if (version
) free(version
);
266 #ifdef MOZILLA_CLIENT
271 void AffixMgr::finishFileMgr(FileMgr
*afflst
)
275 // convert affix trees to sorted list
276 process_pfx_tree_to_list();
277 process_sfx_tree_to_list();
280 // read in aff file and build up prefix and suffix entry objects
281 int AffixMgr::parse_file(const char * affpath
, const char * key
)
283 char * line
; // io buffers
284 char ft
; // affix type
286 #ifdef HUNSPELL_CHROME_CLIENT
287 // open the affix file
288 // We're always UTF-8
291 // A BDICT file stores PFX and SFX lines in a special section and it provides
292 // a special line iterator for reading PFX and SFX lines.
293 // We create a FileMgr object from this iterator and parse PFX and SFX lines
294 // before parsing other lines.
295 hunspell::LineIterator affix_iterator
= bdict_reader
->GetAffixLineIterator();
296 FileMgr
* iterator
= new FileMgr(&affix_iterator
);
298 HUNSPELL_WARNING(stderr
,
299 "error: could not create a FileMgr from an affix line iterator.\n");
303 while ((line
= iterator
->getline())) {
305 if (strncmp(line
,"PFX",3) == 0) ft
= complexprefixes
? 'S' : 'P';
306 if (strncmp(line
,"SFX",3) == 0) ft
= complexprefixes
? 'P' : 'S';
308 parse_affix(line
, ft
, iterator
, NULL
);
312 // Create a FileMgr object for reading lines except PFX and SFX lines.
313 // We don't need to change the loop below since our FileMgr emulates the
315 hunspell::LineIterator other_iterator
= bdict_reader
->GetOtherLineIterator();
316 FileMgr
* afflst
= new FileMgr(&other_iterator
);
318 HUNSPELL_WARNING(stderr
,
319 "error: could not create a FileMgr from an other line iterator.\n");
323 // checking flag duplication
324 char dupflags
[CONTSIZE
];
325 char dupflags_ini
= 1;
327 // first line indicator for removing byte order mark
330 // open the affix file
331 FileMgr
* afflst
= new FileMgr(affpath
, key
);
333 HUNSPELL_WARNING(stderr
, "error: could not open affix description file %s\n",affpath
);
338 // step one is to parse the affix file building up the internal
339 // affix data structures
341 // read in each line ignoring any that do not
342 // start with a known line type indicator
343 while ((line
= afflst
->getline()) != NULL
) {
346 #ifndef HUNSPELL_CHROME_CLIENT
347 /* remove byte order mark */
350 // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
351 if (strncmp(line
,"\xEF\xBB\xBF",3) == 0) {
352 memmove(line
, line
+3, strlen(line
+3)+1);
357 /* parse in the keyboard string */
358 if (strncmp(line
,"KEY",3) == 0) {
359 if (parse_string(line
, &keystring
, afflst
->getlinenum())) {
360 finishFileMgr(afflst
);
365 /* parse in the try string */
366 if (strncmp(line
,"TRY",3) == 0) {
367 if (parse_string(line
, &trystring
, afflst
->getlinenum())) {
368 finishFileMgr(afflst
);
373 /* parse in the name of the character set used by the .dict and .aff */
374 if (strncmp(line
,"SET",3) == 0) {
375 if (parse_string(line
, &encoding
, afflst
->getlinenum())) {
376 finishFileMgr(afflst
);
379 if (strcmp(encoding
, "UTF-8") == 0) {
381 #ifndef OPENOFFICEORG
382 #ifndef MOZILLA_CLIENT
383 if (initialize_utf_tbl()) return 1;
389 /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
390 if (strncmp(line
,"COMPLEXPREFIXES",15) == 0)
393 /* parse in the flag used by the controlled compound words */
394 if (strncmp(line
,"COMPOUNDFLAG",12) == 0) {
395 if (parse_flag(line
, &compoundflag
, afflst
)) {
396 finishFileMgr(afflst
);
401 /* parse in the flag used by compound words */
402 if (strncmp(line
,"COMPOUNDBEGIN",13) == 0) {
403 if (complexprefixes
) {
404 if (parse_flag(line
, &compoundend
, afflst
)) {
405 finishFileMgr(afflst
);
409 if (parse_flag(line
, &compoundbegin
, afflst
)) {
410 finishFileMgr(afflst
);
416 /* parse in the flag used by compound words */
417 if (strncmp(line
,"COMPOUNDMIDDLE",14) == 0) {
418 if (parse_flag(line
, &compoundmiddle
, afflst
)) {
419 finishFileMgr(afflst
);
423 /* parse in the flag used by compound words */
424 if (strncmp(line
,"COMPOUNDEND",11) == 0) {
425 if (complexprefixes
) {
426 if (parse_flag(line
, &compoundbegin
, afflst
)) {
427 finishFileMgr(afflst
);
431 if (parse_flag(line
, &compoundend
, afflst
)) {
432 finishFileMgr(afflst
);
438 /* parse in the data used by compound_check() method */
439 if (strncmp(line
,"COMPOUNDWORDMAX",15) == 0) {
440 if (parse_num(line
, &cpdwordmax
, afflst
)) {
441 finishFileMgr(afflst
);
446 /* parse in the flag sign compounds in dictionary */
447 if (strncmp(line
,"COMPOUNDROOT",12) == 0) {
448 if (parse_flag(line
, &compoundroot
, afflst
)) {
449 finishFileMgr(afflst
);
454 /* parse in the flag used by compound_check() method */
455 if (strncmp(line
,"COMPOUNDPERMITFLAG",18) == 0) {
456 if (parse_flag(line
, &compoundpermitflag
, afflst
)) {
457 finishFileMgr(afflst
);
462 /* parse in the flag used by compound_check() method */
463 if (strncmp(line
,"COMPOUNDFORBIDFLAG",18) == 0) {
464 if (parse_flag(line
, &compoundforbidflag
, afflst
)) {
465 finishFileMgr(afflst
);
470 if (strncmp(line
,"COMPOUNDMORESUFFIXES",20) == 0) {
471 compoundmoresuffixes
= 1;
474 if (strncmp(line
,"CHECKCOMPOUNDDUP",16) == 0) {
475 checkcompounddup
= 1;
478 if (strncmp(line
,"CHECKCOMPOUNDREP",16) == 0) {
479 checkcompoundrep
= 1;
482 if (strncmp(line
,"CHECKCOMPOUNDTRIPLE",19) == 0) {
483 checkcompoundtriple
= 1;
486 if (strncmp(line
,"SIMPLIFIEDTRIPLE",16) == 0) {
487 simplifiedtriple
= 1;
490 if (strncmp(line
,"CHECKCOMPOUNDCASE",17) == 0) {
491 checkcompoundcase
= 1;
494 if (strncmp(line
,"NOSUGGEST",9) == 0) {
495 if (parse_flag(line
, &nosuggest
, afflst
)) {
496 finishFileMgr(afflst
);
501 if (strncmp(line
,"NONGRAMSUGGEST",14) == 0) {
502 if (parse_flag(line
, &nongramsuggest
, afflst
)) {
503 finishFileMgr(afflst
);
508 /* parse in the flag used by forbidden words */
509 if (strncmp(line
,"FORBIDDENWORD",13) == 0) {
510 if (parse_flag(line
, &forbiddenword
, afflst
)) {
511 finishFileMgr(afflst
);
516 /* parse in the LEMMA_PRESENT flag */
517 if (strncmp(line
,"LEMMA_PRESENT",13) == 0) {
518 if (parse_flag(line
, &lemma_present
, afflst
)) {
519 finishFileMgr(afflst
);
524 /* parse in the flag used by circumfixes */
525 if (strncmp(line
,"CIRCUMFIX",9) == 0) {
526 if (parse_flag(line
, &circumfix
, afflst
)) {
527 finishFileMgr(afflst
);
532 /* parse in the flag used by fogemorphemes */
533 if (strncmp(line
,"ONLYINCOMPOUND",14) == 0) {
534 if (parse_flag(line
, &onlyincompound
, afflst
)) {
535 finishFileMgr(afflst
);
540 /* parse in the flag used by `needaffixs' */
541 if (strncmp(line
,"PSEUDOROOT",10) == 0) {
542 if (parse_flag(line
, &needaffix
, afflst
)) {
543 finishFileMgr(afflst
);
548 /* parse in the flag used by `needaffixs' */
549 if (strncmp(line
,"NEEDAFFIX",9) == 0) {
550 if (parse_flag(line
, &needaffix
, afflst
)) {
551 finishFileMgr(afflst
);
556 /* parse in the minimal length for words in compounds */
557 if (strncmp(line
,"COMPOUNDMIN",11) == 0) {
558 if (parse_num(line
, &cpdmin
, afflst
)) {
559 finishFileMgr(afflst
);
562 if (cpdmin
< 1) cpdmin
= 1;
565 /* parse in the max. words and syllables in compounds */
566 if (strncmp(line
,"COMPOUNDSYLLABLE",16) == 0) {
567 if (parse_cpdsyllable(line
, afflst
)) {
568 finishFileMgr(afflst
);
573 /* parse in the flag used by compound_check() method */
574 if (strncmp(line
,"SYLLABLENUM",11) == 0) {
575 if (parse_string(line
, &cpdsyllablenum
, afflst
->getlinenum())) {
576 finishFileMgr(afflst
);
581 /* parse in the option for checking numbers and words with numbers */
582 if (strncmp(line
,"CHECKNUM",8) == 0) {
586 /* parse in the extra word characters */
587 if (strncmp(line
,"WORDCHARS",9) == 0) {
588 if (parse_array(line
, &wordchars
, &wordchars_utf16
, &wordchars_utf16_len
, utf8
, afflst
->getlinenum())) {
589 finishFileMgr(afflst
);
594 /* parse in the ignored characters (for example, Arabic optional diacritic characters) */
595 if (strncmp(line
,"IGNORE",6) == 0) {
596 if (parse_array(line
, &ignorechars
, &ignorechars_utf16
, &ignorechars_utf16_len
, utf8
, afflst
->getlinenum())) {
597 finishFileMgr(afflst
);
602 #ifndef HUNSPELL_CHROME_CLIENT
603 /* parse in the typical fault correcting table */
604 if (strncmp(line
,"REP",3) == 0) {
605 if (parse_reptable(line
, afflst
)) {
606 finishFileMgr(afflst
);
612 /* parse in the input conversion table */
613 if (strncmp(line
,"ICONV",5) == 0) {
614 if (parse_convtable(line
, afflst
, &iconvtable
, "ICONV")) {
615 finishFileMgr(afflst
);
620 /* parse in the output conversion table */
621 if (strncmp(line
,"OCONV",5) == 0) {
622 if (parse_convtable(line
, afflst
, &oconvtable
, "OCONV")) {
623 finishFileMgr(afflst
);
628 /* parse in the phonetic translation table */
629 if (strncmp(line
,"PHONE",5) == 0) {
630 if (parse_phonetable(line
, afflst
)) {
631 finishFileMgr(afflst
);
636 /* parse in the checkcompoundpattern table */
637 if (strncmp(line
,"CHECKCOMPOUNDPATTERN",20) == 0) {
638 if (parse_checkcpdtable(line
, afflst
)) {
639 finishFileMgr(afflst
);
644 /* parse in the defcompound table */
645 if (strncmp(line
,"COMPOUNDRULE",12) == 0) {
646 if (parse_defcpdtable(line
, afflst
)) {
647 finishFileMgr(afflst
);
652 /* parse in the related character map table */
653 if (strncmp(line
,"MAP",3) == 0) {
654 if (parse_maptable(line
, afflst
)) {
655 finishFileMgr(afflst
);
660 /* parse in the word breakpoints table */
661 if (strncmp(line
,"BREAK",5) == 0) {
662 if (parse_breaktable(line
, afflst
)) {
663 finishFileMgr(afflst
);
668 /* parse in the language for language specific codes */
669 if (strncmp(line
,"LANG",4) == 0) {
670 if (parse_string(line
, &lang
, afflst
->getlinenum())) {
671 finishFileMgr(afflst
);
674 langnum
= get_lang_num(lang
);
677 if (strncmp(line
,"VERSION",7) == 0) {
678 for(line
= line
+ 7; *line
== ' ' || *line
== '\t'; line
++);
679 version
= mystrdup(line
);
682 if (strncmp(line
,"MAXNGRAMSUGS",12) == 0) {
683 if (parse_num(line
, &maxngramsugs
, afflst
)) {
684 finishFileMgr(afflst
);
689 if (strncmp(line
,"ONLYMAXDIFF", 11) == 0)
692 if (strncmp(line
,"MAXDIFF",7) == 0) {
693 if (parse_num(line
, &maxdiff
, afflst
)) {
694 finishFileMgr(afflst
);
699 if (strncmp(line
,"MAXCPDSUGS",10) == 0) {
700 if (parse_num(line
, &maxcpdsugs
, afflst
)) {
701 finishFileMgr(afflst
);
706 if (strncmp(line
,"NOSPLITSUGS",11) == 0) {
710 if (strncmp(line
,"FULLSTRIP",9) == 0) {
714 if (strncmp(line
,"SUGSWITHDOTS",12) == 0) {
718 /* parse in the KEEPCASE flag */
719 if (strncmp(line
,"KEEPCASE",8) == 0) {
720 if (parse_flag(line
, &keepcase
, afflst
)) {
721 finishFileMgr(afflst
);
726 /* parse in the flag used by `forceucase' */
727 if (strncmp(line
,"FORCEUCASE",10) == 0) {
728 if (parse_flag(line
, &forceucase
, afflst
)) {
729 finishFileMgr(afflst
);
734 /* parse in the flag used by `warn' */
735 if (strncmp(line
,"WARN",4) == 0) {
736 if (parse_flag(line
, &warn
, afflst
)) {
737 finishFileMgr(afflst
);
742 if (strncmp(line
,"FORBIDWARN",10) == 0) {
746 /* parse in the flag used by the affix generator */
747 if (strncmp(line
,"SUBSTANDARD",11) == 0) {
748 if (parse_flag(line
, &substandard
, afflst
)) {
749 finishFileMgr(afflst
);
754 if (strncmp(line
,"CHECKSHARPS",11) == 0) {
758 #ifndef HUNSPELL_CHROME_CLIENT
759 /* parse this affix: P - prefix, S - suffix */
761 if (strncmp(line
,"PFX",3) == 0) ft
= complexprefixes
? 'S' : 'P';
762 if (strncmp(line
,"SFX",3) == 0) ft
= complexprefixes
? 'P' : 'S';
765 memset(dupflags
, 0, sizeof(dupflags
));
768 if (parse_affix(line
, ft
, afflst
, dupflags
)) {
769 finishFileMgr(afflst
);
776 finishFileMgr(afflst
);
777 // affix trees are sorted now
779 // now we can speed up performance greatly taking advantage of the
780 // relationship between the affixes and the idea of "subsets".
782 // View each prefix as a potential leading subset of another and view
783 // each suffix (reversed) as a potential trailing subset of another.
785 // To illustrate this relationship if we know the prefix "ab" is found in the
786 // word to examine, only prefixes that "ab" is a leading subset of need be examined.
787 // Furthermore, if "ab" is not present then none of the prefixes that "ab"
788 // is a leading subset of need be examined.
789 // The same argument goes for suffix string that are reversed.
791 // Then to top this off why not examine the first char of the word to quickly
792 // limit the set of prefixes to examine (i.e. the prefixes to examine must
793 // be leading supersets of the first character of the word (if they exist)
795 // To take advantage of this "subset" relationship, we need to add two links
796 // from entry. One to take next if the current prefix is found (call it nexteq)
797 // and one to take next if the current prefix is not found (call it nextne).
799 // Since we have built ordered lists, all that remains is to properly initialize
800 // the nextne and nexteq pointers that relate them
805 /* get encoding for CHECKCOMPOUNDCASE */
807 char * enc
= get_encoding();
808 csconv
= get_current_cs(enc
);
814 strcpy(expw
, wordchars
);
818 for (int i
= 0; i
<= 255; i
++) {
819 if ( (csconv
[i
].cupper
!= csconv
[i
].clower
) &&
820 (! strchr(expw
, (char) i
))) {
821 *(expw
+ strlen(expw
) + 1) = '\0';
822 *(expw
+ strlen(expw
)) = (char) i
;
826 wordchars
= mystrdup(expw
);
829 // default BREAK definition
830 if (numbreak
== -1) {
831 breaktable
= (char **) malloc(sizeof(char *) * 3);
832 if (!breaktable
) return 1;
833 breaktable
[0] = mystrdup("-");
834 breaktable
[1] = mystrdup("^-");
835 breaktable
[2] = mystrdup("-$");
836 if (breaktable
[0] && breaktable
[1] && breaktable
[2]) numbreak
= 3;
842 // we want to be able to quickly access prefix information
843 // both by prefix flag, and sorted by prefix string itself
844 // so we need to set up two indexes
846 int AffixMgr::build_pfxtree(PfxEntry
* pfxptr
)
850 PfxEntry
* ep
= pfxptr
;
852 // get the right starting points
853 const char * key
= ep
->getKey();
854 const unsigned char flg
= (unsigned char) (ep
->getFlag() & 0x00FF);
856 // first index by flag which must exist
862 // handle the special case of null affix string
863 if (strlen(key
) == 0) {
864 // always insert them at head of list at element 0
871 // now handle the normal case
875 unsigned char sp
= *((const unsigned char *)key
);
878 // handle the first insert
885 // otherwise use binary tree insertion so that a sorted
886 // list can easily be generated later
890 if (strcmp(ep
->getKey(), ptr
->getKey() ) <= 0) {
891 ptr
= ptr
->getNextEQ();
897 ptr
= ptr
->getNextNE();
907 // we want to be able to quickly access suffix information
908 // both by suffix flag, and sorted by the reverse of the
909 // suffix string itself; so we need to set up two indexes
910 int AffixMgr::build_sfxtree(SfxEntry
* sfxptr
)
914 SfxEntry
* ep
= sfxptr
;
916 /* get the right starting point */
917 const char * key
= ep
->getKey();
918 const unsigned char flg
= (unsigned char) (ep
->getFlag() & 0x00FF);
920 // first index by flag which must exist
925 // next index by affix string
927 // handle the special case of null affix string
928 if (strlen(key
) == 0) {
929 // always insert them at head of list at element 0
936 // now handle the normal case
940 unsigned char sp
= *((const unsigned char *)key
);
943 // handle the first insert
949 // otherwise use binary tree insertion so that a sorted
950 // list can easily be generated later
954 if (strcmp(ep
->getKey(), ptr
->getKey() ) <= 0) {
955 ptr
= ptr
->getNextEQ();
961 ptr
= ptr
->getNextNE();
971 // convert from binary tree to sorted list
972 int AffixMgr::process_pfx_tree_to_list()
974 for (int i
=1; i
< SETSIZE
; i
++) {
975 pStart
[i
] = process_pfx_in_order(pStart
[i
],NULL
);
981 PfxEntry
* AffixMgr::process_pfx_in_order(PfxEntry
* ptr
, PfxEntry
* nptr
)
984 nptr
= process_pfx_in_order(ptr
->getNextNE(), nptr
);
986 nptr
= process_pfx_in_order(ptr
->getNextEQ(), ptr
);
992 // convert from binary tree to sorted list
993 int AffixMgr:: process_sfx_tree_to_list()
995 for (int i
=1; i
< SETSIZE
; i
++) {
996 sStart
[i
] = process_sfx_in_order(sStart
[i
],NULL
);
1001 SfxEntry
* AffixMgr::process_sfx_in_order(SfxEntry
* ptr
, SfxEntry
* nptr
)
1004 nptr
= process_sfx_in_order(ptr
->getNextNE(), nptr
);
1006 nptr
= process_sfx_in_order(ptr
->getNextEQ(), ptr
);
1012 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
1013 // using the idea of leading subsets this time
1014 int AffixMgr::process_pfx_order()
1018 // loop through each prefix list starting point
1019 for (int i
=1; i
< SETSIZE
; i
++) {
1023 // look through the remainder of the list
1024 // and find next entry with affix that
1025 // the current one is not a subset of
1026 // mark that as destination for NextNE
1027 // use next in list that you are a subset
1030 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
1032 PfxEntry
* nptr
= ptr
->getNext();
1033 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
1034 if (! isSubset( ptr
->getKey() , nptr
->getKey() )) break;
1036 ptr
->setNextNE(nptr
);
1037 ptr
->setNextEQ(NULL
);
1038 if ((ptr
->getNext()) && isSubset(ptr
->getKey() , (ptr
->getNext())->getKey()))
1039 ptr
->setNextEQ(ptr
->getNext());
1042 // now clean up by adding smart search termination strings:
1043 // if you are already a superset of the previous prefix
1044 // but not a subset of the next, search can end here
1045 // so set NextNE properly
1048 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
1049 PfxEntry
* nptr
= ptr
->getNext();
1050 PfxEntry
* mptr
= NULL
;
1051 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
1052 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
1055 if (mptr
) mptr
->setNextNE(NULL
);
1061 // initialize the SfxEntry links NextEQ and NextNE to speed searching
1062 // using the idea of leading subsets this time
1063 int AffixMgr::process_sfx_order()
1067 // loop through each suffix list starting point
1068 for (int i
=1; i
< SETSIZE
; i
++) {
1072 // look through the remainder of the list
1073 // and find next entry with affix that
1074 // the current one is not a subset of
1075 // mark that as destination for NextNE
1076 // use next in list that you are a subset
1079 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
1080 SfxEntry
* nptr
= ptr
->getNext();
1081 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
1082 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
1084 ptr
->setNextNE(nptr
);
1085 ptr
->setNextEQ(NULL
);
1086 if ((ptr
->getNext()) && isSubset(ptr
->getKey(),(ptr
->getNext())->getKey()))
1087 ptr
->setNextEQ(ptr
->getNext());
1091 // now clean up by adding smart search termination strings:
1092 // if you are already a superset of the previous suffix
1093 // but not a subset of the next, search can end here
1094 // so set NextNE properly
1097 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
1098 SfxEntry
* nptr
= ptr
->getNext();
1099 SfxEntry
* mptr
= NULL
;
1100 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
1101 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
1104 if (mptr
) mptr
->setNextNE(NULL
);
1110 // add flags to the result for dictionary debugging
1111 void AffixMgr::debugflag(char * result
, unsigned short flag
) {
1112 char * st
= encode_flag(flag
);
1113 mystrcat(result
, " ", MAXLNLEN
);
1114 mystrcat(result
, MORPH_FLAG
, MAXLNLEN
);
1116 mystrcat(result
, st
, MAXLNLEN
);
1121 // calculate the character length of the condition
1122 int AffixMgr::condlen(char * st
)
1130 } else if (*st
== ']') group
= false;
1131 else if (!group
&& (!utf8
||
1132 (!(*st
& 0x80) || ((*st
& 0xc0) == 0x80)))) l
++;
1137 int AffixMgr::encodeit(affentry
&entry
, char * cs
)
1139 if (strcmp(cs
,".") != 0) {
1140 entry
.numconds
= (char) condlen(cs
);
1141 strncpy(entry
.c
.conds
, cs
, MAXCONDLEN
);
1142 // long condition (end of conds padded by strncpy)
1143 if (entry
.c
.conds
[MAXCONDLEN
- 1] && cs
[MAXCONDLEN
]) {
1144 entry
.opts
+= aeLONGCOND
;
1145 entry
.c
.l
.conds2
= mystrdup(cs
+ MAXCONDLEN_1
);
1146 if (!entry
.c
.l
.conds2
) return 1;
1150 entry
.c
.conds
[0] = '\0';
1155 // return 1 if s1 is a leading subset of s2 (dots are for infixes)
1156 inline int AffixMgr::isSubset(const char * s1
, const char * s2
)
1158 while (((*s1
== *s2
) || (*s1
== '.')) && (*s1
!= '\0')) {
1162 return (*s1
== '\0');
1166 // check word for prefixes
1167 struct hentry
* AffixMgr::prefix_check(const char * word
, int len
, char in_compound
,
1168 const FLAG needflag
)
1170 struct hentry
* rv
= NULL
;
1176 // first handle the special case of 0 length prefixes
1177 PfxEntry
* pe
= pStart
[0];
1181 ((in_compound
!= IN_CPD_NOT
) || !(pe
->getCont() &&
1182 (TESTAFF(pe
->getCont(), onlyincompound
, pe
->getContLen())))) &&
1183 // permit prefixes in compounds
1184 ((in_compound
!= IN_CPD_END
) || (pe
->getCont() &&
1185 (TESTAFF(pe
->getCont(), compoundpermitflag
, pe
->getContLen()))))
1188 rv
= pe
->checkword(word
, len
, in_compound
, needflag
);
1190 pfx
=pe
; // BUG: pfx not stateless
1197 // now handle the general case
1198 unsigned char sp
= *((const unsigned char *)word
);
1199 PfxEntry
* pptr
= pStart
[sp
];
1202 if (isSubset(pptr
->getKey(),word
)) {
1205 ((in_compound
!= IN_CPD_NOT
) || !(pptr
->getCont() &&
1206 (TESTAFF(pptr
->getCont(), onlyincompound
, pptr
->getContLen())))) &&
1207 // permit prefixes in compounds
1208 ((in_compound
!= IN_CPD_END
) || (pptr
->getCont() &&
1209 (TESTAFF(pptr
->getCont(), compoundpermitflag
, pptr
->getContLen()))))
1212 rv
= pptr
->checkword(word
, len
, in_compound
, needflag
);
1214 pfx
=pptr
; // BUG: pfx not stateless
1218 pptr
= pptr
->getNextEQ();
1220 pptr
= pptr
->getNextNE();
1227 // check word for prefixes
1228 struct hentry
* AffixMgr::prefix_check_twosfx(const char * word
, int len
,
1229 char in_compound
, const FLAG needflag
)
1231 struct hentry
* rv
= NULL
;
1236 // first handle the special case of 0 length prefixes
1237 PfxEntry
* pe
= pStart
[0];
1240 rv
= pe
->check_twosfx(word
, len
, in_compound
, needflag
);
1245 // now handle the general case
1246 unsigned char sp
= *((const unsigned char *)word
);
1247 PfxEntry
* pptr
= pStart
[sp
];
1250 if (isSubset(pptr
->getKey(),word
)) {
1251 rv
= pptr
->check_twosfx(word
, len
, in_compound
, needflag
);
1256 pptr
= pptr
->getNextEQ();
1258 pptr
= pptr
->getNextNE();
1265 // check word for prefixes
1266 char * AffixMgr::prefix_check_morph(const char * word
, int len
, char in_compound
,
1267 const FLAG needflag
)
1271 char result
[MAXLNLEN
];
1277 // first handle the special case of 0 length prefixes
1278 PfxEntry
* pe
= pStart
[0];
1280 st
= pe
->check_morph(word
,len
,in_compound
, needflag
);
1282 mystrcat(result
, st
, MAXLNLEN
);
1285 // if (rv) return rv;
1289 // now handle the general case
1290 unsigned char sp
= *((const unsigned char *)word
);
1291 PfxEntry
* pptr
= pStart
[sp
];
1294 if (isSubset(pptr
->getKey(),word
)) {
1295 st
= pptr
->check_morph(word
,len
,in_compound
, needflag
);
1298 if ((in_compound
!= IN_CPD_NOT
) || !((pptr
->getCont() &&
1299 (TESTAFF(pptr
->getCont(), onlyincompound
, pptr
->getContLen()))))) {
1300 mystrcat(result
, st
, MAXLNLEN
);
1305 pptr
= pptr
->getNextEQ();
1307 pptr
= pptr
->getNextNE();
1311 if (*result
) return mystrdup(result
);
1316 // check word for prefixes
1317 char * AffixMgr::prefix_check_twosfx_morph(const char * word
, int len
,
1318 char in_compound
, const FLAG needflag
)
1322 char result
[MAXLNLEN
];
1328 // first handle the special case of 0 length prefixes
1329 PfxEntry
* pe
= pStart
[0];
1331 st
= pe
->check_twosfx_morph(word
,len
,in_compound
, needflag
);
1333 mystrcat(result
, st
, MAXLNLEN
);
1339 // now handle the general case
1340 unsigned char sp
= *((const unsigned char *)word
);
1341 PfxEntry
* pptr
= pStart
[sp
];
1344 if (isSubset(pptr
->getKey(),word
)) {
1345 st
= pptr
->check_twosfx_morph(word
, len
, in_compound
, needflag
);
1347 mystrcat(result
, st
, MAXLNLEN
);
1351 pptr
= pptr
->getNextEQ();
1353 pptr
= pptr
->getNextNE();
1357 if (*result
) return mystrdup(result
);
1361 // Is word a non compound with a REP substitution (see checkcompoundrep)?
1362 int AffixMgr::cpdrep_check(const char * word
, int wl
)
1364 char candidate
[MAXLNLEN
];
1368 #ifdef HUNSPELL_CHROME_CLIENT
1369 const char *pattern
, *pattern2
;
1370 hunspell::ReplacementIterator iterator
= bdict_reader
->GetReplacementIterator();
1371 while (iterator
.GetNext(&pattern
, &pattern2
)) {
1373 lenr
= strlen(pattern2
);
1374 lenp
= strlen(pattern
);
1376 // search every occurrence of the pattern in the word
1377 while ((r
=strstr(r
, pattern
)) != NULL
) {
1378 strcpy(candidate
, word
);
1379 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXLNLEN
) break;
1380 strcpy(candidate
+(r
-word
), pattern2
);
1381 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
1382 if (candidate_check(candidate
,strlen(candidate
))) return 1;
1383 r
++; // search for the next letter
1388 if ((wl
< 2) || !numrep
) return 0;
1390 for (int i
=0; i
< numrep
; i
++ ) {
1392 lenr
= strlen(reptable
[i
].pattern2
);
1393 lenp
= strlen(reptable
[i
].pattern
);
1394 // search every occurrence of the pattern in the word
1395 while ((r
=strstr(r
, reptable
[i
].pattern
)) != NULL
) {
1396 strcpy(candidate
, word
);
1397 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXLNLEN
) break;
1398 strcpy(candidate
+(r
-word
),reptable
[i
].pattern2
);
1399 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
1400 if (candidate_check(candidate
,strlen(candidate
))) return 1;
1401 r
++; // search for the next letter
1408 // forbid compoundings when there are special patterns at word bound
// Scans checkcpdtable[0..numcheckcpd) for an entry that applies at byte
// offset pos of word. An entry applies when all of the following hold:
//   - isSubset(pattern2, word + pos) succeeds;
//   - r1 is NULL, or cond is unset, or r1's flags contain cond (TESTAFF);
//   - r2 is NULL, or cond2 is unset, or r2's flags contain cond2 (TESTAFF);
//   - pattern is empty, or pattern starts with '0' and r1->word ends exactly
//     at pos, or pattern matches the text that ends at pos.
// The last parameter (affixed) is unnamed and unused.
// NOTE(review): a NULL r1 passes the cond test, but the '0' pattern branch
// dereferences r1->blen unconditionally -- confirm r1 is never NULL there.
// NOTE(review): word + pos - len is read without checking len <= pos.
1409 int AffixMgr::cpdpat_check(const char * word
, int pos
, hentry
* r1
, hentry
* r2
, const char /*affixed*/)
1412 for (int i
= 0; i
< numcheckcpd
; i
++) {
1413 if (isSubset(checkcpdtable
[i
].pattern2
, word
+ pos
) &&
1414 (!r1
|| !checkcpdtable
[i
].cond
||
1415 (r1
->astr
&& TESTAFF(r1
->astr
, checkcpdtable
[i
].cond
, r1
->alen
))) &&
1416 (!r2
|| !checkcpdtable
[i
].cond2
||
1417 (r2
->astr
&& TESTAFF(r2
->astr
, checkcpdtable
[i
].cond2
, r2
->alen
))) &&
1418 // zero length pattern => only TESTAFF
1419 // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
1420 (!*(checkcpdtable
[i
].pattern
) || (
1421 (*(checkcpdtable
[i
].pattern
)=='0' && r1
->blen
<= pos
&& strncmp(word
+ pos
- r1
->blen
, r1
->word
, r1
->blen
) == 0) ||
1422 (*(checkcpdtable
[i
].pattern
)!='0' && ((len
= strlen(checkcpdtable
[i
].pattern
)) != 0) &&
1423 strncmp(word
+ pos
- len
, checkcpdtable
[i
].pattern
, len
) == 0)))) {
1430 // forbid compounding with neighbouring upper and lower case characters at word bounds
// Looks at the character that starts at byte offset pos of word and at the
// character just before it. Returns 1 when at least one of the two tests as
// upper case and neither of them is '-'.
// Two code paths are visible: one decodes UTF-8 and treats a character as
// upper case when unicodetoupper() maps it to itself; the other works on
// single bytes and consults csconv[].ccase.
// NOTE(review): a character without any case mapping presumably also
// satisfies unicodetoupper(x) == x -- confirm that this is intended.
1431 int AffixMgr::cpdcase_check(const char * word
, int pos
)
1436 u8_u16(&u
, 1, word
+ pos
);
// step back over UTF-8 continuation bytes (10xxxxxx) to reach the first
// byte of the previous character
1437 for (p
= word
+ pos
- 1; (*p
& 0xc0) == 0x80; p
--);
// combine the high and low bytes of each w_char into one 16-bit value
1439 unsigned short a
= (u
.h
<< 8) + u
.l
;
1440 unsigned short b
= (w
.h
<< 8) + w
.l
;
1441 if (((unicodetoupper(a
, langnum
) == a
) || (unicodetoupper(b
, langnum
) == b
)) &&
1442 (a
!= '-') && (b
!= '-')) return 1;
// single-byte path: a is the byte before the boundary, b the byte at it
1444 unsigned char a
= *(word
+ pos
- 1);
1445 unsigned char b
= *(word
+ pos
);
1446 if ((csconv
[a
].ccase
|| csconv
[b
].ccase
) && (a
!= '-') && (b
!= '-')) return 1;
1451 // check compound patterns
// Stores rv as (*words)[wnum] and matches the word list (*words)[0..wnum]
// against the rules in defcpdtable[0..numdefcpd). A rule is a sequence of
// flags in def[0..len); an entry followed by '*' or '?' is matched against
// a run of words ('?' limits the run to the current word position, '*'
// lets it extend up to wnum).
// all: when non-zero, a rule only counts as matched once the whole rule has
//      been consumed (defcpdtable[i].len <= pp).
// Returns 1 on a match. On the failure paths visible here the stored entry
// (*words)[wnum] is reset to NULL.
1452 int AffixMgr::defcpd_check(hentry
*** words
, short wnum
, hentry
* rv
, hentry
** def
, char all
)
1454 signed short btpp
[MAXWORDLEN
]; // metacharacter (*, ?) positions for backtracking
1455 signed short btwp
[MAXWORDLEN
]; // word positions for metacharacters
1456 int btnum
[MAXWORDLEN
]; // number of matched characters in metacharacter positions
1471 (*words
)[wnum
] = rv
;
1473 // has the last word COMPOUNDRULE flag?
1474 if (rv
->alen
== 0) {
1475 (*words
)[wnum
] = NULL
;
1476 if (w
) *words
= NULL
;
// first pass: does rv carry any non-metacharacter flag used by some rule?
1480 for (i
= 0; i
< numdefcpd
; i
++) {
1481 for (j
= 0; j
< defcpdtable
[i
].len
; j
++) {
1482 if (defcpdtable
[i
].def
[j
] != '*' && defcpdtable
[i
].def
[j
] != '?' &&
1483 TESTAFF(rv
->astr
, defcpdtable
[i
].def
[j
], rv
->alen
)) ok
= 1;
1487 (*words
)[wnum
] = NULL
;
1488 if (w
) *words
= NULL
;
// second pass: try to match the word list against each rule in turn
1492 for (i
= 0; i
< numdefcpd
; i
++) {
1493 signed short pp
= 0; // pattern position
1494 signed short wp
= 0; // "words" position
1499 while ((pp
< defcpdtable
[i
].len
) && (wp
<= wnum
)) {
// is the current rule entry followed by a metacharacter?
1500 if (((pp
+1) < defcpdtable
[i
].len
) &&
1501 ((defcpdtable
[i
].def
[pp
+1] == '*') || (defcpdtable
[i
].def
[pp
+1] == '?'))) {
1502 int wend
= (defcpdtable
[i
].def
[pp
+1] == '?') ? wp
: wnum
;
1507 while (wp
<= wend
) {
1508 if (!(*words
)[wp
]->alen
||
1509 !TESTAFF((*words
)[wp
]->astr
, defcpdtable
[i
].def
[pp
-2], (*words
)[wp
]->alen
)) {
1515 if (wp
<= wnum
) ok2
= 0;
// remember how many words this metacharacter consumed
1516 btnum
[bt
] = wp
- btwp
[bt
];
1517 if (btnum
[bt
] > 0) bt
++;
1521 if (!(*words
)[wp
] || !(*words
)[wp
]->alen
||
1522 !TESTAFF((*words
)[wp
]->astr
, defcpdtable
[i
].def
[pp
], (*words
)[wp
]->alen
)) {
1528 if ((defcpdtable
[i
].len
== pp
) && !(wp
> wnum
)) ok
= 0;
// skip trailing "flag + metacharacter" pairs
1533 while ((defcpdtable
[i
].len
> r
) && ((r
+1) < defcpdtable
[i
].len
) &&
1534 ((defcpdtable
[i
].def
[r
+1] == '*') || (defcpdtable
[i
].def
[r
+1] == '?'))) r
+=2;
1535 if (defcpdtable
[i
].len
<= r
) return 1;
1542 wp
= btwp
[bt
- 1] + (signed short) btnum
[bt
- 1];
1543 } while ((btnum
[bt
- 1] < 0) && --bt
);
1546 if (ok
&& ok2
&& (!all
|| (defcpdtable
[i
].len
<= pp
))) return 1;
1548 // check zero ending
1549 while (ok
&& ok2
&& (defcpdtable
[i
].len
> pp
) && ((pp
+1) < defcpdtable
[i
].len
) &&
1550 ((defcpdtable
[i
].def
[pp
+1] == '*') || (defcpdtable
[i
].def
[pp
+1] == '?'))) pp
+=2;
1551 if (ok
&& ok2
&& (defcpdtable
[i
].len
<= pp
)) return 1;
1553 (*words
)[wnum
] = NULL
;
1554 if (w
) *words
= NULL
;
// Tests a candidate string (cpdrep_check() passes its rewritten words here).
// The logic visible here runs affix_check() on word; the prefix_check()
// variant is commented out and therefore disabled.
1558 inline int AffixMgr::candidate_check(const char * word
, int len
)
1560 struct hentry
* rv
=NULL
;
1565 // rv = prefix_check(word,len,1);
1566 // if (rv) return 1;
1568 rv
= affix_check(word
,len
);
1573 // calculate number of syllables for compound-checking
// Counts the characters of word that belong to the vowel set.
// Returns 0 immediately when cpdmaxsyllable is 0.
// Byte path: each of the first wlen bytes of word is searched in cpdvowels
// with strchr().
// cpdvowels_utf16 path: word is converted with u8_u16() and every resulting
// 16-bit unit is searched in cpdvowels_utf16 with flag_bsearch().
1574 short AffixMgr::get_syllable(const char * word
, int wlen
)
1576 if (cpdmaxsyllable
==0) return 0;
1581 for (int i
=0; i
<wlen
; i
++) {
1582 if (strchr(cpdvowels
, word
[i
])) num
++;
1584 } else if (cpdvowels_utf16
) {
1585 w_char w
[MAXWORDUTF8LEN
];
// i is the count returned by u8_u16(); the loop walks it down to 1
1586 int i
= u8_u16(w
, MAXWORDUTF8LEN
, word
);
1587 for (; i
> 0; i
--) {
1588 if (flag_bsearch((unsigned short *) cpdvowels_utf16
,
1589 ((unsigned short *) w
)[i
- 1], cpdvowels_utf16_len
)) num
++;
// Computes the bounds *cmin and *cmax of the split positions that the
// compound checkers iterate over (for (i = cmin; i < cmax; i++)).
// The two loops count in characters: *cmin is advanced over up to cpdmin
// characters from the start of word, and *cmax is moved back up to
// cpdmin - 1 characters from len; bytes of the form 10xxxxxx (UTF-8
// continuation bytes) are skipped so each bound lands on a character start.
// The final assignment computes *cmax in bytes as len - cpdmin + 1.
// NOTE(review): the inner backward loop reads word[*cmax] without checking
// *cmax >= 0 -- confirm word cannot start with a continuation byte.
1595 void AffixMgr::setcminmax(int * cmin
, int * cmax
, const char * word
, int len
) {
1598 for (*cmin
= 0, i
= 0; (i
< cpdmin
) && word
[*cmin
]; i
++) {
1599 for ((*cmin
)++; (word
[*cmin
] & 0xc0) == 0x80; (*cmin
)++);
1601 for (*cmax
= len
, i
= 0; (i
< (cpdmin
- 1)) && *cmax
; i
++) {
1602 for ((*cmax
)--; (word
[*cmax
] & 0xc0) == 0x80; (*cmax
)--);
1606 *cmax
= len
- cpdmin
+ 1;
1611 // check if compound word is correctly spelled
1612 // hu_mov_rule = spec. Hungarian rule (XXX)
// Tries the split positions i in [cmin, cmax) computed by setcminmax(). For
// each split the first part (st, length i) is looked up and affix-checked;
// if it is acceptable, the remainder is tried as a dictionary word, as an
// affixed word, and -- while wordnum < maxwordnum -- as a compound itself
// through a recursive call.
// words:  optional word list passed on to defcpd_check(); a non-NULL value
//         starts the search with onlycpdrule = 1.
// is_sug: when set, entries carrying the nosuggest flag are rejected.
// info:   if non-NULL, *info & SPELL_ORIGCAP is consulted for entries that
//         carry the forceucase flag.
// Returns NULL when the word is rejected; the accepting paths visible here
// return rv_first.
1613 struct hentry
* AffixMgr::compound_check(const char * word
, int len
,
1614 short wordnum
, short numsyllable
, short maxwordnum
, short wnum
, hentry
** words
= NULL
,
1615 char hu_mov_rule
= 0, char is_sug
= 0, int * info
= NULL
)
1618 short oldnumsyllable
, oldnumsyllable2
, oldwordnum
, oldwordnum2
;
1619 struct hentry
* rv
= NULL
;
1620 struct hentry
* rv_first
;
1621 struct hentry
* rwords
[MAXWORDLEN
]; // buffer for COMPOUND pattern checking
1622 char st
[MAXWORDUTF8LEN
+ 4];
1632 int checkedstriple
= 0;
1635 hentry
** oldwords
= words
;
// cmin/cmax bound the split positions tried by the loop below
1639 setcminmax(&cmin
, &cmax
, word
, len
);
1643 for (i
= cmin
; i
< cmax
; i
++) {
1644 // go to end of the UTF-8 character
1646 for (; (st
[i
] & 0xc0) == 0x80; i
++);
1647 if (i
>= cmax
) return NULL
;
1651 onlycpdrule
= (words
) ? 1 : 0;
1653 do { // onlycpdrule loop
1655 oldnumsyllable
= numsyllable
;
1656 oldwordnum
= wordnum
;
1660 do { // simplified checkcompoundpattern loop
// advance scpd to the next checkcpdtable entry whose pattern3 matches the
// text at word + i
1663 for (; scpd
<= numcheckcpd
&& (!checkcpdtable
[scpd
-1].pattern3
||
1664 strncmp(word
+ i
, checkcpdtable
[scpd
-1].pattern3
, strlen(checkcpdtable
[scpd
-1].pattern3
)) != 0); scpd
++);
1666 if (scpd
> numcheckcpd
) break; // break simplified checkcompoundpattern loop
// NOTE(review): the strcpy() calls below write into the fixed-size st
// buffer with no visible bounds check -- confirm the pattern lengths
// cannot overflow it.
1667 strcpy(st
+ i
, checkcpdtable
[scpd
-1].pattern
);
1669 i
+= strlen(checkcpdtable
[scpd
-1].pattern
);
1670 strcpy(st
+ i
, checkcpdtable
[scpd
-1].pattern2
);
1671 strcpy(st
+ i
+ strlen(checkcpdtable
[scpd
-1].pattern2
), word
+ soldi
+ strlen(checkcpdtable
[scpd
-1].pattern3
));
1674 len
+= strlen(checkcpdtable
[scpd
-1].pattern
) + strlen(checkcpdtable
[scpd
-1].pattern2
) - strlen(checkcpdtable
[scpd
-1].pattern3
);
1677 setcminmax(&cmin
, &cmax
, st
, len
);
1679 cmax
= len
- cpdmin
+ 1;
1691 rv
= lookup(st
); // perhaps without prefix
1693 // search homonym with compound flag
1694 while ((rv
) && !hu_mov_rule
&&
1695 ((needaffix
&& TESTAFF(rv
->astr
, needaffix
, rv
->alen
)) ||
1696 !((compoundflag
&& !words
&& !onlycpdrule
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1697 (compoundbegin
&& !wordnum
&& !onlycpdrule
&&
1698 TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
1699 (compoundmiddle
&& wordnum
&& !words
&& !onlycpdrule
&&
1700 TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
)) ||
1701 (numdefcpd
&& onlycpdrule
&&
1702 ((!words
&& !wordnum
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0)) ||
1703 (words
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0))))) ||
1704 (scpd
!= 0 && checkcpdtable
[scpd
-1].cond
!= FLAG_NULL
&&
1705 !TESTAFF(rv
->astr
, checkcpdtable
[scpd
-1].cond
, rv
->alen
)))
1707 rv
= rv
->next_homonym
;
1710 if (rv
) affixed
= 0;
1713 if (onlycpdrule
) break;
1715 !(rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundflag
))) {
1716 if (((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
,
1717 FLAG_NULL
, compoundflag
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1718 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundflag
)))) && !hu_mov_rule
&&
1720 ((compoundforbidflag
&& TESTAFF(sfx
->getCont(), compoundforbidflag
,
1721 sfx
->getContLen())) || (compoundend
&&
1722 TESTAFF(sfx
->getCont(), compoundend
,
1723 sfx
->getContLen())))) {
1729 (((wordnum
== 0) && compoundbegin
&&
1730 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundbegin
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1731 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundbegin
))) || // twofold suffixes + compound
1732 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundbegin
)))) ||
1733 ((wordnum
> 0) && compoundmiddle
&&
1734 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundmiddle
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1735 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundmiddle
))) || // twofold suffixes + compound
1736 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundmiddle
)))))
1737 ) checked_prefix
= 1;
1738 // else check forbiddenwords and needaffix
1739 } else if (rv
->astr
&& (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1740 TESTAFF(rv
->astr
, needaffix
, rv
->alen
) ||
1741 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1742 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
))
1749 // check non_compound flag in suffix and prefix
1750 if ((rv
) && !hu_mov_rule
&&
1751 ((pfx
&& pfx
->getCont() &&
1752 TESTAFF(pfx
->getCont(), compoundforbidflag
,
1753 pfx
->getContLen())) ||
1754 (sfx
&& sfx
->getCont() &&
1755 TESTAFF(sfx
->getCont(), compoundforbidflag
,
1756 sfx
->getContLen())))) {
1760 // check compoundend flag in suffix and prefix
1761 if ((rv
) && !checked_prefix
&& compoundend
&& !hu_mov_rule
&&
1762 ((pfx
&& pfx
->getCont() &&
1763 TESTAFF(pfx
->getCont(), compoundend
,
1764 pfx
->getContLen())) ||
1765 (sfx
&& sfx
->getCont() &&
1766 TESTAFF(sfx
->getCont(), compoundend
,
1767 sfx
->getContLen())))) {
1771 // check compoundmiddle flag in suffix and prefix
1772 if ((rv
) && !checked_prefix
&& (wordnum
==0) && compoundmiddle
&& !hu_mov_rule
&&
1773 ((pfx
&& pfx
->getCont() &&
1774 TESTAFF(pfx
->getCont(), compoundmiddle
,
1775 pfx
->getContLen())) ||
1776 (sfx
&& sfx
->getCont() &&
1777 TESTAFF(sfx
->getCont(), compoundmiddle
,
1778 sfx
->getContLen())))) {
1782 // check forbiddenwords
1783 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1784 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1785 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) {
1789 // increment word number, if the second root has a compoundroot flag
1790 if ((rv
) && compoundroot
&&
1791 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
1795 // first word is acceptable in compound words?
1797 ( checked_prefix
|| (words
&& words
[wnum
]) ||
1798 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1799 ((oldwordnum
== 0) && compoundbegin
&& TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
1800 ((oldwordnum
> 0) && compoundmiddle
&& TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
))// ||
1803 // LANG_hu section: spec. Hungarian rule
1804 || ((langnum
== LANG_hu
) && hu_mov_rule
&& (
1805 TESTAFF(rv
->astr
, 'F', rv
->alen
) || // XXX hardwired Hungarian dictionary codes
1806 TESTAFF(rv
->astr
, 'G', rv
->alen
) ||
1807 TESTAFF(rv
->astr
, 'H', rv
->alen
)
1810 // END of LANG_hu section
1813 // test CHECKCOMPOUNDPATTERN conditions
1814 scpd
== 0 || checkcpdtable
[scpd
-1].cond
== FLAG_NULL
||
1815 TESTAFF(rv
->astr
, checkcpdtable
[scpd
-1].cond
, rv
->alen
)
1817 && ! (( checkcompoundtriple
&& scpd
== 0 && !words
&& // test triple letters
1818 (word
[i
-1]==word
[i
]) && (
1819 ((i
>1) && (word
[i
-1]==word
[i
-2])) ||
1820 ((word
[i
-1]==word
[i
+1])) // may be word[i+1] == '\0'
1824 checkcompoundcase
&& scpd
== 0 && !words
&& cpdcase_check(word
, i
)
1827 // LANG_hu section: spec. Hungarian rule
1828 || ((!rv
) && (langnum
== LANG_hu
) && hu_mov_rule
&& (rv
= affix_check(st
,i
)) &&
1829 (sfx
&& sfx
->getCont() && ( // XXX hardwired Hungarian dic. codes
1830 TESTAFF(sfx
->getCont(), (unsigned short) 'x', sfx
->getContLen()) ||
1831 TESTAFF(sfx
->getCont(), (unsigned short) '%', sfx
->getContLen())
1835 ) { // first word is ok condition
1837 // LANG_hu section: spec. Hungarian rule
1838 if (langnum
== LANG_hu
) {
1839 // calculate syllable number of the word
1840 numsyllable
+= get_syllable(st
, i
);
1841 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1842 if (pfx
&& (get_syllable(pfx
->getKey(),strlen(pfx
->getKey())) > 1)) wordnum
++;
1844 // END of LANG_hu section
1850 do { // striple loop
1852 // check simplifiedtriple
1853 if (simplifiedtriple
) {
1856 i
--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
1857 } else if (i
> 2 && *(word
+i
- 1) == *(word
+ i
- 2)) striple
= 1;
1860 rv
= lookup((st
+i
)); // perhaps without prefix
1862 // search homonym with compound flag
1863 while ((rv
) && ((needaffix
&& TESTAFF(rv
->astr
, needaffix
, rv
->alen
)) ||
1864 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1865 (compoundend
&& !words
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
)) ||
1866 (numdefcpd
&& words
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
,1))) ||
1867 (scpd
!= 0 && checkcpdtable
[scpd
-1].cond2
!= FLAG_NULL
&&
1868 !TESTAFF(rv
->astr
, checkcpdtable
[scpd
-1].cond2
, rv
->alen
))
1870 rv
= rv
->next_homonym
;
// drop entries flagged forceucase unless *info has SPELL_ORIGCAP set
1874 if (rv
&& forceucase
&& (rv
) &&
1875 (TESTAFF(rv
->astr
, forceucase
, rv
->alen
)) && !(info
&& *info
& SPELL_ORIGCAP
)) rv
= NULL
;
1877 if (rv
&& words
&& words
[wnum
+ 1]) return rv_first
;
1879 oldnumsyllable2
= numsyllable
;
1880 oldwordnum2
= wordnum
;
1883 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
1884 if ((rv
) && (langnum
== LANG_hu
) && (TESTAFF(rv
->astr
, 'I', rv
->alen
)) && !(TESTAFF(rv
->astr
, 'J', rv
->alen
))) {
1887 // END of LANG_hu section
1889 // increment word number, if the second root has a compoundroot flag
1890 if ((rv
) && (compoundroot
) &&
1891 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
1895 // check forbiddenwords
1896 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1897 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1898 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) return NULL
;
1900 // second word is acceptable, as a root?
1901 // hungarian conventions: compounding is acceptable,
1902 // when compound forms consist of 2 words, or if more,
1903 // then the syllable number of root words must be 6, or lesser.
1906 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1907 (compoundend
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
))
1910 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
1911 ((cpdmaxsyllable
!=0) &&
1912 (numsyllable
+ get_syllable(HENTRY_WORD(rv
), rv
->clen
)<=cpdmaxsyllable
))
1915 // test CHECKCOMPOUNDPATTERN
1916 !numcheckcpd
|| scpd
!= 0 || !cpdpat_check(word
, i
, rv_first
, rv
, 0)
1919 (!checkcompounddup
|| (rv
!= rv_first
))
1921 // test CHECKCOMPOUNDPATTERN conditions
1922 && (scpd
== 0 || checkcpdtable
[scpd
-1].cond2
== FLAG_NULL
||
1923 TESTAFF(rv
->astr
, checkcpdtable
[scpd
-1].cond2
, rv
->alen
))
1926 // forbid compound word, if it is a non compound word with typical fault
1927 if (checkcompoundrep
&& cpdrep_check(word
,len
)) return NULL
;
1931 numsyllable
= oldnumsyllable2
;
1932 wordnum
= oldwordnum2
;
1934 // perhaps second word has prefix or/and suffix
1936 sfxflag
= FLAG_NULL
;
1937 rv
= (compoundflag
&& !onlycpdrule
) ? affix_check((word
+i
),strlen(word
+i
), compoundflag
, IN_CPD_END
) : NULL
;
1938 if (!rv
&& compoundend
&& !onlycpdrule
) {
1941 rv
= affix_check((word
+i
),strlen(word
+i
), compoundend
, IN_CPD_END
);
1944 if (!rv
&& numdefcpd
&& words
) {
1945 rv
= affix_check((word
+i
),strlen(word
+i
), 0, IN_CPD_END
);
1946 if (rv
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
, 1)) return rv_first
;
1950 // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
1951 if (rv
&& !(scpd
== 0 || checkcpdtable
[scpd
-1].cond2
== FLAG_NULL
||
1952 TESTAFF(rv
->astr
, checkcpdtable
[scpd
-1].cond2
, rv
->alen
))) rv
= NULL
;
1954 // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
1955 if (rv
&& numcheckcpd
&& scpd
== 0 && cpdpat_check(word
, i
, rv_first
, rv
, affixed
)) rv
= NULL
;
1957 // check non_compound flag in suffix and prefix
1959 ((pfx
&& pfx
->getCont() &&
1960 TESTAFF(pfx
->getCont(), compoundforbidflag
,
1961 pfx
->getContLen())) ||
1962 (sfx
&& sfx
->getCont() &&
1963 TESTAFF(sfx
->getCont(), compoundforbidflag
,
1964 sfx
->getContLen())))) {
// drop entries flagged forceucase unless *info has SPELL_ORIGCAP set
1969 if (rv
&& forceucase
&& (rv
) &&
1970 (TESTAFF(rv
->astr
, forceucase
, rv
->alen
)) && !(info
&& *info
& SPELL_ORIGCAP
)) rv
= NULL
;
1972 // check forbiddenwords
1973 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1974 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
1975 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) return NULL
;
1977 // pfxappnd = prefix of word+i, or NULL
1978 // calculate syllable number of prefix.
1979 // hungarian convention: when syllable number of prefix is more,
1980 // than 1, the prefix+word counts as two words.
1982 if (langnum
== LANG_hu
) {
1983 // calculate syllable number of the word
1984 numsyllable
+= get_syllable(word
+ i
, strlen(word
+ i
));
1986 // - affix syllable num.
1987 // XXX only second suffix (inflections, not derivations)
1989 char * tmp
= myrevstrdup(sfxappnd
);
1990 numsyllable
-= get_syllable(tmp
, strlen(tmp
));
1994 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1995 if (pfx
&& (get_syllable(pfx
->getKey(),strlen(pfx
->getKey())) > 1)) wordnum
++;
1997 // increment syllable num, if last word has a SYLLABLENUM flag
1998 // and the suffix is beginning `s'
2000 if (cpdsyllablenum
) {
2002 case 'c': { numsyllable
+=2; break; }
2003 case 'J': { numsyllable
+= 1; break; }
2004 case 'I': { if (rv
&& TESTAFF(rv
->astr
, 'J', rv
->alen
)) numsyllable
+= 1; break; }
2009 // increment word number, if the second word has a compoundroot flag
2010 if ((rv
) && (compoundroot
) &&
2011 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2015 // second word is acceptable, as a word with prefix or/and suffix?
2016 // hungarian conventions: compounding is acceptable,
2017 // when compound forms consist of 2 words, otherwise
2018 // the syllable number of root words is 6, or lesser.
2021 ((cpdwordmax
== -1) || (wordnum
+ 1 < cpdwordmax
)) ||
2022 ((cpdmaxsyllable
!= 0) &&
2023 (numsyllable
<= cpdmaxsyllable
))
2026 (!checkcompounddup
|| (rv
!= rv_first
))
2028 // forbid compound word, if it is a non compound word with typical fault
2029 if (checkcompoundrep
&& cpdrep_check(word
, len
)) return NULL
;
2033 numsyllable
= oldnumsyllable2
;
2034 wordnum
= oldwordnum2
;
2036 // perhaps second word is a compound word (recursive call)
2037 if (wordnum
< maxwordnum
) {
2038 rv
= compound_check((st
+i
),strlen(st
+i
), wordnum
+1,
2039 numsyllable
, maxwordnum
, wnum
+ 1, words
, 0, is_sug
, info
);
2041 if (rv
&& numcheckcpd
&& ((scpd
== 0 && cpdpat_check(word
, i
, rv_first
, rv
, affixed
)) ||
2042 (scpd
!= 0 && !cpdpat_check(word
, i
, rv_first
, rv
, affixed
)))) rv
= NULL
;
2047 // forbid compound word, if it is a non compound word with typical fault
2048 if (checkcompoundrep
|| forbiddenword
) {
2049 struct hentry
* rv2
= NULL
;
2051 if (checkcompoundrep
&& cpdrep_check(word
, len
)) return NULL
;
2054 if (strncmp(rv
->word
, word
+ i
, rv
->blen
) == 0) {
// temporarily terminate st after the matched word; r keeps the overwritten
// byte so it can be restored below
2055 char r
= *(st
+ i
+ rv
->blen
);
2056 *(st
+ i
+ rv
->blen
) = '\0';
2058 if (checkcompoundrep
&& cpdrep_check(st
, i
+ rv
->blen
)) {
2059 *(st
+ i
+ rv
->blen
) = r
;
2063 if (forbiddenword
) {
2065 if (!rv2
) rv2
= affix_check(word
, len
);
2066 if (rv2
&& rv2
->astr
&& TESTAFF(rv2
->astr
, forbiddenword
, rv2
->alen
) &&
2067 (strncmp(rv2
->word
, st
, i
+ rv
->blen
) == 0)) {
2071 *(st
+ i
+ rv
->blen
) = r
;
2076 } while (striple
&& !checkedstriple
); // end of striple loop
2078 if (checkedstriple
) {
2084 } // first word is ok condition
2096 } while (!onlycpdrule
&& simplifiedcpd
&& scpd
<= numcheckcpd
); // end of simplifiedcpd loop
2099 wordnum
= oldwordnum
;
2100 numsyllable
= oldnumsyllable
;
2104 strcpy(st
, word
); // XXX add more optim.
2108 } while (numdefcpd
&& oldwordnum
== 0 && !onlycpdrule
&& (onlycpdrule
= 1)); // end of onlycpd loop
2115 // check if compound word is correctly spelled
2116 // hu_mov_rule = spec. Hungarian rule (XXX)
2117 int AffixMgr::compound_check_morph(const char * word
, int len
,
2118 short wordnum
, short numsyllable
, short maxwordnum
, short wnum
, hentry
** words
,
2119 char hu_mov_rule
= 0, char ** result
= NULL
, char * partresult
= NULL
)
2122 short oldnumsyllable
, oldnumsyllable2
, oldwordnum
, oldwordnum2
;
2125 struct hentry
* rv
= NULL
;
2126 struct hentry
* rv_first
;
2127 struct hentry
* rwords
[MAXWORDLEN
]; // buffer for COMPOUND pattern checking
2128 char st
[MAXWORDUTF8LEN
+ 4];
2132 char presult
[MAXLNLEN
];
2139 hentry
** oldwords
= words
;
2141 setcminmax(&cmin
, &cmax
, word
, len
);
2145 for (i
= cmin
; i
< cmax
; i
++) {
2146 oldnumsyllable
= numsyllable
;
2147 oldwordnum
= wordnum
;
2150 // go to end of the UTF-8 character
2152 for (; (st
[i
] & 0xc0) == 0x80; i
++);
2153 if (i
>= cmax
) return 0;
2157 onlycpdrule
= (words
) ? 1 : 0;
2159 do { // onlycpdrule loop
2161 oldnumsyllable
= numsyllable
;
2162 oldwordnum
= wordnum
;
2174 if (partresult
) mystrcat(presult
, partresult
, MAXLNLEN
);
2176 rv
= lookup(st
); // perhaps without prefix
2178 // search homonym with compound flag
2179 while ((rv
) && !hu_mov_rule
&&
2180 ((needaffix
&& TESTAFF(rv
->astr
, needaffix
, rv
->alen
)) ||
2181 !((compoundflag
&& !words
&& !onlycpdrule
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2182 (compoundbegin
&& !wordnum
&& !onlycpdrule
&&
2183 TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
2184 (compoundmiddle
&& wordnum
&& !words
&& !onlycpdrule
&&
2185 TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
)) ||
2186 (numdefcpd
&& onlycpdrule
&&
2187 ((!words
&& !wordnum
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0)) ||
2188 (words
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0))))
2190 rv
= rv
->next_homonym
;
2193 if (rv
) affixed
= 0;
2196 sprintf(presult
+ strlen(presult
), "%c%s%s", MSEP_FLD
, MORPH_PART
, st
);
2197 if (!HENTRY_FIND(rv
, MORPH_STEM
)) {
2198 sprintf(presult
+ strlen(presult
), "%c%s%s", MSEP_FLD
, MORPH_STEM
, st
);
2200 // store the pointer of the hash entry
2201 // sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
2202 if (HENTRY_DATA(rv
)) {
2203 sprintf(presult
+ strlen(presult
), "%c%s", MSEP_FLD
, HENTRY_DATA2(rv
));
2208 if (onlycpdrule
) break;
2210 !(rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundflag
))) {
2211 if (((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
,
2212 FLAG_NULL
, compoundflag
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
2213 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundflag
)))) && !hu_mov_rule
&&
2215 ((compoundforbidflag
&& TESTAFF(sfx
->getCont(), compoundforbidflag
,
2216 sfx
->getContLen())) || (compoundend
&&
2217 TESTAFF(sfx
->getCont(), compoundend
,
2218 sfx
->getContLen())))) {
2224 (((wordnum
== 0) && compoundbegin
&&
2225 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundbegin
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
2226 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundbegin
))) || // twofold suffix+compound
2227 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundbegin
)))) ||
2228 ((wordnum
> 0) && compoundmiddle
&&
2229 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundmiddle
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
2230 (compoundmoresuffixes
&& (rv
= suffix_check_twosfx(st
, i
, 0, NULL
, compoundmiddle
))) || // twofold suffix+compound
2231 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundmiddle
)))))
2233 // char * p = prefix_check_morph(st, i, 0, compound);
2235 if (compoundflag
) p
= affix_check_morph(st
, i
, compoundflag
);
2236 if (!p
|| (*p
== '\0')) {
2239 if ((wordnum
== 0) && compoundbegin
) {
2240 p
= affix_check_morph(st
, i
, compoundbegin
);
2241 } else if ((wordnum
> 0) && compoundmiddle
) {
2242 p
= affix_check_morph(st
, i
, compoundmiddle
);
2245 if (p
&& (*p
!= '\0')) {
2246 sprintf(presult
+ strlen(presult
), "%c%s%s%s", MSEP_FLD
,
2247 MORPH_PART
, st
, line_uniq_app(&p
, MSEP_REC
));
2252 // else check forbiddenwords
2253 } else if (rv
->astr
&& (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
2254 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
) ||
2255 TESTAFF(rv
->astr
, needaffix
, rv
->alen
))) {
2260 // check non_compound flag in suffix and prefix
2261 if ((rv
) && !hu_mov_rule
&&
2262 ((pfx
&& pfx
->getCont() &&
2263 TESTAFF(pfx
->getCont(), compoundforbidflag
,
2264 pfx
->getContLen())) ||
2265 (sfx
&& sfx
->getCont() &&
2266 TESTAFF(sfx
->getCont(), compoundforbidflag
,
2267 sfx
->getContLen())))) {
2271 // check compoundend flag in suffix and prefix
2272 if ((rv
) && !checked_prefix
&& compoundend
&& !hu_mov_rule
&&
2273 ((pfx
&& pfx
->getCont() &&
2274 TESTAFF(pfx
->getCont(), compoundend
,
2275 pfx
->getContLen())) ||
2276 (sfx
&& sfx
->getCont() &&
2277 TESTAFF(sfx
->getCont(), compoundend
,
2278 sfx
->getContLen())))) {
2282 // check compoundmiddle flag in suffix and prefix
2283 if ((rv
) && !checked_prefix
&& (wordnum
==0) && compoundmiddle
&& !hu_mov_rule
&&
2284 ((pfx
&& pfx
->getCont() &&
2285 TESTAFF(pfx
->getCont(), compoundmiddle
,
2286 pfx
->getContLen())) ||
2287 (sfx
&& sfx
->getCont() &&
2288 TESTAFF(sfx
->getCont(), compoundmiddle
,
2289 sfx
->getContLen())))) {
2293 // check forbiddenwords
2294 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
)
2295 || TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
))) continue;
2297 // increment word number, if the second root has a compoundroot flag
2298 if ((rv
) && (compoundroot
) &&
2299 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2303 // first word is acceptable in compound words?
2305 ( checked_prefix
|| (words
&& words
[wnum
]) ||
2306 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2307 ((oldwordnum
== 0) && compoundbegin
&& TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
2308 ((oldwordnum
> 0) && compoundmiddle
&& TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
))
2309 // LANG_hu section: spec. Hungarian rule
2310 || ((langnum
== LANG_hu
) && // hu_mov_rule
2312 TESTAFF(rv
->astr
, 'F', rv
->alen
) ||
2313 TESTAFF(rv
->astr
, 'G', rv
->alen
) ||
2314 TESTAFF(rv
->astr
, 'H', rv
->alen
)
2317 // END of LANG_hu section
2319 && ! (( checkcompoundtriple
&& !words
&& // test triple letters
2320 (word
[i
-1]==word
[i
]) && (
2321 ((i
>1) && (word
[i
-1]==word
[i
-2])) ||
2322 ((word
[i
-1]==word
[i
+1])) // may be word[i+1] == '\0'
2326 // test CHECKCOMPOUNDPATTERN
2327 numcheckcpd
&& !words
&& cpdpat_check(word
, i
, rv
, NULL
, affixed
)
2330 checkcompoundcase
&& !words
&& cpdcase_check(word
, i
)
2333 // LANG_hu section: spec. Hungarian rule
2334 || ((!rv
) && (langnum
== LANG_hu
) && hu_mov_rule
&& (rv
= affix_check(st
,i
)) &&
2335 (sfx
&& sfx
->getCont() && (
2336 TESTAFF(sfx
->getCont(), (unsigned short) 'x', sfx
->getContLen()) ||
2337 TESTAFF(sfx
->getCont(), (unsigned short) '%', sfx
->getContLen())
2341 // END of LANG_hu section
2344 // LANG_hu section: spec. Hungarian rule
2345 if (langnum
== LANG_hu
) {
2346 // calculate syllable number of the word
2347 numsyllable
+= get_syllable(st
, i
);
2349 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2350 if (pfx
&& (get_syllable(pfx
->getKey(),strlen(pfx
->getKey())) > 1)) wordnum
++;
2352 // END of LANG_hu section
2356 rv
= lookup((word
+i
)); // perhaps without prefix
2358 // search homonym with compound flag
2359 while ((rv
) && ((needaffix
&& TESTAFF(rv
->astr
, needaffix
, rv
->alen
)) ||
2360 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2361 (compoundend
&& !words
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
)) ||
2362 (numdefcpd
&& words
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
,1))))) {
2363 rv
= rv
->next_homonym
;
2366 if (rv
&& words
&& words
[wnum
+ 1]) {
2367 mystrcat(*result
, presult
, MAXLNLEN
);
2368 mystrcat(*result
, " ", MAXLNLEN
);
2369 mystrcat(*result
, MORPH_PART
, MAXLNLEN
);
2370 mystrcat(*result
, word
+i
, MAXLNLEN
);
2371 if (complexprefixes
&& HENTRY_DATA(rv
)) mystrcat(*result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2372 if (!HENTRY_FIND(rv
, MORPH_STEM
)) {
2373 mystrcat(*result
, " ", MAXLNLEN
);
2374 mystrcat(*result
, MORPH_STEM
, MAXLNLEN
);
2375 mystrcat(*result
, HENTRY_WORD(rv
), MAXLNLEN
);
2377 // store the pointer of the hash entry
2378 // sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2379 if (!complexprefixes
&& HENTRY_DATA(rv
)) {
2380 mystrcat(*result
, " ", MAXLNLEN
);
2381 mystrcat(*result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2383 mystrcat(*result
, "\n", MAXLNLEN
);
2388 oldnumsyllable2
= numsyllable
;
2389 oldwordnum2
= wordnum
;
2391 // LANG_hu section: spec. Hungarian rule
2392 if ((rv
) && (langnum
== LANG_hu
) && (TESTAFF(rv
->astr
, 'I', rv
->alen
)) && !(TESTAFF(rv
->astr
, 'J', rv
->alen
))) {
2395 // END of LANG_hu section
2396 // increment word number, if the second root has a compoundroot flag
2397 if ((rv
) && (compoundroot
) &&
2398 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2402 // check forbiddenwords
2403 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
2404 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
))) {
2409 // second word is acceptable, as a root?
2410 // hungarian conventions: compounding is acceptable,
2411 // when compound forms consist of 2 words, or if more,
2412 // then the syllable number of root words must be 6, or lesser.
2414 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2415 (compoundend
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
))
2418 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
2419 ((cpdmaxsyllable
!=0) &&
2420 (numsyllable
+get_syllable(HENTRY_WORD(rv
),rv
->blen
)<=cpdmaxsyllable
))
2423 (!checkcompounddup
|| (rv
!= rv_first
))
2427 // bad compound word
2428 mystrcat(*result
, presult
, MAXLNLEN
);
2429 mystrcat(*result
, " ", MAXLNLEN
);
2430 mystrcat(*result
, MORPH_PART
, MAXLNLEN
);
2431 mystrcat(*result
, word
+i
, MAXLNLEN
);
2433 if (HENTRY_DATA(rv
)) {
2434 if (complexprefixes
) mystrcat(*result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2435 if (! HENTRY_FIND(rv
, MORPH_STEM
)) {
2436 mystrcat(*result
, " ", MAXLNLEN
);
2437 mystrcat(*result
, MORPH_STEM
, MAXLNLEN
);
2438 mystrcat(*result
, HENTRY_WORD(rv
), MAXLNLEN
);
2440 // store the pointer of the hash entry
2441 // sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2442 if (!complexprefixes
) {
2443 mystrcat(*result
, " ", MAXLNLEN
);
2444 mystrcat(*result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2447 mystrcat(*result
, "\n", MAXLNLEN
);
2451 numsyllable
= oldnumsyllable2
;
2452 wordnum
= oldwordnum2
;
2454 // perhaps second word has prefix or/and suffix
2456 sfxflag
= FLAG_NULL
;
2458 if (compoundflag
&& !onlycpdrule
) rv
= affix_check((word
+i
),strlen(word
+i
), compoundflag
); else rv
= NULL
;
2460 if (!rv
&& compoundend
&& !onlycpdrule
) {
2463 rv
= affix_check((word
+i
),strlen(word
+i
), compoundend
);
2466 if (!rv
&& numdefcpd
&& words
) {
2467 rv
= affix_check((word
+i
),strlen(word
+i
), 0, IN_CPD_END
);
2468 if (rv
&& words
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
, 1)) {
2470 if (compoundflag
) m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundflag
);
2471 if ((!m
|| *m
== '\0') && compoundend
) {
2473 m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundend
);
2475 mystrcat(*result
, presult
, MAXLNLEN
);
2476 if (m
|| (*m
!= '\0')) {
2477 sprintf(*result
+ strlen(*result
), "%c%s%s%s", MSEP_FLD
,
2478 MORPH_PART
, word
+ i
, line_uniq_app(&m
, MSEP_REC
));
2481 mystrcat(*result
, "\n", MAXLNLEN
);
2486 // check non_compound flag in suffix and prefix
2488 ((pfx
&& pfx
->getCont() &&
2489 TESTAFF(pfx
->getCont(), compoundforbidflag
,
2490 pfx
->getContLen())) ||
2491 (sfx
&& sfx
->getCont() &&
2492 TESTAFF(sfx
->getCont(), compoundforbidflag
,
2493 sfx
->getContLen())))) {
2497 // check forbiddenwords
2498 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
,forbiddenword
,rv
->alen
) ||
2499 TESTAFF(rv
->astr
, ONLYUPCASEFLAG
, rv
->alen
))
2500 && (! TESTAFF(rv
->astr
, needaffix
, rv
->alen
))) {
2505 if (langnum
== LANG_hu
) {
2506 // calculate syllable number of the word
2507 numsyllable
+= get_syllable(word
+ i
, strlen(word
+ i
));
2509 // - affix syllable num.
2510 // XXX only second suffix (inflections, not derivations)
2512 char * tmp
= myrevstrdup(sfxappnd
);
2513 numsyllable
-= get_syllable(tmp
, strlen(tmp
));
2517 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2518 if (pfx
&& (get_syllable(pfx
->getKey(),strlen(pfx
->getKey())) > 1)) wordnum
++;
2520 // increment syllable num, if last word has a SYLLABLENUM flag
2521 // and the suffix is beginning `s'
2523 if (cpdsyllablenum
) {
2525 case 'c': { numsyllable
+=2; break; }
2526 case 'J': { numsyllable
+= 1; break; }
2527 case 'I': { if (rv
&& TESTAFF(rv
->astr
, 'J', rv
->alen
)) numsyllable
+= 1; break; }
2532 // increment word number, if the second word has a compoundroot flag
2533 if ((rv
) && (compoundroot
) &&
2534 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2537 // is the second word acceptable as a word with a prefix and/or suffix?
2538 // Hungarian convention: compounding is acceptable
2539 // when the compound form consists of 2 words; otherwise
2540 // the syllable count of the root words must be 6 or fewer.
2543 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
2544 ((cpdmaxsyllable
!=0) &&
2545 (numsyllable
<= cpdmaxsyllable
))
2548 (!checkcompounddup
|| (rv
!= rv_first
))
2551 if (compoundflag
) m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundflag
);
2552 if ((!m
|| *m
== '\0') && compoundend
) {
2554 m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundend
);
2556 mystrcat(*result
, presult
, MAXLNLEN
);
2557 if (m
&& (*m
!= '\0')) {
2558 sprintf(*result
+ strlen(*result
), "%c%s%s%s", MSEP_FLD
,
2559 MORPH_PART
, word
+ i
, line_uniq_app(&m
, MSEP_REC
));
2562 sprintf(*result
+ strlen(*result
), "%c", MSEP_REC
);
2566 numsyllable
= oldnumsyllable2
;
2567 wordnum
= oldwordnum2
;
2569 // perhaps second word is a compound word (recursive call)
2570 if ((wordnum
< maxwordnum
) && (ok
== 0)) {
2571 compound_check_morph((word
+i
),strlen(word
+i
), wordnum
+1,
2572 numsyllable
, maxwordnum
, wnum
+ 1, words
, 0, result
, presult
);
2578 wordnum
= oldwordnum
;
2579 numsyllable
= oldnumsyllable
;
2581 } while (numdefcpd
&& oldwordnum
== 0 && !onlycpdrule
&& (onlycpdrule
= 1)); // end of onlycpd loop
2587 // return 1 if s1 (reversed) is a leading subset of end of s2
2588 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2590 while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
2595 return (*s1 == '\0');
*/
// Matches the key s1 against the text that ends at end_of_s2, examining at
// most len characters. A '.' in s1 is accepted in place of any character.
// NOTE(review): the loop body is not visible in this listing; presumably it
// advances s1, steps end_of_s2 backwards and decrements len -- confirm.
2599 inline int AffixMgr::isRevSubset(const char * s1
, const char * end_of_s2
, int len
)
2601 while ((len
> 0) && (*s1
!= '\0') && ((*s1
== *end_of_s2
) || (*s1
== '.'))) {
// Nonzero only when the terminator of s1 was reached.
2606 return (*s1
== '\0');
2609 // check word for suffixes
// Zero-length suffix entries (sStart[0]) are tried first, then the entries in
// the bucket indexed by the last byte of the word. Before SfxEntry::checkword()
// is called, a candidate is filtered on:
//   - compound position (in_compound, compoundpermitflag),
//   - circumfix pairing with the prefix ppfx,
//   - the onlyincompound flag,
//   - the needaffix flag on the suffix or the prefix.
// The result of checkword() is kept in rv. The members sfx, sfxflag and
// sfxappnd are written as a side effect (marked "not stateless" below).
// NOTE(review): the return statements and some guards are not visible in this
// listing, so the exact conditions of those member writes must be confirmed.
2611 struct hentry
* AffixMgr::suffix_check (const char * word
, int len
,
2612 int sfxopts
, PfxEntry
* ppfx
, char ** wlst
, int maxSug
, int * ns
,
2613 const FLAG cclass
, const FLAG needflag
, char in_compound
)
2615 struct hentry
* rv
= NULL
;
2616 PfxEntry
* ep
= ppfx
;
2618 // first handle the special case of 0 length suffixes
2619 SfxEntry
* se
= sStart
[0];
2622 if (!cclass
|| se
->getCont()) {
2623 // suffixes are not allowed in beginning of compounds
2624 if ((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2625 // except when signed with compoundpermitflag flag
2626 (se
->getCont() && compoundpermitflag
&&
2627 TESTAFF(se
->getCont(),compoundpermitflag
,se
->getContLen()))) && (!circumfix
||
2628 // no circumfix flag in prefix and suffix
2629 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2630 circumfix
, ep
->getContLen())) &&
2631 (!se
->getCont() || !(TESTAFF(se
->getCont(),circumfix
,se
->getContLen())))) ||
2632 // circumfix flag in prefix AND suffix
2633 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2634 circumfix
, ep
->getContLen())) &&
2635 (se
->getCont() && (TESTAFF(se
->getCont(),circumfix
,se
->getContLen()))))) &&
2638 !(se
->getCont() && (TESTAFF(se
->getCont(), onlyincompound
, se
->getContLen())))) &&
2639 // needaffix on prefix or first suffix
2641 !(se
->getCont() && TESTAFF(se
->getCont(), needaffix
, se
->getContLen())) ||
2642 (ppfx
&& !((ep
->getCont()) &&
2643 TESTAFF(ep
->getCont(), needaffix
,
2646 rv
= se
->checkword(word
,len
, sfxopts
, ppfx
, wlst
, maxSug
, ns
, (FLAG
) cclass
,
2647 needflag
, (in_compound
? 0 : onlyincompound
));
2649 sfx
=se
; // BUG: sfx not stateless
2657 // now handle the general case
// An empty word has no last byte to select a bucket with.
2658 if (len
== 0) return NULL
; // FULLSTRIP
2659 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2660 SfxEntry
* sptr
= sStart
[sp
];
2663 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)
2665 // suffixes are not allowed in beginning of compounds
2666 if ((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2667 // except when signed with compoundpermitflag flag
2668 (sptr
->getCont() && compoundpermitflag
&&
2669 TESTAFF(sptr
->getCont(),compoundpermitflag
,sptr
->getContLen()))) && (!circumfix
||
2670 // no circumfix flag in prefix and suffix
2671 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2672 circumfix
, ep
->getContLen())) &&
2673 (!sptr
->getCont() || !(TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen())))) ||
2674 // circumfix flag in prefix AND suffix
2675 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2676 circumfix
, ep
->getContLen())) &&
2677 (sptr
->getCont() && (TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen()))))) &&
2680 !((sptr
->getCont() && (TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))) &&
2681 // needaffix on prefix or first suffix
2683 !(sptr
->getCont() && TESTAFF(sptr
->getCont(), needaffix
, sptr
->getContLen())) ||
2684 (ppfx
&& !((ep
->getCont()) &&
2685 TESTAFF(ep
->getCont(), needaffix
,
2688 ) if (in_compound
!= IN_CPD_END
|| ppfx
|| !(sptr
->getCont() && TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))) {
2689 rv
= sptr
->checkword(word
,len
, sfxopts
, ppfx
, wlst
,
2690 maxSug
, ns
, cclass
, needflag
, (in_compound
? 0 : onlyincompound
));
2692 sfx
=sptr
; // BUG: sfx not stateless
2693 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2694 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
// getNextEQ()/getNextNE() select the next candidate entry; which one is taken
// depends on a branch that is not visible in this listing.
2698 sptr
= sptr
->getNextEQ();
2700 sptr
= sptr
->getNextNE();
2707 // check word for two-level suffixes
// Same two-phase scan as suffix_check (sStart[0] first, then the bucket of the
// last byte of the word), but an entry is only tried, via
// SfxEntry::check_twosfx(), when its flag is marked in contclasses[].
// Writes the members sfxflag and sfxappnd as a side effect.
// NOTE(review): return statements are not visible in this listing.
2709 struct hentry
* AffixMgr::suffix_check_twosfx(const char * word
, int len
,
2710 int sfxopts
, PfxEntry
* ppfx
, const FLAG needflag
)
2712 struct hentry
* rv
= NULL
;
2714 // first handle the special case of 0 length suffixes
2715 SfxEntry
* se
= sStart
[0];
2717 if (contclasses
[se
->getFlag()])
2719 rv
= se
->check_twosfx(word
,len
, sfxopts
, ppfx
, needflag
);
2725 // now handle the general case
// An empty word has no last byte to select a bucket with.
2726 if (len
== 0) return NULL
; // FULLSTRIP
2727 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2728 SfxEntry
* sptr
= sStart
[sp
];
2731 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)) {
2732 if (contclasses
[sptr
->getFlag()])
2734 rv
= sptr
->check_twosfx(word
,len
, sfxopts
, ppfx
, needflag
);
2736 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2737 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
2741 sptr
= sptr
->getNextEQ();
2743 sptr
= sptr
->getNextNE();
// Text-producing counterpart of suffix_check_twosfx: for every matching entry
// whose flag is marked in contclasses[], the string returned by
// SfxEntry::check_twosfx_morph() is combined with the morphological field of
// the prefix and of the suffix entry (or debugflag() output when the entry has
// no morph field) and appended to a MAXLNLEN stack buffer.
// Returns a mystrdup() copy of that buffer when it is non-empty, and NULL for
// an empty word once the zero-length-suffix phase is done.
// NOTE(review): the return for an empty result, the initialisation of the
// buffers and the release of st are not visible in this listing.
2750 char * AffixMgr::suffix_check_twosfx_morph(const char * word
, int len
,
2751 int sfxopts
, PfxEntry
* ppfx
, const FLAG needflag
)
2753 char result
[MAXLNLEN
];
2754 char result2
[MAXLNLEN
];
2755 char result3
[MAXLNLEN
];
2763 // first handle the special case of 0 length suffixes
2764 SfxEntry
* se
= sStart
[0];
2766 if (contclasses
[se
->getFlag()])
2768 st
= se
->check_twosfx_morph(word
,len
, sfxopts
, ppfx
, needflag
);
// NOTE(review): ppfx is dereferenced here; presumably guarded by a null check
// on a line that is not shown -- confirm.
2771 if (ppfx
->getMorph()) {
2772 mystrcat(result
, ppfx
->getMorph(), MAXLNLEN
);
2773 mystrcat(result
, " ", MAXLNLEN
);
2774 } else debugflag(result
, ppfx
->getFlag());
2776 mystrcat(result
, st
, MAXLNLEN
);
2778 if (se
->getMorph()) {
2779 mystrcat(result
, " ", MAXLNLEN
);
2780 mystrcat(result
, se
->getMorph(), MAXLNLEN
);
2781 } else debugflag(result
, se
->getFlag());
2782 mystrcat(result
, "\n", MAXLNLEN
);
2788 // now handle the general case
2789 if (len
== 0) return NULL
; // FULLSTRIP
2790 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2791 SfxEntry
* sptr
= sStart
[sp
];
2794 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)) {
2795 if (contclasses
[sptr
->getFlag()])
2797 st
= sptr
->check_twosfx_morph(word
,len
, sfxopts
, ppfx
, needflag
);
2799 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2800 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
// NOTE(review): unbounded copy into result2[MAXLNLEN]; safe only if st is
// always shorter than MAXLNLEN -- confirm against check_twosfx_morph().
2801 strcpy(result2
, st
);
2806 if (sptr
->getMorph()) {
2807 mystrcat(result3
, " ", MAXLNLEN
);
2808 mystrcat(result3
, sptr
->getMorph(), MAXLNLEN
);
2809 } else debugflag(result3
, sptr
->getFlag());
2810 strlinecat(result2
, result3
);
2811 mystrcat(result2
, "\n", MAXLNLEN
);
2812 mystrcat(result
, result2
, MAXLNLEN
);
2815 sptr
= sptr
->getNextEQ();
2817 sptr
= sptr
->getNextNE();
2820 if (*result
) return mystrdup(result
);
// Text-producing counterpart of suffix_check. For every hash entry found by
// SfxEntry::checkword() (and its homonyms via get_next_homonym()), one line is
// appended to a MAXLNLEN stack buffer: the prefix morph field, the entry's own
// data (HENTRY_DATA2), a MORPH_STEM field when the entry does not already
// carry one, and the suffix morph field (or debugflag() output).
// Returns a mystrdup() copy of the buffer when it is non-empty.
// Differences from suffix_check that are visible here: checkword() is called
// without the trailing onlyincompound argument, and the needaffix test of the
// general case reads "cclass || !(suffix has needaffix)".
// NOTE(review): loop headers, the empty-result return and the null guards
// around ppfx are not visible in this listing.
2824 char * AffixMgr::suffix_check_morph(const char * word
, int len
,
2825 int sfxopts
, PfxEntry
* ppfx
, const FLAG cclass
, const FLAG needflag
, char in_compound
)
2827 char result
[MAXLNLEN
];
2829 struct hentry
* rv
= NULL
;
2833 PfxEntry
* ep
= ppfx
;
2835 // first handle the special case of 0 length suffixes
2836 SfxEntry
* se
= sStart
[0];
2838 if (!cclass
|| se
->getCont()) {
2839 // suffixes are not allowed in beginning of compounds
2840 if (((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2841 // except when signed with compoundpermitflag flag
2842 (se
->getCont() && compoundpermitflag
&&
2843 TESTAFF(se
->getCont(),compoundpermitflag
,se
->getContLen()))) && (!circumfix
||
2844 // no circumfix flag in prefix and suffix
2845 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2846 circumfix
, ep
->getContLen())) &&
2847 (!se
->getCont() || !(TESTAFF(se
->getCont(),circumfix
,se
->getContLen())))) ||
2848 // circumfix flag in prefix AND suffix
2849 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2850 circumfix
, ep
->getContLen())) &&
2851 (se
->getCont() && (TESTAFF(se
->getCont(),circumfix
,se
->getContLen()))))) &&
2854 !((se
->getCont() && (TESTAFF(se
->getCont(), onlyincompound
, se
->getContLen()))))) &&
2855 // needaffix on prefix or first suffix
2857 !(se
->getCont() && TESTAFF(se
->getCont(), needaffix
, se
->getContLen())) ||
2858 (ppfx
&& !((ep
->getCont()) &&
2859 TESTAFF(ep
->getCont(), needaffix
,
2863 rv
= se
->checkword(word
, len
, sfxopts
, ppfx
, NULL
, 0, 0, cclass
, needflag
);
2866 if (ppfx
->getMorph()) {
2867 mystrcat(result
, ppfx
->getMorph(), MAXLNLEN
);
2868 mystrcat(result
, " ", MAXLNLEN
);
2869 } else debugflag(result
, ppfx
->getFlag());
// With complexprefixes the entry data goes before the stem field,
// otherwise after it (see the !complexprefixes branch below).
2871 if (complexprefixes
&& HENTRY_DATA(rv
)) mystrcat(result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2872 if (! HENTRY_FIND(rv
, MORPH_STEM
)) {
2873 mystrcat(result
, " ", MAXLNLEN
);
2874 mystrcat(result
, MORPH_STEM
, MAXLNLEN
);
2875 mystrcat(result
, HENTRY_WORD(rv
), MAXLNLEN
);
2877 // store the pointer of the hash entry
2878 // sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2880 if (!complexprefixes
&& HENTRY_DATA(rv
)) {
2881 mystrcat(result
, " ", MAXLNLEN
);
2882 mystrcat(result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2884 if (se
->getMorph()) {
2885 mystrcat(result
, " ", MAXLNLEN
);
2886 mystrcat(result
, se
->getMorph(), MAXLNLEN
);
2887 } else debugflag(result
, se
->getFlag());
2888 mystrcat(result
, "\n", MAXLNLEN
);
2889 rv
= se
->get_next_homonym(rv
, sfxopts
, ppfx
, cclass
, needflag
);
2895 // now handle the general case
// An empty word has no last byte to select a bucket with.
2896 if (len
== 0) return NULL
; // FULLSTRIP
2897 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2898 SfxEntry
* sptr
= sStart
[sp
];
2901 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)
2903 // suffixes are not allowed in beginning of compounds
2904 if (((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2905 // except when signed with compoundpermitflag flag
2906 (sptr
->getCont() && compoundpermitflag
&&
2907 TESTAFF(sptr
->getCont(),compoundpermitflag
,sptr
->getContLen()))) && (!circumfix
||
2908 // no circumfix flag in prefix and suffix
2909 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2910 circumfix
, ep
->getContLen())) &&
2911 (!sptr
->getCont() || !(TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen())))) ||
2912 // circumfix flag in prefix AND suffix
2913 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2914 circumfix
, ep
->getContLen())) &&
2915 (sptr
->getCont() && (TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen()))))) &&
2918 !((sptr
->getCont() && (TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))) &&
2919 // needaffix on first suffix
2920 (cclass
|| !(sptr
->getCont() &&
2921 TESTAFF(sptr
->getCont(), needaffix
, sptr
->getContLen())))
2922 )) rv
= sptr
->checkword(word
,len
, sfxopts
, ppfx
, NULL
, 0, 0, cclass
, needflag
);
2925 if (ppfx
->getMorph()) {
2926 mystrcat(result
, ppfx
->getMorph(), MAXLNLEN
);
2927 mystrcat(result
, " ", MAXLNLEN
);
2928 } else debugflag(result
, ppfx
->getFlag());
2930 if (complexprefixes
&& HENTRY_DATA(rv
)) mystrcat(result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2931 if (! HENTRY_FIND(rv
, MORPH_STEM
)) {
2932 mystrcat(result
, " ", MAXLNLEN
);
2933 mystrcat(result
, MORPH_STEM
, MAXLNLEN
);
2934 mystrcat(result
, HENTRY_WORD(rv
), MAXLNLEN
);
2936 // store the pointer of the hash entry
2937 // sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2939 if (!complexprefixes
&& HENTRY_DATA(rv
)) {
2940 mystrcat(result
, " ", MAXLNLEN
);
2941 mystrcat(result
, HENTRY_DATA2(rv
), MAXLNLEN
);
2944 if (sptr
->getMorph()) {
2945 mystrcat(result
, " ", MAXLNLEN
);
2946 mystrcat(result
, sptr
->getMorph(), MAXLNLEN
);
2947 } else debugflag(result
, sptr
->getFlag());
2948 mystrcat(result
, "\n", MAXLNLEN
);
2949 rv
= sptr
->get_next_homonym(rv
, sfxopts
, ppfx
, cclass
, needflag
);
2951 sptr
= sptr
->getNextEQ();
2953 sptr
= sptr
->getNextNE();
2957 if (*result
) return mystrdup(result
);
2961 // check if word with affixes is correctly spelled
// Tries, in this order: prefix_check(), suffix_check() and, only when
// havecontclass is set, suffix_check_twosfx() and prefix_check_twosfx().
// needflag is forwarded to every stage. in_compound is forwarded to
// prefix_check() and suffix_check(), while prefix_check_twosfx() is always
// called with IN_CPD_NOT.
// NOTE(review): the early returns between the stages are not visible in this
// listing; the "if still not found" comments imply each stage runs only when
// the previous one left rv empty.
2962 struct hentry
* AffixMgr::affix_check (const char * word
, int len
, const FLAG needflag
, char in_compound
)
2964 struct hentry
* rv
= NULL
;
2966 // check all prefixes (also crossed with suffixes if allowed)
2967 rv
= prefix_check(word
, len
, in_compound
, needflag
);
2970 // if still not found check all suffixes
2971 rv
= suffix_check(word
, len
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, needflag
, in_compound
);
2973 if (havecontclass
) {
2978 // if still not found check all two-level suffixes
2979 rv
= suffix_check_twosfx(word
, len
, 0, NULL
, needflag
);
2982 // if still not found check all two-level suffixes
2983 rv
= prefix_check_twosfx(word
, len
, IN_CPD_NOT
, needflag
);
2989 // collect the morphological descriptions of a word with affixes
// Text-producing counterpart of affix_check: the strings returned by
// prefix_check_morph(), suffix_check_morph() and, when havecontclass is set,
// suffix_check_twosfx_morph() and prefix_check_twosfx_morph() are appended to
// a MAXLNLEN stack buffer, and a mystrdup() copy of the buffer is returned.
// Visible asymmetries: prefix_check_morph() is called without needflag, and
// suffix_check_morph() receives '\0' as its cclass argument.
// NOTE(review): null checks and the release of st between the stages are not
// visible in this listing.
2990 char * AffixMgr::affix_check_morph(const char * word
, int len
, const FLAG needflag
, char in_compound
)
2992 char result
[MAXLNLEN
];
2997 // check all prefixes (also crossed with suffixes if allowed)
2998 st
= prefix_check_morph(word
, len
, in_compound
);
3000 mystrcat(result
, st
, MAXLNLEN
);
3004 // if still not found check all suffixes
3005 st
= suffix_check_morph(word
, len
, 0, NULL
, '\0', needflag
, in_compound
);
3007 mystrcat(result
, st
, MAXLNLEN
);
3011 if (havecontclass
) {
3014 // if still not found check all two-level suffixes
3015 st
= suffix_check_twosfx_morph(word
, len
, 0, NULL
, needflag
);
3017 mystrcat(result
, st
, MAXLNLEN
);
3021 // if still not found check all two-level suffixes
3022 st
= prefix_check_twosfx_morph(word
, len
, IN_CPD_NOT
, needflag
);
3024 mystrcat(result
, st
, MAXLNLEN
);
3029 return mystrdup(result
);
// Generates a form of the word ts (length wl) whose morphological description
// matches targetmorph, using the suffix flags ap[0..al).
//   - Returns NULL when morph is NULL or when ap carries the substandard flag.
//   - Returns a mystrdup() copy of ts when morph already matches targetmorph.
//   - Otherwise walks, for every flag, the sFlag[] chain selected by the low
//     byte of the flag and compares the resulting morph string with
//     targetmorph via morphcmp().
//   - At level 0, when morphcmp() yields 1 and the suffix has continuation
//     flags that are not substandard, recurses once (level 1) on the suffixed
//     word with the continuation flags of the suffix.
// NOTE(review): the handling of the morphcmp() == 0 case, the release of
// newword and the final return are not visible in this listing.
3032 char * AffixMgr::morphgen(char * ts
, int wl
, const unsigned short * ap
,
3033 unsigned short al
, char * morph
, char * targetmorph
, int level
)
3037 char * stemmorphcatpos
;
3038 char mymorph
[MAXLNLEN
];
3040 if (!morph
) return NULL
;
3042 // check substandard flag
3043 if (TESTAFF(ap
, substandard
, al
)) return NULL
;
3045 if (morphcmp(morph
, targetmorph
) == 0) return mystrdup(ts
);
3047 // int targetcount = get_sfxcount(targetmorph);
3049 // use input suffix fields, if exist
3050 if (strstr(morph
, MORPH_INFL_SFX
) || strstr(morph
, MORPH_DERI_SFX
)) {
3051 stemmorph
= mymorph
;
// NOTE(review): unbounded copy into mymorph[MAXLNLEN]; safe only if morph is
// always shorter than MAXLNLEN -- confirm against callers.
3052 strcpy(stemmorph
, morph
);
3053 mystrcat(stemmorph
, " ", MAXLNLEN
);
// Suffix morph fields are written at this position for each candidate below.
3054 stemmorphcatpos
= stemmorph
+ strlen(stemmorph
);
3057 stemmorphcatpos
= NULL
;
3060 for (int i
= 0; i
< al
; i
++) {
3061 const unsigned char c
= (unsigned char) (ap
[i
] & 0x00FF);
3062 SfxEntry
* sptr
= sFlag
[c
];
3064 if (sptr
->getFlag() == ap
[i
] && sptr
->getMorph() && ((sptr
->getContLen() == 0) ||
3065 // don't generate forms with substandard affixes
3066 !TESTAFF(sptr
->getCont(), substandard
, sptr
->getContLen()))) {
// NOTE(review): unbounded strcpy at stemmorphcatpos inside mymorph -- confirm
// that the combined length cannot exceed MAXLNLEN.
3068 if (stemmorphcatpos
) strcpy(stemmorphcatpos
, sptr
->getMorph());
3069 else stemmorph
= (char *) sptr
->getMorph();
3071 int cmp
= morphcmp(stemmorph
, targetmorph
);
3074 char * newword
= sptr
->add(ts
, wl
);
3076 hentry
* check
= pHMgr
->lookup(newword
); // XXX extra dic
// Accept the generated word unless the dictionary marks it as forbidden
// or as ONLYUPCASEFLAG.
3077 if (!check
|| !check
->astr
||
3078 !(TESTAFF(check
->astr
, forbiddenword
, check
->alen
) ||
3079 TESTAFF(check
->astr
, ONLYUPCASEFLAG
, check
->alen
))) {
3086 // recursive call for secondary suffixes
3087 if ((level
== 0) && (cmp
== 1) && (sptr
->getContLen() > 0) &&
3088 // (get_sfxcount(stemmorph) < targetcount) &&
3089 !TESTAFF(sptr
->getCont(), substandard
, sptr
->getContLen())) {
3090 char * newword
= sptr
->add(ts
, wl
);
3092 char * newword2
= morphgen(newword
, strlen(newword
), sptr
->getCont(),
3093 sptr
->getContLen(), stemmorph
, targetmorph
, 1);
3104 sptr
= sptr
->getFlgNxt();
// Expands the root word ts (length wl) with the affixes named by the flags
// ap[0..al) and stores the results in wlst[], never beyond maxn entries.
// Stages, as marked by the comments below:
//   1. the root word itself, unless it carries needaffix or onlyincompound;
//   2. its phonetic version phon, with the root stored in .orig;
//   3. suffixed forms whose affix text matches the end of bad (length badl);
//   4. cross products of those forms with prefixes that allow crossing;
//   5. purely prefixed forms.
// Suffixes and prefixes carrying needaffix, circumfix or onlyincompound in
// their continuation flags are skipped.
// NOTE(review): part of the signature (the phon parameter), the counter nh and
// the final return are not visible in this listing.
3111 int AffixMgr::expand_rootword(struct guessword
* wlst
, int maxn
, const char * ts
,
3112 int wl
, const unsigned short * ap
, unsigned short al
, char * bad
, int badl
,
3116 // first add root word to list
3117 if ((nh
< maxn
) && !(al
&& ((needaffix
&& TESTAFF(ap
, needaffix
, al
)) ||
3118 (onlyincompound
&& TESTAFF(ap
, onlyincompound
, al
))))) {
3119 wlst
[nh
].word
= mystrdup(ts
);
3120 if (!wlst
[nh
].word
) return 0;
3121 wlst
[nh
].allow
= (1 == 0);
3122 wlst
[nh
].orig
= NULL
;
3124 // add special phonetic version
3125 if (phon
&& (nh
< maxn
)) {
3126 wlst
[nh
].word
= mystrdup(phon
);
// On allocation failure the entry being built is excluded from the count.
3127 if (!wlst
[nh
].word
) return nh
- 1;
3128 wlst
[nh
].allow
= (1 == 0);
3129 wlst
[nh
].orig
= mystrdup(ts
);
3130 if (!wlst
[nh
].orig
) return nh
- 1;
3136 for (int i
= 0; i
< al
; i
++) {
3137 const unsigned char c
= (unsigned char) (ap
[i
] & 0x00FF);
3138 SfxEntry
* sptr
= sFlag
[c
];
// A suffix qualifies when it has an empty key or its affix text equals the
// tail of bad.
3140 if ((sptr
->getFlag() == ap
[i
]) && (!sptr
->getKeyLen() || ((badl
> sptr
->getKeyLen()) &&
3141 (strcmp(sptr
->getAffix(), bad
+ badl
- sptr
->getKeyLen()) == 0))) &&
3142 // check needaffix flag
3143 !(sptr
->getCont() && ((needaffix
&&
3144 TESTAFF(sptr
->getCont(), needaffix
, sptr
->getContLen())) ||
3146 TESTAFF(sptr
->getCont(), circumfix
, sptr
->getContLen())) ||
3148 TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))
3150 char * newword
= sptr
->add(ts
, wl
);
3153 wlst
[nh
].word
= newword
;
3154 wlst
[nh
].allow
= sptr
->allowCross();
3155 wlst
[nh
].orig
= NULL
;
3157 // add special phonetic version
3158 if (phon
&& (nh
< maxn
)) {
3159 char st
[MAXWORDUTF8LEN
];
// NOTE(review): the initialisation of st is not visible here, and strcat is
// unbounded -- confirm phon plus the suffix key fits in MAXWORDUTF8LEN.
3161 strcat(st
, sptr
->getKey());
// Only the appended key (after the phon part) is reversed.
3162 reverseword(st
+ strlen(phon
));
3163 wlst
[nh
].word
= mystrdup(st
);
3164 if (!wlst
[nh
].word
) return nh
- 1;
3165 wlst
[nh
].allow
= (1 == 0);
3166 wlst
[nh
].orig
= mystrdup(newword
);
3167 if (!wlst
[nh
].orig
) return nh
- 1;
3175 sptr
= sptr
->getFlgNxt();
3181 // handle cross products of prefixes and suffixes
3182 for (int j
=1;j
<n
;j
++)
3183 if (wlst
[j
].allow
) {
3184 for (int k
= 0; k
< al
; k
++) {
3185 const unsigned char c
= (unsigned char) (ap
[k
] & 0x00FF);
3186 PfxEntry
* cptr
= pFlag
[c
];
// A prefix qualifies when it allows crossing and has an empty key or its key
// equals the head of bad.
3188 if ((cptr
->getFlag() == ap
[k
]) && cptr
->allowCross() && (!cptr
->getKeyLen() || ((badl
> cptr
->getKeyLen()) &&
3189 (strncmp(cptr
->getKey(), bad
, cptr
->getKeyLen()) == 0)))) {
3190 int l1
= strlen(wlst
[j
].word
);
3191 char * newword
= cptr
->add(wlst
[j
].word
, l1
);
3194 wlst
[nh
].word
= newword
;
3195 wlst
[nh
].allow
= cptr
->allowCross();
3196 wlst
[nh
].orig
= NULL
;
3203 cptr
= cptr
->getFlgNxt();
3209 // now handle pure prefixes
3210 for (int m
= 0; m
< al
; m
++) {
3211 const unsigned char c
= (unsigned char) (ap
[m
] & 0x00FF);
3212 PfxEntry
* ptr
= pFlag
[c
];
3214 if ((ptr
->getFlag() == ap
[m
]) && (!ptr
->getKeyLen() || ((badl
> ptr
->getKeyLen()) &&
3215 (strncmp(ptr
->getKey(), bad
, ptr
->getKeyLen()) == 0))) &&
3216 // check needaffix flag
3217 !(ptr
->getCont() && ((needaffix
&&
3218 TESTAFF(ptr
->getCont(), needaffix
, ptr
->getContLen())) ||
3220 TESTAFF(ptr
->getCont(), circumfix
, ptr
->getContLen())) ||
3222 TESTAFF(ptr
->getCont(), onlyincompound
, ptr
->getContLen()))))
3224 char * newword
= ptr
->add(ts
, wl
);
3227 wlst
[nh
].word
= newword
;
3228 wlst
[nh
].allow
= ptr
->allowCross();
3229 wlst
[nh
].orig
= NULL
;
3236 ptr
= ptr
->getFlgNxt();
3243 // return the number of entries in the replacement (REP) table
3244 int AffixMgr::get_numrep() const
3249 // return the replacement (REP) table; NULL when no table is set
3250 struct replentry
* AffixMgr::get_reptable() const
3252 if (! reptable
) return NULL
;
3256 // return the iconv table; NULL when iconvtable is not set
3257 RepList
* AffixMgr::get_iconvtable() const
3259 if (! iconvtable
) return NULL
;
3263 // return the oconv table; NULL when oconvtable is not set
3264 RepList
* AffixMgr::get_oconvtable() const
3266 if (! oconvtable
) return NULL
;
3270 // return the phonetic (PHONE) table; NULL when phone is not set
3271 struct phonetable
* AffixMgr::get_phonetable() const
3273 if (! phone
) return NULL
;
3277 // return the number of entries in the character map table
3278 int AffixMgr::get_nummap() const
3283 // return the character map table; NULL when maptable is not set
3284 struct mapentry
* AffixMgr::get_maptable() const
3286 if (! maptable
) return NULL
;
3290 // return the number of entries in the word break table
3291 int AffixMgr::get_numbreak() const
3296 // return the word break table; NULL when breaktable is not set
3297 char ** AffixMgr::get_breaktable() const
3299 if (! breaktable
) return NULL
;
3303 // return text encoding of dictionary
// When no encoding is set yet, the member is first set to a copy of
// SPELL_ENCODING (which is why this accessor is not const). The value
// returned is a fresh mystrdup() copy.
// NOTE(review): presumably the caller must free the returned copy -- confirm.
3304 char * AffixMgr::get_encoding()
3306 if (! encoding
) encoding
= mystrdup(SPELL_ENCODING
);
3307 return mystrdup(encoding
);
3310 // return the language number of the dictionary
// NOTE(review): body not visible in this listing; presumably returns langnum.
3311 int AffixMgr::get_langnum() const
3316 // return the complexprefixes (double prefix) option
3317 int AffixMgr::get_complexprefixes() const
3319 return complexprefixes
;
3322 // return the FULLSTRIP option
// NOTE(review): body not visible in this listing.
3323 int AffixMgr::get_fullstrip() const
// NOTE(review): body not visible in this listing; presumably returns the
// keepcase flag -- confirm.
3328 FLAG
AffixMgr::get_keepcase() const
// NOTE(review): body not visible in this listing; presumably returns the
// forceucase flag -- confirm.
3333 FLAG
AffixMgr::get_forceucase() const
// NOTE(review): body not visible in this listing; presumably returns the
// warn flag -- confirm.
3338 FLAG
AffixMgr::get_warn() const
// NOTE(review): body not visible in this listing; presumably returns the
// forbidwarn option -- confirm.
3343 int AffixMgr::get_forbidwarn() const
// NOTE(review): body not visible in this listing; presumably returns the
// checksharps option -- confirm.
3348 int AffixMgr::get_checksharps() const
// Delegates the conversion of aflag to the hash manager's encode_flag().
3353 char * AffixMgr::encode_flag(unsigned short aflag
) const
3355 return pHMgr
->encode_flag(aflag
);
3359 // return the ignore string; NULL when ignorechars is not set
3360 char * AffixMgr::get_ignore() const
3362 if (!ignorechars
) return NULL
;
3366 // return the UTF-16 ignore characters and store their count in *len
3367 unsigned short * AffixMgr::get_ignore_utf16(int * len
) const
3369 *len
= ignorechars_utf16_len
;
3370 return ignorechars_utf16
;
3373 // return the keyboard string for suggestions
// When no key string is set yet, the member is first set to a copy of
// SPELL_KEYSTRING (which is why this accessor is not const). The value
// returned is a fresh mystrdup() copy.
// NOTE(review): presumably the caller must free the returned copy -- confirm.
3374 char * AffixMgr::get_key_string()
3376 if (! keystring
) keystring
= mystrdup(SPELL_KEYSTRING
);
3377 return mystrdup(keystring
);
3380 // return the preferred try string for suggestions
// Yields NULL when trystring is not set, otherwise a fresh mystrdup() copy.
3381 char * AffixMgr::get_try_string() const
3383 if (! trystring
) return NULL
;
3384 return mystrdup(trystring
);
3387 // return the word characters string
// NOTE(review): body not visible in this listing; presumably returns the
// wordchars member -- confirm.
3388 const char * AffixMgr::get_wordchars() const
// Returns the UTF-16 word characters and stores their count in *len.
3393 unsigned short * AffixMgr::get_wordchars_utf16(int * len
) const
3395 *len
= wordchars_utf16_len
;
3396 return wordchars_utf16
;
3399 // is there compounding?
// Nonzero when compoundflag, compoundbegin or numdefcpd is set.
3400 int AffixMgr::get_compound() const
3402 return compoundflag
|| compoundbegin
|| numdefcpd
;
3405 // return the compoundflag control flag
3406 FLAG
AffixMgr::get_compoundflag() const
3408 return compoundflag
;
3411 // return the forbiddenword control flag
3412 FLAG
AffixMgr::get_forbiddenword() const
3414 return forbiddenword
;
3417 // return the nosuggest control flag
// NOTE(review): body not visible in this listing.
3418 FLAG
AffixMgr::get_nosuggest() const
3423 // return the nongramsuggest control flag
3424 FLAG
AffixMgr::get_nongramsuggest() const
3426 return nongramsuggest
;
3429 // return the needaffix flag
// NOTE(review): body not visible in this listing.
3430 FLAG
AffixMgr::get_needaffix() const
3435 // return the onlyincompound control flag
3436 FLAG
AffixMgr::get_onlyincompound() const
3438 return onlyincompound
;
3441 // return the compoundroot flag (the compound word signal flag)
3442 FLAG
AffixMgr::get_compoundroot() const
3444 return compoundroot
;
3447 // return the compoundbegin flag (the compound begin signal flag)
3448 FLAG
AffixMgr::get_compoundbegin() const
3450 return compoundbegin
;
3453 // return the value of checknum
// NOTE(review): body not visible in this listing.
3454 int AffixMgr::get_checknum() const
3459 // return the key of the prefix entry held in pfx, when one is set
// NOTE(review): the result for an unset pfx is not visible in this listing.
3460 const char * AffixMgr::get_prefix() const
3462 if (pfx
) return pfx
->getKey();
3466 // return the value of suffix
// NOTE(review): body not visible in this listing.
3467 const char * AffixMgr::get_suffix() const
3472 // return the version string
// NOTE(review): body not visible in this listing; presumably returns the
// version member -- confirm.
3473 const char * AffixMgr::get_version() const
3478 // return the lemma_present flag
3479 FLAG
AffixMgr::get_lemma_present() const
3481 return lemma_present
;
3484 // utility method to look up root words in hash table
// Queries the dictionaries alldic[0 .. *maxdic) in order and stops at the
// first one whose lookup() yields an entry.
// NOTE(review): the return statement is not visible in this listing;
// presumably he is returned.
3485 struct hentry
* AffixMgr::lookup(const char * word
)
3488 struct hentry
* he
= NULL
;
3489 for (i
= 0; i
< *maxdic
&& !he
; i
++) {
3490 he
= (alldic
[i
])->lookup(word
);
3495 // return the havecontclass setting
3496 int AffixMgr::have_contclass() const
3498 return havecontclass
;
// NOTE(review): body not visible in this listing; presumably returns the
// utf8 setting -- confirm.
3502 int AffixMgr::get_utf8() const
// Returns the maxngramsugs setting.
3507 int AffixMgr::get_maxngramsugs(void) const
3509 return maxngramsugs
;
// NOTE(review): body not visible in this listing; presumably returns the
// maxcpdsugs setting -- confirm.
3512 int AffixMgr::get_maxcpdsugs(void) const
// NOTE(review): body not visible in this listing; presumably returns the
// maxdiff setting -- confirm.
3517 int AffixMgr::get_maxdiff(void) const
// NOTE(review): body not visible in this listing; presumably returns the
// onlymaxdiff setting -- confirm.
3522 int AffixMgr::get_onlymaxdiff(void) const
3527 // return the nosplitsugs setting
// NOTE(review): body not visible in this listing.
3528 int AffixMgr::get_nosplitsugs(void) const
3533 // return the sugswithdots setting
3534 int AffixMgr::get_sugswithdots(void) const
3536 return sugswithdots
;
// Parses a flag-valued affix file parameter from line into *out.
// A warning about multiple definitions is emitted when *out already holds a
// value that is neither FLAG_NULL nor >= DEFAULTFLAGS. The flag text is read
// with parse_string() and converted with the hash manager's decode_flag().
// Returns 1 when parse_string() fails.
// NOTE(review): the other return paths and the release of s are not visible
// in this listing.
3540 int AffixMgr::parse_flag(char * line
, unsigned short * out
, FileMgr
* af
) {
3542 if (*out
!= FLAG_NULL
&& !(*out
>= DEFAULTFLAGS
)) {
3543 HUNSPELL_WARNING(stderr
, "error: line %d: multiple definitions of an affix file parameter\n", af
->getlinenum());
3546 if (parse_string(line
, &s
, af
->getlinenum())) return 1;
3547 *out
= pHMgr
->decode_flag(s
);
// Parses a numeric affix file parameter from line into *out.
// Returns 1 when parse_string() fails.
// NOTE(review): the condition guarding the "multiple definitions" warning, the
// conversion of s to a number and the remaining return paths are not visible
// in this listing.
3553 int AffixMgr::parse_num(char * line
, int * out
, FileMgr
* af
) {
3556 HUNSPELL_WARNING(stderr
, "error: line %d: multiple definitions of an affix file parameter\n", af
->getlinenum());
3559 if (parse_string(line
, &s
, af
->getlinenum())) return 1;
3565 /* parse in the max syllablecount of compound words and */
// Tokenises line with mystrsep(). The first token is skipped, the second one
// sets cpdmaxsyllable via atoi(), and a following token supplies the vowel
// set: stored in cpdvowels, or converted with u8_u16(), sorted with
// flag_qsort() and copied into a malloc'ed cpdvowels_utf16 array whose length
// goes to cpdvowels_utf16_len.
// When only two tokens were seen (np == 2), cpdvowels defaults to "aeiouAEIOU".
// Returns 1 when the UTF-16 vowel buffer cannot be allocated.
// NOTE(review): the switch header, the branch that selects the UTF-16 path and
// the remaining return paths are not visible in this listing.
3566 int AffixMgr::parse_cpdsyllable(char * line
, FileMgr
* af
)
3572 w_char w
[MAXWORDLEN
];
3573 piece
= mystrsep(&tp
, 0);
3575 if (*piece
!= '\0') {
3577 case 0: { np
++; break; }
3578 case 1: { cpdmaxsyllable
= atoi(piece
); np
++; break; }
3581 cpdvowels
= mystrdup(piece
);
3583 int n
= u8_u16(w
, MAXWORDLEN
, piece
);
3585 flag_qsort((unsigned short *) w
, 0, n
);
3586 cpdvowels_utf16
= (w_char
*) malloc(n
* sizeof(w_char
));
3587 if (!cpdvowels_utf16
) return 1;
3588 memcpy(cpdvowels_utf16
, w
, n
* sizeof(w_char
));
3590 cpdvowels_utf16_len
= n
;
3599 piece
= mystrsep(&tp
, 0);
3602 HUNSPELL_WARNING(stderr
, "error: line %d: missing compoundsyllable information\n", af
->getlinenum());
3605 if (np
== 2) cpdvowels
= mystrdup("aeiouAEIOU");
3609 /* parse in the typical fault correcting table */
// The header line supplies the entry count (numrep, via atoi) and reptable is
// malloc'ed for that many replentry records. Each of the following numrep
// lines must start with "REP" and supply two patterns:
//   - a leading '^' on the first pattern sets .start and is dropped;
//   - a trailing '$' on the first pattern sets .end and is dropped;
//   - '_' is replaced by ' ' in both patterns.
// Returns 1 when the table cannot be allocated or a table line cannot be read.
// NOTE(review): the guard for the "multiple table definitions" warning, the
// switch headers and the other return paths are not visible in this listing.
3610 int AffixMgr::parse_reptable(char * line
, FileMgr
* af
)
3613 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
3620 piece
= mystrsep(&tp
, 0);
3622 if (*piece
!= '\0') {
3624 case 0: { np
++; break; }
3626 numrep
= atoi(piece
);
3628 HUNSPELL_WARNING(stderr
, "error: line %d: incorrect entry number\n", af
->getlinenum());
3631 reptable
= (replentry
*) malloc(numrep
* sizeof(struct replentry
));
3632 if (!reptable
) return 1;
3640 piece
= mystrsep(&tp
, 0);
3643 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
3647 /* now parse the numrep lines to read in the remainder of the table */
3649 for (int j
=0; j
< numrep
; j
++) {
3650 if ((nl
= af
->getline()) == NULL
) return 1;
3654 reptable
[j
].pattern
= NULL
;
3655 reptable
[j
].pattern2
= NULL
;
3656 piece
= mystrsep(&tp
, 0);
3658 if (*piece
!= '\0') {
3661 if (strncmp(piece
,"REP",3) != 0) {
3662 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3669 if (*piece
== '^') reptable
[j
].start
= true; else reptable
[j
].start
= false;
// int(start) is 1 for a leading '^', so the copy begins after it.
3670 reptable
[j
].pattern
= mystrrep(mystrdup(piece
+ int(reptable
[j
].start
)),"_"," ");
// NOTE(review): for a pattern that is just "^" the copy is empty, lr becomes
// -1 and pattern[lr] reads before the buffer -- confirm and guard.
3671 int lr
= strlen(reptable
[j
].pattern
) - 1;
3672 if (reptable
[j
].pattern
[lr
] == '$') {
3673 reptable
[j
].end
= true;
3674 reptable
[j
].pattern
[lr
] = '\0';
3675 } else reptable
[j
].end
= false;
3678 case 2: { reptable
[j
].pattern2
= mystrrep(mystrdup(piece
),"_"," "); break; }
3683 piece
= mystrsep(&tp
, 0);
// Both patterns are mandatory on every table line.
3685 if ((!(reptable
[j
].pattern
)) || (!(reptable
[j
].pattern2
))) {
3686 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3694 /* parse in a conversion table into *rl; table lines start with keyword */
// The header line supplies the entry count (numrl, via atoi) and *rl is set to
// a new RepList of that size. Each of the following numrl lines must start
// with keyword and supply two patterns; '_' is replaced by ' ' in both and the
// pair is added to the list.
// Returns 1 when a table line cannot be read.
// NOTE(review): the guard for the "multiple table definitions" warning, the
// switch headers, the cleanup of pattern/pattern2 on error and the other
// return paths are not visible in this listing.
3695 int AffixMgr::parse_convtable(char * line
, FileMgr
* af
, RepList
** rl
, const char * keyword
)
3698 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
3706 piece
= mystrsep(&tp
, 0);
3708 if (*piece
!= '\0') {
3710 case 0: { np
++; break; }
3712 numrl
= atoi(piece
);
3714 HUNSPELL_WARNING(stderr
, "error: line %d: incorrect entry number\n", af
->getlinenum());
3717 *rl
= new RepList(numrl
);
3726 piece
= mystrsep(&tp
, 0);
3729 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
3733 /* now parse the num lines to read in the remainder of the table */
3735 for (int j
=0; j
< numrl
; j
++) {
3736 if (!(nl
= af
->getline())) return 1;
3740 char * pattern
= NULL
;
3741 char * pattern2
= NULL
;
3742 piece
= mystrsep(&tp
, 0);
3744 if (*piece
!= '\0') {
3747 if (strncmp(piece
, keyword
, strlen(keyword
)) != 0) {
3748 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3755 case 1: { pattern
= mystrrep(mystrdup(piece
),"_"," "); break; }
3757 pattern2
= mystrrep(mystrdup(piece
),"_"," ");
3764 piece
= mystrsep(&tp
, 0);
// Both patterns are mandatory on every table line.
3766 if (!pattern
|| !pattern2
) {
3771 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3774 (*rl
)->add(pattern
, pattern2
);
3780 /* parse in the phonetic (PHONE) table */
// The header line allocates phone and sets phone->num (via atoi); a count
// below 1 is reported as a bad entry number. phone->rules is allocated with
// room for 2 * (num + 1) pointers: one pair per rule plus a terminating pair.
// Each of the following num lines must start with "PHONE" and supply two
// strings, from which every '_' is removed. The terminating pair is set to two
// empty strings and init_phonet_hash() is called on the finished table.
// Returns 1 when phone cannot be allocated or a table line cannot be read.
// NOTE(review): the guard for the "multiple table definitions" warning, the
// switch headers, the handling of a failed rules allocation and the other
// return paths are not visible in this listing.
3781 int AffixMgr::parse_phonetable(char * line
, FileMgr
* af
)
3784 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
3791 piece
= mystrsep(&tp
, 0);
3793 if (*piece
!= '\0') {
3795 case 0: { np
++; break; }
3797 phone
= (phonetable
*) malloc(sizeof(struct phonetable
));
3798 if (!phone
) return 1;
3799 phone
->num
= atoi(piece
);
3800 phone
->rules
= NULL
;
3801 phone
->utf8
= (char) utf8
;
3802 if (phone
->num
< 1) {
3803 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n", af
->getlinenum());
3806 phone
->rules
= (char * *) malloc(2 * (phone
->num
+ 1) * sizeof(char *));
3807 if (!phone
->rules
) {
3819 piece
= mystrsep(&tp
, 0);
3822 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
3826 /* now parse the phone->num lines to read in the remainder of the table */
3828 for (int j
=0; j
< phone
->num
; j
++) {
3829 if (!(nl
= af
->getline())) return 1;
3833 phone
->rules
[j
* 2] = NULL
;
3834 phone
->rules
[j
* 2 + 1] = NULL
;
3835 piece
= mystrsep(&tp
, 0);
3837 if (*piece
!= '\0') {
3840 if (strncmp(piece
,"PHONE",5) != 0) {
3841 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3847 case 1: { phone
->rules
[j
* 2] = mystrrep(mystrdup(piece
),"_",""); break; }
3848 case 2: { phone
->rules
[j
* 2 + 1] = mystrrep(mystrdup(piece
),"_",""); break; }
3853 piece
= mystrsep(&tp
, 0);
// Both strings are mandatory on every table line.
3855 if ((!(phone
->rules
[j
* 2])) || (!(phone
->rules
[j
* 2 + 1]))) {
3856 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
// Terminating pair of empty strings after the last rule.
3861 phone
->rules
[phone
->num
* 2] = mystrdup("");
3862 phone
->rules
[phone
->num
* 2 + 1] = mystrdup("");
3863 init_phonet_hash(*phone
);
3867 /* parse in the checkcompoundpattern table */
// A non-zero numcheckcpd on entry is reported as multiple table definitions.
// The entry count is read into numcheckcpd with atoi() (values below 1 are
// reported as a bad entry number) and checkcpdtable is allocated with malloc().
// Each data line must start with "CHECKCOMPOUNDPATTERN". The pattern and
// pattern2 fields are copied with mystrdup(); each copy is searched for '/'
// and the text after it is decoded with decode_flag() into cond / cond2.
// A third field is stored in pattern3 and switches simplifiedcpd on.
// Returns 1 if the table allocation fails or a data line cannot be read.
3868 int AffixMgr::parse_checkcpdtable(char * line
, FileMgr
* af
)
3870 if (numcheckcpd
!= 0) {
3871 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
3878 piece
= mystrsep(&tp
, 0);
3880 if (*piece
!= '\0') {
3882 case 0: { np
++; break; }
3884 numcheckcpd
= atoi(piece
);
3885 if (numcheckcpd
< 1) {
3886 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n", af
->getlinenum());
3889 checkcpdtable
= (patentry
*) malloc(numcheckcpd
* sizeof(struct patentry
));
3890 if (!checkcpdtable
) return 1;
3898 piece
= mystrsep(&tp
, 0);
3901 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
3905 /* now parse the numcheckcpd lines to read in the remainder of the table */
3907 for (int j
=0; j
< numcheckcpd
; j
++) {
3908 if (!(nl
= af
->getline())) return 1;
// reset every field of the entry before the line is split into pieces
3912 checkcpdtable
[j
].pattern
= NULL
;
3913 checkcpdtable
[j
].pattern2
= NULL
;
3914 checkcpdtable
[j
].pattern3
= NULL
;
3915 checkcpdtable
[j
].cond
= FLAG_NULL
;
3916 checkcpdtable
[j
].cond2
= FLAG_NULL
;
3917 piece
= mystrsep(&tp
, 0);
3919 if (*piece
!= '\0') {
3922 if (strncmp(piece
,"CHECKCOMPOUNDPATTERN",20) != 0) {
3923 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
// first pattern, with an optional flag after '/'
3930 checkcpdtable
[j
].pattern
= mystrdup(piece
);
3931 char * p
= strchr(checkcpdtable
[j
].pattern
, '/');
3934 checkcpdtable
[j
].cond
= pHMgr
->decode_flag(p
+ 1);
// second pattern, handled the same way
3938 checkcpdtable
[j
].pattern2
= mystrdup(piece
);
3939 char * p
= strchr(checkcpdtable
[j
].pattern2
, '/');
3942 checkcpdtable
[j
].cond2
= pHMgr
->decode_flag(p
+ 1);
3946 case 3: { checkcpdtable
[j
].pattern3
= mystrdup(piece
); simplifiedcpd
= 1; break; }
3951 piece
= mystrsep(&tp
, 0);
// pattern and pattern2 are mandatory; pattern3 is not checked here
3953 if ((!(checkcpdtable
[j
].pattern
)) || (!(checkcpdtable
[j
].pattern2
))) {
3954 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
3962 /* parse in the compound rule table */
// A non-zero numdefcpd on entry is reported as multiple table definitions.
// The entry count is read into numdefcpd with atoi() (values below 1 are
// reported as a bad entry number) and defcpdtable is allocated with malloc().
// Each data line must start with "COMPOUNDRULE". A rule containing '(' is cut
// at '(' and ')' characters: a chunk starting with '*' or '?' is stored
// directly as a FLAG value, any other non-empty chunk is decoded with
// decode_flags() and appended flag by flag. A rule without '(' is decoded by
// a single decode_flags() call. A rule whose len is 0 is reported as corrupt.
// Returns 1 if the table allocation fails or a data line cannot be read.
3963 int AffixMgr::parse_defcpdtable(char * line
, FileMgr
* af
)
3965 if (numdefcpd
!= 0) {
3966 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
3973 piece
= mystrsep(&tp
, 0);
3975 if (*piece
!= '\0') {
3977 case 0: { np
++; break; }
3979 numdefcpd
= atoi(piece
);
3980 if (numdefcpd
< 1) {
3981 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n", af
->getlinenum());
3984 defcpdtable
= (flagentry
*) malloc(numdefcpd
* sizeof(flagentry
));
3985 if (!defcpdtable
) return 1;
3993 piece
= mystrsep(&tp
, 0);
3996 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
4000 /* now parse the numdefcpd lines to read in the remainder of the table */
4002 for (int j
=0; j
< numdefcpd
; j
++) {
4003 if (!(nl
= af
->getline())) return 1;
// NOTE(review): only .def is reset here; .len is not visibly initialised
// next to it, yet it is tested after the line is parsed - confirm it cannot
// be read uninitialised when the rule field is missing
4007 defcpdtable
[j
].def
= NULL
;
4008 piece
= mystrsep(&tp
, 0);
4010 if (*piece
!= '\0') {
4013 if (strncmp(piece
, "COMPOUNDRULE", 12) != 0) {
4014 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
4020 case 1: { // handle parenthesized flags
4021 if (strchr(piece
, '(')) {
// strlen(piece) FLAG slots are reserved for the decoded rule
4022 defcpdtable
[j
].def
= (FLAG
*) malloc(strlen(piece
) * sizeof(FLAG
));
4023 defcpdtable
[j
].len
= 0;
// advance par to the next '(' , ')' or end of string, then cut the chunk there
4027 char * par
= piece
+ 1;
4028 while (*par
!= '(' && *par
!= ')' && *par
!= '\0') par
++;
4029 if (*par
== '\0') end
= 1; else *par
= '\0';
4030 if (*piece
== '(') piece
++;
// '*' and '?' are kept as literal FLAG values instead of being decoded
4031 if (*piece
== '*' || *piece
== '?') {
4032 defcpdtable
[j
].def
[defcpdtable
[j
].len
++] = (FLAG
) *piece
;
4033 } else if (*piece
!= '\0') {
4034 int l
= pHMgr
->decode_flags(&conv
, piece
, af
);
4035 for (int k
= 0; k
< l
; k
++) defcpdtable
[j
].def
[defcpdtable
[j
].len
++] = conv
[k
];
// rule without parentheses: decode the whole field in one call
4041 defcpdtable
[j
].len
= pHMgr
->decode_flags(&(defcpdtable
[j
].def
), piece
, af
);
4049 piece
= mystrsep(&tp
, 0);
4051 if (!defcpdtable
[j
].len
) {
4052 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
4061 /* parse in the character map table */
// The entry count is read into nummap with atoi() and maptable is allocated
// with malloc(). Each data line must start with "MAP". The field after the
// keyword is split into items: the text between '(' and a following ')' forms
// one item, and when utf8 is set a byte with its top two bits set absorbs the
// continuation bytes (10xxxxxx) that follow it. Each item is copied into a
// freshly allocated, NUL-terminated string in maptable[j].set.
// Returns 1 if an allocation fails or a data line cannot be read.
4062 int AffixMgr::parse_maptable(char * line
, FileMgr
* af
)
4065 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
4072 piece
= mystrsep(&tp
, 0);
4074 if (*piece
!= '\0') {
4076 case 0: { np
++; break; }
4078 nummap
= atoi(piece
);
4080 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n", af
->getlinenum());
4083 maptable
= (mapentry
*) malloc(nummap
* sizeof(struct mapentry
));
4084 if (!maptable
) return 1;
4092 piece
= mystrsep(&tp
, 0);
4095 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
4099 /* now parse the nummap lines to read in the remainder of the table */
4101 for (int j
=0; j
< nummap
; j
++) {
4102 if (!(nl
= af
->getline())) return 1;
4106 maptable
[j
].set
= NULL
;
4107 maptable
[j
].len
= 0;
4108 piece
= mystrsep(&tp
, 0);
4110 if (*piece
!= '\0') {
4113 if (strncmp(piece
,"MAP",3) != 0) {
4114 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
// len temporarily holds the byte length of the field, which bounds the number
// of items; it is overwritten with the real item count (setn) after the scan
4122 maptable
[j
].len
= strlen(piece
);
4123 maptable
[j
].set
= (char **) malloc(maptable
[j
].len
* sizeof(char*));
4124 if (!maptable
[j
].set
) return 1;
4125 for (int k
= 0; k
< maptable
[j
].len
; k
++) {
// parenthesized group: the item length is the distance between '(' and ')'
4128 if (piece
[k
] == '(') {
4129 char * parpos
= strchr(piece
+ k
, ')');
4130 if (parpos
!= NULL
) {
4132 chl
= (int)(parpos
- piece
) - k
- 1;
// UTF-8 mode: step k over the continuation bytes of a multi-byte character
4136 if (utf8
&& (piece
[k
] & 0xc0) == 0xc0) {
4137 for (k
++; utf8
&& (piece
[k
] & 0xc0) == 0x80; k
++);
// copy chl bytes starting at offset chb and terminate the copy explicitly,
// since strncpy() does not add a terminator here
4142 maptable
[j
].set
[setn
] = (char *) malloc(chl
+ 1);
4143 if (!maptable
[j
].set
[setn
]) return 1;
4144 strncpy(maptable
[j
].set
[setn
], piece
+ chb
, chl
);
4145 maptable
[j
].set
[setn
][chl
] = '\0';
4148 maptable
[j
].len
= setn
;
4154 piece
= mystrsep(&tp
, 0);
// an entry with no allocated set or no items is reported as corrupt
4156 if (!maptable
[j
].set
|| !maptable
[j
].len
) {
4157 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
4165 /* parse in the word breakpoint table */
// A numbreak value above -1 on entry is reported as multiple table definitions.
// The entry count is read into numbreak with atoi(); a count of 0 returns 0
// immediately without allocating a table. Otherwise breaktable is allocated
// with malloc() and each data line, which must start with "BREAK", supplies
// one string copied with mystrdup().
// Returns 1 if the allocation fails or a data line cannot be read.
4166 int AffixMgr::parse_breaktable(char * line
, FileMgr
* af
)
4168 if (numbreak
> -1) {
4169 HUNSPELL_WARNING(stderr
, "error: line %d: multiple table definitions\n", af
->getlinenum());
4176 piece
= mystrsep(&tp
, 0);
4178 if (*piece
!= '\0') {
4180 case 0: { np
++; break; }
4182 numbreak
= atoi(piece
);
4184 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n", af
->getlinenum());
// an explicit count of zero is accepted and leaves the table empty
4187 if (numbreak
== 0) return 0;
4188 breaktable
= (char **) malloc(numbreak
* sizeof(char *));
4189 if (!breaktable
) return 1;
4197 piece
= mystrsep(&tp
, 0);
4200 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
4204 /* now parse the numbreak lines to read in the remainder of the table */
4206 for (int j
=0; j
< numbreak
; j
++) {
4207 if (!(nl
= af
->getline())) return 1;
4211 piece
= mystrsep(&tp
, 0);
4213 if (*piece
!= '\0') {
4216 if (strncmp(piece
,"BREAK",5) != 0) {
4217 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
4224 breaktable
[j
] = mystrdup(piece
);
4231 piece
= mystrsep(&tp
, 0);
4234 HUNSPELL_WARNING(stderr
, "error: line %d: table is corrupt\n", af
->getlinenum());
// Rewrites the condition string `piece` in place, scanning from its last
// character back to its first. `neg` is set when the character after the
// current one is ']'; while it is set, '[' and '^' are written one position to
// the right of k instead of at k.
// parse_affix applies this to the condition field (piece 5) of an affix entry.
4242 void AffixMgr::reverse_condition(char * piece
) {
// NOTE(review): for an empty string the start value is piece - 1, a pointer
// before the buffer - confirm callers never pass an empty condition
4244 for (char * k
= piece
+ strlen(piece
) - 1; k
>= piece
; k
--) {
4247 if (neg
) *(k
+1) = '['; else *k
= ']';
4252 if (neg
) *(k
+1) = '^';
4257 if (*(k
+1) == ']') neg
= 1; else *(k
+1) = *k
;
4261 if (neg
) *(k
+1) = *k
;
// Parses one affix class: the header line passed in `line`, followed by
// numents entry lines read from `af`. `at` is compared against 'S' and 'P' to
// pick the suffix or prefix bookkeeping, and `dupflags` records per flag which
// of the two kinds has already been defined.
// The entries are collected in a std::vector<affentry>; afterwards each one is
// wrapped in a PfxEntry or SfxEntry and linked in through build_pfxtree() or
// build_sfxtree().
// Returns 1 when an entry line cannot be read, when encodeit() reports
// failure, or when the morphcode copy cannot be allocated.
4267 int AffixMgr::parse_affix(char * line
, const char at
, FileMgr
* af
, char * dupflags
)
4269 int numents
= 0; // number of affentry structures to parse
4271 unsigned short aflag
= 0; // affix char identifier
4274 std::vector
<affentry
> affentries
;
4281 // checking lines with bad syntax
4283 int basefieldnum
= 0;
4286 // split affix header line into pieces
4290 piece
= mystrsep(&tp
, 0);
4292 if (*piece
!= '\0') {
4294 // piece 1 - is type of affix
4295 case 0: { np
++; break; }
4297 // piece 2 - is affix char
4300 aflag
= pHMgr
->decode_flag(piece
);
4301 #ifndef HUNSPELL_CHROME_CLIENT // We don't check for duplicates.
4302 if (((at
== 'S') && (dupflags
[aflag
] & dupSFX
)) ||
4303 ((at
== 'P') && (dupflags
[aflag
] & dupPFX
))) {
4304 HUNSPELL_WARNING(stderr
, "error: line %d: multiple definitions of an affix flag\n",
4306 // return 1; XXX permissive mode for bad dictionaries
// remember that this flag now has a definition of the current kind
4308 dupflags
[aflag
] += (char) ((at
== 'S') ? dupSFX
: dupPFX
);
4312 // piece 3 - is cross product indicator
4313 case 2: { np
++; if (*piece
== 'Y') ff
= aeXPRODUCT
; break; }
4315 // piece 4 - is number of affentries
4318 numents
= atoi(piece
);
4320 char * err
= pHMgr
->encode_flag(aflag
);
4322 HUNSPELL_WARNING(stderr
, "error: line %d: bad entry number\n",
// entry 0 carries the options and flag shared by the whole class; the later
// entries copy them from it while their own lines are parsed
// NOTE(review): affentries[0] requires numents > 0 - confirm the entry-number
// check above rejects zero and negative counts before this point
4328 affentries
.resize(numents
);
4329 affentries
[0].opts
= ff
;
4330 if (utf8
) affentries
[0].opts
+= aeUTF8
;
4331 if (pHMgr
->is_aliasf()) affentries
[0].opts
+= aeALIASF
;
4332 if (pHMgr
->is_aliasm()) affentries
[0].opts
+= aeALIASM
;
4333 affentries
[0].aflag
= aflag
;
4340 piece
= mystrsep(&tp
, 0);
4342 // check to make sure we parsed enough pieces
4344 char * err
= pHMgr
->encode_flag(aflag
);
4346 HUNSPELL_WARNING(stderr
, "error: line %d: missing data\n", af
->getlinenum());
4352 // now parse numents affentries for this affix
4353 std::vector
<affentry
>::iterator start
= affentries
.begin();
4354 std::vector
<affentry
>::iterator end
= affentries
.end();
4355 for (std::vector
<affentry
>::iterator entry
= start
; entry
!= end
; ++entry
) {
4356 if ((nl
= af
->getline()) == NULL
) return 1;
4362 // split line into pieces
4363 piece
= mystrsep(&tp
, 0);
4365 if (*piece
!= '\0') {
4367 // piece 1 - is type
4370 if (entry
!= start
) entry
->opts
= start
->opts
&
4371 (char) (aeXPRODUCT
+ aeUTF8
+ aeALIASF
+ aeALIASM
);
4375 // piece 2 - is affix char
4378 if (pHMgr
->decode_flag(piece
) != aflag
) {
4379 char * err
= pHMgr
->encode_flag(aflag
);
4381 HUNSPELL_WARNING(stderr
, "error: line %d: affix %s is corrupt\n",
4382 af
->getlinenum(), err
);
4388 if (entry
!= start
) entry
->aflag
= start
->aflag
;
4392 // piece 3 - is string to strip or 0 for null
4395 if (complexprefixes
) {
4396 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
// NOTE(review): the length is narrowed to unsigned char, so a strip string
// longer than that type can hold would wrap - confirm inputs are bounded
4398 entry
->strip
= mystrdup(piece
);
4399 entry
->stripl
= (unsigned char) strlen(entry
->strip
);
4400 if (strcmp(entry
->strip
,"0") == 0) {
4402 entry
->strip
=mystrdup("");
4408 // piece 4 - is affix string or 0 for null
4411 entry
->morphcode
= NULL
;
4412 entry
->contclass
= NULL
;
4413 entry
->contclasslen
= 0;
// a '/' separates the affix string from its continuation class flags
4415 dash
= strchr(piece
, '/');
4421 remove_ignored_chars_utf(piece
, ignorechars_utf16
, ignorechars_utf16_len
);
4423 remove_ignored_chars(piece
,ignorechars
);
4427 if (complexprefixes
) {
4428 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
4430 entry
->appnd
= mystrdup(piece
);
// with flag aliases the text after '/' is an alias index, otherwise it is a
// flag list that is decoded and then sorted with flag_qsort()
4432 if (pHMgr
->is_aliasf()) {
4433 int index
= atoi(dash
+ 1);
4434 entry
->contclasslen
= (unsigned short) pHMgr
->get_aliasf(index
, &(entry
->contclass
), af
);
4435 if (!entry
->contclasslen
) HUNSPELL_WARNING(stderr
, "error: bad affix flag alias: \"%s\"\n", dash
+1);
4437 entry
->contclasslen
= (unsigned short) pHMgr
->decode_flags(&(entry
->contclass
), dash
+ 1, af
);
4438 flag_qsort(entry
->contclass
, 0, entry
->contclasslen
);
// mark every continuation flag of this entry in the contclasses lookup array
4443 for (unsigned short _i
= 0; _i
< entry
->contclasslen
; _i
++) {
4444 contclasses
[(entry
->contclass
)[_i
]] = 1;
4449 remove_ignored_chars_utf(piece
, ignorechars_utf16
, ignorechars_utf16_len
);
4451 remove_ignored_chars(piece
,ignorechars
);
4455 if (complexprefixes
) {
4456 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
4458 entry
->appnd
= mystrdup(piece
);
4461 entry
->appndl
= (unsigned char) strlen(entry
->appnd
);
4462 if (strcmp(entry
->appnd
,"0") == 0) {
4464 entry
->appnd
=mystrdup("");
4470 // piece 5 - is the conditions descriptions
4473 if (complexprefixes
) {
4474 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
4475 reverse_condition(piece
);
// only a non-empty strip string combined with a condition other than "." is
// passed to redundant_condition()
4477 if (entry
->stripl
&& (strcmp(piece
, ".") != 0) &&
4478 redundant_condition(at
, entry
->strip
, entry
->stripl
, piece
, af
->getlinenum()))
4482 reverse_condition(piece
);
4484 if (encodeit(*entry
, piece
)) return 1;
// the morphological field is either an alias index or literal text
4490 if (pHMgr
->is_aliasm()) {
4491 int index
= atoi(piece
);
4492 entry
->morphcode
= pHMgr
->get_aliasm(index
);
4494 if (complexprefixes
) { // XXX - fix me for morph. gen.
4495 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
4497 // add the remaining of the line
4500 tp
= tp
+ strlen(tp
);
4502 entry
->morphcode
= mystrdup(piece
);
4503 if (!entry
->morphcode
) return 1;
4511 piece
= mystrsep(&tp
, 0);
4513 // check to make sure we parsed enough pieces
4515 char * err
= pHMgr
->encode_flag(aflag
);
4517 HUNSPELL_WARNING(stderr
, "error: line %d: affix %s is corrupt\n",
4518 af
->getlinenum(), err
);
4525 // detect unnecessary fields, excepting comments
// an entry counts as 6 fields only when it has a morphcode that does not
// start with '#'; basefieldnum holds the count the entries are compared to
4527 int fieldnum
= !(entry
->morphcode
) ? 5 : ((*(entry
->morphcode
)=='#') ? 5 : 6);
4528 if (fieldnum
!= basefieldnum
)
4529 HUNSPELL_WARNING(stderr
, "warning: line %d: bad field number\n", af
->getlinenum());
4531 basefieldnum
= !(entry
->morphcode
) ? 5 : ((*(entry
->morphcode
)=='#') ? 5 : 6);
4536 // now create SfxEntry or PfxEntry objects and use links to
4537 // build an ordered (sorted by affix string) list
4538 for (std::vector
<affentry
>::iterator entry
= start
; entry
!= end
; ++entry
) {
4540 PfxEntry
* pfxptr
= new PfxEntry(this,&(*entry
));
4541 build_pfxtree(pfxptr
);
4543 SfxEntry
* sfxptr
= new SfxEntry(this,&(*entry
));
4544 build_sfxtree(sfxptr
);
// Compares the strip string of an affix entry with its condition `cond`.
// `ft` == 'P' selects the prefix branch, which walks both strings forward from
// index 0; the other branch walks them backward from their last characters.
// A bracketed set in `cond` is matched against a single strip character, with
// '^' directly after '[' marking the set as negated.
// Returns 1 when strip matches cond directly (strncmp()/strcmp()) or when the
// walk runs through all of `cond` (j >= condl forward, j < 0 backward).
// Mismatches are reported as warnings that carry `linenum`.
4550 int AffixMgr::redundant_condition(char ft
, char * strip
, int stripl
, const char * cond
, int linenum
) {
4551 int condl
= strlen(cond
);
4556 if (ft
== 'P') { // prefix
4557 if (strncmp(strip
, cond
, condl
) == 0) return 1;
4560 for (i
= 0, j
= 0; (i
< stripl
) && (j
< condl
); i
++, j
++) {
// plain character: must equal the strip character at the same position
4561 if (cond
[j
] != '[') {
4562 if (cond
[j
] != strip
[i
]) {
4563 HUNSPELL_WARNING(stderr
, "warning: line %d: incompatible stripping characters and condition\n", linenum
);
// bracketed set: note negation, then scan up to the closing ']'
4567 neg
= (cond
[j
+1] == '^') ? 1 : 0;
4571 if (strip
[i
] == cond
[j
]) in
= 1;
4572 } while ((j
< (condl
- 1)) && (cond
[j
] != ']'));
4573 if (j
== (condl
- 1) && (cond
[j
] != ']')) {
4574 HUNSPELL_WARNING(stderr
, "error: line %d: missing ] in condition:\n%s\n", linenum
, cond
);
// the strip character must be in a plain set and outside a negated one
4577 if ((!neg
&& !in
) || (neg
&& in
)) {
4578 HUNSPELL_WARNING(stderr
, "warning: line %d: incompatible stripping characters and condition\n", linenum
);
4583 if (j
>= condl
) return 1;
4586 if ((stripl
>= condl
) && strcmp(strip
+ stripl
- condl
, cond
) == 0) return 1;
4589 for (i
= stripl
- 1, j
= condl
- 1; (i
>= 0) && (j
>= 0); i
--, j
--) {
// walking backward, so a set is entered at its closing ']'
4590 if (cond
[j
] != ']') {
4591 if (cond
[j
] != strip
[i
]) {
4592 HUNSPELL_WARNING(stderr
, "warning: line %d: incompatible stripping characters and condition\n", linenum
);
4599 if (strip
[i
] == cond
[j
]) in
= 1;
4600 } while ((j
> 0) && (cond
[j
] != '['));
// NOTE(review): this branch fails to find the opening '[' but the message
// reports a missing ']' and uses "line: %d" rather than the "line %d" form
// used by the forward branch - confirm whether the wording is intended
4601 if ((j
== 0) && (cond
[j
] != '[')) {
4602 HUNSPELL_WARNING(stderr
, "error: line: %d: missing ] in condition:\n%s\n", linenum
, cond
);
4605 neg
= (cond
[j
+1] == '^') ? 1 : 0;
4606 if ((!neg
&& !in
) || (neg
&& in
)) {
4607 HUNSPELL_WARNING(stderr
, "warning: line %d: incompatible stripping characters and condition\n", linenum
);
4612 if (j
< 0) return 1;