Force a checkpoint in CREATE DATABASE before starting to copy the files,
[PostgreSQL.git] / src / backend / tsearch / spell.c
blob583296078c01b848eaf191cf99450d64b6a17573
/*-------------------------------------------------------------------------
 *
 * spell.c
 *      Normalizing word with ISpell
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include "tsearch/dicts/spell.h"
18 #include "tsearch/ts_locale.h"
19 #include "utils/memutils.h"
23 * Initialization requires a lot of memory that's not needed
24 * after the initialization is done. In init function,
25 * CurrentMemoryContext is a long lived memory context associated
26 * with the dictionary cache entry, so we use a temporary context
27 * for the short-lived stuff.
29 static MemoryContext tmpCtx = NULL;
31 #define tmpalloc(sz) MemoryContextAlloc(tmpCtx, (sz))
32 #define tmpalloc0(sz) MemoryContextAllocZero(tmpCtx, (sz))
34 static void
35 checkTmpCtx(void)
38 * XXX: This assumes that CurrentMemoryContext doesn't have any children
39 * other than the one we create here.
41 if (CurrentMemoryContext->firstchild == NULL)
43 tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
44 "Ispell dictionary init context",
45 ALLOCSET_DEFAULT_MINSIZE,
46 ALLOCSET_DEFAULT_INITSIZE,
47 ALLOCSET_DEFAULT_MAXSIZE);
49 else
50 tmpCtx = CurrentMemoryContext->firstchild;
53 static char *
54 lowerstr_ctx(char *src)
56 MemoryContext saveCtx;
57 char *dst;
59 saveCtx = MemoryContextSwitchTo(tmpCtx);
60 dst = lowerstr(src);
61 MemoryContextSwitchTo(saveCtx);
63 return dst;
/* Capacity of the result array built by NormalizeSubWord/addToResult */
#define MAX_NORM 1024
/* Words longer than this are rejected by NormalizeSubWord */
#define MAXNORMLEN 256

/* Compare the leading strlen(p) bytes of s against p */
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
/*
 * Fetch byte N of the L-byte string W: counted from the start for prefixes
 * (T == FF_PREFIX), counted from the end otherwise.
 */
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* Same as GETWCHAR, applied to the replacement string of affix A */
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )

/* Shared empty string used in place of empty find/repl/mask values */
static char *VoidString = "";
75 static int
76 cmpspell(const void *s1, const void *s2)
78 return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
80 static int
81 cmpspellaffix(const void *s1, const void *s2)
83 return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
86 static char *
87 findchar(char *str, int c)
89 while (*str)
91 if (t_iseq(str, c))
92 return str;
93 str += pg_mblen(str);
96 return NULL;
/*
 * backward string compare for suffix tree operations
 *
 * Compares s1 and s2 byte by byte starting from their last bytes.  Returns
 * -1/1 at the first differing byte; if one string is exhausted first, the
 * shorter string sorts first; 0 if both are identical.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
    int         l1 = strlen((const char *) s1) - 1,
                l2 = strlen((const char *) s2) - 1;

    while (l1 >= 0 && l2 >= 0)
    {
        if (s1[l1] < s2[l2])
            return -1;
        if (s1[l1] > s2[l2])
            return 1;
        l1--;
        l2--;
    }
    if (l1 < l2)
        return -1;
    if (l1 > l2)
        return 1;

    return 0;
}
/*
 * Backward string compare limited to the last 'count' bytes.
 *
 * Works like strbcmp() but stops after 'count' bytes have compared equal,
 * in which case the result is 0 regardless of the remaining lengths.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
    int         l1 = strlen((const char *) s1) - 1,
                l2 = strlen((const char *) s2) - 1,
                l = count;

    while (l1 >= 0 && l2 >= 0 && l > 0)
    {
        if (s1[l1] < s2[l2])
            return -1;
        if (s1[l1] > s2[l2])
            return 1;
        l1--;
        l2--;
        l--;
    }
    if (l == 0)
        return 0;
    if (l1 < l2)
        return -1;
    if (l1 > l2)
        return 1;
    return 0;
}
149 static int
150 cmpaffix(const void *s1, const void *s2)
152 const AFFIX *a1 = (const AFFIX *) s1;
153 const AFFIX *a2 = (const AFFIX *) s2;
155 if (a1->type < a2->type)
156 return -1;
157 if (a1->type > a2->type)
158 return 1;
159 if (a1->type == FF_PREFIX)
160 return strcmp(a1->repl, a2->repl);
161 else
162 return strbcmp((const unsigned char *) a1->repl,
163 (const unsigned char *) a2->repl);
166 static void
167 NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
169 if (Conf->nspell >= Conf->mspell)
171 if (Conf->mspell)
173 Conf->mspell += 1024 * 20;
174 Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
176 else
178 Conf->mspell = 1024 * 20;
179 Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
182 Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
183 strcpy(Conf->Spell[Conf->nspell]->word, word);
184 strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
185 Conf->nspell++;
189 * import dictionary
191 * Note caller must already have applied get_tsearch_config_filename
193 void
194 NIImportDictionary(IspellDict *Conf, const char *filename)
196 tsearch_readline_state trst;
197 char *line;
199 checkTmpCtx();
201 if (!tsearch_readline_begin(&trst, filename))
202 ereport(ERROR,
203 (errcode(ERRCODE_CONFIG_FILE_ERROR),
204 errmsg("could not open dictionary file \"%s\": %m",
205 filename)));
207 while ((line = tsearch_readline(&trst)) != NULL)
209 char *s,
210 *pstr;
211 const char *flag;
213 /* Extract flag from the line */
214 flag = NULL;
215 if ((s = findchar(line, '/')))
217 *s++ = '\0';
218 flag = s;
219 while (*s)
221 /* we allow only single encoded flags for faster works */
222 if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
223 s++;
224 else
226 *s = '\0';
227 break;
231 else
232 flag = "";
234 /* Remove trailing spaces */
235 s = line;
236 while (*s)
238 if (t_isspace(s))
240 *s = '\0';
241 break;
243 s += pg_mblen(s);
245 pstr = lowerstr_ctx(line);
247 NIAddSpell(Conf, pstr, flag);
248 pfree(pstr);
250 pfree(line);
252 tsearch_readline_end(&trst);
256 static int
257 FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
259 SPNode *node = Conf->Dictionary;
260 SPNodeData *StopLow,
261 *StopHigh,
262 *StopMiddle;
263 uint8 *ptr = (uint8 *) word;
265 flag &= FF_DICTFLAGMASK;
267 while (node && *ptr)
269 StopLow = node->data;
270 StopHigh = node->data + node->length;
271 while (StopLow < StopHigh)
273 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
274 if (StopMiddle->val == *ptr)
276 if (*(ptr + 1) == '\0' && StopMiddle->isword)
278 if (flag == 0)
280 if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
281 return 0;
283 else if ((flag & StopMiddle->compoundflag) == 0)
284 return 0;
286 if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
287 return 1;
289 node = StopMiddle->node;
290 ptr++;
291 break;
293 else if (StopMiddle->val < *ptr)
294 StopLow = StopMiddle + 1;
295 else
296 StopHigh = StopMiddle;
298 if (StopLow >= StopHigh)
299 break;
301 return 0;
304 static void
305 NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
307 AFFIX *Affix;
309 if (Conf->naffixes >= Conf->maffixes)
311 if (Conf->maffixes)
313 Conf->maffixes += 16;
314 Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
316 else
318 Conf->maffixes = 16;
319 Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
323 Affix = Conf->Affix + Conf->naffixes;
325 if (strcmp(mask, ".") == 0)
327 Affix->issimple = 1;
328 Affix->isregis = 0;
330 else if (RS_isRegis(mask))
332 Affix->issimple = 0;
333 Affix->isregis = 1;
334 RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false,
335 (mask && *mask) ? mask : VoidString);
337 else
339 int masklen;
340 int wmasklen;
341 int err;
342 pg_wchar *wmask;
343 char *tmask;
345 Affix->issimple = 0;
346 Affix->isregis = 0;
347 tmask = (char *) tmpalloc(strlen(mask) + 3);
348 if (type == FF_SUFFIX)
349 sprintf(tmask, "%s$", mask);
350 else
351 sprintf(tmask, "^%s", mask);
353 masklen = strlen(tmask);
354 wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
355 wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
357 err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB);
358 if (err)
360 char errstr[100];
362 pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
363 ereport(ERROR,
364 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
365 errmsg("invalid regular expression: %s", errstr)));
369 Affix->flagflags = flagflags;
370 if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
372 if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
373 Affix->flagflags |= FF_COMPOUNDFLAG;
375 Affix->flag = flag;
376 Affix->type = type;
378 Affix->find = (find && *find) ? pstrdup(find) : VoidString;
379 if ((Affix->replen = strlen(repl)) > 0)
380 Affix->repl = pstrdup(repl);
381 else
382 Affix->repl = VoidString;
383 Conf->naffixes++;
/* Parser states used by parse_affentry() */
#define PAE_WAIT_MASK   0
#define PAE_INMASK      1
#define PAE_WAIT_FIND   2
#define PAE_INFIND      3
#define PAE_WAIT_REPL   4
#define PAE_INREPL      5
393 static bool
394 parse_affentry(char *str, char *mask, char *find, char *repl)
396 int state = PAE_WAIT_MASK;
397 char *pmask = mask,
398 *pfind = find,
399 *prepl = repl;
401 *mask = *find = *repl = '\0';
403 while (*str)
405 if (state == PAE_WAIT_MASK)
407 if (t_iseq(str, '#'))
408 return false;
409 else if (!t_isspace(str))
411 COPYCHAR(pmask, str);
412 pmask += pg_mblen(str);
413 state = PAE_INMASK;
416 else if (state == PAE_INMASK)
418 if (t_iseq(str, '>'))
420 *pmask = '\0';
421 state = PAE_WAIT_FIND;
423 else if (!t_isspace(str))
425 COPYCHAR(pmask, str);
426 pmask += pg_mblen(str);
429 else if (state == PAE_WAIT_FIND)
431 if (t_iseq(str, '-'))
433 state = PAE_INFIND;
435 else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
437 COPYCHAR(prepl, str);
438 prepl += pg_mblen(str);
439 state = PAE_INREPL;
441 else if (!t_isspace(str))
442 ereport(ERROR,
443 (errcode(ERRCODE_CONFIG_FILE_ERROR),
444 errmsg("syntax error")));
446 else if (state == PAE_INFIND)
448 if (t_iseq(str, ','))
450 *pfind = '\0';
451 state = PAE_WAIT_REPL;
453 else if (t_isalpha(str))
455 COPYCHAR(pfind, str);
456 pfind += pg_mblen(str);
458 else if (!t_isspace(str))
459 ereport(ERROR,
460 (errcode(ERRCODE_CONFIG_FILE_ERROR),
461 errmsg("syntax error")));
463 else if (state == PAE_WAIT_REPL)
465 if (t_iseq(str, '-'))
467 break; /* void repl */
469 else if (t_isalpha(str))
471 COPYCHAR(prepl, str);
472 prepl += pg_mblen(str);
473 state = PAE_INREPL;
475 else if (!t_isspace(str))
476 ereport(ERROR,
477 (errcode(ERRCODE_CONFIG_FILE_ERROR),
478 errmsg("syntax error")));
480 else if (state == PAE_INREPL)
482 if (t_iseq(str, '#'))
484 *prepl = '\0';
485 break;
487 else if (t_isalpha(str))
489 COPYCHAR(prepl, str);
490 prepl += pg_mblen(str);
492 else if (!t_isspace(str))
493 ereport(ERROR,
494 (errcode(ERRCODE_CONFIG_FILE_ERROR),
495 errmsg("syntax error")));
497 else
498 elog(ERROR, "unrecognized state in parse_affentry: %d", state);
500 str += pg_mblen(str);
503 *pmask = *pfind = *prepl = '\0';
505 return (*mask && (*find || *repl)) ? true : false;
508 static void
509 addFlagValue(IspellDict *Conf, char *s, uint32 val)
511 while (*s && t_isspace(s))
512 s += pg_mblen(s);
514 if (!*s)
515 ereport(ERROR,
516 (errcode(ERRCODE_CONFIG_FILE_ERROR),
517 errmsg("syntax error")));
519 if (pg_mblen(s) != 1)
520 ereport(ERROR,
521 (errcode(ERRCODE_CONFIG_FILE_ERROR),
522 errmsg("multibyte flag character is not allowed")));
524 Conf->flagval[(unsigned int) *s] = (unsigned char) val;
525 Conf->usecompound = true;
528 static void
529 NIImportOOAffixes(IspellDict *Conf, const char *filename)
531 char type[BUFSIZ],
532 *ptype = NULL;
533 char sflag[BUFSIZ];
534 char mask[BUFSIZ],
535 *pmask;
536 char find[BUFSIZ],
537 *pfind;
538 char repl[BUFSIZ],
539 *prepl;
540 bool isSuffix = false;
541 int flag = 0;
542 char flagflags = 0;
543 tsearch_readline_state trst;
544 int scanread = 0;
545 char scanbuf[BUFSIZ];
546 char *recoded;
548 checkTmpCtx();
550 /* read file to find any flag */
551 memset(Conf->flagval, 0, sizeof(Conf->flagval));
552 Conf->usecompound = false;
554 if (!tsearch_readline_begin(&trst, filename))
555 ereport(ERROR,
556 (errcode(ERRCODE_CONFIG_FILE_ERROR),
557 errmsg("could not open affix file \"%s\": %m",
558 filename)));
560 while ((recoded = tsearch_readline(&trst)) != NULL)
562 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
564 pfree(recoded);
565 continue;
568 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
569 addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
570 FF_COMPOUNDFLAG);
571 else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
572 addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
573 FF_COMPOUNDBEGIN);
574 else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
575 addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
576 FF_COMPOUNDLAST);
577 /* COMPOUNDLAST and COMPOUNDEND are synonyms */
578 else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
579 addFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
580 FF_COMPOUNDLAST);
581 else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
582 addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
583 FF_COMPOUNDMIDDLE);
584 else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
585 addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
586 FF_COMPOUNDONLY);
587 else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
588 addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"),
589 FF_COMPOUNDPERMITFLAG);
590 else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
591 addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"),
592 FF_COMPOUNDFORBIDFLAG);
593 else if (STRNCMP(recoded, "FLAG") == 0)
595 char *s = recoded + strlen("FLAG");
597 while (*s && t_isspace(s))
598 s += pg_mblen(s);
600 if (*s && STRNCMP(s, "default") != 0)
601 ereport(ERROR,
602 (errcode(ERRCODE_CONFIG_FILE_ERROR),
603 errmsg("Ispell dictionary supports only default flag value")));
606 pfree(recoded);
608 tsearch_readline_end(&trst);
610 sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
612 if (!tsearch_readline_begin(&trst, filename))
613 ereport(ERROR,
614 (errcode(ERRCODE_CONFIG_FILE_ERROR),
615 errmsg("could not open affix file \"%s\": %m",
616 filename)));
618 while ((recoded = tsearch_readline(&trst)) != NULL)
620 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
621 goto nextline;
623 scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
625 if (ptype)
626 pfree(ptype);
627 ptype = lowerstr_ctx(type);
628 if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
629 goto nextline;
631 if (scanread == 4)
633 if (strlen(sflag) != 1)
634 goto nextline;
635 flag = *sflag;
636 isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
637 if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
638 flagflags = FF_CROSSPRODUCT;
639 else
640 flagflags = 0;
642 else
644 char *ptr;
645 int aflg = 0;
647 if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
648 goto nextline;
649 prepl = lowerstr_ctx(repl);
650 /* affix flag */
651 if ((ptr = strchr(prepl, '/')) != NULL)
653 *ptr = '\0';
654 ptr = repl + (ptr - prepl) + 1;
655 while (*ptr)
657 aflg |= Conf->flagval[(unsigned int) *ptr];
658 ptr++;
661 pfind = lowerstr_ctx(find);
662 pmask = lowerstr_ctx(mask);
663 if (t_iseq(find, '0'))
664 *pfind = '\0';
665 if (t_iseq(repl, '0'))
666 *prepl = '\0';
668 NIAddAffix(Conf, flag, flagflags | aflg, pmask, pfind, prepl,
669 isSuffix ? FF_SUFFIX : FF_PREFIX);
670 pfree(prepl);
671 pfree(pfind);
672 pfree(pmask);
675 nextline:
676 pfree(recoded);
679 tsearch_readline_end(&trst);
680 if (ptype)
681 pfree(ptype);
685 * import affixes
687 * Note caller must already have applied get_tsearch_config_filename
689 void
690 NIImportAffixes(IspellDict *Conf, const char *filename)
692 char *pstr = NULL;
693 char mask[BUFSIZ];
694 char find[BUFSIZ];
695 char repl[BUFSIZ];
696 char *s;
697 bool suffixes = false;
698 bool prefixes = false;
699 int flag = 0;
700 char flagflags = 0;
701 tsearch_readline_state trst;
702 bool oldformat = false;
703 char *recoded = NULL;
705 checkTmpCtx();
707 if (!tsearch_readline_begin(&trst, filename))
708 ereport(ERROR,
709 (errcode(ERRCODE_CONFIG_FILE_ERROR),
710 errmsg("could not open affix file \"%s\": %m",
711 filename)));
713 memset(Conf->flagval, 0, sizeof(Conf->flagval));
714 Conf->usecompound = false;
716 while ((recoded = tsearch_readline(&trst)) != NULL)
718 pstr = lowerstr(recoded);
720 /* Skip comments and empty lines */
721 if (*pstr == '#' || *pstr == '\n')
722 goto nextline;
724 if (STRNCMP(pstr, "compoundwords") == 0)
726 s = findchar(pstr, 'l');
727 if (s)
729 s = recoded + (s - pstr); /* we need non-lowercased
730 * string */
731 while (*s && !t_isspace(s))
732 s += pg_mblen(s);
733 while (*s && t_isspace(s))
734 s += pg_mblen(s);
736 if (*s && pg_mblen(s) == 1)
738 Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG;
739 Conf->usecompound = true;
741 oldformat = true;
742 goto nextline;
745 if (STRNCMP(pstr, "suffixes") == 0)
747 suffixes = true;
748 prefixes = false;
749 oldformat = true;
750 goto nextline;
752 if (STRNCMP(pstr, "prefixes") == 0)
754 suffixes = false;
755 prefixes = true;
756 oldformat = true;
757 goto nextline;
759 if (STRNCMP(pstr, "flag") == 0)
761 s = recoded + 4; /* we need non-lowercased string */
762 flagflags = 0;
764 while (*s && t_isspace(s))
765 s += pg_mblen(s);
766 oldformat = true;
768 /* allow only single-encoded flags */
769 if (pg_mblen(s) != 1)
770 ereport(ERROR,
771 (errcode(ERRCODE_CONFIG_FILE_ERROR),
772 errmsg("multibyte flag character is not allowed")));
774 if (*s == '*')
776 flagflags |= FF_CROSSPRODUCT;
777 s++;
779 else if (*s == '~')
781 flagflags |= FF_COMPOUNDONLY;
782 s++;
785 if (*s == '\\')
786 s++;
788 /* allow only single-encoded flags */
789 if (pg_mblen(s) != 1)
790 ereport(ERROR,
791 (errcode(ERRCODE_CONFIG_FILE_ERROR),
792 errmsg("multibyte flag character is not allowed")));
794 flag = (unsigned char) *s;
795 goto nextline;
797 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
798 STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
800 if (oldformat)
801 ereport(ERROR,
802 (errcode(ERRCODE_CONFIG_FILE_ERROR),
803 errmsg("wrong affix file format for flag")));
804 tsearch_readline_end(&trst);
805 NIImportOOAffixes(Conf, filename);
806 return;
808 if ((!suffixes) && (!prefixes))
809 goto nextline;
811 if (!parse_affentry(pstr, mask, find, repl))
812 goto nextline;
814 NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
816 nextline:
817 pfree(recoded);
818 pfree(pstr);
820 tsearch_readline_end(&trst);
823 static int
824 MergeAffix(IspellDict *Conf, int a1, int a2)
826 char **ptr;
828 while (Conf->nAffixData + 1 >= Conf->lenAffixData)
830 Conf->lenAffixData *= 2;
831 Conf->AffixData = (char **) repalloc(Conf->AffixData,
832 sizeof(char *) * Conf->lenAffixData);
835 ptr = Conf->AffixData + Conf->nAffixData;
836 *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
837 1 /* space */ + 1 /* \0 */ );
838 sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
839 ptr++;
840 *ptr = NULL;
841 Conf->nAffixData++;
843 return Conf->nAffixData - 1;
846 static uint32
847 makeCompoundFlags(IspellDict *Conf, int affix)
849 uint32 flag = 0;
850 char *str = Conf->AffixData[affix];
852 while (str && *str)
854 flag |= Conf->flagval[(unsigned int) *str];
855 str++;
858 return (flag & FF_DICTFLAGMASK);
861 static SPNode *
862 mkSPNode(IspellDict *Conf, int low, int high, int level)
864 int i;
865 int nchar = 0;
866 char lastchar = '\0';
867 SPNode *rs;
868 SPNodeData *data;
869 int lownew = low;
871 for (i = low; i < high; i++)
872 if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
874 nchar++;
875 lastchar = Conf->Spell[i]->word[level];
878 if (!nchar)
879 return NULL;
881 rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
882 rs->length = nchar;
883 data = rs->data;
885 lastchar = '\0';
886 for (i = low; i < high; i++)
887 if (Conf->Spell[i]->p.d.len > level)
889 if (lastchar != Conf->Spell[i]->word[level])
891 if (lastchar)
893 data->node = mkSPNode(Conf, lownew, i, level + 1);
894 lownew = i;
895 data++;
897 lastchar = Conf->Spell[i]->word[level];
899 data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
900 if (Conf->Spell[i]->p.d.len == level + 1)
902 bool clearCompoundOnly = false;
904 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
907 * MergeAffix called a few times. If one of word is
908 * allowed to be in compound word and another isn't, then
909 * clear FF_COMPOUNDONLY flag.
912 clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
913 & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
914 ? false : true;
915 data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
917 else
918 data->affix = Conf->Spell[i]->p.d.affix;
919 data->isword = 1;
921 data->compoundflag = makeCompoundFlags(Conf, data->affix);
923 if ((data->compoundflag & FF_COMPOUNDONLY) &&
924 (data->compoundflag & FF_COMPOUNDFLAG) == 0)
925 data->compoundflag |= FF_COMPOUNDFLAG;
927 if (clearCompoundOnly)
928 data->compoundflag &= ~FF_COMPOUNDONLY;
932 data->node = mkSPNode(Conf, lownew, high, level + 1);
934 return rs;
938 * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
939 * and affixes.
941 void
942 NISortDictionary(IspellDict *Conf)
944 int i;
945 int naffix = 0;
946 int curaffix;
948 checkTmpCtx();
950 /* compress affixes */
952 /* Count the number of different flags used in the dictionary */
954 qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
956 naffix = 0;
957 for (i = 0; i < Conf->nspell; i++)
959 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
960 naffix++;
964 * Fill in Conf->AffixData with the affixes that were used in the
965 * dictionary. Replace textual flag-field of Conf->Spell entries with
966 * indexes into Conf->AffixData array.
968 Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
970 curaffix = -1;
971 for (i = 0; i < Conf->nspell; i++)
973 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
975 curaffix++;
976 Assert(curaffix < naffix);
977 Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
980 Conf->Spell[i]->p.d.affix = curaffix;
981 Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
984 Conf->lenAffixData = Conf->nAffixData = naffix;
986 qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
987 Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
989 Conf->Spell = NULL;
992 static AffixNode *
993 mkANode(IspellDict *Conf, int low, int high, int level, int type)
995 int i;
996 int nchar = 0;
997 uint8 lastchar = '\0';
998 AffixNode *rs;
999 AffixNodeData *data;
1000 int lownew = low;
1001 int naff;
1002 AFFIX **aff;
1004 for (i = low; i < high; i++)
1005 if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1007 nchar++;
1008 lastchar = GETCHAR(Conf->Affix + i, level, type);
1011 if (!nchar)
1012 return NULL;
1014 aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
1015 naff = 0;
1017 rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1018 rs->length = nchar;
1019 data = rs->data;
1021 lastchar = '\0';
1022 for (i = low; i < high; i++)
1023 if (Conf->Affix[i].replen > level)
1025 if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1027 if (lastchar)
1029 data->node = mkANode(Conf, lownew, i, level + 1, type);
1030 if (naff)
1032 data->naff = naff;
1033 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1034 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1035 naff = 0;
1037 data++;
1038 lownew = i;
1040 lastchar = GETCHAR(Conf->Affix + i, level, type);
1042 data->val = GETCHAR(Conf->Affix + i, level, type);
1043 if (Conf->Affix[i].replen == level + 1)
1044 { /* affix stopped */
1045 aff[naff++] = Conf->Affix + i;
1049 data->node = mkANode(Conf, lownew, high, level + 1, type);
1050 if (naff)
1052 data->naff = naff;
1053 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1054 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1055 naff = 0;
1058 pfree(aff);
1060 return rs;
1063 static void
1064 mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1066 int i,
1067 cnt = 0;
1068 int start = (issuffix) ? startsuffix : 0;
1069 int end = (issuffix) ? Conf->naffixes : startsuffix;
1070 AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1072 Affix->length = 1;
1073 Affix->isvoid = 1;
1075 if (issuffix)
1077 Affix->data->node = Conf->Suffix;
1078 Conf->Suffix = Affix;
1080 else
1082 Affix->data->node = Conf->Prefix;
1083 Conf->Prefix = Affix;
1087 for (i = start; i < end; i++)
1088 if (Conf->Affix[i].replen == 0)
1089 cnt++;
1091 if (cnt == 0)
1092 return;
1094 Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
1095 Affix->data->naff = (uint32) cnt;
1097 cnt = 0;
1098 for (i = start; i < end; i++)
1099 if (Conf->Affix[i].replen == 0)
1101 Affix->data->aff[cnt] = Conf->Affix + i;
1102 cnt++;
1106 static bool
1107 isAffixInUse(IspellDict *Conf, char flag)
1109 int i;
1111 for (i = 0; i < Conf->nAffixData; i++)
1112 if (strchr(Conf->AffixData[i], flag) != NULL)
1113 return true;
1115 return false;
1118 void
1119 NISortAffixes(IspellDict *Conf)
1121 AFFIX *Affix;
1122 size_t i;
1123 CMPDAffix *ptr;
1124 int firstsuffix = Conf->naffixes;
1126 checkTmpCtx();
1128 if (Conf->naffixes == 0)
1129 return;
1131 if (Conf->naffixes > 1)
1132 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1133 Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
1134 ptr->affix = NULL;
1136 for (i = 0; i < Conf->naffixes; i++)
1138 Affix = &(((AFFIX *) Conf->Affix)[i]);
1139 if (Affix->type == FF_SUFFIX && i < firstsuffix)
1140 firstsuffix = i;
1142 if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
1143 isAffixInUse(Conf, (char) Affix->flag))
1145 if (ptr == Conf->CompoundAffix ||
1146 ptr->issuffix != (ptr - 1)->issuffix ||
1147 strbncmp((const unsigned char *) (ptr - 1)->affix,
1148 (const unsigned char *) Affix->repl,
1149 (ptr - 1)->len))
1151 /* leave only unique and minimals suffixes */
1152 ptr->affix = Affix->repl;
1153 ptr->len = Affix->replen;
1154 ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false;
1155 ptr++;
1159 ptr->affix = NULL;
1160 Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
1162 Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
1163 Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
1164 mkVoidAffix(Conf, true, firstsuffix);
1165 mkVoidAffix(Conf, false, firstsuffix);
1168 static AffixNodeData *
1169 FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
1171 AffixNodeData *StopLow,
1172 *StopHigh,
1173 *StopMiddle;
1174 uint8 symbol;
1176 if (node->isvoid)
1177 { /* search void affixes */
1178 if (node->data->naff)
1179 return node->data;
1180 node = node->data->node;
1183 while (node && *level < wrdlen)
1185 StopLow = node->data;
1186 StopHigh = node->data + node->length;
1187 while (StopLow < StopHigh)
1189 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1190 symbol = GETWCHAR(word, wrdlen, *level, type);
1192 if (StopMiddle->val == symbol)
1194 (*level)++;
1195 if (StopMiddle->naff)
1196 return StopMiddle;
1197 node = StopMiddle->node;
1198 break;
1200 else if (StopMiddle->val < symbol)
1201 StopLow = StopMiddle + 1;
1202 else
1203 StopHigh = StopMiddle;
1205 if (StopLow >= StopHigh)
1206 break;
1208 return NULL;
1211 static char *
1212 CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
1215 * Check compound allow flags
1218 if (flagflags == 0)
1220 if (Affix->flagflags & FF_COMPOUNDONLY)
1221 return NULL;
1223 else if (flagflags & FF_COMPOUNDBEGIN)
1225 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1226 return NULL;
1227 if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
1228 if (Affix->type == FF_SUFFIX)
1229 return NULL;
1231 else if (flagflags & FF_COMPOUNDMIDDLE)
1233 if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
1234 (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
1235 return NULL;
1237 else if (flagflags & FF_COMPOUNDLAST)
1239 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1240 return NULL;
1241 if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
1242 if (Affix->type == FF_PREFIX)
1243 return NULL;
1247 * make replace pattern of affix
1249 if (Affix->type == FF_SUFFIX)
1251 strcpy(newword, word);
1252 strcpy(newword + len - Affix->replen, Affix->find);
1253 if (baselen) /* store length of non-changed part of word */
1254 *baselen = len - Affix->replen;
1256 else
1259 * if prefix is a all non-chaged part's length then all word contains
1260 * only prefix and suffix, so out
1262 if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
1263 return NULL;
1264 strcpy(newword, Affix->find);
1265 strcat(newword, word + Affix->replen);
1269 * check resulting word
1271 if (Affix->issimple)
1272 return newword;
1273 else if (Affix->isregis)
1275 if (RS_execute(&(Affix->reg.regis), newword))
1276 return newword;
1278 else
1280 int err;
1281 pg_wchar *data;
1282 size_t data_len;
1283 int newword_len;
1285 /* Convert data string to wide characters */
1286 newword_len = strlen(newword);
1287 data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
1288 data_len = pg_mb2wchar_with_len(newword, data, newword_len);
1290 if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
1292 pfree(data);
1293 return newword;
1295 pfree(data);
1298 return NULL;
1301 static int
1302 addToResult(char **forms, char **cur, char *word)
1304 if (cur - forms >= MAX_NORM - 1)
1305 return 0;
1306 if (forms == cur || strcmp(word, *(cur - 1)) != 0)
1308 *cur = pstrdup(word);
1309 *(cur+1) = NULL;
1310 return 1;
1313 return 0;
1316 static char **
1317 NormalizeSubWord(IspellDict *Conf, char *word, int flag)
1319 AffixNodeData *suffix = NULL,
1320 *prefix = NULL;
1321 int slevel = 0,
1322 plevel = 0;
1323 int wrdlen = strlen(word),
1324 swrdlen;
1325 char **forms;
1326 char **cur;
1327 char newword[2 * MAXNORMLEN] = "";
1328 char pnewword[2 * MAXNORMLEN] = "";
1329 AffixNode *snode = Conf->Suffix,
1330 *pnode;
1331 int i,
1334 if (wrdlen > MAXNORMLEN)
1335 return NULL;
1336 cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
1337 *cur = NULL;
1340 /* Check that the word itself is normal form */
1341 if (FindWord(Conf, word, 0, flag))
1343 *cur = pstrdup(word);
1344 cur++;
1345 *cur = NULL;
1348 /* Find all other NORMAL forms of the 'word' (check only prefix) */
1349 pnode = Conf->Prefix;
1350 plevel = 0;
1351 while (pnode)
1353 prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
1354 if (!prefix)
1355 break;
1356 for (j = 0; j < prefix->naff; j++)
1358 if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
1360 /* prefix success */
1361 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
1362 cur += addToResult(forms, cur, newword);
1365 pnode = prefix->node;
1369 * Find all other NORMAL forms of the 'word' (check suffix and then
1370 * prefix)
1372 while (snode)
1374 int baselen = 0;
1376 /* find possible suffix */
1377 suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
1378 if (!suffix)
1379 break;
1380 /* foreach suffix check affix */
1381 for (i = 0; i < suffix->naff; i++)
1383 if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
1385 /* suffix success */
1386 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
1387 cur += addToResult(forms, cur, newword);
1389 /* now we will look changed word with prefixes */
1390 pnode = Conf->Prefix;
1391 plevel = 0;
1392 swrdlen = strlen(newword);
1393 while (pnode)
1395 prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
1396 if (!prefix)
1397 break;
1398 for (j = 0; j < prefix->naff; j++)
1400 if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
1402 /* prefix success */
1403 int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
1404 0 : prefix->aff[j]->flag;
1406 if (FindWord(Conf, pnewword, ff, flag))
1407 cur += addToResult(forms, cur, pnewword);
1410 pnode = prefix->node;
1415 snode = suffix->node;
1418 if (cur == forms)
1420 pfree(forms);
1421 return (NULL);
1423 return (forms);
/*
 * One candidate split of a word: a growable array of stem strings, chained
 * to further candidates through 'next'.
 */
typedef struct SplitVar
{
    int         nstem;          /* number of entries used in stem[] */
    int         lenstem;        /* allocated length of stem[] */
    char      **stem;
    struct SplitVar *next;
} SplitVar;
1434 static int
1435 CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
1437 bool issuffix;
1439 if (CheckInPlace)
1441 while ((*ptr)->affix)
1443 if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
1445 len = (*ptr)->len;
1446 issuffix = (*ptr)->issuffix;
1447 (*ptr)++;
1448 return (issuffix) ? len : 0;
1450 (*ptr)++;
1453 else
1455 char *affbegin;
1457 while ((*ptr)->affix)
1459 if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
1461 len = (*ptr)->len + (affbegin - word);
1462 issuffix = (*ptr)->issuffix;
1463 (*ptr)++;
1464 return (issuffix) ? len : 0;
1466 (*ptr)++;
1469 return -1;
1472 static SplitVar *
1473 CopyVar(SplitVar *s, int makedup)
1475 SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar));
1477 v->next = NULL;
1478 if (s)
1480 int i;
1482 v->lenstem = s->lenstem;
1483 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1484 v->nstem = s->nstem;
1485 for (i = 0; i < s->nstem; i++)
1486 v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
1488 else
1490 v->lenstem = 16;
1491 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1492 v->nstem = 0;
1494 return v;
1497 static void
1498 AddStem(SplitVar *v, char *word)
1500 if ( v->nstem >= v->lenstem )
1502 v->lenstem *= 2;
1503 v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
1506 v->stem[v->nstem] = word;
1507 v->nstem++;
1510 static SplitVar *
1511 SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
1513 SplitVar *var = NULL;
1514 SPNodeData *StopLow,
1515 *StopHigh,
1516 *StopMiddle = NULL;
1517 SPNode *node = (snode) ? snode : Conf->Dictionary;
1518 int level = (snode) ? minpos : startpos; /* recursive
1519 * minpos==level */
1520 int lenaff;
1521 CMPDAffix *caff;
1522 char *notprobed;
1523 int compoundflag = 0;
1525 notprobed = (char *) palloc(wordlen);
1526 memset(notprobed, 1, wordlen);
1527 var = CopyVar(orig, 1);
1529 while (level < wordlen)
1531 /* find word with epenthetic or/and compound affix */
1532 caff = Conf->CompoundAffix;
1533 while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
1536 * there is one of compound affixes, so check word for existings
1538 char buf[MAXNORMLEN];
1539 char **subres;
1541 lenaff = level - startpos + lenaff;
1543 if (!notprobed[startpos + lenaff - 1])
1544 continue;
1546 if (level + lenaff - 1 <= minpos)
1547 continue;
1549 if ( lenaff >= MAXNORMLEN )
1550 continue; /* skip too big value */
1551 if (lenaff > 0)
1552 memcpy(buf, word + startpos, lenaff);
1553 buf[lenaff] = '\0';
1555 if (level == 0)
1556 compoundflag = FF_COMPOUNDBEGIN;
1557 else if (level == wordlen - 1)
1558 compoundflag = FF_COMPOUNDLAST;
1559 else
1560 compoundflag = FF_COMPOUNDMIDDLE;
1561 subres = NormalizeSubWord(Conf, buf, compoundflag);
1562 if (subres)
1564 /* Yes, it was a word from dictionary */
1565 SplitVar *new = CopyVar(var, 0);
1566 SplitVar *ptr = var;
1567 char **sptr = subres;
1569 notprobed[startpos + lenaff - 1] = 0;
1571 while (*sptr)
1573 AddStem( new, *sptr );
1574 sptr++;
1576 pfree(subres);
1578 while (ptr->next)
1579 ptr = ptr->next;
1580 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
1582 pfree(new->stem);
1583 pfree(new);
1587 if (!node)
1588 break;
1590 StopLow = node->data;
1591 StopHigh = node->data + node->length;
1592 while (StopLow < StopHigh)
1594 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1595 if (StopMiddle->val == ((uint8 *) (word))[level])
1596 break;
1597 else if (StopMiddle->val < ((uint8 *) (word))[level])
1598 StopLow = StopMiddle + 1;
1599 else
1600 StopHigh = StopMiddle;
1603 if (StopLow < StopHigh)
1605 if (level == FF_COMPOUNDBEGIN)
1606 compoundflag = FF_COMPOUNDBEGIN;
1607 else if (level == wordlen - 1)
1608 compoundflag = FF_COMPOUNDLAST;
1609 else
1610 compoundflag = FF_COMPOUNDMIDDLE;
1612 /* find infinitive */
1613 if (StopMiddle->isword &&
1614 (StopMiddle->compoundflag & compoundflag) &&
1615 notprobed[level])
1617 /* ok, we found full compoundallowed word */
1618 if (level > minpos)
1620 /* and its length more than minimal */
1621 if (wordlen == level + 1)
1623 /* well, it was last word */
1624 AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1625 pfree(notprobed);
1626 return var;
1628 else
1630 /* then we will search more big word at the same point */
1631 SplitVar *ptr = var;
1633 while (ptr->next)
1634 ptr = ptr->next;
1635 ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1636 /* we can find next word */
1637 level++;
1638 AddStem( var, pnstrdup(word + startpos, level - startpos) );
1639 node = Conf->Dictionary;
1640 startpos = level;
1641 continue;
1645 node = StopMiddle->node;
1647 else
1648 node = NULL;
1649 level++;
1652 AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1653 pfree(notprobed);
1654 return var;
1657 static void
1658 addNorm( TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
1660 if ( *lres == NULL )
1661 *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
1663 if ( *lcur - *lres < MAX_NORM-1 ) {
1664 (*lcur)->lexeme = word;
1665 (*lcur)->flags = flags;
1666 (*lcur)->nvariant = NVariant;
1667 (*lcur)++;
1668 (*lcur)->lexeme = NULL;
1672 TSLexeme *
1673 NINormalizeWord(IspellDict *Conf, char *word)
1675 char **res;
1676 TSLexeme *lcur = NULL,
1677 *lres = NULL;
1678 uint16 NVariant = 1;
1680 res = NormalizeSubWord(Conf, word, 0);
1682 if (res)
1684 char **ptr = res;
1686 while (*ptr && (lcur-lres) < MAX_NORM)
1688 addNorm( &lres, &lcur, *ptr, 0, NVariant++);
1689 ptr++;
1691 pfree(res);
1694 if (Conf->usecompound)
1696 int wordlen = strlen(word);
1697 SplitVar *ptr,
1698 *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
1699 int i;
1701 while (var)
1703 if (var->nstem > 1)
1705 char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
1707 if (subres)
1709 char **subptr = subres;
1711 while (*subptr)
1713 for (i = 0; i < var->nstem - 1; i++)
1715 addNorm( &lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
1718 addNorm( &lres, &lcur, *subptr, 0, NVariant);
1719 subptr++;
1720 NVariant++;
1723 pfree(subres);
1724 var->stem[0] = NULL;
1725 pfree(var->stem[var->nstem - 1]);
1729 for (i = 0; i < var->nstem && var->stem[i]; i++)
1730 pfree(var->stem[i]);
1731 ptr = var->next;
1732 pfree(var->stem);
1733 pfree(var);
1734 var = ptr;
1738 return lres;