First post!
[beagle.git] / Lucene.Net / Analysis / RU / RussianStemmer.cs
blobc45eea60554b4167d39fd2a70d3414539ed212f2
1 using System;
2 using System.Text;
4 namespace Lucene.Net.Analysis.Ru
6 /* ====================================================================
7 * The Apache Software License, Version 1.1
9 * Copyright (c) 2001 The Apache Software Foundation. All rights
10 * reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
24 * 3. The end-user documentation included with the redistribution,
25 * if any, must include the following acknowledgment:
26 * "This product includes software developed by the
27 * Apache Software Foundation (http://www.apache.org/)."
28 * Alternately, this acknowledgment may appear in the software itself,
29 * if and wherever such third-party acknowledgments normally appear.
31 * 4. The names "Apache" and "Apache Software Foundation" and
32 * "Apache Lucene" must not be used to endorse or promote products
33 * derived from this software without prior written permission. For
34 * written permission, please contact apache@apache.org.
36 * 5. Products derived from this software may not be called "Apache",
37 * "Apache Lucene", nor may "Apache" appear in their name, without
38 * prior written permission of the Apache Software Foundation.
40 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 * ====================================================================
54 * This software consists of voluntary contributions made by many
55 * individuals on behalf of the Apache Software Foundation. For more
56 * information on the Apache Software Foundation, please see
57 * <http://www.apache.org/>.
60 /// <summary>
61 /// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
62 /// </summary>
63 /// <author>Boris Okner, b.okner@rogers.com</author>
64 /// <version>$Id: RussianStemmer.cs,v 1.1.1.1 2004/04/29 22:53:51 trow Exp $</version>
65 public class RussianStemmer
/// <summary>
/// Character table of the encoding in use. The letter constants of this
/// class (A, B, V, ...) are indices into this array.
/// </summary>
private char[] charset;

/// <summary>
/// Start offsets of the RV, R1 and R2 regions of the word being stemmed.
/// Each one stays 0 when MarkPositions could not locate the region.
/// </summary>
private int RV;
private int R1;
private int R2;
/// <summary>
/// Letter indices. Each constant is the position of one letter inside the
/// <c>charset</c> table; the names are Latin transliterations of the letters.
/// They are compile-time constants because nothing ever reassigns them.
/// NOTE(review): the 0..31 layout presumably follows the Russian alphabet
/// order used by the charset tables supplied by callers - confirm there.
/// </summary>
private const char A = (char)0;
private const char B = (char)1;
private const char V = (char)2;
private const char G = (char)3;
private const char D = (char)4;
private const char E = (char)5;
private const char ZH = (char)6;
private const char Z = (char)7;
private const char I = (char)8;
private const char I_ = (char)9;
private const char K = (char)10;
private const char L = (char)11;
private const char M = (char)12;
private const char N = (char)13;
private const char O = (char)14;
private const char P = (char)15;
private const char R = (char)16;
private const char S = (char)17;
private const char T = (char)18;
private const char U = (char)19;
private const char F = (char)20;
private const char X = (char)21;
private const char TS = (char)22;
private const char CH = (char)23;
private const char SH = (char)24;
private const char SHCH = (char)25;
private const char HARD = (char)26;
private const char Y = (char)27;
private const char SOFT = (char)28;
private const char AE = (char)29;
private const char IU = (char)30;
private const char IA = (char)31;
/// <summary>
/// Stem definitions. Every table holds endings spelled with letter indices;
/// within a table shorter endings come first, and FindEnding scans from the
/// back so that longer endings are tried before shorter ones.
/// </summary>
private static char[] vowels = new char[] { A, E, I, O, U, Y, AE, IU, IA };

// Perfective gerund endings that are only removed after A or IA.
private static char[][] perfectiveGerundEndings1 = new char[][] {
    new char[] { V },
    new char[] { V, SH, I },
    new char[] { V, SH, I, S, SOFT }
};

// Letters that must precede a group 1 perfective gerund ending.
private static char[][] perfectiveGerund1Predessors = new char[][] {
    new char[] { A }, new char[] { IA }
};

// Perfective gerund endings that are removed unconditionally.
private static char[][] perfectiveGerundEndings2 = new char[][] {
    new char[] { I, V }, new char[] { Y, V },
    new char[] { I, V, SH, I }, new char[] { Y, V, SH, I },
    new char[] { I, V, SH, I, S, SOFT }, new char[] { Y, V, SH, I, S, SOFT }
};

private static char[][] adjectiveEndings = new char[][] {
    new char[] { E, E }, new char[] { I, E }, new char[] { Y, E }, new char[] { O, E },
    new char[] { E, I_ }, new char[] { I, I_ }, new char[] { Y, I_ }, new char[] { O, I_ },
    new char[] { E, M }, new char[] { I, M }, new char[] { Y, M }, new char[] { O, M },
    new char[] { I, X }, new char[] { Y, X },
    new char[] { U, IU }, new char[] { IU, IU },
    new char[] { A, IA }, new char[] { IA, IA },
    new char[] { O, IU }, new char[] { E, IU },
    new char[] { I, M, I }, new char[] { Y, M, I },
    new char[] { E, G, O }, new char[] { O, G, O },
    new char[] { E, M, U }, new char[] { O, M, U }
};

// Participle endings that are only removed after A or IA.
private static char[][] participleEndings1 = new char[][] {
    new char[] { SHCH },
    new char[] { E, M }, new char[] { N, N }, new char[] { V, SH }, new char[] { IU, SHCH }
};

// Participle endings that are removed unconditionally.
private static char[][] participleEndings2 = new char[][] {
    new char[] { I, V, SH }, new char[] { Y, V, SH }, new char[] { U, IU, SHCH }
};

// Letters that must precede a group 1 participle ending.
private static char[][] participle1Predessors = new char[][] {
    new char[] { A }, new char[] { IA }
};

private static char[][] reflexiveEndings = new char[][] {
    new char[] { S, IA }, new char[] { S, SOFT }
};

// Verb endings that are only removed after A or IA.
private static char[][] verbEndings1 = new char[][] {
    new char[] { I_ }, new char[] { L }, new char[] { N },
    new char[] { L, O }, new char[] { N, O }, new char[] { E, T }, new char[] { IU, T },
    new char[] { L, A }, new char[] { N, A }, new char[] { L, I }, new char[] { E, M },
    new char[] { N, Y },
    new char[] { E, T, E }, new char[] { I_, T, E },
    new char[] { T, SOFT },
    new char[] { E, SH, SOFT },
    new char[] { N, N, O }
};

// Verb endings that are removed unconditionally.
private static char[][] verbEndings2 = new char[][] {
    new char[] { IU },
    new char[] { U, IU }, new char[] { E, N }, new char[] { E, I_ }, new char[] { IA, T },
    new char[] { U, I_ }, new char[] { I, L }, new char[] { Y, L }, new char[] { I, M },
    new char[] { Y, M }, new char[] { I, T }, new char[] { Y, T },
    new char[] { I, L, A }, new char[] { Y, L, A }, new char[] { E, N, A },
    new char[] { I, T, E }, new char[] { I, L, I }, new char[] { Y, L, I },
    new char[] { I, L, O }, new char[] { Y, L, O }, new char[] { E, N, O },
    new char[] { U, E, T }, new char[] { U, IU, T }, new char[] { E, N, Y },
    new char[] { I, T, SOFT }, new char[] { Y, T, SOFT }, new char[] { I, SH, SOFT },
    new char[] { E, I_, T, E }, new char[] { U, I_, T, E }
};

// Letters that must precede a group 1 verb ending.
private static char[][] verb1Predessors = new char[][] {
    new char[] { A }, new char[] { IA }
};
// Noun endings, shortest first (FindEnding scans the table from the back).
// The second entry is IU: the table previously listed U twice and had no IU,
// which disagreed with the noun table that SetEndings builds.
private static char[][] nounEndings = {
    new char[] { A },
    new char[] { IU },
    new char[] { I_ },
    new char[] { O },
    new char[] { U },
    new char[] { E },
    new char[] { Y },
    new char[] { I },
    new char[] { SOFT },
    new char[] { IA },
    new char[] { E, V },
    new char[] { O, V },
    new char[] { I, E },
    new char[] { SOFT, E },
    new char[] { IA, X },
    new char[] { I, IU },
    new char[] { E, I },
    new char[] { I, I },
    new char[] { E, I_ },
    new char[] { O, I_ },
    new char[] { E, M },
    new char[] { A, M },
    new char[] { O, M },
    new char[] { A, X },
    new char[] { SOFT, IU },
    new char[] { I, IA },
    new char[] { SOFT, IA },
    new char[] { I, I_ },
    new char[] { IA, M },
    new char[] { IA, M, I },
    new char[] { A, M, I },
    new char[] { I, E, I_ },
    new char[] { I, IA, M },
    new char[] { I, E, M },
    new char[] { I, IA, X },
    new char[] { I, IA, M, I }
};
// Superlative endings, removed in step 4 of Stem.
private static char[][] superlativeEndings = new char[][] {
    new char[] { E, I_, SH }, new char[] { E, I_, SH, E }
};

// Derivational endings, removed in step 3 of Stem when they lie inside R2.
private static char[][] derivationalEndings = new char[][] {
    new char[] { O, S, T }, new char[] { O, S, T, SOFT }
};
/// <summary>
/// Creates a stemmer that has no character table yet. A table has to be
/// supplied through SetCharset before a word is stemmed, because the vowel
/// and ending checks read from it.
/// </summary>
public RussianStemmer()
{
    // Spelled out for clarity: the table starts out unset.
    this.charset = null;
}
/// <summary>
/// Creates a stemmer that works on the given character table.
/// </summary>
/// <param name="charset">Table that maps the letter indices of this class to characters.</param>
public RussianStemmer(char[] charset)
{
    SetCharset(charset);
}
/// <summary>
/// Removes an adjectival ending: an adjective ending, optionally preceded
/// by a participle ending.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>
/// true when an adjective ending was removed (whether or not a participle
/// ending was removed as well); false when no adjective ending was found.
/// </returns>
private bool Adjectival(StringBuilder stemmingZone)
{
    // Without an adjective ending there is nothing adjectival to strip.
    if (!FindAndRemoveEnding(stemmingZone, adjectiveEndings))
    {
        return false;
    }

    // The adjective ending may have followed a participle ending. Group 1
    // participle endings need a preceding A/IA; group 2 endings are only
    // tried when no group 1 ending was removed.
    if (!FindAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
    {
        FindAndRemoveEnding(stemmingZone, participleEndings2);
    }

    return true;
}
/// <summary>
/// Removes a derivational ending, but only when the whole ending lies inside
/// the R2 region of the word.
/// </summary>
/// <param name="stemmingZone">
/// Part of the word being stemmed (it starts at offset RV of the word);
/// shortened in place.
/// </param>
/// <returns>true when an ending was removed, false otherwise.</returns>
private bool Derivational(StringBuilder stemmingZone)
{
    int endingLength = FindEnding(stemmingZone, derivationalEndings);
    if (endingLength == 0)
    {
        // no derivational ending found
        return false;
    }

    // R2 is still 0 when MarkPositions found no R2 region. Without this guard
    // R2 - RV would be negative and the range check below would pass by accident.
    if (R2 == 0)
    {
        return false;
    }

    // The zone starts at RV, so R2 - RV is the offset of R2 inside the zone.
    // The ending must start at or after that offset.
    int endingStart = stemmingZone.Length - endingLength;
    if (R2 - RV > endingStart)
    {
        return false;
    }

    stemmingZone.Length = endingStart;
    return true;
}
/// <summary>
/// Looks for an ending of the given class that finishes exactly at
/// <paramref name="startIndex"/> inside the stemming zone.
/// </summary>
/// <param name="stemmingZone">Text to inspect; not modified.</param>
/// <param name="startIndex">Index of the last character that may belong to the ending.</param>
/// <param name="theEndingClass">Candidate endings, spelled with letter indices.</param>
/// <returns>Length of the ending that matched, or 0 when none matched.</returns>
private int FindEnding(StringBuilder stemmingZone, int startIndex, char[][] theEndingClass)
{
    // Candidates are visited from the back of the table to the front.
    int candidate = theEndingClass.Length;
    while (--candidate >= 0)
    {
        char[] ending = theEndingClass[candidate];

        // Skip endings that need more characters than exist up to startIndex.
        if (ending.Length - 1 > startIndex)
        {
            continue;
        }

        // Compare right to left; endingPos drops below 0 only on a full match.
        int zonePos = startIndex;
        int endingPos = ending.Length - 1;
        while (endingPos >= 0 && stemmingZone[zonePos] == charset[ending[endingPos]])
        {
            zonePos--;
            endingPos--;
        }

        if (endingPos < 0)
        {
            return ending.Length;
        }
    }

    return 0;
}
/// <summary>
/// Looks for an ending of the given class at the very end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Text to inspect; not modified.</param>
/// <param name="theEndingClass">Candidate endings, spelled with letter indices.</param>
/// <returns>Length of the ending that matched, or 0 when none matched.</returns>
private int FindEnding(StringBuilder stemmingZone, char[][] theEndingClass)
{
    int lastIndex = stemmingZone.Length - 1;
    return FindEnding(stemmingZone, lastIndex, theEndingClass);
}
/// <summary>
/// Cuts an ending of the given class off the end of the stemming zone, if one is present.
/// </summary>
/// <param name="stemmingZone">Text to inspect; shortened in place on a match.</param>
/// <param name="theEndingClass">Candidate endings, spelled with letter indices.</param>
/// <returns>true when an ending was found and removed, false otherwise.</returns>
private bool FindAndRemoveEnding(StringBuilder stemmingZone, char[][] theEndingClass)
{
    int matched = FindEnding(stemmingZone, theEndingClass);
    bool found = matched != 0;
    if (found)
    {
        // cut the ending found
        stemmingZone.Length -= matched;
    }

    return found;
}
/// <summary>
/// Cuts an ending of the given class off the end of the stemming zone, but
/// only when the ending is directly preceded by one of the given predecessors.
/// The predecessor itself stays in the zone.
/// </summary>
/// <param name="stemmingZone">Text to inspect; shortened in place on a match.</param>
/// <param name="theEndingClass">Candidate endings, spelled with letter indices.</param>
/// <param name="thePredessors">Sequences of which one must precede the ending.</param>
/// <returns>true when an ending was removed, false otherwise.</returns>
private bool FindAndRemoveEnding(StringBuilder stemmingZone,
    char[][] theEndingClass, char[][] thePredessors)
{
    int matched = FindEnding(stemmingZone, theEndingClass);
    if (matched == 0)
    {
        // not found
        return false;
    }

    // Index of the character right in front of the ending.
    int beforeEnding = stemmingZone.Length - matched - 1;
    if (FindEnding(stemmingZone, beforeEnding, thePredessors) == 0)
    {
        return false;
    }

    // cut the ending found
    stemmingZone.Length -= matched;
    return true;
}
/// <summary>
/// Computes the start offsets of the RV, R1 and R2 regions of a word and
/// stores them in the fields of the same names. A region that cannot be
/// located keeps the offset 0, and so do all regions after it.
/// </summary>
/// <param name="word">Word to analyse.</param>
private void MarkPositions(String word)
{
    RV = 0;
    R1 = 0;
    R2 = 0;

    int last = word.Length - 1;
    int pos = 0;

    // RV: skip the leading non-vowels, then step over the first vowel.
    for (; pos < word.Length && !IsVowel(word[pos]); pos++)
    {
    }
    pos++;
    if (pos > last)
    {
        return; // RV zone is empty
    }
    RV = pos;

    // R1: skip the vowels that follow, then step over one more character.
    for (; pos < word.Length && IsVowel(word[pos]); pos++)
    {
    }
    pos++;
    if (pos > last)
    {
        return; // R1 zone is empty
    }
    R1 = pos;

    // R2: repeat the non-vowel pass and the vowel pass starting from R1.
    for (; pos < word.Length && !IsVowel(word[pos]); pos++)
    {
    }
    pos++;
    if (pos > last)
    {
        return; // R2 zone is empty
    }

    for (; pos < word.Length && IsVowel(word[pos]); pos++)
    {
    }
    pos++;
    if (pos > last)
    {
        return; // R2 zone is empty
    }
    R2 = pos;
}
/// <summary>
/// Tells whether a character is one of the vowels of the current charset.
/// </summary>
/// <param name="letter">Character to test.</param>
/// <returns>true when the character is a vowel, false otherwise.</returns>
private bool IsVowel(char letter)
{
    foreach (char vowelIndex in vowels)
    {
        // The table stores letter indices; translate through the charset first.
        if (letter == charset[vowelIndex])
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Removes a noun ending from the end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a noun ending was removed, false otherwise.</returns>
private bool Noun(StringBuilder stemmingZone)
{
    bool removed = FindAndRemoveEnding(stemmingZone, nounEndings);
    return removed;
}
/// <summary>
/// Removes a perfective gerund ending from the end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when an ending was removed, false otherwise.</returns>
private bool PerfectiveGerund(StringBuilder stemmingZone)
{
    // Group 1 endings only count when they follow A or IA.
    if (FindAndRemoveEnding(stemmingZone, perfectiveGerundEndings1, perfectiveGerund1Predessors))
    {
        return true;
    }

    // Group 2 endings are removed unconditionally.
    return FindAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
}
/// <summary>
/// Removes a reflexive ending from the end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a reflexive ending was removed, false otherwise.</returns>
private bool Reflexive(StringBuilder stemmingZone)
{
    bool removed = FindAndRemoveEnding(stemmingZone, reflexiveEndings);
    return removed;
}
/// <summary>
/// Drops the last character of the stemming zone when it is the letter I.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a trailing I was removed, false otherwise.</returns>
private bool RemoveI(StringBuilder stemmingZone)
{
    int last = stemmingZone.Length - 1;
    if (last < 0 || stemmingZone[last] != charset[I])
    {
        return false;
    }

    stemmingZone.Length = last;
    return true;
}
/// <summary>
/// Drops the last character of the stemming zone when it is the SOFT letter.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a trailing SOFT letter was removed, false otherwise.</returns>
private bool RemoveSoft(StringBuilder stemmingZone)
{
    int last = stemmingZone.Length - 1;
    if (last < 0 || stemmingZone[last] != charset[SOFT])
    {
        return false;
    }

    stemmingZone.Length = last;
    return true;
}
/// <summary>
/// Replaces the character table used to translate letter indices into characters.
/// </summary>
/// <param name="newCharset">Table to use from now on.</param>
public void SetCharset(char[] newCharset)
{
    this.charset = newCharset;
}
/// <summary>
/// Rebuilds every ending table of the stemmer (vowels, gerund, adjective,
/// participle, reflexive, verb, noun, superlative and derivational endings)
/// with the definitions of the Russian stemming algorithm.
/// </summary>
private void SetEndings()
{
    vowels = new char[] { A, E, I, O, U, Y, AE, IU, IA };

    perfectiveGerundEndings1 = new char[][] {
        new char[] { V },
        new char[] { V, SH, I },
        new char[] { V, SH, I, S, SOFT }
    };

    perfectiveGerund1Predessors = new char[][] {
        new char[] { A }, new char[] { IA }
    };

    perfectiveGerundEndings2 = new char[][] {
        new char[] { I, V }, new char[] { Y, V },
        new char[] { I, V, SH, I }, new char[] { Y, V, SH, I },
        new char[] { I, V, SH, I, S, SOFT }, new char[] { Y, V, SH, I, S, SOFT }
    };

    adjectiveEndings = new char[][] {
        new char[] { E, E }, new char[] { I, E }, new char[] { Y, E }, new char[] { O, E },
        new char[] { E, I_ }, new char[] { I, I_ }, new char[] { Y, I_ }, new char[] { O, I_ },
        new char[] { E, M }, new char[] { I, M }, new char[] { Y, M }, new char[] { O, M },
        new char[] { I, X }, new char[] { Y, X },
        new char[] { U, IU }, new char[] { IU, IU },
        new char[] { A, IA }, new char[] { IA, IA },
        new char[] { O, IU }, new char[] { E, IU },
        new char[] { I, M, I }, new char[] { Y, M, I },
        new char[] { E, G, O }, new char[] { O, G, O },
        new char[] { E, M, U }, new char[] { O, M, U }
    };

    participleEndings1 = new char[][] {
        new char[] { SHCH },
        new char[] { E, M }, new char[] { N, N }, new char[] { V, SH }, new char[] { IU, SHCH }
    };

    participleEndings2 = new char[][] {
        new char[] { I, V, SH }, new char[] { Y, V, SH }, new char[] { U, IU, SHCH }
    };

    participle1Predessors = new char[][] {
        new char[] { A }, new char[] { IA }
    };

    reflexiveEndings = new char[][] {
        new char[] { S, IA }, new char[] { S, SOFT }
    };

    verbEndings1 = new char[][] {
        new char[] { I_ }, new char[] { L }, new char[] { N },
        new char[] { L, O }, new char[] { N, O }, new char[] { E, T }, new char[] { IU, T },
        new char[] { L, A }, new char[] { N, A }, new char[] { L, I }, new char[] { E, M },
        new char[] { N, Y },
        new char[] { E, T, E }, new char[] { I_, T, E },
        new char[] { T, SOFT },
        new char[] { E, SH, SOFT },
        new char[] { N, N, O }
    };

    verbEndings2 = new char[][] {
        new char[] { IU },
        new char[] { U, IU }, new char[] { E, N }, new char[] { E, I_ }, new char[] { IA, T },
        new char[] { U, I_ }, new char[] { I, L }, new char[] { Y, L }, new char[] { I, M },
        new char[] { Y, M }, new char[] { I, T }, new char[] { Y, T },
        new char[] { I, L, A }, new char[] { Y, L, A }, new char[] { E, N, A },
        new char[] { I, T, E }, new char[] { I, L, I }, new char[] { Y, L, I },
        new char[] { I, L, O }, new char[] { Y, L, O }, new char[] { E, N, O },
        new char[] { U, E, T }, new char[] { U, IU, T }, new char[] { E, N, Y },
        new char[] { I, T, SOFT }, new char[] { Y, T, SOFT }, new char[] { I, SH, SOFT },
        new char[] { E, I_, T, E }, new char[] { U, I_, T, E }
    };

    verb1Predessors = new char[][] {
        new char[] { A }, new char[] { IA }
    };

    nounEndings = new char[][] {
        new char[] { A }, new char[] { IU }, new char[] { I_ }, new char[] { O },
        new char[] { U }, new char[] { E }, new char[] { Y }, new char[] { I },
        new char[] { SOFT }, new char[] { IA },
        new char[] { E, V }, new char[] { O, V }, new char[] { I, E }, new char[] { SOFT, E },
        new char[] { IA, X }, new char[] { I, IU }, new char[] { E, I }, new char[] { I, I },
        new char[] { E, I_ }, new char[] { O, I_ }, new char[] { E, M }, new char[] { A, M },
        new char[] { O, M }, new char[] { A, X }, new char[] { SOFT, IU }, new char[] { I, IA },
        new char[] { SOFT, IA }, new char[] { I, I_ }, new char[] { IA, M },
        new char[] { IA, M, I }, new char[] { A, M, I }, new char[] { I, E, I_ },
        new char[] { I, IA, M }, new char[] { I, E, M }, new char[] { I, IA, X },
        new char[] { I, IA, M, I }
    };

    superlativeEndings = new char[][] {
        new char[] { E, I_, SH }, new char[] { E, I_, SH, E }
    };

    derivationalEndings = new char[][] {
        new char[] { O, S, T }, new char[] { O, S, T, SOFT }
    };
}
/// <summary>
/// Finds the stem of the given Russian word. Only the part of the word that
/// starts at RV is modified; the characters in front of RV are returned unchanged.
/// </summary>
/// <param name="input">Word to stem, written in the characters of the current charset.</param>
/// <returns>
/// The stemmed word, or <paramref name="input"/> itself when no RV region was found.
/// </returns>
public String Stem(String input)
{
    MarkPositions(input);
    if (RV == 0)
    {
        return input; // RV wasn't detected, nothing to stem
    }

    // stemming goes on in RV
    StringBuilder stemmingZone = new StringBuilder(input.Substring(RV));

    // Step 1: a perfective gerund ending wins; otherwise strip a reflexive
    // ending and then the first of adjectival / verb / noun ending that matches.
    if (!PerfectiveGerund(stemmingZone))
    {
        Reflexive(stemmingZone);
        if (!Adjectival(stemmingZone) && !Verb(stemmingZone))
        {
            Noun(stemmingZone);
        }
    }

    // Step 2
    RemoveI(stemmingZone);

    // Step 3
    Derivational(stemmingZone);

    // Step 4
    Superlative(stemmingZone);
    UndoubleN(stemmingZone);
    RemoveSoft(stemmingZone);

    // Re-attach the untouched prefix in front of the stemmed zone.
    return input.Substring(0, RV) + stemmingZone.ToString();
}
/// <summary>
/// Removes a superlative ending from the end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a superlative ending was removed, false otherwise.</returns>
private bool Superlative(StringBuilder stemmingZone)
{
    bool removed = FindAndRemoveEnding(stemmingZone, superlativeEndings);
    return removed;
}
/// <summary>
/// Turns a double N at the end of the stemming zone into a single N.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when the zone ended in NN and one N was dropped, false otherwise.</returns>
private bool UndoubleN(StringBuilder stemmingZone)
{
    int length = stemmingZone.Length;

    // Fewer than two characters cannot hold a double N.
    if (length < 2)
    {
        return false;
    }

    // Check the last character first, then the one before it.
    if (stemmingZone[length - 1] != charset[N] || stemmingZone[length - 2] != charset[N])
    {
        return false;
    }

    stemmingZone.Length = length - 1;
    return true;
}
/// <summary>
/// Removes a verb ending from the end of the stemming zone.
/// </summary>
/// <param name="stemmingZone">Part of the word being stemmed; shortened in place.</param>
/// <returns>true when a verb ending was removed, false otherwise.</returns>
private bool Verb(StringBuilder stemmingZone)
{
    // Group 1 endings only count when they follow A or IA.
    if (FindAndRemoveEnding(stemmingZone, verbEndings1, verb1Predessors))
    {
        return true;
    }

    // Group 2 endings are removed unconditionally.
    return FindAndRemoveEnding(stemmingZone, verbEndings2);
}
/// <summary>
/// Convenience entry point: stems a word with a stemmer created on the spot
/// for the given character table.
/// </summary>
/// <param name="theWord">Word to stem.</param>
/// <param name="charset">Table that maps the letter indices of this class to characters.</param>
/// <returns>The stemmed word.</returns>
public static String Stem(String theWord, char[] charset)
{
    return new RussianStemmer(charset).Stem(theWord);
}