cvsimport
[beagle.git] / beagled / Lucene.Net / Analysis / Standard / StandardFilter.cs
blob7f4e2ea37106748a0136390f7a05ab26f02ae69d
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using System;
18 using Lucene.Net.Analysis;
namespace Lucene.Net.Analysis.Standard
{
    /// <summary>
    /// Normalizes tokens extracted with <see cref="StandardTokenizer"/>:
    /// strips a trailing possessive <c>'s</c> from apostrophe-typed tokens and
    /// removes the dots from acronym-typed tokens.
    /// </summary>
    public sealed class StandardFilter : TokenFilter
    {
        /// <summary>Constructs a filter over the token stream <paramref name="in_Renamed"/>.</summary>
        /// <param name="in_Renamed">The upstream token stream whose tokens are normalized.</param>
        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
        {
        }

        // Token type strings, looked up in StandardTokenizerConstants.tokenImage
        // by the APOSTROPHE and ACRONYM indices; compared against Token.Type().
        private static readonly System.String APOSTROPHE_TYPE = StandardTokenizerConstants.tokenImage[StandardTokenizerConstants.APOSTROPHE];
        private static readonly System.String ACRONYM_TYPE = StandardTokenizerConstants.tokenImage[StandardTokenizerConstants.ACRONYM];

        /// <summary>
        /// Returns the next token in the stream, or null at EOS.
        /// <para>Removes <c>'s</c> (or <c>'S</c>) from the end of apostrophe-typed tokens.</para>
        /// <para>Removes dots from acronym-typed tokens.</para>
        /// </summary>
        /// <returns>
        /// <c>null</c> when the underlying stream is exhausted; a new token with
        /// the normalized text and the original start offset, end offset and
        /// type when a rule applies; otherwise the upstream token unchanged.
        /// </returns>
        public override Token Next()
        {
            Token t = input.Next();

            if (t == null)
            {
                return null;
            }

            System.String text = t.TermText();
            System.String type = t.Type();

            if (type == APOSTROPHE_TYPE && EndsWithPossessive(text))
            {
                // Drop exactly the two trailing chars that were just verified.
                return new Token(text.Substring(0, text.Length - 2), t.StartOffset(), t.EndOffset(), type);
            }

            if (type == ACRONYM_TYPE)
            {
                // String.Replace(string, string) is an ordinal search, so this
                // removes every '.' and nothing else.
                return new Token(text.Replace(".", ""), t.StartOffset(), t.EndOffset(), type);
            }

            return t;
        }

        /// <summary>
        /// Ordinal test for a trailing <c>'s</c> or <c>'S</c>.
        /// </summary>
        /// <remarks>
        /// The last two chars are compared directly instead of calling the
        /// culture-sensitive <c>String.EndsWith(string)</c>: a linguistic match
        /// does not guarantee that the suffix occupies exactly the final two
        /// chars, which the caller relies on when trimming.
        /// </remarks>
        private static bool EndsWithPossessive(System.String text)
        {
            int length = text.Length;
            if (length < 2 || text[length - 2] != '\'')
            {
                return false;
            }

            char last = text[length - 1];
            return last == 's' || last == 'S';
        }
    }
}