/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 using Lucene
.Net
.Analysis
;
namespace Lucene.Net.Analysis.Standard
{
    /// <summary>
    /// Normalizes tokens extracted with <see cref="StandardTokenizer"/>.
    /// </summary>
    public sealed class StandardFilter : TokenFilter
    {
        /// <summary>Construct filtering <paramref name="in_Renamed"/>.</summary>
        /// <param name="in_Renamed">The token stream to filter; handed to the base class.</param>
        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
        {
        }

        /// <summary>
        /// The <c>tokenImage</c> entry for the <c>APOSTROPHE</c> constant; compared against
        /// a token's type in <see cref="Next"/>.
        /// </summary>
        private static readonly string APOSTROPHE_TYPE =
            Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[
                Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];

        /// <summary>
        /// The <c>tokenImage</c> entry for the <c>ACRONYM</c> constant; compared against
        /// a token's type in <see cref="Next"/>.
        /// </summary>
        private static readonly string ACRONYM_TYPE =
            Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[
                Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];

        /// <summary>
        /// Returns the next token in the stream, or null at EOS.
        /// <para>Removes <c>'s</c> from the end of words.</para>
        /// <para>Removes dots from acronyms.</para>
        /// </summary>
        /// <returns>
        /// A new token with the rewritten text and the source token's start offset, end
        /// offset and type when a rule applies; the source token unchanged when no rule
        /// applies; null when the wrapped stream returns null.
        /// </returns>
        // NOTE(review): Token stays fully qualified on purpose - shortening it could bind to a
        // different Token type visible from this namespace. Confirm before simplifying.
        public override Lucene.Net.Analysis.Token Next()
        {
            Lucene.Net.Analysis.Token t = input.Next();

            // End of stream: propagate null instead of dereferencing it below.
            if (t == null)
            {
                return null;
            }

            string text = t.TermText();
            string type = t.Type();

            if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
            {
                // Drop the two-character possessive suffix; offsets still describe the
                // original span in the input.
                return new Lucene.Net.Analysis.Token(
                    text.Substring(0, text.Length - 2), t.StartOffset(), t.EndOffset(), type);
            }

            if (type == ACRONYM_TYPE)
            {
                // Copy every character except '.' so that "U.S.A." becomes "USA".
                System.Text.StringBuilder trimmed = new System.Text.StringBuilder(text.Length);
                for (int i = 0; i < text.Length; i++)
                {
                    char c = text[i];
                    if (c != '.')
                    {
                        trimmed.Append(c);
                    }
                }

                return new Lucene.Net.Analysis.Token(
                    trimmed.ToString(), t.StartOffset(), t.EndOffset(), type);
            }

            // Any other token type passes through untouched.
            return t;
        }
    }
}