2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Lucene
.Net
.Analysis
;
18 namespace Lucene
.Net
.Analysis
.Standard
/// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer"/>.</summary>
23 public sealed class StandardFilter
: TokenFilter
/// <summary>Constructs a filter that reads its tokens from <paramref name="in_Renamed"/>.</summary>
/// <param name="in_Renamed">
/// Token stream to filter; it is forwarded unchanged to the base-class constructor.
/// </param>
public StandardFilter(TokenStream in_Renamed)
    : base(in_Renamed)
{
    // No additional state: everything is handled by the base constructor.
}
// Token-type labels, read once from the StandardTokenizerConstants.tokenImage
// table at the APOSTROPHE and ACRONYM indices. Next() compares a token's type
// against these fields with an Object-cast ==, i.e. by reference.
private static readonly string APOSTROPHE_TYPE =
    Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[
        Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];

private static readonly string ACRONYM_TYPE =
    Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[
        Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];
/// <summary>Returns the next token in the stream, or null at EOS.
/// <para>Removes <c>'s</c> from the end of words.</para>
/// <para>Removes dots from acronyms.</para>
/// </summary>
39 public override Lucene
.Net
.Analysis
.Token
Next()
41 Lucene
.Net
.Analysis
.Token t
= input
.Next();
46 System
.String text
= t
.TermText();
47 System
.String type
= t
.Type();
49 if ((System
.Object
) type
== (System
.Object
) APOSTROPHE_TYPE
&& (text
.EndsWith("'s") || text
.EndsWith("'S")))
51 return new Lucene
.Net
.Analysis
.Token(text
.Substring(0, (text
.Length
- 2) - (0)), t
.StartOffset(), t
.EndOffset(), type
);
53 else if ((System
.Object
) type
== (System
.Object
) ACRONYM_TYPE
)
56 System
.Text
.StringBuilder trimmed
= new System
.Text
.StringBuilder();
57 for (int i
= 0; i
< text
.Length
; i
++)
63 return new Lucene
.Net
.Analysis
.Token(trimmed
.ToString(), t
.StartOffset(), t
.EndOffset(), type
);