/*
 * Copyright 2004-2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 namespace Lucene
.Net
.Analysis
/// <summary> A filter that replaces accented characters in the ISO Latin 1 character set
/// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
/// For instance, 'à' will be replaced by 'a'.
/// </summary>
28 public class ISOLatin1AccentFilter
: TokenFilter
/// <summary> Creates an accent filter on top of the given token stream. </summary>
/// <param name="input"> the token stream passed on to the base constructor </param>
public ISOLatin1AccentFilter(TokenStream input) : base(input)
{
    // Nothing to initialise here; the base constructor receives the stream.
}
/// <summary> Fetches the next token from the wrapped stream and returns a new token whose
/// term text has been passed through <c>RemoveAccents</c>. Start offset, end offset
/// and type are copied from the source token unchanged.
/// </summary>
/// <returns> the filtered token, or null when the wrapped stream returns null </returns>
public override Token Next()
{
    Token t = input.Next();

    // A null token marks the end of the wrapped stream. Pass it on instead of
    // dereferencing it, which would throw a NullReferenceException.
    if (t == null)
    {
        return null;
    }

    // Return a token with filtered characters.
    return new Token(RemoveAccents(t.TermText()), t.StartOffset(), t.EndOffset(), t.Type());
}
/// <summary> Replaces accented characters in a string by their unaccented equivalents.
/// Case is preserved ('À' becomes 'A', 'à' becomes 'a'). Ligatures and letters with no
/// single-letter equivalent expand to two characters ('Æ' to "AE", 'ß' to "ss",
/// 'Þ' to "TH"). Every other character is copied through unchanged.
/// </summary>
/// <param name="input"> the text to filter; must not be null </param>
/// <returns> a new string with the accented characters replaced </returns>
public static System.String RemoveAccents(System.String input)
{
    // Start at the input length: most characters map one-to-one and the
    // builder grows by itself for the few two-character expansions.
    System.Text.StringBuilder output = new System.Text.StringBuilder(input.Length);

    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];

        // Every character handled below is at or above U+00C0, so anything
        // lower (all of ASCII included) is copied without entering the switch.
        if (c < '\u00C0')
        {
            output.Append(c);
            continue;
        }

        switch (c)
        {
            // À Á Â Ã Ä Å
            case '\u00C0': case '\u00C1': case '\u00C2':
            case '\u00C3': case '\u00C4': case '\u00C5':
                output.Append('A');
                break;
            case '\u00C6': // Æ
                output.Append("AE");
                break;
            case '\u00C7': // Ç
                output.Append('C');
                break;
            // È É Ê Ë
            case '\u00C8': case '\u00C9': case '\u00CA': case '\u00CB':
                output.Append('E');
                break;
            // Ì Í Î Ï
            case '\u00CC': case '\u00CD': case '\u00CE': case '\u00CF':
                output.Append('I');
                break;
            case '\u00D0': // Ð
                output.Append('D');
                break;
            case '\u00D1': // Ñ
                output.Append('N');
                break;
            // Ò Ó Ô Õ Ö Ø (U+00D7 is the multiplication sign and is left alone)
            case '\u00D2': case '\u00D3': case '\u00D4':
            case '\u00D5': case '\u00D6': case '\u00D8':
                output.Append('O');
                break;
            case '\u0152': // Œ (outside ISO-8859-1 proper, folded for convenience)
                output.Append("OE");
                break;
            case '\u00DE': // Þ
                output.Append("TH");
                break;
            // Ù Ú Û Ü
            case '\u00D9': case '\u00DA': case '\u00DB': case '\u00DC':
                output.Append('U');
                break;
            // Ý Ÿ
            case '\u00DD': case '\u0178':
                output.Append('Y');
                break;
            // à á â ã ä å
            case '\u00E0': case '\u00E1': case '\u00E2':
            case '\u00E3': case '\u00E4': case '\u00E5':
                output.Append('a');
                break;
            case '\u00E6': // æ
                output.Append("ae");
                break;
            case '\u00E7': // ç
                output.Append('c');
                break;
            // è é ê ë
            case '\u00E8': case '\u00E9': case '\u00EA': case '\u00EB':
                output.Append('e');
                break;
            // ì í î ï
            case '\u00EC': case '\u00ED': case '\u00EE': case '\u00EF':
                output.Append('i');
                break;
            case '\u00F0': // ð
                output.Append('d');
                break;
            case '\u00F1': // ñ
                output.Append('n');
                break;
            // ò ó ô õ ö ø (U+00F7 is the division sign and is left alone)
            case '\u00F2': case '\u00F3': case '\u00F4':
            case '\u00F5': case '\u00F6': case '\u00F8':
                output.Append('o');
                break;
            case '\u0153': // œ (outside ISO-8859-1 proper, folded for convenience)
                output.Append("oe");
                break;
            case '\u00DF': // ß
                output.Append("ss");
                break;
            case '\u00FE': // þ
                output.Append("th");
                break;
            // ù ú û ü
            case '\u00F9': case '\u00FA': case '\u00FB': case '\u00FC':
                output.Append('u');
                break;
            // ý ÿ
            case '\u00FD': case '\u00FF':
                output.Append('y');
                break;
            default:
                // Not an accented Latin-1 letter: keep as is.
                output.Append(c);
                break;
        }
    }

    return output.ToString();
}