First post!
[beagle.git] / Lucene.Net / Analysis / LowerCaseTokenizer.cs
blobb5283f317ccb8eac1d01b5cab1b5524ea81b7283
1 using System;
2 using System.IO;
3 using System.Text;
5 namespace Lucene.Net.Analysis
7 /* ====================================================================
8 * The Apache Software License, Version 1.1
10 * Copyright (c) 2001 The Apache Software Foundation. All rights
11 * reserved.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
25 * 3. The end-user documentation included with the redistribution,
26 * if any, must include the following acknowledgment:
27 * "This product includes software developed by the
28 * Apache Software Foundation (http://www.apache.org/)."
29 * Alternately, this acknowledgment may appear in the software itself,
30 * if and wherever such third-party acknowledgments normally appear.
32 * 4. The names "Apache" and "Apache Software Foundation" and
33 * "Apache Lucene" must not be used to endorse or promote products
34 * derived from this software without prior written permission. For
35 * written permission, please contact apache@apache.org.
37 * 5. Products derived from this software may not be called "Apache",
38 * "Apache Lucene", nor may "Apache" appear in their name, without
39 * prior written permission of the Apache Software Foundation.
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 * ====================================================================
55 * This software consists of voluntary contributions made by many
56 * individuals on behalf of the Apache Software Foundation. For more
57 * information on the Apache Software Foundation, please see
58 * <http://www.apache.org/>.
61 /// <summary>
62 /// LowerCaseTokenizer performs the function of LetterTokenizer
63 /// and LowerCaseFilter together. It divides text at non-letters and lower-cases
64 /// the resulting tokens. While it is functionally equivalent to the combination
65 /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
66 /// to doing the two tasks at once, hence this (redundant) implementation.
67 /// <P>
68 /// Note: this does a decent job for most European languages, but does a terrible
69 /// job for some Asian languages, where words are not separated by spaces.
70 /// </P>
71 /// </summary>
72 public sealed class LowerCaseTokenizer : LetterTokenizer
74 /// <summary>
75 /// Construct a new LowerCaseTokenizer.
76 /// </summary>
77 /// <param name="_in">The TextReader passed through to the LetterTokenizer base constructor.</param>
78 public LowerCaseTokenizer(TextReader _in) : base(_in)
82 /// <summary>
83 /// Converts a character to lower case via Char.ToLower(char).
84 /// </summary>
85 /// <param name="c">The character to convert.</param>
86 /// <returns>The result of Char.ToLower(c).</returns>
87 protected override char Normalize(char c)
89 return Char.ToLower(c);