First post!
[beagle.git] / Lucene.Net / Analysis / Standard / StandardAnalyzer.cs
blobdfed6d3a21f89e384ebf958928b9582fcac6e0fe
1 using System;
2 using System.IO;
3 using System.Collections;
5 using Lucene.Net.Analysis;
7 namespace Lucene.Net.Analysis.Standard
9 /* ====================================================================
10 * The Apache Software License, Version 1.1
12 * Copyright (c) 2001 The Apache Software Foundation. All rights
13 * reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
25 * distribution.
27 * 3. The end-user documentation included with the redistribution,
28 * if any, must include the following acknowledgment:
29 * "This product includes software developed by the
30 * Apache Software Foundation (http://www.apache.org/)."
31 * Alternately, this acknowledgment may appear in the software itself,
32 * if and wherever such third-party acknowledgments normally appear.
34 * 4. The names "Apache" and "Apache Software Foundation" and
35 * "Apache Lucene" must not be used to endorse or promote products
36 * derived from this software without prior written permission. For
37 * written permission, please contact apache@apache.org.
39 * 5. Products derived from this software may not be called "Apache",
40 * "Apache Lucene", nor may "Apache" appear in their name, without
41 * prior written permission of the Apache Software Foundation.
43 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
46 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
47 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 * ====================================================================
57 * This software consists of voluntary contributions made by many
58 * individuals on behalf of the Apache Software Foundation. For more
59 * information on the Apache Software Foundation, please see
60 * <http://www.apache.org/>.
/// <summary>
/// Analyzer whose token streams are produced by a StandardTokenizer wrapped,
/// in order, by a StandardFilter, a LowerCaseFilter and a StopFilter.
/// </summary>
public class StandardAnalyzer : Analyzer
{
    // Lookup table built once per analyzer from the stop words passed to the
    // constructor; handed to every StopFilter this analyzer creates.
    private Hashtable stopWordTable;

    /// <summary>
    /// Default stop words used by the parameterless constructor.
    /// This is the same array instance as StopAnalyzer.ENGLISH_STOP_WORDS.
    /// </summary>
    public static readonly String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
    // Inline initializer that is commented out in favour of the shared array above:
    // {
    //     "a", "and", "are", "as", "at", "be", "but", "by",
    //     "for", "if", "in", "into", "is", "it",
    //     "no", "not", "of", "on", "or", "s", "such",
    //     "t", "that", "the", "their", "then", "there", "these",
    //     "they", "this", "to", "was", "will", "with"
    // };

    /// <summary>
    /// Creates an analyzer that uses <see cref="STOP_WORDS"/> as its stop words.
    /// </summary>
    public StandardAnalyzer() : this(STOP_WORDS)
    {
    }

    /// <summary>
    /// Creates an analyzer that uses the supplied stop words.
    /// </summary>
    /// <param name="stopWords">Words converted into the stop table via StopFilter.MakeStopTable.</param>
    public StandardAnalyzer(String[] stopWords)
    {
        this.stopWordTable = StopFilter.MakeStopTable(stopWords);
    }

    /// <summary>
    /// Builds the analysis chain for <paramref name="reader"/>: a StandardTokenizer
    /// wrapped by a StandardFilter, then a LowerCaseFilter, then a StopFilter
    /// that uses this analyzer's stop table.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
    /// <param name="reader">Text source passed to the StandardTokenizer.</param>
    /// <returns>The outermost filter of the chain (the StopFilter).</returns>
    public override TokenStream TokenStream(String fieldName, TextReader reader)
    {
        // Construction order matches the wrapping order: tokenizer first,
        // then StandardFilter, LowerCaseFilter, and finally StopFilter.
        TokenStream tokenizer = new StandardTokenizer(reader);
        TokenStream normalized = new LowerCaseFilter(new StandardFilter(tokenizer));
        return new StopFilter(normalized, this.stopWordTable);
    }
}