First post!
[beagle.git] / Lucene.Net / Analysis / DE / GermanAnalyzer.cs
blobe5ab302d5974e4d880bd5d10bbaa4c39556d4308
1 using System;
2 using System.IO;
3 using System.Collections;
4 using Lucene.Net.Analysis.Standard;
6 namespace Lucene.Net.Analysis.De
9 /* ====================================================================
10 * The Apache Software License, Version 1.1
12 * Copyright (c) 2001 The Apache Software Foundation. All rights
13 * reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
25 * distribution.
27 * 3. The end-user documentation included with the redistribution,
28 * if any, must include the following acknowledgment:
29 * "This product includes software developed by the
30 * Apache Software Foundation (http://www.apache.org/)."
31 * Alternately, this acknowledgment may appear in the software itself,
32 * if and wherever such third-party acknowledgments normally appear.
34 * 4. The names "Apache" and "Apache Software Foundation" and
35 * "Apache Lucene" must not be used to endorse or promote products
36 * derived from this software without prior written permission. For
37 * written permission, please contact apache@apache.org.
39 * 5. Products derived from this software may not be called "Apache",
40 * "Apache Lucene", nor may "Apache" appear in their name, without
41 * prior written permission of the Apache Software Foundation.
43 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
46 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
47 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 * ====================================================================
57 * This software consists of voluntary contributions made by many
58 * individuals on behalf of the Apache Software Foundation. For more
59 * information on the Apache Software Foundation, please see
60 * <http://www.apache.org/>.
63 /// <summary>
64 /// Analyzer for the German language. Supports an external list of stopwords (words that
65 /// will not be indexed at all) and an external list of exclusions (words that will
66 /// not be stemmed, but indexed).
67 /// A default set of stopwords is used unless an alternative list is specified; the
68 /// exclusion list is empty by default.
69 /// </summary>
70 /// <author>Gerhard Schwarz</author>
71 /// <version>$Id: GermanAnalyzer.cs,v 1.1.1.1 2004/04/29 22:53:51 trow Exp $</version>
72 public class GermanAnalyzer : Analyzer
/// <summary>
/// List of typical German stopwords. The parameterless constructor turns this
/// list into the stop table.
/// </summary>
/// <remarks>
/// Declared static and read-only: the list never changes, so it is allocated once
/// for the type instead of once per analyzer instance.
/// </remarks>
private static readonly String[] GERMAN_STOP_WORDS =
{
    "einer", "eine", "eines", "einem", "einen",
    "der", "die", "das", "dass", "daß",
    "du", "er", "sie", "es",
    "was", "wer", "wie", "wir",
    "und", "oder", "ohne", "mit",
    "am", "im", "in", "aus", "auf",
    "ist", "sein", "war", "wird",
    "ihr", "ihre", "ihres",
    "als", "für", "von",
    "dich", "dir", "mich", "mir",
    "mein", "kein",
    "durch", "wegen"
};
/// <summary>
/// Contains the stopwords used with the StopFilter. Each constructor visible in
/// this file replaces this table with one built from its argument (or from the
/// default stopword list).
/// </summary>
private Hashtable stoptable = new Hashtable();

/// <summary>
/// Contains words that should be indexed but not stemmed. It is passed to the
/// GermanStemFilter and stays empty until one of the SetStemExclusionTable
/// overloads is called.
/// </summary>
private Hashtable excltable = new Hashtable();
/// <summary>
/// Builds an analyzer that uses the default German stopword list.
/// </summary>
public GermanAnalyzer()
{
    stoptable = StopFilter.MakeStopTable( GERMAN_STOP_WORDS );
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <param name="stopwords">Words to use instead of the default stopword list;
/// converted to a table with StopFilter.MakeStopTable.</param>
public GermanAnalyzer( String[] stopwords )
{
    stoptable = StopFilter.MakeStopTable( stopwords );
}
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
/// <param name="stopwords">Ready-made stopword table. The analyzer keeps a
/// reference to this instance rather than copying it, so later changes to the
/// table are visible to the analyzer.</param>
public GermanAnalyzer( Hashtable stopwords )
{
    stoptable = stopwords;
}
/// <summary>
/// Builds an analyzer with the stop words loaded from the given file.
/// </summary>
/// <param name="stopwords">File handed to WordlistLoader.GetWordtable to
/// produce the stopword table.</param>
public GermanAnalyzer( FileInfo stopwords )
{
    stoptable = WordlistLoader.GetWordtable( stopwords );
}
/// <summary>
/// Builds the exclusion list from an array of Strings, replacing any exclusion
/// list set earlier.
/// </summary>
/// <param name="exclusionlist">Words that should be indexed but not stemmed;
/// converted to a table with StopFilter.MakeStopTable.</param>
public void SetStemExclusionTable( String[] exclusionlist )
{
    excltable = StopFilter.MakeStopTable( exclusionlist );
}
/// <summary>
/// Uses the given Hashtable as the exclusion list, replacing any exclusion list
/// set earlier.
/// </summary>
/// <param name="exclusionlist">Ready-made table of words that should be indexed
/// but not stemmed. The analyzer keeps a reference to this instance rather than
/// copying it.</param>
public void SetStemExclusionTable( Hashtable exclusionlist )
{
    excltable = exclusionlist;
}
/// <summary>
/// Builds the exclusion list from the words contained in the given file,
/// replacing any exclusion list set earlier.
/// </summary>
/// <param name="exclusionlist">File handed to WordlistLoader.GetWordtable to
/// produce the exclusion table.</param>
public void SetStemExclusionTable(FileInfo exclusionlist)
{
    excltable = WordlistLoader.GetWordtable(exclusionlist);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this
/// implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>A TokenStream built from a StandardTokenizer filtered with
/// StandardFilter, StopFilter and GermanStemFilter, in that order.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream result = new StandardTokenizer( reader );
    result = new StandardFilter( result );
    // Stopwords are removed before stemming, so the stop table is matched
    // against unstemmed tokens.
    result = new StopFilter( result, stoptable );
    result = new GermanStemFilter( result, excltable );
    return result;
}