3 using System
.Collections
;
4 using Lucene
.Net
.Analysis
.Standard
;
6 namespace Lucene
.Net
.Analysis
.De
9 /* ====================================================================
10 * The Apache Software License, Version 1.1
12 * Copyright (c) 2001 The Apache Software Foundation. All rights
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
27 * 3. The end-user documentation included with the redistribution,
28 * if any, must include the following acknowledgment:
29 * "This product includes software developed by the
30 * Apache Software Foundation (http://www.apache.org/)."
31 * Alternately, this acknowledgment may appear in the software itself,
32 * if and wherever such third-party acknowledgments normally appear.
34 * 4. The names "Apache" and "Apache Software Foundation" and
35 * "Apache Lucene" must not be used to endorse or promote products
36 * derived from this software without prior written permission. For
37 * written permission, please contact apache@apache.org.
39 * 5. Products derived from this software may not be called "Apache",
40 * "Apache Lucene", nor may "Apache" appear in their name, without
41 * prior written permission of the Apache Software Foundation.
43 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
46 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
47 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * ====================================================================
57 * This software consists of voluntary contributions made by many
58 * individuals on behalf of the Apache Software Foundation. For more
59 * information on the Apache Software Foundation, please see
60 * <http://www.apache.org/>.
/// Analyzer for the German language. Supports an external list of stopwords (words that
/// will not be indexed at all) and an external list of exclusions (words that will
/// not be stemmed, but indexed).
/// A default set of stopwords is used unless an alternative list is specified; the
/// exclusion list is empty by default.
70 /// <author>Gerhard Schwarz</author>
71 /// <version>$Id: GermanAnalyzer.cs,v 1.1.1.1 2004/04/29 22:53:51 trow Exp $</version>
72 public class GermanAnalyzer
: Analyzer
/// <summary>
/// List of typical German stopwords.
/// Used by the parameterless constructor to build the default stop table.
/// </summary>
// Declared static readonly: the list is constant data, so it is shared by all
// analyzer instances instead of being re-allocated for each one.
// NOTE(review): confirm this list is complete against the upstream stop word list.
private static readonly String[] GERMAN_STOP_WORDS =
{
    "einer", "eine", "eines", "einem", "einen",
    "der", "die", "das", "dass", "daß",
    "du", "er", "sie", "es",
    "was", "wer", "wie", "wir",
    "und", "oder", "ohne", "mit",
    "am", "im", "in", "aus", "auf",
    "ist", "sein", "war", "wird",
    "ihr", "ihre", "ihres",
    "dich", "dir", "mich", "mir"
};
/// <summary>
/// Stop word table handed to the StopFilter when a token stream is built.
/// Every constructor replaces this initial empty table.
/// </summary>
private Hashtable stoptable = new Hashtable();
/// <summary>
/// Words that should be indexed but not stemmed; handed to the GermanStemFilter.
/// Starts out empty and is replaced by the SetStemExclusionTable overloads.
/// </summary>
private Hashtable excltable = new Hashtable();
/// <summary>
/// Builds an analyzer that uses the built-in German stop word list
/// (GERMAN_STOP_WORDS).
/// </summary>
public GermanAnalyzer()
{
    Hashtable defaultStopTable = StopFilter.MakeStopTable(GERMAN_STOP_WORDS);
    this.stoptable = defaultStopTable;
}
/// <summary>
/// Builds an analyzer whose stop table is created from an array of stop words.
/// </summary>
/// <param name="stopwords">
/// Stop words; passed to StopFilter.MakeStopTable to build the stop table.
/// </param>
public GermanAnalyzer(String[] stopwords)
{
    Hashtable customStopTable = StopFilter.MakeStopTable(stopwords);
    this.stoptable = customStopTable;
}
/// <summary>
/// Builds an analyzer that uses an already prepared stop table.
/// </summary>
/// <param name="stopwords">
/// Stop table to use. It is stored by reference, not copied.
/// </param>
public GermanAnalyzer(Hashtable stopwords)
{
    this.stoptable = stopwords;
}
/// <summary>
/// Builds an analyzer whose stop table is loaded from a file.
/// </summary>
/// <param name="stopwords">
/// File holding the stop words; read through WordlistLoader.GetWordtable.
/// </param>
public GermanAnalyzer(FileInfo stopwords)
{
    Hashtable loadedStopTable = WordlistLoader.GetWordtable(stopwords);
    this.stoptable = loadedStopTable;
}
/// <summary>
/// Replaces the stem exclusion table with one built from an array of strings.
/// </summary>
/// <param name="exclusionlist">
/// Words to exclude from stemming; passed to StopFilter.MakeStopTable to build the table.
/// </param>
public void SetStemExclusionTable(String[] exclusionlist)
{
    Hashtable builtTable = StopFilter.MakeStopTable(exclusionlist);
    this.excltable = builtTable;
}
/// <summary>
/// Replaces the stem exclusion table with the given Hashtable.
/// </summary>
/// <param name="exclusionlist">
/// Exclusion table to use. It is stored by reference, not copied.
/// </param>
public void SetStemExclusionTable(Hashtable exclusionlist)
{
    this.excltable = exclusionlist;
}
/// <summary>
/// Replaces the stem exclusion table with the words contained in the given file.
/// </summary>
/// <param name="exclusionlist">
/// File holding the excluded words; read through WordlistLoader.GetWordtable.
/// </param>
public void SetStemExclusionTable(FileInfo exclusionlist)
{
    Hashtable loadedTable = WordlistLoader.GetWordtable(exclusionlist);
    this.excltable = loadedTable;
}
166 /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
168 /// <param name="fieldName"></param>
169 /// <param name="reader"></param>
170 /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
171 public override TokenStream
TokenStream(String fieldName
, TextReader reader
)
173 TokenStream result
= new StandardTokenizer( reader
);
174 result
= new StandardFilter( result
);
175 result
= new StopFilter( result
, stoptable
);
176 result
= new GermanStemFilter( result
, excltable
);