cvsimport
[beagle.git] / beagled / Lucene.Net / Analysis / WordlistLoader.cs
blobe08c604a1ffbf599267b927b4731e92b505c0a6e
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using System;
namespace Lucene.Net.Analysis
{

    /// <summary> Loader for text files that represent a list of stopwords.
    /// Word sets are returned as a <see cref="System.Collections.Hashtable"/> in which
    /// every word is stored as both the key and the value of its entry.
    /// </summary>
    /// <author> Gerhard Schwarz
    /// </author>
    /// <version> $Id: WordlistLoader.cs,v 1.2 2006/10/02 17:08:49 joeshaw Exp $
    /// </version>
    public class WordlistLoader
    {

        /// <summary> Loads a text file and adds every line as an entry to a Hashtable (omitting
        /// leading and trailing whitespace). Every line of the file should contain only
        /// one word. The words need to be in lowercase if you make use of an
        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
        /// The file is decoded with <see cref="System.Text.Encoding.Default"/>.
        /// </summary>
        /// <param name="wordfile">File containing the wordlist
        /// </param>
        /// <returns> A Hashtable with the file's words (each word is both key and value)
        /// </returns>
        public static System.Collections.Hashtable GetWordSet(System.IO.FileInfo wordfile)
        {
            // The using statement guarantees the file handle is released even if
            // reading fails; the TextReader overload also closes the reader, and
            // closing it a second time is harmless.
            using (System.IO.TextReader reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
            {
                return GetWordSet(reader);
            }
        }

        /// <summary> Reads lines from a reader and adds every line as an entry to a Hashtable (omitting
        /// leading and trailing whitespace). Every line of the reader should contain only
        /// one word. The words need to be in lowercase if you make use of an
        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
        /// A word that occurs more than once is stored only once. A blank line is stored
        /// as the empty string. The reader is closed before this method returns, whether
        /// or not reading succeeded.
        /// </summary>
        /// <param name="reader">Reader containing the wordlist
        /// </param>
        /// <returns> A Hashtable with the reader's words (each word is both key and value)
        /// </returns>
        public static System.Collections.Hashtable GetWordSet(System.IO.TextReader reader)
        {
            System.Collections.Hashtable result = new System.Collections.Hashtable();
            try
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    string word = line.Trim();
                    // Indexer assignment rather than Add(): Add() throws ArgumentException
                    // when the key already exists, which would make any wordlist with a
                    // repeated word (or two blank lines) fail to load.
                    result[word] = word;
                }
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }
            }
            return result;
        }

        /// <summary> Loads the wordlist file <paramref name="wordfile"/> located in the
        /// directory <paramref name="path"/>.
        /// </summary>
        /// <param name="path"> Path to the wordlist
        /// </param>
        /// <param name="wordfile"> Name of the wordlist
        /// </param>
        /// <returns> A synchronized Hashtable mapping every word to itself
        /// </returns>
        /// <remarks> Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.
        /// </remarks>
        public static System.Collections.Hashtable GetWordtable(System.String path, System.String wordfile)
        {
            return GetWordtable(new System.IO.FileInfo(System.IO.Path.Combine(path, wordfile)));
        }

        /// <summary> Loads the wordlist file at the given path.
        /// </summary>
        /// <param name="wordfile"> Complete path to the wordlist
        /// </param>
        /// <returns> A synchronized Hashtable mapping every word to itself
        /// </returns>
        /// <remarks> Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.
        /// </remarks>
        public static System.Collections.Hashtable GetWordtable(System.String wordfile)
        {
            return GetWordtable(new System.IO.FileInfo(wordfile));
        }

        /// <summary> Loads the given wordlist file.
        /// </summary>
        /// <param name="wordfile"> File object that points to the wordlist
        /// </param>
        /// <returns> A synchronized Hashtable mapping every word to itself
        /// </returns>
        /// <remarks> Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.
        /// </remarks>
        public static System.Collections.Hashtable GetWordtable(System.IO.FileInfo wordfile)
        {
            System.Collections.Hashtable wordSet = GetWordSet(wordfile);
            return MakeWordTable(wordSet);
        }

        /// <summary> Builds a wordlist table, using words as both keys and values
        /// for backward compatibility.
        /// </summary>
        /// <param name="wordSet"> stopword set; its keys are expected to be strings
        /// </param>
        /// <returns> A new synchronized Hashtable mapping every key of
        /// <paramref name="wordSet"/> to itself
        /// </returns>
        private static System.Collections.Hashtable MakeWordTable(System.Collections.Hashtable wordSet)
        {
            System.Collections.Hashtable table = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
            // Iterate the key collection: enumerating the Hashtable itself yields
            // DictionaryEntry values, which cannot be cast to string.
            foreach (string word in wordSet.Keys)
            {
                table[word] = word;
            }
            return table;
        }
    }
}