2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 namespace Lucene
.Net
.Analysis
22 /// <summary> Loader for text files that represent a list of stopwords.
25 /// <author> Gerhard Schwarz
27 /// <version> $Id: WordlistLoader.cs,v 1.2 2006/10/02 17:08:49 joeshaw Exp $
29 public class WordlistLoader
/// <summary>
/// Loads a text file and returns its words as a set. The file is opened with the
/// platform default encoding and handed to the TextReader overload of GetWordSet,
/// which adds every line as an entry (omitting leading and trailing whitespace).
/// Every line of the file should contain only one word. The words need to be in
/// lowercase if you make use of an Analyzer which uses LowerCaseFilter
/// (like StandardAnalyzer).
/// </summary>
/// <param name="wordfile">File containing the wordlist.</param>
/// <returns>A Hashtable, used as a set, with the file's words.</returns>
public static System.Collections.Hashtable GetWordSet(System.IO.FileInfo wordfile)
{
    // The using statement disposes the reader on every exit path, including the
    // case where reading the word list throws part-way through the file.
    using (System.IO.TextReader reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
    {
        // No placeholder Hashtable is allocated up front: the overload below
        // always creates and returns its own.
        return GetWordSet(reader);
    }
}
/// <summary>
/// Reads lines from a reader and adds every line as an entry to a set (omitting
/// leading and trailing whitespace). Every line of the reader should contain only
/// one word. The words need to be in lowercase if you make use of an
/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
/// </summary>
/// <param name="reader">
/// Reader containing the wordlist. It is closed before this method returns,
/// whether or not reading succeeded.
/// </param>
/// <returns>A Hashtable, used as a set, keyed by the reader's words.</returns>
public static System.Collections.Hashtable GetWordSet(System.IO.TextReader reader)
{
    System.Collections.Hashtable result = new System.Collections.Hashtable();
    try
    {
        System.String word;
        while ((word = reader.ReadLine()) != null)
        {
            System.String tmp = word.Trim();
            // Indexer assignment is idempotent, so a word that appears on several
            // lines yields a single entry instead of a duplicate-key exception.
            result[tmp] = tmp;
        }
    }
    finally
    {
        // Guarded so that a null argument surfaces as the original
        // NullReferenceException from ReadLine rather than a second one here.
        if (reader != null)
        {
            reader.Close();
        }
    }
    return result;
}
/// <summary>
/// Loads the wordlist located by combining a directory path with a file name and
/// returns it via the FileInfo overload of GetWordtable.
/// </summary>
/// <param name="path">Path to the wordlist.</param>
/// <param name="wordfile">Name of the wordlist.</param>
/// <returns>The Hashtable produced by the FileInfo overload of GetWordtable.</returns>
/// <remarks>Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.</remarks>
public static System.Collections.Hashtable GetWordtable(System.String path, System.String wordfile)
{
    System.String combinedPath = System.IO.Path.Combine(path, wordfile);
    System.IO.FileInfo wordlistFile = new System.IO.FileInfo(combinedPath);
    return GetWordtable(wordlistFile);
}
/// <summary>
/// Loads the wordlist at the given path and returns it via the FileInfo overload
/// of GetWordtable.
/// </summary>
/// <param name="wordfile">Complete path to the wordlist.</param>
/// <returns>The Hashtable produced by the FileInfo overload of GetWordtable.</returns>
/// <remarks>Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.</remarks>
public static System.Collections.Hashtable GetWordtable(System.String wordfile)
{
    System.IO.FileInfo wordlistFile = new System.IO.FileInfo(wordfile);
    return GetWordtable(wordlistFile);
}
/// <summary>
/// Loads the word set from the given file with GetWordSet and converts it with
/// MakeWordTable.
/// </summary>
/// <param name="wordfile">File object that points to the wordlist.</param>
/// <returns>The Hashtable returned by MakeWordTable for the file's word set.</returns>
/// <remarks>Deprecated: use <see cref="GetWordSet(System.IO.FileInfo)"/> instead.</remarks>
public static System.Collections.Hashtable GetWordtable(System.IO.FileInfo wordfile)
{
    // GetWordSet is already declared to return a Hashtable, so no cast is needed.
    System.Collections.Hashtable wordSet = GetWordSet(wordfile);
    return MakeWordTable(wordSet);
}
125 /// <summary> Builds a wordlist table, using words as both keys and values
126 /// for backward compatibility.
129 /// <param name="wordSet"> stopword set
131 private static System
.Collections
.Hashtable
MakeWordTable(System
.Collections
.Hashtable wordSet
)
133 System
.Collections
.Hashtable table
= System
.Collections
.Hashtable
.Synchronized(new System
.Collections
.Hashtable());
134 for (System
.Collections
.IEnumerator iter
= wordSet
.GetEnumerator(); iter
.MoveNext(); )
136 System
.String word
= (System
.String
) iter
.Current
;