Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / ManageIndex.cs
blob4712502496a161d8de2d2aec7b8cd109347125d5
1 //
2 // ManageIndex.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Net;
32 using Beagle;
33 using Beagle.Util;
34 using Beagle.Daemon;
36 using Lucene.Net.Index;
37 using Lucene.Net.Search;
38 using Lucene.Net.Documents;
40 namespace Beagle.Daemon
42 class ManageIndex
// Indexing driver for the target index. It is assigned in Main and then
// used by the individual command handlers.
private static LuceneIndexingDriver driver;

// Whitelist of the file names that may be present in the target directory
// before we blow it away. Main bails out as soon as it meets a file that
// is not named here.
private static string [] allowed_files = new string [] {
    "FileAttributesStore.db",
    "fingerprint",
    "version"
};

// Same whitelist idea as allowed_files, for sub-directory names.
private static string [] allowed_dirs = new string [] {
    "Locks",
    "PrimaryIndex",
    "SecondaryIndex",
    "TextCache"
};
// Entry point of beagle-manage-index.
//
// args [0] is the path of the index to operate on (relative paths are
// resolved against the current directory), args [1] is the command and
// args [2] is the command's argument where one is needed ("merge").
//
// Exits with status 1 when the index directory is missing or contains
// anything that is not on the allowed_files / allowed_dirs whitelists.
// Usage errors go through PrintUsage (), which terminates the process.
static void Main (string [] args)
{
    if (args.Length < 2) {
        // PrintUsage () ends the process; the return only makes the
        // control flow explicit for the reader.
        PrintUsage ();
        return;
    }

    string index_dir = (Path.IsPathRooted (args [0])) ? args [0] : Path.GetFullPath (args [0]);

    if (!Directory.Exists (index_dir)) {
        Console.WriteLine ("Could not find index: {0}", index_dir);
        Environment.Exit (1);
    }

    // Be *EXTRA PARANOID* about the contents of the target
    // directory, because creating an indexing driver will
    // nuke it.
    // (No second Directory.Exists test is needed here: the failure
    // path above has already terminated the process.)
    foreach (FileInfo info in DirectoryWalker.GetFileInfos (index_dir)) {
        if (Array.IndexOf (allowed_files, info.Name) == -1) {
            Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagle file {1} was found", index_dir, info.FullName);
            Environment.Exit (1);
        }
    }

    foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos (index_dir)) {
        if (Array.IndexOf (allowed_dirs, info.Name) == -1) {
            Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagle directory {1} was found", index_dir, info.FullName);
            Environment.Exit (1);
        }
    }

    // "merge" needs the path of the index to merge. Reject the call
    // before the driver is created instead of failing later with an
    // IndexOutOfRangeException on args [2].
    if (args [1] == "merge" && args.Length < 3) {
        Console.WriteLine ("Missing argument for command: {0}", args [1]);
        PrintUsage ();
        return;
    }

    driver = new LuceneIndexingDriver (index_dir, false);

    switch (args [1]) {
#if false
    case "list":
        ExecuteList ();
        break;
    case "remove":
        ExecuteRemove (args [2]);
        break;
#endif
    case "info":
        ExecuteInfo ();
        break;

    case "merge":
        ExecuteMerge (args [2]);
        break;

    case "optimize":
        ExecuteOptimize ();
        break;
    default:
        Console.WriteLine ("Unknown command: {0}", args [1]);
        PrintUsage ();
        break;
    }
}
124 /////////////////////////////////////////////////////////
// Writes the program banner followed by the command synopsis to standard
// output and then terminates the process with exit status 0.
static void PrintUsage ()
{
    // Program name, project page and copyright notice.
    string banner =
        "beagle-manage-index: Low-level Lucene index management\n" +
        "Web page: http://www.gnome.org/projects/beagle\n" +
        "Copyright (C) 2004-2005 Novell, Inc.\n\n";

    // Invocation syntax plus one line per supported command.
    string synopsis =
        "Usage: beagle-manage-index <index_path> <command> [OPTIONS]\n\n" +
        "Commands:\n" +
#if false
        " list\t\t\t\tList all entries in the index.\n" +
        " remove <uri|tag>\t\tRemove entries corresponding to the criterias specified.\n" +
#endif
        " merge <index to merge>\tMerge another Lucene index into the target.\n" +
        " info\t\t\t\tPrint basic index information.\n" +
        " optimize\t\t\tOptimize index.\n";

    Console.WriteLine (banner + synopsis);
    Environment.Exit (0);
}
149 /////////////////////////////////////////////////////////
151 #if false
// Disabled: this method sits inside the surrounding "#if false" region.
// Opens an IndexReader on the driver's store and writes every document
// that is not flagged as deleted to the console, then closes the reader.
//
// NOTE(review): index_dir is a local variable of Main in this file, so it
// is not in scope here; this would need fixing before re-enabling.
152 static void ExecuteList ()
154 { LuceneDriver driver = new LuceneDriver (index_dir, true);
156 IndexReader reader = IndexReader.Open (driver.Store);
// NOTE(review): the loop is bounded by NumDocs () while also skipping
// deleted ids. In Lucene NumDocs () normally excludes deleted documents,
// so MaxDoc () may be the intended bound -- confirm before re-enabling.
158 for (int i = 0; i < reader.NumDocs (); i++) {
159 if (reader.IsDeleted (i))
160 continue;
161 Console.WriteLine (reader.Document (i));
164 reader.Close ();
167 /////////////////////////////////////////////////////////
// Disabled: this method sits inside the surrounding "#if false" region.
// Removes entries from the index. When arg contains "://" it is treated
// as a URI and that single entry is removed; otherwise arg is treated as
// a tag and every hit for the "prop:k:Tag" term is removed.
//
// NOTE(review): index_dir is a local variable of Main in this file, so it
// is not in scope here; this would need fixing before re-enabling.
169 static void ExecuteRemove (string arg)
171 LuceneDriver driver = new LuceneDriver (index_dir);
173 if (arg.IndexOf ("://") != -1) {
174 Uri uri = new Uri (arg);
175 ICollection hits = driver.DoQueryByUri (uri);
// Nothing indexed under this URI: report it and exit with status 1.
177 if (hits == null || hits.Count == 0) {
178 Console.WriteLine ("Uri not found in the index: {0}", uri);
179 Environment.Exit (1);
182 driver.Remove (uri);
183 driver.Flush ();
185 Console.WriteLine ("Successfully removed Uri: {0}", uri);
186 } else {
// Tag branch: search the driver's store for documents carrying the tag.
187 IndexSearcher searcher = new IndexSearcher (driver.Store);
188 BooleanQuery query = new BooleanQuery ();
190 Term term = new Term ("prop:k:Tag", arg); // Argh
191 TermQuery term_query = new TermQuery (term);
// NOTE(review): the two booleans are presumably (required, prohibited)
// from the old Lucene BooleanQuery.Add signature -- confirm.
192 query.Add (term_query, false, false);
194 Hits hits = searcher.Search (query);
195 int n_hits = hits.Length ();
197 string uri;
199 for (int i = 0; i < n_hits; ++i) {
200 Document doc = hits.Doc (i);
202 uri = doc.Get ("Uri");
// Hits without a "Uri" field are skipped and not removed.
204 if (uri == null)
205 continue;
207 driver.Remove (UriFu.UriStringToUri (uri));
210 driver.Flush ();
// NOTE(review): n_hits counts every hit, including the ones skipped above
// for lacking a "Uri" field, so the number printed here can be larger
// than the number of entries actually removed.
212 Console.WriteLine ("Successfully removed {0} items with tag: {1}", n_hits, arg);
215 #endif
216 /////////////////////////////////////////////////////////
// Merges the external Beagle index located at index_to_merge into the
// index handled by the shared driver. Two steps are performed in order:
// the Lucene data is merged through driver.Merge (according to the
// original author's note this covers the primary and secondary Lucene
// indexes), then the sqlite file attributes stores are merged.
//
// A relative index_to_merge is resolved to a full path first. A missing
// directory, or an exception in either step, is reported on the console
// and ends the process with exit status 1.
static void ExecuteMerge (string index_to_merge)
{
    string source_dir = Path.IsPathRooted (index_to_merge)
        ? index_to_merge
        : Path.GetFullPath (index_to_merge);

    if (!Directory.Exists (source_dir)) {
        Console.WriteLine ("Could not find index to merge: {0}", source_dir);
        Environment.Exit (1);
    }

    // Set the IO priority so we don't slow down the system
    IoPriority.ReduceIoPriority ();

    LuceneQueryingDriver source_driver = new LuceneQueryingDriver (source_dir, -1, false);

    // Times both merge steps; the result is part of the success message.
    Stopwatch timer = new Stopwatch ();
    timer.Start ();

    // Step 1: the lucene index
    try {
        driver.Merge (source_driver);
    } catch (Exception ex) {
        Console.WriteLine ("Index merging (lucene) failed: {0}", ex);
        Environment.Exit (1);
    }

    // Step 2: the file attributes stores (target first, then source)
    try {
        FileAttributesStore_Sqlite target_store =
            new FileAttributesStore_Sqlite (driver.TopDirectory, driver.Fingerprint);
        FileAttributesStore_Sqlite source_store =
            new FileAttributesStore_Sqlite (source_driver.TopDirectory, source_driver.Fingerprint);

        target_store.Merge (source_store);
    } catch (Exception ex) {
        Console.WriteLine ("Index merging (attributes store) failed: {0}", ex);
        Environment.Exit (1);
    }

    timer.Stop ();

    Console.WriteLine ("Successfully merged index {0} into {1} in {2}", source_dir, driver.TopDirectory, timer);
}
266 /////////////////////////////////////////////////////////
// Prints the number of entries that the shared driver reports for the
// index (driver.GetItemCount) on a single console line.
static void ExecuteInfo ()
{
    string report = String.Format ("Total number of entries in index: {0}", driver.GetItemCount ());
    Console.WriteLine (report);
}
275 /////////////////////////////////////////////////////////
// Runs driver.OptimizeNow () on the index with reduced IO priority and
// prints the index directory together with the elapsed time.
static void ExecuteOptimize ()
{
    // Set the IO priority so we don't slow down the system
    IoPriority.ReduceIoPriority ();

    Stopwatch timer = new Stopwatch ();

    timer.Start ();
    driver.OptimizeNow ();
    timer.Stop ();

    string report = String.Format ("Optimized index {0} in {1}", driver.TopDirectory, timer);
    Console.WriteLine (report);
}