2005-07-05 Gabor Kelemen <kelemeng@gnome.hu>
[beagle.git] / beagled / BuildIndex.cs
blobc7d9d398842c710ff1ce2ca6ee5b20b913b76367
1 //
2 // BuildIndex.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 // FIXME: Implement a shared textcache
29 using System;
30 using System.IO;
31 using System.Net;
32 using System.Threading;
33 using System.Collections;
35 using System.Xml;
36 using System.Xml.Serialization;
38 using Beagle;
39 using Beagle.Util;
41 namespace Beagle.Daemon
43 class BuildIndex
// Command-line options.
static bool arg_recursive = false, arg_debug = false;

// Source path -> replacement path pairs collected from --remap.
static Hashtable remap_table = new Hashtable ();

static string arg_output, arg_tag, arg_configuration;

/////////////////////////////////////////////////////////

static FileAttributesStore_Sqlite backing_fa_store;
static FileAttributesStore fa_store;
static LuceneDriver driver;

// Both flags are written by one thread and polled by another
// (crawling: CrawlWorker -> IndexWorker, shutdown: signal handler ->
// IndexWorker).  They are volatile so that the polling thread always
// observes the latest value instead of a cached one.
static volatile bool crawling = true, shutdown = false;

// pending_files is filled by CrawlWorker while IndexWorker drains it
// concurrently.  A plain Queue is not safe for that, so use the
// synchronized wrapper.
static Queue pending_files = Queue.Synchronized (new Queue ());

// pending_directories is filled by Main before the worker threads are
// started and afterwards only touched by CrawlWorker, so it needs no
// synchronization.
static Queue pending_directories = new Queue ();

// IndexWorker flushes the driver whenever the number of pending adds
// is a multiple of this value.
const int BATCH_SIZE = 30;
64 /////////////////////////////////////////////////////////
// Entry point.  Parses the command line, opens the index and the file
// attributes store in the output directory, installs the signal
// handlers and starts the crawl and index worker threads.
//
// Every non-option argument that is followed by another argument is
// treated as a source path; a non-option argument in last position is
// the index (output) path.
static void Main (string [] args)
{
	if (args.Length < 2)
		PrintUsage ();

	int i = 0;
	while (i < args.Length) {

		string arg = args [i];
		++i;
		string next_arg = i < args.Length ? args [i] : null;

		switch (arg) {
		case "-h":
		case "--help":
			PrintUsage ();
			break;

		case "--tag":
			if (next_arg != null)
				arg_tag = next_arg;
			++i;
			break;

		case "-r":
		case "--recursive":
			arg_recursive = true;
			break;

		case "--debug":
			// Listed in the usage text; without this case the flag
			// fell through to the default branch and was taken for
			// a source path or even for the index path.
			arg_debug = true;
			break;

		case "--remap":
			if (next_arg == null)
				break;

			int j = next_arg.IndexOf (":");

			// Reject a missing separator as well as an empty source
			// part (":target"); an empty source cannot be matched
			// against any path.
			if (j <= 0) {
				Console.WriteLine ("Invalid remap argument: {0}", next_arg);
				Environment.Exit (1);
			}

			remap_table [next_arg.Substring (0, j)] = next_arg.Substring (j+1);

			++i;
			break;

		default:
			if (next_arg == null) {
				arg_output = Path.IsPathRooted (arg) ? arg : Path.GetFullPath (arg);
			} else {
				string path = Path.IsPathRooted (arg) ? arg : Path.GetFullPath (arg);

				if (Directory.Exists (path))
					pending_directories.Enqueue (new DirectoryInfo (path));
				else if (File.Exists (path))
					pending_files.Enqueue (new FileInfo (path));
				else
					// Tell the user instead of dropping the argument silently.
					Console.WriteLine ("Ignoring nonexistent path: {0}", path);
			}
			break;
		}
	}

	/////////////////////////////////////////////////////////

	// The parent of the index directory has to exist already.  This
	// branch is also taken when no index path was given at all
	// (arg_output is still null).
	if (!Directory.Exists (Path.GetDirectoryName (arg_output))) {
		Console.WriteLine ("Index directory not available for construction: {0}", arg_output);
		Environment.Exit (1);
	}

	driver = new LuceneDriver (arg_output);
	driver.ChildIndexableEvent += new IIndexerChildIndexableHandler (OnChildIndexableEvent);

	backing_fa_store = new FileAttributesStore_Sqlite (driver.IndexDirectory, driver.Fingerprint);
	fa_store = new FileAttributesStore (backing_fa_store);

	// Set up signal handlers
	SetupSignalHandlers ();

	// Start the thread that does the crawling
	ExceptionHandlingThread.Start (new ThreadStart (CrawlWorker));

	// Start the thread that does the actual indexing
	ExceptionHandlingThread.Start (new ThreadStart (IndexWorker));
}
149 /////////////////////////////////////////////////////////////////
// Crawl thread.  Drains pending_directories, queueing every file that
// is not ignored onto pending_files and, when --recursive was given,
// every sub-directory that is not ignored back onto
// pending_directories.  Clears the crawling flag when it finishes so
// that IndexWorker knows no more files will arrive.
static void CrawlWorker ()
{
	Logger.Log.Debug ("Starting CrawlWorker");

	int count_dirs = 0;
	int count_files = 0;

	try {
		// Stop early when a shutdown was requested; there is no
		// point in walking the rest of the tree if nothing is going
		// to index it.
		while (!shutdown && pending_directories.Count > 0) {
			DirectoryInfo dir = (DirectoryInfo) pending_directories.Dequeue ();

			try {
				if (arg_recursive) {
					foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir)) {
						if (!Ignore (subdir.FullName))
							pending_directories.Enqueue (subdir);
					}
				}

				foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
					if (!Ignore (file.FullName)) {
						pending_files.Enqueue (file);
						count_files++;
					}
				}

			} catch (DirectoryNotFoundException) {
				// The directory vanished between being queued and
				// being read.  Skip it, but leave a trace.
				Logger.Log.Debug ("Skipping missing directory {0}", dir.FullName);
			}

			count_dirs++;
		}

		// Report what the crawler itself found.  pending_files.Count
		// would only be the number of files not yet dequeued by the
		// index thread.
		Logger.Log.Debug ("Scanned {0} files in {1} directories", count_files, count_dirs);
		Logger.Log.Debug ("CrawlWorker Done");
	} finally {
		// Always release IndexWorker, even if the crawl died with an
		// unexpected exception; otherwise it would wait forever.
		crawling = false;
	}
}
182 /////////////////////////////////////////////////////////////////
// Index thread.  Takes files off pending_files, skips those that no
// longer exist, are ignored or are already up to date according to the
// attributes store, and hands the rest to the Lucene driver.  Runs
// until a shutdown is requested or the crawler has finished and the
// queue is empty, then flushes the driver and the attributes store.
static void IndexWorker ()
{
	Logger.Log.Debug ("Starting IndexWorker");

	Indexable indexable;

	while (!shutdown) {
		// Read the crawling flag BEFORE looking at the queue.  If it
		// were read afterwards, the crawler could enqueue its last
		// files and clear the flag between our two checks, and we
		// would leave with those files still unindexed.
		bool still_crawling = crawling;

		if (pending_files.Count > 0) {
			FileInfo file = (FileInfo) pending_files.Dequeue ();
			Uri uri = UriFu.PathToFileUri (file.FullName);

			// Check that we really should be indexing the file
			if (!file.Exists || Ignore (file.FullName) || fa_store.IsUpToDate (file.FullName))
				continue;

			// Create the indexable.  Uri is the (possibly remapped)
			// identity stored in the index, ContentUri is where the
			// data is actually read from.
			indexable = new Indexable (uri);
			indexable.Uri = RemapUri (uri);
			indexable.ContentUri = uri;
			indexable.CacheContent = false;

			// Tag the item for easy identification (for say, removal)
			if (arg_tag != null)
				indexable.AddProperty (Property.NewKeyword("Tag", arg_tag));

			driver.Add (indexable);

			fa_store.AttachTimestamp (file.FullName, FileSystem.GetLastWriteTime (file.FullName));

			if (driver.PendingAdds % BATCH_SIZE == 0) {
				Logger.Log.Debug ("Flushing driver, {0} items in queue", pending_files.Count);
				driver.Flush ();
			}
		} else if (still_crawling) {
			// Nothing to do yet, but the crawler may still deliver.
			Logger.Log.Debug ("IndexWorker: La la la...");
			Thread.Sleep (50);
		} else {
			// Queue empty and the crawler was already done before we
			// looked at the queue: nothing more will arrive.
			break;
		}
	}

	// Flush out any pending changes in either the
	// LuceneDriver or the sqlite attributes database.
	while (driver.PendingAdds != 0)
		driver.Flush ();

	backing_fa_store.Flush ();

	Logger.Log.Debug ("IndexWorker Done");
}
// Handler for the driver's ChildIndexableEvent: stores the stream of
// each child indexable and queues it on the driver, in array order.
static void OnChildIndexableEvent (Indexable[] indexables) {
	for (int n = 0; n < indexables.Length; n++) {
		Indexable child = indexables [n];
		child.StoreStream ();
		driver.Add (child);
	}
}
242 /////////////////////////////////////////////////////////////////
244 // From BeagleDaemon.cs
246 // The integer values of the Mono.Posix.Signal enumeration don't actually
247 // match the real Linux signal numbers. Oops!
248 // This is fixed in Mono.Unix, but for the moment we want to maintain
249 // compatibility with mono 1.0.x.
250 const int ACTUAL_LINUX_SIGINT = 2;
251 const int ACTUAL_LINUX_SIGQUIT = 3;
252 const int ACTUAL_LINUX_SIGTERM = 15;
// Keeps the handler delegate reachable for the lifetime of the process.
// NOTE(review): Syscall.signal presumably hands the delegate to native
// code; if so, a delegate referenced only by a local could be garbage
// collected while the native side still holds the callback.  Confirm
// against the Mono.Posix binding.
static Mono.Posix.Syscall.sighandler_t installed_sig_handler;

// Installs OurSignalHandler for SIGINT, SIGQUIT and SIGTERM.
static void SetupSignalHandlers ()
{
	// Force OurSignalHandler to be JITed; a negative signal number
	// makes the handler return without doing anything.
	OurSignalHandler (-1);

	// Set up our signal handler
	installed_sig_handler = new Mono.Posix.Syscall.sighandler_t (OurSignalHandler);
	Mono.Posix.Syscall.signal (ACTUAL_LINUX_SIGINT, installed_sig_handler);
	Mono.Posix.Syscall.signal (ACTUAL_LINUX_SIGQUIT, installed_sig_handler);
	Mono.Posix.Syscall.signal (ACTUAL_LINUX_SIGTERM, installed_sig_handler);
}
// Signal callback: logs the request and raises the shutdown flag.
// A negative signal number is a no-op; SetupSignalHandlers uses that
// to get this method JITed before any real signal can arrive.
static void OurSignalHandler (int signal)
{
	bool is_real_signal = signal >= 0;

	if (is_real_signal) {
		Logger.Log.Debug ("Shutdown Requested");
		shutdown = true;
	}
}
278 /////////////////////////////////////////////////////////////////
// Prints the usage text to standard output and terminates the process
// with exit code 0.  Never returns.
static void PrintUsage ()
{
	string usage =
		"beagle-build-index: Build an index.\n" +
		"Web page: http://www.gnome.org/projects/beagle\n" +
		"Copyright (C) 2005 Novell, Inc.\n\n";

	// Lists the short forms accepted by Main (-r, -h) next to the
	// long options.
	usage +=
		"Usage: beagle-build-index [OPTIONS] <path> [path] [path] <index path>\n\n" +
		"Options:\n" +
		" --remap [path1:path2]\tRemap data paths to fit target. \n" +
		" --tag [tag]\t\tTag index data for identification.\n" +
		" -r, --recursive\tCrawl source path recursively.\n" +
		" --debug\t\tEcho verbose debugging information.\n" +
		" -h, --help\t\tPrint this usage message.\n";

	Console.WriteLine (usage);
	Environment.Exit (0);
}
299 /////////////////////////////////////////////////////////
// Applies the --remap table to a file URI.
//
// If the local path of the URI starts with one of the source paths in
// remap_table, that leading part is replaced by the mapped target and
// a new URI is returned.  Otherwise the URI is returned unchanged.
// Only the leading prefix is rewritten: a source path that merely
// occurs somewhere inside the path, or occurs again later in it, is
// left alone.  The comparison is ordinal (exact, case-sensitive).
//
// FIXME: The match is a plain string prefix, not a path-component
// prefix, and with overlapping entries the first one enumerated wins.
static Uri RemapUri (Uri uri)
{
	string path = uri.LocalPath;

	foreach (DictionaryEntry dict in remap_table) {
		string from = (string) dict.Key;

		// An empty source can never be meaningfully remapped.
		if (from.Length == 0)
			continue;

		// CompareOrdinal compares at most from.Length characters, so
		// this is zero exactly when path starts with from.
		if (String.CompareOrdinal (path, 0, from, 0, from.Length) != 0)
			continue;

		return new Uri ((string) dict.Value + path.Substring (from.Length));
	}

	return uri;
}
// Decides whether a path is left out of crawling and indexing.
// Currently the only criterion is that the path is a symbolic link.
static bool Ignore (string path)
{
	// FIXME: Add more stuff here

	return FileSystem.IsSymLink (path);
}