4 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 // FIXME: Implement a shared textcache
32 using System
.Threading
;
33 using System
.Collections
;
36 using System
.Xml
.Serialization
;
41 namespace Beagle
.Daemon
// Boolean command-line switches; both start out false.
// NOTE(review): the code that sets them is not visible in this view — the usage
// text lists "--recursive" and "--debug", presumably their sources; confirm.
45 static bool arg_recursive
= false, arg_debug
= false;
// Path remapping table. Main stores entries keyed by the text before the first
// ':' of a remap argument, with the text after it as the value; RemapUri reads
// the entries back as (string) key / (string) value pairs.
47 static Hashtable remap_table
= new Hashtable ();
// arg_output: index location; Main assigns it and passes it to LuceneDriver.
// arg_tag: value attached to each indexable as the "Tag" keyword property.
// arg_configuration: not referenced by any code visible in this view.
49 static string arg_output
, arg_tag
, arg_configuration
;
51 /////////////////////////////////////////////////////////
// Attribute store objects created in Main: backing_fa_store is built from the
// driver's IndexDirectory and Fingerprint, and fa_store wraps it.
53 static FileAttributesStore_Sqlite backing_fa_store
;
54 static FileAttributesStore fa_store
;
// Lucene driver created in Main from arg_output; IndexWorker and
// OnChildIndexableEvent add indexables to it.
55 static LuceneDriver driver
;
// crawling: read by IndexWorker when pending_files is empty.
// NOTE(review): the writes to crawling and shutdown are not visible in this
// view — presumably CrawlWorker clears crawling and the signal handler sets
// shutdown; confirm.
57 static bool crawling
= true, shutdown
= false;
// Work queues. pending_files holds FileInfo objects and pending_directories
// holds DirectoryInfo objects (see the casts at the Dequeue call sites).
59 static Queue pending_files
= new Queue ();
60 static Queue pending_directories
= new Queue ();
// IndexWorker logs a "Flushing driver" message whenever driver.PendingAdds is a
// multiple of this value.
62 const int BATCH_SIZE
= 30;
64 /////////////////////////////////////////////////////////
// Entry point. Walks the argument array, records remap entries and the output
// location, queues the input paths as directories or files, then creates the
// Lucene driver and attribute stores and starts the crawl and index threads.
66 static void Main (string [] args
)
72 while (i
< args
.Length
) {
74 string arg
= args
[i
];
// next_arg is null when there is no further argument.
// NOTE(review): as shown, arg and next_arg both read args[i]; an increment of
// i between the two statements is presumably not visible in this view — confirm.
76 string next_arg
= i
< args
.Length
? args
[i
] : null;
// Remap handling: split next_arg at the first ':' into key and value.
// NOTE(review): the condition guarding the "Invalid remap argument" exit is not
// visible here — presumably j == -1; confirm.
99 int j
= next_arg
.IndexOf (":");
102 Console
.WriteLine ("Invalid remap argument: {0}", next_arg
);
103 Environment
.Exit (1);
106 remap_table
[next_arg
.Substring (0, j
)] = next_arg
.Substring (j
+1);
// With no following argument, arg is taken as the output location and made
// absolute if it is not already rooted.
112 if (next_arg
== null) {
113 arg_output
= Path
.IsPathRooted (arg
) ? arg
: Path
.GetFullPath (arg
);
// Input path, made absolute in the same way. It is queued as a directory if one
// exists at that path, otherwise as a file if one exists; no other case is
// handled in the visible code.
115 string path
= Path
.IsPathRooted (arg
) ? arg
: Path
.GetFullPath (arg
);
117 if (Directory
.Exists (path
))
118 pending_directories
.Enqueue (new DirectoryInfo (path
));
119 else if (File
.Exists (path
))
120 pending_files
.Enqueue (new FileInfo (path
));
126 /////////////////////////////////////////////////////////
// The check is on the parent directory of arg_output, not on arg_output itself.
128 if (!Directory
.Exists (Path
.GetDirectoryName (arg_output
))) {
129 Console
.WriteLine ("Index directory not available for construction: {0}", arg_output
);
130 Environment
.Exit (1);
// Create the driver for the output location and subscribe to its child
// indexable event so those indexables are added as well.
133 driver
= new LuceneDriver (arg_output
);
134 driver
.ChildIndexableEvent
+= new IIndexerChildIndexableHandler (OnChildIndexableEvent
);
// The sqlite-backed attribute store is built from the driver's index directory
// and fingerprint; fa_store wraps it.
136 backing_fa_store
= new FileAttributesStore_Sqlite (driver
.IndexDirectory
, driver
.Fingerprint
);
137 fa_store
= new FileAttributesStore (backing_fa_store
);
139 // Set up signal handlers
140 SetupSignalHandlers ();
142 // Start the thread that does the crawling
143 ExceptionHandlingThread
.Start (new ThreadStart (CrawlWorker
));
145 // Start the thread that does the actual indexing
146 ExceptionHandlingThread
.Start (new ThreadStart (IndexWorker
));
149 /////////////////////////////////////////////////////////////////
// Crawl thread body (started from Main). Drains pending_directories: for each
// dequeued directory, its non-ignored subdirectories are queued for a later
// pass and its non-ignored files are appended to pending_files.
151 static void CrawlWorker ()
153 Logger
.Log
.Debug ("Starting CrawlWorker");
158 while (pending_directories
.Count
> 0) {
159 DirectoryInfo dir
= (DirectoryInfo
) pending_directories
.Dequeue ();
// Subdirectories go back onto the same queue this loop is draining.
163 foreach (DirectoryInfo subdir
in DirectoryWalker
.GetDirectoryInfos (dir
))
164 if (!Ignore (subdir
.FullName
))
165 pending_directories
.Enqueue (subdir
);
167 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
))
168 if (!Ignore (file
.FullName
))
169 pending_files
.Enqueue (file
);
// NOTE(review): DirectoryNotFoundException is swallowed without logging and the
// variable e is unused; presumably a deliberate best-effort skip — confirm.
171 } catch (DirectoryNotFoundException e
) {}
// NOTE(review): {0} is the current length of pending_files, which IndexWorker
// dequeues from, so it may be lower than the number of files actually scanned.
176 Logger
.Log
.Debug ("Scanned {0} files in {1} directories", pending_files
.Count
, count_dirs
);
177 Logger
.Log
.Debug ("CrawlWorker Done");
182 /////////////////////////////////////////////////////////////////
// Index thread body (started from Main). Takes files off pending_files, builds
// an Indexable for each one that still needs indexing, adds it to the Lucene
// driver and records the file's last-write time in the attribute store. Before
// returning, it handles any remaining pending adds and flushes the sqlite store.
184 static void IndexWorker ()
186 Logger
.Log
.Debug ("Starting IndexWorker");
// NOTE(review): pending_files is a plain System.Collections.Queue and is also
// filled by CrawlWorker, which Main starts as a separate thread; no locking is
// visible here — confirm whether synchronization exists elsewhere.
191 if (pending_files
.Count
> 0) {
192 FileInfo file
= (FileInfo
) pending_files
.Dequeue ();
193 Uri uri
= UriFu
.PathToFileUri (file
.FullName
);
195 // Check that we really should be indexing the file
// The condition is true when the file no longer exists, is ignored, or the
// attribute store reports it as up to date.
196 if (!file
.Exists
|| Ignore (file
.FullName
) || fa_store
.IsUpToDate (file
.FullName
))
199 // Create the indexable
// Uri is replaced by the remapped URI, while ContentUri keeps the original one.
200 indexable
= new Indexable (uri
);
201 indexable
.Uri
= RemapUri (uri
);
202 indexable
.ContentUri
= uri
;
203 indexable
.CacheContent
= false;
205 // Tag the item for easy identification (for say, removal)
207 indexable
.AddProperty (Property
.NewKeyword("Tag", arg_tag
));
209 driver
.Add (indexable
);
// Record the file's last-write time against its path in the attribute store.
211 fa_store
.AttachTimestamp (file
.FullName
, FileSystem
.GetLastWriteTime (file
.FullName
));
// Taken whenever the driver's pending-add count is a multiple of BATCH_SIZE.
// NOTE(review): the flush call itself is not visible in this view — confirm.
213 if (driver
.PendingAdds
% BATCH_SIZE
== 0) {
214 Logger
.Log
.Debug ("Flushing driver, {0} items in queue", pending_files
.Count
);
// Queue is empty while the crawling flag is still true.
217 } else if (crawling
) {
218 Logger
.Log
.Debug ("IndexWorker: La la la...");
225 // Flush out any pending changes in either the
226 // LuceneDriver or the sqlite attributes database.
227 while (driver
.PendingAdds
!= 0)
230 backing_fa_store
.Flush ();
232 Logger
.Log
.Debug ("IndexWorker Done");
// Handler subscribed to driver.ChildIndexableEvent in Main. For every indexable
// in the array it calls StoreStream () on it and then adds it to the driver.
235 static void OnChildIndexableEvent (Indexable
[] indexables
) {
236 foreach (Indexable indexable
in indexables
) {
237 indexable
.StoreStream ();
238 driver
.Add (indexable
);
242 /////////////////////////////////////////////////////////////////
244 // From BeagleDaemon.cs
246 // The integer values of the Mono.Posix.Signal enumeration don't actually
247 // match the real Linux signal numbers. Oops!
248 // This is fixed in Mono.Unix, but for the moment we want to maintain
249 // compatibility with mono 1.0.x.
// Raw signal numbers that SetupSignalHandlers passes to
// Mono.Posix.Syscall.signal.
250 const int ACTUAL_LINUX_SIGINT
= 2;
251 const int ACTUAL_LINUX_SIGQUIT
= 3;
252 const int ACTUAL_LINUX_SIGTERM
= 15;
// Registers OurSignalHandler for ACTUAL_LINUX_SIGINT, ACTUAL_LINUX_SIGQUIT and
// ACTUAL_LINUX_SIGTERM through Mono.Posix.Syscall.signal. Called once from Main
// before the worker threads are started.
254 static void SetupSignalHandlers ()
256 // Force OurSignalHandler to be JITed
// -1 is passed as a dummy signal number for this warm-up call.
257 OurSignalHandler (-1);
259 // Set up our signal handler
// One delegate instance is shared by all three registrations.
260 Mono
.Posix
.Syscall
.sighandler_t sig_handler
;
261 sig_handler
= new Mono
.Posix
.Syscall
.sighandler_t (OurSignalHandler
);
262 Mono
.Posix
.Syscall
.signal (ACTUAL_LINUX_SIGINT
, sig_handler
);
263 Mono
.Posix
.Syscall
.signal (ACTUAL_LINUX_SIGQUIT
, sig_handler
);
264 Mono
.Posix
.Syscall
.signal (ACTUAL_LINUX_SIGTERM
, sig_handler
);
// Signal callback installed by SetupSignalHandlers; logs that a shutdown was
// requested. SetupSignalHandlers also calls it directly with -1 as a warm-up.
// NOTE(review): the early return for that warm-up call and the write to the
// shutdown flag are not visible in this view — confirm.
267 static void OurSignalHandler (int signal
)
269 // This allows us to call OurSignalHandler w/o doing anything.
270 // We want to call it once to ensure that it is pre-JITed.
274 Logger
.Log
.Debug ("Shutdown Requested");
278 /////////////////////////////////////////////////////////////////
// Writes the usage text to the console and then exits the process with status 0.
// NOTE(review): the option description for --recursive contains the typo
// "recursivly"; it is user-facing output and is left unchanged here.
280 static void PrintUsage ()
283 "beagle-build-index: Build an index.\n" +
284 "Web page: http://www.gnome.org/projects/beagle\n" +
285 "Copyright (C) 2005 Novell, Inc.\n\n";
288 "Usage: beagle-build-index [OPTIONS] <path> [path] [path] <index path>\n\n" +
290 " --remap [path1:path2]\tRemap data paths to fit target. \n" +
291 " --tag [tag]\t\tTag index data for identification.\n" +
292 " --recursive\t\tCrawl source path recursivly.\n" +
293 " --debug\t\tEcho verbose debugging information.\n";
295 Console
.WriteLine (usage
);
// Exit status 0: showing the usage text is not reported as an error.
296 Environment
.Exit (0);
299 /////////////////////////////////////////////////////////
// Applies remap_table to a URI: for an entry whose key occurs in uri.LocalPath,
// returns a new Uri built from the local path with that key replaced by the
// entry's value.
// NOTE(review): the result when no entry matches is not visible in this view —
// presumably the original uri is returned; confirm.
301 static Uri
RemapUri (Uri uri
)
303 // FIXME: This is ghetto
// The match is a substring test anywhere in the path, not only at its start,
// and String.Replace rewrites every occurrence of the key.
304 foreach (DictionaryEntry dict
in remap_table
) {
305 if (uri
.LocalPath
.IndexOf ((string) dict
.Key
) == -1)
307 return new Uri (uri
.LocalPath
.Replace ((string) dict
.Key
, (string) dict
.Value
));
312 static bool Ignore (string path
)
314 if (FileSystem
.IsSymLink (path
))
317 // FIXME: Add more stuff here