4 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 using System
.Threading
;
31 using System
.Collections
;
34 using System
.Xml
.Serialization
;
38 using FSQ
= Beagle
.Daemon
.FileSystemQueryable
.FileSystemQueryable
;
40 namespace Beagle
.Daemon
// Command-line switches.  arg_cache_text and arg_disable_filtering are
// turned on by "--enable-text-cache" and "--disable-filtering" in Main.
static bool arg_recursive = false;
static bool arg_debug = false;
static bool arg_cache_text = false;
static bool arg_disable_filtering = false;

// Path remappings parsed in Main from "from:to" style arguments
// (key = text before the first ':', value = text after it).
static Hashtable remap_table = new Hashtable ();

// arg_output is the absolute index location handed to the indexing driver;
// arg_tag is the value stored in the "Tag" keyword property of indexables.
static string arg_output;
static string arg_tag;

/////////////////////////////////////////////////////////

// File attribute stores; fa_store wraps backing_fa_store.
static FileAttributesStore_Sqlite backing_fa_store;
static FileAttributesStore fa_store;

// The index driver that indexables are added to and flushed through.
static LuceneIndexingDriver driver;

// Worker state flags.  IndexWorker consults 'crawling' when the file
// queue is empty.
static bool crawling = true;
static bool shutdown = false;

// ExcludeItem patterns collected from --allow-pattern / --deny-pattern.
static ArrayList allowed_patterns = new ArrayList ();
static ArrayList denied_patterns = new ArrayList ();

// Work queues: filled by Main and CrawlWorker, pending_files is drained
// by IndexWorker.
static Queue pending_files = new Queue ();
static Queue pending_directories = new Queue ();

// IndexWorker flushes the driver whenever pending_adds is a multiple of this.
const int BATCH_SIZE = 30;
67 /////////////////////////////////////////////////////////
// Program entry point: parses the command line, prepares the index driver
// and file attribute stores, installs signal handlers and starts the
// crawler and indexer threads.
//
// args: raw command-line arguments.  Recognised switches visible in this
// view are --enable-text-cache, --disable-filtering, --allow-pattern and
// --deny-pattern, plus a "from:to" remap argument and an output path; any
// other argument is treated as a file or directory to index.
//
// NOTE(review): several lines of this method (the switch header, some case
// labels, the index increment) are not visible in this view.
69 static void Main (string [] args
)
// Walk the argument vector one entry at a time.
75 while (i
< args
.Length
) {
77 string arg
= args
[i
];
// next_arg is the value following the current switch, or null at the end.
// NOTE(review): presumably i is advanced between these two reads; the
// increment is not visible here - confirm.
79 string next_arg
= i
< args
.Length
? args
[i
] : null;
98 case "--enable-text-cache":
99 arg_cache_text
= true;
103 if (next_arg
== null)
// A remap value is split at its first ':' into a key and a replacement.
106 int j
= next_arg
.IndexOf (":");
109 Console
.WriteLine ("Invalid remap argument: {0}", next_arg
);
110 Environment
.Exit (1);
113 remap_table
[next_arg
.Substring (0, j
)] = next_arg
.Substring (j
+1);
// The output location is always stored as an absolute path.
119 if (next_arg
!= null)
120 arg_output
= Path
.IsPathRooted (next_arg
) ? next_arg
: Path
.GetFullPath (next_arg
);
124 case "--disable-filtering":
125 arg_disable_filtering
= true;
// Allow patterns: a comma-separated value yields one ExcludeItem per piece,
// otherwise the whole value becomes a single pattern.
128 case "--allow-pattern":
129 if (next_arg
== null)
132 if (next_arg
.IndexOf (',') != -1) {
133 foreach (string pattern
in next_arg
.Split (','))
134 allowed_patterns
.Add (new ExcludeItem (ExcludeType
.Pattern
, pattern
));
137 allowed_patterns
.Add (new ExcludeItem (ExcludeType
.Pattern
, next_arg
));
// Deny patterns are parsed exactly like allow patterns.
143 case "--deny-pattern":
144 if (next_arg
== null)
147 if (next_arg
.IndexOf (',') != -1) {
148 foreach (string pattern
in next_arg
.Split (','))
149 denied_patterns
.Add (new ExcludeItem (ExcludeType
.Pattern
, pattern
));
152 denied_patterns
.Add (new ExcludeItem (ExcludeType
.Pattern
, next_arg
));
// Anything else is a source path: make it absolute, then queue it as a
// directory or a file depending on what exists on disk.
159 string path
= Path
.IsPathRooted (arg
) ? arg
: Path
.GetFullPath (arg
);
161 if (Directory
.Exists (path
))
162 pending_directories
.Enqueue (new DirectoryInfo (path
));
163 else if (File
.Exists (path
))
164 pending_files
.Enqueue (new FileInfo (path
));
169 /////////////////////////////////////////////////////////
// Refuse to continue when the parent directory of the output path is missing.
171 if (!Directory
.Exists (Path
.GetDirectoryName (arg_output
))) {
172 Console
.WriteLine ("Index directory not available for construction: {0}", arg_output
);
173 Environment
.Exit (1);
// Create the index driver; a text cache is attached only when requested.
176 driver
= new LuceneIndexingDriver (arg_output
);
177 driver
.TextCache
= (arg_cache_text
) ? new TextCache (arg_output
) : null;
// The attribute store is built from the driver's top directory and fingerprint.
179 backing_fa_store
= new FileAttributesStore_Sqlite (driver
.TopDirectory
, driver
.Fingerprint
);
180 fa_store
= new FileAttributesStore (backing_fa_store
);
182 // Set up signal handlers
183 SetupSignalHandlers ();
185 // Start the thread that does the crawling
186 ExceptionHandlingThread
.Start (new ThreadStart (CrawlWorker
));
188 // Start the thread that does the actual indexing
189 ExceptionHandlingThread
.Start (new ThreadStart (IndexWorker
));
192 /////////////////////////////////////////////////////////////////
// Thread body that walks the queued directories: each dequeued directory
// contributes its non-ignored subdirectories to pending_directories and
// its files to pending_files, until no directories remain.
//
// NOTE(review): the try header, the directory counter and the end of the
// loop are not visible in this view.
194 static void CrawlWorker ()
196 Logger
.Log
.Debug ("Starting CrawlWorker");
// Queue-driven traversal: keep going while directories are pending.
201 while (pending_directories
.Count
> 0) {
202 DirectoryInfo dir
= (DirectoryInfo
) pending_directories
.Dequeue ();
// Subdirectories rejected by Ignore are not descended into.
206 foreach (DirectoryInfo subdir
in DirectoryWalker
.GetDirectoryInfos (dir
))
207 if (!Ignore (subdir
))
208 pending_directories
.Enqueue (subdir
);
210 foreach (FileInfo file
in DirectoryWalker
.GetFileInfos (dir
))
212 pending_files
.Enqueue (file
);
// NOTE(review): a directory that disappears mid-crawl is skipped silently;
// the exception is neither logged nor rethrown.
214 } catch (DirectoryNotFoundException e
) {}
222 Logger
.Log
.Debug ("Scanned {0} files in {1} directories", pending_files
.Count
, count_dirs
);
223 Logger
.Log
.Debug ("CrawlWorker Done");
228 /////////////////////////////////////////////////////////////////
// Flushes the given indexer with FlushAndBlock and processes the receipts
// it returns.
//
// indexer: the indexer to flush.
// Declared to return an IndexerReceipt array (the return statement itself
// is not visible in this view).
//
// For each IndexerAddedReceipt the stored file attributes of the receipt's
// local path are refreshed; for each IndexerChildIndexablesReceipt the
// child indexables are iterated (the loop body is not visible here).
230 static IndexerReceipt
[] FlushIndexer (IIndexer indexer
)
232 IndexerReceipt
[] receipts
;
233 receipts
= indexer
.FlushAndBlock ();
236 foreach (IndexerReceipt raw_r
in receipts
) {
238 if (raw_r
is IndexerAddedReceipt
) {
239 // Update the file attributes
240 IndexerAddedReceipt r
= (IndexerAddedReceipt
) raw_r
;
242 string path
= r
.Uri
.LocalPath
;
245 attr
= fa_store
.ReadOrCreate (path
);
// Record the file's current write time and the filter name/version
// reported by the receipt.
247 attr
.LastWriteTime
= FileSystem
.GetLastWriteTime (path
);
248 attr
.FilterName
= r
.FilterName
;
249 attr
.FilterVersion
= r
.FilterVersion
;
251 fa_store
.Write (attr
);
253 } else if (raw_r
is IndexerChildIndexablesReceipt
) {
254 // Add any child indexables back into our indexer
255 IndexerChildIndexablesReceipt r
= (IndexerChildIndexablesReceipt
) raw_r
;
256 foreach (Indexable i
in r
.Children
)
// Thread body that drains pending_files: each dequeued file is checked,
// wrapped in an Indexable and added to the driver, which is flushed
// whenever pending_adds is a multiple of BATCH_SIZE.  After the loop the
// driver and the backing attribute store are flushed one last time.
//
// NOTE(review): the loop header, the skip action after the eligibility
// check and the idle/exit branches are not visible in this view.
264 static void IndexWorker ()
266 Logger
.Log
.Debug ("Starting IndexWorker");
269 int pending_adds
= 0;
272 if (pending_files
.Count
> 0) {
273 FileInfo file
= (FileInfo
) pending_files
.Dequeue ();
274 Uri uri
= UriFu
.PathToFileUri (file
.FullName
);
276 // Check that we really should be indexing the file
277 if (!file
.Exists
|| Ignore (file
) || fa_store
.IsUpToDate (file
.FullName
))
280 // Create the indexable and add the standard properties we
281 // use in the FileSystemQueryable.
282 indexable
= new Indexable (uri
);
283 FSQ
.AddStandardPropertiesToIndexable (indexable
, file
.Name
, Guid
.Empty
, false);
285 // Disable filtering and only index file attributes
286 if (arg_disable_filtering
)
287 indexable
.Filtering
= IndexableFiltering
.Never
;
289 // Tag the item for easy identification (for say, removal)
291 indexable
.AddProperty (Property
.NewKeyword("Tag", arg_tag
));
293 driver
.Add (indexable
);
// Flush in batches rather than after every single add.
296 if (pending_adds
% BATCH_SIZE
== 0) {
297 Logger
.Log
.Debug ("Flushing driver, {0} items in queue", pending_files
.Count
);
298 FlushIndexer (driver
);
// Queue is empty but the crawler has not finished yet.
301 } else if (crawling
) {
302 //Logger.Log.Debug ("IndexWorker: La la la...");
309 // Call Flush one last time.
310 // This should be a totally safe no-op if there are no pending operations.
311 // FIXME: This is incorrect. We will drop any children in the final flush.
312 FlushIndexer (driver
);
314 backing_fa_store
.Flush ();
316 Logger
.Log
.Debug ("IndexWorker Done");
319 /////////////////////////////////////////////////////////////////
// From BeagleDaemon.cs
//
// The integer values of the Mono.Posix.Signal enumeration do not match the
// real Linux signal numbers.  Mono.Unix fixes this, but for the moment we
// want to stay compatible with mono 1.0.x, so the raw numbers are spelled
// out here.
const int ACTUAL_LINUX_SIGINT  = 2,
          ACTUAL_LINUX_SIGQUIT = 3,
          ACTUAL_LINUX_SIGTERM = 15;
// Installs OurSignalHandler for SIGINT, SIGQUIT and SIGTERM (in that order)
// using the raw Linux signal numbers.
static void SetupSignalHandlers ()
{
	// Call the handler once with a dummy value so that it is JITed
	// before it is ever registered for a signal.
	OurSignalHandler (-1);

	// One delegate instance is shared by all three registrations.
	Mono.Posix.Syscall.sighandler_t handler_delegate =
		new Mono.Posix.Syscall.sighandler_t (OurSignalHandler);

	int [] handled_signals = new int [] {
		ACTUAL_LINUX_SIGINT,
		ACTUAL_LINUX_SIGQUIT,
		ACTUAL_LINUX_SIGTERM
	};

	foreach (int signum in handled_signals)
		Mono.Posix.Syscall.signal (signum, handler_delegate);
}
// Signal callback registered by SetupSignalHandlers; logs that a shutdown
// was requested.
//
// signal: the signal number delivered.  SetupSignalHandlers also calls this
// once with -1 purely to force JIT compilation.
//
// NOTE(review): the guard for that dummy call and whatever state change
// follows the log line are not visible in this view.
344 static void OurSignalHandler (int signal
)
346 // This allows us to call OurSignalHandler w/o doing anything.
347 // We want to call it once to ensure that it is pre-JITed.
351 Logger
.Log
.Debug ("Shutdown Requested");
355 /////////////////////////////////////////////////////////////////
// Writes the usage text to the console and terminates the process with
// exit code 0.
//
// NOTE(review): the declarations that the string fragments below are
// assigned to are not visible in this view; only the final 'usage' value
// is seen being printed.
357 static void PrintUsage ()
360 "beagle-build-index: Build an index.\n" +
361 "Web page: http://www.gnome.org/projects/beagle\n" +
362 "Copyright (C) 2005 Novell, Inc.\n\n";
365 "Usage: beagle-build-index [OPTIONS] --target <index_path> <path> [path]\n\n" +
367 " --remap [path1:path2]\t\tRemap data paths to fit target. \n" +
368 " --tag [tag]\t\t\tTag index data for identification.\n" +
369 " --recursive\t\t\tCrawl source path recursivly.\n" +
370 " --enable-text-cache\t\tBuild text-cache of documents used for snippets.\n" +
371 " --disable-filtering\t\tDisable all filtering of files. Only index attributes.\n" +
372 " --allow-pattern [pattern]\tOnly allow files that match the pattern to be indexed.\n" +
373 " --deny-pattern [pattern]\tKeep any files that match the pattern from being indexed.\n" +
374 " --debug\t\t\tEcho verbose debugging information.\n";
376 Console
.WriteLine (usage
);
// Printing usage always ends the program successfully.
377 Environment
.Exit (0);
380 /////////////////////////////////////////////////////////
// Rewrites a URI according to remap_table: for an entry whose key occurs in
// the URI's local path, a new Uri is built from the local path with the key
// text replaced by the entry's value.
//
// uri: the URI to remap.
// Returns the remapped Uri for a matching entry.
//
// NOTE(review): what happens for non-matching entries and when no entry
// matches is not visible in this view.
// NOTE(review): IndexOf/Replace match the key anywhere in the path and
// Replace substitutes every occurrence, not just a leading prefix.
382 static Uri
RemapUri (Uri uri
)
384 // FIXME: This is ghetto
385 foreach (DictionaryEntry dict
in remap_table
) {
386 if (uri
.LocalPath
.IndexOf ((string) dict
.Key
) == -1)
388 return new Uri (uri
.LocalPath
.Replace ((string) dict
.Key
, (string) dict
.Value
));
// Decides whether a directory should be skipped by the crawler.  The only
// check visible here tests whether the directory name starts with ".".
//
// directory: the directory under consideration.
// NOTE(review): the return statements are not visible in this view.
393 static bool Ignore (DirectoryInfo directory
)
395 if (directory
.Name
.StartsWith ("."))
// Decides whether a file should be skipped by the indexer.  The checks
// visible here are, in order: name starts with ".", the path is a symlink,
// a match against allowed_patterns (only consulted when that list is
// non-empty) and a match against denied_patterns.
//
// file: the file under consideration.
// NOTE(review): the outcome of each check (the return statements) is not
// visible in this view.
401 static bool Ignore (FileInfo file
)
403 if (file
.Name
.StartsWith ("."))
406 if (FileSystem
.IsSymLink (file
.FullName
))
// The allow list only applies when at least one allow pattern was given.
409 if (allowed_patterns
.Count
> 0) {
410 foreach (ExcludeItem pattern
in allowed_patterns
)
411 if (pattern
.IsMatch (file
.Name
))
417 foreach (ExcludeItem pattern
in denied_patterns
)
418 if (pattern
.IsMatch (file
.Name
))
421 // FIXME: Add more stuff here