Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / BuildIndex.cs
blob154c7ba4de035b9ab37bbde4d42f9f1ae7b2588d
1 //
2 // BuildIndex.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.Diagnostics;
30 using System.IO;
31 using System.Net;
32 using System.Threading;
34 using System.Xml;
35 using System.Xml.Serialization;
37 using Lucene.Net.Documents;
38 using Lucene.Net.Index;
39 using LNS = Lucene.Net.Search;
41 using Beagle;
42 using Beagle.Util;
43 using FSQ = Beagle.Daemon.FileSystemQueryable.FileSystemQueryable;
45 namespace Beagle.Daemon
47 class BuildIndex
// Raw command line, stored by DoMain after parsing.
static string [] argv;

// Command-line switches.
static bool arg_recursive = false;
static bool arg_delete = false;
static bool arg_debug = false;
static bool arg_cache_text = false;
static bool arg_disable_filtering = false;
static bool arg_disable_restart = false;
static bool arg_disable_directories = false;

// Path substitutions collected from --remap (old -> new).
static Hashtable remap_table = new Hashtable ();

static string arg_output, arg_tag, arg_source;

/////////////////////////////////////////////////////////

// Files and directories that are allowed to be in the target
// directory before we blow it away.  If we encounter any file
// or dir not in this list, we'll bail out.
static string [] allowed_files = {
	"FileAttributesStore.db",
	"fingerprint",
	"version"
};

static string [] allowed_dirs = {
	"Locks",
	"PrimaryIndex",
	"SecondaryIndex",
	"TextCache"
};

/////////////////////////////////////////////////////////

static FileAttributesStore_Sqlite backing_fa_store;
static FileAttributesStore fa_store;

static LuceneIndexingDriver driver;

// These flags are written by one thread (or by the signal handler) and
// polled by the others, so they must not be cached in registers.
static volatile bool crawling = true, indexing = true, shutdown = false, restart = false;

static ArrayList allowed_patterns = new ArrayList ();
static ArrayList denied_patterns = new ArrayList ();

// pending_files is filled by CrawlWorker while IndexWorker drains it
// concurrently; a bare Queue is not safe for that, so use the
// synchronized wrapper.  pending_directories is only touched by DoMain
// before the threads start and by CrawlWorker afterwards.
static Queue pending_files = Queue.Synchronized (new Queue ());
static Queue pending_directories = new Queue ();

// Number of indexables accumulated before IndexWorker flushes the driver.
const int BATCH_SIZE = 30;
92 /////////////////////////////////////////////////////////
// Process entry point.  Delegates to DoMain; anything that escapes it is
// logged and the process exits with status 1.
static void Main (string [] args)
{
	try {
		DoMain (args);
	} catch (Exception unhandled) {
		Logger.Log.Error (unhandled, "Unhandled exception thrown. Exiting immediately.");
		Environment.Exit (1);
	}
}
// Real entry point: parses the command line, sanity-checks the target
// directory, runs the crawl / index / memory-monitor threads to completion
// and, when the memory monitor requested it, re-executes the helper.
static void DoMain (string [] args)
{
	SystemInformation.SetProcessName ("beagle-build-index");

	// PrintUsage exits the process.
	if (args.Length < 2)
		PrintUsage ();

	ParseArguments (args);

	argv = args;

	/////////////////////////////////////////////////////////

	VerifyTarget ();

	// Set the IO priority so we don't slow down the system
	IoPriority.ReduceIoPriority ();

	driver = new LuceneIndexingDriver (arg_output, false);
	driver.TextCache = (arg_cache_text) ? new TextCache (arg_output) : null;
	if (driver.TextCache != null)
		driver.TextCache.WorldReadable = true;

	backing_fa_store = new FileAttributesStore_Sqlite (driver.TopDirectory, driver.Fingerprint);
	fa_store = new FileAttributesStore (backing_fa_store);

	// Set up signal handlers
	SetupSignalHandlers ();

	Thread crawl_thread, index_thread, monitor_thread = null;

	Stopwatch watch = new Stopwatch ();
	watch.Start ();

	// Start the thread that does the crawling
	crawl_thread = ExceptionHandlingThread.Start (new ThreadStart (CrawlWorker));

	// Start the thread that does the actual indexing
	index_thread = ExceptionHandlingThread.Start (new ThreadStart (IndexWorker));

	if (!arg_disable_restart) {
		// Start the thread that monitors memory usage.
		monitor_thread = ExceptionHandlingThread.Start (new ThreadStart (MemoryMonitorWorker));
	}

	// Join all the threads so that we know that we're the only thread still running
	crawl_thread.Join ();
	index_thread.Join ();
	if (monitor_thread != null)
		monitor_thread.Join ();

	watch.Stop ();
	Logger.Log.Debug ("Elapsed time {0}.", watch);

	if (restart) {
		Logger.Log.Debug ("Restarting helper");
		Process p = new Process ();
		p.StartInfo.UseShellExecute = false;
		// FIXME: Maybe this isn't the right way to do things?  It should be ok,
		// the PATH is inherited from the shell script which runs mono itself.
		p.StartInfo.FileName = "mono";

		// Quote each argument individually so that paths containing
		// whitespace survive being re-split by the child process.
		string [] cmdline = Environment.GetCommandLineArgs ();
		string [] quoted = new string [cmdline.Length];
		for (int k = 0; k < cmdline.Length; ++k)
			quoted [k] = QuoteArgument (cmdline [k]);
		p.StartInfo.Arguments = String.Join (" ", quoted);
		p.Start ();
	}
}

// Walks the command line, filling in the arg_* fields, the pattern lists,
// the remap table and the initial pending_directories / pending_files queues.
static void ParseArguments (string [] args)
{
	int i = 0;
	while (i < args.Length) {

		string arg = args [i];
		++i;
		string next_arg = i < args.Length ? args [i] : null;

		switch (arg) {
		case "-h":
		case "--help":
			PrintUsage ();
			break;

		case "--tag":
			if (next_arg != null)
				arg_tag = next_arg;
			++i;
			break;

		case "-r":
		case "--recursive":
			arg_recursive = true;
			break;

		case "--enable-deletion":
			arg_delete = true;
			break;

		case "--disable-directories":
			arg_disable_directories = true;
			break;

		case "--enable-text-cache":
			arg_cache_text = true;
			break;

		case "--debug":
			// Advertised in the usage text; without this case the
			// switch would be treated as a source path.
			arg_debug = true;
			break;

		case "--remap":
			if (next_arg == null)
				break;

			int j = next_arg.IndexOf (":");

			if (j == -1) {
				Logger.Log.Error ("Invalid remap argument: {0}", next_arg);
				Environment.Exit (1);
			}

			remap_table [next_arg.Substring (0, j)] = next_arg.Substring (j+1);

			++i;
			break;

		case "--target":
			// Normalize exactly like source paths, so that a trailing
			// slash cannot defeat the target-vs-source comparison or
			// the parent-directory check in VerifyTarget.
			if (next_arg != null)
				arg_output = NormalizePath (next_arg);
			++i;
			break;

		case "--disable-filtering":
			arg_disable_filtering = true;
			break;

		case "--allow-pattern":
			if (next_arg == null)
				break;

			AddPatterns (allowed_patterns, next_arg);
			++i;
			break;

		case "--deny-pattern":
			if (next_arg == null)
				break;

			AddPatterns (denied_patterns, next_arg);
			++i;
			break;

		case "--disable-restart":
			arg_disable_restart = true;
			break;

		case "--source":
			if (next_arg == null)
				break;

			arg_source = next_arg;
			++i;
			break;

		default:
			// Anything else is a source path; paths that do not
			// exist are silently skipped.
			string path = NormalizePath (arg);

			if (Directory.Exists (path))
				pending_directories.Enqueue (new DirectoryInfo (path));
			else if (File.Exists (path))
				pending_files.Enqueue (new FileInfo (path));
			break;
		}
	}
}

// Makes a path absolute and strips trailing slashes (except for "/").
static string NormalizePath (string raw)
{
	string path = Path.IsPathRooted (raw) ? raw : Path.GetFullPath (raw);
	if (path != "/" && path.EndsWith ("/"))
		path = path.TrimEnd ('/');
	return path;
}

// Adds one ExcludeItem per comma-separated pattern in spec to list.
static void AddPatterns (ArrayList list, string spec)
{
	foreach (string pattern in spec.Split (','))
		list.Add (new ExcludeItem (ExcludeType.Pattern, pattern));
}

// Exits with status 1 unless the target is set, is not one of the source
// paths, has an existing parent directory, and contains only Beagle files.
static void VerifyTarget ()
{
	if (arg_output == null) {
		Logger.Log.Error ("--target must be specified");
		Environment.Exit (1);
	}

	foreach (FileSystemInfo info in pending_directories) {
		if (Path.GetFullPath (arg_output) == info.FullName) {
			Logger.Log.Error ("Target directory cannot be one of the source paths.");
			Environment.Exit (1);
		}
	}

	foreach (FileSystemInfo info in pending_files) {
		if (Path.GetFullPath (arg_output) == info.FullName) {
			Logger.Log.Error ("Target directory cannot be one of the source paths.");
			Environment.Exit (1);
		}
	}

	if (!Directory.Exists (Path.GetDirectoryName (arg_output))) {
		Logger.Log.Error ("Index directory not available for construction: {0}", arg_output);
		Environment.Exit (1);
	}

	// Be *EXTRA PARANOID* about the contents of the target
	// directory, because creating an indexing driver will
	// nuke it.
	if (Directory.Exists (arg_output)) {

		foreach (FileInfo info in DirectoryWalker.GetFileInfos (arg_output)) {
			if (Array.IndexOf (allowed_files, info.Name) == -1) {
				Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagle file {1} was found", arg_output, info.FullName);
				Environment.Exit (1);
			}
		}

		foreach (DirectoryInfo info in DirectoryWalker.GetDirectoryInfos (arg_output)) {
			if (Array.IndexOf (allowed_dirs, info.Name) == -1) {
				Logger.Log.Error ("{0} doesn't look safe to delete: non-Beagle directory {1} was found", arg_output, info.FullName);
				Environment.Exit (1);
			}
		}
	}
}

// Returns arg unchanged when it needs no quoting; otherwise wraps it in
// double quotes, escaping embedded quotes and any backslashes that
// directly precede a quote (or the closing quote).
static string QuoteArgument (string arg)
{
	if (arg.Length > 0 && arg.IndexOfAny (new char [] { ' ', '\t', '"' }) == -1)
		return arg;

	System.Text.StringBuilder sb = new System.Text.StringBuilder ("\"");
	int backslashes = 0;
	foreach (char c in arg) {
		if (c == '\\') {
			++backslashes;
			continue;
		}

		if (c == '"')
			sb.Append ('\\', backslashes * 2 + 1);
		else
			sb.Append ('\\', backslashes);
		backslashes = 0;
		sb.Append (c);
	}
	sb.Append ('\\', backslashes * 2);
	sb.Append ('"');
	return sb.ToString ();
}
328 /////////////////////////////////////////////////////////////////
// Thread body: drains pending_directories, queueing every directory
// (unless --disable-directories) and every non-ignored file onto
// pending_files for IndexWorker.  Clears the crawling flag when done.
static void CrawlWorker ()
{
	Logger.Log.Debug ("Starting CrawlWorker");

	try {
		int count_dirs = 0;
		int count_files = 0;

		while (pending_directories.Count > 0) {
			DirectoryInfo dir = (DirectoryInfo) pending_directories.Dequeue ();

			if (! arg_disable_directories)
				pending_files.Enqueue (dir);

			try {
				if (arg_recursive) {
					foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir)) {
						if (!Ignore (subdir)
						    && !FileSystem.IsSpecialFile (subdir.FullName))
							pending_directories.Enqueue (subdir);
					}
				}

				foreach (FileInfo file in DirectoryWalker.GetFileInfos (dir)) {
					if (!Ignore (file)
					    && !FileSystem.IsSpecialFile (file.FullName)) {
						pending_files.Enqueue (file);
						count_files ++;
					}
				}
			} catch (DirectoryNotFoundException) {
				// Best effort: the directory could not be found (e.g. it
				// was removed while we were scanning).  Keep crawling, but
				// leave a trace instead of swallowing this silently.
				Logger.Log.Debug ("Skipping {0}: directory not found", dir.FullName);
			}

			if (shutdown)
				break;

			count_dirs++;
		}

		Logger.Log.Debug ("Scanned {0} files and directories in {1} directories", count_dirs + count_files, count_dirs);
	} finally {
		Logger.Log.Debug ("CrawlWorker Done");

		// Tells IndexWorker and MemoryMonitorWorker that no more work is coming.
		crawling = false;
	}
}
374 /////////////////////////////////////////////////////////////////
// Applies the command-line driven settings (filtering, tag, source name)
// to an indexable and then appends it to the request.
static void AddToRequest (IndexerRequest request, Indexable indexable)
{
	// With filtering disabled only the file attributes are indexed.
	if (arg_disable_filtering) {
		indexable.Filtering = IndexableFiltering.Never;
	}

	// An optional tag makes the items easy to identify later (say, for removal).
	if (arg_tag != null) {
		indexable.AddProperty (Property.NewUnsearched("Tag", arg_tag));
	}

	// The source name defaults to the name of the target directory; it is
	// computed once and cached in arg_source.
	if (arg_source == null) {
		string sanitized_target = StringFu.SanitizePath (arg_output);
		arg_source = new DirectoryInfo (sanitized_target).Name;
	}

	indexable.Source = arg_source;
	request.Add (indexable);
}
// Flushes the request through the indexer and processes the receipts:
// added items get their file attributes written, removed items are dropped
// from the attributes store, and child indexables are queued back into the
// (cleared) request so that the caller can flush again.
// Returns the receipts produced by this flush.
static IndexerReceipt [] FlushIndexer (IIndexer indexer, IndexerRequest request)
{
	IndexerReceipt [] receipts;
	receipts = indexer.Flush (request);

	ArrayList pending_children;
	pending_children = new ArrayList ();

	foreach (IndexerReceipt raw_r in receipts) {

		if (raw_r is IndexerAddedReceipt) {
			// Update the file attributes
			IndexerAddedReceipt r = (IndexerAddedReceipt) raw_r;

			Indexable indexable = request.GetByUri (r.Uri);

			// We don't need to write out any file attributes for
			// children.
			if (indexable.ParentUri != null)
				continue;

			string path = r.Uri.LocalPath;

			FileAttributes attr;
			attr = fa_store.ReadOrCreate (path);

			attr.LastWriteTime = indexable.Timestamp;
			attr.FilterName = r.FilterName;
			attr.FilterVersion = r.FilterVersion;

			fa_store.Write (attr);

		} else if (raw_r is IndexerRemovedReceipt) {
			// Forget the file attributes.  Only the receipt's Uri is
			// needed here, so the request is not consulted.
			IndexerRemovedReceipt r = (IndexerRemovedReceipt) raw_r;

			string path = r.Uri.LocalPath;
			Logger.Log.Debug ("Removing: '{0}'", path);
			fa_store.Drop (path);

		} else if (raw_r is IndexerChildIndexablesReceipt) {
			// Add any child indexables back into our indexer
			IndexerChildIndexablesReceipt r = (IndexerChildIndexablesReceipt) raw_r;
			pending_children.AddRange (r.Children);
		}
	}

	request.Clear (); // clear out the old request
	foreach (Indexable i in pending_children) // and then add the children
		AddToRequest (request, i);

	return receipts;
}
// Builds the indexable for a file, or returns null when the file is gone,
// is ignored, or is already up to date in the attributes store.
static Indexable FileToIndexable (FileInfo file)
{
	if (!file.Exists)
		return null;
	if (Ignore (file))
		return null;
	if (fa_store.IsUpToDate (file.FullName))
		return null;

	// Same standard properties the FileSystemQueryable uses.
	Indexable result = new Indexable (UriFu.PathToFileUri (file.FullName));
	result.Timestamp = file.LastWriteTimeUtc;
	FSQ.AddStandardPropertiesToIndexable (result, file.Name, Guid.Empty, false);

	// Remember which directory the file lives in.
	Uri parent_uri = UriFu.PathToFileUri (file.DirectoryName);
	result.AddProperty (Property.NewUnsearched (ParentDirUriPropKey, parent_uri));

	return result;
}
// Builds the indexable for a directory.  Returns null when the directory no
// longer exists or is already recorded in the attributes store; in the
// latter case, with --enable-deletion, a directory whose mtime is newer than
// the stored one is queued on modified_directories for a deletion check.
static Indexable DirectoryToIndexable (DirectoryInfo dir, Queue modified_directories)
{
	if (!dir.Exists)
		return null;

	// Check if the directory information is stored in attributes store
	// And if the mtime of the directory is same as that in the attributes store
	FileAttributes attr = fa_store.Read (dir.FullName);

	// If the directory exists in the fa store, then it is already indexed
	if (attr != null) {
		if (arg_delete && dir.LastWriteTimeUtc > attr.LastWriteTime)
			modified_directories.Enqueue (dir);
		return null;
	}

	// Create the indexable and add the standard properties we
	// use in the FileSystemQueryable.
	Uri uri = UriFu.PathToFileUri (dir.FullName);
	Indexable indexable = new Indexable (uri);
	indexable.MimeType = "inode/directory";
	indexable.NoContent = true;
	indexable.Timestamp = dir.LastWriteTimeUtc;
	FSQ.AddStandardPropertiesToIndexable (indexable, dir.Name, Guid.Empty, false);

	// Add directory name property.  A root directory ("/" is accepted as
	// a source path) has no parent, so there is nothing to record for it.
	DirectoryInfo parent = dir.Parent;
	if (parent != null)
		indexable.AddProperty (Property.NewUnsearched (ParentDirUriPropKey, UriFu.PathToFileUri (parent.FullName)));

	indexable.AddProperty (Property.NewBool (IsDirectoryPropKey, true));

	return indexable;
}
// Thread body: turns queued files and directories into indexables and
// flushes them to the driver in batches of BATCH_SIZE.  Once the crawler is
// done and the queue is empty (or shutdown was requested), it removes index
// entries for items that vanished from modified directories, flushes what is
// left, and optimizes the index.  Clears the indexing flag when done.
static void IndexWorker ()
{
	Logger.Log.Debug ("Starting IndexWorker");
	Queue modified_directories = new Queue ();

	try {
		Indexable indexable;
		IndexerRequest pending_request;
		pending_request = new IndexerRequest ();

		while (!shutdown) {
			// Sample the crawler state *before* looking at the queue.
			// Checking the queue first would let the crawler enqueue
			// its last files and finish between the two checks, and
			// we would exit with those files still unprocessed.
			bool crawler_running = crawling;

			if (pending_files.Count > 0) {
				Object file_or_dir_info = pending_files.Dequeue ();

				if (file_or_dir_info is DirectoryInfo)
					indexable = DirectoryToIndexable ((DirectoryInfo) file_or_dir_info, modified_directories);
				else
					indexable = FileToIndexable ((FileInfo) file_or_dir_info);

				if (indexable == null)
					continue;

				AddToRequest (pending_request, indexable);

				if (pending_request.Count >= BATCH_SIZE) {
					Logger.Log.Debug ("Flushing driver, {0} items in queue", pending_request.Count);
					FlushIndexer (driver, pending_request);
					// FlushIndexer clears the pending_request
				}

			} else if (crawler_running) {
				// Nothing to do yet; wait for the crawler.
				Thread.Sleep (50);
			} else {
				break;
			}
		}

		// Time to remove deleted directories from the index and attributes store
		while (modified_directories.Count > 0) {
			DirectoryInfo subdir = (DirectoryInfo) modified_directories.Dequeue ();
			Logger.Log.Debug ("Checking {0} for deleted files and directories", subdir.FullName);

			// Get a list of all documents from lucene index with ParentDirUriPropKey set as that of subdir
			ICollection all_dirent = GetAllItemsInDirectory (subdir);
			foreach (Dirent info in all_dirent) {
				// check if the item exists
				if (File.Exists (info.FullName) ||
				    (info.IsDirectory && Directory.Exists (info.FullName)))
					continue;

				if (info.IsDirectory)
					// Recursively remove deleted subdirectories
					modified_directories.Enqueue (new DirectoryInfo (info.FullName));

				// remove
				Uri uri = UriFu.PathToFileUri (info.FullName);
				indexable = new Indexable (IndexableType.Remove, uri);
				AddToRequest (pending_request, indexable);
			}
		}

		// Call Flush until our request is empty.  We have to do this in a loop
		// because children can get added back to the pending request in a flush.
		while (pending_request.Count > 0)
			FlushIndexer (driver, pending_request);

		backing_fa_store.Flush ();

		Logger.Log.Debug ("Optimizing index");
		driver.OptimizeNow ();
	} finally {
		Logger.Log.Debug ("IndexWorker Done");

		// Lets MemoryMonitorWorker stop polling.
		indexing = false;
	}
}
// A directory entry recovered from the index: the stored Uri string plus
// a flag saying whether the entry is a directory.
class Dirent {
	// Number of leading characters FullName strips from the stored
	// string -- presumably the "file://" scheme prefix; verify against
	// how the "Uri" field is written.
	private const int prefix_length = 7;

	private readonly string uri_string;
	private readonly bool directory;

	public Dirent (string path, bool is_dir)
	{
		uri_string = path;
		directory = is_dir;
	}

	// True when the entry was flagged as a directory.
	public bool IsDirectory {
		get { return directory; }
	}

	// The string exactly as handed to the constructor.
	public string Path {
		get { return uri_string; }
	}

	// The stored string without its first seven characters.
	public string FullName {
		get { return uri_string.Substring (prefix_length); }
	}
}
// Hit collector that records every reported document id by setting the
// corresponding bit in the array supplied at construction; scores are ignored.
private class BitArrayHitCollector : LNS.HitCollector {

	private BetterBitArray hit_bits;

	public BitArrayHitCollector (BetterBitArray matches)
	{
		hit_bits = matches;
	}

	public override void Collect (int id, float score)
	{
		hit_bits [id] = true;
	}
}
621 private const string ParentDirUriPropKey = "beagle:ParentDirUri";
622 private const string IsDirectoryPropKey = "beagle:IsDirectory";
// Returns a list of all files and directories in dir, as recorded in the
// index: one Dirent for every document whose ParentDirUri property equals
// the file URI of dir.
static ICollection GetAllItemsInDirectory (DirectoryInfo dir)
{
	// form the query
	string parent_uri_str = UriFu.PathToFileUri (dir.FullName).ToString ();
	// Instead of taking the painfull way of using BeagleAnalyzer, lets just add the prefix manually
	// LuceneCommon thinks exposing secret property type encoding is bad, I think so too... except for now
	string key = "prop:k:" + ParentDirUriPropKey;
	LNS.Query query = new LNS.TermQuery (new Term (key, parent_uri_str));

	ArrayList match_list = new ArrayList ();

	// do the search
	LNS.IndexSearcher searcher;
	searcher = LuceneCommon.GetSearcher (driver.PrimaryStore);

	// Hand the searcher back even if the search or a document load throws.
	try {
		BetterBitArray matches;
		matches = new BetterBitArray (searcher.MaxDoc ());

		BitArrayHitCollector collector;
		collector = new BitArrayHitCollector (matches);

		searcher.Search (query, null, collector);

		// Finally we pull all of the matching documents,
		// convert them to Dirent, and store them in a list.
		int i = 0;
		while (i < matches.Count) {

			i = matches.GetNextTrueIndex (i);
			if (i >= matches.Count)
				break;

			Document doc;
			doc = searcher.Doc (i);

			Dirent info;
			info = DocumentToDirent (doc);

			match_list.Add (info);

			++i;
		}
	} finally {
		LuceneCommon.ReleaseSearcher (searcher);
	}

	return match_list;
}
// Converts an index document into a Dirent: the "Uri" field supplies the
// path, and the first IsDirectory property field (if any) supplies the flag.
static private Dirent DocumentToDirent (Document doc)
{
	string uri_value = doc.Get ("Uri");
	string wanted_field = "prop:k:" + IsDirectoryPropKey;

	bool directory_flag = false;
	foreach (Field field in doc.Fields ()) {
		if (field.Name () == wanted_field) {
			// The stored value carries a two-character prefix ahead
			// of the actual boolean text.
			directory_flag = (field.StringValue ().Substring (2) == "true");
			break;
		}
	}

	return new Dirent (uri_value, directory_flag);
}
696 /////////////////////////////////////////////////////////////////
// Thread body: polls SystemInformation.VmRss while crawling or indexing is
// in progress.  When it grows beyond `threshold` times its starting value,
// the restart and shutdown flags are set and the thread exits.
static void MemoryMonitorWorker ()
{
	const double threshold = 6.0;

	int baseline_vmrss = SystemInformation.VmRss;
	int previous_vmrss = 0;

	while (true) {
		if (shutdown)
			break;
		if (!crawling && !indexing)
			break;

		// Check resident memory usage
		int current_vmrss = SystemInformation.VmRss;
		double growth = current_vmrss / (double) baseline_vmrss;

		// Only log when the reading changed since the last poll.
		if (current_vmrss != previous_vmrss) {
			Logger.Log.Debug ("Size: VmRSS={0:0.0} MB, size={1:0.00}, {2:0.0}%",
					  current_vmrss/1024.0, growth, 100.0 * (growth - 1) / (threshold - 1));
		}
		previous_vmrss = current_vmrss;

		if (growth > threshold) {
			Logger.Log.Debug ("Process too big, shutting down!");
			restart = true;
			shutdown = true;
			return;
		}

		Thread.Sleep (3000);
	}
}
725 /////////////////////////////////////////////////////////////////
727 // From BeagleDaemon.cs
// Installs OurSignalHandler for SIGINT and SIGTERM, and also for SIGQUIT
// unless the BEAGLE_THERE_BE_NO_QUITTIN environment variable is set.
static void SetupSignalHandlers ()
{
	// Call the handler once with a dummy value to force it to be JITed
	// before any signal can arrive.
	OurSignalHandler (-1);

	// Set up our signal handler
	Mono.Unix.Native.Stdlib.signal (Mono.Unix.Native.Signum.SIGINT, OurSignalHandler);
	Mono.Unix.Native.Stdlib.signal (Mono.Unix.Native.Signum.SIGTERM, OurSignalHandler);

	bool handle_sigquit = (Environment.GetEnvironmentVariable("BEAGLE_THERE_BE_NO_QUITTIN") == null);
	if (handle_sigquit) {
		Mono.Unix.Native.Stdlib.signal (Mono.Unix.Native.Signum.SIGQUIT, OurSignalHandler);
	}
}
// Signal callback: requests a shutdown.  A negative signal number is a
// no-op, which lets SetupSignalHandlers call this once purely to get it
// pre-JITed.
static void OurSignalHandler (int signal)
{
	if (signal >= 0) {
		Logger.Log.Debug ("Shutdown Requested");
		shutdown = true;
	}
}
752 /////////////////////////////////////////////////////////////////
// Writes the usage text to standard output and exits with status 0.
static void PrintUsage ()
{
	string banner =
		"beagle-build-index: Build an index.\n" +
		"Web page: http://www.gnome.org/projects/beagle\n" +
		"Copyright (C) 2005-2006 Novell, Inc.\n\n";

	string synopsis =
		"Usage: beagle-build-index [OPTIONS] --target <index_path> <path> [path]\n\n" +

		"** WARNING **\n" +
		"beagle-build-index will *delete all existing data* within the target\n" +
		"directory. Ensure that the target path is set correctly before running.\n\n";

	// FIXME: --remap doesn't seem to be implemented, so it is not listed.
	// Implementing it might need some fixes to --enable-deletion, see IndexWorker.
	string options =
		"Options:\n" +
		" --source [name]\t\tThe index's source name. Defaults to the target directory name\n" +
		" --tag [tag]\t\t\tTag index data for identification.\n" +
		" --recursive\t\t\tCrawl source path recursivly.\n" +
		" --enable-deletion\t\tRemove deleted files and directories from index.\n" +
		" --enable-text-cache\t\tBuild text-cache of documents used for snippets.\n" +
		" --disable-directories\t\tDon't add directories to the index.\n" +
		" --disable-filtering\t\tDisable all filtering of files. Only index attributes.\n" +
		" --allow-pattern [pattern]\tOnly allow files that match the pattern to be indexed.\n" +
		" --deny-pattern [pattern]\tKeep any files that match the pattern from being indexed.\n" +
		" --disable-restart\t\tDon't restart when memory usage gets above a certain threshold.\n" +
		" --debug\t\t\tEcho verbose debugging information.\n\n";

	Console.WriteLine (banner + synopsis + options);
	Environment.Exit (0);
}
789 /////////////////////////////////////////////////////////
// Rewrites a Uri according to the first remap_table entry whose key occurs
// anywhere in the Uri's local path; every occurrence of the key is replaced
// by the entry's value.  Returns the Uri unchanged when no entry matches.
static Uri RemapUri (Uri uri)
{
	// FIXME: This is ghetto
	foreach (DictionaryEntry mapping in remap_table) {
		string needle = (string) mapping.Key;

		if (uri.LocalPath.IndexOf (needle) != -1) {
			string replacement = (string) mapping.Value;
			return new Uri (uri.LocalPath.Replace (needle, replacement));
		}
	}

	return uri;
}
// A directory is ignored exactly when its name starts with a dot.
static bool Ignore (DirectoryInfo directory)
{
	return directory.Name.StartsWith (".");
}
// Decides whether a file should be skipped.  Dot-files and special files
// are always ignored.  If any allow-patterns were given, a file is kept only
// when it matches one of them (deny-patterns are then not consulted);
// otherwise a file is ignored when it matches any deny-pattern.
static bool Ignore (FileInfo file)
{
	if (file.Name.StartsWith (".") || FileSystem.IsSpecialFile (file.FullName))
		return true;

	if (allowed_patterns.Count > 0) {
		bool allowed = false;
		foreach (ExcludeItem allow in allowed_patterns) {
			if (allow.IsMatch (file.Name)) {
				allowed = true;
				break;
			}
		}
		return !allowed;
	}

	foreach (ExcludeItem deny in denied_patterns) {
		if (deny.IsMatch (file.Name))
			return true;
	}

	// FIXME: Add more stuff here

	return false;
}