Remove some debug spew
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blob0982a98b3de695f8b2d4b49bdf26e73c52c7e516
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 [PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable : LuceneQueryable {
44 static public bool Debug = false;
46 private const string SplitFilenamePropKey = "beagle:SplitFilename";
47 public const string ExactFilenamePropKey = "beagle:ExactFilename";
48 public const string TextFilenamePropKey = "beagle:Filename";
49 public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
50 public const string FilenameExtensionPropKey = "beagle:FilenameExtension";
51 public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
52 public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";
54 // History:
55 // 1: Initially set to force a reindex due to NameIndex changes.
56 // 2: Overhauled everything to use new lucene infrastructure.
57 // 3: Switched to UTC for all times, changed the properties a bit.
58 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
59 // Make SplitFilenamePropKey unstored
60 // 5: Keyword properties in the private namespace are no longer lower cased; this is required to
61 // offset the change in LuceneCommon.cs
62 const int MINOR_VERSION = 5;
64 private object big_lock = new object ();
66 private IFileEventBackend event_backend;
68 // This is the task that walks the tree structure
69 private TreeCrawlTask tree_crawl_task;
71 // This is the task that finds the next place that
72 // needs to be crawled in the tree and spawns off
73 // the appropriate IndexableGenerator.
74 private FileCrawlTask file_crawl_task;
76 private ArrayList roots = new ArrayList ();
77 private ArrayList roots_by_path = new ArrayList ();
79 private FileNameFilter filter;
81 // This is just a copy of the LuceneQueryable's QueryingDriver
82 // cast into the right type for doing internal->external Uri
83 // lookups.
84 private LuceneNameResolver name_resolver;
86 //////////////////////////////////////////////////////////////////////////
88 private Hashtable cached_uid_by_path = new Hashtable ();
90 //////////////////////////////////////////////////////////////////////////
// Builds the queryable on top of the "FileSystemIndex" index: selects a
// file event backend, creates the two crawl tasks, preloads directory
// name information from the index, creates the file-name filter and
// subscribes to directory path expiration.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Choose the event backend: the null backend when inotify is
	// not enabled, the inotify backend otherwise.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	}

	// The tree crawler reports each directory it finds to AddDirectory.
	TreeCrawlTask.Handler on_directory_found = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (on_directory_found);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// Our Driver is the LuceneNameResolver created by
	// BuildLuceneQueryingDriver; keep a typed reference to it.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// Set up our file-name filter
	filter = new FileNameFilter (this);

	// Drop cached path lookups when a directory path expires
	DirectoryModel.ExpireHandler on_expire = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += on_expire;
}
// Creates the attribute store for this backend: a
// FileAttributesStore_Mixed built from IndexDirectory and IndexFingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store;
	store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
// Creates the querying driver for this backend. We always use a
// LuceneNameResolver, which the constructor later casts Driver to.
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
		int minor_version,
		bool read_only_mode)
{
	LuceneQueryingDriver driver;
	driver = new LuceneNameResolver (index_name, minor_version, read_only_mode);
	return driver;
}
// The file-name filter created in the constructor.
public FileNameFilter Filter {
	get {
		return filter;
	}
}
137 //////////////////////////////////////////////////////////////////////////
140 // This is where we build our Indexables
// Attaches the standard file-name properties to an indexable:
// the exact name, the name without extension, the name without
// extension with every non-alphanumeric character replaced by a
// space, the lower-cased extension, and the fuzzy-divided name.
// If parent_id is not Guid.Empty, the parent's uri string is
// attached as well. Every property gets IsMutable = mutable.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
		string name,
		Guid parent_id,
		bool mutable)
{
	string base_name = Path.GetFileNameWithoutExtension (name);
	string extension = Path.GetExtension (name).ToLower ();

	// Blank out everything that is not a letter or a digit.
	char [] chars = base_name.ToCharArray ();
	for (int pos = 0; pos < chars.Length; ++pos) {
		if (! Char.IsLetterOrDigit (chars [pos]))
			chars [pos] = ' ';
	}
	string punctuation_free = new string (chars);

	Property exact_prop = Property.NewKeyword (ExactFilenamePropKey, name);
	exact_prop.IsMutable = mutable;
	indexable.AddProperty (exact_prop);

	Property text_prop = Property.New (TextFilenamePropKey, base_name);
	text_prop.IsMutable = mutable;
	indexable.AddProperty (text_prop);

	Property no_punct_prop = Property.New (NoPunctFilenamePropKey, punctuation_free);
	no_punct_prop.IsMutable = mutable;
	indexable.AddProperty (no_punct_prop);

	Property ext_prop = Property.NewUnsearched (FilenameExtensionPropKey, extension);
	ext_prop.IsMutable = mutable;
	indexable.AddProperty (ext_prop);

	Property split_prop = Property.NewUnstored (SplitFilenamePropKey,
			StringFu.FuzzyDivide (base_name));
	split_prop.IsMutable = mutable;
	indexable.AddProperty (split_prop);

	if (parent_id != Guid.Empty) {
		// We use the uri here to recycle terms in the index,
		// since each directory's uri will already be indexed.
		Property parent_prop = Property.NewUnsearched (ParentDirUriPropKey,
				GuidFu.ToUriString (parent_id));
		parent_prop.IsMutable = mutable;
		indexable.AddProperty (parent_prop);
	}
}
// Convenience overload taking the parent DirectoryModel (or null).
// Uses the parent's UniqueId, or Guid.Empty when there is no parent,
// and records the parent model in the indexable's local state.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
		string name,
		DirectoryModel parent,
		bool mutable)
{
	Guid parent_id;
	if (parent != null)
		parent_id = parent.UniqueId;
	else
		parent_id = Guid.Empty;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
// Builds an Add indexable describing the directory at 'path'.
// Returns null if an IOException occurs while building it.
// For a directory without a parent the full path is used as its name.
public static Indexable DirectoryToIndexable (string path,
		Guid id,
		DirectoryModel parent)
{
	Indexable result;

	try {
		result = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		result.MimeType = "inode/directory";
		result.NoContent = true;
		// There is no content, but setting the ContentUri
		// gives us nice URIs in the logs.
		result.ContentUri = UriFu.PathToFileUri (path);
		result.Timestamp = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		// Looks like the directory was deleted.
		return null;
	}

	string dir_name = (parent == null) ? path : Path.GetFileName (path);
	AddStandardPropertiesToIndexable (result, dir_name, parent, true);

	Property is_dir_prop = Property.NewBool (IsDirectoryPropKey, true);
	is_dir_prop.IsMutable = true; // we want this in the secondary index, for efficiency
	result.AddProperty (is_dir_prop);

	result.LocalState ["Path"] = path;

	return result;
}
// Builds an Add indexable for the file at 'path', always filtered.
// crawl_mode is stored in the indexable's Crawled flag.
// Returns null if an IOException occurs while building it.
public static Indexable FileToIndexable (string path,
		Guid id,
		DirectoryModel parent,
		bool crawl_mode)
{
	Indexable result;

	try {
		result = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		result.Timestamp = File.GetLastWriteTimeUtc (path);
		result.ContentUri = UriFu.PathToFileUri (path);
		result.Crawled = crawl_mode;
		result.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// Looks like the file was deleted.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (result, file_name, parent, true);

	result.LocalState ["Path"] = path;

	return result;
}
// Builds a PropertyChange indexable carrying the standard name
// properties for the new name and parent. The id and the last known
// path are stashed in the indexable's local state.
private static Indexable NewRenamingIndexable (string name,
		Guid id,
		DirectoryModel parent,
		string last_known_path)
{
	Indexable rename = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

	AddStandardPropertiesToIndexable (rename, name, parent, true);

	rename.LocalState ["Id"] = id;
	rename.LocalState ["LastKnownPath"] = last_known_path;

	return rename;
}
286 //////////////////////////////////////////////////////////////////////////
289 // Mapping from directory ids to paths
292 private Hashtable dir_models_by_id = new Hashtable ();
293 private Hashtable name_info_by_id = new Hashtable ();
// Until our set of DirectoryModels is fully constructed we fall back
// to the name information stored in the index. This loads every
// directory NameInfo from the name resolver into name_info_by_id,
// keyed by its Id.
private void PreloadDirectoryNameInfo ()
{
	foreach (LuceneNameResolver.NameInfo info in name_resolver.GetAllDirectoryNameInfo ()) {
		name_info_by_id [info.Id] = info;
	}
}
// Maps a directory's unique id to its full path, or null if the id is
// unknown. This only works for directories. A registered
// DirectoryModel wins; otherwise the path is rebuilt recursively from
// the cached NameInfo entries.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// No parent id means this is a root: its name is the whole path.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	if (parent_path == null)
		return null;

	return Path.Combine (parent_path, cached.Name);
}
// Updates the cached NameInfo for 'id' (if there is one) with a new
// parent id and name. Does nothing when no NameInfo is cached.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Combines a name with the path of its parent directory. Returns the
// name unchanged when parent_id is Guid.Empty (the correct behavior
// for roots), and null when the parent's path cannot be resolved.
private string ToFullPath (string name, Guid parent_id)
{
	if (parent_id != Guid.Empty) {
		string parent_path = UniqueIdToDirectoryName (parent_id);
		return (parent_path == null) ? null : Path.Combine (parent_path, name);
	}

	return name;
}
// Maps a unique id to a full path; works for both files and
// directories. Returns null if the id cannot be resolved.
private string UniqueIdToFullPath (Guid id)
{
	// Directories can be resolved from our own tables.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// Otherwise ask the index for the name information.
	LuceneNameResolver.NameInfo from_index = name_resolver.GetNameInfoById (id);
	if (from_index != null)
		return ToFullPath (from_index.Name, from_index.ParentId);

	return null;
}
// Remembers the unique id of the entry 'name' inside 'dir', keyed by
// its full path.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops any unique id cached for 'path'.
private void ForgetId (string path)
{
	cached_uid_by_path.Remove (path);
}
// Resolves the unique id of the entry 'name' inside 'dir'. This works
// for files. (It probably works for directories too, but the
// directory-specific lookups are more efficient if you know it is a
// directory.) The path cache is consulted first; on a miss the name
// resolver is asked.
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (! cached_uid_by_path.Contains (full_path))
		return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

	return (Guid) cached_uid_by_path [full_path];
}
398 //////////////////////////////////////////////////////////////////////////
401 // Directory-related methods
404 private Hashtable dir_models_by_path = new Hashtable ();
// Finds the DirectoryModel for 'path', or returns null. The by-path
// cache is checked first; on a miss each root is walked in turn and
// the first hit is cached.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Walk each root until we find the correct path
	foreach (DirectoryModel root in roots) {
		DirectoryModel found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path) {
			dir_models_by_path [path] = found;
		}
		return found;
	}

	return null;
}
// Handler for DirectoryModel.ExpireEvent: removes the expired path
// from the by-path cache. unique_id is not used here.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug) {
		Logger.Log.Debug ("Expired '{0}'", expired_path);
	}

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Adds the directory 'name' under 'parent' (parent is null for a root,
// in which case 'name' is the full path). Ignored directories,
// directories the parent already knows about and directories that do
// not exist are skipped. A directory that needs (re-)indexing is
// scheduled; otherwise it is registered directly if it can be walked.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// FIXME: ! parent.HasChildWithName (name)
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path;
	if (parent == null)
		path = name;
	else
		path = Path.Combine (parent.FullName, name);

	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path, name);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing;
	if (attr != null) {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it. We can do this since
		// all of the directory name info was preloaded via
		// PreloadDirectoryNameInfo.
		string last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		needs_indexing = (last_known_name != path);
		if (needs_indexing)
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
	} else {
		// No attributes: it definitely needs indexing.
		needs_indexing = true;
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing) {
		ScheduleDirectory (name, parent, attr, is_walkable);
		return;
	}

	if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Adds a new root directory. The path is sanitized first; adding a
// root that is already known is logged as an error and ignored.
public void AddRoot (string path)
{
	string root_path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", root_path);

	bool already_known = roots_by_path.Contains (root_path);
	if (already_known) {
		Logger.Log.Error ("Trying to add an existing root: {0}", root_path);
		return;
	}

	// The path has to be in the roots_by_path list before the
	// root is actually added, so that the filtering works as
	// we'd like.
	roots_by_path.Add (root_path);

	AddDirectory (null, root_path);
}
// Removes a previously added root directory and everything we know
// about it. Unknown roots, and roots for which no DirectoryModel can
// be found, are logged as errors and left alone.
public void RemoveRoot (string path)
{
	// AddRoot stores the sanitized form of the path, so we have to
	// look up the sanitized form as well; otherwise a root added
	// with a non-canonical spelling could never be removed using
	// that same spelling.
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
		return;
	}

	// Find our directory model for the root
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Schedules a delayed-priority task that indexes the directory 'name'
// under 'parent'. With no attributes the directory gets a fresh unique
// id and a last-crawl time of DateTime.MinValue. Nothing is scheduled
// if DirectoryToIndexable returns null.
private void ScheduleDirectory (string name,
		DirectoryModel parent,
		FileAttributes attr,
		bool is_walkable)
{
	string path = (parent == null) ? name : Path.Combine (parent.FullName, name);

	Guid id;
	DateTime last_crawl;
	if (attr != null) {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	} else {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	}

	Indexable indexable = DirectoryToIndexable (path, id, parent);
	if (indexable == null)
		return;

	indexable.LocalState ["Name"] = name;
	indexable.LocalState ["LastCrawl"] = last_crawl;
	indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Creates the DirectoryModel for an already-indexed directory, marks
// it dirty if its mtime is newer than the stored attributes, starts
// watching it and queues it for crawling. Returns false if reading
// the directory's mtime raises an IOException, true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	string path;
	if (parent == null)
		path = name;
	else
		path = Path.Combine (parent.FullName, name);

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

	DateTime current_mtime;
	try {
		current_mtime = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
		return false;
	}

	DirectoryModel model = (parent == null)
		? DirectoryModel.NewRoot (big_lock, path, attr)
		: parent.AddChild (name, attr);

	if (current_mtime > attr.LastWriteTime) {
		model.State = DirectoryState.Dirty;
		if (Debug)
			Logger.Log.Debug ("'{0}' is dirty", path);
	}

	if (Debug) {
		if (! model.IsRoot)
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", model.FullName, model.Parent.FullName);
		else
			Logger.Log.Debug ("Created model '{0}'", model.FullName);
	}

	// Add any roots we create to the list of roots
	if (model.IsRoot)
		roots.Add (model);

	// The model now supersedes any NameInfo we had cached for
	// this id, so index it by id and drop the cached NameInfo.
	dir_models_by_id [model.UniqueId] = model;
	name_info_by_id.Remove (model.UniqueId);

	// Start watching the directory.
	model.WatchHandle = event_backend.CreateWatch (path);

	// Schedule this directory for crawling.
	bool newly_queued = tree_crawl_task.Add (model);
	if (newly_queued)
		ThisScheduler.Add (tree_crawl_task);

	// Make sure that our file crawling task is active,
	// since presumably we now have something new to crawl.
	ActivateFileCrawling ();

	return true;
}
// Drops the watch and the by-id entry for 'dir' and, before that, for
// all of its descendants. The by-path cache is not touched here: we
// rely on the expire event to clean it up.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel child in dir.Children) {
		ForgetDirectoryRecursively (child);
	}

	object watch = dir.WatchHandle;
	if (watch != null) {
		event_backend.ForgetWatch (dir.WatchHandle);
	}

	dir_models_by_id.Remove (dir.UniqueId);
}
// Removes a directory: forgets its watches and internal references
// (recursively), detaches the model, and schedules an
// immediate-priority task that removes it from the index.
private void RemoveDirectory (DirectoryModel dir)
{
	Indexable removal = new Indexable (IndexableType.Remove, GuidFu.ToUri (dir.UniqueId));

	// Remember a copy of our external Uri, so that we can
	// easily remap it in the PostRemoveHook. This must be
	// captured before the model is removed below.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

	// Forget watches and internal references
	ForgetDirectoryRecursively (dir);

	// Calling Remove will expire the path names,
	// so name caches will be cleaned up accordingly.
	dir.Remove ();

	// We *add* the indexable to *remove* the index item
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Removes the directory at 'path', if we have a DirectoryModel for it.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
// Moves and/or renames a directory and schedules an
// immediate-priority task carrying the renaming indexable.
//
// dir:        the model being moved; if null the move is treated as
//             the addition of a new directory under new_parent.
// new_parent: the new parent, or null if we are just renaming.
// new_name:   the directory's new name.
//
// Throws an Exception when asked to move a root.
private void MoveDirectory (DirectoryModel dir,
		DirectoryModel new_parent, // or null if we are just renaming
		string new_name)
{
	if (dir == null) {
		// Without a model *and* without a new parent there is
		// nothing we can add, and touching new_parent.FullName
		// would throw a NullReferenceException.
		if (new_parent == null) {
			Logger.Log.Warn ("Couldn't find DirectoryModel for directory being renamed to '{0}', and no new parent is known; ignoring.",
					 new_name);
			return;
		}

		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name, new_parent.FullName);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);
	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
722 //////////////////////////////////////////////////////////////////////////
725 // This code controls the directory crawl order
// Depth-first walk over 'contender' and its descendants. A directory
// that needs crawling replaces the current best candidate when there
// is no candidate yet or when the candidate compares lower
// (CompareTo < 0). Returns the resulting best candidate, possibly null.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	if (contender.NeedsCrawl
	    && (best == null || best.CompareTo (contender) < 0))
		best = contender;

	foreach (DirectoryModel child in contender.Children) {
		best = StupidWalk (best, child);
	}

	return best;
}
// Picks the next directory to crawl by walking every root with
// StupidWalk. Returns null if nothing needs crawling.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
// Called when a directory has been crawled: stamps the stored
// attributes and the model with the current UTC time, writes the
// attributes back and marks the model clean. Detached directories and
// directories without stored attributes are left untouched.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (dir.IsAttached) {
		FileAttributes attr = FileAttributesStore.Read (dir.FullName);

		// With no attributes we don't mark ourselves;
		// let the crawler redo us.
		if (attr != null) {
			// We don't have to be super-careful about this since
			// we only use the FileAttributes mtime on a directory
			// to determine its initial state, not whether or not
			// its index record is up-to-date.
			attr.LastWriteTime = DateTime.UtcNow;

			// ...but we do use this to decide which order
			// directories get crawled in.
			dir.LastCrawlTime = DateTime.UtcNow;

			FileAttributesStore.Write (attr);
			dir.MarkAsClean ();
		}
	}
}
// Marks an attached directory as uncrawlable, dropping its watch
// first if one was set up. Detached directories are ignored.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (dir.IsAttached) {
		// If we managed to set up a watch on this
		// directory, drop it.
		if (dir.WatchHandle != null) {
			event_backend.ForgetWatch (dir.WatchHandle);
			dir.WatchHandle = null;
		}

		dir.MarkAsUncrawlable ();
	}
}
// Re-crawls the directory at 'path'. If there is no model for the
// path itself, the model of its containing directory is queued
// instead (without recursing into children). Does nothing but log
// if neither model can be found.
public void Recrawl (string path)
{
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		// Fall back to the containing directory.
		target = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	if (tree_crawl_task.Add (target))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Marks every directory under every root as possibly clean, queues
// them for crawling and makes sure both crawl tasks are active.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root in roots) {
		Recrawl_Recursive (root, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Sets the state of 'dir' and of all its descendants to 'state' and
// adds each of them to the tree crawl task.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// Hands the file crawl task to the scheduler unless it is already active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Hands the tree crawl task to the scheduler unless it is already active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
854 //////////////////////////////////////////////////////////////////////////
857 // File-related methods
// What the crawler has to do with a file, as decided by
// DetermineRequiredAction and carried out by GetCrawlingFileIndexable.
private enum RequiredAction {
	// Leave the file alone.
	None,
	// (Re-)index the file.
	Index,
	// Only the name/location changed; issue a renaming indexable.
	Rename,
	// Drop the stored file attributes.
	Forget
}
// Midnight, January 1st 1970: the origin of time_t values.
static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

// Converts a time_t (seconds since the epoch) into a DateTime by
// adding that many seconds to 'epoch'.
static DateTime ToDateTimeUtc (long time_t)
{
	double seconds_since_epoch = time_t;
	return epoch.AddSeconds (seconds_since_epoch);
}
// Decides what the crawler has to do with the file 'name' in 'dir',
// given the attributes we last stored for it (attr, possibly null).
//
// last_known_path is set to the path we previously knew the file's
// unique id under whenever that lookup is performed (it may be null),
// and is null otherwise.
//
// Note that attr.UniqueId is replaced with a fresh Guid when the file
// has a changed mtime and is not at its last known path.
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
		string name,
		FileAttributes attr,
		out string last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously. Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take into account that we might have a better
	// matching filter to use now. That, however, is kind of expensive to
	// figure out since we'd have to do mime-sniffing.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	int stat_result;
	try {
		stat_result = Mono.Unix.Native.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug (ex, "Caught exception stat-ing {0}", path);
		return RequiredAction.None;
	}

	// stat reports failure through its return value, not through an
	// exception. On failure the struct contents are meaningless, and
	// comparing them against attr would make the file look modified
	// (and possibly hand it a new unique id), so bail out instead.
	if (stat_result != 0) {
		Logger.Log.Debug ("Could not stat {0}", path);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = ToDateTimeUtc (stat.st_mtime);
	last_attr_time = ToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

		// If the file has been copied, it will have the
		// original file's EAs. Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file. If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved. We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
// Returns an indexable that will do the right thing with the file
// 'name' in 'dir', or null if the right thing is to do nothing.
// For the Forget action the stored attributes are dropped here and
// null is returned.
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (path);

	string last_known_path;
	RequiredAction action = DetermineRequiredAction (dir, name, attr, out last_known_path);

	if (action == RequiredAction.None)
		return null;

	// Read the id only now: DetermineRequiredAction may have
	// replaced attr.UniqueId.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	if (action == RequiredAction.Index)
		return FileToIndexable (path, unique_id, dir, true);

	if (action == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (action == RequiredAction.Forget)
		FileAttributesStore.Drop (path);

	return null;
}
// Schedules immediate indexing of the file 'name' in 'dir'.
// Files that do not exist, special files and ignored files are
// skipped. The file's unique id is cached by path before the
// indexable is built.
public void AddFile (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);

	if (! File.Exists (path)
	    || FileSystem.IsSpecialFile (path)
	    || filter.Ignore (dir, name, false))
		return;

	// If this file already has extended attributes,
	// make sure that the name matches the file
	// that is in the index. If not, it could be
	// a copy of an already-indexed file and should
	// be assigned a new unique id.
	Guid unique_id = Guid.Empty;
	FileAttributes attr = FileAttributesStore.Read (path);
	if (attr != null) {
		LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (attr.UniqueId);
		bool same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (same_file)
			unique_id = attr.UniqueId;
	}

	if (unique_id == Guid.Empty)
		unique_id = Guid.NewGuid ();

	RegisterId (name, dir, unique_id);

	Indexable indexable = FileToIndexable (path, unique_id, dir, false);
	if (indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
// Schedules immediate removal of the file 'name' in 'dir' from the
// index. If the file's unique id cannot be resolved this is logged
// and nothing is scheduled.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignored.
	// Right?

	Guid unique_id = NameAndParentToId (name, dir);
	if (unique_id == Guid.Empty) {
		Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
				 name, dir.FullName);
		return;
	}

	Uri internal_uri = GuidFu.ToUri (unique_id);
	Uri external_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

	Indexable removal = new Indexable (IndexableType.Remove, internal_uri);
	removal.LocalState ["RemovedUri"] = external_uri;

	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Handle a file being moved/renamed from (old_dir, old_name) to
// (new_dir, new_name).
//
// Depending on the ignore state of the old and new locations this is
// either a no-op, a synthesized add, a synthesized remove, or a real
// rename that is scheduled at immediate priority.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	// Ignored before and ignored after: nothing to do.
	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	// Previously ignored, now visible: this is effectively a new file.
	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	// Previously visible, now ignored: drop it from the index.
	// The removal has to be keyed on the *old* name and directory,
	// since that is how the file was registered; looking it up under
	// the new (ignored) location would not resolve its unique id and
	// the stale entry would be left behind.
	if (! old_ignore && new_ignore) {
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an Add.
		AddFile (new_dir, new_name);
		return;
	}

	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we see
	// events in a directory that has not been fully scanned, just to
	// avoid races.  i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet?  Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1172 //////////////////////////////////////////////////////////////////////////
1174 // Configuration stuff
// Read-only accessor for the list held in roots_by_path.
public IList Roots {
	get { return roots_by_path; }
}
// Add every root named by the indexing configuration (the home
// directory first, if enabled, then each configured root) and then
// subscribe to changes of the indexing configuration section.
private void LoadConfiguration ()
{
	if (Conf.Indexing.IndexHomeDir) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration-change callback: compare the roots the configuration
// now asks for against our current Roots, then remove the ones that
// are no longer wanted and add the new ones (removals happen first).
private void OnConfigurationChanged (Conf.Section section)
{
	ArrayList wanted = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir) {
		wanted.Add (PathFinder.HomeDir);
	}

	IList to_add, to_remove;
	ArrayFu.IntersectListChanges (wanted, Roots, out to_add, out to_remove);

	foreach (string stale_root in to_remove) {
		RemoveRoot (stale_root);
	}

	foreach (string new_root in to_add) {
		AddRoot (new_root);
	}
}
1210 //////////////////////////////////////////////////////////////////////////
1213 // Our magic LuceneQueryable hooks
// Always returns false, regardless of the child passed in.
override protected bool PreChildAddHook (Indexable child)
{
	// FIXME: Remapping the Uris of children is tricky, and properly
	// serializing file: uris that contain fragments is an open issue
	// as well.  Until that is sorted out we punt on the whole thing
	// and drop every child indexable of a file system object.
	return false;
}
// Called after an indexable has been added.  Remaps the receipt's Uri
// to the external file: uri and, for everything except property
// changes, stores the file's attributes.
override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// If we just changed properties, remap to our *old* external Uri
	// to make notification work out properly.
	if (indexable.Type == IndexableType.PropertyChange) {
		string last_known_path = (string) indexable.LocalState ["LastKnownPath"];

		receipt.Uri = UriFu.PathToFileUri (last_known_path);
		Logger.Log.Debug ("Last known path is {0}", last_known_path);

		// The rename has reached the index, so the in-memory
		// record of the uid is no longer needed.
		ForgetId (last_known_path);
		return;
	}

	string path = (string) indexable.LocalState ["Path"];
	ForgetId (path);

	// The parent directory might have run away since we were indexed.
	DirectoryModel parent = indexable.LocalState ["Parent"] as DirectoryModel;
	if (parent != null && ! parent.IsAttached)
		return;

	Guid id = GuidFu.FromUri (receipt.Uri);

	FileAttributes attr = FileAttributesStore.ReadOrCreate (path, id);
	attr.Path = path;
	attr.LastWriteTime = indexable.Timestamp;
	attr.FilterName = receipt.FilterName;
	attr.FilterVersion = receipt.FilterVersion;

	// Indexables flagged "IsWalkable" go through RegisterDirectory;
	// if that fails we give up without writing attributes.
	bool is_walkable = indexable.LocalState ["IsWalkable"] != null;
	if (is_walkable) {
		string name = (string) indexable.LocalState ["Name"];
		if (! RegisterDirectory (name, parent, attr))
			return;
	}

	FileAttributesStore.Write (attr);

	// Remap the Uri so that change notification will work properly.
	receipt.Uri = UriFu.PathToFileUri (path);
}
// Called after an indexable has been removed.  Swaps the receipt's Uri
// for the external Uri cached under "RemovedUri" in the indexable's
// local state, and forgets the id for that path.
// Throws an Exception if no such Uri was cached.
override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	// Change notification only works with the external Uri, so the
	// cached one is mandatory here.
	Uri cached = indexable.LocalState ["RemovedUri"] as Uri;
	if (cached == null) {
		throw new Exception ("No cached external Uri for " + receipt.Uri);
	}

	receipt.Uri = cached;
	ForgetId (cached.LocalPath);
}
// Replace the hit's internal Uri with a file: Uri built from its path.
// The internal Uri is preserved in the "beagle:InternalUri" property.
// Returns true if the hit's Uri was remapped, false if no path could
// be determined.
private bool RemapUri (Hit hit)
{
	// Remember the internal uri before we overwrite it.
	Property internal_uri_prop;
	internal_uri_prop = Property.NewUnsearched ("beagle:InternalUri",
						    UriFu.UriToEscapedString (hit.Uri));
	hit.AddProperty (internal_uri_prop);

	string path;
	string name = hit [ExactFilenamePropKey];

	if (name != null) {
		// Normal case: assemble the path from the parent and name.
		string parent_id_uri = hit [ParentDirUriPropKey];
		if (parent_id_uri == null)
			return false;

		Guid parent_id = GuidFu.FromUriString (parent_id_uri);

		path = ToFullPath (name, parent_id);
		if (path == null)
			Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
					  name, GuidFu.ToShortString (parent_id));
	} else {
		// Without the filename property we have to do a lookup
		// based on the guid.  This happens with synthetic hits
		// produced by index listeners.
		path = UniqueIdToFullPath (GuidFu.FromUri (hit.Uri));
	}

	if (path == null)
		return false;

	hit.Uri = UriFu.PathToFileUri (path);
	return true;
}
1332 // Hit filter: this handles our mapping from internal->external uris,
1333 // and checks to see if the file is still there.
// Returns true if the hit should be kept: its Uri could be remapped to
// a path, something still exists at that path, and the filter does not
// ignore it.  Returns false otherwise.
override protected bool HitFilter (Hit hit)
{
	if (! RemapUri (hit))
		return false;

	string path = hit.Uri.LocalPath;

	bool is_directory = hit.MimeType == "inode/directory";
	bool exists = false;

	// A hit without a mime type is treated as a directory if a
	// directory exists at its path.
	if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
		is_directory = true;
		exists = true;
	}

	if (! exists)
		exists = is_directory ? Directory.Exists (path) : File.Exists (path);

	// If the file doesn't exist, we do not schedule a removal and
	// return false.  This is to avoid "losing" files if they are
	// in a directory that has been renamed but which we haven't
	// scanned yet... if we dropped them from the index, they would
	// never get re-indexed (or at least not until the next time they
	// were touched) since they would still be stamped with EAs
	// indicating they were up-to-date.  And that would be bad.
	// FIXME: It would be safe if we were in a known state, right?
	// i.e. every DirectoryModel is clean.
	if (! exists)
		return false;

	// Fetch the parent directory model from our cache to do clever
	// filtering to determine if we're ignoring it or not.
	DirectoryModel parent;
	parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

	// Keep the hit only if the filter does not ignore it.
	return ! filter.Ignore (parent, Path.GetFileName (path), is_directory);
}
// Build a snippet for the hit from the given query terms.  Returns
// null if the text cache has no entry for the hit's internal uri.
override public string GetSnippet (string [] query_terms, Hit hit)
{
	// The internal uri was saved in a property when the hit was
	// remapped, so recovering it is easy.
	Uri internal_uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);

	string cache_path = TextCache.UserCache.LookupPathRaw (internal_uri);
	if (cache_path == null)
		return null;

	// Self-cached content is read from the file at the remapped Uri;
	// everything else is read from the text cache.
	if (cache_path != TextCache.SELF_CACHE_TAG)
		return SnippetFu.GetSnippetFromTextCache (query_terms, cache_path);

	return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);
}
// Start the queryable: run the base class start, start the event
// backend, then load the configuration.
override public void Start ()
{
	base.Start ();

	// The event backend is started before the configuration is loaded.
	event_backend.Start (this);
	LoadConfiguration ();

	Logger.Log.Debug ("Done starting FileSystemQueryable");
}
1414 //////////////////////////////////////////////////////////////////////////
1416 // These are the methods that the IFileEventBackend implementations should
1417 // call in response to events.
// Note that something happened in the named directory by stamping its
// model's LastActivityTime with the current time.  Unknown directories
// and directories that do not need crawling are silently skipped.
public void ReportEventInDirectory (string directory_name)
{
	DirectoryModel model = GetDirectoryModelByPath (directory_name);

	// Fail silently if the lookup went wrong.  Beyond that, this
	// information only serves to prioritize the order in which we
	// crawl directories, so a directory that doesn't need to be
	// crawled can safely be ignored.
	if (model == null || ! model.NeedsCrawl)
		return;

	model.LastActivityTime = DateTime.Now;

	Logger.Log.Debug ("Saw event in '{0}'", directory_name);
}
// React to a file or directory 'file_name' appearing in
// 'directory_name'.  Logs a warning and does nothing if the containing
// directory has no DirectoryModel.
public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
{
	Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
			  is_directory ? "(dir)" : "(file)");

	DirectoryModel parent = GetDirectoryModelByPath (directory_name);
	if (parent == null) {
		Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
		return;
	}

	if (! is_directory) {
		AddFile (parent, file_name);
		return;
	}

	AddDirectory (parent, file_name);
}
// React to a file or directory 'file_name' disappearing from
// 'directory_name'.  Logs a warning and does nothing if the required
// DirectoryModel cannot be found.
public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
{
	Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
			  is_directory ? "(dir)" : "(file)");

	// For a directory we need the model of the removed directory
	// itself; for a file we need the model of its containing directory.
	string model_path = is_directory
		? Path.Combine (directory_name, file_name)
		: directory_name;

	DirectoryModel model = GetDirectoryModelByPath (model_path);
	if (model == null) {
		Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", model_path);
		return;
	}

	if (is_directory) {
		// Clear the watch handle before removing the directory.
		model.WatchHandle = null;
		RemoveDirectory (model);
	} else {
		RemoveFile (model, file_name);
	}
}
// React to a file or directory being moved/renamed.  Looks up the
// directory models involved and hands off to MoveFile or MoveDirectory.
// The looked-up models are passed along as-is, without null checks.
public void HandleMoveEvent (string old_directory_name, string old_file_name,
			     string new_directory_name, string new_file_name,
			     bool is_directory)
{
	Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
			  old_directory_name, old_file_name,
			  new_directory_name, new_file_name,
			  is_directory ? "(dir)" : "(file)");

	if (! is_directory) {
		// Files: we need the models of both containing directories.
		DirectoryModel old_dir = GetDirectoryModelByPath (old_directory_name);
		DirectoryModel new_dir = GetDirectoryModelByPath (new_directory_name);
		MoveFile (old_dir, old_file_name, new_dir, new_file_name);
		return;
	}

	// Directories: we need the model of the moved directory itself
	// (looked up under its old path) and of its new parent.
	string old_path = Path.Combine (old_directory_name, old_file_name);
	DirectoryModel dir = GetDirectoryModelByPath (old_path);
	DirectoryModel new_parent = GetDirectoryModelByPath (new_directory_name);
	MoveDirectory (dir, new_parent, new_file_name);
}
1510 public void HandleOverflowEvent ()
1512 Logger.Log.Debug ("Queue overflows suck");