Some more fixes wrt child-indexables. Namely, fix proper handling of child indexables...
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blobbe496bfd68a62c81d0c116e350098ac9bf9023a0
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 [PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable : LuceneQueryable {
44 static public new bool Debug = false;
46 // History:
47 // 1: Initially set to force a reindex due to NameIndex changes.
48 // 2: Overhauled everything to use new lucene infrastructure.
49 // 3: Switched to UTC for all times, changed the properties a bit.
50 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
51 // Make SplitFilenamePropKey unstored
52 // 5: Keyword properties in the private namespace are no longer lower cased; this is required to
53 // offset the change in LuceneCommon.cs
54 const int MINOR_VERSION = 5;
56 private object big_lock = new object ();
58 private IFileEventBackend event_backend;
60 // This is the task that walks the tree structure
61 private TreeCrawlTask tree_crawl_task;
63 // This is the task that finds the next place that
64 // needs to be crawled in the tree and spawns off
65 // the appropriate IndexableGenerator.
66 private FileCrawlTask file_crawl_task;
68 private ArrayList roots = new ArrayList ();
69 private ArrayList roots_by_path = new ArrayList ();
71 private FileNameFilter filter;
73 // This is just a copy of the LuceneQueryable's QueryingDriver
74 // cast into the right type for doing internal->external Uri
75 // lookups.
76 private LuceneNameResolver name_resolver;
78 //////////////////////////////////////////////////////////////////////////
80 private Hashtable cached_uid_by_path = new Hashtable ();
82 //////////////////////////////////////////////////////////////////////////
// Sets up the "FileSystemIndex" queryable: chooses a file event backend,
// creates both crawl tasks, caches the typed name resolver, preloads the
// directory name info, builds the file-name filter and subscribes to
// directory path expiry.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Fall back to the null backend unless inotify is enabled.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null FSQ file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify FSQ file event backend");
		event_backend = new InotifyBackend ();
	}

	// AddDirectory is the handler handed to the tree crawler.
	TreeCrawlTask.Handler add_directory_handler;
	add_directory_handler = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (add_directory_handler);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// The name resolver must be in place before the preload, which reads from it.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	filter = new FileNameFilter (this);

	// Drop cached path lookups whenever a DirectoryModel path expires.
	DirectoryModel.ExpireHandler on_expire;
	on_expire = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += on_expire;
}
// Supplies the attribute store for this backend: a FileAttributesStore_Mixed
// built from the index directory and index fingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store;
	store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
// Supplies the querying driver for this backend. A LuceneNameResolver is
// returned so the constructor can later cast Driver back to that type.
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								   int minor_version,
								   bool read_only_mode)
{
	LuceneQueryingDriver driver;
	driver = new LuceneNameResolver (index_name, minor_version, read_only_mode);
	return driver;
}
// The file-name filter created in the constructor.
public FileNameFilter Filter {
	get {
		return filter;
	}
}
129 //////////////////////////////////////////////////////////////////////////
132 // This is where we build our Indexables
// Adds the standard file properties for 'name' to 'indexable'. When
// 'parent_id' is not Guid.Empty, also adds an unsearched parent-directory
// uri property whose mutability follows 'mutable'.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string name,
						     Guid parent_id,
						     bool mutable)
{
	foreach (Property standard in Property.StandardFileProperties (name, mutable))
		indexable.AddProperty (standard);

	if (parent_id != Guid.Empty) {
		// We use the uri here to recycle terms in the index,
		// since each directory's uri will already be indexed.
		string parent_uri = GuidFu.ToUriString (parent_id);
		Property parent_prop = Property.NewUnsearched (Property.ParentDirUriPropKey, parent_uri);
		parent_prop.IsMutable = mutable;
		indexable.AddProperty (parent_prop);
	}
}
// Convenience overload taking the parent DirectoryModel (null for no
// parent). Delegates to the Guid overload and then records the parent
// model itself in the indexable's local state under "Parent".
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string name,
						     DirectoryModel parent,
						     bool mutable)
{
	Guid parent_id = Guid.Empty;
	if (parent != null)
		parent_id = parent.UniqueId;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
// Builds an Add indexable describing the directory at 'path' with the
// given unique id. Returns null if an IOException is raised while
// building it (the directory was presumably deleted).
public static Indexable DirectoryToIndexable (string path,
					      Guid id,
					      DirectoryModel parent)
{
	Indexable dir_indexable;
	try {
		dir_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		dir_indexable.MimeType = "inode/directory";
		dir_indexable.NoContent = true;
		dir_indexable.DisplayUri = UriFu.PathToFileUri (path);
		dir_indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		// Looks like the directory was deleted.
		return null;
	}

	// A directory without a parent is named by its whole path;
	// any other directory by its last path component.
	string name = (parent != null) ? Path.GetFileName (path) : path;
	AddStandardPropertiesToIndexable (dir_indexable, name, parent, true);

	Property is_directory = Property.NewBool (Property.IsDirectoryPropKey, true);
	is_directory.IsMutable = true; // we want this in the secondary index, for efficiency
	dir_indexable.AddProperty (is_directory);

	dir_indexable.LocalState ["Path"] = path;

	return dir_indexable;
}
// Builds an Add indexable for the file at 'path' with the given unique
// id; 'crawl_mode' is copied to the indexable's Crawled flag. Returns
// null if an IOException is raised while building it (the file was
// presumably deleted).
public static Indexable FileToIndexable (string path,
					 Guid id,
					 DirectoryModel parent,
					 bool crawl_mode)
{
	Indexable file_indexable;

	try {
		file_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		file_indexable.Timestamp = File.GetLastWriteTimeUtc (path);
		file_indexable.ContentUri = UriFu.PathToFileUri (path);
		file_indexable.DisplayUri = UriFu.PathToFileUri (path);
		file_indexable.Crawled = crawl_mode;
		file_indexable.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// Looks like the file was deleted.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (file_indexable, file_name, parent, true);

	file_indexable.LocalState ["Path"] = path;

	return file_indexable;
}
// Builds a PropertyChange indexable that carries the standard properties
// for the new name/parent, and remembers the id and the previous path in
// its local state ("Id" and "LastKnownPath").
private static Indexable NewRenamingIndexable (string name,
					       Guid id,
					       DirectoryModel parent,
					       string last_known_path)
{
	Indexable rename = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

	AddStandardPropertiesToIndexable (rename, name, parent, true);

	rename.LocalState ["Id"] = id;
	rename.LocalState ["LastKnownPath"] = last_known_path;

	return rename;
}
242 //////////////////////////////////////////////////////////////////////////
245 // Mapping from directory ids to paths
248 private Hashtable dir_models_by_id = new Hashtable ();
249 private Hashtable name_info_by_id = new Hashtable ();
251 // We fall back to using the name information in the index
252 // until we've fully constructed our set of DirectoryModels.
// Fills name_info_by_id with every directory NameInfo the name resolver
// reports, keyed by the info's Id.
private void PreloadDirectoryNameInfo ()
{
	foreach (LuceneNameResolver.NameInfo info in name_resolver.GetAllDirectoryNameInfo ()) {
		name_info_by_id [info.Id] = info;
	}
}
261 // This only works for directories.
// Resolves a directory id to its full path. A live DirectoryModel wins;
// otherwise the cached NameInfo is used, walking up through the parent
// ids. Returns null when the id (or any ancestor) cannot be resolved.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// An empty parent id means this is a root, whose name is its path.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	return (parent_path == null) ? null : Path.Combine (parent_path, cached.Name);
}
// Updates the cached NameInfo for 'id' (if there is one) with a new
// parent id and name. Does nothing when no NameInfo is cached.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Joins 'name' onto the path of the directory identified by 'parent_id'.
// With an empty parent id the name is returned unchanged; returns null
// when the parent directory cannot be resolved.
private string ToFullPath (string name, Guid parent_id)
{
	// This is the correct behavior for roots.
	if (parent_id == Guid.Empty)
		return name;

	string parent_path = UniqueIdToDirectoryName (parent_id);
	return (parent_path != null) ? Path.Combine (parent_path, name) : null;
}
310 // This works for both files and directories.
// Resolves an id to a full path, trying the directory caches first and
// then the name information stored in the index. Returns null if
// neither source knows the id.
private string UniqueIdToFullPath (Guid id)
{
	// First, check if it is a directory.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// If not, try to pull name information out of the index.
	LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (id);
	return (indexed == null) ? null : ToFullPath (indexed.Name, indexed.ParentId);
}
// Looks up the name recorded in the index for 'id'; null if the name
// resolver has no NameInfo for it.
private string UniqueIdToFileName (Guid id)
{
	LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (id);
	return (indexed != null) ? indexed.Name : null;
}
// Caches 'id' under the full path formed from 'dir' and 'name'.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops any cached unique id stored for 'path'.
private void ForgetId (string path)
{
	Hashtable cache = cached_uid_by_path;
	cache.Remove (path);
}
346 // This works for files. (It probably works for directories
347 // too, but you should use one of the more efficient means
348 // above if you know it is a directory.)
// Resolves the unique id of 'name' inside 'dir': the path cache is
// consulted first, and the name resolver is asked only on a cache miss.
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (cached_uid_by_path.Contains (full_path))
		return (Guid) cached_uid_by_path [full_path];

	return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
}
363 //////////////////////////////////////////////////////////////////////////
366 // Directory-related methods
369 private Hashtable dir_models_by_path = new Hashtable ();
// Finds the DirectoryModel for 'path'. The by-path cache is checked
// first; on a miss each root is walked, and a hit is stored back into
// the cache. Returns null if no root contains the path.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Walk each root until we find the correct path
	foreach (DirectoryModel root in roots) {
		DirectoryModel found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path) {
			dir_models_by_path [path] = found;
		}
		return found;
	}

	return null;
}
// Expiry handler (subscribed in the constructor): removes the expired
// path from the by-path cache. 'unique_id' is not used here.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug) {
		Logger.Log.Debug ("Expired '{0}'", expired_path);
	}

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Considers the directory 'name' under 'parent' (parent == null means
// 'name' is itself a full root path). Ignored, already-known or missing
// directories are skipped. Otherwise the directory is either scheduled
// for (re)indexing or, if its attributes are current and it is
// walkable, registered directly.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// FIXME: ! parent.HasChildWithName (name)
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path, name);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing;
	if (attr != null) {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.
		// We can do this since we preloaded all of the name
		// info in the directory via PreloadDirectoryNameInfo.
		string last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		needs_indexing = (last_known_name != path);
		if (needs_indexing)
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
	} else {
		// If it has no attributes, it definitely needs indexing.
		needs_indexing = true;
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing) {
		ScheduleDirectory (name, parent, attr, is_walkable);
		return;
	}

	if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Adds 'path' (after sanitizing it) as a new root. Adding a root that
// is already recorded only logs an error.
public void AddRoot (string path)
{
	string root_path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", root_path);

	bool already_known = roots_by_path.Contains (root_path);
	if (already_known) {
		Logger.Log.Error ("Trying to add an existing root: {0}", root_path);
		return;
	}

	// The path has to be recorded in roots_by_path before the root
	// is actually added, so that filtering works as we'd like.
	roots_by_path.Add (root_path);

	AddDirectory (null, root_path);
}
// Removes a previously added root: drops it from the root lists and
// removes its directory model (and with it the index entry).
//
// AddRoot records the *sanitized* form of each path, so when the path
// as given is not a known root we retry with its sanitized form before
// giving up. Paths that already match are handled exactly as before.
public void RemoveRoot (string path)
{
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		string sanitized = StringFu.SanitizePath (path);
		if (! roots_by_path.Contains (sanitized)) {
			Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
			return;
		}
		path = sanitized;
	}

	// Find our directory model for the root
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Queues a delayed add task that (re)indexes the directory 'name' under
// 'parent'. With no stored attributes a fresh unique id is generated and
// the last crawl time is DateTime.MinValue. Nothing is queued when the
// directory indexable cannot be built.
private void ScheduleDirectory (string name,
				DirectoryModel parent,
				FileAttributes attr,
				bool is_walkable)
{
	string path = (parent != null) ? Path.Combine (parent.FullName, name) : name;

	Guid id;
	DateTime last_crawl;
	if (attr != null) {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	} else {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	}

	Indexable dir_indexable = DirectoryToIndexable (path, id, parent);
	if (dir_indexable == null)
		return;

	dir_indexable.LocalState ["Name"] = name;
	dir_indexable.LocalState ["LastCrawl"] = last_crawl;
	dir_indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (dir_indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
// Creates the DirectoryModel for 'name' under 'parent' (or a new root
// when parent is null), marks it dirty if the directory's mtime is newer
// than the stored attribute time, records it in the lookup tables,
// starts watching it and queues it for crawling. Returns false if
// reading the mtime raises an IOException, true otherwise.
private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	bool is_root = (parent == null);
	string path = is_root ? name : Path.Combine (parent.FullName, name);

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

	DateTime mtime;
	try {
		mtime = Directory.GetLastWriteTimeUtc (path);
	} catch (IOException) {
		Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
		return false;
	}

	DirectoryModel model;
	if (is_root)
		model = DirectoryModel.NewRoot (big_lock, path, attr);
	else
		model = parent.AddChild (name, attr);

	if (mtime > attr.LastWriteTime) {
		model.State = DirectoryState.Dirty;
		if (Debug)
			Logger.Log.Debug ("'{0}' is dirty", path);
	}

	if (Debug) {
		if (model.IsRoot)
			Logger.Log.Debug ("Created model '{0}'", model.FullName);
		else
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", model.FullName, model.Parent.FullName);
	}

	// Add any roots we create to the list of roots
	if (model.IsRoot)
		roots.Add (model);

	// The live model supersedes any NameInfo cached for this id.
	Guid model_id = model.UniqueId;
	dir_models_by_id [model_id] = model;
	name_info_by_id.Remove (model_id);

	// Start watching the directory.
	model.WatchHandle = event_backend.CreateWatch (path);

	// Schedule this directory for crawling.
	bool newly_queued = tree_crawl_task.Add (model);
	if (newly_queued)
		ThisScheduler.Add (tree_crawl_task);

	// Make sure that our file crawling task is active,
	// since presumably we now have something new to crawl.
	ActivateFileCrawling ();

	return true;
}
// Depth-first: for every directory in the subtree rooted at 'dir',
// forgets its watch (if it has one) and removes it from the by-id table.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel sub_dir in dir.Children) {
		ForgetDirectoryRecursively (sub_dir);
	}

	object handle = dir.WatchHandle;
	if (handle != null) {
		event_backend.ForgetWatch (dir.WatchHandle);
	}

	dir_models_by_id.Remove (dir.UniqueId);
	// We rely on the expire event to remove it from dir_models_by_path
}
// Removes a directory model: forgets watches and by-id references for
// its whole subtree, detaches the model, and queues an immediate task
// carrying a Remove indexable for the directory's index entry.
private void RemoveDirectory (DirectoryModel dir)
{
	Indexable removal = new Indexable (IndexableType.Remove, GuidFu.ToUri (dir.UniqueId));

	// Remember a copy of our external Uri, so that we can
	// easily remap it in the PostRemoveHook. This must be captured
	// before dir.Remove () below.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

	// Forget watches and internal references
	ForgetDirectoryRecursively (dir);

	// Calling Remove will expire the path names,
	// so name caches will be cleaned up accordingly.
	dir.Remove ();

	// We *add* the indexable to *remove* the index item
	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Removes the directory at 'path' if a DirectoryModel can be found for
// it; otherwise does nothing.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
// Handles a directory being renamed and/or moved.
//
//   dir        - the model of the directory being moved, or null if we
//                never had a model for it.
//   new_parent - the new parent, or null if this is only a rename.
//   new_name   - the directory's new name.
//
// With no model, the move is treated as a fresh add under new_parent.
// Otherwise the model is re-parented/renamed, the caches are updated
// and an immediate property-change task is queued.
// Throws if asked to move a root.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	if (dir == null) {
		// new_parent may legitimately be null (plain rename). Without
		// either a model or a parent we have no idea where the directory
		// lives, so there is nothing sensible to add; previously this
		// case crashed dereferencing new_parent.
		if (new_parent == null) {
			Logger.Log.Warn ("Couldn't find DirectoryModel for directory being renamed to '{0}', and no parent is known; ignoring.",
					 new_name);
			return;
		}

		Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
				 new_name, new_parent.FullName);
		AddDirectory (new_parent, new_name);
		return;
	}

	if (dir.IsRoot)
		throw new Exception ("Can't move root " + dir.FullName);

	// We'll need this later in order to generate the
	// right change notification.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	// Remember this by path
	lock (dir_models_by_path)
		dir_models_by_path [dir.FullName] = dir;

	CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  dir.Parent, // == new_parent
					  old_path);
	indexable.LocalState ["OurDirectoryModel"] = dir;

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now that type no longer exists!
	ThisScheduler.Add (task);
}
687 //////////////////////////////////////////////////////////////////////////
690 // This code controls the directory crawl order
// Recursively scans 'contender' and its descendants and returns the
// best crawl candidate seen so far. A directory that needs crawling
// replaces the current best when there is none yet or when
// best.CompareTo (candidate) is negative.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	if (contender.NeedsCrawl && (best == null || best.CompareTo (contender) < 0))
		best = contender;

	foreach (DirectoryModel sub_dir in contender.Children) {
		best = StupidWalk (best, sub_dir);
	}

	return best;
}
// Picks the next directory to crawl by running StupidWalk over every
// root; null if no directory needs crawling.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
// Called once a directory has been crawled: stamps the stored attributes
// and the model with the current UTC time and marks the model clean.
// Detached directories, and directories with no stored attributes, are
// left untouched.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	FileAttributes dir_attr = FileAttributesStore.Read (dir.FullName);

	// Don't mark ourselves; let the crawler redo us
	if (dir_attr == null)
		return;

	// We don't have to be super-careful about this since
	// we only use the FileAttributes mtime on a directory
	// to determine its initial state, not whether or not
	// its index record is up-to-date.
	dir_attr.LastWriteTime = DateTime.UtcNow;

	// ...but we do use this to decide which order directories get
	// crawled in.
	dir.LastCrawlTime = DateTime.UtcNow;

	FileAttributesStore.Write (dir_attr);
	dir.MarkAsClean ();
}
// Marks an attached directory as uncrawlable, first dropping its watch
// if one was set up. Detached directories are ignored.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	bool has_watch = (dir.WatchHandle != null);
	if (has_watch) {
		event_backend.ForgetWatch (dir.WatchHandle);
		dir.WatchHandle = null;
	}

	dir.MarkAsUncrawlable ();
}
// Re-crawls 'path'. If the path itself has a directory model, that
// model and all its descendants are reset to PossiblyClean and queued.
// Otherwise the model for the containing directory
// (FileSystem.GetDirectoryNameRootOk) is queued on its own. If neither
// model exists, this only logs.
public void Recrawl (string path)
{
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		target = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	if (tree_crawl_task.Add (target))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Resets every root (and everything beneath it) to PossiblyClean, then
// makes sure both crawl tasks are scheduled.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root_dir in roots) {
		Recrawl_Recursive (root_dir, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Sets 'state' on 'dir', hands it to the tree crawl task, and repeats
// for each of its children.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);

	foreach (DirectoryModel child in dir.Children) {
		Recrawl_Recursive (child, state);
	}
}
// Adds the file crawl task to the scheduler unless it is already active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Adds the tree crawl task to the scheduler unless it is already active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
819 //////////////////////////////////////////////////////////////////////////
822 // File-related methods
// What the crawler has to do with a file it has just looked at.
private enum RequiredAction {
	// Leave the file alone.
	None,

	// (Re)index the file.
	Index,

	// The file is already indexed but its path changed.
	Rename,

	// The file is ignored now but still carries attributes; drop them.
	Forget
}
// Decides what the crawler should do with the file 'name' in 'dir',
// given its stored attributes 'attr' (null if it has none).
//
// Returns:
//   Forget - the file is ignored by the filter but has attributes.
//   None   - the file is ignored, cannot be stat-ed, or is unchanged.
//   Index  - the file has no attributes, its filter has a newer version,
//            its mtime differs from the stored one, or its ctime is
//            newer and the last known path cannot be determined.
//   Rename - its ctime is newer than the stored attribute time and its
//            path differs from the last known path.
//
// 'last_known_path' is set to the path resolved from attr.UniqueId in
// the mtime/ctime branches and is null otherwise. In the mtime branch
// attr.UniqueId may be replaced with a fresh Guid (see below).
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
						string name,
						FileAttributes attr,
						out string last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously. Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take in to account that we might have a better matching filter to use now
	// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	int stat_result;
	try {
		stat_result = Mono.Unix.Native.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug (ex, "Caught exception stat-ing {0}", path);
		return RequiredAction.None;
	}

	// stat reports failure through its return value rather than by
	// throwing; on failure the struct holds no usable times, so treat
	// this exactly like the exception case above.
	if (stat_result != 0) {
		Logger.Log.Debug ("stat failed for {0}", path);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = DateTimeUtil.UnixToDateTimeUtc (stat.st_mtime);
	last_attr_time = DateTimeUtil.UnixToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})",
					  DateTimeUtil.ToString (attr.LastWriteTime),
					  DateTimeUtil.ToString (last_write_time));

		// If the file has been copied, it will have the
		// original file's EAs. Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file. If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved. We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})",
					  DateTimeUtil.ToString (attr.LastAttrTime),
					  DateTimeUtil.ToString (last_attr_time));

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
949 // Return an indexable that will do the right thing with a file
950 // (or null, if the right thing is to do nothing)
// Crawler entry point for a single file. Reads the file's stored
// attributes, asks DetermineRequiredAction what to do, and returns an
// indexing or renaming indexable accordingly. For Forget the stored
// attributes are dropped and null is returned; for None null is
// returned without touching anything.
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (path);

	string last_known_path;
	RequiredAction action = DetermineRequiredAction (dir, name, attr, out last_known_path);

	if (action == RequiredAction.None)
		return null;

	// Read the id only now: DetermineRequiredAction may have replaced it.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	if (action == RequiredAction.Index)
		return FileToIndexable (path, unique_id, dir, true);

	if (action == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (action == RequiredAction.Forget)
		FileAttributesStore.Drop (path);

	return null;
}
// Queues an immediate add task for the file 'name' in 'dir'. Files that
// do not exist, are special files, or are ignored by the filter are
// skipped. The id stored in the file's attributes is reused only when
// the index agrees on both name and parent; otherwise a new id is made.
public void AddFile (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);

	if (! File.Exists (path)
	    || FileSystem.IsSpecialFile (path)
	    || filter.Ignore (dir, name, false))
		return;

	// If this file already has extended attributes,
	// make sure that the name matches the file
	// that is in the index. If not, it could be
	// a copy of an already-indexed file and should
	// be assigned a new unique id.
	Guid unique_id = Guid.Empty;
	FileAttributes attr = FileAttributesStore.Read (path);
	if (attr != null) {
		LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (attr.UniqueId);
		bool is_same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (is_same_file)
			unique_id = attr.UniqueId;
	}

	if (unique_id == Guid.Empty)
		unique_id = Guid.NewGuid ();

	RegisterId (name, dir, unique_id);

	Indexable file_indexable = FileToIndexable (path, unique_id, dir, false);
	if (file_indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (file_indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
// Queues an immediate task that removes the file 'name' in 'dir' from
// the index. If the file's unique id cannot be resolved, this only logs.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignored.
	// Right?

	Guid unique_id = NameAndParentToId (name, dir);
	if (unique_id == Guid.Empty) {
		Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
				 name, dir.FullName);
		return;
	}

	Indexable removal = new Indexable (IndexableType.Remove, GuidFu.ToUri (unique_id));
	// The external file uri travels along with the removal.
	removal.LocalState ["RemovedUri"] = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

	Scheduler.Task removal_task = NewAddTask (removal);
	removal_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (removal_task);
}
// Handles a file moving from old_dir/old_name to new_dir/new_name.
//
// - Ignored before and after: nothing to do.
// - Ignored before, tracked after: treated as an add of the new file.
// - Tracked before, ignored after: treated as a removal of the OLD
//   name/parent, since that is the identity the id lookup knows.
// - Tracked before and after: the cached id is moved to the new path
//   and an immediate rename task is queued. If the old id cannot be
//   resolved, the move is treated as an add.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// RemoveFile resolves the id from name + parent, and nothing
		// has been registered under the new name, so the removal has
		// to be expressed in terms of the old location.
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an Add.
		AddFile (new_dir, new_name);
		return;
	}

	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we see
	// events in a directory that has not been fully scanned, just to
	// avoid races. i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet? Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1134 //////////////////////////////////////////////////////////////////////////
1136 // Configuration stuff
// Read-only view of roots_by_path.  OnConfigurationChanged compares this
// list against the configured roots to work out what to add or remove.
public IList Roots {
	get { return roots_by_path; }
}
// Add every configured root (plus the home directory, when enabled) and
// subscribe to changes of the indexing configuration.
private void LoadConfiguration ()
{
	// The home directory is controlled by its own switch and goes first.
	if (Conf.Indexing.IndexHomeDir) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	// From now on OnConfigurationChanged is invoked for IndexingConfig changes.
	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration callback: bring the set of active roots in line with
// what the indexing configuration now asks for.
private void OnConfigurationChanged (Conf.Section section)
{
	// Build the list of roots the configuration wants.
	ArrayList desired = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir) {
		desired.Add (PathFinder.HomeDir);
	}

	// NOTE(review): IntersectListChanges presumably reports entries missing
	// from Roots as "to add" and entries no longer wanted as "to remove" --
	// confirm against ArrayFu.
	IList added, removed;
	ArrayFu.IntersectListChanges (desired, Roots, out added, out removed);

	// Removals are processed before additions.
	foreach (string stale_root in removed) {
		RemoveRoot (stale_root);
	}

	foreach (string fresh_root in added) {
		AddRoot (fresh_root);
	}
}
1172 //////////////////////////////////////////////////////////////////////////
1175 // Our magic LuceneQueryable hooks
// True while either the file crawl task or the tree crawl task is active.
override protected bool IsIndexing {
	get {
		// FIXME: There is a small race window here, between the starting
		// of the backend and when either of these tasks first starts
		// running.  In reality it doesn't come up much, so it's not
		// urgent to fix.
		if (file_crawl_task.IsActive)
			return true;

		return tree_crawl_task.IsActive;
	}
}
// Called with the receipt of an added indexable.  Rewrites receipt.Uri
// to the external file uri so that change notification refers to a path.
override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// Child indexables are left alone.
	if (indexable.ParentUri != null)
		return;

	if (indexable.Type != IndexableType.PropertyChange) {
		// Regular add: the path was stored in the indexable's local state.
		string added_path = (string) indexable.LocalState ["Path"];
		if (Debug)
			Log.Debug ("PostAddHook for {0} ({1}) and receipt uri={2}", indexable.Uri, added_path, receipt.Uri);

		// Remap the Uri so that change notification will work properly
		receipt.Uri = UriFu.PathToFileUri (added_path);
		return;
	}

	// Only the properties changed: remap to our *old* external Uri
	// to make notification work out properly.
	string previous_path = (string) indexable.LocalState ["LastKnownPath"];
	receipt.Uri = UriFu.PathToFileUri (previous_path);
	Logger.Log.Debug ("Last known path is {0}", previous_path);

	// This rename is now in the index, so we no longer need to keep
	// track of the uid in memory.
	ForgetId (previous_path);
}
// Called with the receipt of a removed indexable.  Replaces receipt.Uri
// with the external uri cached under "RemovedUri" in the indexable's
// local state and forgets the id kept for that path.
// Throws an Exception when no external uri was cached.
override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	// We have to remap the uri to make change notification work.
	Uri removed_uri = indexable.LocalState ["RemovedUri"] as Uri;

	if (removed_uri != null) {
		receipt.Uri = removed_uri;
		ForgetId (removed_uri.LocalPath);
		return;
	}

	throw new Exception ("No cached external Uri for " + receipt.Uri);
}
// Called once an indexable has been indexed together with its children.
// Records path, timestamp and filter information for the file in the
// FileAttributesStore and, for walkable entries, registers the directory.
// The Mtime parameter is not used by this implementation.
override protected void PostChildrenIndexedHook (Indexable indexable,
	IndexerAddedReceipt receipt,
	DateTime Mtime)
{
	// There is no business here if only the property changed ...
	if (indexable.Type == IndexableType.PropertyChange)
		return;

	// ... nor for children.
	if (indexable.ParentUri != null)
		return;

	string indexed_path = (string) indexable.LocalState ["Path"];
	if (Debug)
		Log.Debug ("PostChildrenIndexedHook for {0} ({1}) and receipt uri={2}", indexable.Uri, indexed_path, receipt.Uri);

	ForgetId (indexed_path);

	DirectoryModel parent_dir = indexable.LocalState ["Parent"] as DirectoryModel;

	// The parent directory might have run away since we were indexed
	if (parent_dir != null && ! parent_dir.IsAttached)
		return;

	// The unique id is derived from the receipt's uri.
	FileAttributes attr = FileAttributesStore.ReadOrCreate (indexed_path, GuidFu.FromUri (receipt.Uri));

	attr.Path = indexed_path;
	// FIXME: Should timestamp be indexable.timestamp or parameter Mtime
	attr.LastWriteTime = indexable.Timestamp;
	attr.FilterName = receipt.FilterName;
	attr.FilterVersion = receipt.FilterVersion;

	bool is_walkable = indexable.LocalState ["IsWalkable"] != null;
	if (is_walkable) {
		string dir_name = (string) indexable.LocalState ["Name"];

		// If the directory cannot be registered, the attributes are not written.
		if (! RegisterDirectory (dir_name, parent_dir, attr))
			return;
	}

	FileAttributesStore.Write (attr);
}
// Replace the hit's internal uri by the file uri of the path it resolves
// to.  The internal uri is preserved in the unsearched property
// "beagle:InternalUri".  Returns false when no path could be determined.
private bool RemapUri (Hit hit)
{
	// Store the hit's internal uri in a property
	hit.AddProperty (Property.NewUnsearched ("beagle:InternalUri",
		UriFu.UriToEscapedString (hit.Uri)));

	// Now assemble the path by looking at the parent and name.
	// For child hits the filename is read from the "parent:"-prefixed property.
	string filename_key = Property.ExactFilenamePropKey;
	if (hit [Property.IsChildPropKey] == "true")
		filename_key = "parent:" + Property.ExactFilenamePropKey;

	string name = hit [filename_key];
	string resolved_path;

	if (name != null) {
		string parent_uri_string = hit [Property.ParentDirUriPropKey];
		if (parent_uri_string == null)
			parent_uri_string = hit ["parent:" + Property.ParentDirUriPropKey];
		if (parent_uri_string == null)
			return false;

		Guid parent_id = GuidFu.FromUriString (parent_uri_string);

		resolved_path = ToFullPath (name, parent_id);
		if (resolved_path == null)
			Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
				name, GuidFu.ToShortString (parent_id));
	} else {
		// If we don't have the filename property, we have to do a lookup
		// based on the guid.  This happens with synthetic hits produced by
		// index listeners.
		resolved_path = UniqueIdToFullPath (GuidFu.FromUri (hit.Uri));
	}

	if (Debug)
		Log.Debug ("Resolved {0} to {1}", hit.Uri, resolved_path);

	if (resolved_path == null)
		return false;

	hit.Uri = UriFu.PathToFileUri (resolved_path);
	return true;
}
// Hit filter: this handles our mapping from internal->external uris,
// and checks to see if the file is still there.
//
// Returns true when the hit should be reported: its uri could be
// remapped, the file or directory exists on disk, and the filter does
// not ignore it.  For child hits, hit.Uri additionally gets the
// fragment of the original uri and hit.ParentUri is set to the file uri.
override protected bool HitFilter (Hit hit)
{
	// Keep the internal uri around; RemapUri overwrites hit.Uri and the
	// fragment is still needed for child hits below.
	Uri old_uri = hit.Uri;
	if (Debug)
		Log.Debug ("HitFilter ({0})", old_uri);

	if (! RemapUri (hit))
		return false;

	string path;
	path = hit.Uri.LocalPath;

	bool is_directory;
	bool exists = false;

	is_directory = hit.MimeType == "inode/directory";

	// Hits without a mime type: find out from the disk whether this is
	// a directory.
	if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
		is_directory = true;
		exists = true;
	}

	if (! exists) {
		if (is_directory)
			exists = Directory.Exists (path);
		else
			exists = File.Exists (path);
	}

	// If the file doesn't exist, we do not schedule a removal and
	// return false.  This is to avoid "losing" files if they are
	// in a directory that has been renamed but which we haven't
	// scanned yet... if we dropped them from the index, they would
	// never get re-indexed (or at least not until the next time they
	// were touched) since they would still be stamped with EAs
	// indicating they were up-to-date.  And that would be bad.
	// FIXME: It would be safe if we were in a known state, right?
	// i.e. every DirectoryModel is clean.
	if (! exists)
		return false;

	// Fetch the parent directory model from our cache to do clever
	// filtering to determine if we're ignoring it or not.
	DirectoryModel parent;
	parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

	// If child indexable, attach the relative URI at the end
	// Relative URI starts with '#'
	string is_child = hit [Property.IsChildPropKey];
	if (is_child == "true") {
		hit.Uri = UriFu.PathToFileUri (path, old_uri.Fragment);
		hit.ParentUri = UriFu.PathToFileUri (path);
	}

	// Check the ignore status of the hit.  The check is always made
	// against the name of the file on disk, also for child hits.
	if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
		return false;

	return true;
}
// Produce a snippet for 'hit' matching 'query_terms', or null when the
// text cache has no entry for the hit's internal uri.
override public string GetSnippet (string [] query_terms, Hit hit)
{
	// Uri remapping from a hit is easy: the internal uri
	// is stored in a property.
	Uri internal_uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);
	string cache_path = TextCache.UserCache.LookupPathRaw (internal_uri);

	if (cache_path == null)
		return null;

	if (cache_path != TextCache.SELF_CACHE_TAG)
		return SnippetFu.GetSnippetFromTextCache (query_terms, cache_path);

	// If this is self-cached, use the remapped Uri
	return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);
}
// Start the backend: run the base class start-up, start the file event
// backend with this queryable, then load the configuration.
override public void Start ()
{
	base.Start ();

	// The event backend is started before any root is added by
	// LoadConfiguration.
	event_backend.Start (this);
	LoadConfiguration ();

	Logger.Log.Debug ("Done starting FileSystemQueryable");
}
1421 //////////////////////////////////////////////////////////////////////////
1423 // These are the methods that the IFileEventBackend implementations should
1424 // call in response to events.
// Note that something happened inside 'directory_name' by refreshing the
// LastActivityTime of its DirectoryModel.  Unknown directories and
// directories that do not need crawling are skipped.
public void ReportEventInDirectory (string directory_name)
{
	DirectoryModel dir = GetDirectoryModelByPath (directory_name);

	// If something goes wrong (no model), just fail silently.
	// We only use this information to prioritize the order in which
	// we crawl directories --- so if this directory doesn't
	// actually need to be crawled, we can safely ignore it.
	if (dir == null || ! dir.NeedsCrawl)
		return;

	dir.LastActivityTime = DateTime.Now;
	Logger.Log.Debug ("Saw event in '{0}'", directory_name);
}
// Event backend entry point: 'file_name' appeared in 'directory_name'.
// Dispatches to AddDirectory or AddFile; logs a warning and does nothing
// when the containing directory has no DirectoryModel.
public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
{
	string kind_label = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
		kind_label);

	DirectoryModel parent_dir = GetDirectoryModelByPath (directory_name);
	if (parent_dir == null) {
		Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
		return;
	}

	if (! is_directory) {
		AddFile (parent_dir, file_name);
		return;
	}

	AddDirectory (parent_dir, file_name);
}
// Event backend entry point: 'file_name' disappeared from 'directory_name'.
// For a directory the model of the removed directory itself is looked up;
// for a file, the model of the containing directory.  A warning is logged
// and nothing else happens when that model cannot be found.
public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
{
	string kind_label = is_directory ? "(dir)" : "(file)";
	Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
		kind_label);

	// Which model we need depends on what was removed.
	string model_path = directory_name;
	if (is_directory)
		model_path = Path.Combine (directory_name, file_name);

	DirectoryModel model = GetDirectoryModelByPath (model_path);
	if (model == null) {
		Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", model_path);
		return;
	}

	if (! is_directory) {
		RemoveFile (model, file_name);
		return;
	}

	// Clear the watch handle before removing the directory.
	model.WatchHandle = null;
	RemoveDirectory (model);
}
// Event backend entry point: an entry moved from
// old_directory_name/old_file_name to new_directory_name/new_file_name.
//
// Directories are handed to MoveDirectory.  Files are handed to MoveFile
// when both containing directories have a DirectoryModel; if only one
// side is known, the move is turned into the matching add or remove
// (MoveFile dereferences old_dir, so it must not be called with a
// missing model).  If neither side is known, a warning is logged.
public void HandleMoveEvent (string old_directory_name, string old_file_name,
	string new_directory_name, string new_file_name,
	bool is_directory)
{
	Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
		old_directory_name, old_file_name,
		new_directory_name, new_file_name,
		is_directory ? "(dir)" : "(file)");

	if (is_directory) {
		// NOTE(review): both lookups may yield null here; MoveDirectory is
		// expected to cope with that -- confirm against its implementation.
		DirectoryModel dir, new_parent;
		dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
		new_parent = GetDirectoryModelByPath (new_directory_name);
		MoveDirectory (dir, new_parent, new_file_name);
		return;
	}

	DirectoryModel old_dir, new_dir;
	old_dir = GetDirectoryModelByPath (old_directory_name);
	new_dir = GetDirectoryModelByPath (new_directory_name);

	if (old_dir == null && new_dir == null) {
		Logger.Log.Warn ("HandleMoveEvent failed: Couldn't find DirectoryModel for '{0}' or '{1}'",
			old_directory_name, new_directory_name);
		return;
	}

	if (old_dir == null) {
		// We know nothing about the source directory, so there is no
		// entry to rename: treat the file as newly added.
		AddFile (new_dir, new_file_name);
		return;
	}

	if (new_dir == null) {
		// The file left the directories we know about: treat it as removed.
		RemoveFile (old_dir, old_file_name);
		return;
	}

	MoveFile (old_dir, old_file_name, new_dir, new_file_name);
}
1517 public void HandleOverflowEvent ()
1519 Logger.Log.Debug ("Queue overflows suck");