Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / FileSystemQueryable / FileSystemQueryable.cs
blob85eb62a99f91090789593a8a6e6b4ba53005d744
1 //
2 // FileSystemQueryable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
30 using System.Reflection;
31 using System.Text;
32 using System.Threading;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.FileSystemQueryable {
39 [QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
40 public class FileSystemQueryable : LuceneQueryable {
// Set to true to get verbose per-file decision logging from this backend.
static public bool Debug = true;

// Property keys used to store file-system metadata in the Lucene index.
private const string OldExternalUriPropKey = LuceneCommon.UnindexedNamespace + "OldExternalUri";
private const string SplitFilenamePropKey = "beagle:Filename";
public const string ExactFilenamePropKey = "beagle:ExactFilename";
public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";
// Index-format minor version; bumping it forces a full reindex.
// History:
// 1: Initially set to force a reindex due to NameIndex changes.
// 2: Overhauled everything to use new lucene infrastructure.
const int MINOR_VERSION = 2;
// Lock shared by all DirectoryModels created by this queryable.
private object big_lock = new object ();

// Backend delivering file-change events (Inotify or FileSystemWatcher).
private IFileEventBackend event_backend;

// This is the task that walks the tree structure
private TreeCrawlTask tree_crawl_task;

// This is the task that finds the next place that
// needs to be crawled in the tree and spawns off
// the appropriate IndexableGenerator.
private FileCrawlTask file_crawl_task;

// Root DirectoryModels, and the corresponding root paths.
private ArrayList roots = new ArrayList ();
private ArrayList roots_by_path = new ArrayList ();

// This is a cache of the external Uris of removed
// objects, keyed on their internal Uris. We use this
// to remap Uris on removes.
private Hashtable removed_uri_cache = UriFu.NewHashtable ();

// Decides which files/directories should be skipped entirely.
private FileNameFilter filter;

// This is just a copy of the LuceneQueryable's QueryingDriver
// cast into the right type for doing internal->external Uri
// lookups.
private LuceneNameResolver name_resolver;
82 //////////////////////////////////////////////////////////////////////////
// State remembered for an indexable that has been scheduled but whose
// result has not yet come back through PostAddHook.
private class PendingInfo {
	public Uri Uri; // an internal uid: uri
	public string Path;
	public bool IsDirectory;
	public DateTime Mtime;

	// This is set when we are adding a subdirectory to a
	// given parent directory.
	public DirectoryModel Parent;

	// Roots are the only items without a parent.
	public bool IsRoot { get { return Parent == null; } }
}

// Pending state keyed on the internal uid: Uri; consumed by PostAddHook.
private Hashtable pending_info_cache = UriFu.NewHashtable ();
99 //////////////////////////////////////////////////////////////////////////
// Wires up the event backend, crawl tasks, name resolver and filter.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Set up our event backend; prefer inotify when the kernel supports it.
	if (Inotify.Enabled) {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	} else {
		Logger.Log.Debug ("Starting FileSystemWatcher Backend");
		event_backend = new FileSystemWatcherBackend ();
	}

	tree_crawl_task = new TreeCrawlTask (new TreeCrawlTask.Handler (AddDirectory));
	file_crawl_task = new FileCrawlTask (this);

	// Cache directory name info from the index before any models exist.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// Setup our file-name filter
	filter = new FileNameFilter (this);

	// Do the right thing when paths expire
	DirectoryModel.ExpireEvent +=
		new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
}
// Use the mixed store: extended attributes when possible, sqlite otherwise.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	return new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
}
// Substitute a LuceneNameResolver so we can do uid <-> name lookups
// against the index (see the name_resolver field).
override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
								   int minor_version,
								   bool read_only_mode)
{
	return new LuceneNameResolver (index_name, minor_version, read_only_mode);
}
// The file-name filter deciding what gets ignored.
public FileNameFilter Filter {
	get { return filter; }
}
143 //////////////////////////////////////////////////////////////////////////
146 // This is where we build our Indexables
// Build a bare Indexable addressed by the given unique id.
private static Indexable NewIndexable (Guid id)
{
	// This used to do more. Maybe it will again someday.
	return new Indexable (GuidFu.ToUri (id));
}
// Attach the standard file-system properties (exact name, fuzzily-split
// name, and parent-directory uri) to an indexable.
public static void AddStandardPropertiesToIndexable (Indexable indexable,
						     string name,
						     Guid parent_id,
						     bool mutable)
{
	// The exact file name, stored as a keyword.
	Property p = Property.NewKeyword (ExactFilenamePropKey, name);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	// The name without its extension, fuzzily divided for tokenized matching.
	string split_name = StringFu.FuzzyDivide (Path.GetFileNameWithoutExtension (name));
	p = Property.New (SplitFilenamePropKey, split_name);
	p.IsMutable = mutable;
	indexable.AddProperty (p);

	// Roots have no parent, so there is nothing more to record.
	if (parent_id == Guid.Empty)
		return;

	// We use the uri here to recycle terms in the index,
	// since each directory's uri will already be indexed.
	p = Property.NewKeyword (ParentDirUriPropKey, GuidFu.ToUriString (parent_id));
	p.IsMutable = mutable;
	indexable.AddProperty (p);
}
// Build the indexable describing a directory.
public static Indexable DirectoryToIndexable (string path, Guid id, Guid parent_id)
{
	Indexable indexable = NewIndexable (id);
	indexable.MimeType = "inode/directory";
	indexable.NoContent = true;
	indexable.Timestamp = Directory.GetLastWriteTime (path);

	// Roots are named by their full path; all others by their leaf name.
	string name = (parent_id == Guid.Empty) ? path : Path.GetFileName (path);
	AddStandardPropertiesToIndexable (indexable, name, parent_id, true);

	Property prop = Property.NewBool (IsDirectoryPropKey, true);
	prop.IsMutable = true; // we want this in the secondary index, for efficiency
	indexable.AddProperty (prop);

	return indexable;
}
// Build the indexable for a regular file's content plus standard properties.
public static Indexable FileToIndexable (string path,
					 Guid id,
					 Guid parent_id,
					 bool crawl_mode)
{
	Indexable indexable = NewIndexable (id);
	indexable.ContentUri = UriFu.PathToFileUri (path);
	indexable.Crawled = crawl_mode;
	indexable.Filtering = Beagle.IndexableFiltering.Always;

	AddStandardPropertiesToIndexable (indexable, Path.GetFileName (path), parent_id, true);

	return indexable;
}
// Build a property-change-only indexable that renames an item, recording
// the last known path so change notification can be remapped later.
private static Indexable NewRenamingIndexable (string name,
					       Guid id,
					       Guid parent_id,
					       string last_known_path)
{
	Indexable indexable = new Indexable (GuidFu.ToUri (id));
	indexable.PropertyChangesOnly = true;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, true);

	Property prop = Property.NewKeyword (OldExternalUriPropKey,
					     StringFu.PathToQuotedFileUri (last_known_path));
	prop.IsMutable = true; // since this is a property-change-only Indexable
	indexable.AddProperty (prop);

	return indexable;
}
245 //////////////////////////////////////////////////////////////////////////
// Mapping from directory ids to paths:
// live DirectoryModels by unique id, and preloaded index name info by id.
private Hashtable dir_models_by_id = new Hashtable ();
private Hashtable name_info_by_id = new Hashtable ();
// We fall back to using the name information in the index
// until we've fully constructed our set of DirectoryModels.
private void PreloadDirectoryNameInfo ()
{
	foreach (LuceneNameResolver.NameInfo info in name_resolver.GetAllDirectoryNameInfo ())
		name_info_by_id [info.Id] = info;
}
// Map a unique id to a full directory path, preferring live models and
// falling back to the name info preloaded from the index.
// This only works for directories.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel dir = dir_models_by_id [id] as DirectoryModel;
	if (dir != null)
		return dir.FullName;

	LuceneNameResolver.NameInfo info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (info == null)
		return null;

	if (info.ParentId == Guid.Empty) // i.e. this is a root
		return info.Name;

	// Recursively resolve the parent, then append our own name.
	string parent_name = UniqueIdToDirectoryName (info.ParentId);
	if (parent_name == null)
		return null;

	return Path.Combine (parent_name, info.Name);
}
// Combine a leaf name with its parent directory's resolved path.
private string ToFullPath (string name, Guid parent_id)
{
	// This is the correct behavior for roots.
	if (parent_id == Guid.Empty)
		return name;

	string parent_name = UniqueIdToDirectoryName (parent_id);

	return (parent_name == null) ? null : Path.Combine (parent_name, name);
}
// Map a unique id to a full path.
// This works for both files and directories.
private string UniqueIdToFullPath (Guid id)
{
	// First, check if it is a directory.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// If not, try to pull name information out of the index.
	LuceneNameResolver.NameInfo info = name_resolver.GetNameInfoById (id);
	if (info == null)
		return null;

	return ToFullPath (info.Name, info.ParentId);
}
320 //////////////////////////////////////////////////////////////////////////
323 // Directory-related methods
326 private Hashtable dir_models_by_path = new Hashtable ();
// Find the DirectoryModel for a path, consulting the cache first and
// walking each root's tree on a miss. Returns null if unknown.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	DirectoryModel dir = dir_models_by_path [path] as DirectoryModel;
	if (dir != null)
		return dir;

	// Walk each root until we find the correct path
	foreach (DirectoryModel root in roots) {
		dir = root.WalkTree (path);
		if (dir != null) {
			dir_models_by_path [path] = dir;
			break;
		}
	}

	return dir;
}
// Handler for DirectoryModel.ExpireEvent: drop the expired path from
// our caches and stop watching it.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug)
		Logger.Log.Debug ("Expired '{0}'", expired_path);

	DirectoryModel dir = (DirectoryModel) dir_models_by_id [unique_id];
	if (dir != null && dir.WatchHandle != null)
		event_backend.ForgetWatch (dir.WatchHandle);

	dir_models_by_path.Remove (expired_path);
	dir_models_by_id.Remove (unique_id);
}
// Consider a directory for indexing: skip ignored/duplicate/missing paths,
// decide whether it needs (re-)indexing, and either schedule it for
// indexing or just register a DirectoryModel for it.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	// Already known under this parent: nothing to do.
	if (parent != null && parent.HasChildWithName (name))
		return;

	string path;
	path = (parent == null) ? name : Path.Combine (parent.FullName, name);

	if (Debug)
		// BUGFIX: the format string has a single placeholder but a stray
		// second argument ('name') was being passed; dropped it.
		Logger.Log.Debug ("Adding directory '{0}'", path);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr;
	attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing = false;
	if (attr == null) {
		// If it has no attributes, it definitely needs
		// indexing.
		needs_indexing = true;
	} else {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.
		// We can do this since we preloaded all of the name
		// info in the directory via PreloadDirectoryNameInfo.
		string last_known_name;
		last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		if (last_known_name != path) {
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
			needs_indexing = true;
		}
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable;
	is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing)
		ScheduleDirectory (name, parent, attr, is_walkable);
	else if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Add a new top-level root to index. Safe to call with an already-added
// path (logs an error and returns).
public void AddRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", path);

	if (roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to add an existing root: {0}", path);
		return;
	}

	// We need to have the path key in the roots hashtable
	// for the filtering to work as we'd like before the root
	// is actually added.
	roots_by_path.Add (path);

	AddDirectory (null, path);
}
// Remove a top-level root and its directory model from the index.
public void RemoveRoot (string path)
{
	Logger.Log.Debug ("Removing root: {0}", path);

	if (! roots_by_path.Contains (path)) {
		Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
		return;
	}

	// Find our directory model for the root
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	if (dir == null) {
		Logger.Log.Error ("Could not find directory-model for root: {0}", path);
		return;
	}

	// FIXME: Make sure we're emptying the crawler task of any sub-directories
	// to the root we're removing. It's not a big deal since we do an Ignore-check
	// in there, but it would be nice.

	roots_by_path.Remove (path);
	roots.Remove (dir);

	// Clean out the root from our directory cache.
	RemoveDirectory (dir);
}
// Build a directory indexable, remember its pending state for
// PostAddHook, and queue it at Delayed priority.
private void ScheduleDirectory (string name,
				DirectoryModel parent,
				FileAttributes attr,
				bool is_walkable)
{
	string path = (parent == null) ? name : Path.Combine (parent.FullName, name);
	Guid id = (attr == null) ? Guid.NewGuid () : attr.UniqueId;
	Guid parent_id = (parent == null) ? Guid.Empty : parent.UniqueId;
	DateTime last_crawl = (attr == null) ? DateTime.MinValue : attr.LastWriteTime;

	Indexable indexable = DirectoryToIndexable (path, id, parent_id);

	PendingInfo info = new PendingInfo ();
	info.Uri = indexable.Uri;
	info.Path = path;
	info.Parent = parent;
	info.Mtime = last_crawl;

	// We only set the IsDirectory flag if it is actually
	// walkable. The IsDirectory flag is what is used to
	// decide whether or not to call RegisterDirectory
	// in the PostAddHook. Thus non-walkable directories
	// will be indexed but will not have DirectoryModels
	// created for them.
	info.IsDirectory = is_walkable;

	pending_info_cache [info.Uri] = info;

	Scheduler.Task task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (task);
}
// Build a DirectoryModel for an indexed directory, start watching it,
// and schedule it for crawling. Assumes attr is non-null (callers either
// just indexed the directory or read valid attributes for it).
private void RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
{
	string path;
	path = (parent == null) ? name : Path.Combine (parent.FullName, name);

	if (Debug)
		Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

	DirectoryModel dir;
	if (parent == null)
		dir = DirectoryModel.NewRoot (big_lock, path, attr);
	else
		dir = parent.AddChild (name, attr);

	// If the on-disk mtime is newer than the stored one, the directory
	// has changed since we last crawled it.
	if (Directory.GetLastWriteTime (path) > attr.LastWriteTime) {
		dir.State = DirectoryState.Dirty;
		Logger.Log.Debug ("'{0}' is dirty", path);
	}

	if (Debug) {
		if (dir.IsRoot)
			Logger.Log.Debug ("Created model '{0}'", dir.FullName);
		else
			Logger.Log.Debug ("Created model '{0}' with parent '{1}'", dir.FullName, dir.Parent.FullName);
	}

	// Add any roots we create to the list of roots
	if (dir.IsRoot)
		roots.Add (dir);

	// Add the directory to our by-id hash, and remove any NameInfo
	// we might have cached about it.
	dir_models_by_id [dir.UniqueId] = dir;
	name_info_by_id.Remove (dir.UniqueId);

	// Start watching the directory.
	dir.WatchHandle = event_backend.CreateWatch (path);

	// Schedule this directory for crawling.
	if (tree_crawl_task.Add (dir))
		ThisScheduler.Add (tree_crawl_task);

	// Make sure that our file crawling task is active,
	// since presumably we now have something new to crawl.
	ActivateFileCrawling ();
}
// Remove a directory from the index: cache its external Uri for
// PostRemoveHook, detach the model, and queue an immediate remove task.
private void RemoveDirectory (DirectoryModel dir)
{
	Uri uri;
	uri = GuidFu.ToUri (dir.UniqueId);

	// Cache a copy of our external Uri, so that we can
	// easily remap it in the PostRemoveHook.
	Uri external_uri;
	external_uri = UriFu.PathToFileUri (dir.FullName);
	removed_uri_cache [uri] = external_uri;

	// Calling Remove will expire the path names,
	// so name caches will be cleaned up accordingly.
	dir.Remove ();

	Scheduler.Task task;
	// Reuse the uri computed above instead of re-deriving it
	// from dir.UniqueId a second time.
	task = NewRemoveTask (uri);
	task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (task);
}
// Path-based convenience overload; silently does nothing for unknown paths.
public void RemoveDirectory (string path)
{
	DirectoryModel dir = GetDirectoryModelByPath (path);
	if (dir != null)
		RemoveDirectory (dir);
}
// Rename and/or reparent a directory model and queue a renaming
// indexable so the index and notifications follow the move.
private void MoveDirectory (DirectoryModel dir,
			    DirectoryModel new_parent, // or null if we are just renaming
			    string new_name)
{
	// We'll need this later in order to generate the
	// right change notification.  Must be captured before the
	// model is mutated below.
	string old_path;
	old_path = dir.FullName;

	if (new_parent != null && new_parent != dir.Parent)
		dir.MoveTo (new_parent, new_name);
	else
		dir.Name = new_name;

	Guid parent_id;
	parent_id = dir.IsRoot ? Guid.Empty : dir.Parent.UniqueId;

	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  dir.UniqueId,
					  parent_id,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	ThisScheduler.Add (task, Scheduler.AddType.BlockUntilNoCollision);
}
615 //////////////////////////////////////////////////////////////////////////
618 // This code controls the directory crawl order
// Depth-first walk over 'contender' and its children, returning the
// highest-priority directory (per DirectoryModel.CompareTo) that still
// needs crawling, seeded with 'prev_best'.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	if (contender.NeedsCrawl
	    && (prev_best == null || prev_best.CompareTo (contender) < 0))
		prev_best = contender;

	foreach (DirectoryModel child in contender.Children)
		prev_best = StupidWalk (prev_best, child);

	return prev_best;
}
// Pick the best crawl candidate across all roots (null if nothing
// needs crawling).
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel next_dir = null;

	foreach (DirectoryModel root in roots)
		next_dir = StupidWalk (next_dir, root);

	return next_dir;
}
// Record that a directory has been fully crawled: stamp its attributes
// with the current time and mark the model clean.
public void DoneCrawlingOneDirectory (DirectoryModel dir)
{
	// The model may have been detached (e.g. the directory was
	// removed) while we were crawling.
	if (! dir.IsAttached)
		return;

	FileAttributes attr;
	attr = FileAttributesStore.Read (dir.FullName);

	// BUGFIX: Read can return null (the attributes may have been
	// dropped behind our back); previously this crashed with a
	// NullReferenceException.  Just mark the directory clean and move on.
	if (attr == null) {
		Logger.Log.Warn ("Couldn't read attributes for '{0}' after crawling it", dir.FullName);
		dir.MarkAsClean ();
		return;
	}

	// We don't have to be super-careful about this since
	// we only use the FileAttributes mtime on a directory
	// to determine its initial state, not whether or not
	// its index record is up-to-date.
	attr.LastWriteTime = DateTime.Now;

	FileAttributesStore.Write (attr);
	dir.MarkAsClean ();
}
// Schedule a re-crawl of the given path (or of its parent directory,
// if the path itself has no directory model).
public void Recrawl (string path)
{
	// Try to find a directory model for the path specified
	// so that we can re-crawl it.
	DirectoryModel dir;
	dir = GetDirectoryModelByPath (path);

	bool path_is_registered = true;

	if (dir == null) {
		// Fall back to the parent directory of the path.
		dir = GetDirectoryModelByPath (Path.GetDirectoryName (path));
		path_is_registered = false;

		if (dir == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", dir.FullName);

	if (tree_crawl_task.Add (dir))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (dir, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Mark every directory under every root for re-crawling and kick off
// both crawl tasks.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root in roots)
		Recrawl_Recursive (root, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Set the given state on a directory and all its descendants, queueing
// each one on the tree crawl task.
private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
{
	dir.State = state;
	tree_crawl_task.Add (dir);
	foreach (DirectoryModel sub_dir in dir.Children)
		Recrawl_Recursive (sub_dir, state);
}
// Ensure the file crawl task is queued on the scheduler.
private void ActivateFileCrawling ()
{
	if (! file_crawl_task.IsActive)
		ThisScheduler.Add (file_crawl_task);
}
// Ensure the tree (directory) crawl task is queued on the scheduler.
private void ActivateDirectoryCrawling ()
{
	if (! tree_crawl_task.IsActive)
		ThisScheduler.Add (tree_crawl_task);
}
724 //////////////////////////////////////////////////////////////////////////
727 // File-related methods
// What DetermineRequiredAction decided we must do with a file.
private enum RequiredAction {
	None,   // file is up-to-date; do nothing
	Index,  // (re-)index the file's content
	Rename, // file moved; fix up its name properties only
	Forget  // file is now ignored; strip its attributes
}
// Decide what to do with the file 'name' in 'dir' based on its stored
// attributes, the filter, and its on-disk mtime/ctime.  On return,
// last_known_path is set when a rename was detected (else null) and
// mtime holds the stat mtime (DateTime.MinValue if stat wasn't reached).
private RequiredAction DetermineRequiredAction (DirectoryModel dir,
						string name,
						FileAttributes attr,
						out string last_known_path,
						out DateTime mtime)
{
	last_known_path = null;
	mtime = DateTime.MinValue;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously. Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take in to account that we might have a better matching filter to use now
	// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Posix.Stat stat;
	try {
		// NOTE(review): the int return code of stat() is ignored here;
		// a failed stat may leave 'stat' zeroed instead of throwing —
		// confirm whether the error case needs explicit handling.
		Mono.Posix.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug ("Caught exception stat-ing {0}", path);
		Logger.Log.Debug (ex);
		return RequiredAction.None;
	}
	mtime = stat.MTime;

	if (! DatesAreTheSame (attr.LastWriteTime, mtime)) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed");

		// If the file has been copied, it will have the
		// original file's EAs. Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file. If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is different that the time we last
	// set file attributes, we might have been moved or copied.
	if (! DatesAreTheSame (attr.LastAttrTime, stat.CTime)) {
		if (Debug)
			Logger.Log.Debug ("*** CTime has changed, checking last known path");

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
// This works around a mono bug: the DateTimes that we get out of stat
// don't correctly account for daylight savings time. We declare the two
// dates to be equal if:
// (1) They actually are equal
// (2) The first date is exactly one hour ahead of the second
static private bool DatesAreTheSame (DateTime system_io_datetime, DateTime stat_datetime)
{
	const double epsilon = 1e-5;
	double delta = (system_io_datetime - stat_datetime).TotalSeconds;
	if (Math.Abs (delta) < epsilon)
		return true;
	return Math.Abs (delta - 3600) < epsilon;
}
// Return an indexable that will do the right thing with a file
// (or null, if the right thing is to do nothing)
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (path);

	string last_known_path;
	DateTime mtime;
	RequiredAction action = DetermineRequiredAction (dir, name, attr,
							 out last_known_path, out mtime);

	if (action == RequiredAction.None)
		return null;

	// Reuse the stored unique id when we have one.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	Indexable indexable = null;

	switch (action) {

	case RequiredAction.Index:
		indexable = FileToIndexable (path, unique_id, dir.UniqueId, true);
		if (mtime == DateTime.MinValue)
			mtime = File.GetLastWriteTime (path);
		break;

	case RequiredAction.Rename:
		indexable = NewRenamingIndexable (name, unique_id, dir.UniqueId,
						  last_known_path);
		break;

	case RequiredAction.Forget:
		FileAttributesStore.Drop (path);
		break;
	}

	// Remember pending state so PostAddHook can write the attributes.
	if (indexable != null) {
		PendingInfo info = new PendingInfo ();
		info.Uri = indexable.Uri;
		info.Path = path;
		info.IsDirectory = false;
		info.Mtime = mtime;
		info.Parent = dir;
		pending_info_cache [info.Uri] = info;
	}

	return indexable;
}
// Schedule a (non-ignored, existing) file for immediate indexing.
public void AddFile (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);

	if (! File.Exists (path))
		return;

	if (filter.Ignore (dir, name, false))
		return;

	FileAttributes attr = FileAttributesStore.Read (path);

	// Keep the stored unique id if this file was indexed before.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	Indexable indexable = FileToIndexable (path, unique_id, dir.UniqueId, false);

	// Remember pending state so PostAddHook can write the attributes.
	PendingInfo info = new PendingInfo ();
	info.Uri = indexable.Uri;
	info.Path = path;
	info.IsDirectory = false;
	info.Mtime = File.GetLastWriteTime (path);
	info.Parent = dir;
	pending_info_cache [info.Uri] = info;

	Scheduler.Task task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (task);
}
// Schedule the removal of a file from the index, caching its external
// Uri so PostRemoveHook can remap the notification.
public void RemoveFile (DirectoryModel dir, string name)
{
	// FIXME: We might as well remove it, even if it was being ignore.
	// Right?

	Guid unique_id = name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
	if (unique_id == Guid.Empty) {
		Logger.Log.Warn ("Couldn't find unique id for '{0}' in '{1}' ({2})",
				 name, dir.FullName, dir.UniqueId);
		return;
	}

	Uri uri = GuidFu.ToUri (unique_id);
	Uri file_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));
	removed_uri_cache [uri] = file_uri;

	Scheduler.Task task = NewRemoveTask (uri);
	task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (task);
}
// Handle a file move/rename, synthesizing an add or remove when the
// move crosses an ignore boundary, and otherwise queueing a renaming
// indexable resolved through the name resolver.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// action.

	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// BUGFIX: the indexed record lives under the *old* name and
		// parent directory; removing by the new (ignored) name would
		// never resolve a unique id, so the stale record stayed in
		// the index.
		RemoveFile (old_dir, old_name);
		return;
	}

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)... so we go straight
	// to the name resolver.

	Guid unique_id;
	unique_id = name_resolver.GetIdByNameAndParentId (old_name, old_dir.UniqueId);
	if (unique_id == Guid.Empty) {
		Logger.Log.Warn ("Couldn't find unique id for '{0}' in '{1}' ({2})",
				 old_name, old_dir.FullName, old_dir.UniqueId);
		return;
	}

	// FIXME: I think we need to be more conservative when we seen
	// events in a directory that has not been fully scanned, just to
	// avoid races. i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet? Then the rename
	// will fail.
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir.UniqueId,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	ThisScheduler.Add (task, Scheduler.AddType.BlockUntilNoCollision);
}
1036 //////////////////////////////////////////////////////////////////////////
1038 // Configuration stuff
// The list of root paths currently configured for indexing.
public IList Roots {
	get {
		return roots_by_path;
	}
}
// Add the configured roots (optionally including the home directory)
// and subscribe to future indexing-configuration changes.
private void LoadConfiguration ()
{
	if (Conf.Indexing.IndexHomeDir)
		AddRoot (PathFinder.HomeDir);

	foreach (string root in Conf.Indexing.Roots)
		AddRoot (root);

	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Diff the newly-configured roots against the current set and
// add/remove roots accordingly.
private void OnConfigurationChanged (Conf.Section section)
{
	ArrayList roots_wanted = new ArrayList (Conf.Indexing.Roots);

	if (Conf.Indexing.IndexHomeDir)
		roots_wanted.Add (PathFinder.HomeDir);

	IList roots_to_add, roots_to_remove;
	ArrayFu.IntersectListChanges (roots_wanted, Roots, out roots_to_add, out roots_to_remove);

	foreach (string root in roots_to_remove)
		RemoveRoot (root);

	foreach (string root in roots_to_add)
		AddRoot (root);
}
1074 //////////////////////////////////////////////////////////////////////////
1077 // Our magic LuceneQueryable hooks
// After an indexer add completes: remap the receipt Uri for change
// notification and write out the file attributes recorded in the
// pending-info cache (registering a DirectoryModel for directories).
override protected void PostAddHook (IndexerAddedReceipt receipt)
{
	// If we just changed properties, remap to our *old* external Uri
	// to make notification work out properly.
	if (receipt.PropertyChangesOnly) {

		// FIXME: This linear search sucks --- we should
		// be able to use the fact that they are sorted.
		foreach (Property prop in receipt.Properties) {
			if (prop.Key == OldExternalUriPropKey) {
				receipt.Uri = UriFu.UriStringToUri (prop.Value);
				break;
			}
		}
		return;
	}

	PendingInfo info;
	info = pending_info_cache [receipt.Uri] as PendingInfo;
	pending_info_cache.Remove (receipt.Uri);

	// BUGFIX: guard against a missing cache entry instead of crashing
	// with a NullReferenceException below.
	if (info == null) {
		Logger.Log.Warn ("No pending info for {0} in PostAddHook", receipt.Uri);
		return;
	}

	// The parent directory might have run away since we were indexed
	if (info.Parent != null && ! info.Parent.IsAttached)
		return;

	Guid unique_id;
	unique_id = GuidFu.FromUri (receipt.Uri);

	FileAttributes attr;
	attr = FileAttributesStore.ReadOrCreate (info.Path, unique_id);
	attr.Path = info.Path;
	attr.LastWriteTime = info.Mtime;

	attr.FilterName = receipt.FilterName;
	attr.FilterVersion = receipt.FilterVersion;

	// Walkable directories get a DirectoryModel built and registered.
	if (info.IsDirectory) {
		string name;
		if (info.Parent == null)
			name = info.Path;
		else
			name = Path.GetFileName (info.Path);
		RegisterDirectory (name, info.Parent, attr);
	}

	FileAttributesStore.Write (attr);

	// Remap the Uri so that change notification will work properly
	receipt.Uri = UriFu.PathToFileUri (info.Path);
}
1132 override protected void PostRemoveHook (IndexerRemovedReceipt receipt)
1134 // Find the cached external Uri and remap the Uri in the receipt.
1135 // We have to do this to make change notification work.
1136 Uri external_uri;
1137 external_uri = removed_uri_cache [receipt.Uri] as Uri;
1138 if (external_uri == null)
1139 throw new Exception ("No cached external Uri for " + receipt.Uri);
1141 removed_uri_cache.Remove (receipt.Uri);
1143 receipt.Uri = external_uri;
1146 // Hit filter: this handles our mapping from internal->external uris,
1147 // and checks to see if the file is still there.
1148 override protected bool HitFilter (Hit hit)
1150 string name, parent_id_uri;
1151 name = hit [ExactFilenamePropKey];
1152 if (name == null)
1153 return false;
1154 parent_id_uri = hit [ParentDirUriPropKey];
1155 if (parent_id_uri == null)
1156 return false;
1158 Guid parent_id;
1159 parent_id = GuidFu.FromUriString (parent_id_uri);
1161 string path;
1162 path = ToFullPath (name, parent_id);
1164 #if false
1165 if (Debug)
1166 Logger.Log.Debug ("HitFilter mapped '{0}' {1} to '{2}'",
1167 name, parent_id, path);
1168 #endif
1170 bool is_directory = (hit.MimeType == "inode/directory");
1172 bool exists;
1173 if (is_directory)
1174 exists = Directory.Exists (path);
1175 else
1176 exists = File.Exists (path);
1178 // If the file doesn't exist, we do not schedule a removal and
1179 // return false. This is to avoid "losing" files if they are
1180 // in a directory that has been renamed but which we haven't
1181 // scanned yet... if we dropped them from the index, they would
1182 // never get re-indexed (or at least not until the next time they
1183 // were touched) since they would still be stamped with EAs
1184 // indicating they were up-to-date. And that would be bad.
1185 // FIXME: It would be safe if we were in a known state, right?
1186 // i.e. every DirectoryModel is clean.
1187 if (! exists)
1188 return false;
1190 // Fetch the parent directory model from our cache to do clever
1191 // filterint to determine if we're ignoring it or not.
1192 DirectoryModel parent;
1193 parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));
1195 // Check the ignore status of the hit
1196 if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
1197 return false;
1199 // Store the hit's internal uri in a property
1200 Property prop;
1201 prop = Property.NewKeyword ("beagle:InternalUri",
1202 UriFu.UriToSerializableString (hit.Uri));
1203 hit.AddProperty (prop);
1205 // Remap the Uri
1206 hit.Uri = UriFu.PathToFileUri (path);
1208 return true;
1211 override public string GetSnippet (string [] query_terms, Hit hit)
1213 // Uri remapping from a hit is easy: the internal uri
1214 // is stored in a property.
1215 Uri uri;
1216 uri = UriFu.UriStringToUri (hit ["beagle:InternalUri"]);
1218 string path;
1219 path = TextCache.UserCache.LookupPathRaw (uri);
1221 if (path == null)
1222 return null;
1224 // If this is self-cached, use the remapped Uri
1225 if (path == TextCache.SELF_CACHE_TAG)
1226 path = hit.Uri.LocalPath;
1228 return SnippetFu.GetSnippetFromFile (query_terms, path);
		// Start the backend: let LuceneQueryable start up, then hook up
		// the file event backend, then load roots from the configuration.
		// Note the ordering: the event backend is started before any roots
		// are added by LoadConfiguration.
		override public void Start ()
		{
			base.Start ();

			event_backend.Start (this);

			LoadConfiguration ();

			Logger.Log.Debug ("Done starting FileSystemQueryable");
		}
1242 //////////////////////////////////////////////////////////////////////////
1244 // These are the methods that the IFileEventBackend implementations should
1245 // call in response to events.
1247 public void ReportEventInDirectory (string directory_name)
1249 DirectoryModel dir;
1250 dir = GetDirectoryModelByPath (directory_name);
1252 // We only use this information to prioritize the order in which
1253 // we crawl directories --- so if this directory doesn't
1254 // actually need to be crawled, we can safely ignore it.
1255 if (! dir.NeedsCrawl)
1256 return;
1258 dir.LastActivityTime = DateTime.Now;
1260 Logger.Log.Debug ("Saw event in '{0}'", directory_name);
1263 public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
1265 Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
1266 is_directory ? "(dir)" : "(file)");
1268 DirectoryModel dir;
1269 dir = GetDirectoryModelByPath (directory_name);
1270 if (dir == null) {
1271 Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
1272 return;
1275 if (is_directory)
1276 AddDirectory (dir, file_name);
1277 else
1278 AddFile (dir, file_name);
1281 public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
1283 Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
1284 is_directory ? "(dir)" : "(file)");
1286 if (is_directory) {
1287 string path;
1288 path = Path.Combine (directory_name, file_name);
1290 DirectoryModel dir;
1291 dir = GetDirectoryModelByPath (path);
1292 if (dir == null) {
1293 Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path);
1294 return;
1297 dir.WatchHandle = null;
1298 RemoveDirectory (dir);
1299 } else {
1300 DirectoryModel dir;
1301 dir = GetDirectoryModelByPath (directory_name);
1302 if (dir == null) {
1303 Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
1304 return;
1307 RemoveFile (dir, file_name);
1311 public void HandleMoveEvent (string old_directory_name, string old_file_name,
1312 string new_directory_name, string new_file_name,
1313 bool is_directory)
1315 Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
1316 old_directory_name, old_file_name,
1317 new_directory_name, new_file_name,
1318 is_directory ? "(dir)" : "(file)");
1320 if (is_directory) {
1321 DirectoryModel dir, new_parent;
1322 dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
1323 new_parent = GetDirectoryModelByPath (new_directory_name);
1324 MoveDirectory (dir, new_parent, new_file_name);
1325 return;
1326 } else {
1327 DirectoryModel old_dir, new_dir;
1328 old_dir = GetDirectoryModelByPath (new_directory_name);
1329 new_dir = GetDirectoryModelByPath (new_directory_name);
1330 MoveFile (old_dir, old_file_name, new_dir, new_file_name);
1334 public void HandleOverflowEvent ()
1336 Logger.Log.Debug ("Queue overflows suck");