//
// FileSystemQueryable.cs
//
// Copyright (C) 2004 Novell, Inc.
//

//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;
using System.Reflection;
using System.Threading;

using Beagle.Daemon;
using Beagle.Util;

namespace Beagle.Daemon.FileSystemQueryable {

	[QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
	public class FileSystemQueryable : LuceneQueryable {

		static public bool Debug = true;

		private const string OldExternalUriPropKey = LuceneCommon.UnindexedNamespace + "OldExternalUri";
		private const string SplitFilenamePropKey = "beagle:Filename";
		public const string ExactFilenamePropKey = "beagle:ExactFilename";
		public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
		public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";

		// MINOR_VERSION history:
		// 1: Initially set to force a reindex due to NameIndex changes.
		// 2: Overhauled everything to use the new Lucene infrastructure.
		const int MINOR_VERSION = 2;

		private object big_lock = new object ();

		private IFileEventBackend event_backend;

		// This is the task that walks the tree structure
		private TreeCrawlTask tree_crawl_task;

		// This is the task that finds the next place that
		// needs to be crawled in the tree and spawns off
		// the appropriate IndexableGenerator.
		private FileCrawlTask file_crawl_task;

		private ArrayList roots = new ArrayList ();
		private ArrayList roots_by_path = new ArrayList ();

		// This is a cache of the external Uris of removed
		// objects, keyed on their internal Uris. We use this
		// to remap Uris on removes.
		private Hashtable removed_uri_cache = UriFu.NewHashtable ();

		private FileNameFilter filter;

		// This is just a copy of the LuceneQueryable's QueryingDriver
		// cast into the right type for doing internal->external Uri
		// mapping.
		private LuceneNameResolver name_resolver;

		//////////////////////////////////////////////////////////////////////////

		private class PendingInfo {
			public Uri            Uri;  // an internal uid: uri
			public string         Path; // the corresponding external path
			public bool           IsDirectory;
			public DateTime       Mtime;

			// This is set when we are adding a subdirectory to a
			// given parent directory.
			public DirectoryModel Parent;

			public bool IsRoot { get { return Parent == null; } }
		}

		private Hashtable pending_info_cache = UriFu.NewHashtable ();
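
		// The flow, roughly: ScheduleDirectory and AddFile build an Indexable whose
		// Uri is an internal uid: uri, stash a PendingInfo for it here, and when the
		// add completes PostAddHook looks the PendingInfo up again to write file
		// attributes and remap the receipt's Uri back to an external file:// uri.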

		//////////////////////////////////////////////////////////////////////////

		public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
		{
			// Set up our event backend
			if (Inotify.Enabled) {
				Logger.Log.Debug ("Starting Inotify Backend");
				event_backend = new InotifyBackend ();
			} else {
				Logger.Log.Debug ("Starting FileSystemWatcher Backend");
				event_backend = new FileSystemWatcherBackend ();
			}

			tree_crawl_task = new TreeCrawlTask (new TreeCrawlTask.Handler (AddDirectory));
			file_crawl_task = new FileCrawlTask (this);

			name_resolver = (LuceneNameResolver) Driver;
			PreloadDirectoryNameInfo ();

			// Setup our file-name filter
			filter = new FileNameFilter (this);

			// Do the right thing when paths expire
			DirectoryModel.ExpireEvent +=
				new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
		}

		override protected IFileAttributesStore BuildFileAttributesStore ()
		{
			return new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
		}

		override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
										    int    minor_version,
										    bool   read_only_mode)
		{
			return new LuceneNameResolver (index_name, minor_version, read_only_mode);
		}

		public FileNameFilter Filter {
			get { return filter; }
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// This is where we build our Indexables
		//

		private static Indexable NewIndexable (Guid id)
		{
			Indexable indexable;

			// This used to do more. Maybe it will again someday.
			indexable = new Indexable (GuidFu.ToUri (id));

			return indexable;
		}

		public static void AddStandardPropertiesToIndexable (Indexable indexable,
								      string    name,
								      Guid      parent_id,
								      bool      mutable)
		{
			Property prop;

			prop = Property.NewKeyword (ExactFilenamePropKey, name);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			string str;
			str = Path.GetFileNameWithoutExtension (name);
			str = StringFu.FuzzyDivide (str);
			prop = Property.New (SplitFilenamePropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			if (parent_id == Guid.Empty)
				return;

			str = GuidFu.ToUriString (parent_id);
			// We use the uri here to recycle terms in the index,
			// since each directory's uri will already be indexed.
			prop = Property.NewKeyword (ParentDirUriPropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);
		}
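
		// For illustration: given a file named "report.odt" (a hypothetical name),
		// this adds beagle:ExactFilename = "report.odt", beagle:Filename = the
		// fuzzily-divided base name (StringFu.FuzzyDivide of "report"), and, unless
		// the item is a root, ParentDirUriPropKey = the parent directory's uid: uri.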

		public static Indexable DirectoryToIndexable (string path, Guid id, Guid parent_id)
		{
			Indexable indexable;
			indexable = NewIndexable (id);
			indexable.MimeType = "inode/directory";
			indexable.NoContent = true;
			indexable.Timestamp = Directory.GetLastWriteTime (path);

			// Roots are indexed under their full path; everything else
			// is indexed under just its file name.
			string name;
			if (parent_id == Guid.Empty)
				name = path;
			else
				name = Path.GetFileName (path);
			AddStandardPropertiesToIndexable (indexable, name, parent_id, true);

			Property prop;
			prop = Property.NewBool (IsDirectoryPropKey, true);
			prop.IsMutable = true; // we want this in the secondary index, for efficiency
			indexable.AddProperty (prop);

			return indexable;
		}

		public static Indexable FileToIndexable (string path,
							 Guid   id,
							 Guid   parent_id,
							 bool   crawl_mode)
		{
			Indexable indexable;

			indexable = NewIndexable (id);
			indexable.ContentUri = UriFu.PathToFileUri (path);
			indexable.Crawled = crawl_mode;
			indexable.Filtering = Beagle.IndexableFiltering.Always;

			AddStandardPropertiesToIndexable (indexable, Path.GetFileName (path), parent_id, true);

			return indexable;
		}

		private static Indexable NewRenamingIndexable (string name,
							       Guid   id,
							       Guid   parent_id,
							       string last_known_path)
		{
			Indexable indexable;
			indexable = new Indexable (GuidFu.ToUri (id));
			indexable.PropertyChangesOnly = true;

			AddStandardPropertiesToIndexable (indexable, name, parent_id, true);

			Property prop;
			prop = Property.NewKeyword (OldExternalUriPropKey,
						    StringFu.PathToQuotedFileUri (last_known_path));
			prop.IsMutable = true; // since this is a property-change-only Indexable
			indexable.AddProperty (prop);

			return indexable;
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Mapping from directory ids to paths
		//

		private Hashtable dir_models_by_id = new Hashtable ();
		private Hashtable name_info_by_id = new Hashtable ();

		// We fall back to using the name information in the index
		// until we've fully constructed our set of DirectoryModels.
		private void PreloadDirectoryNameInfo ()
		{
			ICollection all;
			all = name_resolver.GetAllDirectoryNameInfo ();
			foreach (LuceneNameResolver.NameInfo info in all)
				name_info_by_id [info.Id] = info;
		}

		// This only works for directories.
		private string UniqueIdToDirectoryName (Guid id)
		{
			// Check our in-memory models first...
			DirectoryModel dir;
			dir = dir_models_by_id [id] as DirectoryModel;
			if (dir != null)
				return dir.FullName;

			// ...and fall back to the name info we preloaded from the index.
			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info == null)
				return null;

			if (info.ParentId == Guid.Empty) // i.e. this is a root
				return info.Name;

			string parent_name;
			parent_name = UniqueIdToDirectoryName (info.ParentId);
			if (parent_name == null)
				return null;

			return Path.Combine (parent_name, info.Name);
		}

		private string ToFullPath (string name, Guid parent_id)
		{
			// This is the correct behavior for roots.
			if (parent_id == Guid.Empty)
				return name;

			string parent_name;
			parent_name = UniqueIdToDirectoryName (parent_id);
			if (parent_name == null)
				return null;

			return Path.Combine (parent_name, name);
		}
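
		// Note that a root's stored name is already its full path, so when parent_id
		// is Guid.Empty the name is returned unchanged; for everything else the
		// parent chain is resolved first via UniqueIdToDirectoryName.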

		// This works for both files and directories.
		private string UniqueIdToFullPath (Guid id)
		{
			// First, check if it is a directory.
			string path;
			path = UniqueIdToDirectoryName (id);
			if (path != null)
				return path;

			// If not, try to pull name information out of the index.
			LuceneNameResolver.NameInfo info;
			info = name_resolver.GetNameInfoById (id);
			if (info == null)
				return null;

			return ToFullPath (info.Name, info.ParentId);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Directory-related methods
		//

		private Hashtable dir_models_by_path = new Hashtable ();

		private DirectoryModel GetDirectoryModelByPath (string path)
		{
			DirectoryModel dir;

			dir = dir_models_by_path [path] as DirectoryModel;
			if (dir != null)
				return dir;

			// Walk each root until we find the correct path
			foreach (DirectoryModel root in roots) {
				dir = root.WalkTree (path);
				if (dir != null) {
					dir_models_by_path [path] = dir;
					break;
				}
			}

			return dir;
		}
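
		// dir_models_by_path is just a memo cache: on a miss we fall back to walking
		// each registered root, and a successful lookup is cached for next time.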

		private void ExpireDirectoryPath (string expired_path, Guid unique_id)
		{
			Logger.Log.Debug ("Expired '{0}'", expired_path);

			DirectoryModel dir = (DirectoryModel) dir_models_by_id [unique_id];
			if (dir != null && dir.WatchHandle != null)
				event_backend.ForgetWatch (dir.WatchHandle);

			dir_models_by_path.Remove (expired_path);
			dir_models_by_id.Remove (unique_id);
		}

		public void AddDirectory (DirectoryModel parent, string name)
		{
			// Ignore the stuff we want to ignore.
			if (filter.Ignore (parent, name, true))
				return;

			if (parent != null && parent.HasChildWithName (name))
				return;

			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Logger.Log.Debug ("Adding directory '{0}'", path);

			if (! Directory.Exists (path)) {
				Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
				return;
			}

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			// Note that we don't look at the mtime of a directory when
			// deciding whether or not to index it.
			bool needs_indexing = false;

			if (attr == null) {
				// If it has no attributes, it definitely needs
				// to be indexed.
				needs_indexing = true;
			} else {
				// Make sure that it still has the same name as before.
				// If not, we need to re-index it.
				// We can do this since we preloaded all of the name
				// info in the directory via PreloadDirectoryNameInfo.
				string last_known_name;
				last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
				if (last_known_name != path) {
					Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
					needs_indexing = true;
				}
			}

			// If we can't descend into this directory, we want to
			// index it but not build a DirectoryModel for it.
			// FIXME: We should do the right thing when a
			// directory's permissions change.
			bool is_walkable;
			is_walkable = DirectoryWalker.IsWalkable (path);
			if (! is_walkable)
				Logger.Log.Debug ("Can't walk '{0}'", path);

			if (needs_indexing)
				ScheduleDirectory (name, parent, attr, is_walkable);
			else if (is_walkable)
				RegisterDirectory (name, parent, attr);
		}
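
		// In short: directories that need (re)indexing go through ScheduleDirectory,
		// which indexes first and lets PostAddHook call RegisterDirectory afterwards;
		// directories that are already up to date but walkable are registered
		// directly so that they can be watched and crawled.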

		public void AddRoot (string path)
		{
			path = StringFu.SanitizePath (path);
			Logger.Log.Debug ("Adding root: {0}", path);

			if (roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to add an existing root: {0}", path);
				return;
			}

			// We need to have the path key in the roots hashtable
			// for the filtering to work as we'd like before the root
			// is actually added.
			roots_by_path.Add (path);

			AddDirectory (null, path);
		}

		public void RemoveRoot (string path)
		{
			Logger.Log.Debug ("Removing root: {0}", path);

			if (! roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
				return;
			}

			// Find our directory model for the root
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			if (dir == null) {
				Logger.Log.Error ("Could not find directory-model for root: {0}", path);
				return;
			}

			// FIXME: Make sure we're emptying the crawler task of any sub-directories
			// to the root we're removing. It's not a big deal since we do an Ignore-check
			// in there, but it would be nice.

			roots_by_path.Remove (path);
			roots.Remove (dir);

			// Clean out the root from our directory cache.
			RemoveDirectory (dir);
		}

		private void ScheduleDirectory (string         name,
						DirectoryModel parent,
						FileAttributes attr,
						bool           is_walkable)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Guid id;
			id = (attr == null) ? Guid.NewGuid () : attr.UniqueId;

			Guid parent_id;
			parent_id = (parent == null) ? Guid.Empty : parent.UniqueId;

			DateTime last_crawl;
			last_crawl = (attr == null) ? DateTime.MinValue : attr.LastWriteTime;

			Indexable indexable;
			indexable = DirectoryToIndexable (path, id, parent_id);

			PendingInfo info;
			info = new PendingInfo ();
			info.Uri = indexable.Uri;
			info.Path = path;
			info.Parent = parent;
			info.Mtime = last_crawl;

			// We only set the IsDirectory flag if it is actually
			// walkable. The IsDirectory flag is what is used to
			// decide whether or not to call RegisterDirectory
			// in the PostAddHook. Thus non-walkable directories
			// will be indexed but will not have DirectoryModels.
			info.IsDirectory = is_walkable;

			pending_info_cache [info.Uri] = info;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Delayed;
			ThisScheduler.Add (task);
		}

		private void RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

			DirectoryModel dir;
			if (parent == null)
				dir = DirectoryModel.NewRoot (big_lock, path, attr);
			else
				dir = parent.AddChild (name, attr);

			if (Directory.GetLastWriteTime (path) > attr.LastWriteTime) {
				dir.State = DirectoryState.Dirty;
				Logger.Log.Debug ("'{0}' is dirty", path);
			}

			if (dir.IsRoot)
				Logger.Log.Debug ("Created model '{0}'", dir.FullName);
			else
				Logger.Log.Debug ("Created model '{0}' with parent '{1}'", dir.FullName, dir.Parent.FullName);

			// Add any roots we create to the list of roots
			if (dir.IsRoot)
				roots.Add (dir);

			// Add the directory to our by-id hash, and remove any NameInfo
			// we might have cached about it.
			dir_models_by_id [dir.UniqueId] = dir;
			name_info_by_id.Remove (dir.UniqueId);

			// Start watching the directory.
			dir.WatchHandle = event_backend.CreateWatch (path);

			// Schedule this directory for crawling.
			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			// Make sure that our file crawling task is active,
			// since presumably we now have something new to crawl.
			ActivateFileCrawling ();
		}

		private void RemoveDirectory (DirectoryModel dir)
		{
			Uri uri;
			uri = GuidFu.ToUri (dir.UniqueId);

			// Cache a copy of our external Uri, so that we can
			// easily remap it in the PostRemoveHook.
			Uri external_uri;
			external_uri = UriFu.PathToFileUri (dir.FullName);
			removed_uri_cache [uri] = external_uri;

			// Calling Remove will expire the path names,
			// so name caches will be cleaned up accordingly.
			dir.Remove ();

			Scheduler.Task task;
			task = NewRemoveTask (GuidFu.ToUri (dir.UniqueId));
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveDirectory (string path)
		{
			DirectoryModel dir = GetDirectoryModelByPath (path);
			if (dir != null)
				RemoveDirectory (dir);
		}

		private void MoveDirectory (DirectoryModel dir,
					    DirectoryModel new_parent, // or null if we are just renaming
					    string         new_name)
		{
			// We'll need this later in order to generate the
			// right change notification.
			string old_path;
			old_path = dir.FullName;

			if (new_parent != null && new_parent != dir.Parent)
				dir.MoveTo (new_parent, new_name);

			Guid parent_id;
			parent_id = dir.IsRoot ? Guid.Empty : dir.Parent.UniqueId;

			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  dir.UniqueId,
							  parent_id,
							  old_path);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			ThisScheduler.Add (task, Scheduler.AddType.BlockUntilNoCollision);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// This code controls the directory crawl order
		//

		private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
		{
			if (contender.NeedsCrawl) {
				if (prev_best == null || prev_best.CompareTo (contender) < 0)
					prev_best = contender;
			}

			foreach (DirectoryModel child in contender.Children)
				prev_best = StupidWalk (prev_best, child);

			return prev_best;
		}

		public DirectoryModel GetNextDirectoryToCrawl ()
		{
			DirectoryModel next_dir = null;

			foreach (DirectoryModel root in roots)
				next_dir = StupidWalk (next_dir, root);

			return next_dir;
		}

		public void DoneCrawlingOneDirectory (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			FileAttributes attr;
			attr = FileAttributesStore.Read (dir.FullName);

			// We don't have to be super-careful about this since
			// we only use the FileAttributes mtime on a directory
			// to determine its initial state, not whether or not
			// its index record is up-to-date.
			attr.LastWriteTime = DateTime.Now;

			FileAttributesStore.Write (attr);
		}

		public void Recrawl (string path)
		{
			// Try to find a directory model for the path specified
			// so that we can re-crawl it.
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			bool path_is_registered = true;

			if (dir == null) {
				dir = GetDirectoryModelByPath (Path.GetDirectoryName (path));
				path_is_registered = false;

				if (dir == null) {
					Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
					return;
				}
			}

			Logger.Log.Debug ("Re-crawling {0}", dir.FullName);

			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			if (path_is_registered)
				Recrawl_Recursive (dir, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		public void RecrawlEverything ()
		{
			Logger.Log.Debug ("Re-crawling all directories");

			foreach (DirectoryModel root in roots)
				Recrawl_Recursive (root, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
		{
			dir.State = state;
			tree_crawl_task.Add (dir);
			foreach (DirectoryModel sub_dir in dir.Children)
				Recrawl_Recursive (sub_dir, state);
		}

		private void ActivateFileCrawling ()
		{
			if (! file_crawl_task.IsActive)
				ThisScheduler.Add (file_crawl_task);
		}

		private void ActivateDirectoryCrawling ()
		{
			if (! tree_crawl_task.IsActive)
				ThisScheduler.Add (tree_crawl_task);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// File-related methods
		//

		private enum RequiredAction {
			None,
			Index,
			Rename,
			Forget
		}

		private RequiredAction DetermineRequiredAction (DirectoryModel dir,
								string          name,
								FileAttributes  attr,
								out string      last_known_path,
								out DateTime    mtime)
		{
			last_known_path = null;
			mtime = DateTime.MinValue;

			string path;
			path = Path.Combine (dir.FullName, name);

			Logger.Log.Debug ("*** What should we do with {0}?", path);

			if (filter.Ignore (dir, name, false)) {
				// If there are attributes on the file, we must have indexed
				// it previously. Since we are ignoring it now, we should strip
				// any file attributes from it.
				if (attr != null) {
					Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
					return RequiredAction.Forget;
				}
				Logger.Log.Debug ("*** Do nothing: File is ignored");
				return RequiredAction.None;
			}

			if (attr == null) {
				Logger.Log.Debug ("*** Index it: File has no attributes");
				return RequiredAction.Index;
			}

			// FIXME: This does not take into account that we might have a better matching filter to use now.
			// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
			if (attr.FilterName != null && attr.FilterVersion > 0) {
				int current_filter_version;
				current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

				if (current_filter_version > attr.FilterVersion) {
					Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
					return RequiredAction.Index;
				}
			}

			Mono.Posix.Stat stat;
			try {
				Mono.Posix.Syscall.stat (path, out stat);
			} catch (Exception ex) {
				Logger.Log.Debug ("Caught exception stat-ing {0}", path);
				Logger.Log.Debug (ex);
				return RequiredAction.None;
			}
			mtime = stat.MTime;

			if (! DatesAreTheSame (attr.LastWriteTime, mtime)) {
				Logger.Log.Debug ("*** Index it: MTime has changed");

				// If the file has been copied, it will have the
				// original file's EAs. Thus we have to check to
				// make sure that the unique id in the EAs actually
				// belongs to this file. If not, replace it with a new one.
				// (Thus touching & then immediately renaming a file can
				// cause its unique id to change, which is less than
				// optimal but probably can't be helped.)
				last_known_path = UniqueIdToFullPath (attr.UniqueId);
				if (path != last_known_path) {
					Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
					attr.UniqueId = Guid.NewGuid ();
				}

				return RequiredAction.Index;
			}

			// If the inode ctime is different than the time we last
			// set file attributes, we might have been moved or copied.
			if (! DatesAreTheSame (attr.LastAttrTime, stat.CTime)) {
				Logger.Log.Debug ("*** CTime has changed, checking last known path");

				last_known_path = UniqueIdToFullPath (attr.UniqueId);

				if (last_known_path == null) {
					Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
					return RequiredAction.Index;
				}

				// If the name has changed but the mtime
				// hasn't, the only logical conclusion is that
				// the file has been renamed.
				if (path != last_known_path) {
					Logger.Log.Debug ("*** Rename it: CTime and path have changed");
					return RequiredAction.Rename;
				}
			}

			// We don't have to do anything, which is always preferable.
			Logger.Log.Debug ("*** Do nothing");
			return RequiredAction.None;
		}
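
		// Summary of the decision above: ignored-but-has-attributes => Forget;
		// no attributes, a newer filter version, or a changed mtime => Index;
		// a changed ctime with an unchanged mtime but a different last known
		// path => Rename; anything else => None.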

		// This works around a mono bug: the DateTimes that we get out of stat
		// don't correctly account for daylight savings time. We declare the two
		// dates to be equal if:
		// (1) They actually are equal
		// (2) The first date is exactly one hour ahead of the second
		static private bool DatesAreTheSame (DateTime system_io_datetime, DateTime stat_datetime)
		{
			const double epsilon = 1e-5;
			double t = (system_io_datetime - stat_datetime).TotalSeconds;
			return Math.Abs (t) < epsilon || Math.Abs (t - 3600) < epsilon;
		}
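
		// Illustrative example: if System.IO reports an mtime of 13:00:00 while the
		// stat-derived DateTime for the same instant reads 12:00:00 (the daylight
		// savings skew described above), the difference is exactly 3600 seconds and
		// the two are still treated as equal, so the file is not needlessly re-indexed.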

		// Return an indexable that will do the right thing with a file
		// (or null, if the right thing is to do nothing)
		public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			RequiredAction action;
			string last_known_path;
			DateTime mtime;
			action = DetermineRequiredAction (dir, name, attr, out last_known_path, out mtime);

			if (action == RequiredAction.None)
				return null;

			Guid unique_id;
			if (attr != null)
				unique_id = attr.UniqueId;
			else
				unique_id = Guid.NewGuid ();

			Indexable indexable = null;

			switch (action) {

			case RequiredAction.Index:
				indexable = FileToIndexable (path, unique_id, dir.UniqueId, true);
				if (mtime == DateTime.MinValue)
					mtime = File.GetLastWriteTime (path);
				break;

			case RequiredAction.Rename:
				indexable = NewRenamingIndexable (name, unique_id, dir.UniqueId,
								  last_known_path);
				break;

			case RequiredAction.Forget:
				FileAttributesStore.Drop (path);
				break;
			}

			if (indexable != null) {
				PendingInfo info;
				info = new PendingInfo ();
				info.Uri = indexable.Uri;
				info.Path = path;
				info.IsDirectory = false;
				info.Mtime = mtime;
				pending_info_cache [info.Uri] = info;
			}

			return indexable;
		}

		public void AddFile (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			if (! File.Exists (path))
				return;

			if (filter.Ignore (dir, name, false))
				return;

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			Guid unique_id;
			unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

			Indexable indexable;
			indexable = FileToIndexable (path, unique_id, dir.UniqueId, false);

			PendingInfo info;
			info = new PendingInfo ();
			info.Uri = indexable.Uri;
			info.Path = path;
			info.IsDirectory = false;
			info.Mtime = File.GetLastWriteTime (path);

			pending_info_cache [info.Uri] = info;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveFile (DirectoryModel dir, string name)
		{
			// FIXME: We might as well remove it, even if it was being ignored.

			Guid unique_id;
			unique_id = name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
			if (unique_id == Guid.Empty) {
				Logger.Log.Warn ("Couldn't find unique id for '{0}' in '{1}' ({2})",
						 name, dir.FullName, dir.UniqueId);
				return;
			}

			Uri uri, file_uri;
			uri = GuidFu.ToUri (unique_id);
			file_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));
			removed_uri_cache [uri] = file_uri;

			Scheduler.Task task;
			task = NewRemoveTask (uri);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void MoveFile (DirectoryModel old_dir, string old_name,
				      DirectoryModel new_dir, string new_name)
		{
			bool old_ignore, new_ignore;
			old_ignore = filter.Ignore (old_dir, old_name, false);
			new_ignore = filter.Ignore (new_dir, new_name, false);

			if (old_ignore && new_ignore)
				return;

			// If our ignore-state is changing, synthesize the appropriate
			// add or remove instead of a rename.
			if (old_ignore && ! new_ignore) {
				AddFile (new_dir, new_name);
				return;
			}

			if (! old_ignore && new_ignore) {
				RemoveFile (new_dir, new_name);
				return;
			}

			string old_path;
			old_path = Path.Combine (old_dir.FullName, old_name);

			// We need to find the file's unique id.
			// We can't look at the extended attributes w/o making
			// assumptions about whether they follow around the
			// file (EAs) or the path (sqlite)... so we go straight
			// to the name resolver.
			Guid unique_id;
			unique_id = name_resolver.GetIdByNameAndParentId (old_name, old_dir.UniqueId);
			if (unique_id == Guid.Empty) {
				Logger.Log.Warn ("Couldn't find unique id for '{0}' in '{1}' ({2})",
						 old_name, old_dir.FullName, old_dir.UniqueId);
				return;
			}

			// FIXME: I think we need to be more conservative when we see
			// events in a directory that has not been fully scanned, just to
			// avoid races. i.e. what if we are in the middle of crawling that
			// directory and haven't reached this file yet? Then the rename
			// could be processed before the file is even in the index.
			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  unique_id,
							  new_dir.UniqueId,
							  old_path);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			ThisScheduler.Add (task, Scheduler.AddType.BlockUntilNoCollision);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Configuration stuff
		//

		public IList Roots {
			get {
				return roots_by_path;
			}
		}

		private void LoadConfiguration ()
		{
			if (Conf.Indexing.IndexHomeDir)
				AddRoot (PathFinder.HomeDir);

			foreach (string root in Conf.Indexing.Roots)
				AddRoot (root);

			Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
		}

		private void OnConfigurationChanged (Conf.Section section)
		{
			ArrayList roots_wanted = new ArrayList (Conf.Indexing.Roots);

			if (Conf.Indexing.IndexHomeDir)
				roots_wanted.Add (PathFinder.HomeDir);

			IList roots_to_add, roots_to_remove;
			ArrayFu.IntersectListChanges (roots_wanted, Roots, out roots_to_add, out roots_to_remove);

			foreach (string root in roots_to_remove)
				RemoveRoot (root);

			foreach (string root in roots_to_add)
				AddRoot (root);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Our magic LuceneQueryable hooks
		//

		override protected void PostAddHook (IndexerAddedReceipt receipt)
		{
			// If we just changed properties, remap to our *old* external Uri
			// to make notification work out properly.
			if (receipt.PropertyChangesOnly) {

				// FIXME: This linear search sucks --- we should
				// be able to use the fact that they are sorted.
				foreach (Property prop in receipt.Properties) {
					if (prop.Key == OldExternalUriPropKey) {
						receipt.Uri = UriFu.UriStringToUri (prop.Value);
						break;
					}
				}

				return;
			}

			PendingInfo info;
			info = pending_info_cache [receipt.Uri] as PendingInfo;
			pending_info_cache.Remove (receipt.Uri);

			// The parent directory might have run away since we were indexed
			if (info.Parent != null && ! info.Parent.IsAttached)
				return;

			Guid unique_id;
			unique_id = GuidFu.FromUri (receipt.Uri);

			FileAttributes attr;
			attr = FileAttributesStore.ReadOrCreate (info.Path, unique_id);
			attr.Path = info.Path;
			attr.LastWriteTime = info.Mtime;

			attr.FilterName = receipt.FilterName;
			attr.FilterVersion = receipt.FilterVersion;

			if (info.IsDirectory) {
				// Roots are registered under their full path,
				// everything else under just the file name.
				string name;
				if (info.Parent == null)
					name = info.Path;
				else
					name = Path.GetFileName (info.Path);
				RegisterDirectory (name, info.Parent, attr);
			}

			FileAttributesStore.Write (attr);

			// Remap the Uri so that change notification will work properly
			receipt.Uri = UriFu.PathToFileUri (info.Path);
		}

		override protected void PostRemoveHook (IndexerRemovedReceipt receipt)
		{
			// Find the cached external Uri and remap the Uri in the receipt.
			// We have to do this to make change notification work.
			Uri external_uri;
			external_uri = removed_uri_cache [receipt.Uri] as Uri;
			if (external_uri == null)
				throw new Exception ("No cached external Uri for " + receipt.Uri);
			removed_uri_cache.Remove (receipt.Uri);
			receipt.Uri = external_uri;
		}

		// Hit filter: this handles our mapping from internal->external uris,
		// and checks to see if the file is still there.
		override protected bool HitFilter (Hit hit)
		{
			string name, parent_id_uri;
			name = hit [ExactFilenamePropKey];
			if (name == null)
				return false;

			parent_id_uri = hit [ParentDirUriPropKey];
			if (parent_id_uri == null)
				return false;

			Guid parent_id;
			parent_id = GuidFu.FromUriString (parent_id_uri);

			string path;
			path = ToFullPath (name, parent_id);
			if (path == null)
				return false;

			Logger.Log.Debug ("HitFilter mapped '{0}' {1} to '{2}'",
					  name, parent_id, path);

			bool is_directory = (hit.MimeType == "inode/directory");

			bool exists;
			if (is_directory)
				exists = Directory.Exists (path);
			else
				exists = File.Exists (path);

			// If the file doesn't exist, we do not schedule a removal and
			// return false. This is to avoid "losing" files if they are
			// in a directory that has been renamed but which we haven't
			// scanned yet... if we dropped them from the index, they would
			// never get re-indexed (or at least not until the next time they
			// were touched) since they would still be stamped with EAs
			// indicating they were up-to-date. And that would be bad.
			// FIXME: It would be safe if we were in a known state, right?
			// i.e. every DirectoryModel is clean.
			if (! exists)
				return false;

			// Fetch the parent directory model from our cache to do clever
			// filtering to determine if we're ignoring it or not.
			DirectoryModel parent;
			parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

			// Check the ignore status of the hit
			if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
				return false;

			// Store the hit's internal uri in a property
			Property prop;
			prop = Property.NewKeyword ("beagle:InternalUri",
						    UriFu.UriToSerializableString (hit.Uri));
			hit.AddProperty (prop);

			// Remap the hit's uri to the external path
			hit.Uri = UriFu.PathToFileUri (path);

			return true;
		}
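
		// For example: a hit indexed under its internal uid: uri whose name and
		// parent resolve to /home/user/notes.txt (an illustrative path) leaves here
		// with hit.Uri = file:///home/user/notes.txt, while the original uid: uri is
		// kept in the beagle:InternalUri property so that GetSnippet below can still
		// find the cached text for it.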

		override public string GetSnippet (string [] query_terms, Hit hit)
		{
			// Uri remapping from a hit is easy: the internal uri
			// is stored in a property.
			Uri uri;
			uri = UriFu.UriStringToUri (hit ["beagle:InternalUri"]);

			string path;
			path = TextCache.UserCache.LookupPathRaw (uri);
			if (path == null)
				return null;

			// If this is self-cached, use the remapped Uri
			if (path == TextCache.SELF_CACHE_TAG)
				path = hit.Uri.LocalPath;

			return SnippetFu.GetSnippetFromFile (query_terms, path);
		}

		override public void Start ()
		{
			base.Start ();

			event_backend.Start (this);

			LoadConfiguration ();

			Logger.Log.Debug ("Done starting FileSystemQueryable");
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// These are the methods that the IFileEventBackend implementations should
		// call in response to events.
		//

		public void ReportEventInDirectory (string directory_name)
		{
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);
			if (dir == null)
				return;

			// We only use this information to prioritize the order in which
			// we crawl directories --- so if this directory doesn't
			// actually need to be crawled, we can safely ignore it.
			if (! dir.NeedsCrawl)
				return;

			dir.LastActivityTime = DateTime.Now;

			Logger.Log.Debug ("Saw event in '{0}'", directory_name);
		}

		public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);
			if (dir == null) {
				Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
				return;
			}

			if (is_directory)
				AddDirectory (dir, file_name);
			else
				AddFile (dir, file_name);
		}

		public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				string path;
				path = Path.Combine (directory_name, file_name);

				DirectoryModel dir;
				dir = GetDirectoryModelByPath (path);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path);
					return;
				}

				dir.WatchHandle = null;
				RemoveDirectory (dir);
			} else {
				DirectoryModel dir;
				dir = GetDirectoryModelByPath (directory_name);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
					return;
				}

				RemoveFile (dir, file_name);
			}
		}

		public void HandleMoveEvent (string old_directory_name, string old_file_name,
					     string new_directory_name, string new_file_name,
					     bool is_directory)
		{
			Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
					  old_directory_name, old_file_name,
					  new_directory_name, new_file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				DirectoryModel dir, new_parent;
				dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
				new_parent = GetDirectoryModelByPath (new_directory_name);
				MoveDirectory (dir, new_parent, new_file_name);
			} else {
				DirectoryModel old_dir, new_dir;
				old_dir = GetDirectoryModelByPath (old_directory_name);
				new_dir = GetDirectoryModelByPath (new_directory_name);
				MoveFile (old_dir, old_file_name, new_dir, new_file_name);
			}
		}

		public void HandleOverflowEvent ()
		{
			Logger.Log.Debug ("Queue overflows suck");

			// If the event queue overflowed we may have missed events,
			// so the safest thing to do is to re-crawl everything.
			RecrawlEverything ();
		}
	}
}