// FileSystemQueryable.cs
//
// Copyright (C) 2004 Novell, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
using System;
using System.Collections;
using System.IO;
using System.Reflection;
using System.Threading;

using Beagle.Util;
namespace Beagle.Daemon.FileSystemQueryable {

	[QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
	[PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
	[PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
	public class FileSystemQueryable : LuceneQueryable {
		static public new bool Debug = false;

		// History of index format changes:
		// 1: Initially set to force a reindex due to NameIndex changes.
		// 2: Overhauled everything to use new lucene infrastructure.
		// 3: Switched to UTC for all times, changed the properties a bit.
		// 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
		//    Make SplitFilenamePropKey unstored
		// 5: Keyword properties in the private namespace are no longer lower cased; this is required to
		//    offset the change in LuceneCommon.cs
		const int MINOR_VERSION = 5;

		private object big_lock = new object ();

		private IFileEventBackend event_backend;

		// This is the task that walks the tree structure
		private TreeCrawlTask tree_crawl_task;

		// This is the task that finds the next place that
		// needs to be crawled in the tree and spawns off
		// the appropriate IndexableGenerator.
		private FileCrawlTask file_crawl_task;

		private ArrayList roots = new ArrayList ();
		private ArrayList roots_by_path = new ArrayList ();

		private FileNameFilter filter;

		// This is just a copy of the LuceneQueryable's QueryingDriver
		// cast into the right type for doing internal->external Uri
		// lookups.
		private LuceneNameResolver name_resolver;

		//////////////////////////////////////////////////////////////////////////

		// Maps paths to unique ids for files we have recently scheduled or
		// renamed, so that we can resolve them before the index catches up.
		private Hashtable cached_uid_by_path = new Hashtable ();

		//////////////////////////////////////////////////////////////////////////
		public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
		{
			// Set up our event backend
			if (Inotify.Enabled) {
				Logger.Log.Debug ("Starting Inotify FSQ file event backend");
				event_backend = new InotifyBackend ();
			} else {
				Logger.Log.Debug ("Creating null FSQ file event backend");
				event_backend = new NullFileEventBackend ();
			}

			tree_crawl_task = new TreeCrawlTask (new TreeCrawlTask.Handler (AddDirectory));
			tree_crawl_task.Source = this;

			file_crawl_task = new FileCrawlTask (this);
			file_crawl_task.Source = this;

			name_resolver = (LuceneNameResolver) Driver;
			PreloadDirectoryNameInfo ();

			// Setup our file-name filter
			filter = new FileNameFilter (this);

			// Do the right thing when paths expire
			DirectoryModel.ExpireEvent +=
				new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
		}
		override protected IFileAttributesStore BuildFileAttributesStore ()
		{
			return new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
		}

		override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name,
										   int    minor_version,
										   bool   read_only_mode)
		{
			return new LuceneNameResolver (index_name, minor_version, read_only_mode);
		}

		public FileNameFilter Filter {
			get { return filter; }
		}

		//////////////////////////////////////////////////////////////////////////
		//
		// This is where we build our Indexables
		//

		public static void AddStandardPropertiesToIndexable (Indexable indexable,
								     string    name,
								     Guid      parent_id,
								     bool      mutable)
		{
			foreach (Property std_prop in Property.StandardFileProperties (name, mutable))
				indexable.AddProperty (std_prop);

			if (parent_id == Guid.Empty)
				return;

			string str = GuidFu.ToUriString (parent_id);
			// We use the uri here to recycle terms in the index,
			// since each directory's uri will already be indexed.
			Property prop = Property.NewUnsearched (Property.ParentDirUriPropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);
		}
		public static void AddStandardPropertiesToIndexable (Indexable      indexable,
								     string         name,
								     DirectoryModel parent,
								     bool           mutable)
		{
			AddStandardPropertiesToIndexable (indexable,
							  name,
							  parent == null ? Guid.Empty : parent.UniqueId,
							  mutable);

			indexable.LocalState ["Parent"] = parent;
		}
		public static Indexable DirectoryToIndexable (string         path,
							      Guid           id,
							      DirectoryModel parent)
		{
			Indexable indexable;
			try {
				indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
				indexable.MimeType = "inode/directory";
				indexable.NoContent = true;
				indexable.DisplayUri = UriFu.PathToFileUri (path);
				indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
			} catch (IOException) {
				// Looks like the directory was deleted.
				return null;
			}

			string name;
			if (parent == null)
				name = path; // roots are named by their full path
			else
				name = Path.GetFileName (path);
			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			Property prop;
			prop = Property.NewBool (Property.IsDirectoryPropKey, true);
			prop.IsMutable = true; // we want this in the secondary index, for efficiency
			indexable.AddProperty (prop);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}
		public static Indexable FileToIndexable (string         path,
							 Guid           id,
							 DirectoryModel parent,
							 bool           crawl_mode)
		{
			Indexable indexable;

			try {
				indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
				indexable.Timestamp = File.GetLastWriteTimeUtc (path);
				indexable.ContentUri = UriFu.PathToFileUri (path);
				indexable.DisplayUri = UriFu.PathToFileUri (path);
				indexable.Crawled = crawl_mode;
				indexable.Filtering = Beagle.IndexableFiltering.Always;
			} catch (IOException) {
				// Looks like the file was deleted.
				return null;
			}

			AddStandardPropertiesToIndexable (indexable, Path.GetFileName (path), parent, true);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}
		private static Indexable NewRenamingIndexable (string         name,
							       Guid           id,
							       DirectoryModel parent,
							       string         last_known_path)
		{
			Indexable indexable;
			indexable = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			indexable.LocalState ["Id"] = id;
			indexable.LocalState ["LastKnownPath"] = last_known_path;

			return indexable;
		}
		//////////////////////////////////////////////////////////////////////////

		//
		// Mapping from directory ids to paths
		//

		private Hashtable dir_models_by_id = new Hashtable ();
		private Hashtable name_info_by_id = new Hashtable ();

		// We fall back to using the name information in the index
		// until we've fully constructed our set of DirectoryModels.
		private void PreloadDirectoryNameInfo ()
		{
			ICollection all;
			all = name_resolver.GetAllDirectoryNameInfo ();
			foreach (LuceneNameResolver.NameInfo info in all)
				name_info_by_id [info.Id] = info;
		}

		// This only works for directories.
		private string UniqueIdToDirectoryName (Guid id)
		{
			DirectoryModel dir;
			dir = dir_models_by_id [id] as DirectoryModel;
			if (dir != null)
				return dir.FullName;

			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				if (info.ParentId == Guid.Empty) // i.e. this is a root
					return info.Name;

				string parent_name;
				parent_name = UniqueIdToDirectoryName (info.ParentId);
				if (parent_name == null)
					return null;

				return Path.Combine (parent_name, info.Name);
			}

			return null;
		}

		private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
		{
			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				info.ParentId = new_parent_id;
				info.Name = new_name;
			}
		}

		private string ToFullPath (string name, Guid parent_id)
		{
			// This is the correct behavior for roots.
			if (parent_id == Guid.Empty)
				return name;

			string parent_name;
			parent_name = UniqueIdToDirectoryName (parent_id);
			if (parent_name == null)
				return null;

			return Path.Combine (parent_name, name);
		}

		// This works for both files and directories.
		private string UniqueIdToFullPath (Guid id)
		{
			// First, check if it is a directory.
			string path;
			path = UniqueIdToDirectoryName (id);
			if (path != null)
				return path;

			// If not, try to pull name information out of the index.
			LuceneNameResolver.NameInfo info;
			info = name_resolver.GetNameInfoById (id);
			if (info == null)
				return null;

			return ToFullPath (info.Name, info.ParentId);
		}

		private string UniqueIdToFileName (Guid id)
		{
			LuceneNameResolver.NameInfo info;
			info = name_resolver.GetNameInfoById (id);
			if (info == null)
				return null;

			return info.Name;
		}

		private void RegisterId (string name, DirectoryModel dir, Guid id)
		{
			cached_uid_by_path [Path.Combine (dir.FullName, name)] = id;
		}

		private void ForgetId (string path)
		{
			cached_uid_by_path.Remove (path);
		}

		// This works for files.  (It probably works for directories
		// too, but you should use one of the more efficient means
		// above if you know it is a directory.)
		private Guid NameAndParentToId (string name, DirectoryModel dir)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			Guid unique_id;
			if (cached_uid_by_path.Contains (path))
				unique_id = (Guid) cached_uid_by_path [path];
			else
				unique_id = name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

			return unique_id;
		}
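		// Note on the uid cache: AddFile and MoveFile call RegisterId before
		// their indexables are scheduled, so lookups made while the indexer is
		// still catching up hit cached_uid_by_path first and only fall back to
		// the name resolver (i.e. the index) on a miss.  ForgetId drops an
		// entry again once its path has been renamed away or removed.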
		//////////////////////////////////////////////////////////////////////////

		//
		// Directory-related methods
		//

		private Hashtable dir_models_by_path = new Hashtable ();

		private DirectoryModel GetDirectoryModelByPath (string path)
		{
			DirectoryModel dir;

			lock (dir_models_by_path) {
				dir = dir_models_by_path [path] as DirectoryModel;
				if (dir != null)
					return dir;
			}

			// Walk each root until we find the correct path
			foreach (DirectoryModel root in roots) {
				dir = root.WalkTree (path);
				if (dir != null) {
					lock (dir_models_by_path)
						dir_models_by_path [path] = dir;
					break;
				}
			}

			return dir;
		}

		private void ExpireDirectoryPath (string expired_path, Guid unique_id)
		{
			if (Debug)
				Logger.Log.Debug ("Expired '{0}'", expired_path);

			lock (dir_models_by_path)
				dir_models_by_path.Remove (expired_path);
		}
		public void AddDirectory (DirectoryModel parent, string name)
		{
			// Ignore the stuff we want to ignore.
			if (filter.Ignore (parent, name, true))
				return;

			// FIXME: ! parent.HasChildWithName (name)
			if (parent != null && parent.HasChildWithName (name))
				return;

			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Adding directory '{0}'", path, name);

			if (! Directory.Exists (path)) {
				Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
				return;
			}

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			// Note that we don't look at the mtime of a directory when
			// deciding whether or not to index it.
			bool needs_indexing = false;

			if (attr == null) {
				// If it has no attributes, it definitely needs
				// to be indexed.
				needs_indexing = true;
			} else {
				// Make sure that it still has the same name as before.
				// If not, we need to re-index it.
				// We can do this since we preloaded all of the name
				// info in the directory via PreloadDirectoryNameInfo.
				string last_known_name;
				last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
				if (last_known_name != path) {
					Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
					needs_indexing = true;
				}
			}

			// If we can't descend into this directory, we want to
			// index it but not build a DirectoryModel for it.
			// FIXME: We should do the right thing when a
			// directory's permissions change.
			bool is_walkable;
			is_walkable = DirectoryWalker.IsWalkable (path);
			if (! is_walkable)
				Logger.Log.Debug ("Can't walk '{0}'", path);

			if (needs_indexing)
				ScheduleDirectory (name, parent, attr, is_walkable);
			else if (is_walkable)
				RegisterDirectory (name, parent, attr);
		}
		public void AddRoot (string path)
		{
			path = StringFu.SanitizePath (path);
			Logger.Log.Debug ("Adding root: {0}", path);

			if (roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to add an existing root: {0}", path);
				return;
			}

			// We need to have the path key in the roots hashtable
			// for the filtering to work as we'd like before the root
			// is actually added.
			roots_by_path.Add (path);

			AddDirectory (null, path);
		}
		public void RemoveRoot (string path)
		{
			Logger.Log.Debug ("Removing root: {0}", path);

			if (! roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
				return;
			}

			// Find our directory model for the root
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			if (dir == null) {
				Logger.Log.Error ("Could not find directory-model for root: {0}", path);
				return;
			}

			// FIXME: Make sure we're emptying the crawler task of any sub-directories
			// to the root we're removing.  It's not a big deal since we do an Ignore-check
			// in there, but it would be nice.

			roots_by_path.Remove (path);
			roots.Remove (dir);

			// Clean out the root from our directory cache.
			RemoveDirectory (dir);
		}
		private void ScheduleDirectory (string         name,
						DirectoryModel parent,
						FileAttributes attr,
						bool           is_walkable)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Guid id;
			id = (attr == null) ? Guid.NewGuid () : attr.UniqueId;

			DateTime last_crawl;
			last_crawl = (attr == null) ? DateTime.MinValue : attr.LastWriteTime;

			Indexable indexable;
			indexable = DirectoryToIndexable (path, id, parent);

			if (indexable != null) {
				indexable.LocalState ["Name"] = name;
				indexable.LocalState ["LastCrawl"] = last_crawl;
				indexable.LocalState ["IsWalkable"] = is_walkable;

				Scheduler.Task task;
				task = NewAddTask (indexable);
				task.Priority = Scheduler.Priority.Delayed;
				ThisScheduler.Add (task);
			}
		}
		private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

			DateTime mtime;
			try {
				mtime = Directory.GetLastWriteTimeUtc (path);
			} catch (IOException) {
				Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
				return false;
			}

			DirectoryModel dir;
			if (parent == null)
				dir = DirectoryModel.NewRoot (big_lock, path, attr);
			else
				dir = parent.AddChild (name, attr);

			if (mtime > attr.LastWriteTime) {
				dir.State = DirectoryState.Dirty;
				if (Debug)
					Logger.Log.Debug ("'{0}' is dirty", path);
			}

			if (Debug) {
				if (dir.IsRoot)
					Logger.Log.Debug ("Created model '{0}'", dir.FullName);
				else
					Logger.Log.Debug ("Created model '{0}' with parent '{1}'", dir.FullName, dir.Parent.FullName);
			}

			// Add any roots we create to the list of roots
			if (dir.IsRoot)
				roots.Add (dir);

			// Add the directory to our by-id hash, and remove any NameInfo
			// we might have cached about it.
			dir_models_by_id [dir.UniqueId] = dir;
			name_info_by_id.Remove (dir.UniqueId);

			// Start watching the directory.
			dir.WatchHandle = event_backend.CreateWatch (path);

			// Schedule this directory for crawling.
			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			// Make sure that our file crawling task is active,
			// since presumably we now have something new to crawl.
			ActivateFileCrawling ();

			return true;
		}
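		// Directory lifecycle, roughly: AddDirectory decides whether a
		// directory needs (re)indexing; ScheduleDirectory queues a
		// DirectoryToIndexable add task for it, while RegisterDirectory builds
		// the in-memory DirectoryModel, starts the file-event watch and hands
		// the directory to the tree/file crawl tasks.  Freshly indexed
		// directories are registered later, from PostChildrenIndexedHook.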
		private void ForgetDirectoryRecursively (DirectoryModel dir)
		{
			foreach (DirectoryModel child in dir.Children)
				ForgetDirectoryRecursively (child);

			if (dir.WatchHandle != null)
				event_backend.ForgetWatch (dir.WatchHandle);
			dir_models_by_id.Remove (dir.UniqueId);
			// We rely on the expire event to remove it from dir_models_by_path
		}
		private void RemoveDirectory (DirectoryModel dir)
		{
			Uri uri;
			uri = GuidFu.ToUri (dir.UniqueId);

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);

			// Remember a copy of our external Uri, so that we can
			// easily remap it in the PostRemoveHook.
			indexable.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

			// Forget watches and internal references
			ForgetDirectoryRecursively (dir);

			// Calling Remove will expire the path names,
			// so name caches will be cleaned up accordingly.
			dir.Remove ();

			Scheduler.Task task;
			task = NewAddTask (indexable); // We *add* the indexable to *remove* the index item
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveDirectory (string path)
		{
			DirectoryModel dir = GetDirectoryModelByPath (path);
			if (dir != null)
				RemoveDirectory (dir);
		}
		private void MoveDirectory (DirectoryModel dir,
					    DirectoryModel new_parent, // or null if we are just renaming
					    string new_name)
		{
			if (dir == null) {
				Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
						 new_name, new_parent.FullName);
				AddDirectory (new_parent, new_name);
				return;
			}

			if (dir.IsRoot)
				throw new Exception ("Can't move root " + dir.FullName);

			// We'll need this later in order to generate the
			// right change notification.
			string old_path;
			old_path = dir.FullName;

			if (new_parent != null && new_parent != dir.Parent)
				dir.MoveTo (new_parent, new_name);
			else
				dir.Name = new_name;

			// Remember this by path
			lock (dir_models_by_path)
				dir_models_by_path [dir.FullName] = dir;

			CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  dir.UniqueId,
							  dir.Parent, // == new_parent
							  old_path);
			indexable.LocalState ["OurDirectoryModel"] = dir;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now that type no longer exists!
			ThisScheduler.Add (task);
		}
		//////////////////////////////////////////////////////////////////////////

		//
		// This code controls the directory crawl order
		//

		private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
		{
			if (contender.NeedsCrawl) {
				if (prev_best == null || prev_best.CompareTo (contender) < 0)
					prev_best = contender;
			}

			foreach (DirectoryModel child in contender.Children)
				prev_best = StupidWalk (prev_best, child);

			return prev_best;
		}

		public DirectoryModel GetNextDirectoryToCrawl ()
		{
			DirectoryModel next_dir = null;

			foreach (DirectoryModel root in roots)
				next_dir = StupidWalk (next_dir, root);

			return next_dir;
		}

		public void DoneCrawlingOneDirectory (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			FileAttributes attr;
			attr = FileAttributesStore.Read (dir.FullName);

			// Don't mark ourselves; let the crawler redo us
			if (attr == null)
				return;

			// We don't have to be super-careful about this since
			// we only use the FileAttributes mtime on a directory
			// to determine its initial state, not whether or not
			// its index record is up-to-date.
			attr.LastWriteTime = DateTime.UtcNow;

			// ...but we do use this to decide which order directories get
			// crawled in.
			dir.LastCrawlTime = DateTime.UtcNow;

			dir.MarkAsClean ();
			FileAttributesStore.Write (attr);
		}

		public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			// If we managed to set up a watch on this directory,
			// drop it.
			if (dir.WatchHandle != null) {
				event_backend.ForgetWatch (dir.WatchHandle);
				dir.WatchHandle = null;
			}

			dir.MarkAsUncrawlable ();
		}

		public void Recrawl (string path)
		{
			// Try to find a directory model for the path specified
			// so that we can re-crawl it.
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			bool path_is_registered = true;

			if (dir == null) {
				dir = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));
				path_is_registered = false;

				if (dir == null) {
					Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
					return;
				}
			}

			Logger.Log.Debug ("Re-crawling {0}", dir.FullName);

			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			if (path_is_registered)
				Recrawl_Recursive (dir, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		public void RecrawlEverything ()
		{
			Logger.Log.Debug ("Re-crawling all directories");

			foreach (DirectoryModel root in roots)
				Recrawl_Recursive (root, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
		{
			dir.State = state;
			tree_crawl_task.Add (dir);
			foreach (DirectoryModel sub_dir in dir.Children)
				Recrawl_Recursive (sub_dir, state);
		}

		private void ActivateFileCrawling ()
		{
			if (! file_crawl_task.IsActive)
				ThisScheduler.Add (file_crawl_task);
		}

		private void ActivateDirectoryCrawling ()
		{
			if (! tree_crawl_task.IsActive)
				ThisScheduler.Add (tree_crawl_task);
		}

		//////////////////////////////////////////////////////////////////////////
		//
		// File-related methods
		//

		private enum RequiredAction {
			None,
			Index,
			Rename,
			Forget
		}
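		// In rough terms, DetermineRequiredAction below maps a file to one of
		// these actions as follows:
		//   Forget - the file is now ignored but still carries attributes.
		//   Index  - no attributes yet, a newer filter is available, or the
		//            mtime has changed since we last indexed it.
		//   Rename - the ctime is newer and the path no longer matches the
		//            last known path, while the content is unchanged.
		//   None   - nothing above applies; the index entry is up to date.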
		private RequiredAction DetermineRequiredAction (DirectoryModel dir,
								string         name,
								FileAttributes attr,
								out string     last_known_path)
		{
			last_known_path = null;

			string path;
			path = Path.Combine (dir.FullName, name);

			if (Debug)
				Logger.Log.Debug ("*** What should we do with {0}?", path);

			if (filter.Ignore (dir, name, false)) {
				// If there are attributes on the file, we must have indexed
				// it previously.  Since we are ignoring it now, we should strip
				// any file attributes from it.
				if (attr != null) {
					if (Debug)
						Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
					return RequiredAction.Forget;
				}
				if (Debug)
					Logger.Log.Debug ("*** Do nothing: File is ignored");
				return RequiredAction.None;
			}

			if (attr == null) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: File has no attributes");
				return RequiredAction.Index;
			}

			// FIXME: This does not take into account that we might have a better matching filter to use now
			// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
			if (attr.FilterName != null && attr.FilterVersion > 0) {
				int current_filter_version;
				current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

				if (current_filter_version > attr.FilterVersion) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
					return RequiredAction.Index;
				}
			}

			Mono.Unix.Native.Stat stat;
			try {
				Mono.Unix.Native.Syscall.stat (path, out stat);
			} catch (Exception ex) {
				Logger.Log.Debug (ex, "Caught exception stat-ing {0}", path);
				return RequiredAction.None;
			}

			DateTime last_write_time, last_attr_time;
			last_write_time = DateTimeUtil.UnixToDateTimeUtc (stat.st_mtime);
			last_attr_time = DateTimeUtil.UnixToDateTimeUtc (stat.st_ctime);

			if (attr.LastWriteTime != last_write_time) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})",
							  DateTimeUtil.ToString (attr.LastWriteTime),
							  DateTimeUtil.ToString (last_write_time));

				// If the file has been copied, it will have the
				// original file's EAs.  Thus we have to check to
				// make sure that the unique id in the EAs actually
				// belongs to this file.  If not, replace it with a new one.
				// (Thus touching & then immediately renaming a file can
				// cause its unique id to change, which is less than
				// optimal but probably can't be helped.)
				last_known_path = UniqueIdToFullPath (attr.UniqueId);
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
					attr.UniqueId = Guid.NewGuid ();
				}

				return RequiredAction.Index;
			}

			// If the inode ctime is newer than the last time we last
			// set file attributes, we might have been moved.  We don't
			// strictly compare times due to the fact that although
			// setting xattrs changes the ctime, if we don't have write
			// access our metadata will be stored in sqlite, and the
			// ctime will be at some point in the past.
			if (attr.LastAttrTime < last_attr_time) {
				if (Debug)
					Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})",
							  DateTimeUtil.ToString (attr.LastAttrTime),
							  DateTimeUtil.ToString (last_attr_time));

				last_known_path = UniqueIdToFullPath (attr.UniqueId);

				if (last_known_path == null) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
					return RequiredAction.Index;
				}

				// If the name has changed but the mtime
				// hasn't, the only logical conclusion is that
				// the file has been renamed.
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Rename it: CTime and path has changed");
					return RequiredAction.Rename;
				}
			}

			// We don't have to do anything, which is always preferable.
			if (Debug)
				Logger.Log.Debug ("*** Do nothing");
			return RequiredAction.None;
		}
		// Return an indexable that will do the right thing with a file
		// (or null, if the right thing is to do nothing)
		public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			RequiredAction action;
			string last_known_path;
			action = DetermineRequiredAction (dir, name, attr, out last_known_path);

			if (action == RequiredAction.None)
				return null;

			Guid unique_id;
			if (attr != null)
				unique_id = attr.UniqueId;
			else
				unique_id = Guid.NewGuid ();

			Indexable indexable = null;

			switch (action) {

			case RequiredAction.Index:
				indexable = FileToIndexable (path, unique_id, dir, true);
				break;

			case RequiredAction.Rename:
				indexable = NewRenamingIndexable (name, unique_id, dir,
								  last_known_path);
				break;

			case RequiredAction.Forget:
				FileAttributesStore.Drop (path);
				break;
			}

			return indexable;
		}
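		// The returned indexable is not scheduled here; the file crawler is
		// expected to wrap it in a scheduler task itself, presumably the same
		// way AddFile does below (NewAddTask followed by ThisScheduler.Add).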
		public void AddFile (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			if (! File.Exists (path))
				return;

			if (FileSystem.IsSpecialFile (path))
				return;

			if (filter.Ignore (dir, name, false))
				return;

			// If this file already has extended attributes,
			// make sure that the name matches the file
			// that is in the index.  If not, it could be
			// a copy of an already-indexed file and should
			// be assigned a new unique id.
			Guid unique_id = Guid.Empty;
			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			if (attr != null) {
				LuceneNameResolver.NameInfo info;
				info = name_resolver.GetNameInfoById (attr.UniqueId);
				if (info != null
				    && info.Name == name
				    && info.ParentId == dir.UniqueId)
					unique_id = attr.UniqueId;
			}

			if (unique_id == Guid.Empty)
				unique_id = Guid.NewGuid ();

			RegisterId (name, dir, unique_id);

			Indexable indexable;
			indexable = FileToIndexable (path, unique_id, dir, false);

			if (indexable != null) {
				Scheduler.Task task;
				task = NewAddTask (indexable);
				task.Priority = Scheduler.Priority.Immediate;
				ThisScheduler.Add (task);
			}
		}
		public void RemoveFile (DirectoryModel dir, string name)
		{
			// FIXME: We might as well remove it, even if it was being ignored.

			Guid unique_id;
			unique_id = NameAndParentToId (name, dir);
			if (unique_id == Guid.Empty) {
				Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
						 name, dir.FullName);
				return;
			}

			Uri uri, file_uri;
			uri = GuidFu.ToUri (unique_id);
			file_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);
			indexable.LocalState ["RemovedUri"] = file_uri;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}
		public void MoveFile (DirectoryModel old_dir, string old_name,
				      DirectoryModel new_dir, string new_name)
		{
			bool old_ignore, new_ignore;
			old_ignore = filter.Ignore (old_dir, old_name, false);
			new_ignore = filter.Ignore (new_dir, new_name, false);

			if (old_ignore && new_ignore)
				return;

			// If our ignore-state is changing, synthesize the appropriate
			// add or remove event.
			if (old_ignore && ! new_ignore) {
				AddFile (new_dir, new_name);
				return;
			}

			if (! old_ignore && new_ignore) {
				RemoveFile (new_dir, new_name);
				return;
			}

			// We need to find the file's unique id.
			// We can't look at the extended attributes w/o making
			// assumptions about whether they follow around the
			// file (EAs) or the path (sqlite)...
			Guid unique_id;
			unique_id = NameAndParentToId (old_name, old_dir);
			if (unique_id == Guid.Empty) {
				// If we can't find the unique ID, we have to
				// assume that the original file never made it
				// into the index --- thus we treat this as
				// an add.
				AddFile (new_dir, new_name);
				return;
			}

			RegisterId (new_name, new_dir, unique_id);

			string old_path;
			old_path = Path.Combine (old_dir.FullName, old_name);

			ForgetId (old_path);

			// FIXME: I think we need to be more conservative when we see
			// events in a directory that has not been fully scanned, just to
			// avoid races.  i.e. what if we are in the middle of crawling that
			// directory and haven't reached this file yet?  Then the rename
			// will fail.
			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  unique_id,
							  new_dir,
							  old_path);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now AddType no longer exists
			ThisScheduler.Add (task);
		}
		//////////////////////////////////////////////////////////////////////////

		//
		// Configuration stuff
		//

		public IList Roots {
			get {
				return roots_by_path;
			}
		}

		private void LoadConfiguration ()
		{
			if (Conf.Indexing.IndexHomeDir)
				AddRoot (PathFinder.HomeDir);

			foreach (string root in Conf.Indexing.Roots)
				AddRoot (root);

			Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
		}

		private void OnConfigurationChanged (Conf.Section section)
		{
			ArrayList roots_wanted = new ArrayList (Conf.Indexing.Roots);

			if (Conf.Indexing.IndexHomeDir)
				roots_wanted.Add (PathFinder.HomeDir);

			IList roots_to_add, roots_to_remove;
			ArrayFu.IntersectListChanges (roots_wanted, Roots, out roots_to_add, out roots_to_remove);

			foreach (string root in roots_to_remove)
				RemoveRoot (root);

			foreach (string root in roots_to_add)
				AddRoot (root);
		}
1175 // Our magic LuceneQueryable hooks
1178 override protected bool IsIndexing
{
1179 // FIXME: There is a small race window here, between the starting
1180 // of the backend and when either of these tasks first starts
1181 // running. In reality it doesn't come up much, so it's not
1183 get { return file_crawl_task.IsActive || tree_crawl_task.IsActive; }
1186 override protected void PostAddHook (Indexable indexable
, IndexerAddedReceipt receipt
)
1188 // We don't have anything to do if we are dealing with a child indexable
1189 if (indexable
.ParentUri
!= null)
1192 // If we just changed properties, remap to our *old* external Uri
1193 // to make notification work out property.
1194 if (indexable
.Type
== IndexableType
.PropertyChange
) {
1196 string last_known_path
;
1197 last_known_path
= (string) indexable
.LocalState
["LastKnownPath"];
1198 receipt
.Uri
= UriFu
.PathToFileUri (last_known_path
);
1199 Logger
.Log
.Debug ("Last known path is {0}", last_known_path
);
1201 // This rename is now in the index, so we no longer need to keep
1202 // track of the uid in memory.
1203 ForgetId (last_known_path
);
1209 path
= (string) indexable
.LocalState
["Path"];
1211 Log
.Debug ("PostAddHook for {0} ({1}) and receipt uri={2}", indexable
.Uri
, path
, receipt
.Uri
);
1213 // Remap the Uri so that change notification will work properly
1214 receipt
.Uri
= UriFu
.PathToFileUri (path
);
		override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
		{
			// Find the cached external Uri and remap the Uri in the receipt.
			// We have to do this to make change notification work.
			Uri external_uri;
			external_uri = indexable.LocalState ["RemovedUri"] as Uri;
			if (external_uri == null)
				throw new Exception ("No cached external Uri for " + receipt.Uri);
			receipt.Uri = external_uri;
			ForgetId (external_uri.LocalPath);
		}
		override protected void PostChildrenIndexedHook (Indexable indexable,
								 IndexerAddedReceipt receipt,
								 DateTime Mtime)
		{
			// There is no business here for children or if only the property changed
			if (indexable.Type == IndexableType.PropertyChange ||
			    indexable.ParentUri != null)
				return;

			string path;
			path = (string) indexable.LocalState ["Path"];

			if (Debug)
				Log.Debug ("PostChildrenIndexedHook for {0} ({1}) and receipt uri={2}", indexable.Uri, path, receipt.Uri);

			DirectoryModel parent;
			parent = indexable.LocalState ["Parent"] as DirectoryModel;

			// The parent directory might have run away since we were indexed
			if (parent != null && ! parent.IsAttached)
				return;

			Guid unique_id;
			unique_id = GuidFu.FromUri (receipt.Uri);

			FileAttributes attr;
			attr = FileAttributesStore.ReadOrCreate (path, unique_id);

			// FIXME: Should timestamp be indexable.timestamp or parameter Mtime
			attr.LastWriteTime = indexable.Timestamp;

			attr.FilterName = receipt.FilterName;
			attr.FilterVersion = receipt.FilterVersion;

			if (indexable.LocalState ["IsWalkable"] != null) {
				string name;
				name = (string) indexable.LocalState ["Name"];

				if (! RegisterDirectory (name, parent, attr))
					return;
			}

			FileAttributesStore.Write (attr);
		}
		private bool RemapUri (Hit hit)
		{
			// Store the hit's internal uri in a property
			Property prop;
			prop = Property.NewUnsearched ("beagle:InternalUri",
						       UriFu.UriToEscapedString (hit.Uri));
			hit.AddProperty (prop);

			// Now assemble the path by looking at the parent and name
			string name = null, path, is_child;
			is_child = hit [Property.IsChildPropKey];

			if (is_child == "true")
				name = hit ["parent:" + Property.ExactFilenamePropKey];
			else
				name = hit [Property.ExactFilenamePropKey];

			if (name == null) {
				// If we don't have the filename property, we have to do a lookup
				// based on the guid.  This happens with synthetic hits produced by
				// the indexer.
				Guid hit_id;
				hit_id = GuidFu.FromUri (hit.Uri);
				path = UniqueIdToFullPath (hit_id);
			} else {
				string parent_id_uri = null;
				parent_id_uri = hit [Property.ParentDirUriPropKey];
				if (parent_id_uri == null)
					parent_id_uri = hit ["parent:" + Property.ParentDirUriPropKey];
				if (parent_id_uri == null)
					return false;

				Guid parent_id;
				parent_id = GuidFu.FromUriString (parent_id_uri);

				path = ToFullPath (name, parent_id);
				if (path == null)
					Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
							  name, GuidFu.ToShortString (parent_id));
			}

			if (Debug)
				Log.Debug ("Resolved {0} to {1}", hit.Uri, path);

			if (path != null) {
				hit.Uri = UriFu.PathToFileUri (path);
				return true;
			}

			return false;
		}
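		// Illustrative example (hypothetical values): a hit whose internal uri
		// is a uid: uri, whose exact-filename property is "notes.txt" and
		// whose parent-directory-uri property points at the uid of
		// /home/user/docs resolves to file:///home/user/docs/notes.txt; the
		// original uid uri is kept in the beagle:InternalUri property so that
		// GetSnippet can find the cached text later.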
		// Hit filter: this handles our mapping from internal->external uris,
		// and checks to see if the file is still there.
		override protected bool HitFilter (Hit hit)
		{
			Uri old_uri = hit.Uri;

			if (Debug)
				Log.Debug ("HitFilter ({0})", old_uri);

			if (! RemapUri (hit))
				return false;

			string path;
			path = hit.Uri.LocalPath;

			bool is_directory = false;
			bool exists = false;

			is_directory = hit.MimeType == "inode/directory";

			if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
				is_directory = true;
				exists = true;
			}

			if (! exists) {
				if (is_directory)
					exists = Directory.Exists (path);
				else
					exists = File.Exists (path);
			}

			// If the file doesn't exist, we do not schedule a removal and
			// return false.  This is to avoid "losing" files if they are
			// in a directory that has been renamed but which we haven't
			// scanned yet... if we dropped them from the index, they would
			// never get re-indexed (or at least not until the next time they
			// were touched) since they would still be stamped with EAs
			// indicating they were up-to-date.  And that would be bad.
			// FIXME: It would be safe if we were in a known state, right?
			// i.e. every DirectoryModel is clean.
			if (! exists)
				return false;

			// Fetch the parent directory model from our cache to do clever
			// filtering to determine if we're ignoring it or not.
			DirectoryModel parent;
			parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

			// If child indexable, attach the relative URI at the end
			// Relative URI starts with '#'
			string is_child = hit [Property.IsChildPropKey];
			string fragment = null;
			if (is_child == "true") {
				hit.Uri = UriFu.PathToFileUri (path, old_uri.Fragment);
				hit.ParentUri = UriFu.PathToFileUri (path);
				fragment = old_uri.Fragment;
			}

			// Check the ignore status of the hit
			if (filter.Ignore (parent, Path.GetFileName (fragment == null ? path : fragment), is_directory))
				return false;

			return true;
		}
		override public string GetSnippet (string [] query_terms, Hit hit)
		{
			// Uri remapping from a hit is easy: the internal uri
			// is stored in a property.
			Uri uri = UriFu.EscapedStringToUri (hit ["beagle:InternalUri"]);

			string path = TextCache.UserCache.LookupPathRaw (uri);

			if (path == null)
				return null;

			// If this is self-cached, use the remapped Uri
			if (path == TextCache.SELF_CACHE_TAG)
				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);

			return SnippetFu.GetSnippetFromTextCache (query_terms, path);
		}
		override public void Start ()
		{
			base.Start ();

			event_backend.Start (this);

			LoadConfiguration ();

			Logger.Log.Debug ("Done starting FileSystemQueryable");
		}
		//////////////////////////////////////////////////////////////////////////

		// These are the methods that the IFileEventBackend implementations should
		// call in response to events.

		public void ReportEventInDirectory (string directory_name)
		{
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);

			// If something goes wrong, just fail silently.
			if (dir == null)
				return;

			// We only use this information to prioritize the order in which
			// we crawl directories --- so if this directory doesn't
			// actually need to be crawled, we can safely ignore it.
			if (! dir.NeedsCrawl)
				return;

			dir.LastActivityTime = DateTime.Now;

			if (Debug)
				Logger.Log.Debug ("Saw event in '{0}'", directory_name);
		}
		public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);
			if (dir == null) {
				Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
				return;
			}

			if (is_directory)
				AddDirectory (dir, file_name);
			else
				AddFile (dir, file_name);
		}
		public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				string path;
				path = Path.Combine (directory_name, file_name);

				DirectoryModel dir;
				dir = GetDirectoryModelByPath (path);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path);
					return;
				}

				dir.WatchHandle = null;
				RemoveDirectory (dir);
			} else {
				DirectoryModel dir;
				dir = GetDirectoryModelByPath (directory_name);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
					return;
				}

				RemoveFile (dir, file_name);
			}
		}
		public void HandleMoveEvent (string old_directory_name, string old_file_name,
					     string new_directory_name, string new_file_name,
					     bool is_directory)
		{
			Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
					  old_directory_name, old_file_name,
					  new_directory_name, new_file_name,
					  is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				DirectoryModel dir, new_parent;
				dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
				new_parent = GetDirectoryModelByPath (new_directory_name);
				MoveDirectory (dir, new_parent, new_file_name);
			} else {
				DirectoryModel old_dir, new_dir;
				old_dir = GetDirectoryModelByPath (old_directory_name);
				new_dir = GetDirectoryModelByPath (new_directory_name);
				MoveFile (old_dir, old_file_name, new_dir, new_file_name);
			}
		}
		public void HandleOverflowEvent ()
		{
			Logger.Log.Debug ("Queue overflows suck");
		}
	}
}