2 // FileSystemQueryable.cs
4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
30 using System
.Reflection
;
32 using System
.Threading
;
37 namespace Beagle
.Daemon
.FileSystemQueryable
{
39 [QueryableFlavor (Name
="Files", Domain
=QueryDomain
.Local
, RequireInotify
=false)]
40 [PropertyKeywordMapping (Keyword
="extension", PropertyName
="beagle:FilenameExtension", IsKeyword
=true, Description
="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
41 [PropertyKeywordMapping (Keyword
="ext", PropertyName
="beagle:FilenameExtension", IsKeyword
=true, Description
="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
42 public class FileSystemQueryable
: LuceneQueryable
{
44 static public bool Debug
= false;
46 private const string SplitFilenamePropKey
= "beagle:SplitFilename";
47 public const string ExactFilenamePropKey
= "beagle:ExactFilename";
48 public const string TextFilenamePropKey
= "beagle:Filename";
49 public const string NoPunctFilenamePropKey
= "beagle:NoPunctFilename";
50 public const string FilenameExtensionPropKey
= "beagle:FilenameExtension";
51 public const string ParentDirUriPropKey
= LuceneQueryingDriver
.PrivateNamespace
+ "ParentDirUri";
52 public const string IsDirectoryPropKey
= LuceneQueryingDriver
.PrivateNamespace
+ "IsDirectory";
55 // 1: Initially set to force a reindex due to NameIndex changes.
56 // 2: Overhauled everything to use new lucene infrastructure.
57 // 3: Switched to UTC for all times, changed the properties a bit.
58 // 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
59 // Make SplitFilenamePropKey unstored
60 const int MINOR_VERSION
= 4;
62 private object big_lock
= new object ();
64 private IFileEventBackend event_backend
;
66 // This is the task that walks the tree structure
67 private TreeCrawlTask tree_crawl_task
;
69 // This is the task that finds the next place that
70 // needs to be crawled in the tree and spawns off
71 // the appropriate IndexableGenerator.
72 private FileCrawlTask file_crawl_task
;
74 private ArrayList roots
= new ArrayList ();
75 private ArrayList roots_by_path
= new ArrayList ();
77 private FileNameFilter filter
;
79 // This is just a copy of the LuceneQueryable's QueryingDriver
80 // cast into the right type for doing internal->external Uri
82 private LuceneNameResolver name_resolver
;
84 //////////////////////////////////////////////////////////////////////////
86 private Hashtable cached_uid_by_path
= new Hashtable ();
88 //////////////////////////////////////////////////////////////////////////
// Sets up the file system backend: chooses a file event backend,
// creates the tree and file crawl tasks, grabs the name resolver from
// the driver, preloads the directory names, creates the file-name
// filter and subscribes to directory path expiry.
public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
{
	// Set up our event backend: inotify when it is enabled,
	// otherwise the null backend.
	if (! Inotify.Enabled) {
		Logger.Log.Debug ("Creating null file event backend");
		event_backend = new NullFileEventBackend ();
	} else {
		Logger.Log.Debug ("Starting Inotify Backend");
		event_backend = new InotifyBackend ();
	}

	// The tree crawler reports every directory it finds to AddDirectory.
	TreeCrawlTask.Handler add_directory_handler;
	add_directory_handler = new TreeCrawlTask.Handler (AddDirectory);
	tree_crawl_task = new TreeCrawlTask (add_directory_handler);
	tree_crawl_task.Source = this;

	file_crawl_task = new FileCrawlTask (this);
	file_crawl_task.Source = this;

	// Keep a typed reference to the driver, then load the directory
	// names it already knows about.
	name_resolver = (LuceneNameResolver) Driver;
	PreloadDirectoryNameInfo ();

	// Setup our file-name filter
	filter = new FileNameFilter (this);

	// Do the right thing when paths expire
	DirectoryModel.ExpireHandler expire_handler;
	expire_handler = new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
	DirectoryModel.ExpireEvent += expire_handler;
}
// Supplies the attribute store used by this backend: a
// FileAttributesStore_Mixed built from the index directory and the
// index fingerprint.
override protected IFileAttributesStore BuildFileAttributesStore ()
{
	IFileAttributesStore store;
	store = new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
	return store;
}
124 override protected LuceneQueryingDriver
BuildLuceneQueryingDriver (string index_name
,
128 return new LuceneNameResolver (index_name
, minor_version
, read_only_mode
);
// Read-only access to this backend's file-name filter.
public FileNameFilter Filter {
	get {
		return filter;
	}
}
135 //////////////////////////////////////////////////////////////////////////
138 // This is where we build our Indexables
// Attaches the filename-derived properties to 'indexable'.  The visible
// code adds, in order: the exact filename (keyword), the filename without
// its extension, a "no punctuation" variant taken from a StringBuilder,
// the lower-cased extension (unsearched), the fuzzy-divided filename
// (unstored) and the parent directory uri (unsearched).  Every property
// gets IsMutable set to 'mutable'.
141 public static void AddStandardPropertiesToIndexable (Indexable indexable
,
147 sb
= new StringBuilder ();
149 string no_ext
, ext
, no_punct
;
150 no_ext
= Path
.GetFileNameWithoutExtension (name
);
// NOTE(review): ToLower () without an explicit culture follows the
// current culture; confirm that is intended for a value that is
// matched by the "extension:" / "ext:" keywords.
151 ext
= Path
.GetExtension (name
).ToLower ();
// Scan the builder for characters that are neither letters nor digits.
// NOTE(review): what happens to those characters, and what 'sb' was
// filled with, is not visible here - verify against the full file.
154 for (int i
= 0; i
< sb
.Length
; ++i
)
155 if (! Char
.IsLetterOrDigit (sb
[i
]))
157 no_punct
= sb
.ToString ();
// Exact filename, stored as a keyword.
162 prop
= Property
.NewKeyword (ExactFilenamePropKey
, name
);
163 prop
.IsMutable
= mutable
;
164 indexable
.AddProperty (prop
);
// Filename with the extension stripped.
166 prop
= Property
.New (TextFilenamePropKey
, no_ext
);
167 prop
.IsMutable
= mutable
;
168 indexable
.AddProperty (prop
);
// The builder's contents after the punctuation pass.
170 prop
= Property
.New (NoPunctFilenamePropKey
, no_punct
);
171 prop
.IsMutable
= mutable
;
172 indexable
.AddProperty (prop
);
// Lower-cased extension, as returned by Path.GetExtension.
174 prop
= Property
.NewUnsearched (FilenameExtensionPropKey
, ext
);
175 prop
.IsMutable
= mutable
;
176 indexable
.AddProperty (prop
);
// Fuzzy-divided form of the extension-less name, added unstored.
179 str
= StringFu
.FuzzyDivide (no_ext
);
180 prop
= Property
.NewUnstored (SplitFilenamePropKey
, str
);
181 prop
.IsMutable
= mutable
;
182 indexable
.AddProperty (prop
);
// Guid.Empty means "no parent".
// NOTE(review): presumably this returns early so that no parent
// property is added - the statement itself is not visible here.
184 if (parent_id
== Guid
.Empty
)
187 str
= GuidFu
.ToUriString (parent_id
);
188 // We use the uri here to recycle terms in the index,
189 // since each directory's uri will already be indexed.
190 prop
= Property
.NewUnsearched (ParentDirUriPropKey
, str
);
191 prop
.IsMutable
= mutable
;
192 indexable
.AddProperty (prop
);
// Convenience overload that takes the parent as a DirectoryModel.  A
// null parent is forwarded as Guid.Empty, otherwise the parent's
// UniqueId is used.  The parent model itself is stashed in the
// indexable's LocalState under "Parent".
public static void AddStandardPropertiesToIndexable (Indexable indexable, string name, DirectoryModel parent, bool mutable)
{
	Guid parent_id = Guid.Empty;
	if (parent != null)
		parent_id = parent.UniqueId;

	AddStandardPropertiesToIndexable (indexable, name, parent_id, mutable);

	indexable.LocalState ["Parent"] = parent;
}
208 public static Indexable
DirectoryToIndexable (string path
,
210 DirectoryModel parent
)
214 indexable
= new Indexable (IndexableType
.Add
, GuidFu
.ToUri (id
));
215 indexable
.MimeType
= "inode/directory";
216 indexable
.NoContent
= true;
217 indexable
.Timestamp
= Directory
.GetLastWriteTimeUtc (path
);
218 } catch (IOException
) {
219 // Looks like the directory was deleted.
227 name
= Path
.GetFileName (path
);
228 AddStandardPropertiesToIndexable (indexable
, name
, parent
, true);
231 prop
= Property
.NewBool (IsDirectoryPropKey
, true);
232 prop
.IsMutable
= true; // we want this in the secondary index, for efficiency
233 indexable
.AddProperty (prop
);
235 indexable
.LocalState
["Path"] = path
;
// Builds an Add indexable for the file at 'path', keyed by the uri
// derived from 'id'.  The file's UTC mtime becomes the timestamp and
// 'crawl_mode' is recorded in Crawled.  Returns null when an
// IOException is raised while the indexable is being filled in.
public static Indexable FileToIndexable (string path, Guid id, DirectoryModel parent, bool crawl_mode)
{
	Indexable file_indexable;

	try {
		file_indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
		file_indexable.Timestamp = File.GetLastWriteTimeUtc (path);
		file_indexable.ContentUri = UriFu.PathToFileUri (path);
		file_indexable.Crawled = crawl_mode;
		file_indexable.Filtering = Beagle.IndexableFiltering.Always;
	} catch (IOException) {
		// Looks like the file was deleted.
		return null;
	}

	string file_name = Path.GetFileName (path);
	AddStandardPropertiesToIndexable (file_indexable, file_name, parent, true);

	// Remember the on-disk path alongside the indexable.
	file_indexable.LocalState ["Path"] = path;

	return file_indexable;
}
// Builds a PropertyChange indexable that carries the new name/parent
// properties for the item identified by 'id'.  The id and the path the
// item was last known under are kept in LocalState ("Id" and
// "LastKnownPath").
private static Indexable NewRenamingIndexable (string name, Guid id, DirectoryModel parent, string last_known_path)
{
	Indexable renaming = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

	AddStandardPropertiesToIndexable (renaming, name, parent, true);

	renaming.LocalState ["Id"] = id;
	renaming.LocalState ["LastKnownPath"] = last_known_path;

	return renaming;
}
281 //////////////////////////////////////////////////////////////////////////
284 // Mapping from directory ids to paths
287 private Hashtable dir_models_by_id
= new Hashtable ();
288 private Hashtable name_info_by_id
= new Hashtable ();
// We fall back to using the name information in the index
// until we've fully constructed our set of DirectoryModels.
// This copies every directory NameInfo reported by the name resolver
// into name_info_by_id, keyed by the NameInfo's Id.
private void PreloadDirectoryNameInfo ()
{
	foreach (LuceneNameResolver.NameInfo dir_info in name_resolver.GetAllDirectoryNameInfo ()) {
		name_info_by_id [dir_info.Id] = dir_info;
	}
}
// This only works for directories.
// A live DirectoryModel is consulted first; failing that, the cached
// NameInfo is used and the path is assembled by walking up through the
// parent ids.  Returns null when the id (or one of its ancestors)
// cannot be resolved.
private string UniqueIdToDirectoryName (Guid id)
{
	DirectoryModel model = dir_models_by_id [id] as DirectoryModel;
	if (model != null)
		return model.FullName;

	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return null;

	// An empty parent id means this is a root, whose name is used as-is.
	if (cached.ParentId == Guid.Empty)
		return cached.Name;

	string parent_path = UniqueIdToDirectoryName (cached.ParentId);
	if (parent_path == null)
		return null;

	return Path.Combine (parent_path, cached.Name);
}
// Updates the cached NameInfo for 'id' (if there is one) with a new
// parent id and name.  Ids without a cached NameInfo are left alone.
private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
{
	LuceneNameResolver.NameInfo cached;
	cached = name_info_by_id [id] as LuceneNameResolver.NameInfo;
	if (cached == null)
		return;

	cached.ParentId = new_parent_id;
	cached.Name = new_name;
}
// Combines 'name' with the path of the directory identified by
// 'parent_id'.  Returns null if the parent's path cannot be resolved.
private string ToFullPath (string name, Guid parent_id)
{
	// This is the correct behavior for roots.
	if (parent_id == Guid.Empty)
		return name;

	string parent_path = UniqueIdToDirectoryName (parent_id);
	if (parent_path != null)
		return Path.Combine (parent_path, name);

	return null;
}
// This works for both files and directories.
// Returns null if the id cannot be resolved either way.
private string UniqueIdToFullPath (Guid id)
{
	// First, check if it is a directory.
	string dir_path = UniqueIdToDirectoryName (id);
	if (dir_path != null)
		return dir_path;

	// If not, try to pull name information out of the index.
	LuceneNameResolver.NameInfo indexed = name_resolver.GetNameInfoById (id);
	return (indexed == null) ? null : ToFullPath (indexed.Name, indexed.ParentId);
}
// Remembers 'id' in cached_uid_by_path under the full path of 'name'
// inside 'dir'.
private void RegisterId (string name, DirectoryModel dir, Guid id)
{
	string full_path = Path.Combine (dir.FullName, name);
	cached_uid_by_path [full_path] = id;
}
// Drops the entry for 'path' from cached_uid_by_path.
private void ForgetId (string path)
{
	cached_uid_by_path.Remove (path);
}
// This works for files.  (It probably works for directories
// too, but you should use one of the more efficient means
// above if you know it is a directory.)
// The in-memory path cache is checked first; only on a miss is the
// name resolver asked for the id.
private Guid NameAndParentToId (string name, DirectoryModel dir)
{
	string full_path = Path.Combine (dir.FullName, name);

	if (cached_uid_by_path.Contains (full_path))
		return (Guid) cached_uid_by_path [full_path];

	return name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);
}
393 //////////////////////////////////////////////////////////////////////////
396 // Directory-related methods
399 private Hashtable dir_models_by_path
= new Hashtable ();
// Finds the DirectoryModel for 'path'.  The by-path cache is tried
// first; on a miss each root is walked in turn and the first hit is
// added to the cache.  Returns null if no root knows the path.
private DirectoryModel GetDirectoryModelByPath (string path)
{
	lock (dir_models_by_path) {
		DirectoryModel cached = dir_models_by_path [path] as DirectoryModel;
		if (cached != null)
			return cached;
	}

	// Walk each root until we find the correct path
	DirectoryModel found = null;
	foreach (DirectoryModel root in roots) {
		found = root.WalkTree (path);
		if (found == null)
			continue;

		lock (dir_models_by_path) {
			dir_models_by_path [path] = found;
		}
		break;
	}

	return found;
}
// Handler for DirectoryModel.ExpireEvent: removes the expired path
// from the by-path cache.  'unique_id' is not used here.
private void ExpireDirectoryPath (string expired_path, Guid unique_id)
{
	if (Debug) {
		Logger.Log.Debug ("Expired '{0}'", expired_path);
	}

	lock (dir_models_by_path) {
		dir_models_by_path.Remove (expired_path);
	}
}
// Considers the directory 'name' inside 'parent' for indexing and
// crawling.  When 'parent' is null, 'name' is used as the full path
// (this is how roots are added).
//
// The directory is skipped if the filter ignores it, if the parent
// already has a child with that name, or if it does not exist on disk.
// Otherwise it is scheduled for indexing when it has no stored
// attributes or its path no longer matches the one last recorded for
// its unique id; if no indexing is needed and it can be walked, it is
// registered straight away.
public void AddDirectory (DirectoryModel parent, string name)
{
	// Ignore the stuff we want to ignore.
	if (filter.Ignore (parent, name, true))
		return;

	if (parent != null && parent.HasChildWithName (name))
		return;

	string path;
	path = (parent == null) ? name : Path.Combine (parent.FullName, name);

	// The format string has a single placeholder, so only the path
	// is passed along.
	if (Debug)
		Logger.Log.Debug ("Adding directory '{0}'", path);

	if (! Directory.Exists (path)) {
		Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
		return;
	}

	FileAttributes attr;
	attr = FileAttributesStore.Read (path);

	// Note that we don't look at the mtime of a directory when
	// deciding whether or not to index it.
	bool needs_indexing = false;
	if (attr == null) {
		// If it has no attributes, it definitely needs
		// indexing.
		needs_indexing = true;
	} else {
		// Make sure that it still has the same name as before.
		// If not, we need to re-index it.
		// We can do this since we preloaded all of the name
		// info in the directory via PreloadDirectoryNameInfo.
		string last_known_name;
		last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
		if (last_known_name != path) {
			Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
			needs_indexing = true;
		}
	}

	// If we can't descend into this directory, we want to
	// index it but not build a DirectoryModel for it.
	// FIXME: We should do the right thing when a
	// directory's permissions change.
	bool is_walkable;
	is_walkable = DirectoryWalker.IsWalkable (path);
	if (! is_walkable)
		Logger.Log.Debug ("Can't walk '{0}'", path);

	if (needs_indexing)
		ScheduleDirectory (name, parent, attr, is_walkable);
	else if (is_walkable)
		RegisterDirectory (name, parent, attr);
}
// Adds 'path' (after sanitizing it) as a new crawl root.  A root that
// is already listed is reported as an error and otherwise left alone.
public void AddRoot (string path)
{
	path = StringFu.SanitizePath (path);
	Logger.Log.Debug ("Adding root: {0}", path);

	bool already_listed = roots_by_path.Contains (path);
	if (already_listed) {
		Logger.Log.Error ("Trying to add an existing root: {0}", path);
		return;
	}

	// The path has to be listed in roots_by_path before the
	// directory is added, so that the filtering works as we'd
	// like while the root is still being set up.
	roots_by_path.Add (path);

	// A null parent marks the directory as a root.
	AddDirectory (null, path);
}
// Removes the crawl root 'path': it must be listed in roots_by_path and
// have a directory model; the path is then dropped from roots_by_path
// and the model is handed to RemoveDirectory.
// NOTE(review): AddRoot runs its argument through StringFu.SanitizePath
// before storing it, while the path here is looked up exactly as given -
// confirm that every caller passes an already-sanitized path.
509 public void RemoveRoot (string path
)
511 Logger
.Log
.Debug ("Removing root: {0}", path
);
// Unknown roots are reported as an error.
513 if (! roots_by_path
.Contains (path
)) {
514 Logger
.Log
.Error ("Trying to remove a non-existing root: {0}", path
);
518 // Find our directory model for the root
520 dir
= GetDirectoryModelByPath (path
);
// Logged when the lookup above did not yield a model.
// NOTE(review): the path would then stay listed in roots_by_path, since
// the removal only happens further down - verify that is intended.
523 Logger
.Log
.Error ("Could not find directory-model for root: {0}", path
);
527 // FIXME: Make sure we're emptying the crawler task of any sub-directories
528 // to the root we're removing. It's not a big deal since we do an Ignore-check
529 // in there, but it would be nice.
531 roots_by_path
.Remove (path
);
534 // Clean out the root from our directory cache.
535 RemoveDirectory (dir
);
// Queues a delayed indexing task for a directory.  When 'attr' is null
// a fresh unique id is generated and the last crawl time is
// DateTime.MinValue; otherwise both come from the stored attributes.
// Nothing is scheduled when DirectoryToIndexable returns null.
private void ScheduleDirectory (string name, DirectoryModel parent, FileAttributes attr, bool is_walkable)
{
	string path = name;
	if (parent != null)
		path = Path.Combine (parent.FullName, name);

	Guid id;
	DateTime last_crawl;
	if (attr != null) {
		id = attr.UniqueId;
		last_crawl = attr.LastWriteTime;
	} else {
		id = Guid.NewGuid ();
		last_crawl = DateTime.MinValue;
	}

	Indexable dir_indexable = DirectoryToIndexable (path, id, parent);
	if (dir_indexable == null)
		return;

	// Carry along what is needed once the add has gone through.
	dir_indexable.LocalState ["Name"] = name;
	dir_indexable.LocalState ["LastCrawl"] = last_crawl;
	dir_indexable.LocalState ["IsWalkable"] = is_walkable;

	Scheduler.Task add_task = NewAddTask (dir_indexable);
	add_task.Priority = Scheduler.Priority.Delayed;
	ThisScheduler.Add (add_task);
}
567 private bool RegisterDirectory (string name
, DirectoryModel parent
, FileAttributes attr
)
570 path
= (parent
== null) ? name
: Path
.Combine (parent
.FullName
, name
);
573 Logger
.Log
.Debug ("Registered directory '{0}' ({1})", path
, attr
.UniqueId
);
578 mtime
= Directory
.GetLastWriteTimeUtc (path
);
579 } catch (IOException
) {
580 Log
.Debug ("Directory '{0}' ({1}) appears to have gone away", path
, attr
.UniqueId
);
586 dir
= DirectoryModel
.NewRoot (big_lock
, path
, attr
);
588 dir
= parent
.AddChild (name
, attr
);
590 if (mtime
> attr
.LastWriteTime
) {
591 dir
.State
= DirectoryState
.Dirty
;
593 Logger
.Log
.Debug ("'{0}' is dirty", path
);
598 Logger
.Log
.Debug ("Created model '{0}'", dir
.FullName
);
600 Logger
.Log
.Debug ("Created model '{0}' with parent '{1}'", dir
.FullName
, dir
.Parent
.FullName
);
603 // Add any roots we create to the list of roots
607 // Add the directory to our by-id hash, and remove any NameInfo
608 // we might have cached about it.
609 dir_models_by_id
[dir
.UniqueId
] = dir
;
610 name_info_by_id
.Remove (dir
.UniqueId
);
612 // Start watching the directory.
613 dir
.WatchHandle
= event_backend
.CreateWatch (path
);
615 // Schedule this directory for crawling.
616 if (tree_crawl_task
.Add (dir
))
617 ThisScheduler
.Add (tree_crawl_task
);
619 // Make sure that our file crawling task is active,
620 // since presumably we now have something new to crawl.
621 ActivateFileCrawling ();
// Depth-first teardown of a directory subtree: children are handled
// before the directory itself, whose watch (if it has one) is forgotten
// and whose by-id entry is removed.
private void ForgetDirectoryRecursively (DirectoryModel dir)
{
	foreach (DirectoryModel sub_dir in dir.Children) {
		ForgetDirectoryRecursively (sub_dir);
	}

	if (dir.WatchHandle != null) {
		event_backend.ForgetWatch (dir.WatchHandle);
	}

	// We rely on the expire event to remove it from dir_models_by_path
	dir_models_by_id.Remove (dir.UniqueId);
}
637 private void RemoveDirectory (DirectoryModel dir
)
640 uri
= GuidFu
.ToUri (dir
.UniqueId
);
643 indexable
= new Indexable (IndexableType
.Remove
, uri
);
645 // Remember a copy of our external Uri, so that we can
646 // easily remap it in the PostRemoveHook.
647 indexable
.LocalState
["RemovedUri"] = UriFu
.PathToFileUri (dir
.FullName
);
649 // Forget watches and internal references
650 ForgetDirectoryRecursively (dir
);
652 // Calling Remove will expire the path names,
653 // so name caches will be cleaned up accordingly.
657 task
= NewAddTask (indexable
); // We *add* the indexable to *remove* the index item
658 task
.Priority
= Scheduler
.Priority
.Immediate
;
659 ThisScheduler
.Add (task
);
// Removes the directory at 'path', provided a model for it can be found.
public void RemoveDirectory (string path)
{
	DirectoryModel model = GetDirectoryModelByPath (path);
	if (model == null)
		return;

	RemoveDirectory (model);
}
669 private void MoveDirectory (DirectoryModel dir
,
670 DirectoryModel new_parent
, // or null if we are just renaming
674 Logger
.Log
.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.",
675 new_name
, new_parent
.FullName
);
676 AddDirectory (new_parent
, new_name
);
681 throw new Exception ("Can't move root " + dir
.FullName
);
683 // We'll need this later in order to generate the
684 // right change notification.
686 old_path
= dir
.FullName
;
688 if (new_parent
!= null && new_parent
!= dir
.Parent
)
689 dir
.MoveTo (new_parent
, new_name
);
693 // Remember this by path
694 lock (dir_models_by_path
)
695 dir_models_by_path
[dir
.FullName
] = dir
;
697 CacheDirectoryNameChange (dir
.UniqueId
, dir
.Parent
.UniqueId
, new_name
);
700 indexable
= NewRenamingIndexable (new_name
,
702 dir
.Parent
, // == new_parent
704 indexable
.LocalState
["OurDirectoryModel"] = dir
;
707 task
= NewAddTask (indexable
);
708 task
.Priority
= Scheduler
.Priority
.Immediate
;
709 // Danger Will Robinson!
710 // We need to use BlockUntilNoCollision to get the correct notifications
711 // in a mv a b; mv b c; mv c a situation.
712 // FIXME: And now that type no longer exists!
713 ThisScheduler
.Add (task
);
716 //////////////////////////////////////////////////////////////////////////
719 // This code controls the directory crawl order
// Recursive helper for picking the next directory to crawl.  Returns
// the best candidate among 'prev_best', 'contender' and all of the
// contender's descendants.  A directory only competes if it needs a
// crawl, and it wins when there is no candidate yet or the current
// candidate's CompareTo against it is negative.
private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
{
	DirectoryModel best = prev_best;

	bool contender_wins = contender.NeedsCrawl
		&& (best == null || best.CompareTo (contender) < 0);
	if (contender_wins)
		best = contender;

	foreach (DirectoryModel sub_dir in contender.Children) {
		best = StupidWalk (best, sub_dir);
	}

	return best;
}
// Walks every root and returns the directory StupidWalk ranks highest,
// or null if it selects none.
public DirectoryModel GetNextDirectoryToCrawl ()
{
	DirectoryModel candidate = null;

	foreach (DirectoryModel root in roots) {
		candidate = StupidWalk (candidate, root);
	}

	return candidate;
}
745 public void DoneCrawlingOneDirectory (DirectoryModel dir
)
747 if (! dir
.IsAttached
)
751 attr
= FileAttributesStore
.Read (dir
.FullName
);
753 // Don't mark ourselves; let the crawler redo us
757 // We don't have to be super-careful about this since
758 // we only use the FileAttributes mtime on a directory
759 // to determine its initial state, not whether or not
760 // its index record is up-to-date.
761 attr
.LastWriteTime
= DateTime
.UtcNow
;
763 // ...but we do use this to decide which order directories get
765 dir
.LastCrawlTime
= DateTime
.UtcNow
;
767 FileAttributesStore
.Write (attr
);
// Flags an attached directory as uncrawlable.  Directories that are no
// longer attached are left untouched.
public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
{
	if (! dir.IsAttached)
		return;

	// If we managed to set up a watch on this directory,
	// drop it.
	object watch = dir.WatchHandle;
	if (watch != null) {
		event_backend.ForgetWatch (watch);
		dir.WatchHandle = null;
	}

	dir.MarkAsUncrawlable ();
}
// Requests a re-crawl of 'path'.  If the path itself has no directory
// model, the model of its containing directory is used instead; if
// neither exists nothing happens.  The recursive state reset is only
// done when 'path' itself had a model.
public void Recrawl (string path)
{
	// Try to find a directory model for the path specified
	// so that we can re-crawl it.
	DirectoryModel target = GetDirectoryModelByPath (path);
	bool path_is_registered = (target != null);

	if (! path_is_registered) {
		// Fall back to the directory containing the path.
		target = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));

		if (target == null) {
			Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
			return;
		}
	}

	Logger.Log.Debug ("Re-crawling {0}", target.FullName);

	if (tree_crawl_task.Add (target))
		ThisScheduler.Add (tree_crawl_task);

	if (path_is_registered)
		Recrawl_Recursive (target, DirectoryState.PossiblyClean);

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
// Re-crawls every root recursively, then makes sure both crawl tasks
// are active.
public void RecrawlEverything ()
{
	Logger.Log.Debug ("Re-crawling all directories");

	foreach (DirectoryModel root in roots) {
		Recrawl_Recursive (root, DirectoryState.PossiblyClean);
	}

	ActivateFileCrawling ();
	ActivateDirectoryCrawling ();
}
828 private void Recrawl_Recursive (DirectoryModel dir
, DirectoryState state
)
831 tree_crawl_task
.Add (dir
);
832 foreach (DirectoryModel sub_dir
in dir
.Children
)
833 Recrawl_Recursive (sub_dir
, state
);
// Hands the file crawl task to the scheduler unless it is already active.
private void ActivateFileCrawling ()
{
	if (file_crawl_task.IsActive)
		return;

	ThisScheduler.Add (file_crawl_task);
}
// Hands the tree crawl task to the scheduler unless it is already active.
private void ActivateDirectoryCrawling ()
{
	if (tree_crawl_task.IsActive)
		return;

	ThisScheduler.Add (tree_crawl_task);
}
848 //////////////////////////////////////////////////////////////////////////
851 // File-related methods
854 private enum RequiredAction
{
// Midnight, January 1st 1970: the reference point for time_t values.
static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

// Converts a count of seconds since the epoch into a DateTime.
static DateTime ToDateTimeUtc (long time_t)
{
	double seconds_since_epoch = time_t;
	DateTime converted = epoch.AddSeconds (seconds_since_epoch);
	return converted;
}
// Decides what the crawler should do with the file 'name' in 'dir'.
//
//   dir             the directory containing the file
//   name            the file's name inside 'dir'
//   attr            the attributes stored for the file, or null
//   last_known_path set to the path recorded for the file's unique id
//                   whenever that path had to be looked up; null otherwise
//
// Note that attr.UniqueId is replaced with a fresh id when the file's
// mtime has changed and its path no longer matches the recorded one.
private RequiredAction DetermineRequiredAction (DirectoryModel dir, string name, FileAttributes attr, out string last_known_path)
{
	last_known_path = null;

	string path;
	path = Path.Combine (dir.FullName, name);

	if (Debug)
		Logger.Log.Debug ("*** What should we do with {0}?", path);

	if (filter.Ignore (dir, name, false)) {
		// If there are attributes on the file, we must have indexed
		// it previously.  Since we are ignoring it now, we should strip
		// any file attributes from it.
		if (attr != null) {
			if (Debug)
				Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
			return RequiredAction.Forget;
		}
		if (Debug)
			Logger.Log.Debug ("*** Do nothing: File is ignored");
		return RequiredAction.None;
	}

	if (attr == null) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: File has no attributes");
		return RequiredAction.Index;
	}

	// FIXME: This does not take into account that we might have a better
	// matching filter to use now.  That, however, is kind of expensive to
	// figure out since we'd have to do mime-sniffing.
	if (attr.FilterName != null && attr.FilterVersion > 0) {
		int current_filter_version;
		current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

		if (current_filter_version > attr.FilterVersion) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
			return RequiredAction.Index;
		}
	}

	Mono.Unix.Native.Stat stat;
	int stat_result;
	try {
		stat_result = Mono.Unix.Native.Syscall.stat (path, out stat);
	} catch (Exception ex) {
		Logger.Log.Debug ("Caught exception stat-ing {0}", path);
		Logger.Log.Debug (ex);
		return RequiredAction.None;
	}

	// stat reports failure through its return value rather than by
	// throwing.  After a failed call the timestamps in 'stat' mean
	// nothing, so treat it the same way as an exception instead of
	// comparing them against the stored attributes.
	if (stat_result != 0) {
		Logger.Log.Debug ("Could not stat {0}", path);
		return RequiredAction.None;
	}

	DateTime last_write_time, last_attr_time;
	last_write_time = ToDateTimeUtc (stat.st_mtime);
	last_attr_time = ToDateTimeUtc (stat.st_ctime);

	if (attr.LastWriteTime != last_write_time) {
		if (Debug)
			Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

		// If the file has been copied, it will have the
		// original file's EAs.  Thus we have to check to
		// make sure that the unique id in the EAs actually
		// belongs to this file.  If not, replace it with a new one.
		// (Thus touching & then immediately renaming a file can
		// cause its unique id to change, which is less than
		// optimal but probably can't be helped.)
		last_known_path = UniqueIdToFullPath (attr.UniqueId);
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
			attr.UniqueId = Guid.NewGuid ();
		}

		return RequiredAction.Index;
	}

	// If the inode ctime is newer than the last time we last
	// set file attributes, we might have been moved.  We don't
	// strictly compare times due to the fact that although
	// setting xattrs changes the ctime, if we don't have write
	// access our metadata will be stored in sqlite, and the
	// ctime will be at some point in the past.
	if (attr.LastAttrTime < last_attr_time) {
		if (Debug)
			Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

		last_known_path = UniqueIdToFullPath (attr.UniqueId);

		if (last_known_path == null) {
			if (Debug)
				Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
			return RequiredAction.Index;
		}

		// If the name has changed but the mtime
		// hasn't, the only logical conclusion is that
		// the file has been renamed.
		if (path != last_known_path) {
			if (Debug)
				Logger.Log.Debug ("*** Rename it: CTime and path has changed");
			return RequiredAction.Rename;
		}
	}

	// We don't have to do anything, which is always preferable.
	if (Debug)
		Logger.Log.Debug ("*** Do nothing");
	return RequiredAction.None;
}
// Return an indexable that will do the right thing with a file
// (or null, if the right thing is to do nothing).
// Index produces an Add indexable, Rename a renaming indexable; Forget
// drops the file's stored attributes and produces nothing.
public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);
	FileAttributes attr = FileAttributesStore.Read (path);

	string last_known_path;
	RequiredAction action = DetermineRequiredAction (dir, name, attr, out last_known_path);
	if (action == RequiredAction.None)
		return null;

	// Read the id only now: DetermineRequiredAction may have
	// assigned a new one to the attributes.
	Guid unique_id = (attr != null) ? attr.UniqueId : Guid.NewGuid ();

	if (action == RequiredAction.Index)
		return FileToIndexable (path, unique_id, dir, true);

	if (action == RequiredAction.Rename)
		return NewRenamingIndexable (name, unique_id, dir, last_known_path);

	if (action == RequiredAction.Forget)
		FileAttributesStore.Drop (path);

	return null;
}
// Schedules the file 'name' in 'dir' for immediate indexing.  Files
// that do not exist or that the filter ignores are skipped.
public void AddFile (DirectoryModel dir, string name)
{
	string path = Path.Combine (dir.FullName, name);

	if (! File.Exists (path) || filter.Ignore (dir, name, false))
		return;

	// If this file already has extended attributes,
	// make sure that the name matches the file
	// that is in the index.  If not, it could be
	// a copy of an already-indexed file and should
	// be assigned a new unique id.
	Guid unique_id = Guid.Empty;
	FileAttributes attr = FileAttributesStore.Read (path);
	if (attr != null) {
		LuceneNameResolver.NameInfo indexed;
		indexed = name_resolver.GetNameInfoById (attr.UniqueId);

		bool is_same_file = indexed != null
			&& indexed.Name == name
			&& indexed.ParentId == dir.UniqueId;
		if (is_same_file)
			unique_id = attr.UniqueId;
	}

	if (unique_id == Guid.Empty)
		unique_id = Guid.NewGuid ();

	RegisterId (name, dir, unique_id);

	Indexable file_indexable = FileToIndexable (path, unique_id, dir, false);
	if (file_indexable == null)
		return;

	Scheduler.Task add_task = NewAddTask (file_indexable);
	add_task.Priority = Scheduler.Priority.Immediate;
	ThisScheduler.Add (add_task);
}
1071 public void RemoveFile (DirectoryModel dir
, string name
)
1073 // FIXME: We might as well remove it, even if it was being ignored.
1077 unique_id
= NameAndParentToId (name
, dir
);
1078 if (unique_id
== Guid
.Empty
) {
1079 Logger
.Log
.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone",
1080 name
, dir
.FullName
);
1085 uri
= GuidFu
.ToUri (unique_id
);
1086 file_uri
= UriFu
.PathToFileUri (Path
.Combine (dir
.FullName
, name
));
1088 Indexable indexable
;
1089 indexable
= new Indexable (IndexableType
.Remove
, uri
);
1090 indexable
.LocalState
["RemovedUri"] = file_uri
;
1092 Scheduler
.Task task
;
1093 task
= NewAddTask (indexable
);
1094 task
.Priority
= Scheduler
.Priority
.Immediate
;
1095 ThisScheduler
.Add (task
);
// Handles a file being moved or renamed from old_dir/old_name to
// new_dir/new_name.
//
// If both locations are ignored nothing happens.  A move out of an
// ignored location is treated as an add of the new file; a move into an
// ignored location is treated as a removal of the old file.  Otherwise
// the file's unique id is carried over to the new path and a renaming
// indexable is scheduled immediately; if the id cannot be found the
// move is treated as an add.
public void MoveFile (DirectoryModel old_dir, string old_name,
		      DirectoryModel new_dir, string new_name)
{
	bool old_ignore, new_ignore;
	old_ignore = filter.Ignore (old_dir, old_name, false);
	new_ignore = filter.Ignore (new_dir, new_name, false);

	if (old_ignore && new_ignore)
		return;

	// If our ignore-state is changing, synthesize the appropriate
	// add or remove instead of a rename.
	if (old_ignore && ! new_ignore) {
		AddFile (new_dir, new_name);
		return;
	}

	if (! old_ignore && new_ignore) {
		// The index only knows the file under its old name and
		// parent, so that is what has to be removed.
		RemoveFile (old_dir, old_name);
		return;
	}

	// We need to find the file's unique id.
	// We can't look at the extended attributes w/o making
	// assumptions about whether they follow around the
	// file (EAs) or the path (sqlite)...
	Guid unique_id;
	unique_id = NameAndParentToId (old_name, old_dir);
	if (unique_id == Guid.Empty) {
		// If we can't find the unique ID, we have to
		// assume that the original file never made it
		// into the index --- thus we treat this as
		// an add.
		AddFile (new_dir, new_name);
		return;
	}

	RegisterId (new_name, new_dir, unique_id);

	string old_path;
	old_path = Path.Combine (old_dir.FullName, old_name);

	ForgetId (old_path);

	// FIXME: I think we need to be more conservative when we see
	// events in a directory that has not been fully scanned, just to
	// avoid races.  i.e. what if we are in the middle of crawling that
	// directory and haven't reached this file yet?
	Indexable indexable;
	indexable = NewRenamingIndexable (new_name,
					  unique_id,
					  new_dir,
					  old_path);

	Scheduler.Task task;
	task = NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// Danger Will Robinson!
	// We need to use BlockUntilNoCollision to get the correct notifications
	// in a mv a b; mv b c; mv c a situation.
	// FIXME: And now AddType no longer exists
	ThisScheduler.Add (task);
}
1164 //////////////////////////////////////////////////////////////////////////
1166 // Configuration stuff
// The paths of the currently configured roots.  This hands out the
// roots_by_path list itself, not a copy.
public IList Roots {
	get {
		return roots_by_path;
	}
}
// Adds the roots named by the indexing configuration (the home
// directory, if enabled, followed by every configured root) and
// subscribes to changes of the indexing configuration.
private void LoadConfiguration ()
{
	if (Conf.Indexing.IndexHomeDir) {
		AddRoot (PathFinder.HomeDir);
	}

	foreach (string configured_root in Conf.Indexing.Roots) {
		AddRoot (configured_root);
	}

	Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
}
// Configuration callback: works out which roots the configuration now
// asks for, compares that with the current roots, and removes/adds
// roots accordingly (removals first).
private void OnConfigurationChanged (Conf.Section section)
{
	ArrayList wanted = new ArrayList (Conf.Indexing.Roots);
	if (Conf.Indexing.IndexHomeDir)
		wanted.Add (PathFinder.HomeDir);

	IList to_add, to_remove;
	ArrayFu.IntersectListChanges (wanted, Roots, out to_add, out to_remove);

	foreach (string stale_root in to_remove) {
		RemoveRoot (stale_root);
	}

	foreach (string new_root in to_add) {
		AddRoot (new_root);
	}
}
1202 //////////////////////////////////////////////////////////////////////////
1205 // Our magic LuceneQueryable hooks
// Override of the LuceneQueryable hook that receives a child Indexable and
// answers with a bool.
// NOTE(review): the return statement is not visible in this listing. Going
// by the FIXME below, the hook drops every child indexable of a file system
// object (presumably by returning false) -- confirm.
1208 override protected bool PreChildAddHook (Indexable child
)
1210 // FIXME: Handling Uri remapping of children is tricky, and there
1211 // is also the issue of properly serializing file: uris that
1212 // contain fragments. For now we just punt it all by dropping
1213 // any child indexables of file system objects.
// Override of the LuceneQueryable hook that receives an Indexable together
// with its IndexerAddedReceipt.
//
// Two paths are visible:
//   * PropertyChange indexables: the receipt's Uri is remapped to the file
//     Uri of LocalState["LastKnownPath"] and that path is passed to ForgetId.
//   * Otherwise: file attributes are read or created for LocalState["Path"]
//     under the id derived from the receipt's Uri, stamped with the
//     indexable's timestamp and the receipt's filter name/version, written
//     back, and the receipt's Uri is remapped to the file Uri of the path.
//
// NOTE(review): several lines (local declarations, closing braces and,
// presumably, early returns) are missing from this listing, so the exact
// control flow between the steps below must be confirmed in the full file.
1217 override protected void PostAddHook (Indexable indexable
, IndexerAddedReceipt receipt
)
1219 // If we just changed properties, remap to our *old* external Uri
1220 // to make notification work out properly.
1221 if (indexable
.Type
== IndexableType
.PropertyChange
) {
1223 string last_known_path
;
1224 last_known_path
= (string) indexable
.LocalState
["LastKnownPath"];
1225 receipt
.Uri
= UriFu
.PathToFileUri (last_known_path
);
1226 Logger
.Log
.Debug ("Last known path is {0}", last_known_path
);
1228 // This rename is now in the index, so we no longer need to keep
1229 // track of the uid in memory.
1230 ForgetId (last_known_path
);
// "Path" and "Parent" are read back from the indexable's LocalState.
1236 path
= (string) indexable
.LocalState
["Path"];
1239 DirectoryModel parent
;
// 'as' yields null when the entry is absent or is not a DirectoryModel.
1240 parent
= indexable
.LocalState
["Parent"] as DirectoryModel
;
1242 // The parent directory might have run away since we were indexed
// NOTE(review): the statement guarded by this check is not visible here;
// presumably the hook bails out for a detached parent -- confirm.
1243 if (parent
!= null && ! parent
.IsAttached
)
// The unique id is derived from the receipt's Uri, before that Uri is
// remapped to the external file Uri at the end of this method.
1247 unique_id
= GuidFu
.FromUri (receipt
.Uri
);
1249 FileAttributes attr
;
1250 attr
= FileAttributesStore
.ReadOrCreate (path
, unique_id
);
// Stamp the attributes with the indexable's timestamp and the filter that
// the receipt reports.
1252 attr
.LastWriteTime
= indexable
.Timestamp
;
1254 attr
.FilterName
= receipt
.FilterName
;
1255 attr
.FilterVersion
= receipt
.FilterVersion
;
// A non-null "IsWalkable" entry routes the item through RegisterDirectory.
1257 if (indexable
.LocalState
["IsWalkable"] != null) {
1259 name
= (string) indexable
.LocalState
["Name"];
// NOTE(review): the statement taken when RegisterDirectory returns false is
// not visible in this listing -- confirm.
1261 if (! RegisterDirectory (name
, parent
, attr
))
1265 FileAttributesStore
.Write (attr
);
1267 // Remap the Uri so that change notification will work properly
1268 receipt
.Uri
= UriFu
.PathToFileUri (path
);
// Override of the LuceneQueryable hook that receives an Indexable together
// with its IndexerRemovedReceipt.
// Replaces the receipt's Uri with the Uri cached in
// LocalState["RemovedUri"] and passes that Uri's LocalPath to ForgetId.
// Throws a plain Exception when no Uri is cached (or the cached entry is
// not a Uri).
1271 override protected void PostRemoveHook (Indexable indexable
, IndexerRemovedReceipt receipt
)
1273 // Find the cached external Uri and remap the Uri in the receipt.
1274 // We have to do this to make change notification work.
// 'as' yields null both for a missing entry and for a non-Uri value.
1276 external_uri
= indexable
.LocalState
["RemovedUri"] as Uri
;
1277 if (external_uri
== null)
// The message is built from the receipt's Uri as it is before the remap.
1278 throw new Exception ("No cached external Uri for " + receipt
.Uri
);
1279 receipt
.Uri
= external_uri
;
1280 ForgetId (external_uri
.LocalPath
);
// Rewrites a hit's Uri into a file Uri built from a path.
// The original Uri is first stored on the hit in the unsearched
// "beagle:InternalUri" property (GetSnippet reads it back). The path is then
// obtained either from the hit's guid (UniqueIdToFullPath) or from the
// filename and parent-directory properties (ToFullPath).
// Returns a bool; HitFilter tests the result with '!'.
//
// NOTE(review): the 'if'/'else' headers, the return statements and some
// local declarations are missing from this listing, so which branch runs
// when, and exactly when false is returned, must be confirmed in the full
// file.
1283 private bool RemapUri (Hit hit
)
1285 // Store the hit's internal uri in a property
1287 prop
= Property
.NewUnsearched ("beagle:InternalUri",
1288 UriFu
.UriToSerializableString (hit
.Uri
));
1289 hit
.AddProperty (prop
);
1291 // Now assemble the path by looking at the parent and name
1293 name
= hit
[ExactFilenamePropKey
];
1295 // If we don't have the filename property, we have to do a lookup
1296 // based on the guid. This happens with synthetic hits produced by
// Guid-based lookup: the id comes from the hit's Uri.
1299 hit_id
= GuidFu
.FromUri (hit
.Uri
);
1300 path
= UniqueIdToFullPath (hit_id
);
// Name-based lookup: needs the parent directory's Uri property as well.
1302 string parent_id_uri
;
1303 parent_id_uri
= hit
[ParentDirUriPropKey
];
// NOTE(review): the statement taken for a missing parent Uri is not
// visible here; presumably the method gives up -- confirm.
1304 if (parent_id_uri
== null)
1308 parent_id
= GuidFu
.FromUriString (parent_id_uri
);
1310 path
= ToFullPath (name
, parent_id
);
// Debug log for a lookup that produced no path. The guarding condition is
// not visible in this listing.
1312 Logger
.Log
.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'",
1313 name
, GuidFu
.ToShortString (parent_id
));
// Finally replace the hit's Uri with the file Uri for the path.
1317 hit
.Uri
= UriFu
.PathToFileUri (path
);
1324 // Hit filter: this handles our mapping from internal->external uris,
1325 // and checks to see if the file is still there.
// Steps visible below: remap the hit's Uri with RemapUri, take the local
// path from the remapped Uri, decide whether the hit is a directory, check
// for existence with Directory.Exists or File.Exists, and finally ask
// filter.Ignore about the entry.
//
// NOTE(review): the return statements and several 'if'/'else' headers are
// missing from this listing; the returned values must be confirmed in the
// full file. 'old_uri' is assigned but not used in the visible lines.
1326 override protected bool HitFilter (Hit hit
)
1328 Uri old_uri
= hit
.Uri
;
// NOTE(review): the statement taken when the remap fails is not visible;
// presumably the hit is rejected -- confirm.
1330 if (! RemapUri (hit
))
// From here on hit.Uri is the Uri that RemapUri assigned.
1334 path
= hit
.Uri
.LocalPath
;
1337 bool exists
= false;
// Directory detection: by mime type first...
1339 is_directory
= hit
.MimeType
== "inode/directory";
// ...and, for file Uris without any mime type, by probing the file system.
1341 if (hit
.MimeType
== null && hit
.Uri
.IsFile
&& Directory
.Exists (path
)) {
1342 is_directory
= true;
// One of the two checks below sets 'exists'; presumably selected by
// is_directory (the selecting 'if'/'else' is not visible here).
1348 exists
= Directory
.Exists (path
);
1350 exists
= File
.Exists (path
);
1353 // If the file doesn't exist, we do not schedule a removal and
1354 // return false. This is to avoid "losing" files if they are
1355 // in a directory that has been renamed but which we haven't
1356 // scanned yet... if we dropped them from the index, they would
1357 // never get re-indexed (or at least not until the next time they
1358 // were touched) since they would still be stamped with EAs
1359 // indicating they were up-to-date. And that would be bad.
1360 // FIXME: It would be safe if we were in a known state, right?
1361 // i.e. every DirectoryModel is clean.
1365 // Fetch the parent directory model from our cache to do clever
1366 // filtering to determine if we're ignoring it or not.
1367 DirectoryModel parent
;
1368 parent
= GetDirectoryModelByPath (Path
.GetDirectoryName (path
));
1370 // Check the ignore status of the hit
// The filter receives the parent model, the bare file name and the
// directory flag. The statement guarded by this test is not visible here.
1371 if (filter
.Ignore (parent
, Path
.GetFileName (path
), is_directory
))
// Produces a snippet for a hit by handing the query terms and a file path
// to SnippetFu.GetSnippetFromFile and returning its result.
// The path is looked up in TextCache.UserCache under the hit's internal
// Uri, which is read from the "beagle:InternalUri" property (RemapUri
// stores it there). When the lookup yields TextCache.SELF_CACHE_TAG, the
// local path of the hit's own Uri is used instead.
//
// NOTE(review): the lines between the lookup and the self-cache test are
// missing from this listing; presumably they handle a failed lookup --
// confirm.
1377 override public string GetSnippet (string [] query_terms
, Hit hit
)
1379 // Uri remapping from a hit is easy: the internal uri
1380 // is stored in a property.
1382 uri
= UriFu
.UriStringToUri (hit
["beagle:InternalUri"]);
1385 path
= TextCache
.UserCache
.LookupPathRaw (uri
);
1390 // If this is self-cached, use the remapped Uri
1391 if (path
== TextCache
.SELF_CACHE_TAG
)
1392 path
= hit
.Uri
.LocalPath
;
1394 return SnippetFu
.GetSnippetFromFile (query_terms
, path
);
// Startup override. In the visible lines it starts the event backend,
// passing this queryable to it, then loads the configuration and logs
// completion at debug level.
// NOTE(review): the first lines of the body are missing from this listing
// (presumably a call to the base implementation) -- confirm.
1397 override public void Start ()
// The backend is started before LoadConfiguration runs.
1401 event_backend
.Start (this);
1403 LoadConfiguration ();
1405 Logger
.Log
.Debug ("Done starting FileSystemQueryable");
1408 //////////////////////////////////////////////////////////////////////////
1410 // These are the methods that the IFileEventBackend implementations should
1411 // call in response to events.
// Records activity in a directory: looks up the DirectoryModel for
// directory_name and, for a model that needs crawling, sets its
// LastActivityTime to DateTime.Now and logs the event at debug level.
// Per the comments below, this only influences crawl ordering and failures
// are ignored silently.
// NOTE(review): the statements for the failed lookup and for the
// '! dir.NeedsCrawl' case are not visible in this listing; presumably both
// return early -- confirm.
1413 public void ReportEventInDirectory (string directory_name
)
1416 dir
= GetDirectoryModelByPath (directory_name
);
1418 // If something goes wrong, just fail silently.
1422 // We only use this information to prioritize the order in which
1423 // we crawl directories --- so if this directory doesn't
1424 // actually need to be crawled, we can safely ignore it.
1425 if (! dir
.NeedsCrawl
)
// Local wall-clock time is recorded.
1428 dir
.LastActivityTime
= DateTime
.Now
;
1430 Logger
.Log
.Debug ("Saw event in '{0}'", directory_name
);
// Event-backend entry point for a newly added file or directory.
//   directory_name - path used to look up the containing DirectoryModel
//   file_name      - name of the new entry, passed to AddDirectory/AddFile
//   is_directory   - logged as "(dir)" or "(file)"
// Logs the event, looks up the DirectoryModel for directory_name, logs a
// warning about a failed lookup, and forwards to AddDirectory or AddFile.
// NOTE(review): the condition guarding the warning and the 'if'/'else'
// choosing between AddDirectory and AddFile are missing from this listing;
// presumably the choice is made on is_directory -- confirm.
1433 public void HandleAddEvent (string directory_name
, string file_name
, bool is_directory
)
1435 Logger
.Log
.Debug ("*** Add '{0}' '{1}' {2}", directory_name
, file_name
,
1436 is_directory
? "(dir)" : "(file)");
1439 dir
= GetDirectoryModelByPath (directory_name
);
1441 Logger
.Log
.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name
);
1446 AddDirectory (dir
, file_name
);
1448 AddFile (dir
, file_name
);
// Event-backend entry point for a removed file or directory.
//   directory_name - path of the containing directory
//   file_name      - name of the removed entry
//   is_directory   - logged as "(dir)" or "(file)"
// Two paths are visible:
//   * one looks up the DirectoryModel of the removed entry itself (the
//     combined path), clears its WatchHandle and calls RemoveDirectory;
//   * the other looks up the model of the containing directory and calls
//     RemoveFile with the entry's name.
// Each path logs a warning about a failed lookup.
// NOTE(review): the 'if'/'else' headers and the null checks guarding the
// warnings are missing from this listing; presumably the first path is
// taken when is_directory is true -- confirm.
1451 public void HandleRemoveEvent (string directory_name
, string file_name
, bool is_directory
)
1453 Logger
.Log
.Debug ("*** Remove '{0}' '{1}' {2}", directory_name
, file_name
,
1454 is_directory
? "(dir)" : "(file)");
// Full path of the removed entry.
1458 path
= Path
.Combine (directory_name
, file_name
);
1461 dir
= GetDirectoryModelByPath (path
);
1463 Logger
.Log
.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path
);
// The watch handle is cleared before the model is handed to RemoveDirectory.
1467 dir
.WatchHandle
= null;
1468 RemoveDirectory (dir
);
// File removal works on the containing directory's model instead.
1471 dir
= GetDirectoryModelByPath (directory_name
);
1473 Logger
.Log
.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name
);
1477 RemoveFile (dir
, file_name
);
// Event-backend entry point for a moved or renamed file or directory.
//   old_directory_name / old_file_name - location before the move
//   new_directory_name / new_file_name - location after the move
// Logs the move and forwards it to one of:
//   * MoveDirectory, with the DirectoryModel of the moved directory (looked
//     up by its old full path), the model of the new parent and the new
//     name;
//   * MoveFile, with the old and new parent models and the old and new
//     names.
// NOTE(review): the end of the parameter list and the 'if'/'else' choosing
// between the two paths are missing from this listing. The log statement
// reads 'is_directory', so presumably it is the last parameter and selects
// the path -- confirm. No null checks on the looked-up models are visible.
1481 public void HandleMoveEvent (string old_directory_name
, string old_file_name
,
1482 string new_directory_name
, string new_file_name
,
1485 Logger
.Log
.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}",
1486 old_directory_name
, old_file_name
,
1487 new_directory_name
, new_file_name
,
1488 is_directory
? "(dir)" : "(file)");
// Directory move: the moved directory's own model plus its new parent.
1491 DirectoryModel dir
, new_parent
;
1492 dir
= GetDirectoryModelByPath (Path
.Combine (old_directory_name
, old_file_name
));
1493 new_parent
= GetDirectoryModelByPath (new_directory_name
);
1494 MoveDirectory (dir
, new_parent
, new_file_name
);
// File move: only the two containing directories are looked up.
1497 DirectoryModel old_dir
, new_dir
;
1498 old_dir
= GetDirectoryModelByPath (old_directory_name
);
1499 new_dir
= GetDirectoryModelByPath (new_directory_name
);
1500 MoveFile (old_dir
, old_file_name
, new_dir
, new_file_name
);
// Event-backend entry point for an overflow event. The only statement
// visible in this listing logs a debug message.
// NOTE(review): the listing ends here, so any further handling is not
// visible -- confirm against the full file.
1504 public void HandleOverflowEvent ()
1506 Logger
.Log
.Debug ("Queue overflows suck");